train-and-write.F90 Source File


This file depends on

inference_engine_m.f90, which in turn depends on activation_strategy_m.f90, differentiable_activation_strategy_m.f90, hyperparameters_m.f90, inference_engine_m_.f90, input_output_pair_m.f90, kind_parameters_m.f90, mini_batch_m.f90, network_configuration_m.f90, relu_m.f90, sigmoid_m.f90, step_m.f90, swish_m.f90, tensor_m.f90, tensor_range_m.f90, trainable_engine_m.F90, training_configuration_m.f90, and ubounds_m.f90.

Source Code

! Copyright (c), The Regents of the University of California
! Terms of use are as specified in LICENSE.txt
program train_and_write
  !! This program demonstrates how to train a simple neural network starting from a randomized initial condition and 
  !! how to write the initial network and the trained network to separate JSON files.  The network has two hidden layers.
  !! The input, hidden, and output layers are all two nodes wide.  The training data has outputs that identically match
  !! the corresponding inputs.  Hence, the desired network represents an identity mapping.  With ReLU activation functions,
  !! the desired network therefore contains weights corresponding to identity matrices and biases that vanish everywhere.
  !! The initial condition corresponds to the desired network with all weights and biases perturbed by random variables
  !! that are uniformly distributed on the interval [-0.2,0.2].
  use inference_engine_m, only : &
    inference_engine_t, trainable_engine_t, mini_batch_t, tensor_t, input_output_pair_t, shuffle, relu_t
  use sourcery_m, only : string_t, file_t, command_line_t, bin_t
  use assert_m, only : assert, intrinsic_array_t
  implicit none

  type(string_t) final_network_file
  type(command_line_t) command_line

  final_network_file = string_t(command_line%flag_value("--output-file"))

  if (len(final_network_file%string())==0) then
    error stop new_line('a') // new_line('a') // &
      'Usage: fpm run --example train-and-write --profile release --flag "-fopenmp" -- --output-file "<file-name>"' 
  end if

  block
    integer, parameter :: num_pairs = 5, num_epochs = 500, num_mini_batches = 3 ! num_pairs = number of input/output pairs in the training data

    type(mini_batch_t), allocatable :: mini_batches(:)
    type(input_output_pair_t), allocatable :: input_output_pairs(:)
    type(tensor_t), allocatable :: inputs(:)
    type(trainable_engine_t)  trainable_engine
    type(bin_t), allocatable :: bins(:)
    real, allocatable :: cost(:), random_numbers(:)

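    ! Seed the random-number generator reproducibly, construct a randomly perturbed
    ! identity network, and write the untrained network to a JSON file.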
    call random_init(image_distinct=.true., repeatable=.true.)
    trainable_engine = perturbed_identity_network(perturbation_magnitude=0.2)
    call output(trainable_engine%to_inference_engine(), string_t("initial-network.json"))

    associate(num_inputs => trainable_engine%num_inputs(), num_outputs => trainable_engine%num_outputs())

      call assert(num_inputs == num_outputs, "train_and_write: # inputs == # outputs", &
        intrinsic_array_t([num_inputs, num_outputs]) &
      )
      block
        integer i, j
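        ! Element j of input tensor i equals j*i/(num_inputs*num_pairs), giving values in (0,1].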
        inputs = [(tensor_t(real([(j*i, j = 1,num_inputs)])/(num_inputs*num_pairs)), i = 1, num_pairs)]
      end block
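      ! Because the desired network is an identity mapping, the target outputs equal the inputs.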
      associate(outputs => inputs)
        input_output_pairs = input_output_pair_t(inputs, outputs)
      end associate
      block 
        integer b
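        ! Partition the training-pair indices into num_mini_batches contiguous bins.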
        bins = [(bin_t(num_items=num_pairs, num_bins=num_mini_batches, bin_number=b), b = 1, num_mini_batches)]
      end block

      allocate(random_numbers(2:size(input_output_pairs)))

      print *,"Cost"
      block
        integer e, b
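        ! Each epoch: shuffle the training pairs, regroup them into mini-batches, take one
        ! training pass with the Adam optimizer, and print the mean cost across mini-batches.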
        do e = 1,num_epochs
          call random_number(random_numbers)
          call shuffle(input_output_pairs)
          mini_batches = [(mini_batch_t(input_output_pairs(bins(b)%first():bins(b)%last())), b = 1, size(bins))]
          call trainable_engine%train(mini_batches, cost, adam=.true., learning_rate=1.5)
          print *,sum(cost)/size(cost)
        end do
      end block

      block
        real, parameter :: tolerance = 1.E-06
        integer p
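        ! With the Cray compiler (_CRAYFTN), store the inferred outputs in an allocatable
        ! array; otherwise, associate a name with the inference result directly.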
#ifdef _CRAYFTN
        type(tensor_t), allocatable :: network_outputs(:)
        network_outputs = trainable_engine%infer(inputs)
#else
        associate(network_outputs => trainable_engine%infer(inputs))
#endif
          print *," Outputs                          |&
                   Desired outputs                    |&
                   Errors"
          do p = 1, num_pairs
            print *,network_outputs(p)%values(),"|", inputs(p)%values(), "|",  network_outputs(p)%values() - inputs(p)%values()
          end do
#ifndef _CRAYFTN
        end associate
#endif
      end block

    end associate

    call output(trainable_engine%to_inference_engine(), final_network_file)

  end block

contains

  subroutine output(inference_engine, file_name)
    type(inference_engine_t), intent(in) :: inference_engine
    type(string_t), intent(in) :: file_name
    type(file_t) json_file
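    ! Convert the network to its JSON representation and write the resulting lines to file_name.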
    json_file = inference_engine%to_json()
    call json_file%write_lines(file_name)
  end subroutine

  function perturbed_identity_network(perturbation_magnitude) result(trainable_engine)
    type(trainable_engine_t) trainable_engine
    real, intent(in) :: perturbation_magnitude
    integer, parameter :: nodes_per_layer(*) = [2, 2, 2, 2]
    integer, parameter :: max_n = maxval(nodes_per_layer), layers = size(nodes_per_layer)
    integer i
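    ! Stack layers-1 copies of the 2x2 identity matrix: the weights of an exact identity network.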
    real, parameter :: identity(*,*,*) = &
      reshape(real([( [1,0], [0,1], i=1,layers-1 )]), [max_n, max_n, layers-1])
    real w_harvest(size(identity,1), size(identity,2), size(identity,3)), b_harvest(size(identity,1), size(identity,3))

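    ! Draw uniform samples on [0,1) to perturb the weights and biases.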
    call random_number(w_harvest)
    call random_number(b_harvest)

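    ! Shift and scale the samples to [-perturbation_magnitude, perturbation_magnitude) so the
    ! weights deviate from the identity matrices, and the biases from zero, by at most the
    ! specified magnitude.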
    associate(w => identity + perturbation_magnitude*(w_harvest-0.5)/0.5, b => perturbation_magnitude*(b_harvest-0.5)/0.5)

      trainable_engine = trainable_engine_t( &
        nodes = nodes_per_layer, weights = w, biases = b, differentiable_activation_strategy = relu_t(), &
        metadata = &
          [string_t("Perturbed Identity"), string_t("Damian Rouson"), string_t("2023-09-23"), string_t("relu"), string_t("false")] &
      )

    end associate
  end function

end program
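
To run this example with fpm, use the command shown in the usage message above, for example
fpm run --example train-and-write --profile release --flag "-fopenmp" -- --output-file "trained-network.json"
where "trained-network.json" is an illustrative output-file name. The run writes the untrained network to initial-network.json and the trained network to the file named by --output-file.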