def test_gdm(random_learning_rate, random_momentum_coef, wdecay, nesterov,
             select_variables):
    # Setup the baseline and reference optimizers to be tested
    gdm_args = {'learning_rate': random_learning_rate,
                'momentum_coef': random_momentum_coef,
                'wdecay': wdecay,
                'nesterov': nesterov}

    gdm_ref = GDMReference(**gdm_args)
    gdm = GradientDescentMomentum(**gdm_args)

    # test baseline against reference
    if select_variables:
        compare_optimizer_variable_select(gdm, gdm_ref)
    else:
        compare_optimizer(gdm, gdm_ref)
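# --- Illustrative only ---
# A minimal sketch of the conventional gradient-descent-with-momentum update
# (weight decay folded into the gradient, optional Nesterov step). This is an
# assumption about what a reference optimizer like GDMReference computes, not
# the library's implementation; works on plain floats or NumPy arrays.
def gdm_step(param, grad, velocity, lr, momentum, wdecay=0.0, nesterov=False):
    grad = grad + wdecay * param                 # add L2 weight-decay term
    velocity = momentum * velocity - lr * grad   # update the momentum buffer
    if nesterov:
        param = param + momentum * velocity - lr * grad
    else:
        param = param + velocity
    return param, velocity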
          Pooling(pool_shape=(7, 7), strides=1, pool_type="avg"),
          Affine(axes=ax.Y, weight_init=XavierInit(), bias_init=bias_init,
                 activation=Softmax())])

lr_schedule = {'name': 'schedule',
               'base_lr': 0.01,
               'gamma': (1 / 250.)**(1 / 3.),
               'schedule': [22, 44, 65]}

optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
         Affine(weight_init=init_uni, bias_init=init_uni, activation=Softmax(),
                axes=out_axis)])

# Optimizer
# Initial learning rate is 0.01 (base_lr)
# At iteration (num_iterations // 75), lr is multiplied by gamma (new lr = .95 * .01)
# At iteration (num_iterations * 2 // 75), it is reduced by gamma again
# And so on; an illustrative sketch of this policy follows the snippet.
no_steps = 75
step = num_iterations // no_steps
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])

# Define the loss function (cross entropy loss)
# Note that we convert the integer values of inputs['y'] to one-hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
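# --- Illustrative only ---
# Sketch of how the 'schedule' learning rate policy behaves, assuming the
# standard step-schedule semantics described in the comments above: the base
# learning rate is multiplied by gamma once for every schedule entry already
# passed. Hypothetical helper, not part of the library API.
def scheduled_lr(iteration, base_lr=0.01, gamma=0.95, schedule=()):
    steps_passed = sum(1 for s in schedule if iteration >= s)
    return base_lr * gamma ** steps_passed

# e.g. with schedule=[100, 200]: lr is 0.01 before iteration 100,
# 0.0095 from iteration 100 to 199, and 0.009025 from iteration 200 onwards.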
                          args.size, en_bottleneck, num_resnet_mods,
                          batch_norm=not args.disable_batch_norm)

# Learning rate placeholder
lr_ph = ng.placeholder(axes=(), initial_value=base_lr)

# Optimizer
# The 'provided' learning rate policy takes the learning rate as a graph input
# through a placeholder, which lets the training loop adjust the learning rate
# at runtime based on the state of training (see the sketch after this snippet).
learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ph['iteration'])
label_indices = input_ph['label']

# Make a prediction
prediction = resnet(input_ph['image'])

# Calculate loss
train_loss = ng.cross_entropy_multi(prediction, ng.one_hot(label_indices, axis=ax.Y))

# Average loss over the batch
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Instantiate the Saver object to save weights
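# --- Illustrative only ---
# With the 'provided' policy above, the graph reads the learning rate from
# lr_ph, so the training loop can compute any value it likes each iteration and
# feed it in. A hypothetical warmup-then-step helper (the name, parameters, and
# feeding mechanism are assumptions, not this example's actual code):
def compute_lr(iteration, base_lr, gamma=0.1, step_points=(), warmup_iters=0):
    if warmup_iters and iteration < warmup_iters:
        return base_lr * (iteration + 1) / warmup_iters      # linear warm-up
    return base_lr * gamma ** sum(1 for s in step_points if iteration >= s)

# Each iteration, the returned value would be supplied as the value bound to
# lr_ph when the training computation is executed.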
######################
# Model specification


def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.


seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1),
                          activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1),
                          activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)