def test_rmsprop(random_learning_rate, decay_rate, epsilon):
    rmsprop_args = {
        'learning_rate': random_learning_rate,
        'epsilon': epsilon,
        'decay_rate': decay_rate
    }

    rmsprop_ref = RMSPropReference(**rmsprop_args)
    rms = RMSProp(**rmsprop_args)

    # test baseline against reference
    compare_optimizer(rms, rmsprop_ref)
def test_rmsprop(random_learning_rate, decay_rate, epsilon, select_variables):
    rmsprop_args = {
        'learning_rate': random_learning_rate,
        'epsilon': epsilon,
        'decay_rate': decay_rate
    }

    rmsprop_ref = RMSPropReference(**rmsprop_args)
    rms = RMSProp(**rmsprop_args)

    # test baseline against reference
    if select_variables:
        compare_optimizer_variable_select(rms, rmsprop_ref)
    else:
        compare_optimizer(rms, rmsprop_ref)
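# A minimal NumPy sketch of the update rule an RMSProp reference implementation
# typically applies (assumption: the standard RMSProp formulation;
# `rmsprop_reference_step` and its arguments are illustrative names, not part of
# the test suite above).
import numpy as np

def rmsprop_reference_step(param, grad, state, learning_rate, decay_rate, epsilon):
    # Keep a running average of squared gradients.
    state = decay_rate * state + (1.0 - decay_rate) * grad ** 2
    # Scale the step by the root of that running average.
    param = param - learning_rate * grad / (np.sqrt(state) + epsilon)
    return param, state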
def make_optimizer(name=None, weight_clip_value=None, loss_type="WGAN-GP"):

    if loss_type == "WGAN":
        optimizer = RMSProp(learning_rate=5e-5,
                            decay_rate=0.99,
                            epsilon=1e-8,
                            weight_clip_value=weight_clip_value)

    if loss_type == "WGAN-GP":
        optimizer = Adam(learning_rate=1e-4,
                         beta_1=0.5,
                         beta_2=0.9,
                         epsilon=1e-8,
                         weight_clip_value=weight_clip_value)

    return optimizer
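# Illustrative use of the factory above (assumption: the 0.01 clip value and the
# loss-type choices are example inputs, not taken from the surrounding code).
# Weight clipping is typically only needed for the original WGAN critic; the
# gradient-penalty variant usually runs without it.
critic_optimizer = make_optimizer(weight_clip_value=0.01, loss_type="WGAN")
generator_optimizer = make_optimizer(loss_type="WGAN-GP")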
if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   return_sequence=True, sum_out=True)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                       usebits=True)
rlayer = BiRNN(hidden_size, init, activation=Tanh(),
               reset_cells=True, return_sequence=False, sum_out=True)

# model initialization
seq1 = Sequential([LookupTable(vocab_size, embed_size, init, update=True, pad_idx=pad_idx),
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))])

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['review'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['review'])
    eval_loss = ng.cross_entropy_multi(inference_prob,
                                       ng.one_hot(inputs['label'], axis=ax.Y),
                                       usebits=True)
u_i_list.append(score)
output_prob = ng.softmax(ng.stack(u_i_list, axis=ax.Y, pos=0), ax.Y)
u_list.append(output_prob)

pointer_out = ng.stack(u_list, axis=rec_axis, pos=2)

# specify loss function, calculate loss and update weights
one_hot_target = ng.one_hot(inputs['tgt_txt'], axis=ax.Y)
loss = ng.cross_entropy_multi(pointer_out, one_hot_target, usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
optimizer = RMSProp(decay_rate=0.96, learning_rate=args.lr, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)
updates = optimizer(loss)

# provide outputs for bound computation
train_outputs = dict(batch_cost=mean_cost, updates=updates, pointer_out=pointer_out)

# Train Loop
with closing(ngt.make_transformer()) as transformer:
    # bind the computations
    train_computation = make_bound_computation(transformer, train_outputs, inputs)

    eval_frequency = 500
    loss = []

    # iterate over training set
    for idx, data in enumerate(train_set):
        train_output = train_computation(data)
rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
               gate_activation=Logistic(), return_sequence=True)
rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
               gate_activation=Logistic(), return_sequence=True)

# model initialization
seq1 = Sequential([Preprocess(functor=expand_onehot),
                   rlayer1,
                   rlayer2,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])

errors = ng.not_equal(ng.argmax(inference_prob, reduction_axes=[ax.Y]),
                      inputs['tgt_txt'])
    }
    optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                        momentum_coef=0.5,
                                        wdecay=4e-5,
                                        iteration=inputs['iteration'])
elif args.optimizer_name == 'rmsprop':
    learning_rate_policy = {'name': 'schedule',
                            'schedule': list(80000 * np.arange(1, 10, 1)),
                            'gamma': 0.94,
                            'base_lr': 0.01}
    optimizer = RMSProp(learning_rate=learning_rate_policy,
                        wdecay=4e-5,
                        decay_rate=0.9,
                        momentum_coef=0.9,
                        epsilon=1.,
                        iteration=inputs['iteration'])
else:
    raise NotImplementedError("Unrecognized Optimizer")

# Build the main and auxiliary loss functions
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
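# Sketch of how a stepped 'schedule' policy like the one above could map the
# iteration counter to a learning rate (assumption: the common step-schedule
# semantics of decaying base_lr by gamma at each boundary already passed;
# `schedule_learning_rate` is an illustrative helper, not part of the example).
import numpy as np

def schedule_learning_rate(iteration, schedule, gamma, base_lr):
    # Count how many schedule boundaries have been crossed so far.
    num_decays = np.searchsorted(schedule, iteration, side='right')
    return base_lr * gamma ** num_decays

# e.g. base_lr=0.01 is multiplied by gamma=0.94 after each 80000-iteration boundary.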
inputs = dict(X=ng.placeholder(in_axes),
              y=ng.placeholder(out_axes),
              iteration=ng.placeholder(axes=()))
preds_inputs = dict(X=inputs['X'])

# define model
n_hidden = list(map(int, args.n_hidden.split(",")))
filter_shape = list(map(int, args.filter_shape.split(",")))
if args.modeltype in ["RNN", "LSTM"]:
    seq1 = Sequential(recurrent_model.define_model(out_axis,
                                                   celltype=args.modeltype,
                                                   recurrent_units=n_hidden,
                                                   return_sequence=args.predict_seq).layers + [Rectlin()])
elif args.modeltype == "CNN":
    seq1 = convolutional_model.define_model(out_axis,
                                            filter_shapes=filter_shape,
                                            n_filters=n_hidden)
    layers_modified = [lambda op: ng.map_roles(op, {'REC': 'W', 'F': 'C'})] + seq1.layers + [Rectlin()]
    seq1 = Sequential(layers_modified)

# Optimizer
optimizer = RMSProp(learning_rate=args.lr, gradient_clip_value=args.grad_clip_value)

# Define the loss function (squared L2 loss)
fwd_prop = seq1(inputs['X'])
train_loss = ng.squared_L2(fwd_prop - inputs['y'])

# Cost calculation
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

# Forward prop of test set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
    preds = seq1(inputs['X'])
preds = ng.axes_with_order(preds, out_axes)
eval_loss = ng.mean(ng.squared_L2(preds - inputs['y']), out_axes=())
                   Affine(weight_init=init, activation=Softmax(), bias_init=init,
                          axes=(ax.Y, ax.REC))])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6)

output_prob = seq1.train_outputs(inputs['inp_txt'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)