def expand_onehot(x): """ Simply converts an integer to a one-hot vector of the same size as out_axis """ return ng.one_hot(x, axis=out_axis)
])

lr_schedule = {'name': 'schedule',
               'base_lr': 0.01,
               'gamma': (1 / 250.)**(1 / 3.),
               'schedule': [22, 44, 65]}

optimizer = GradientDescentMomentum(lr_schedule, 0.0, wdecay=0.0005,
                                    iteration=inputs['iteration'])
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
                                 frequency=args.iter_interval,
                                 train_computation=train_computation,
                                 total_iterations=args.num_iterations,
                                 use_progress_bar=args.progress_bar)
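A note on the choice of gamma: (1/250)**(1/3) is roughly 0.159, so assuming the 'schedule' policy multiplies the learning rate by gamma at each listed step (the conventional behavior of such schedules), after the three drops at steps 22, 44 and 65 the learning rate has been scaled by gamma three times, i.e. 0.01 * (1/250) = 4e-5.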
# Optimizer
# The 'provided' learning rate policy feeds the learning rate into the graph through a placeholder.
# This lets you control the learning rate from the host based on the state of training.
learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ph['iteration'])
label_indices = input_ph['label']

# Make a prediction
prediction = resnet(input_ph['image'])
# Calculate loss
train_loss = ng.cross_entropy_multi(prediction, ng.one_hot(label_indices, axis=ax.Y))
# Average loss over the batch
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Instantiate the Saver object to save weights
weight_saver = Saver()

with Layer.inference_mode_on():
    # Doing inference
    inference_prob = resnet(input_ph['image'])
    eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(label_indices, axis=ax.Y))

# Computation for inference
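Because the 'provided' policy only declares that the learning rate arrives through lr_ph, the actual value has to be computed on the host each iteration and fed in. Below is a minimal, hypothetical sketch of such a host-side policy; the function name, warm-up length, and decay points are made-up values, and how the result is ultimately fed to lr_ph depends on the training wrapper the example uses.

# Hypothetical host-side learning rate policy (illustration only).
# The returned value would be fed to the lr_ph placeholder each iteration.
def lr_for_iteration(iteration, base_lr=0.1, warmup_iters=500, decay_points=(32000, 48000)):
    if iteration < warmup_iters:
        # linear warm-up from 0 to base_lr
        return base_lr * (iteration + 1) / warmup_iters
    # step decay: divide by 10 at each decay point already passed
    drops = sum(iteration >= p for p in decay_points)
    return base_lr / (10 ** drops)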
# At iteration (num_iterations * 2 // 75), it is reduced by gamma again, and so on.
no_steps = 75
step = num_iterations // no_steps
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])

# Define the loss function (cross-entropy loss)
# Note that we convert the integer values of inputs['y'] to one-hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers during inference mode
with Layer.inference_mode_on():
    inference_prop = seq1(inputs['X'])
    eval_loss = ng.cross_entropy_multi(inference_prop,
                                       ng.one_hot(inputs['y'], axis=out_axis),
                                       usebits=True)
eval_computation = ng.computation([ng.mean(eval_loss, out_axes=()), inference_prop], "all")
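To make the decay concrete, the sketch below computes what the learning rate would be at a given iteration, assuming the 'schedule' policy multiplies base_lr by gamma once for every schedule entry already passed. This is a plain-Python illustration, not part of the example script.

import numpy as np

def effective_lr(iteration, schedule, base_lr=0.01, gamma=0.95):
    # number of schedule steps already passed at this iteration
    drops = int(np.searchsorted(schedule, iteration, side='right'))
    return base_lr * gamma ** drops

# With 75 steps the learning rate ends near 0.01 * 0.95 ** 74 ≈ 2.2e-4
# (the exact number of drops depends on num_iterations).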
def cifar_mean_subtract(x):
    bgr_mean = ng.persistent_tensor(
        axes=x.axes.find_by_name('C'),
        initial_value=np.array([104., 119., 127.]))
    return (x - bgr_mean) / 255.

seq1 = Sequential([Preprocess(functor=cifar_mean_subtract),
                   Affine(nout=200, weight_init=UniformInit(-0.1, 0.1), activation=Rectlin()),
                   Affine(axes=ax.Y, weight_init=UniformInit(-0.1, 0.1), activation=Softmax())])

optimizer = GradientDescentMomentum(0.1, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob, ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
    eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
    loss_computation = make_bound_computation(transformer, eval_outputs, inputs)

    cbs = make_default_callbacks(transformer=transformer,
                                 output_file=args.output_file,
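The preprocessing step subtracts a per-channel mean (in BGR order) and then scales pixel values into a small range around zero. A rough NumPy equivalent for a single HWC image is shown below; it is an illustration only, since the real version runs inside the graph along the 'C' axis.

import numpy as np

def cifar_mean_subtract_np(image_bgr):
    # image_bgr: uint8 array of shape (H, W, 3) in BGR channel order
    bgr_mean = np.array([104., 119., 127.])
    # subtract the per-channel mean, then scale to roughly [-0.5, 0.5]
    return (image_bgr.astype(np.float32) - bgr_mean) / 255.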
def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)

def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0)
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(),
                   return_sequence=True, sum_out=True)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))])
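The two choices for layer_0 differ in how a token index enters the network: either as an explicit one-hot vector of vocabulary size, or as a dense vector looked up from a trainable embedding table. The NumPy sketch below contrasts the two encodings for a single token; the sizes and the token index are made up for illustration.

import numpy as np

# Hypothetical contrast of the two input encodings (illustration only).
vocab_size, embed_dim = 8, 4
token = 3

# one-hot encoding: a sparse vector of length vocab_size
one_hot = np.eye(vocab_size)[token]

# lookup-table encoding: row `token` of a trainable embedding matrix
embedding_table = np.random.uniform(-0.08, 0.08, size=(vocab_size, embed_dim))
embedded = embedding_table[token]        # dense vector of length embed_dim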
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
one_hot_enc_out = one_hot_enc(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec(inputs['prev_tgt'])
enc_out = enc(one_hot_enc_out)
dec_out = dec(one_hot_dec_out, init_state=enc_out)
output_prob = linear(dec_out)

loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders
    axes = one_hot_dec_out.axes
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
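At inference time the decoder has no ground-truth 'prev_tgt' sequence, so generation proceeds one step at a time: the previous prediction is fed back in through decoder_input_inference until an end token (or a length limit) is reached. The loop below is a schematic, framework-agnostic sketch of that greedy procedure; decode_step, start_token and end_token are hypothetical stand-ins for the bound single-step computation and the dataset's special tokens, not names from the example script.

# Schematic greedy decoding loop (illustration only).
def greedy_decode(decode_step, start_token, end_token, max_len=100):
    token = start_token
    output = []
    for _ in range(max_len):
        probs = decode_step(token)      # one decoder step on the previous token
        token = int(probs.argmax())     # pick the most likely next token
        if token == end_token:
            break
        output.append(token)
    return output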
# Optimizer
# The 'provided' learning rate policy feeds the learning rate into the graph through a placeholder.
# This lets you control the learning rate from the host based on the state of training.
learning_rate_policy = {'name': 'provided', 'lr_placeholder': lr_ph}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=momentum_coef,
                                    wdecay=wdecay,
                                    nesterov=False,
                                    iteration=input_ph['iteration'])
label_indices = input_ph['label']

# Make a prediction
prediction = resnet(input_ph['image'])
# Calculate loss
train_loss = ng.cross_entropy_multi(prediction, ng.one_hot(label_indices, axis=ax.Y))
# Average loss over the batch
batch_cost = ng.sequential([optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

# Instantiate the Saver object to save weights
weight_saver = Saver()

with Layer.inference_mode_on():
    # Doing inference
    inference_prob = resnet(input_ph['image'])
    eval_loss = ng.cross_entropy_multi(inference_prob, ng.one_hot(label_indices, axis=ax.Y))

# Computation for inference
eval_outputs = dict(results=inference_prob, cross_ent_loss=eval_loss)

# setup wrapper for additional feed for learning rate (train only)
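As a sanity check on what cross-entropy against a one-hot target computes: for each example the loss reduces to the negative log-probability assigned to the true class. A small NumPy illustration follows; the class count and probabilities are made up and it is not part of the example script.

import numpy as np

probs = np.array([0.7, 0.2, 0.1])    # softmax output for one example
label = 0                            # true class index
target = np.eye(3)[label]            # one-hot target
loss = -np.sum(target * np.log(probs))
# equals -log(0.7) ≈ 0.357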