def test_cross_entropy_multi_unmatched_axes(input_tensor):
    """If y and t have different axes, an error should be thrown immediately"""
    y = input_tensor
    feature_axis, batch_axis = y.axes
    t = ng.placeholder([ng.make_axis(feature_axis.length), batch_axis])
    with pytest.raises(ng.UnmatchedAxesError):
        ng.cross_entropy_multi(y, t)
def create_loss_and_learner(model, labels, learning_rate,
                            momentum_coef=0.0, wdecay=0.0, nesterov=False,
                            gradient_clip_norm=None, gradient_clip_value=None):
    """
    Auxiliary function to create a loss function (cross entropy and softmax)
    and a trainer using stochastic gradient descent with momentum.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array
        learning_rate - learning rate for trainer
        momentum_coef - coefficient of momentum (default 0.0)
        wdecay - amount of weight decay (default 0.0)
        nesterov - use Nesterov accelerated gradient (default False)
        gradient_clip_norm - target gradient norm (default None)
        gradient_clip_value - value to element-wise clip gradients (default None)

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        labels = ng.Transpose(labels)
    assert model.axes.lengths == labels.axes.lengths
    model = ng.cast_axes(model, axes=labels.axes)

    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    optimizer = GradientDescentMomentum(learning_rate, momentum_coef, wdecay,
                                        gradient_clip_norm, gradient_clip_value,
                                        nesterov)
    return ng.sequential([optimizer(loss), ng.mean(loss, out_axes=())])
def build_graphs(L, BS):
    """
    Builds a small MLP (tanh hidden layers, softmax output) and constructs
    its dataflow and interference graphs.

    Arguments:
        L: list of layer sizes (input, hidden layers, output)
        BS: batch size

    Returns:
        Tuple of (DataFlowGraph, InterferenceGraph)
    """
    # Axes
    L = [ng.make_axis(length=N, name='L%d' % i) for i, N in enumerate(L)]
    BS = ng.make_axis(length=BS, name='BS')

    # Builds Network
    activations = [ng.tanh for i in range(len(L) - 2)] + [ng.softmax]
    X = ng.placeholder((L[0], BS)).named('X')
    Y = ng.placeholder((L[-1],)).named('Y')
    W = [ng.variable((L_np1, L_n - 1)).named('W%d' % i)
         for i, (L_np1, L_n) in enumerate(zip(L[1:], L[:-1]))]
    A = []
    for i, f in enumerate(activations):
        Aim1 = A[i - 1] if i > 0 else X
        A.append(f(ng.dot(W[i], Aim1)))
    Error = ng.cross_entropy_multi(A[-1], Y)
    dW = [ng.deriv(Error, w) for w in W]
    transformer = ngt.make_transformer()
    dfg = an.DataFlowGraph(transformer, dW)
    ifg = an.InterferenceGraph(dfg.liveness())
    return dfg, ifg
def test_cross_entropy_rec(transformer_factory, recurrent_input_tensor):
    p_x = recurrent_input_tensor
    p_t = ng.placeholder(p_x.axes)

    cross_entropy_sm_x_t = ng.cross_entropy_multi(ng.softmax(p_x), p_t)

    x = rng.uniform(0, 1, p_x.axes)
    t = np_softmax(rng.uniform(0, 1, p_t.axes), 0)

    def f_np(x, t):
        return np_cross_entropy_multi(np_softmax(x, 0), t, axis=0)

    compare_f_at_x(cross_entropy_sm_x_t, [p_x, p_t], f_np, [x, t], rtol=1e-5)
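# NOTE: the tests in this collection call np_softmax and np_cross_entropy_multi,
# reference helpers that are not shown here. A minimal NumPy sketch of what such
# helpers typically compute (assuming natural-log cross entropy and a
# max-shifted softmax along the given axis; names match the call sites above):
import numpy as np

def np_softmax(x, axis):
    # shift by the max along `axis` so exp() cannot overflow
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)

def np_cross_entropy_multi(y, t, axis=0):
    # multiclass cross entropy: -sum_k t_k * log(y_k), reduced along `axis`
    return -np.sum(t * np.log(y), axis=axis)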
def test_cross_entropy_softmax_rec_deriv(transformer_factory, recurrent_input_tensor):
    p_x = recurrent_input_tensor
    p_t = ng.placeholder(p_x.axes)

    x = rng.uniform(0, 1, p_x.axes)
    t = np_softmax(rng.uniform(0, 1, p_t.axes), 0)

    check_derivative(
        ng.cross_entropy_multi(ng.softmax(p_x), p_t),
        p_x, 0.001, x,
        parameters=[p_t],
        parameter_values=[t],
        atol=1e-2, rtol=1e-2
    )
def SparseSoftmaxCrossEntropyWithLogits(self, tf_node, inputs):
    """
    Computes softmax cross entropy. The inputs `logits` are unscaled log
    probabilities, and each entry of `labels` must be a valid class index.

    Reference: https://goo.gl/z5T2my

    Arguments:
        tf_node: NodeDef object, the tensorflow node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the tensorflow node.

    Inputs to tf_node:
        logits, labels, name
    """
    # logits: (N1, Y1), labels: (N2,)
    logits, labels = inputs

    # check input dimension
    try:
        assert len(logits.axes) == 2
        assert len(labels.axes) == 1
        assert logits.axes[0].length == labels.axes[0].length
    except AssertionError:
        raise NotImplementedError("logits' shape must be (N, Y), "
                                  "labels' shape must be (N,), "
                                  "other shapes not supported yet.")

    # get axis
    axis_y = logits.axes[1]

    # labels_one_hot: (Y2, N2)
    labels_one_hot = ng.one_hot(labels, axis=axis_y)

    # predicts: (N1, Y1)
    predicts = ng.softmax(logits, normalization_axes=axis_y)

    # dim-shuffle / cast to (Y1, N1)
    predicts_axes = ng.make_axes([axis for axis in reversed(predicts.axes)])
    predicts = ng.axes_with_order(predicts, axes=predicts_axes)
    labels_one_hot = ng.cast_axes(labels_one_hot, predicts_axes)

    # cross_entropy: (N1,)
    cross_entropy = ng.cross_entropy_multi(predicts, labels_one_hot,
                                           out_axes=(logits.axes[0],))
    return cross_entropy
def test_cross_entropy_multi_axis_order(transformer_factory, input_tensor):
    """If y and t have different axis orders, it should give the same result"""
    y = input_tensor
    t1 = ng.placeholder(y.axes)

    # Reorder axes
    feature_axis, batch_axis = y.axes
    t2 = ng.placeholder(ng.make_axes([batch_axis, feature_axis]))

    # Set up numpy variables
    np_y = np.random.uniform(0, 1, y.axes.lengths)
    if feature_axis.length > batch_axis.length:
        np_t1 = np.eye(feature_axis.length)[:, :batch_axis.length]
    else:
        np_t1 = np.eye(batch_axis.length)[:feature_axis.length, :]
    np_t2 = np_t1.T

    with ExecutorFactory() as ex:
        f1 = ex.executor(ng.cross_entropy_multi(ng.softmax(y), t1), y, t1)
        f2 = ex.executor(ng.cross_entropy_multi(ng.softmax(y), t2), y, t2)
        out1 = f1(np_y, np_t1)
        out2 = f2(np_y, np_t2)
        ng.testing.assert_allclose(out1.ravel(), out2.ravel(), rtol=1e-5)
def LabelCrossEntropy(self, c2_op, inputs):
    """
    Computes the cross entropy between the input and the label set.

    Arguments:
        c2_op: OperatorDef object, the caffe2 node to convert.
        inputs: List of ngraph Ops as inputs to this node.

    Returns:
        A ngraph Op corresponding to the caffe2 node.
    """
    y, labels = inputs
    labels_one_hot = ng.one_hot(labels, axis=y.axes[1])
    labels_one_hot = ng.cast_axes(labels_one_hot,
                                  [labels_one_hot.axes[0], y.axes[0]])
    return ng.cross_entropy_multi(y, labels_one_hot, out_axes=y.axes[0])
def test_cross_entropy_softmax_large_input(input_tensor):
    p_x = input_tensor
    p_t = ng.placeholder(p_x.axes)

    cross_entropy_sm_x_t = ng.cross_entropy_multi(ng.softmax(p_x), p_t)

    x = np.eye(3)[np.random.choice(3, 8)].T * rng.uniform(-10, 10, p_x.axes) * 25
    t = np.eye(3)[np.random.choice(3, 8)].T

    def f_np(x, t):
        return np_cross_entropy_multi(np_softmax(x, 0), t, axis=0)

    compare_f_at_x(cross_entropy_sm_x_t, [p_x, p_t], f_np, [x, t],
                   atol=1e-7, rtol=1e-4)
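# The large-input test above scales logits well past the float32 overflow point
# of exp(); the max-subtraction in the np_softmax sketch earlier is what keeps
# the reference computation finite. A small standalone illustration:
import numpy as np

x = np.array([250.0, -250.0, 0.0], dtype=np.float32)

naive = np.exp(x)                    # exp(250) overflows float32 -> inf
print(naive / naive.sum())           # [nan, 0., 0.] plus overflow warnings

shifted = np.exp(x - x.max())        # max-shifted: largest exponent is 0
print(shifted / shifted.sum())       # [1., ~0., ~0.], no overflow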
def run_resnet_benchmark(dataset, num_iterations, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=True, batch_norm=False,
                         visualize=False, stage_depth=1):
    inputs, data, train_set = get_fake_data(dataset, batch_size, num_iterations)

    # Running forward propagation
    model_out = get_mini_resnet(inputs, dataset, device, device_id,
                                batch_norm=batch_norm, stage_depth=stage_depth)

    # Running back propagation
    if bprop:
        with ng.metadata(device=device, device_id=device_id, parallel=ax.N):
            optimizer = GradientDescentMomentum(0.01, 0.9)
            train_loss = ng.cross_entropy_multi(
                model_out, ng.one_hot(inputs['label'], axis=ax.Y))

            batch_cost = ng.sequential(
                [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
            batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_bprop',
                           visualize, 'device_id'))
    else:
        fprop_computation_op = ng.computation(model_out, 'all')
        benchmark = Benchmark(fprop_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(num_iterations, n_skip, dataset + '_msra_fprop',
                           visualize))
def CrossEntropyWithSoftmax(self, cntk_op, inputs):
    """
    Computes the softmax cross entropy between the inputs[0] and inputs[1].

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    cast_0, cast_1 = self.cast_axes_for_compound_op(inputs)

    if isinstance(cast_0, ng.AssignableTensorOp):
        cast_1 = ng.softmax(cast_1)
    else:
        cast_0 = ng.softmax(cast_0)

    return ng.cross_entropy_multi(cast_0, cast_1).named(cntk_op.uid)
def cross_entropy_with_softmax(model, labels):
    """
    Auxiliary function to add cross entropy and softmax (loss function)
    to an imported model for training.

    Arguments:
        model - imported model
        labels - placeholder for one-hot labels array

    Returns:
        Loss function (mean for batch)
    """
    if model.axes.lengths != labels.axes.lengths:
        model = ng.Transpose(model)
    assert model.axes.lengths == labels.axes.lengths

    model = ng.cast_axes(model, axes=labels.axes)
    loss = ng.cross_entropy_multi(ng.softmax(model), labels)
    return ng.mean(loss, out_axes=())
def test_cross_entropy_softmax(transformer_factory):
    N = ng.make_axis(name='N', batch=True)
    W = ng.make_axis(name='W')

    W.length = 3
    N.length = 10
    axes = ng.make_axes([W, N])

    p_x = ng.placeholder(axes)
    p_t = ng.placeholder(axes)

    cross_entropy_sm_x_t = ng.cross_entropy_multi(ng.softmax(p_x), p_t)

    x = rng.uniform(0, 1, axes)
    t = np_softmax(rng.uniform(0, 1, axes), 0)

    def f_np(x, t):
        return np_cross_entropy_multi(np_softmax(x, 0), t, axis=0)

    compare_f_at_x(cross_entropy_sm_x_t, [p_x, p_t], f_np, [x, t], rtol=1e-5)
def CrossEntropyWithSoftmax(self, cntk_op, inputs):
    """
    Computes the softmax cross entropy between the inputs[0] and inputs[1].

    Arguments:
        cntk_op: CNTK operation to be imported.
        inputs: List of inputs to this node.

    Returns:
        A ngraph Op.
    """
    cast_0, cast_1 = squeeze_axes(inputs)

    if cast_0.axes.lengths != cast_1.axes.lengths:
        cast_0 = ng.Transpose(cast_0)
    assert cast_0.axes.lengths == cast_1.axes.lengths

    cast_0 = ng.cast_axes(cast_0, axes=cast_1.axes)
    loss = ng.cross_entropy_multi(ng.softmax(cast_0), cast_1)
    return ng.mean(loss, out_axes=()).named(cntk_op.uid)
def test_cross_entropy_softmax_deriv(transformer_factory):
    N = ng.make_axis(name='N', batch=True)
    W = ng.make_axis(name='W')

    W.length = 3
    N.length = 10
    axes = ng.make_axes([W, N])

    p_x = ng.placeholder(axes)
    p_t = ng.placeholder(axes)

    x = rng.uniform(0, 1, axes)
    t = np_softmax(rng.uniform(0, 1, axes), 0)

    check_derivative(
        ng.cross_entropy_multi(ng.softmax(p_x), p_t),
        p_x, 0.001, x,
        parameters=[p_t],
        parameter_values=[t],
        atol=1e-2, rtol=1e-2
    )
def run_cifar_benchmark(n_iter=10, n_skip=5, batch_size=4, transformer_type='cpu'):
    inputs, data, train_set = get_fake_cifar(batch_size, n_iter)
    model = get_mini_resnet(inputs)
    optimizer = GradientDescentMomentum(0.01, 0.9)

    train_loss = ng.cross_entropy_multi(model(inputs['image']),
                                        ng.one_hot(inputs['label'], axis=ax.Y))

    batch_cost = ng.sequential(
        [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    batch_cost_computation_op = ng.computation(batch_cost, "all")

    feed_dict = fill_feed_dict(train_set, inputs)
    benchmarks = dict()
    benchmarks['cifar_msra_fprop'] = run_benchmark(batch_cost_computation_op,
                                                   transformer_type, feed_dict,
                                                   n_skip, n_iter)
    print_benchmark_results(benchmarks)
def sparse_softmax_cross_entropy_with_logits(labels=None, logits=None, name=None):
    """
    Computes softmax cross entropy. The inputs `logits` are unscaled log
    probabilities, and each entry of `labels` must be a valid class index.

    Args:
        labels: of axis (N,) for (POS_0,)
        logits: of axis (N, Y) for (POS_1, POS_0)
        name: name of the ngraph op
    """
    # Check input dimension
    #          (    N,     Y),         (    N)
    # logits: (pos_1, pos_0), labels: (pos_0)
    try:
        assert len(logits.axes) == 2
        assert len(labels.axes) == 1
        assert logits.axes[0].length == labels.axes[0].length
    except AssertionError:
        raise NotImplementedError("logits' shape must be (N, Y), "
                                  "labels' shape must be (N,), "
                                  "other shapes not supported yet.")

    # get axis
    axis_n, axis_y = logits.axes

    # convert labels to one-hot labels
    labels = ng.cast_axes(labels, ng.make_axes(axis_n))
    labels = ng.one_hot(labels, axis=axis_y)
    labels = ng.axes_with_order(labels, axes=logits.axes)

    # predicts: (N, Y)
    predicts = ng.softmax(logits, normalization_axes=axis_y)

    # cross_entropy: (N)
    res = ng.cross_entropy_multi(predicts, labels, out_axes=(axis_n,))
    return cast_to_pos_axes(res).named(name)
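# For intuition: the sparse path above is equivalent to one-hot encoding the
# integer labels and applying the dense cross entropy. A hedged NumPy sketch of
# that equivalence, reusing the np_softmax helper sketched earlier in this
# collection (sparse_xent_reference and n_classes are illustrative names, not
# library API):
import numpy as np

def sparse_xent_reference(logits, label_idx, n_classes):
    # logits: (N, Y), label_idx: (N,) integer classes -> per-example loss (N,)
    one_hot = np.eye(n_classes)[label_idx]           # (N,) -> (N, Y)
    probs = np_softmax(logits, axis=1)               # normalize over classes
    return -np.sum(one_hot * np.log(probs), axis=1)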
def run_resnet_benchmark(dataset, n_iter, n_skip, batch_size, device_id,
                         transformer_type, device, bprop=False, visualize=False):
    inputs, data, train_set = get_fake_data(dataset, batch_size, n_iter)
    model_out = get_mini_resnet(inputs, dataset, device_id)

    # Running forward propagation
    fprop_computation_op = ng.computation(model_out, 'all')
    benchmark_fprop = Benchmark(fprop_computation_op, train_set, inputs,
                                transformer_type, device)
    Benchmark.print_benchmark_results(
        benchmark_fprop.time(n_iter, n_skip, dataset + '_msra_fprop', visualize))

    # Running back propagation
    if bprop:
        optimizer = GradientDescentMomentum(0.01, 0.9)
        train_loss = ng.cross_entropy_multi(
            model_out, ng.one_hot(inputs['label'], axis=ax.Y))

        batch_cost = ng.sequential(
            [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
        batch_cost_computation_op = ng.computation(batch_cost, "all")

        benchmark = Benchmark(batch_cost_computation_op, train_set, inputs,
                              transformer_type, device)
        Benchmark.print_benchmark_results(
            benchmark.time(n_iter, n_skip, dataset + '_msra_bprop', visualize))
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
one_hot_enc_out = one_hot_enc(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec(inputs['prev_tgt'])
enc_out = enc(one_hot_enc_out)
dec_out = dec(one_hot_dec_out, init_state=enc_out)
output_prob = linear(dec_out)

loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)

# inference graph
with Layer.inference_mode_on():
    enc_out_inference = enc(one_hot_enc_out)

    # Create decoder placeholders
    axes = one_hot_dec_out.axes
    axes = axes - axes.recurrent_axis() + ng.make_axis(length=1, name="REC")
    decoder_input_inference = ng.placeholder(axes, name="input")
resnet = residual_network(args.stage_depth)

learning_rate_policy = {'name': 'schedule',
                        'schedule': [32000, 48000],
                        'gamma': 0.1,
                        'base_lr': 0.1}

optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    momentum_coef=0.9,
                                    wdecay=0.0001,
                                    iteration=inputs['iteration'])
label_indices = inputs['label']
train_loss = ng.cross_entropy_multi(resnet(inputs['image']),
                                    ng.one_hot(label_indices, axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation(batch_cost, "all")

with Layer.inference_mode_on():
    inference_prob = resnet(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]), label_indices)
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(label_indices, axis=ax.Y))
eval_loss_names = ['cross_ent_loss', 'misclass']
eval_computation = ng.computation([eval_loss, errors], "all")

# Now bind the computations we are interested in
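# The misclassification pattern used above (argmax over the class axis compared
# against the integer labels) has a direct NumPy analogue; a small sketch with
# made-up values:
import numpy as np

probs = np.array([[0.1, 0.7, 0.2],
                  [0.5, 0.3, 0.2]])    # (N, Y) predicted distributions
labels = np.array([1, 2])              # integer class labels

misclass = np.not_equal(np.argmax(probs, axis=1), labels)
print(misclass.mean())                 # fraction misclassified: here 0.5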
######################
# Input specification
ax.C.length, ax.H.length, ax.W.length = train_set.shapes['image']
ax.D.length = 1
ax.N.length = args.batch_size
ax.Y.length = 10

# placeholders with descriptive names
inputs = dict(image=ng.placeholder([ax.C, ax.H, ax.W, ax.N]),
              label=ng.placeholder([ax.N]))

optimizer = GradientDescentMomentum(0.01, 0.9)
output_prob = seq1.train_outputs(inputs['image'])
errors = ng.not_equal(ng.argmax(output_prob, out_axes=[ax.N]), inputs['label'])
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['label'], axis=ax.Y))
mean_cost = ng.mean(loss, out_axes=())
updates = optimizer(loss)

train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss, misclass_pct=errors)

# Now bind the computations we are interested in
transformer = ngt.make_transformer()
train_computation = make_bound_computation(transformer, train_outputs, inputs)
loss_computation = make_bound_computation(transformer, loss_outputs, inputs)

cbs = make_default_callbacks(output_file=args.output_file,
                             frequency=args.iter_interval,
                             train_computation=train_computation,
                             total_iterations=args.num_iterations,
                 babi.max_utt_len,
                 babi.vocab_size,
                 args.emb_size,
                 args.batch_size,
                 use_match_type=args.use_match_type,
                 kb_ents_to_type=babi.kb_ents_to_type,
                 kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                 match_type_idxs=babi.match_type_idxs,
                 nhops=args.nhops,
                 eps=args.eps,
                 init=GaussianInit(mean=0.0, std=0.1))

a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)

mean_cost = ng.sum(loss, out_axes=[])
optimizer = Adam(learning_rate=0.001)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)
                  init,
                  activation=Tanh(),
                  return_sequence=True,
                  sum_out=True)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init,
                          axes=(ax.Y,))])

optimizer = RMSProp()

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                    usebits=True)
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['inp_txt'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                                   usebits=True)
eval_outputs = dict(cross_ent_loss=eval_loss, results=inference_prob)

# Now bind the computations we are interested in
train_set = ArrayIterator(wikimovies.data_dict['train'],
                          batch_size=args.batch_size,
                          total_iterations=num_iterations)
test_set = ArrayIterator(wikimovies.data_dict['test'],
                         batch_size=args.batch_size)
inputs = train_set.make_placeholders()
vocab_axis = ng.make_axis(length=wikimovies.vocab_size, name='vocab_axis')

memn2n = KVMemN2N(num_iterations, args.batch_size, args.emb_size, args.nhops,
                  wikimovies.story_length, wikimovies.memory_size,
                  wikimovies.vocab_size, vocab_axis, args.use_v_luts)

# Compute answer predictions
a_pred, _ = memn2n(inputs)

loss = ng.cross_entropy_multi(a_pred,
                              ng.one_hot(inputs['answer'], axis=vocab_axis),
                              usebits=True)

mean_cost = ng.sum(loss, out_axes=[])
optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, _ = memn2n(inputs)
def cost(y, t):
    return ng.cross_entropy_multi(y, t)
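# Several snippets in this collection pass usebits=True to
# ng.cross_entropy_multi; as the name suggests, this reports the entropy in
# bits (log base 2) rather than nats (natural log). If that reading is right,
# the two conventions differ only by a constant factor of ln(2), as this small
# NumPy check illustrates:
import numpy as np

t = np.array([0.0, 1.0, 0.0])              # one-hot target
y = np.array([0.2, 0.7, 0.1])              # predicted distribution

nats = -np.sum(t * np.log(y))              # usebits=False (natural log)
bits = -np.sum(t * np.log2(y))             # usebits=True (log base 2)

assert np.isclose(bits, nats / np.log(2))  # differ only by ln(2)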
no_steps = 75
step = num_iterations // no_steps
schedule = list(np.arange(step, num_iterations, step))
learning_rate_policy = {'name': 'schedule',
                        'schedule': schedule,
                        'gamma': 0.95,
                        'base_lr': 0.01}
optimizer = GradientDescentMomentum(learning_rate=learning_rate_policy,
                                    iteration=inputs['iteration'])

# Define the loss function (Cross entropy loss)
# Note that we convert the integer values of input['y'] to one hot here
fwd_prop = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(fwd_prop,
                                    ng.one_hot(inputs['y'], axis=out_axis),
                                    usebits=True)

# Train cost computation
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_computation = ng.computation([batch_cost, fwd_prop], "all")
train_outputs = dict(batch_cost=batch_cost)

# Forward prop of evaluation set
# Required for correct functioning of batch norm and dropout layers
# during inference mode
with Layer.inference_mode_on():
    inference_prop = seq1(inputs['X'])
eval_loss = ng.cross_entropy_multi(inference_prop,
                                   ng.one_hot(inputs['y'], axis=out_axis),
           Pool2D(2, strides=2),
           Convolution((5, 5, 32), filter_init=init_uni,
                       activation=Rectlin(), batch_norm=args.use_batch_norm),
           Pool2D(2, strides=2),
           Affine(nout=500, weight_init=init_uni,
                  activation=Rectlin(), batch_norm=args.use_batch_norm),
           Affine(axes=ax.Y, weight_init=init_uni, activation=Softmax())])

optimizer = GradientDescentMomentum(0.01, 0.9)
train_prob = seq1(inputs['image'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['label'], axis=ax.Y))
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
train_outputs = dict(batch_cost=batch_cost)

with Layer.inference_mode_on():
    inference_prob = seq1(inputs['image'])
errors = ng.not_equal(ng.argmax(inference_prob, out_axes=[ax.N]),
                      inputs['label'])
eval_loss = ng.cross_entropy_multi(inference_prob,
                                   ng.one_hot(inputs['label'], axis=ax.Y))
eval_outputs = dict(cross_ent_loss=eval_loss, misclass_pct=errors)

# Now bind the computations we are interested in
with closing(ngt.make_transformer()) as transformer:
def __init__(self):
    self.ng_computation = lambda Y, T: ng.cross_entropy_multi(Y, T)
for j in range(input_time_steps):
    # compute attention score for output time step i with input time step j
    W1_ej = ng.dot(W1, ng.slice_along_axis(enc_h_out, axis=rec_axis, idx=j))
    # u_i = v * tanh(W1 * e_j + W2 * d_i)
    score = ng.dot(v, ng.tanh(W1_ej + W2_di))
    u_i_list.append(score)

output_prob = ng.softmax(ng.stack(u_i_list, axis=ax.Y, pos=0), ax.Y)
u_list.append(output_prob)

pointer_out = ng.stack(u_list, axis=rec_axis, pos=2)

# specify loss function, calculate loss and update weights
one_hot_target = ng.one_hot(inputs['tgt_txt'], axis=ax.Y)
loss = ng.cross_entropy_multi(pointer_out, one_hot_target, usebits=True)
mean_cost = ng.mean(loss, out_axes=[])

optimizer = RMSProp(decay_rate=0.96, learning_rate=args.lr, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)
updates = optimizer(loss)

# provide outputs for bound computation
train_outputs = dict(batch_cost=mean_cost, updates=updates,
                     pointer_out=pointer_out)

# Train Loop
with closing(ngt.make_transformer()) as transformer:
    # bind the computations
    train_computation = make_bound_computation(transformer, train_outputs, inputs)
                 args.batch_size,
                 use_match_type=args.use_match_type,
                 kb_ents_to_type=babi.kb_ents_to_type,
                 kb_ents_to_cand_idxs=babi.kb_ents_to_cand_idxs,
                 match_type_idxs=babi.match_type_idxs,
                 nhops=args.nhops,
                 eps=args.eps,
                 init=GaussianInit(mean=0.0, std=0.1))

# Compute answer predictions
a_pred, attention = memn2n(inputs)

# specify loss function, calculate loss and update weights
loss = ng.cross_entropy_multi(a_pred, inputs['answer'], usebits=True)

mean_cost = ng.sum(loss, out_axes=[])
optimizer = Adam(learning_rate=args.lr)
updates = optimizer(loss)

batch_cost = ng.sequential([updates, mean_cost])

# provide outputs for bound computation
train_outputs = dict(batch_cost=batch_cost, train_preds=a_pred)

with Layer.inference_mode_on():
    a_pred_inference, attention_inference = memn2n(inputs)
    eval_loss = ng.cross_entropy_multi(a_pred_inference, inputs['answer'],
                                       usebits=True)
# Logits
logits1 = ng.cast_axes(logits_concat[0], [ax.Y, N])
logits2 = ng.cast_axes(logits_concat[1], [ax.Y, N])

# Compute loss function
label1 = ng.slice_along_axis(inputs['answer'],
                             axis=inputs['answer'].axes.feature_axes()[0],
                             idx=0)
label2 = ng.slice_along_axis(inputs['answer'],
                             axis=inputs['answer'].axes.feature_axes()[0],
                             idx=1)
labels_concat = [label1, label2]

loss1 = ng.cross_entropy_multi(logits1,
                               ng.one_hot(label1, axis=ax.Y),
                               usebits=False)
loss2 = ng.cross_entropy_multi(logits2,
                               ng.one_hot(label2, axis=ax.Y),
                               usebits=False)

# Total Loss
train_loss = loss1 + loss2

# Set optimizer (no learning rate scheduler used)
optimizer = Adam(learning_rate=2e-3)

print('compiling the graph')
# Cost set up
batch_cost = ng.sequential(
    [optimizer(train_loss), ng.mean(train_loss, out_axes=())])
    }
    optimizer = RMSProp(learning_rate=learning_rate_policy,
                        wdecay=4e-5,
                        decay_rate=0.9,
                        momentum_coef=0.9,
                        epsilon=1.,
                        iteration=inputs['iteration'])
else:
    raise NotImplementedError("Unrecognized Optimizer")

# Build the main and auxiliary loss functions
y_onehot = ng.one_hot(inputs['label'], axis=ax.Y)
train_prob_main = inception.seq2(inception.seq1(inputs['image']))
train_prob_main = ng.map_roles(train_prob_main, {"C": ax.Y.name})
train_loss_main = ng.cross_entropy_multi(train_prob_main, y_onehot,
                                         enable_softmax_opt=False)

train_prob_aux = inception.seq_aux(inception.seq1(inputs['image']))
train_prob_aux = ng.map_roles(train_prob_aux, {"C": ax.Y.name})
train_loss_aux = ng.cross_entropy_multi(train_prob_aux, y_onehot,
                                        enable_softmax_opt=False)

batch_cost = ng.sequential([optimizer(train_loss_main + 0.4 * train_loss_aux),
                            ng.mean(train_loss_main, out_axes=())])
train_computation = ng.computation([batch_cost], 'all')