# --- seq2seq training-graph construction (variant A) --------------------------
# NOTE(review): this chunk opens mid-expression — `return_sequence=False)` is the
# closing tail of a constructor call (presumably the encoder Recurrent) whose
# opening lies outside this view. Left byte-identical; only reflowed/commented.
                 return_sequence=False)

# Decoder-side preprocessing: expands token ids into one-hot vectors.
one_hot_dec = Preprocess(functor=expand_onehot)

# Decoder recurrence. reset_cells=True clears hidden state between batches;
# return_sequence=True emits the full output sequence for the output layer.
dec = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True,
                return_sequence=True)

# Output projection to vocabulary size with softmax.
# NOTE(review): `(ax.Y)` is NOT a tuple — it is just `ax.Y` in parentheses.
# If Affine requires a tuple of axes this should be `(ax.Y,)`; verify against
# the Affine layer's `axes` contract before changing.
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
# NOTE(review): `inputs` is not defined anywhere before this point in the
# visible file; the only visible definition (later in this file) uses keys
# 'inp_enc'/'inp_dec'/'tgt', not 'inp_txt'/'prev_tgt'/'tgt_txt'. This chunk
# looks like a stale duplicate — confirm which variant is live.
one_hot_enc_out = one_hot_enc.train_outputs(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec.train_outputs(inputs['prev_tgt'])
enc_out = enc.train_outputs(one_hot_enc_out)
# Decoder is seeded with the encoder's final state.
dec_out = dec.train_outputs(one_hot_dec_out, init_state=enc_out)
output_prob = linear.train_outputs(dec_out)

# Per-element cross-entropy against one-hot targets (usebits=True -> log base 2).
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
                              usebits=True)
# Scalar mean cost for reporting (out_axes=[] reduces over all axes).
mean_cost = ng.mean(loss, out_axes=[])
# NOTE(review): the optimizer is applied to the per-element `loss`, not to
# `mean_cost` — presumably the optimizer reduces internally; confirm this is
# the framework's expected usage.
updates = optimizer(loss)
train_outputs = dict(batch_cost=mean_cost, updates=updates)
loss_outputs = dict(cross_ent_loss=loss)
######################
# Fix the symbolic axis lengths for this run:
# vocabulary size, unrolled sequence length, and batch size.
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = {
    name: ng.placeholder([ax.REC, ax.N])
    for name in ('inp_enc', 'inp_dec', 'tgt')
}

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# Wire the encoder/decoder graph front to back: one-hot encode both input
# streams, run the encoder, seed the decoder with the encoder's final state,
# and project decoder outputs to vocabulary probabilities.
one_hot_enc_out = one_hot_enc.train_outputs(inputs['inp_enc'])
one_hot_dec_out = one_hot_dec.train_outputs(inputs['inp_dec'])
enc_out = enc.train_outputs(one_hot_enc_out)
dec_out = dec.train_outputs(one_hot_dec_out, init_state=enc_out)
output_prob = linear.train_outputs(dec_out)

# Cross-entropy against one-hot targets; mean over all axes gives the
# scalar batch cost used for reporting.
loss = ng.cross_entropy_multi(output_prob,
                              ng.one_hot(inputs['tgt'], axis=ax.Y),
                              usebits=True)
mean_cost = ng.mean(loss, out_axes=[])
updates = optimizer(loss)

train_outputs = {'batch_cost': mean_cost, 'updates': updates}
loss_outputs = {'cross_ent_loss': loss}
# Now bind the computations we are interested in