def _save_run_metadata(args, att_encdec, source_vocab, target_vocab):
    """Dump the model's hyper-parameters and save both vocabularies.

    Everything is written under the ``args.model_path`` prefix, using the
    module-level ``HPARAM_NAME``, ``SRC_VOCAB_NAME`` and ``TAR_VOCAB_NAME``
    suffixes.
    """
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)


def train(args):
    """Train an ``ABED`` model and save a snapshot after every epoch.

    Reads the following attributes from ``args``:

    * ``source`` / ``target`` -- passed to ``Vocab`` and ``word_list``.
    * ``vocab``, ``hidden_size``, ``maxout_hidden_size``, ``embed_size`` --
      passed to the ``ABED`` constructor.
    * ``use_gpu`` -- when truthy the model is moved with ``to_gpu()``.
    * ``rho`` / ``eps`` -- passed to ``optimizers.AdaDelta``.
    * ``epochs`` / ``minibatch`` -- number of epochs and batch size.
    * ``model_path`` -- prefix for every file written by this function.
    * ``source_validation`` -- when truthy, per-epoch training and validation
      losses are appended to ``PLOT_DIR + "loss"`` and
      ``PLOT_DIR + "loss_val"``.

    The loss log files are closed even if training raises.
    """
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size,
                      args.maxout_hidden_size, args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()

    # Handles stay None unless validation logging is enabled, so the
    # ``finally`` clause below can tell what actually needs closing.
    fp_loss = None
    fp_loss_val = None
    try:
        if args.source_validation:
            if not os.path.exists(PLOT_DIR):
                os.mkdir(PLOT_DIR)
            fp_loss = open(PLOT_DIR + "loss", "w")
            fp_loss_val = open(PLOT_DIR + "loss_val", "w")

        opt = optimizers.AdaDelta(args.rho, args.eps)
        opt.setup(att_encdec)
        opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
        opt.add_hook(optimizer.GradientClipping(CLIP_THR))

        for epoch in range(args.epochs):
            print("--- epoch: %s/%s ---" % (epoch + 1, args.epochs))
            # The generators are rebuilt each epoch because they are
            # consumed by the batch loop below.
            source_gen = word_list(args.source)
            target_gen = word_list(args.target)
            batch_gen = batch(
                sort(source_gen, target_gen, 100 * args.minibatch),
                args.minibatch)

            n = 0
            total_loss = 0.0
            for source_batch, target_batch in batch_gen:
                batch_size = len(source_batch)
                n += batch_size
                source_batch = fill_batch_end(source_batch)
                target_batch = fill_batch_end(target_batch)
                hyp_batch, loss = forward(
                    source_batch, target_batch, source_vocab, target_vocab,
                    att_encdec, True, 0)
                # Weight the batch loss by its size so that total_loss / n
                # is a per-example average.
                total_loss += loss.data * batch_size
                closed_test(source_batch, target_batch, hyp_batch)

                # NOTE(review): no gradient reset is visible here before
                # backward(); confirm that forward() (or the model) clears
                # gradients, otherwise they may accumulate across batches.
                loss.backward()
                opt.update()
                print("[n=%s]" % n)
            print("[total=%s]" % n)

            # Snapshot files are numbered from 1, unlike the zero-based
            # epoch index written to the loss logs below.
            prefix = args.model_path + '%s' % (epoch + 1)
            serializers.save_hdf5(prefix + '.attencdec', att_encdec)

            if args.source_validation:
                total_loss_val, n_val = validation_test(
                    args, att_encdec, source_vocab, target_vocab)
                fp_loss.write(
                    "\t".join([str(epoch), str(total_loss / n) + "\n"]))
                fp_loss_val.write(
                    "\t".join([str(epoch), str(total_loss_val / n_val) + "\n"]))
                # Flush so the logs can be inspected while training runs.
                fp_loss.flush()
                fp_loss_val.flush()

            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; this per-epoch save is assumed to sit inside the epoch
            # loop, with the identical save below running once afterwards.
            _save_run_metadata(args, att_encdec, source_vocab, target_vocab)

        # Also runs when args.epochs == 0, so the metadata always exists.
        _save_run_metadata(args, att_encdec, source_vocab, target_vocab)
    finally:
        for handle in (fp_loss, fp_loss_val):
            if handle is not None:
                handle.close()
def _save_run_metadata(args, att_encdec, source_vocab, target_vocab):
    """Dump the model's hyper-parameters and save both vocabularies.

    Everything is written under the ``args.model_path`` prefix, using the
    module-level ``HPARAM_NAME``, ``SRC_VOCAB_NAME`` and ``TAR_VOCAB_NAME``
    suffixes.
    """
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)


def train(args):
    """Train an ``ABED`` model and save a snapshot after every epoch.

    Reads the following attributes from ``args``:

    * ``source`` / ``target`` -- passed to ``Vocab`` and ``word_list``.
    * ``vocab``, ``hidden_size``, ``maxout_hidden_size``, ``embed_size`` --
      passed to the ``ABED`` constructor.
    * ``use_gpu`` -- when truthy the model is moved with ``to_gpu()``.
    * ``rho`` / ``eps`` -- passed to ``optimizers.AdaDelta``.
    * ``epochs`` / ``minibatch`` -- number of epochs and batch size.
    * ``model_path`` -- prefix for every file written by this function.
    * ``source_validation`` -- when truthy, per-epoch training and validation
      losses are appended to ``PLOT_DIR + "loss"`` and
      ``PLOT_DIR + "loss_val"``.

    The loss log files are closed even if training raises.
    """
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size,
                      args.maxout_hidden_size, args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()

    # Handles stay None unless validation logging is enabled, so the
    # ``finally`` clause below can tell what actually needs closing.
    fp_loss = None
    fp_loss_val = None
    try:
        if args.source_validation:
            if not os.path.exists(PLOT_DIR):
                os.mkdir(PLOT_DIR)
            fp_loss = open(PLOT_DIR + "loss", "w")
            fp_loss_val = open(PLOT_DIR + "loss_val", "w")

        opt = optimizers.AdaDelta(args.rho, args.eps)
        opt.setup(att_encdec)
        opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
        opt.add_hook(optimizer.GradientClipping(CLIP_THR))

        for epoch in range(args.epochs):
            print("--- epoch: %s/%s ---" % (epoch + 1, args.epochs))
            # The generators are rebuilt each epoch because they are
            # consumed by the batch loop below.
            source_gen = word_list(args.source)
            target_gen = word_list(args.target)
            batch_gen = batch(
                sort(source_gen, target_gen, 100 * args.minibatch),
                args.minibatch)

            n = 0
            total_loss = 0.0
            for source_batch, target_batch in batch_gen:
                batch_size = len(source_batch)
                n += batch_size
                source_batch = fill_batch_end(source_batch)
                target_batch = fill_batch_end(target_batch)
                hyp_batch, loss = forward(
                    source_batch, target_batch, source_vocab, target_vocab,
                    att_encdec, True, 0)
                # Weight the batch loss by its size so that total_loss / n
                # is a per-example average.
                total_loss += loss.data * batch_size
                closed_test(source_batch, target_batch, hyp_batch)

                # NOTE(review): no gradient reset is visible here before
                # backward(); confirm that forward() (or the model) clears
                # gradients, otherwise they may accumulate across batches.
                loss.backward()
                opt.update()
                print("[n=%s]" % n)
            print("[total=%s]" % n)

            # Snapshot files are numbered from 1, unlike the zero-based
            # epoch index written to the loss logs below.
            prefix = args.model_path + '%s' % (epoch + 1)
            serializers.save_hdf5(prefix + '.attencdec', att_encdec)

            if args.source_validation:
                total_loss_val, n_val = validation_test(
                    args, att_encdec, source_vocab, target_vocab)
                fp_loss.write(
                    "\t".join([str(epoch), str(total_loss / n) + "\n"]))
                fp_loss_val.write(
                    "\t".join([str(epoch), str(total_loss_val / n_val) + "\n"]))
                # Flush so the logs can be inspected while training runs.
                fp_loss.flush()
                fp_loss_val.flush()

            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; this per-epoch save is assumed to sit inside the epoch
            # loop, with the identical save below running once afterwards.
            _save_run_metadata(args, att_encdec, source_vocab, target_vocab)

        # Also runs when args.epochs == 0, so the metadata always exists.
        _save_run_metadata(args, att_encdec, source_vocab, target_vocab)
    finally:
        for handle in (fp_loss, fp_loss_val):
            if handle is not None:
                handle.close()