def main():
    print 'Starting at: {}\n'.format(datetime.now())
    s_time = time.time()
    df = read_df(args.df_path)
    df = df.fillna(u'')

    label_tags = pickle.load(open(args.tags_file, 'rb'))
    print '\nloaded {} tags'.format(len(label_tags))

    raw_corpus = myio.read_corpus(args.corpus_w_tags, with_tags=True)

    embedding_layer = create_embedding_layer(
        n_d=200,
        embs=load_embedding_iterator(args.embeddings),
        only_words=False if args.use_embeddings else True,
        # only_words will take the words from embedding file and make random initial embeddings
        trainable=args.trainable
    )

    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, label_tags, max_len=args.max_seq_len)

    print("vocab size={}, corpus size={}\n".format(embedding_layer.n_V, len(raw_corpus)))

    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus_w_tags, embedding_layer, with_tags=True)

    if args.layer.lower() == "lstm":
        from models import LstmMultiTagsClassifier as Model
    elif args.layer.lower() in ["bilstm", "bigru"]:
        from models import BiRNNMultiTagsClassifier as Model
    elif args.layer.lower() == "cnn":
        from models import CnnMultiTagsClassifier as Model
    elif args.layer.lower() == "gru":
        from models import GruMultiTagsClassifier as Model
    else:
        raise Exception("no correct layer given")

    if args.cross_val:
        train, dev, test = myio.create_cross_val_batches(df, ids_corpus, args.batch_size, padding_id)
    else:
        dev = list(myio.create_batches(
            df, ids_corpus, 'dev', args.batch_size, padding_id,
            N_neg=args.n_neg, samples_file=args.samples_file))
        test = list(myio.create_batches(
            df, ids_corpus, 'test', args.batch_size, padding_id,
            N_neg=args.n_neg, samples_file=args.samples_file))
    # baselines_eval(train, dev, test)

    model = Model(args, embedding_layer, len(label_tags),
                  weights=weights if args.reweight else None)
    model.ready()

    print 'total (non) trainable params: ', model.num_parameters()

    if args.load_pre_trained_part:
        # need to remove the old assigns to embeddings
        model.init_assign_ops = model.load_pre_trained_part(args.load_pre_trained_part)
    print '\nmodel init_assign_ops: {}\n'.format(model.init_assign_ops)

    model.train_model(df, ids_corpus, dev=dev, test=test)

    print '\nEnded at: {}'.format(datetime.now())
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        cut_off=args.cut_off,
        embs=load_embedding_iterator(args.embeddings) if args.embeddings else None
    )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer)
    say("vocab size={}, corpus size={}\n".format(
        embedding_layer.n_V,
        len(raw_corpus)
    ))
    padding_id = embedding_layer.vocab_map["<padding>"]
    bos_id = embedding_layer.vocab_map["<s>"]
    eos_id = embedding_layer.vocab_map["</s>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=20, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=20, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id)

    if args.heldout:
        with open(args.heldout) as fin:
            heldout_ids = fin.read().split()
        heldout_corpus = dict((id, ids_corpus[id]) for id in heldout_ids if id in ids_corpus)
        train_corpus = dict((id, ids_corpus[id]) for id in ids_corpus
                            if id not in heldout_corpus)
        heldout = myio.create_batches(heldout_corpus, [], args.batch_size,
                                      padding_id, bos_id, eos_id, auto_encode=True)
        heldout = [myio.create_one_batch(b1, t2, padding_id) for t1, b1, t2 in heldout]
        say("heldout examples={}\n".format(len(heldout_corpus)))

    if args.train:
        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)

        start_time = time.time()
        train = myio.read_annotations(args.train)
        if not args.use_anno:
            train = []
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                            model.padding_id, model.bos_id, model.eos_id,
                                            auto_encode=True)
        say("{} to create batches\n".format(time.time() - start_time))

        model.ready()
        model.train(
            ids_corpus if not args.heldout else train_corpus,
            train,
            dev if args.dev else None,
            test if args.test else None,
            heldout if args.heldout else None
        )
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        embs=load_embedding_iterator(args.embeddings) if args.embeddings else None)
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V, len(raw_corpus)))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.dev:
        dev_raw = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev_raw, padding_id,
                                       pad_left=not args.average, merge=args.merge)
    if args.test:
        test_raw = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test_raw, padding_id,
                                        pad_left=not args.average, merge=args.merge)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                            padding_id, pad_left=not args.average,
                                            merge=args.merge)
        say("{} to create batches\n".format(time.time() - start_time))
        say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches),
            sum(len(x[0].ravel()) for x in train_batches),
            sum(len(x[1].ravel()) for x in train_batches)))
        train_batches = None

        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # set parameters using pre-trained network
        if args.load_pretrain:
            model.encoder.load_pretrained_parameters(args)

        model.train(ids_corpus, train,
                    (dev, dev_raw) if args.dev else None,
                    (test, test_raw) if args.test else None)
def main(args):
    raw_corpus = myio.read_corpus(args.corpus)
    print("raw corpus:", args.corpus, "len:", len(raw_corpus))
    embedding_layer = myio.create_embedding_layer(
        raw_corpus,
        n_d=args.hidden_dim,
        cut_off=args.cut_off,
        embs=None
        # embs=load_embedding_iterator(args.embeddings) if args.embeddings else None
    )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    myio.say("vocab size={}, corpus size={}\n".format(
        embedding_layer.n_V,
        len(raw_corpus)
    ))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    # if args.dev:
    #     dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
    #     dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left=not args.average)
    # if args.test:
    #     test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
    #     test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left=not args.average)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        print("training data:", args.train, "len:", len(train))
        train_batches = myio.create_batches(ids_corpus, train, args.batch_size,
                                            padding_id, pad_left=not args.average)
        myio.say("{:.2f} secs to create {} batches of size {}\n".format(
            (time.time() - start_time), len(train_batches), args.batch_size))
        myio.say("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches),
            sum(len(x[0].ravel()) + len(x[1].ravel()) for x in train_batches),
            sum(len(x[2].ravel()) for x in train_batches)
        ))
        # train_batches = None

        model = Model(args, embedding_layer,
                      weights=weights if args.reweight else None)
        model.ready()

        # # set parameters using pre-trained network
        # if args.load_pretrain:
        #     model.load_pretrained_parameters(args)

        model.train(
            ids_corpus,
            train,
            dev=None,   # dev if args.dev else None,
            test=None   # test if args.test else None
        )
def train(self, ids_corpus, train, dev=None, test=None, heldout=None):
    args = self.args
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
    batch_size = args.batch_size
    padding_id = self.padding_id
    bos_id = self.bos_id
    eos_id = self.eos_id

    # train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, args.loss)

    updates, lr, gnorm = create_optimization_updates(
        cost=self.cost,
        params=self.params,
        lr=args.learning_rate,
        method=args.learning)[:3]

    train_func = theano.function(inputs=[self.idxs, self.idys],
                                 outputs=[self.cost, self.loss, gnorm],
                                 updates=updates)

    eval_func = theano.function(
        inputs=[self.idxs],
        # outputs = self.scores2
        outputs=self.scores)

    nll_func = theano.function(inputs=[self.idxs, self.idys],
                               outputs=[self.nll, self.mask])

    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

    result_table = PrettyTable(
        ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
        ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

    unchanged = 0
    best_dev = -1
    dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
    test_MAP = test_MRR = test_P1 = test_P5 = 0
    heldout_PPL = -1

    start_time = 0
    max_epoch = args.max_epoch
    for epoch in xrange(max_epoch):
        unchanged += 1
        if unchanged > 8:
            break

        start_time = time.time()

        train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                            padding_id, bos_id, eos_id,
                                            auto_encode=True)
        N = len(train_batches)

        train_cost = 0.0
        train_loss = 0.0
        train_loss2 = 0.0

        for i in xrange(N):
            # get current batch
            t1, b1, t2 = train_batches[i]

            if args.use_title:
                idxs, idys = myio.create_one_batch(t1, t2, padding_id)
                cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                train_cost += cur_cost
                train_loss += cur_loss
                train_loss2 += cur_loss / idys.shape[0]

            if args.use_body:
                idxs, idys = myio.create_one_batch(b1, t2, padding_id)
                cur_cost, cur_loss, grad_norm = train_func(idxs, idys)
                train_cost += cur_cost
                train_loss += cur_loss
                train_loss2 += cur_loss / idys.shape[0]

            if i % 10 == 0:
                say("\r{}/{}".format(i, N))

            if i == N - 1:
                self.dropout.set_value(0.0)

                if dev is not None:
                    dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)
                if heldout is not None:
                    heldout_PPL = self.evaluate_perplexity(heldout, nll_func)

                if dev_MRR > best_dev:
                    unchanged = 0
                    best_dev = dev_MRR
                    result_table.add_row(
                        [epoch] +
                        ["%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                         [test_MAP, test_MRR, test_P1, test_P5]])
                    if args.model:
                        self.save_model(args.model + ".pkl.gz")

                dropout_p = np.float64(args.dropout).astype(theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\r\n\n")
                say(("Epoch {}\tcost={:.3f}\tloss={:.3f} {:.3f}\t"
                     "\tMRR={:.2f},{:.2f}\tPPL={:.1f}\t|g|={:.3f}\t[{:.3f}m]\n").format(
                        epoch,
                        train_cost / (i + 1),
                        train_loss / (i + 1),
                        train_loss2 / (i + 1),
                        dev_MRR,
                        best_dev,
                        heldout_PPL,
                        float(grad_norm),
                        (time.time() - start_time) / 60.0))
                say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        say("\n")
        say("{}".format(result_table))
        say("\n")
def train(self, ids_corpus, train, dev=None, test=None):
    args = self.args
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
    batch_size = args.batch_size
    padding_id = self.padding_id

    # train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

    updates, lr, gnorm = create_optimization_updates(
        cost=self.cost,
        params=self.params,
        lr=args.learning_rate,
        method=args.learning)[:3]

    train_func = theano.function(
        inputs=[self.idts, self.idbs, self.idps],
        outputs=[self.cost, self.loss, gnorm],
        updates=updates)

    eval_func = theano.function(
        inputs=[self.idts, self.idbs],
        outputs=self.scores,
        on_unused_input='ignore')

    say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

    result_table = PrettyTable(
        ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
        ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

    unchanged = 0
    best_dev = -1
    dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
    test_MAP = test_MRR = test_P1 = test_P5 = 0

    start_time = 0
    max_epoch = args.max_epoch
    for epoch in xrange(max_epoch):
        unchanged += 1
        if unchanged > 15:
            break

        start_time = time.time()

        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus, train, batch_size,
                                            padding_id, pad_left=not args.average)
        N = len(train_batches)

        train_loss = 0.0
        train_cost = 0.0

        for i in xrange(N):
            # get current batch
            idts, idbs, idps = train_batches[i]

            cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps)
            train_loss += cur_loss
            train_cost += cur_cost

            if i % 10 == 0:
                say("\r{}/{}".format(i, N))

            if i == N - 1:
                self.dropout.set_value(0.0)

                if dev is not None:
                    dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate(dev, eval_func)
                if test is not None:
                    test_MAP, test_MRR, test_P1, test_P5 = self.evaluate(test, eval_func)

                if dev_MRR > best_dev:
                    unchanged = 0
                    best_dev = dev_MRR
                    result_table.add_row(
                        [epoch] +
                        ["%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                         [test_MAP, test_MRR, test_P1, test_P5]])
                    if args.save_model:
                        self.save_model(args.save_model)

                dropout_p = np.float64(args.dropout).astype(theano.config.floatX)
                self.dropout.set_value(dropout_p)

                say("\r\n\n")
                say(("Epoch {}\tcost={:.3f}\tloss={:.3f}"
                     "\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n").format(
                        epoch,
                        train_cost / (i + 1),
                        train_loss / (i + 1),
                        dev_MRR,
                        best_dev,
                        float(grad_norm),
                        (time.time() - start_time) / 60.0))
                say("\tp_norm: {}\n".format(self.get_pnorm_stat()))

        say("\n")
        say("{}".format(result_table))
        say("\n")
def train_model(self, ids_corpus, train, dev=None, test=None):
    with tf.Session() as sess:

        result_table = PrettyTable([
            "Epoch", "Step", "dev MAP", "dev MRR", "dev P@1", "dev P@5",
            "tst MAP", "tst MRR", "tst P@1", "tst P@5"
        ])
        dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
        test_MAP = test_MRR = test_P1 = test_P5 = 0
        best_dev = -1

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(self.args.learning_rate)
        train_op = optimizer.minimize(self.cost, global_step=global_step)

        print '\n\ntrainable params: ', tf.trainable_variables(), '\n\n'

        sess.run(tf.global_variables_initializer())
        emb = sess.run(self.embeddings)
        print '\nemb {}\n'.format(emb[10][0:10])

        if self.init_assign_ops != {}:
            print 'assigning trained values ...\n'
            sess.run(self.init_assign_ops)
            emb = sess.run(self.embeddings)
            print '\nemb {}\n'.format(emb[10][0:10])
            self.init_assign_ops = {}

        if self.args.save_dir != "":
            print("Writing to {}\n".format(self.args.save_dir))

            # TRAIN LOSS
            train_loss_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train", "loss"))
            train_cost_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "train", "cost"),
                sess.graph)

            # VARIABLE NORM
            p_norm_summaries = {}
            p_norm_placeholders = {}
            for param_name, param_norm in self.get_pnorm_stat(sess).iteritems():
                p_norm_placeholders[param_name] = tf.placeholder(tf.float32)
                p_norm_summaries[param_name] = tf.summary.scalar(
                    param_name, p_norm_placeholders[param_name])
            p_norm_summary_op = tf.summary.merge(p_norm_summaries.values())
            p_norm_summary_dir = os.path.join(self.args.save_dir, "summaries", "p_norm")
            p_norm_summary_writer = tf.summary.FileWriter(p_norm_summary_dir)

            # DEV LOSS & EVAL
            dev_loss0_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "loss0"))
            dev_loss1_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "loss1"))
            dev_loss2_writer = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "loss2"))
            dev_eval_writer1 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MAP"))
            dev_eval_writer2 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "MRR"))
            dev_eval_writer3 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat1"))
            dev_eval_writer4 = tf.summary.FileWriter(
                os.path.join(self.args.save_dir, "summaries", "dev", "Pat5"))

            loss = tf.placeholder(tf.float32)
            loss_summary = tf.summary.scalar("loss", loss)
            dev_eval = tf.placeholder(tf.float32)
            dev_summary = tf.summary.scalar("QR_evaluation", dev_eval)
            cost = tf.placeholder(tf.float32)
            cost_summary = tf.summary.scalar("cost", cost)

            # train_eval = tf.placeholder(tf.float32)
            # train_summary = tf.summary.scalar("QR_train", train_eval)

        if self.args.save_dir != "":
            checkpoint_dir = os.path.join(self.args.save_dir, "checkpoints")
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)

        patience = 8 if 'patience' not in self.args else self.args.patience
        unchanged = 0
        max_epoch = self.args.max_epoch
        for epoch in xrange(max_epoch):
            unchanged += 1
            if unchanged > patience:
                break

            train_batches = myio.create_batches(ids_corpus, train,
                                                self.args.batch_size,
                                                self.padding_id, pad_left=False)
            N = len(train_batches)

            train_loss = 0.0
            train_cost = 0.0

            for i in xrange(N):
                idts, idbs, idps, qpp = train_batches[i]

                cur_step, cur_loss, cur_cost = self.train_batch(
                    idts, idbs, idps, qpp, train_op, global_step, sess)

                summary = sess.run(loss_summary, {loss: cur_loss})
                train_loss_writer.add_summary(summary, cur_step)
                train_loss_writer.flush()
                summary = sess.run(cost_summary, {cost: cur_cost})
                train_cost_writer.add_summary(summary, cur_step)
                train_cost_writer.flush()

                train_loss += cur_loss
                train_cost += cur_cost

                if i % 10 == 0:
                    say("\r{}/{}".format(i, N))

                if i == N - 1 or (i % 10 == 0 and 'testing' in self.args and self.args.testing):
                    # EVAL
                    if dev:
                        dev_MAP, dev_MRR, dev_P1, dev_P5, dloss0, dloss1, dloss2 = \
                            self.evaluate(dev, sess)

                        summary = sess.run(loss_summary, {loss: dloss0})
                        dev_loss0_writer.add_summary(summary, cur_step)
                        dev_loss0_writer.flush()
                        summary = sess.run(loss_summary, {loss: dloss1})
                        dev_loss1_writer.add_summary(summary, cur_step)
                        dev_loss1_writer.flush()
                        summary = sess.run(loss_summary, {loss: dloss2})
                        dev_loss2_writer.add_summary(summary, cur_step)
                        dev_loss2_writer.flush()

                        summary = sess.run(dev_summary, {dev_eval: dev_MAP})
                        dev_eval_writer1.add_summary(summary, cur_step)
                        dev_eval_writer1.flush()
                        summary = sess.run(dev_summary, {dev_eval: dev_MRR})
                        dev_eval_writer2.add_summary(summary, cur_step)
                        dev_eval_writer2.flush()
                        summary = sess.run(dev_summary, {dev_eval: dev_P1})
                        dev_eval_writer3.add_summary(summary, cur_step)
                        dev_eval_writer3.flush()
                        summary = sess.run(dev_summary, {dev_eval: dev_P5})
                        dev_eval_writer4.add_summary(summary, cur_step)
                        dev_eval_writer4.flush()

                        feed_dict = {}
                        for param_name, param_norm in self.get_pnorm_stat(sess).iteritems():
                            feed_dict[p_norm_placeholders[param_name]] = param_norm
                        _p_norm_sum = sess.run(p_norm_summary_op, feed_dict)
                        p_norm_summary_writer.add_summary(_p_norm_sum, cur_step)

                    if test:
                        test_MAP, test_MRR, test_P1, test_P5, tloss0, tloss1, tloss2 = \
                            self.evaluate(test, sess)

                    if self.args.performance == "MRR" and dev_MRR > best_dev:
                        unchanged = 0
                        best_dev = dev_MRR
                        result_table.add_row([
                            epoch, cur_step, dev_MAP, dev_MRR, dev_P1, dev_P5,
                            test_MAP, test_MRR, test_P1, test_P5
                        ])
                        if self.args.save_dir != "":
                            self.save(sess, checkpoint_prefix, cur_step)
                    elif self.args.performance == "MAP" and dev_MAP > best_dev:
                        unchanged = 0
                        best_dev = dev_MAP
                        result_table.add_row([
                            epoch, cur_step, dev_MAP, dev_MRR, dev_P1, dev_P5,
                            test_MAP, test_MRR, test_P1, test_P5
                        ])
                        if self.args.save_dir != "":
                            self.save(sess, checkpoint_prefix, cur_step)

            say("\r\n\nEpoch {}\tcost={:.3f}\tloss={:.3f}\tMRR={:.2f},MAP={:.2f}\n".format(
                epoch,
                train_cost / (i + 1),  # i.e. divided by N training batches
                train_loss / (i + 1),  # i.e. divided by N training batches
                dev_MRR,
                dev_MAP))
            say("\n{}\n".format(result_table))
            myio.say("\tp_norm: {}\n".format(self.get_pnorm_stat(sess)))
def main():
    print 'Starting at: {}\n'.format(datetime.now())
    raw_corpus = myio.read_corpus(args.corpus)
    embedding_layer = create_embedding_layer(
        n_d=200,
        embs=load_embedding_iterator(args.embeddings),
        only_words=False if args.use_embeddings else True,
        trainable=args.trainable
    )
    ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len)
    print("vocab size={}, corpus size={}\n".format(
        embedding_layer.n_V, len(raw_corpus)
    ))
    padding_id = embedding_layer.vocab_map["<padding>"]

    if args.reweight:
        weights = myio.create_idf_weights(args.corpus, embedding_layer)

    if args.layer.lower() == "lstm":
        from models import LstmQR as Model
    elif args.layer.lower() in ["bilstm", "bigru"]:
        from models import BiRNNQR as Model
    elif args.layer.lower() == "cnn":
        from models import CnnQR as Model
    elif args.layer.lower() == "gru":
        from models import GruQR as Model
    else:
        raise Exception("no correct layer given")

    if args.dev:
        dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1)
        dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left=False)
    if args.test:
        test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1)
        test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left=False)

    model = Model(args, embedding_layer, weights=weights if args.reweight else None)
    model.ready()

    print 'total (non) trainable params: ', model.num_parameters()

    if args.load_pre_trained_part:
        # need to remove the old assigns to embeddings
        model.init_assign_ops = model.load_pre_trained_part(args.load_pre_trained_part)
    print '\nmodel init_assign_ops: {}\n'.format(model.init_assign_ops)

    if args.train:
        start_time = time.time()
        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(
            ids_corpus, train, args.batch_size, padding_id, pad_left=False
        )
        print("{} to create batches\n".format(time.time() - start_time))
        print("{} batches, {} tokens in total, {} triples in total\n".format(
            len(train_batches),
            sum(len(x[0].ravel()) + len(x[1].ravel()) for x in train_batches),
            sum(len(x[2].ravel()) for x in train_batches)
        ))

        model.train_model(
            ids_corpus, train,
            dev=dev if args.dev else None,
            test=test if args.test else None
        )

    print '\nEnded at: {}'.format(datetime.now())
def main():
    print args
    set_default_rng_seed(args.seed)
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)

    max_len = args.max_len

    if args.train:
        train_x, train_y = myio.read_annotations(args.train)
        if args.debug:
            len_ = len(train_x) * args.debug
            len_ = int(len_)
            train_x = train_x[:len_]
            train_y = train_y[:len_]
        print 'train size: ', len(train_x)  # , train_x[0], len(train_x[0])
        # exit()
        train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x]

    if args.dev:
        dev_x, dev_y = myio.read_annotations(args.dev)
        if args.debug:
            len_ = len(dev_x) * args.debug
            len_ = int(len_)
            dev_x = dev_x[:len_]
            dev_y = dev_y[:len_]
        print 'dev size: ', len(dev_x)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x]

    rationale_data = None
    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    # print 'in main: ', args.seed
    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=len(train_y[0]))
        if args.load_model:
            model.load_model(args.load_model, seed=args.seed, select_all=args.select_all)
            say("model loaded successfully.\n")
        else:
            model.ready()
        # say(" ready time nedded {} \n".format(time.time()-start_ready_time))

        # debug_func2 = theano.function(
        #     inputs = [ model.x, model.z ],
        #     outputs = model.generator.logpz
        # )
        # theano.printing.debugprint(debug_func2)
        # return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # (test_x, test_y),
            rationale_data if args.load_rationale else None,
            trained_max_epochs=args.trained_max_epochs)

    if args.load_model and not args.dev and not args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      nclasses=-1)
        model.load_model(args.load_model, seed=args.seed, select_all=args.select_all)
        say("model loaded successfully.\n")

        sample_generator = theano.function(
            inputs=[model.x],
            outputs=model.z,
            # updates = model.generator.sample_updates
        )
        sample_encoder = theano.function(
            inputs=[model.x, model.y, model.z],
            outputs=[model.encoder.obj, model.encoder.loss, model.encoder.pred_diff],
            # updates = model.generator.sample_updates
        )
        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[model.z, model.encoder.obj, model.encoder.loss, model.encoder.pred_diff],
            # updates = model.generator.sample_updates
        )
        debug_func_enc = theano.function(
            inputs=[model.x, model.y],
            outputs=[model.z, model.encoder.obj, model.encoder.loss, model.encoder.pred_diff],
            # updates = model.generator.sample_updates
        )
        debug_func_gen = theano.function(
            inputs=[model.x, model.y],
            outputs=[model.z, model.encoder.obj, model.encoder.loss, model.encoder.pred_diff],
            # updates = model.generator.sample_updates
        )
        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            # updates = model.generator.sample_updates
        )

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        if rationale_data is not None:
            valid_batches_x, valid_batches_y = myio.create_batches(
                [u["xids"] for u in rationale_data],
                [u["y"] for u in rationale_data],
                args.batch,
                padding_id,
                sort=False)

        # disable dropout
        model.dropout.set_value(0.0)
        if rationale_data is not None:
            # model.dropout.set_value(0.0)
            start_rational_time = time.time()
            r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = \
                model.evaluate_rationale(rationale_data, valid_batches_x,
                                         valid_batches_y, sample_generator,
                                         sample_encoder, eval_func)
            # valid_batches_y, eval_func)
            # model.dropout.set_value(dropout_prob)
            # say(("\ttest rationale mser={:.4f} p[1]r={:.2f} prec1={:.4f}" +
            #      " prec2={:.4f} generator time={:.4f} encoder time={:.4f}"
            #      " total test time={:.4f}\n").format(
            #     r_mse, r_p1, r_prec1, r_prec2,
            #     gen_time, enc_time, time.time() - start_rational_time))

            data = str('%.5f' % r_mse) + "\t" + str('%4.2f' % r_p1) + "\t" + \
                str('%4.4f' % r_prec1) + "\t" + str('%4.4f' % r_prec2) + "\t" + \
                str('%4.2f' % gen_time) + "\t" + str('%4.2f' % enc_time) + "\t" + \
                str('%4.2f' % prec_cal_time) + "\t" + \
                str('%4.2f' % (time.time() - start_rational_time)) + "\t" + \
                str(args.sparsity) + "\t" + str(args.coherent) + "\t" + \
                str(args.max_epochs) + "\t" + str(args.cur_epoch)

            with open(args.graph_data_path, 'a') as g_f:
                print 'writing to file: ', data
                g_f.write(data + "\n")
def train(self, train, dev, test, rationale_data):
    args = self.args
    dropout = self.dropout
    padding_id = self.embedding_layer.vocab_map["<padding>"]

    if dev is not None:
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev[0], dev[1], args.batch, padding_id)
    if test is not None:
        test_batches_x, test_batches_y = myio.create_batches(
            test[0], test[1], args.batch, padding_id)
    if rationale_data is not None:
        valid_batches_x, valid_batches_y = myio.create_batches(
            [u["xids"] for u in rationale_data],
            [u["y"] for u in rationale_data],
            args.batch, padding_id, sort=False)

    start_time = time.time()
    train_batches_x, train_batches_y = myio.create_batches(
        train[0], train[1], args.batch, padding_id)
    say("{:.2f}s to create training batches\n\n".format(time.time() - start_time))

    updates_e, lr_e, gnorm_e = create_optimization_updates(
        cost=self.generator.cost_e,
        params=self.encoder.params,
        method=args.learning,
        lr=args.learning_rate)[:3]

    updates_g, lr_g, gnorm_g = create_optimization_updates(
        cost=self.generator.cost,
        params=self.generator.params,
        method=args.learning,
        lr=args.learning_rate)[:3]

    sample_generator = theano.function(
        inputs=[self.x],
        outputs=self.z_pred,
        # updates = self.generator.sample_updates
        # allow_input_downcast = True
    )

    get_loss_and_pred = theano.function(
        inputs=[self.x, self.z, self.y],
        outputs=[self.generator.loss_vec, self.encoder.preds])

    eval_generator = theano.function(
        inputs=[self.x, self.y],
        outputs=[self.z, self.generator.obj, self.generator.loss,
                 self.encoder.pred_diff],
        givens={self.z: self.generator.z_pred},
        # updates = self.generator.sample_updates,
        # no_default_updates = True
    )

    train_generator = theano.function(
        inputs=[self.x, self.y],
        outputs=[self.generator.obj, self.generator.loss,
                 self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e],
        givens={self.z: self.generator.z_pred},
        # updates = updates_g,
        updates=updates_g.items() | updates_e.items()
        # + self.generator.sample_updates,
        # no_default_updates = True
    )

    eval_period = args.eval_period
    unchanged = 0
    best_dev = 1e+2
    best_dev_e = 1e+2
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

    for epoch in range(args.max_epochs):
        unchanged += 1
        if unchanged > 10:
            return

        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)

        processed = 0
        train_cost = 0.0
        train_loss = 0.0
        train_sparsity_cost = 0.0
        p1 = 0.0
        start_time = time.time()

        N = len(train_batches_x)
        for i in range(N):
            if (i + 1) % 100 == 0:
                say("\r{}/{} ".format(i + 1, N))

            bx, by = train_batches_x[i], train_batches_y[i]
            mask = bx != padding_id

            cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(bx, by)

            k = len(by)
            processed += k
            train_cost += cost
            train_loss += loss
            train_sparsity_cost += sparsity_cost
            p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

            if (i == N - 1) or (eval_period > 0 and
                                processed / eval_period > (processed - k) / eval_period):
                say("\n")
                say(("Generator Epoch {:.2f} costg={:.4f} scost={:.4f} lossg={:.4f} "
                     "p[1]={:.2f} |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                        epoch + (i + 1.0) / N,
                        train_cost / (i + 1),
                        train_sparsity_cost / (i + 1),
                        train_loss / (i + 1),
                        p1 / (i + 1),
                        float(gl2_g),
                        float(gl2_e),
                        (time.time() - start_time) / 60.0,
                        (time.time() - start_time) / 60.0 / (i + 1) * N))
                say("\t" + str(["{:.1f}".format(np.linalg.norm(x.get_value(borrow=True)))
                                for x in self.encoder.params]) + "\n")
                say("\t" + str(["{:.1f}".format(np.linalg.norm(x.get_value(borrow=True)))
                                for x in self.generator.params]) + "\n")

                if dev:
                    self.dropout.set_value(0.0)
                    dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data(
                        dev_batches_x, dev_batches_y, eval_generator, sampling=True)

                    if dev_obj < best_dev:
                        best_dev = dev_obj
                        unchanged = 0
                        if args.dump and rationale_data:
                            self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                                                 get_loss_and_pred, sample_generator)
                        if args.save_model:
                            self.save_model(args.save_model, args)

                    say(("\tsampling devg={:.4f} mseg={:.4f} avg_diffg={:.4f}"
                         " p[1]g={:.2f} best_dev={:.4f}\n").format(
                            dev_obj, dev_loss, dev_diff, dev_p1, best_dev))

                    if rationale_data is not None:
                        r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                            rationale_data, valid_batches_x, valid_batches_y, eval_generator)
                        say(("\trationale mser={:.4f} p[1]r={:.2f} prec1={:.4f}"
                             " prec2={:.4f}\n").format(r_mse, r_p1, r_prec1, r_prec2))

                    self.dropout.set_value(dropout_prob)
def train(self, train, dev, test):
    args = self.args
    dropout = self.dropout
    padding_id = self.embedding_layer.vocab_map["<padding>"]

    if dev is not None:
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev[0], dev[1], args.batch, padding_id)
    if test is not None:
        test_batches_x, test_batches_y = myio.create_batches(
            test[0], test[1], args.batch, padding_id)

    start_time = time.time()
    train_batches_x, train_batches_y = myio.create_batches(
        train[0], train[1], args.batch, padding_id)
    say("{:.2f}s to create training batches\n\n".format(time.time() - start_time))

    updates_e, lr_e, gnorm_e = create_optimization_updates(
        cost=self.encoder.cost_e,
        params=self.encoder.params,
        method=args.learning,
        lr=args.learning_rate)[:3]

    updates_g, lr_g, gnorm_g = create_optimization_updates(
        cost=self.encoder.cost_g,
        params=self.generator.params,
        method=args.learning,
        lr=args.learning_rate)[:3]

    sample_generator = theano.function(
        inputs=[self.x],
        outputs=self.z)

    get_loss_and_pred = theano.function(
        inputs=[self.x, self.y],
        outputs=[self.encoder.loss_vec, self.encoder.preds, self.z])

    train_generator = theano.function(
        inputs=[self.x, self.y],
        outputs=[self.encoder.obj, self.encoder.loss,
                 self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g],
        updates=updates_e.items() + updates_g.items(),
    )

    eval_func = theano.function(
        inputs=[self.x, self.y],
        outputs=[self.z, self.encoder.obj, self.true_pos, self.tot_pos, self.tot_true])

    eval_period = args.eval_period
    unchanged = 0
    best_dev = 1e+2
    best_dev_e = 1e+2
    last_train_avg_cost = None
    last_dev_avg_cost = None
    tolerance = 0.10 + 1e-3
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

    for epoch in xrange(args.max_epochs):
        unchanged += 1
        if unchanged > 50:
            return

        train_batches_x, train_batches_y = myio.create_batches(
            train[0], train[1], args.batch, padding_id)

        more = True
        if args.decay_lr:
            param_bak = [p.get_value(borrow=False) for p in self.params]

        while more:
            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in xrange(N):
                if (i + 1) % 100 == 0:
                    say("\r{}/{} {:.2f} ".format(i + 1, N, p1 / (i + 1)))

                bx, by = train_batches_x[i], train_batches_y[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(bx, by)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

            cur_train_avg_cost = train_cost / N

            if dev:
                self.dropout.set_value(0.0)
                dev_obj, dev_prec, dev_recall, dev_f1, dev_p1 = self.evaluate_data(
                    dev_batches_x, dev_batches_y, eval_func)
                self.dropout.set_value(dropout_prob)
                cur_dev_avg_cost = dev_obj

            more = False
            if args.decay_lr and last_train_avg_cost is not None:
                if cur_train_avg_cost > last_train_avg_cost * (1 + tolerance):
                    more = True
                    say("\nTrain cost {} --> {}\n".format(
                        last_train_avg_cost, cur_train_avg_cost))
                if dev and cur_dev_avg_cost > last_dev_avg_cost * (1 + tolerance):
                    more = True
                    say("\nDev cost {} --> {}\n".format(
                        last_dev_avg_cost, cur_dev_avg_cost))

            if more:
                lr_val = lr_g.get_value() * 0.5
                lr_val = np.float64(lr_val).astype(theano.config.floatX)
                lr_g.set_value(lr_val)
                lr_e.set_value(lr_val)
                say("Decrease learning rate to {}\n".format(float(lr_val)))
                for p, v in zip(self.params, param_bak):
                    p.set_value(v)
                continue

            last_train_avg_cost = cur_train_avg_cost
            if dev:
                last_dev_avg_cost = cur_dev_avg_cost

            say("\n")
            say(("Generator Epoch {:.2f} costg={:.4f} scost={:.4f} lossg={:.4f} "
                 "p[1]={:.3f} |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                    epoch + (i + 1.0) / N,
                    train_cost / N,
                    train_sparsity_cost / N,
                    train_loss / N,
                    p1 / N,
                    float(gl2_e),
                    float(gl2_g),
                    (time.time() - start_time) / 60.0,
                    (time.time() - start_time) / 60.0 / (i + 1) * N))
            say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True)))
                            for x in self.encoder.params]) + "\n")
            say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True)))
                            for x in self.generator.params]) + "\n")

            if dev:
                if dev_obj < best_dev:
                    best_dev = dev_obj
                    unchanged = 0
                    if args.dump and test:
                        self.dump_rationales(args.dump, test_batches_x, test_batches_y,
                                             get_loss_and_pred, sample_generator)

                say(("\tdevg={:.4f} f1g={:.4f} preg={:.4f} recg={:.4f}"
                     " p[1]g={:.3f} best_dev={:.4f}\n").format(
                        dev_obj, dev_f1, dev_prec, dev_recall, dev_p1, best_dev))

                if test is not None:
                    self.dropout.set_value(0.0)
                    test_obj, test_prec, test_recall, test_f1, test_p1 = self.evaluate_data(
                        test_batches_x, test_batches_y, eval_func)
                    self.dropout.set_value(dropout_prob)
                    say(("\ttestt={:.4f} f1t={:.4f} pret={:.4f} rect={:.4f}"
                         " p[1]t={:.3f}\n").format(
                            test_obj, test_f1, test_prec, test_recall, test_p1))
def train(self, ids_corpus, train, dev=None, test=None):
    args = self.args
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)
    batch_size = args.batch_size
    padding_id = self.padding_id

    # train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id)

    if dev is not None:
        dev, dev_raw = dev
    if test is not None:
        test, test_raw = test

    if args.joint:
        updates_e, lr_e, gnorm_e = create_optimization_updates(
            cost=self.encoder.cost_e,  # self.encoder.cost,
            params=self.encoder.params,
            lr=args.learning_rate * 0.1,
            method=args.learning)[:3]
    else:
        updates_e = {}

    updates_g, lr_g, gnorm_g = create_optimization_updates(
        cost=self.encoder.cost_g,
        params=self.generator.params,
        lr=args.learning_rate,
        method=args.learning)[:3]

    train_func = theano.function(
        inputs=[self.x, self.triples, self.pairs],
        outputs=[self.encoder.obj, self.encoder.loss,
                 self.encoder.sparsity_cost, self.generator.p1, gnorm_g],
        # updates = updates_g.items() + updates_e.items() + self.generator.sample_updates,
        updates=collections.OrderedDict(
            list(updates_g.items()) + list(updates_e.items()) +
            list(self.generator.sample_updates.items())),
        # no_default_updates = True,
        on_unused_input="ignore"
    )

    eval_func = theano.function(inputs=[self.x], outputs=self.encoder.scores)

    eval_func2 = theano.function(
        inputs=[self.x],
        outputs=[self.encoder.scores_z, self.generator.p1, self.z],
        updates=self.generator.sample_updates,
        # no_default_updates = True
    )

    say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.encoder.params)))
    say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.generator.params)))

    result_table = PrettyTable(
        ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] +
        ["tst MAP", "tst MRR", "tst P@1", "tst P@5"])

    last_train_avg_cost = None
    tolerance = 0.5 + 1e-3
    unchanged = 0
    best_dev = -1
    dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0
    test_MAP = test_MRR = test_P1 = test_P5 = 0

    start_time = 0
    max_epoch = args.max_epoch
    for epoch in range(max_epoch):
        unchanged += 1
        if unchanged > 20:
            break

        start_time = time.time()

        train = myio.read_annotations(args.train)
        train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id,
                                            pad_left=not args.average, merge=args.merge)
        N = len(train_batches)

        more = True
        param_bak = [p.get_value(borrow=False) for p in self.params]

        while more:
            train_loss = 0.0
            train_cost = 0.0
            train_scost = 0.0
            train_p1 = 0.0

            for i in range(N):
                # get current batch
                idts, triples, pairs = train_batches[i]

                cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(
                    idts, triples, pairs)
                train_loss += cur_loss
                train_cost += cur_cost
                train_scost += cur_scost
                train_p1 += cur_p1

                if i % 10 == 0:
                    say("\r{}/{} {:.3f}".format(i, N, train_p1 / (i + 1)))

            cur_train_avg_cost = train_cost / N
            more = False
            if last_train_avg_cost is not None:
                if cur_train_avg_cost > last_train_avg_cost * (1 + tolerance):
                    more = True
                    say("\nTrain cost {} --> {}\n".format(
                        last_train_avg_cost, cur_train_avg_cost))

            if more:
                lr_val = lr_g.get_value() * 0.5
                if lr_val < 1e-5:
                    return
                lr_val = np.float64(lr_val).astype(theano.config.floatX)
                lr_g.set_value(lr_val)
                lr_e.set_value(lr_val)
                say("Decrease learning rate to {}\n".format(float(lr_val)))
                for p, v in zip(self.params, param_bak):
                    p.set_value(v)
                continue

            last_train_avg_cost = cur_train_avg_cost

            say("\r\n\n")
            say(("Epoch {} cost={:.3f} loss={:.3f} scost={:.3f}"
                 " P[1]={:.3f} |g|={:.3f}\t[{:.3f}m]\n").format(
                    epoch,
                    train_cost / N,
                    train_loss / N,
                    train_scost / N,
                    train_p1 / N,
                    float(gnormg),
                    (time.time() - start_time) / 60.0))
            say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.encoder.params)))
            say("\tp_norm: {}\n".format(self.get_pnorm_stat(self.generator.params)))

            self.dropout.set_value(0.0)

            if dev is not None:
                full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(dev, eval_func)
                dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(
                    dev, dev_raw, ids_corpus, eval_func2)
            if test is not None:
                test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \
                    self.evaluate_z(test, test_raw, ids_corpus, eval_func2)

            if dev_MAP > best_dev:
                best_dev = dev_MAP
                unchanged = 0

            say("\n")
            say(" fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format(
                full_MAP, full_MRR, full_P1, full_P5))

            say("\n")
            say((" dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}"
                 " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format(
                    dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT, best_dev))

            result_table.add_row(
                [epoch] +
                ["%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] +
                 [test_MAP, test_MRR, test_P1, test_P5]])

            if unchanged == 0:
                say("\n")
                say((" tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}"
                     " tP[1]={:.3f} t%T={:.3f}\n").format(
                        test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT))
                if args.dump_rationale:
                    self.evaluate_z(dev + test, dev_raw + test_raw, ids_corpus,
                                    eval_func2, args.dump_rationale)

                # if args.save_model:
                #     self.save_model(args.save_model)

            dropout_p = np.float64(args.dropout).astype(theano.config.floatX)
            self.dropout.set_value(dropout_p)

            say("\n")
            say("{}".format(result_table))
            say("\n")

        if train_p1 / N <= 1e-4 or train_p1 / N + 1e-4 >= 1.0:
            break
def main():
    assert args.embedding, "Pre-trained word embeddings required."

    embedding_layer = myio.create_embedding_layer(args.embedding)
    embedding_layer_y = myio.create_embedding_layer(args.embedding)

    max_len_x = args.sentence_length * args.max_sentences
    max_len_y = args.sentence_length_hl * args.max_sentences_hl

    if args.train:
        train_x, train_y = myio.read_docs(args.train)
        train_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in train_x]
        train_y = [embedding_layer_y.map_to_ids(y)[:max_len_y] for y in train_y]

    if args.dev:
        dev_x, dev_y = myio.read_docs(args.dev)
        dev_x = [embedding_layer.map_to_ids(x)[:max_len_x] for x in dev_x]
        dev_y = [embedding_layer_y.map_to_ids(y)[:max_len_y] for y in dev_y]

    if args.load_rationale:
        rationale_data = myio.read_rationales(args.load_rationale)
        for x in rationale_data:
            x["xids"] = embedding_layer.map_to_ids(x["x"])

    if args.train:
        model = Model(args=args,
                      embedding_layer=embedding_layer,
                      embedding_layer_y=embedding_layer_y,
                      nclasses=len(train_y[0]))
        model.ready()

        # debug_func2 = theano.function(
        #     inputs = [ model.x, model.z ],
        #     outputs = model.generator.logpz
        # )
        # theano.printing.debugprint(debug_func2)
        # return

        model.train(
            (train_x, train_y),
            (dev_x, dev_y) if args.dev else None,
            None,  # (test_x, test_y),
            rationale_data if args.load_rationale else None)

    if args.load_model and args.dev and not args.train:
        model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1)
        model.load_model(args.load_model)
        say("model loaded successfully.\n")

        # compile an evaluation function
        eval_func = theano.function(
            inputs=[model.x, model.y],
            outputs=[model.z, model.encoder.obj, model.encoder.loss, model.encoder.pred_diff],
            updates=model.generator.sample_updates)

        # compile a predictor function
        pred_func = theano.function(
            inputs=[model.x],
            outputs=[model.z, model.encoder.preds],
            updates=model.generator.sample_updates)

        # batching data
        padding_id = embedding_layer.vocab_map["<padding>"]
        dev_batches_x, dev_batches_y = myio.create_batches(
            dev_x, dev_y, args.batch, padding_id)

        # disable dropout
        model.dropout.set_value(0.0)
        dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
            dev_batches_x, dev_batches_y, eval_func, sampling=True)
        say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
def train(self, train, dev, test, rationale_data):
    args = self.args
    dropout = self.dropout
    padding_id = self.embedding_layer.vocab_map["<padding>"]

    if dev is not None:
        dev_batches_x, dev_batches_y, dev_batches_bv = myio.create_batches(
            dev[0], dev[1], args.batch, padding_id)
    if test is not None:
        test_batches_x, test_batches_y = myio.create_batches(
            test[0], test[1], args.batch, padding_id)
    if rationale_data is not None:
        valid_batches_x, valid_batches_y = myio.create_batches(
            [u["xids"] for u in rationale_data],
            [u["y"] for u in rationale_data],
            args.batch, padding_id, sort=False)

    # start_time = time.time()
    # train_batches_x, train_batches_y = myio.create_batches(
    #     train[0], train[1], args.batch, padding_id
    # )
    # say("{:.2f}s to create training batches\n\n".format(
    #     time.time() - start_time
    # ))

    updates_e, lr_e, gnorm_e = create_optimization_updates(
        cost=self.encoder.cost_e,
        params=self.encoder.params,
        method=args.learning,
        beta1=args.beta1,
        beta2=args.beta2,
        lr=args.learning_rate)[:3]

    updates_g, lr_g, gnorm_g = create_optimization_updates(
        cost=self.encoder.cost_g,
        params=self.generator.params,
        method=args.learning,
        beta1=args.beta1,
        beta2=args.beta2,
        lr=args.learning_rate)[:3]

    sample_generator = theano.function(
        inputs=[self.x],
        outputs=self.z,
        updates=self.generator.sample_updates)

    # get_loss_and_pred = theano.function(
    #     inputs=[self.x, self.y],
    #     outputs=[self.encoder.loss_vec, self.z],
    #     updates=self.generator.sample_updates + self.generator.sample_updates_sent
    # )

    eval_generator = theano.function(
        inputs=[self.x, self.y, self.bv],
        outputs=[self.z, self.encoder.obj, self.encoder.loss],
        updates=self.generator.sample_updates)

    train_generator = theano.function(
        inputs=[self.x, self.y, self.bv],
        outputs=[self.encoder.obj, self.encoder.loss,
                 self.encoder.sparsity_cost, self.z, gnorm_e, gnorm_g],
        updates=updates_e.items() + updates_g.items() + self.generator.sample_updates)

    eval_period = args.eval_period
    unchanged = 0
    best_dev = 1e+2
    best_dev_e = 1e+2
    last_train_avg_cost = None
    last_dev_avg_cost = None
    tolerance = 0.10 + 1e-3
    dropout_prob = np.float64(args.dropout).astype(theano.config.floatX)

    metric_output = open(args.train_output_readable + '_METRICS' +
                         '_sparcity_' + str(args.sparsity) + '.out', 'w+')

    if args.dev_baseline:
        ofp1 = open(args.train_output_readable + '_METRICS' + '_sparcity_' +
                    str(args.sparsity) + '_baseline.out', 'w+')
        ofp2 = open(args.train_output_readable + '_sparcity_' +
                    str(args.sparsity) + '_baseline.out', 'w+')

        dz = myio.convert_bv_to_z(dev_batches_bv)

        myio.write_train_results(dz[0], dev_batches_x[0], dev_batches_y[0],
                                 self.embedding_layer, ofp2, padding_id)
        myio.write_summ_for_rouge(args, dz, dev_batches_x, dev_batches_y,
                                  self.embedding_layer)
        myio.write_metrics(-1, -1, ofp1, -1, args)

        ofp1.close()
        ofp2.close()

    for epoch in xrange(args.max_epochs):
        read_output = open(args.train_output_readable + '_e_' + str(epoch) +
                           '_sparcity_' + str(args.sparsity) + '.out', 'w+')
        total_words_per_epoch = 0
        total_summaries_per_epoch = 0

        unchanged += 1
        if unchanged > 20:
            metric_output.write("PROBLEM TRAINING, NO DEV IMPROVEMENT")
            metric_output.close()
            break

        train_batches_x, train_batches_y, train_batches_bv = myio.create_batches(
            train[0], train[1], args.batch, padding_id)

        more = True
        if args.decay_lr:
            param_bak = [p.get_value(borrow=False) for p in self.params]

        while more:
            processed = 0
            train_cost = 0.0
            train_loss = 0.0
            train_sparsity_cost = 0.0
            p1 = 0.0
            start_time = time.time()

            N = len(train_batches_x)
            for i in xrange(N):
                if (i + 1) % 32 == 0:
                    say("\r{}/{} {:.2f} ".format(i + 1, N, p1 / (i + 1)))

                bx, by, bv = train_batches_x[i], train_batches_y[i], train_batches_bv[i]
                mask = bx != padding_id

                cost, loss, sparsity_cost, bz, gl2_e, gl2_g = train_generator(bx, by, bv)

                if i % 64 == 0:
                    self.evaluate_rnn_weights(args, epoch, i)
                if i % 8 == 0:
                    myio.write_train_results(bz, bx, by, self.embedding_layer,
                                             read_output, padding_id)

                k = len(by)
                processed += k
                train_cost += cost
                train_loss += loss
                train_sparsity_cost += sparsity_cost
                p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8)

                total_summaries_per_epoch += args.batch
                total_words_per_epoch += myio.total_words(bz)

            cur_train_avg_cost = train_cost / N

            if dev:
                self.dropout.set_value(0.0)
                dev_obj, dev_loss, dev_p1, dev_v, dev_x, dev_y = self.evaluate_data(
                    dev_batches_x, dev_batches_y, dev_batches_bv,
                    eval_generator, sampling=True)
                self.dropout.set_value(dropout_prob)
                cur_dev_avg_cost = dev_obj

                myio.write_train_results(dev_v[0], dev_x[0], dev_y[0],
                                         self.embedding_layer, read_output, padding_id)
                myio.write_summ_for_rouge(args, dev_v, dev_x, dev_y, self.embedding_layer)
                myio.write_metrics(total_summaries_per_epoch, total_words_per_epoch,
                                   metric_output, epoch, args)
                metric_output.flush()

            more = False
            if args.decay_lr and last_train_avg_cost is not None:
                if cur_train_avg_cost > last_train_avg_cost * (1 + tolerance):
                    more = True
                    say("\nTrain cost {} --> {}\n".format(
                        last_train_avg_cost, cur_train_avg_cost))
                if dev and cur_dev_avg_cost > last_dev_avg_cost * (1 + tolerance):
                    more = True
                    say("\nDev cost {} --> {}\n".format(
                        last_dev_avg_cost, cur_dev_avg_cost))

            if more:
                lr_val = lr_g.get_value() * 0.5
                lr_val = np.float64(lr_val).astype(theano.config.floatX)
                lr_g.set_value(lr_val)
                lr_e.set_value(lr_val)
                say("Decrease learning rate to {}\n".format(float(lr_val)))
                for p, v in zip(self.params, param_bak):
                    p.set_value(v)
                continue

            last_train_avg_cost = cur_train_avg_cost
            if dev:
                last_dev_avg_cost = cur_dev_avg_cost

            say("\n")
            say(("Generator Epoch {:.2f} costg={:.4f} scost={:.4f} lossg={:.4f} "
                 "p[1]={:.2f} |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format(
                    epoch + (i + 1.0) / N,
                    train_cost / N,
                    train_sparsity_cost / N,
                    train_loss / N,
                    p1 / N,
                    float(gl2_e),
                    float(gl2_g),
                    (time.time() - start_time) / 60.0,
                    (time.time() - start_time) / 60.0 / (i + 1) * N))
            say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True)))
                            for x in self.encoder.params]) + "\n")
            say("\t" + str(["{:.2f}".format(np.linalg.norm(x.get_value(borrow=True)))
                            for x in self.generator.params]) + "\n")

            if dev:
                if dev_obj < best_dev:
                    best_dev = dev_obj
                    unchanged = 0
                    # if args.dump and rationale_data:
                    #     self.dump_rationales(args.dump, valid_batches_x, valid_batches_y,
                    #                          get_loss_and_pred, sample_generator)
                    #
                    # if args.save_model:
                    #     self.save_model(args.save_model, args)

                say(("\tsampling devg={:.4f} mseg={:.4f}"
                     " p[1]g={:.2f} best_dev={:.4f}\n").format(
                        dev_obj, dev_loss, dev_p1, best_dev))

                # if rationale_data is not None:
                #     self.dropout.set_value(0.0)
                #     r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale(
                #         rationale_data, valid_batches_x,
                #         valid_batches_y, eval_generator)
                #     self.dropout.set_value(dropout_prob)
                #     say(("\trationale mser={:.4f} p[1]r={:.2f} prec1={:.4f}" +
                #          " prec2={:.4f}\n").format(r_mse, r_p1, r_prec1, r_prec2))

        read_output.close()

    metric_output.close()
def train(self, train, dev, test, rationale_data): args = self.args dropout = self.dropout padding_id = self.embedding_layer.vocab_map["<padding>"] if dev is not None: dev_batches_x, dev_batches_y = myio.create_batches( dev[0], dev[1], args.batch, padding_id ) if test is not None: test_batches_x, test_batches_y = myio.create_batches( test[0], test[1], args.batch, padding_id ) if rationale_data is not None: valid_batches_x, valid_batches_y = myio.create_batches( [ u["xids"] for u in rationale_data ], [ u["y"] for u in rationale_data ], args.batch, padding_id, sort = False ) start_time = time.time() train_batches_x, train_batches_y = myio.create_batches( train[0], train[1], args.batch, padding_id ) say("{:.2f}s to create training batches\n\n".format( time.time()-start_time )) updates_e, lr_e, gnorm_e = create_optimization_updates( cost = self.generator.cost_e, params = self.encoder.params, method = args.learning, lr = args.learning_rate )[:3] updates_g, lr_g, gnorm_g = create_optimization_updates( cost = self.generator.cost, params = self.generator.params, method = args.learning, lr = args.learning_rate )[:3] sample_generator = theano.function( inputs = [ self.x ], outputs = self.z_pred, #updates = self.generator.sample_updates #allow_input_downcast = True ) get_loss_and_pred = theano.function( inputs = [ self.x, self.z, self.y ], outputs = [ self.generator.loss_vec, self.encoder.preds ] ) eval_generator = theano.function( inputs = [ self.x, self.y ], outputs = [ self.z, self.generator.obj, self.generator.loss, self.encoder.pred_diff ], givens = { self.z : self.generator.z_pred }, #updates = self.generator.sample_updates, #no_default_updates = True ) train_generator = theano.function( inputs = [ self.x, self.y ], outputs = [ self.generator.obj, self.generator.loss, \ self.generator.sparsity_cost, self.z, gnorm_g, gnorm_e ], givens = { self.z : self.generator.z_pred }, #updates = updates_g, updates = updates_g.items() + updates_e.items() #+ self.generator.sample_updates, #no_default_updates = True ) eval_period = args.eval_period unchanged = 0 best_dev = 1e+2 best_dev_e = 1e+2 dropout_prob = np.float64(args.dropout).astype(theano.config.floatX) for epoch in xrange(args.max_epochs): unchanged += 1 if unchanged > 10: return train_batches_x, train_batches_y = myio.create_batches( train[0], train[1], args.batch, padding_id ) processed = 0 train_cost = 0.0 train_loss = 0.0 train_sparsity_cost = 0.0 p1 = 0.0 start_time = time.time() N = len(train_batches_x) for i in xrange(N): if (i+1) % 100 == 0: say("\r{}/{} ".format(i+1,N)) bx, by = train_batches_x[i], train_batches_y[i] mask = bx != padding_id cost, loss, sparsity_cost, bz, gl2_g, gl2_e = train_generator(bx, by) k = len(by) processed += k train_cost += cost train_loss += loss train_sparsity_cost += sparsity_cost p1 += np.sum(bz*mask) / (np.sum(mask)+1e-8) if (i == N-1) or (eval_period > 0 and processed/eval_period > (processed-k)/eval_period): say("\n") say(("Generator Epoch {:.2f} costg={:.4f} scost={:.4f} lossg={:.4f} " + "p[1]={:.2f} |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n").format( epoch+(i+1.0)/N, train_cost / (i+1), train_sparsity_cost / (i+1), train_loss / (i+1), p1 / (i+1), float(gl2_g), float(gl2_e), (time.time()-start_time)/60.0, (time.time()-start_time)/60.0/(i+1)*N )) say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \ for x in self.encoder.params ])+"\n") say("\t"+str([ "{:.1f}".format(np.linalg.norm(x.get_value(borrow=True))) \ for x in self.generator.params ])+"\n") if dev: self.dropout.set_value(0.0) 
dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data( dev_batches_x, dev_batches_y, eval_generator, sampling=True) if dev_obj < best_dev: best_dev = dev_obj unchanged = 0 if args.dump and rationale_data: self.dump_rationales(args.dump, valid_batches_x, valid_batches_y, get_loss_and_pred, sample_generator) if args.save_model: self.save_model(args.save_model, args) say(("\tsampling devg={:.4f} mseg={:.4f} avg_diffg={:.4f}" + " p[1]g={:.2f} best_dev={:.4f}\n").format( dev_obj, dev_loss, dev_diff, dev_p1, best_dev )) if rationale_data is not None: r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale( rationale_data, valid_batches_x, valid_batches_y, eval_generator) say(("\trationale mser={:.4f} p[1]r={:.2f} prec1={:.4f}" + " prec2={:.4f}\n").format( r_mse, r_p1, r_prec1, r_prec2 )) self.dropout.set_value(dropout_prob)
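# --- Illustrative sketch (not part of the original code) ---
# The epoch loop above implements patience-style early stopping: `unchanged`
# grows every epoch, resets whenever dev_obj improves on best_dev, and training
# returns once it exceeds the budget (10 in the code above).  A standalone version:
def epochs_until_stop(dev_objectives, patience=10):
    best_dev = 1e+2
    unchanged = 0
    for epoch, dev_obj in enumerate(dev_objectives):
        unchanged += 1
        if dev_obj < best_dev:
            best_dev = dev_obj
            unchanged = 0            # reset patience on improvement
        if unchanged > patience:
            return epoch             # stop: no improvement for `patience` epochs
    return len(dev_objectives)

print(epochs_until_stop([0.9, 0.8, 0.85] + [0.85] * 20))  # stops long before the end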
def run( in_train_file_embedded, aspect_idx, max_train_examples, batch_size, learning_rate, in_validate_file_embedded, max_validate_examples, validate_every, sparsity, coherence, use_cuda, debug_print_training_examples, num_printed_rationales): train_d = embeddings_helper.load_embedded_data( in_filename=in_train_file_embedded, max_examples=max_train_examples, aspect_idx=aspect_idx) validate_d = embeddings_helper.load_embedded_data( in_filename=in_validate_file_embedded, max_examples=max_validate_examples, aspect_idx=aspect_idx) combined = embeddings_helper.combine_embeddings( embedding_list=[train_d['embedding'], validate_d['embedding']], idx_by_word_list=[train_d['idx_by_word'], validate_d['idx_by_word']], words_lists=[train_d['words'], validate_d['words']], x_idxes_list=[train_d['x_idxes'], validate_d['x_idxes']]) embedding = combined['embedding'] num_hidden = combined['num_hidden'] idx_by_word = combined['idx_by_word'] x_idxes_list = combined['x_idxes_list'] train_d['x_idxes'] = x_idxes_list[0] validate_d['x_idxes'] = x_idxes_list[1] words = combined['words'] # these numbers, ie -0.05 to 0.05 come from # https://github.com/taolei87/rcnn/blob/master/code/nn/initialization.py#L79 unk_idx = idx_by_word['<unk>'] pad_idx = idx_by_word['<pad>'] torch.manual_seed(123) embedding[unk_idx] = rand_uniform((num_hidden,), -0.05, 0.05) # draw validate batches now, since they should be fixed torch.manual_seed(124) validate_batches_x, validate_batches_y = myio.create_batches( x=validate_d['x_idxes'], y=validate_d['y_aspect'], batch_size=batch_size, padding_id=pad_idx) validate_num_batches = len(validate_batches_x) sample_idxes = np.random.choice( validate_num_batches * batch_size, num_printed_rationales, replace=False) sample_idxes_by_batch = defaultdict(list) for i in range(num_printed_rationales): sample_idx = sample_idxes[i] b = sample_idx // batch_size b_idx = sample_idx % batch_size sample_idxes_by_batch[b].append(b_idx) enc = Encoder(embeddings=embedding, num_layers=2) gen = Generator(embeddings=embedding, num_layers=2, pad_id=pad_idx) if use_cuda: enc.cuda() gen.cuda() embedding = embedding.cuda() params = filter(lambda p: p.requires_grad, set(enc.parameters()) | set(gen.parameters())) opt = optim.Adam(params=params, lr=learning_rate) epoch = 0 while True: batches_x, batches_y = myio.create_batches( x=train_d['x_idxes'], y=train_d['y_aspect'], batch_size=batch_size, padding_id=pad_idx) num_batches = len(batches_x) epoch_loss = 0 print(' t', end='', flush=True) epoch_start = time.time() bx_cuda_buf = torch.LongTensor(max_len, batch_size) by_cuda_buf = torch.FloatTensor(batch_size) if use_cuda: bx_cuda_buf = bx_cuda_buf.cuda() by_cuda_buf = by_cuda_buf.cuda() # by_cuda = autograd.Variable(by_cuda.cuda()) for b in range(num_batches): # print('b %s' % b) print('.', end='', flush=True) if b != 0 and b % 70 == 0: print('%s/%s' % (b, num_batches)) print(' t', end='', flush=True) gen.zero_grad() enc.zero_grad() bx = batches_x[b] by = batches_y[b] # this_seq_len = bx.size()[0] seq_len = bx.size()[0] batch_size = bx.size()[1] if debug_print_training_examples: print(rationale_helper.rationale_to_string(words, bx[0])) # print('bx.size()', bx.size()) bx_cuda = autograd.Variable(bx_cuda_buf[:seq_len, :batch_size]) by_cuda = autograd.Variable(by_cuda_buf[:batch_size]) # print('bx_cuda.size()', bx_cuda.size()) bx_cuda.data.copy_(bx) by_cuda.data.copy_(by) # if use_cuda: # if bx_cuda is None: # bx_cuda = autograd.Variable(bx.cuda()) # by_cuda = autograd.Variable(by.cuda()) # else: # bx_cuda.data.copy_(bx) # 
by_cuda.data.copy_(by) # print('bx.shape', bx.data.shape) rationale_selected_node, rationale_selected, rationales, rationale_lengths = gen.forward(bx_cuda) # print('rationales.shape', rationales.shape) out = enc.forward(rationales) loss_mse = ((by_cuda - out) * (by_cuda - out)).sum().sqrt() loss_z1 = rationale_lengths.sum().float() loss_transitions = (rationale_selected[1:] - rationale_selected[:-1]).abs().sum().float() loss = loss_mse + sparsity * loss_z1 + coherence * loss_transitions rationale_selected_node.reinforce(-loss.data[0]) loss.backward(rationale_selected_node) opt.step() # epoch_loss += loss.data[0] epoch_loss += loss_mse.data[0] print('%s/%s' % (num_batches, num_batches)) epoch_train_time = time.time() - epoch_start def run_validation(): # num_batches = len(batches_x) epoch_loss = 0 print(' v', end='', flush=True) # bx_cuda = None # by_cuda = None for b in range(validate_num_batches): # print('b %s' % b) print('.', end='', flush=True) if b != 0 and b % 70 == 0: print('%s/%s' % (b, validate_num_batches)) print(' v', end='', flush=True) bx = validate_batches_x[b] by = validate_batches_y[b] seq_len = bx.size()[0] batch_size = bx.size()[1] bx_cuda = autograd.Variable(bx_cuda_buf[:seq_len, :batch_size]) by_cuda = autograd.Variable(by_cuda_buf[:batch_size]) bx_cuda.data.copy_(bx) by_cuda.data.copy_(by) # if use_cuda: # bx = bx.cuda() # by = by.cuda() # if use_cuda: # if bx_cuda is None: # bx_cuda = autograd.Variable(bx.cuda()) # by_cuda = autograd.Variable(by.cuda()) # else: # bx_cuda.data.copy_(bx) # by_cuda.data.copy_(by) rationale_selected_node, rationale_selected, rationales, rationale_lengths = gen.forward(bx_cuda) out = enc.forward(rationales) loss = ((by_cuda - out) * (by_cuda - out)).sum().sqrt() # print some sample rationales... for idx in sample_idxes_by_batch[b]: # print('rationales.shape', rationales.size(), 'idx', idx) rationale = rationales[:, idx] # print('rationale.shape', rationale.size()) rationale_str = rationale_helper.rationale_to_string(words=words, rationale=rationale) print(' [%s]' % rationale_str) epoch_loss += loss.data[0] print('%s/%s' % (validate_num_batches, validate_num_batches)) return epoch_loss / validate_num_batches if (epoch + 1) % validate_every == 0: validation_loss = run_validation() print('epoch %s train loss %.3f traintime %s validate loss %.3f' % ( epoch, epoch_loss / num_batches, int(epoch_train_time), validation_loss)) # print(' validate loss %.3f' % (epoch_loss / num_batches)) else: print('epoch %s train loss %.3f traintime %s' % (epoch, epoch_loss / num_batches, int(epoch_train_time))) gc.collect() gc.collect() epoch += 1
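# --- Illustrative sketch (not part of the original code) ---
# The loss above is loss_mse + sparsity * |z| + coherence * (number of 0<->1
# transitions along the sequence).  A standalone PyTorch sketch of the two
# regularizers, assuming z has shape (seq_len, batch) with entries in {0, 1};
# the original additionally pushes the gradient through the sampling step with
# the old .reinforce() API, which is omitted here.
import torch

def rationale_penalties(z):
    sparsity = z.sum()                           # number of selected words
    transitions = (z[1:] - z[:-1]).abs().sum()   # number of on/off switches
    return sparsity, transitions

z = torch.tensor([[1., 0.], [1., 1.], [0., 1.], [1., 0.]])
s, t = rationale_penalties(z)
print(s.item(), t.item())   # 5 selected words, 4 transitions
# total cost, mirroring the combination above:
# loss = loss_mse + sparsity_weight * s + coherence_weight * t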
def main(): print(args) assert args.embedding, "Pre-trained word embeddings required." embedding_layer = myio.create_embedding_layer(args.embedding) max_len = args.max_len if args.train: train_x, train_y = myio.read_annotations(args.train) train_x = [embedding_layer.map_to_ids(x)[:max_len] for x in train_x] if args.dev: dev_x, dev_y = myio.read_annotations(args.dev) dev_x = [embedding_layer.map_to_ids(x)[:max_len] for x in dev_x] if args.load_rationale: rationale_data = myio.read_rationales(args.load_rationale) for x in rationale_data: x["xids"] = embedding_layer.map_to_ids(x["x"]) if args.train: model = Model(args=args, embedding_layer=embedding_layer, nclasses=len(train_y[0])) model.ready() model.train( (train_x, train_y), (dev_x, dev_y) if args.dev else None, None, #(test_x, test_y), rationale_data if args.load_rationale else None) if args.load_model and args.dev and not args.train: model = Model(args=None, embedding_layer=embedding_layer, nclasses=-1) model.load_model(args.load_model) say("model loaded successfully.\n") # compile an evaluation function eval_func = theano.function( inputs=[model.x, model.y], outputs=[ model.z, model.generator.obj, model.generator.loss, model.encoder.pred_diff ], givens={model.z: model.generator.z_pred}, ) # compile a predictor function pred_func = theano.function( inputs=[model.x], outputs=[model.z, model.encoder.preds], givens={model.z: model.generator.z_pred}, ) # batching data padding_id = embedding_layer.vocab_map["<padding>"] dev_batches_x, dev_batches_y = myio.create_batches( dev_x, dev_y, args.batch, padding_id) # disable dropout model.dropout.set_value(0.0) dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data( dev_batches_x, dev_batches_y, eval_func, sampling=True) say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
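# --- Illustrative sketch (not part of the original code) ---
# main() maps each token sequence to vocabulary ids and truncates it with
# [:max_len].  A hypothetical stand-in for embedding_layer.map_to_ids; the real
# EmbeddingLayer also handles casing and a larger set of special tokens.
import numpy as np

def map_to_ids(tokens, vocab_map, max_len, unk="<unk>"):
    unk_id = vocab_map[unk]
    ids = [vocab_map.get(tok, unk_id) for tok in tokens]
    return np.asarray(ids[:max_len], dtype=np.int32)

vocab = {"<unk>": 0, "<padding>": 1, "good": 2, "beer": 3}
print(map_to_ids("a good hoppy beer".split(), vocab, max_len=3))  # [0 2 0]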
def main(): print args assert args.embedding, "Pre-trained word embeddings required." embedding_layer = myio.create_embedding_layer( args.embedding ) max_len = args.max_len if args.train: train_x, train_y = myio.read_annotations(args.train) train_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in train_x ] if args.dev: dev_x, dev_y = myio.read_annotations(args.dev) dev_x = [ embedding_layer.map_to_ids(x)[:max_len] for x in dev_x ] if args.load_rationale: rationale_data = myio.read_rationales(args.load_rationale) for x in rationale_data: x["xids"] = embedding_layer.map_to_ids(x["x"]) if args.train: model = Model( args = args, embedding_layer = embedding_layer, nclasses = len(train_y[0]) ) model.ready() #debug_func2 = theano.function( # inputs = [ model.x, model.z ], # outputs = model.generator.logpz # ) #theano.printing.debugprint(debug_func2) #return model.train( (train_x, train_y), (dev_x, dev_y) if args.dev else None, None, #(test_x, test_y), rationale_data if args.load_rationale else None ) if args.load_model and args.dev and not args.train: model = Model( args = None, embedding_layer = embedding_layer, nclasses = -1 ) model.load_model(args.load_model) say("model loaded successfully.\n") # compile an evaluation function eval_func = theano.function( inputs = [ model.x, model.y ], outputs = [ model.z, model.encoder.obj, model.encoder.loss, model.encoder.pred_diff ], updates = model.generator.sample_updates ) # compile a predictor function pred_func = theano.function( inputs = [ model.x ], outputs = [ model.z, model.encoder.preds ], updates = model.generator.sample_updates ) # batching data padding_id = embedding_layer.vocab_map["<padding>"] dev_batches_x, dev_batches_y = myio.create_batches( dev_x, dev_y, args.batch, padding_id ) # disable dropout model.dropout.set_value(0.0) dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data( dev_batches_x, dev_batches_y, eval_func, sampling=True) say("{} {} {} {}\n".format(dev_obj, dev_loss, dev_diff, dev_p1))
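# --- Illustrative sketch (not part of the original code) ---
# Evaluation above always runs with dropout disabled (model.dropout.set_value(0.0)),
# and the training code restores the rate afterwards.  The same pattern as a
# context manager; `shared` only needs get_value/set_value, so a Theano shared
# scalar such as model.dropout fits.
from contextlib import contextmanager

@contextmanager
def dropout_disabled(shared):
    saved = shared.get_value()
    shared.set_value(0.0)            # deterministic evaluation
    try:
        yield
    finally:
        shared.set_value(saved)      # restore the training dropout rate

# hypothetical usage:
# with dropout_disabled(model.dropout):
#     dev_obj, dev_loss, dev_diff, dev_p1 = model.evaluate_data(
#         dev_batches_x, dev_batches_y, eval_func, sampling=True)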
def train(self, train, dev, test, rationale_data, trained_max_epochs=None): args = self.args args.trained_max_epochs = self.trained_max_epochs = trained_max_epochs dropout = self.dropout padding_id = self.embedding_layer.vocab_map["<padding>"] if dev is not None: dev_batches_x, dev_batches_y = myio.create_batches( dev[0], dev[1], args.batch, padding_id) if test is not None: test_batches_x, test_batches_y = myio.create_batches( test[0], test[1], args.batch, padding_id) if rationale_data is not None: valid_batches_x, valid_batches_y = myio.create_batches( [u["xids"] for u in rationale_data], [u["y"] for u in rationale_data], args.batch, padding_id, sort=False) start_time = time.time() train_batches_x, train_batches_y = myio.create_batches( train[0], train[1], args.batch, padding_id) say("{:.2f}s to create training batches\n\n".format(time.time() - start_time)) updates_e, lr_e, gnorm_e = create_optimization_updates( cost=self.encoder.cost_e, params=self.encoder.params, method=args.learning, beta1=args.beta1, beta2=args.beta2, lr=args.learning_rate)[:3] updates_g, lr_g, gnorm_g = create_optimization_updates( cost=self.encoder.cost_g, params=self.generator.params, method=args.learning, beta1=args.beta1, beta2=args.beta2, lr=args.learning_rate)[:3] sample_generator = theano.function( inputs=[self.x], outputs=self.z, #updates = self.generator.sample_updates ) get_loss_and_pred = theano.function( inputs=[self.x, self.y], outputs=[self.encoder.loss_vec, self.encoder.preds, self.z], #updates = self.generator.sample_updates ) eval_generator = theano.function( inputs=[self.x, self.y], outputs=[ self.z, self.encoder.obj, self.encoder.loss, self.encoder.pred_diff ], #updates = self.generator.sample_updates ) sample_generator = theano.function( inputs=[self.x], outputs=self.z, #updates = self.generator.sample_updates ) sample_encoder = theano.function( inputs=[self.x, self.y, self.z], outputs=[ self.encoder.obj, self.encoder.loss, self.encoder.pred_diff ], #updates = self.generator.sample_updates ) train_generator = theano.function( inputs = [ self.x, self.y ], outputs = [ self.encoder.obj, self.encoder.loss, \ self.encoder.sparsity_cost, self.z, self.word_embs, gnorm_e, gnorm_g ], updates = updates_e.items() + updates_g.items() #+ self.generator.sample_updates, ) eval_period = args.eval_period unchanged = 0 best_dev = 1e+2 best_dev_e = 1e+2 last_train_avg_cost = None last_dev_avg_cost = None tolerance = 0.10 + 1e-3 dropout_prob = np.float64(args.dropout).astype(theano.config.floatX) for epoch_ in xrange(args.max_epochs - 50): # -50 when max_epochs = 100 given #print(" max epochs in train func: ", args.max_epochs) epoch = args.trained_max_epochs + epoch_ unchanged += 1 if unchanged > 25: print 'dev set increases more than 25 times after the best dev found' #return train_batches_x, train_batches_y = myio.create_batches( train[0], train[1], args.batch, padding_id) more = True if args.decay_lr: param_bak = [p.get_value(borrow=False) for p in self.params] start_train_generate = time.time() more_counter = 0 while more: processed = 0 train_cost = 0.0 train_loss = 0.0 train_sparsity_cost = 0.0 p1 = 0.0 start_time = time.time() N = len(train_batches_x) #print(" begining : ", train_cost ) for i in xrange(N): if (i + 1) % 100 == 0: say("\r{}/{} {:.2f} ".format( i + 1, N, p1 / (i + 1))) bx, by = train_batches_x[i], train_batches_y[i] mask = bx != padding_id start_train_time = time.time() cost, loss, sparsity_cost, bz, emb, gl2_e, gl2_g = train_generator( bx, by) #print('gl2_g: ' , gl2_g) k = len(by) processed += k 
train_cost += cost train_loss += loss train_sparsity_cost += sparsity_cost p1 += np.sum(bz * mask) / (np.sum(mask) + 1e-8) cur_train_avg_cost = train_cost / N #print(" end : ", cur_train_avg_cost ) say("train generate time: {} \n".format(time.time() - start_train_generate)) if dev: self.dropout.set_value(0.0) start_dev_time = time.time() dev_obj, dev_loss, dev_diff, dev_p1 = self.evaluate_data( dev_batches_x, dev_batches_y, eval_generator, sampling=True) self.dropout.set_value(dropout_prob) say("dev evaluate data time: {} \n".format(time.time() - start_dev_time)) cur_dev_avg_cost = dev_obj more = False if args.decay_lr and last_train_avg_cost is not None: if cur_train_avg_cost > last_train_avg_cost * (1 + tolerance): more = True say("\nTrain cost {} --> {}\n".format( last_train_avg_cost, cur_train_avg_cost)) if dev and cur_dev_avg_cost > last_dev_avg_cost * ( 1 + tolerance): more = True say("\nDev cost {} --> {}\n".format( last_dev_avg_cost, cur_dev_avg_cost)) if more: more_counter += 1 if more_counter < 20: more = False if more: more_counter = 0 lr_val = lr_g.get_value() * 0.5 lr_val = np.float64(lr_val).astype(theano.config.floatX) lr_g.set_value(lr_val) lr_e.set_value(lr_val) say("Decrease learning rate to {} at epoch {}\n".format( float(lr_val), epoch_ + 1)) for p, v in zip(self.params, param_bak): #print ('param restoring: ', p, v) p.set_value(v) continue last_train_avg_cost = cur_train_avg_cost if dev: last_dev_avg_cost = cur_dev_avg_cost say("\n") say(( "Generator Epoch {:.2f} costg={:.4f} scost={:.4f} lossg={:.4f} " + "p[1]={:.2f} |g|={:.4f} {:.4f}\t[{:.2f}m / {:.2f}m]\n" ).format(epoch + (i + 1.0) / N, train_cost / N, train_sparsity_cost / N, train_loss / N, p1 / N, float(gl2_e), float(gl2_g), (time.time() - start_time) / 60.0, (time.time() - start_time) / 60.0 / (i + 1) * N)) say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \ for x in self.encoder.params ])+"\n") say("\t"+str([ "{:.2f}".format(np.linalg.norm(x.get_value(borrow=True))) \ for x in self.generator.params ])+"\n") say("total encode time = {} total generator time = {} \n". format(total_encode_time, total_generate_time)) if epoch_ % args.save_every == 0: #and epoch_>0: print 'saving model after epoch -', epoch_ + 1, ' file name: ', args.save_model + str( epoch_) self.save_model(args.save_model + str(epoch_), args) if dev: if dev_obj < best_dev: best_dev = dev_obj unchanged = 0 if args.dump and rationale_data: self.dump_rationales(args.dump, valid_batches_x, valid_batches_y, get_loss_and_pred, sample_generator) if args.save_model: print 'saving best model after epoch -', epoch_ + 1, ' file name: ', args.save_model self.save_model(args.save_model, args) say(( "\tsampling devg={:.4f} mseg={:.4f} avg_diffg={:.4f}" + " p[1]g={:.2f} best_dev={:.4f}\n").format( dev_obj, dev_loss, dev_diff, dev_p1, best_dev)) if rationale_data is not None: self.dropout.set_value(0.0) start_rational_time = time.time() #r_mse, r_p1, r_prec1, r_prec2 = self.evaluate_rationale( # rationale_data, valid_batches_x, # valid_batches_y, eval_generator) r_mse, r_p1, r_prec1, r_prec2, gen_time, enc_time, prec_cal_time = self.evaluate_rationale( rationale_data, valid_batches_x, valid_batches_y, sample_generator, sample_encoder, eval_generator) self.dropout.set_value(dropout_prob) say(( "\trationale mser={:.4f} p[1]r={:.2f} prec1={:.4f}" + " prec2={:.4f} time needed for rationale={}\n" ).format(r_mse, r_p1, r_prec1, r_prec2, time.time() - start_rational_time))
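# --- Illustrative sketch (not part of the original code) ---
# When the average train (or dev) cost grows by more than `tolerance`, the loop
# above halves the learning rate, restores the parameters saved in param_bak,
# and repeats the epoch.  A schematic of that decay-and-rollback step, assuming
# params expose get_value/set_value like Theano shared variables.
import numpy as np

def maybe_decay_and_rollback(cur_cost, last_cost, lr, params, param_bak,
                             tolerance=0.10 + 1e-3):
    """Return (new_lr, rolled_back)."""
    if last_cost is not None and cur_cost > last_cost * (1 + tolerance):
        new_lr = lr * 0.5                        # halve the learning rate
        for p, v in zip(params, param_bak):      # undo the bad epoch
            p.set_value(v)
        return new_lr, True
    return lr, False

class FakeShared(object):                        # stand-in for a Theano shared var
    def __init__(self, v): self.v = np.asarray(v)
    def get_value(self, borrow=False): return self.v
    def set_value(self, v): self.v = np.asarray(v)

p = FakeShared([1.0, 2.0])
lr, rolled = maybe_decay_and_rollback(1.5, 1.0, 0.001, [p], [np.zeros(2)])
print(lr, rolled, p.get_value())                 # 0.0005 True [0. 0.]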
def main(args): raw_corpus = myio.read_corpus(args.corpus, args.translations or None, args.translatable_ids or None, args.generated_questions_train or None) generated_questions_eval = myio.read_generated_questions( args.generated_questions) embedding_layer = None if args.trainable_embeddings == 1: embedding_layer = myio.create_embedding_layer( raw_corpus, n_d=args.hidden_dim, cut_off=args.cut_off, embs=load_embedding_iterator(args.embeddings) if args.embeddings else None, fix_init_embs=False) else: embedding_layer = myio.create_embedding_layer( raw_corpus, n_d=args.hidden_dim, cut_off=args.cut_off, embs=load_embedding_iterator(args.embeddings) if args.embeddings else None) ids_corpus = myio.map_corpus(raw_corpus, embedding_layer, max_len=args.max_seq_len, generated_questions=generated_questions_eval) say("vocab size={}, corpus size={}\n".format(embedding_layer.n_V, len(raw_corpus))) padding_id = embedding_layer.vocab_map["<padding>"] if args.reweight: weights = myio.create_idf_weights(args.corpus, embedding_layer) if args.dev: # dev = myio.read_annotations(args.dev, K_neg=-1, prune_pos_cnt=-1) dev = myio.read_annotations(args.dev, K_neg=args.dev_pool_size, prune_pos_cnt=-1) dev = myio.create_eval_batches(ids_corpus, dev, padding_id, pad_left=not args.average) if args.test: test = myio.read_annotations(args.test, K_neg=-1, prune_pos_cnt=-1) test = myio.create_eval_batches(ids_corpus, test, padding_id, pad_left=not args.average) if args.train: start_time = time.time() train = myio.read_annotations( args.train, training_data_percent=args.training_data_percent) train_batches = myio.create_batches(ids_corpus, train, args.batch_size, padding_id, pad_left=not args.average, include_generated_questions=True) say("{} to create batches\n".format(time.time() - start_time)) say("{} batches, {} tokens in total, {} triples in total\n".format( len(train_batches), sum(len(x[0].ravel()) + len(x[1].ravel()) for x in train_batches), sum(len(x[2].ravel()) for x in train_batches))) train_batches = None model = Model(args, embedding_layer, weights=weights if args.reweight else None) # print('args.average: '+args.average) model.ready() # # # set parameters using pre-trained network if args.do_train == 1: if args.load_pretrain: model.load_pretrained_parameters(args) model.train(ids_corpus, train, dev if args.dev else None, test if args.test else None) # AVERAGE THE PREDICTIONS OBTAINED BY RUNNING THE MODEL 10 TIMES if args.do_evaluate == 1: model.load_pretrained_parameters(args) # model.set_model(model.load_model(args.load_pretrain)) for i in range(1): r = model.just_eval(dev if args.dev else None, test if args.test else None) # ANALYZE the results if len(args.analyze_file.strip()) > 0: model.load_pretrained_parameters(args) file_name = args.analyze_file.strip( ) # 'AskUbuntu.Rcnn_analysis3.gt(es)-gt.txt' model.analyze(file_name, embedding_layer, dev)
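# --- Illustrative sketch (not part of the original code) ---
# With --reweight, myio.create_idf_weights builds per-word weights from the
# corpus.  A rough IDF variant keyed by vocabulary id; the real implementation
# may use different smoothing and tokenization, so treat this as an assumption.
import math
from collections import Counter

def idf_weights(docs, vocab_map):
    """docs: iterable of token lists; returns {word_id: idf} for in-vocab words."""
    n_docs = len(docs)
    df = Counter(tok for doc in docs for tok in set(doc) if tok in vocab_map)
    return {vocab_map[tok]: math.log(n_docs / (1.0 + cnt)) + 1.0
            for tok, cnt in df.items()}

vocab = {"ubuntu": 0, "install": 1, "the": 2}
docs = [["how", "install", "ubuntu"], ["the", "install", "fails"], ["the", "the"]]
print(idf_weights(docs, vocab))   # rarer words ("ubuntu") get a larger weight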
def train(self, ids_corpus, train, dev=None, test=None): dropout_prob = np.float64(args.dropout).astype(theano.config.floatX) batch_size = args.batch_size padding_id = self.padding_id #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id) updates, lr, gnorm = create_optimization_updates( cost=self.cost, params=self.params, lr=args.learning_rate, method=args.learning)[:3] train_func = theano.function(inputs=[self.idts, self.idbs, self.idps], outputs=[self.cost, self.loss, gnorm], updates=updates) eval_func = theano.function(inputs=[self.idts, self.idbs], outputs=self.scores, on_unused_input='ignore') say("\tp_norm: {}\n".format(self.get_pnorm_stat())) result_table = PrettyTable( ["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] + ["tst MAP", "tst MRR", "tst P@1", "tst P@5"]) unchanged = 0 best_dev = -1 dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0 test_MAP = test_MRR = test_P1 = test_P5 = 0 start_time = 0 max_epoch = args.max_epoch for epoch in xrange(max_epoch): unchanged += 1 if unchanged > 15: break start_time = time.time() train = myio.read_annotations(args.train) train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, pad_left=not args.average) N = len(train_batches) train_loss = 0.0 train_cost = 0.0 for i in xrange(N): # get current batch idts, idbs, idps = train_batches[i] cur_cost, cur_loss, grad_norm = train_func(idts, idbs, idps) train_loss += cur_loss train_cost += cur_cost if i % 10 == 0: say("\r{}/{}".format(i, N)) if i == N - 1: self.dropout.set_value(0.0) if dev is not None: dev_MAP, dev_MRR, dev_P1, dev_P5 = self.evaluate( dev, eval_func) if test is not None: test_MAP, test_MRR, test_P1, test_P5 = self.evaluate( test, eval_func) if dev_MRR > best_dev: unchanged = 0 best_dev = dev_MRR result_table.add_row([epoch] + [ "%.2f" % x for x in [dev_MAP, dev_MRR, dev_P1, dev_P5] + [test_MAP, test_MRR, test_P1, test_P5] ]) if args.save_model: self.save_model(args.save_model) dropout_p = np.float64(args.dropout).astype( theano.config.floatX) self.dropout.set_value(dropout_p) say("\r\n\n") say( ( "Epoch {}\tcost={:.3f}\tloss={:.3f}" \ +"\tMRR={:.2f},{:.2f}\t|g|={:.3f}\t[{:.3f}m]\n" ).format( epoch, train_cost / (i+1), train_loss / (i+1), dev_MRR, best_dev, float(grad_norm), (time.time()-start_time)/60.0 )) say("\tp_norm: {}\n".format(self.get_pnorm_stat())) say("\n") say("{}".format(result_table)) say("\n")
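# --- Illustrative sketch (not part of the original code) ---
# self.evaluate reports MAP, MRR, P@1 and P@5 over ranked candidate lists.  A
# small reference implementation of those ranking metrics, assuming each query
# yields a list of 0/1 relevance labels already sorted by model score (this is
# not the Evaluation class the code actually uses).
def precision_at_k(labels, k):
    return sum(labels[:k]) / float(k)

def reciprocal_rank(labels):
    for rank, rel in enumerate(labels, 1):
        if rel:
            return 1.0 / rank
    return 0.0

def average_precision(labels):
    hits, score = 0, 0.0
    for rank, rel in enumerate(labels, 1):
        if rel:
            hits += 1
            score += hits / float(rank)
    return score / hits if hits else 0.0

ranked = [0, 1, 0, 0, 1]          # one query: 1 = relevant, sorted by score
print(precision_at_k(ranked, 1))  # 0.0
print(precision_at_k(ranked, 5))  # 0.4
print(reciprocal_rank(ranked))    # 0.5
print(average_precision(ranked))  # (1/2 + 2/5) / 2 = 0.45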
def train(self, ids_corpus, train, dev=None, test=None): dropout_prob = np.float64(args.dropout).astype(theano.config.floatX) batch_size = args.batch_size padding_id = self.padding_id #train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id) if dev is not None: dev, dev_raw = dev if test is not None: test, test_raw = test if args.joint: updates_e, lr_e, gnorm_e = create_optimization_updates( cost = self.encoder.cost_e, #self.encoder.cost, params = self.encoder.params, lr = args.learning_rate*0.1, method = args.learning )[:3] else: updates_e = {} updates_g, lr_g, gnorm_g = create_optimization_updates( cost = self.encoder.cost_g, params = self.generator.params, lr = args.learning_rate, method = args.learning )[:3] train_func = theano.function( inputs = [ self.x, self.triples, self.pairs ], outputs = [ self.encoder.obj, self.encoder.loss, \ self.encoder.sparsity_cost, self.generator.p1, gnorm_g ], updates = updates_g.items() + updates_e.items() + self.generator.sample_updates, #no_default_updates = True, on_unused_input= "ignore" ) eval_func = theano.function( inputs = [ self.x ], outputs = self.encoder.scores ) eval_func2 = theano.function( inputs = [ self.x ], outputs = [ self.encoder.scores_z, self.generator.p1, self.z ], updates = self.generator.sample_updates, #no_default_updates = True ) say("\tp_norm: {}\n".format( self.get_pnorm_stat(self.encoder.params) )) say("\tp_norm: {}\n".format( self.get_pnorm_stat(self.generator.params) )) result_table = PrettyTable(["Epoch", "dev MAP", "dev MRR", "dev P@1", "dev P@5"] + ["tst MAP", "tst MRR", "tst P@1", "tst P@5"]) last_train_avg_cost = None tolerance = 0.5 + 1e-3 unchanged = 0 best_dev = -1 dev_MAP = dev_MRR = dev_P1 = dev_P5 = 0 test_MAP = test_MRR = test_P1 = test_P5 = 0 start_time = 0 max_epoch = args.max_epoch for epoch in xrange(max_epoch): unchanged += 1 if unchanged > 20: break start_time = time.time() train = myio.read_annotations(args.train) train_batches = myio.create_batches(ids_corpus, train, batch_size, padding_id, pad_left=not args.average, merge=args.merge) N =len(train_batches) more = True param_bak = [ p.get_value(borrow=False) for p in self.params ] while more: train_loss = 0.0 train_cost = 0.0 train_scost = 0.0 train_p1 = 0.0 for i in xrange(N): # get current batch idts, triples, pairs = train_batches[i] cur_cost, cur_loss, cur_scost, cur_p1, gnormg = train_func(idts, triples, pairs) train_loss += cur_loss train_cost += cur_cost train_scost += cur_scost train_p1 += cur_p1 if i % 10 == 0: say("\r{}/{} {:.3f}".format(i,N,train_p1/(i+1))) cur_train_avg_cost = train_cost / N more = False if last_train_avg_cost is not None: if cur_train_avg_cost > last_train_avg_cost*(1+tolerance): more = True say("\nTrain cost {} --> {}\n".format( last_train_avg_cost, cur_train_avg_cost )) if more: lr_val = lr_g.get_value()*0.5 if lr_val < 1e-5: return lr_val = np.float64(lr_val).astype(theano.config.floatX) lr_g.set_value(lr_val) lr_e.set_value(lr_val) say("Decrease learning rate to {}\n".format(float(lr_val))) for p, v in zip(self.params, param_bak): p.set_value(v) continue last_train_avg_cost = cur_train_avg_cost say("\r\n\n") say( ( "Epoch {} cost={:.3f} loss={:.3f} scost={:.3f}" \ +" P[1]={:.3f} |g|={:.3f}\t[{:.3f}m]\n" ).format( epoch, train_cost / N, train_loss / N, train_scost / N, train_p1 / N, float(gnormg), (time.time()-start_time)/60.0 )) say("\tp_norm: {}\n".format( self.get_pnorm_stat(self.encoder.params) )) say("\tp_norm: {}\n".format( self.get_pnorm_stat(self.generator.params) )) 
self.dropout.set_value(0.0) if dev is not None: full_MAP, full_MRR, full_P1, full_P5 = self.evaluate(dev, eval_func) dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT = self.evaluate_z(dev, dev_raw, ids_corpus, eval_func2) if test is not None: test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT = \ self.evaluate_z(test, test_raw, ids_corpus, eval_func2) if dev_MAP > best_dev: best_dev = dev_MAP unchanged = 0 say("\n") say(" fMAP={:.2f} fMRR={:.2f} fP1={:.2f} fP5={:.2f}\n".format( full_MAP, full_MRR, full_P1, full_P5 )) say("\n") say((" dMAP={:.2f} dMRR={:.2f} dP1={:.2f} dP5={:.2f}" + " dP[1]={:.3f} d%T={:.3f} best_dev={:.2f}\n").format( dev_MAP, dev_MRR, dev_P1, dev_P5, dev_PZ1, dev_PT, best_dev )) result_table.add_row( [ epoch ] + [ "%.2f" % x for x in [ dev_MAP, dev_MRR, dev_P1, dev_P5 ] + [ test_MAP, test_MRR, test_P1, test_P5 ] ] ) if unchanged == 0: say("\n") say((" tMAP={:.2f} tMRR={:.2f} tP1={:.2f} tP5={:.2f}" + " tP[1]={:.3f} t%T={:.3f}\n").format( test_MAP, test_MRR, test_P1, test_P5, test_PZ1, test_PT )) if args.dump_rationale: self.evaluate_z(dev+test, dev_raw+test_raw, ids_corpus, eval_func2, args.dump_rationale) #if args.save_model: # self.save_model(args.save_model) dropout_p = np.float64(args.dropout).astype( theano.config.floatX) self.dropout.set_value(dropout_p) say("\n") say("{}".format(result_table)) say("\n") if train_p1/N <= 1e-4 or train_p1/N+1e-4 >= 1.0: break
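# --- Illustrative sketch (not part of the original code) ---
# myio.create_batches pads every sequence in a batch to the batch's maximum
# length with the <padding> id, optionally on the left (pad_left=not
# args.average).  A minimal NumPy sketch of that padding step; the real
# function also shuffles, sorts by length and builds the triple/pair structures.
import numpy as np

def pad_batch(seqs, padding_id, pad_left=False):
    """Stack variable-length id lists into a (max_len, batch) int matrix."""
    max_len = max(len(s) for s in seqs)
    batch = np.full((max_len, len(seqs)), padding_id, dtype=np.int32)
    for j, s in enumerate(seqs):
        if pad_left:
            batch[max_len - len(s):, j] = s
        else:
            batch[:len(s), j] = s
    return batch

print(pad_batch([[3, 4, 5], [7]], padding_id=0))
# [[3 7]
#  [4 0]
#  [5 0]]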