def transform_task(task_id, max_sentences=130, max_words=12):
    """Load one bAbI task and re-map its word ids into the shared vocabulary.

    Parses the task with the module-level config `c`, replaces every id from
    the task-local dictionary `old_dic` with the corresponding id from the
    module-level `word2id`, and zero-pads stories/questions to fixed sizes so
    tasks of different shapes can be batched together.

    Parameters:
        task_id: bAbI task number passed through to `parse`.
        max_sentences: padded story length (default 130, the previous
            hard-coded value).
        max_words: padded sentence/question length (default 12, the previous
            hard-coded value).

    Returns:
        [padded_train, padded_valid, padded_test, word2id], where each padded
        split is [stories, split[1], questions, split[3]].
    """
    _train, _valid, _test, old_dic = parse(c.data_path, task_id)
    # Deep copies: the originals are used as masks while the copies are
    # overwritten in place below.
    new_train = copy.deepcopy(_train)
    new_valid = copy.deepcopy(_valid)
    new_test = copy.deepcopy(_test)
    # Indices 0, 2, 3 hold id arrays (index 1 is left untouched — presumably
    # lengths; confirm against `parse`). Sorted key order keeps the remapping
    # deterministic.
    for key in sorted(old_dic):
        for i in (0, 2, 3):
            new_train[i][_train[i] == old_dic[key]] = word2id[key]
            new_valid[i][_valid[i] == old_dic[key]] = word2id[key]
            new_test[i][_test[i] == old_dic[key]] = word2id[key]

    def pad(split):
        """Zero-pad a split's stories to (N, max_sentences, max_words) and
        its questions to (N, max_words); other entries pass through."""
        stories = np.zeros((split[0].shape[0], max_sentences, max_words))
        stories[:, :split[0].shape[1], :split[0].shape[2]] = split[0]
        questions = np.zeros((split[2].shape[0], max_words))
        questions[:, :split[2].shape[1]] = split[2]
        return [stories, split[1], questions, split[3]]

    return [pad(new_train), pad(new_valid), pad(new_test), word2id]
import time
import sys
import os

from preprocessor.reader import parse
from lib import *

###### Hyper Parameters ------------------
# All run configuration is collected on a single namespace `c`.
c = types.SimpleNamespace()

# user input
c.task_id = int(sys.argv[1])      # bAbI task number to train on
c.log_keyword = str(sys.argv[2])  # free-form tag for labelling this run

# data loading (necessary for task specific symbol_size parameter)
# NOTE(review): path is the 10k-example English split; the split concatenation
# is deliberate but could be a single literal.
c.data_path = "tasks/en-valid" + "-10k"
raw_train, raw_valid, raw_test, word2id = parse(c.data_path, c.task_id)
id2word = {word2id[k]: k for k in word2id.keys()}  # inverse vocabulary lookup
c.vocab_size = len(word2id)

# model parameters
c.symbol_size = c.vocab_size  # one symbol embedding per vocabulary entry
c.entity_size = 90
c.hidden_size = 40
c.role_size = 20
c.init_limit = 0.10  # uniform init range — presumably [-limit, limit]; confirm in lib
c.LN = True          # layer-norm toggle — consumed by the model; confirm in lib

# optimizer (beta1/beta2 suggest Adam — confirm in lib)
c.learning_rate = 0.001
c.beta1 = 0.9
c.beta2 = 0.999
def main(load=True):
    """Train a graph-augmented EntityNetwork for multi-label classification.

    Loads data via `parse()`, trains for `FLAGS.num_epochs`, validates every
    `FLAGS.validate_every` epochs, checkpoints (and evaluates on test) on each
    new best validation loss, and prints the final histories.

    Parameters:
        load: when True and a checkpoint exists, restore weights and training
            histories instead of initializing from scratch.

    Fixes vs. the previous revision:
      * restore branch now unpacks into the same `*_history` names the rest of
        the function uses (previously a NameError after restore);
      * histories are pickled to "training_logs.pik" — the same file the
        restore path reads (previously written to "training_logs.txt");
      * the full history dicts are saved, not the current epoch's scalars;
      * the sigmoid op is built once, not on every `sess.run` (graph growth);
      * the final val printout indexes only recorded epochs (previously a
        KeyError whenever validate_every > 1).
    """
    # Get Vectorized Forms of Stories, Questions, and Answers
    train, test, val = parse()
    train_text_arr, train_all_labels, train_mask_arr, labels_embedding, adj_m = train
    val_text_arr, val_all_labels, val_mask_arr, _, _ = val
    test_text_arr, test_all_labels, test_mask_arr, _, _ = test
    adj_bias = adj_to_bias(adj_m, adj_m.shape[0], nhood=1)
    # Add a leading batch axis of size 1 — fed once per run, shared by batches.
    labels_embedding = labels_embedding[np.newaxis]
    adj_bias = adj_bias[np.newaxis]

    # Setup Checkpoint + Log Paths
    ckpt_dir = "./checkpoints/"
    if not os.path.exists(ckpt_dir):
        os.mkdir(ckpt_dir)

    # Build Model
    with tf.Session() as sess:
        # Instantiate Model
        entity_net = EntityNetwork(
            metadata['vocab_size'], metadata['max_sentence_length'],
            FLAGS.batch_size, FLAGS.memory_slots, FLAGS.embedding_size,
            metadata['mask_dim'], metadata['labels_dim'], FLAGS.learning_rate,
            FLAGS.decay_epochs * (metadata['dataset_size'] / FLAGS.batch_size),
            FLAGS.decay_rate)

        # Build the sigmoid op ONCE: calling tf.nn.sigmoid inside the training
        # loop would add a new node to the graph on every iteration.
        sigmoid_logits = tf.nn.sigmoid(entity_net.logits)

        # Initialize Saver
        saver = tf.train.Saver()

        # Initialize all Variables
        if os.path.exists(ckpt_dir + "checkpoint") and load:
            print('Restoring Variables from Checkpoint!')
            saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
            with open(ckpt_dir + "training_logs.pik", 'rb') as f:
                (train_loss_history, train_metric_history,
                 val_loss_history, val_metric_history) = pickle.load(f)
        else:
            print('Initializing Variables!')
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            train_loss_history, train_metric_history = {}, {}
            val_loss_history, val_metric_history = {}, {}

        # Get Current Epoch
        curr_epoch = sess.run(entity_net.epoch_step)

        # Start Training Loop
        n = train_text_arr.shape[0]
        test_n = test_text_arr.shape[0]
        val_n = val_text_arr.shape[0]
        bsz = FLAGS.batch_size
        eval_bsz = FLAGS.eval_batch_size
        best_val_loss = np.inf
        best_val_epoch = -1
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            train_loss, y_true, y_pred = [], [], []
            for batch_i, (start, end) in enumerate(
                    tqdm(zip(range(0, n, bsz), range(bsz, n, bsz)), ncols=80)):
                # Indices of positions whose mask bit is set; the loss is
                # evaluated only at these unrolled positions.
                mask = np.reshape(np.array(train_mask_arr[start:end]), -1)
                mask_index = np.array(
                    [idx for idx, val in enumerate(mask) if val == 1])
                labels_unrolled = np.reshape(train_all_labels[start:end],
                                             (-1, metadata['labels_dim']))
                curr_loss, ground_truth, logits, _ = sess.run(
                    [entity_net.loss_val, entity_net.ground_truth,
                     sigmoid_logits, entity_net.train_op],
                    feed_dict={
                        entity_net.S: train_text_arr[start:end],
                        entity_net.labels: labels_unrolled,
                        entity_net.mask: mask_index,
                        entity_net.labels_embedding: labels_embedding,
                        entity_net.bias_adj: adj_bias,
                        entity_net.adj_m: adj_m
                    })
                train_loss.append(curr_loss)
                ground_truth = ground_truth.astype("int")
                predictions = (logits >= 0.5).astype("int")
                y_true.extend(ground_truth)
                y_pred.extend(predictions)
                tqdm.write("Epoch: {}, iter {}: loss = {:.3f}".format(
                    epoch + 1, batch_i, np.mean(train_loss)))

            # Add train loss, train metric to data
            train_loss = np.mean(train_loss)
            train_metric = precision_recall_fscore_support(
                np.array(y_true), np.array(y_pred), average="micro")[:3]
            train_loss_history[epoch] = train_loss
            train_metric_history[epoch] = train_metric
            tqdm.write("Train loss: {:.3f} ; [P, R, F-score]: {}".format(
                train_loss, train_metric))

            # Increment Epoch
            sess.run(entity_net.epoch_increment)

            # Validate every so often
            if epoch % FLAGS.validate_every == 0:
                val_loss, val_metric = do_eval(val_n, bsz, sess, entity_net,
                                               val_text_arr, val_all_labels,
                                               val_mask_arr, labels_embedding,
                                               adj_bias, adj_m)
                tqdm.write("Val loss: {:.3f} ; [P, R, F-score]: {}".format(
                    val_loss, val_metric))
                val_loss_history[epoch] = val_loss
                val_metric_history[epoch] = val_metric

                # Update best_val — checkpoint and report test performance
                # only when validation improves.
                if val_loss < best_val_loss:
                    tqdm.write("\nBest val loss")
                    best_val_loss = val_loss
                    best_val_metric = val_metric
                    best_val_epoch = epoch
                    test_loss, test_metric = do_eval(
                        test_n, eval_bsz, sess, entity_net, test_text_arr,
                        test_all_labels, test_mask_arr, labels_embedding,
                        adj_bias, adj_m)
                    tqdm.write("Test loss: {} ; [P, R, F-score]: {}".format(
                        test_loss, test_metric))

                    # Save Model + full histories under the same file the
                    # restore path reads.
                    saver.save(sess, ckpt_dir + "model.ckpt",
                               global_step=entity_net.epoch_step)
                    with open(ckpt_dir + "training_logs.pik", 'wb') as f:
                        pickle.dump((train_loss_history, train_metric_history,
                                     val_loss_history, val_metric_history), f)

        tqdm.write("Train Loss:")
        print([train_loss_history[e] for e in sorted(train_loss_history)])
        tqdm.write("Val Loss:")
        # Validation runs only every `validate_every` epochs, so index the
        # epochs actually recorded.
        print([val_loss_history[e] for e in sorted(val_loss_history)])

        # Test Loop
        tqdm.write(
            "Best Val loss: {} ; [P, R, F-score]: {} ; Best val epoch: {}"
            .format(best_val_loss, best_val_metric, best_val_epoch))
        tqdm.write("Test loss: {} ; [P, R, F-score]: {}".format(
            test_loss, test_metric))
def main(_):
    """Train and test an EntityNetwork on a single bAbI QA task.

    Loads the task given by FLAGS.task_id, restores from a per-task checkpoint
    directory if one exists, trains to FLAGS.num_epochs with periodic
    validation, checkpoints on each new best validation accuracy, early-stops
    once accuracy exceeds FLAGS.validation_threshold, then reports test
    loss/accuracy to stdout and to `output.txt`.

    Fixes vs. the previous revision: pickle files are opened in binary mode
    ('rb'/'wb' — text mode corrupts binary pickle data), and `print` uses the
    function form (valid in both Python 2 and 3 for single-argument calls).
    """
    # Get Vectorized Forms of Stories, Questions, and Answers
    train, val, test, word2id = parse(FLAGS.data_path, FLAGS.task_id)
    trainS, trainS_len, trainQ, trainA, _ = train
    valS, valS_len, valQ, valA, _ = val
    testS, testS_len, testQ, testA, _ = test

    # Assert Shapes — all splits must share sentence/question dimensions.
    assert (trainS.shape[1:] == valS.shape[1:] == testS.shape[1:])
    assert (trainQ.shape[1] == valQ.shape[1] == testQ.shape[1])

    # Setup Checkpoint + Log Paths (one directory per task)
    ckpt_dir = "./checkpoints/qa_%d/" % FLAGS.task_id
    if not os.path.exists(ckpt_dir):
        os.mkdir(ckpt_dir)

    # Build Model
    with tf.Session() as sess:
        # Instantiate Model
        entity_net = EntityNetwork(
            word2id, trainS.shape[2], trainS.shape[1], FLAGS.batch_size,
            FLAGS.memory_slots, FLAGS.embedding_size, FLAGS.learning_rate,
            FLAGS.decay_epochs * (trainS.shape[0] / FLAGS.batch_size),
            FLAGS.decay_rate)

        # Initialize Saver
        saver = tf.train.Saver()

        # Initialize all Variables
        if os.path.exists(ckpt_dir + "checkpoint"):
            print('Restoring Variables from Checkpoint!')
            saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
            # Binary mode: pickle data is bytes, not text.
            with open(ckpt_dir + "training_logs.pik", 'rb') as f:
                train_loss, train_acc, val_loss, val_acc = pickle.load(f)
        else:
            print('Initializing Variables!')
            sess.run(tf.global_variables_initializer())
            train_loss, train_acc, val_loss, val_acc = {}, {}, {}, {}

        # Get Current Epoch
        curr_epoch = sess.run(entity_net.epoch_step)

        # Start Training Loop
        n, val_n, test_n = trainS.shape[0], valS.shape[0], testS.shape[0]
        bsz, best_val = FLAGS.batch_size, 0.0
        for epoch in range(curr_epoch, FLAGS.num_epochs):
            loss, acc, counter = 0.0, 0.0, 0
            for start, end in zip(range(0, n, bsz), range(bsz, n, bsz)):
                curr_loss, curr_acc, _ = sess.run(
                    [entity_net.loss_val, entity_net.accuracy,
                     entity_net.train_op],
                    feed_dict={
                        entity_net.S: trainS[start:end],
                        entity_net.S_len: trainS_len[start:end],
                        entity_net.Q: trainQ[start:end],
                        entity_net.A: trainA[start:end]
                    })
                loss, acc, counter = loss + curr_loss, acc + curr_acc, counter + 1
                if counter % 100 == 0:
                    print("Epoch %d\tBatch %d\tTrain Loss: %.3f\tTrain Accuracy: %.3f" % (
                        epoch, counter, loss / float(counter), acc / float(counter)))

            # Add train loss, train acc to data
            train_loss[epoch] = loss / float(counter)
            train_acc[epoch] = acc / float(counter)

            # Increment Epoch
            sess.run(entity_net.epoch_increment)

            # Validate every so often
            if epoch % FLAGS.validate_every == 0:
                val_loss_val, val_acc_val = do_eval(val_n, bsz, sess,
                                                    entity_net, valS,
                                                    valS_len, valQ, valA)
                print("Epoch %d Validation Loss: %.3f\tValidation Accuracy: %.3f" % (
                    epoch, val_loss_val, val_acc_val))

                # Add val loss, val acc to data
                val_loss[epoch], val_acc[epoch] = val_loss_val, val_acc_val

                # Update best_val — checkpoint only on improvement.
                if val_acc[epoch] > best_val:
                    best_val = val_acc[epoch]
                    saver.save(sess, ckpt_dir + "model.ckpt",
                               global_step=entity_net.epoch_step)
                    # Binary mode for pickle output (was text 'w').
                    with open(ckpt_dir + "training_logs.pik", 'wb') as f:
                        pickle.dump((train_loss, train_acc, val_loss, val_acc), f)

            # Early Stopping Condition
            if best_val > FLAGS.validation_threshold:
                break

        # Test Loop
        test_loss, test_acc = do_eval(test_n, bsz, sess, entity_net, testS,
                                      testS_len, testQ, testA)

        # Print and Write Test Loss/Accuracy
        print("Test Loss: %.3f\tTest Accuracy: %.3f" % (test_loss, test_acc))
        with open(ckpt_dir + "output.txt", 'w') as g:
            g.write("Test Loss: %.3f\tTest Accuracy: %.3f\n" % (test_loss, test_acc))
def _restore_model():
    """Parse the training pickle (needed for vocab/shapes) and return a
    RelationNetwork restored from the latest checkpoint in FLAGS.ckpt_dir."""
    S, S_len, Q, Q_len, A, word2id, a_word2id = parse(
        "train",
        pik_path=os.path.join(FLAGS.ckpt_dir, 'train', 'train.pik'),
        voc_path=os.path.join(FLAGS.ckpt_dir, 'voc.pik'))
    return RelationNetwork(
        S, S_len, Q, Q_len, A, word2id, a_word2id,
        restore=tf.train.latest_checkpoint(
            os.path.join(FLAGS.ckpt_dir, 'ckpts')))


def _eval_on_task(rn, split, task):
    """Load one bAbI task for `split` ("valid" or "test") and return the
    model's accuracy on it."""
    S, S_len, Q, Q_len, A, _, _ = parse(
        split,
        pik_path=os.path.join(FLAGS.ckpt_dir, split,
                              '%s_%d.pik' % (split, task)),
        voc_path=os.path.join(FLAGS.ckpt_dir, 'voc.pik'),
        task_id=task)
    return rn.eval(S, S_len, Q, Q_len, A)


def main(_):
    """Dispatch on FLAGS.mode: train a RelationNetwork on bAbI, or restore a
    checkpoint and evaluate on the validation/test split of one task
    (FLAGS.task 1-20) or all tasks (FLAGS.task == 0).

    Raises:
        UserWarning: for an unrecognized FLAGS.mode (kept for compatibility
            with existing callers; a ValueError would normally be preferred).
    """
    if FLAGS.mode == "train":
        # Parse Data
        print('[*] Parsing Data!')
        S, S_len, Q, Q_len, A, word2id, a_word2id = parse(
            "train",
            pik_path=os.path.join(FLAGS.ckpt_dir, 'train', 'train.pik'),
            voc_path=os.path.join(FLAGS.ckpt_dir, 'voc.pik'))

        # Initialize Model
        print('[*] Creating Model!')
        rn = RelationNetwork(S, S_len, Q, Q_len, A, word2id, a_word2id,
                             restore=False)

        # Train for 50 Epochs
        print('[*] Training Model!')
        rn.fit(epochs=50)

    elif FLAGS.mode == "valid":
        # Restore Model
        print('[*] Restoring Model!')
        rn = _restore_model()
        if FLAGS.task == 0:
            print('[*] Validating on all Tasks!')
            for task in range(1, 21):
                print('[*] Loading Task %d!' % task)
                accuracy = _eval_on_task(rn, "valid", task)
                print('Task %d\tAccuracy: %.3f' % (task, accuracy))
        else:
            task = FLAGS.task
            print('[*] Validating on Task %d' % task)
            accuracy = _eval_on_task(rn, "valid", task)
            print('Task %d\tAccuracy: %.3f' % (task, accuracy))

    elif FLAGS.mode == "test":
        # Restore Model
        print('[*] Restoring Model!')
        rn = _restore_model()
        if FLAGS.task == 0:
            print('[*] Testing on all Tasks!')
            for task in range(1, 21):
                print('[*] Loading Task %d!' % task)
                accuracy = _eval_on_task(rn, "test", task)
                print('Task %d\tAccuracy: %.3f' % (task, accuracy))
        else:
            task = FLAGS.task
            print('[*] Testing on Task %d' % task)
            accuracy = _eval_on_task(rn, "test", task)
            print('Task %d\tAccuracy: %.3f' % (task, accuracy))

    else:
        print("Unsupported Mode, use one of [train, valid, test]")
        raise UserWarning