def infer(args):
    """Restore a trained model and evaluate word-sense disambiguation.

    Builds a prototype vector per sense by averaging the model's context-layer
    outputs over labelled training examples, then classifies each test example
    by nearest prototype (cosine similarity) among the candidate senses of its
    ambiguous verb.

    Fixes vs. the original:
      * meta graph and weights are restored from the same checkpoint under
        ``args.save_dir`` (the original mixed hard-coded './save2' and './save/');
      * the test batch is run through the network once (the original looped
        ``num_batches`` times over the same feed, duplicating every example);
      * the debug ``exit()`` calls, the broken two-name unpack of a single
        fetched tensor, and the reference to the undefined ``out_path`` were
        removed; the evaluation now completes and reports accuracy.
    """
    start = time.time()

    # Load testing data
    # ====================================
    with open(os.path.join(args.save_dir, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)
    print('restored args:\n', json.dumps(vars(saved_args), indent=4, separators=(',', ':')))
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'rb') as f:
        _, vocab = pickle.load(f)

    data_loader = TextLoader(args.test_file, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size, shuffle=True)

    # Sense-name -> sense-index mapping, and verb -> candidate-senses dict.
    # NOTE(review): pickle.load assumes these files are trusted local artifacts.
    with open('wsd_senses_idx.p', 'rb') as f:
        sense_idx = pickle.load(f)
    with open('verbs_sense.p', 'rb') as f:
        words_sense = pickle.load(f)

    # Predict
    # ===================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            start = time.time()
            # Restore meta graph and weights from the SAME latest checkpoint.
            checkpoint = tf.train.latest_checkpoint(args.save_dir)
            saver = tf.train.import_meta_graph(checkpoint + '.meta')
            saver.restore(sess, checkpoint)
            graph = tf.get_default_graph()
            graph_x = graph.get_tensor_by_name("x:0")
            graph_y = graph.get_tensor_by_name("y:0")
            graph_context_layer = graph.get_tensor_by_name("cont_layer:0")

            # Held-out test batch (next_batch_test returns one full batch).
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, senss_test = \
                data_loader.next_batch_test(collect_sense=True)
            feed_dict_test = {graph_x: x_batch_test, graph_y: y_batch_test}

            # Collect a context vector for every labelled training example.
            xs = []
            ys = []
            senses = []
            data_loader.reset_batch_pointer()
            for i in range(data_loader.num_batches):
                x_batch, y_batch, unk_count, n_sent, senss = \
                    data_loader.next_batch(collect_sense=True, shuffle=False)
                feed_dict = {graph_x: x_batch, graph_y: y_batch}
                wordVecs = sess.run(graph_context_layer, feed_dict)
                for j in range(len(y_batch)):
                    # y == -1 marks sentences without a labelled target verb.
                    if y_batch[j] != -1:
                        xs.append(wordVecs[j])
                        ys.append(y_batch[j])
                        senses.append(senss[j])

            # Average context vectors per sense -> sense prototype vectors.
            sense_vects = {}
            for i in range(len(ys)):
                sense_vects.setdefault(senses[i], []).append(xs[i])
            sense_train_counts = {}
            for key in sense_vects:
                sense_train_counts[key] = len(sense_vects[key])
                sense_vects[key] = np.mean(sense_vects[key], axis=0)

            # Context vectors for the test set — single forward pass.
            xs_test = []
            ys_test = []
            senses_test = []
            wordVecs = sess.run(graph_context_layer, feed_dict_test)
            for j in range(len(y_batch_test)):
                if y_batch_test[j] != -1:
                    xs_test.append(wordVecs[j])
                    ys_test.append(y_batch_test[j])
                    senses_test.append(senss_test[j])

            # Nearest-prototype classification: for each test example, pick
            # the candidate sense whose prototype is most cosine-similar to
            # the example's context vector.
            n_tests = len(ys_test)
            corr = 0
            scored = 0  # examples for which at least one prototype exists
            for i in range(n_tests):
                ambig_word = data_loader.verbs_idx[ys_test[i]]
                ambig_word = data_loader.words[ambig_word]
                correct_sense = senses_test[i]
                if ambig_word not in words_sense:
                    continue
                max_cos = 0
                best_sense = None
                for sense in words_sense[ambig_word]:
                    if sense in sense_idx:
                        sen = sense_idx[sense]
                        if sen in sense_vects:
                            # NOTE(review): assumes cosine_similarity accepts
                            # two 1-D vectors — confirm against its import.
                            cos = cosine_similarity(sense_vects[sen], xs_test[i])
                            if cos > max_cos:
                                max_cos = cos
                                best_sense = sen
                if best_sense is not None:
                    scored += 1
                    if best_sense == correct_sense:
                        corr += 1

            if scored:
                print("WSD accuracy: {:.4f} ({}/{})".format(corr / scored, corr, scored))
            print("Total run time: {}s".format(time.time() - start))
def train(args):
    """Train the BasicLSTM model with per-epoch dev evaluation and early stopping.

    Loads batches via ``TextLoader``, optionally resumes from ``args.init_from``
    (after verifying the saved config/vocab are compatible), checkpoints to
    ``args.save_dir`` whenever dev accuracy improves, and stops early after
    ``args.stop_count`` consecutive epochs without improvement.

    Fixes vs. the original:
      * dev-loop logging now reports the dev batch's ``unk_count``/``n_sent``
        (it previously printed the test batch's values);
      * dropout is disabled (``keep_prob = 1.0``) for dev/test evaluation;
      * the early-stopping patience counter is reset when dev accuracy
        improves, so ``stop_count`` counts *consecutive* non-improvements;
      * doubled ".pkl.pkl" typo in an assert message corrected;
      * an unused ``sess.run(model.initial_state)`` was removed.
    """
    # Data Preparation
    # ====================================
    data_loader = TextLoader(args.data_dir, args.sense_file, args.batch_size,
                             args.seq_length, args.data_set_size)
    args.vocab_size = data_loader.vocab_size
    args.verb_size = len(data_loader.verbs)
    print(args.verb_size)
    print("Number of sentences: {}".format(data_loader.num_data))
    print("Vocabulary size: {}".format(args.vocab_size))

    # Check compatibility if training is continued from previously saved model
    if args.init_from is not None:
        # check if all necessary files exist
        assert os.path.isdir(
            args.init_from), " %s must be a path" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "config.pkl")
        ), "config.pkl file does not exist in path %s" % args.init_from
        assert os.path.isfile(
            os.path.join(args.init_from, "words_vocab.pkl")
        ), "words_vocab.pkl file does not exist in path %s" % args.init_from
        ckpt = tf.train.get_checkpoint_state(args.init_from)
        assert ckpt, "No checkpoint found"
        assert ckpt.model_checkpoint_path, "No model path found in checkpoint"

        # open old config and check if models are compatible
        with open(os.path.join(args.init_from, 'config.pkl'), 'rb') as f:
            saved_model_args = pickle.load(f)
        need_be_same = ["rnn_size", "num_layers", "seq_length"]
        for checkme in need_be_same:
            assert vars(saved_model_args)[checkme] == vars(
                args
            )[checkme], "Command line argument and saved model disagree on '%s' " % checkme

        # open saved vocab/dict and check if vocabs/dicts are compatible
        with open(os.path.join(args.init_from, 'words_vocab.pkl'), 'rb') as f:
            saved_words, saved_vocab = pickle.load(f)
        assert saved_words == data_loader.words, "Data and loaded model disagree on word set!"
        assert saved_vocab == data_loader.vocab, "Data and loaded model disagree on dictionary mappings!"

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    with open(os.path.join(args.save_dir, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)
    with open(os.path.join(args.save_dir, 'words_vocab.pkl'), 'wb') as f:
        pickle.dump((data_loader.words, data_loader.vocab), f)

    # Training
    # ====================================
    with tf.Graph().as_default():
        with tf.Session() as sess:
            model = BasicLSTM(args)

            # Define training procedure: Adam + global-norm gradient clipping.
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(args.learning_rate)
            tvars = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(
                tf.gradients(model.cost, tvars), args.grad_clip)
            train_op = optimizer.apply_gradients(
                zip(grads, tvars), global_step=global_step)

            # Keep track of gradient values and sparsity
            grad_summaries = []
            for g, v in zip(grads, tvars):
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)

            # Summary for loss
            loss_summary = tf.summary.scalar("loss", model.cost)

            # Train summaries
            merged = tf.summary.merge_all()
            if not os.path.exists(args.log_dir):
                os.makedirs(args.log_dir)
            train_writer = tf.summary.FileWriter(args.log_dir, sess.graph)

            saver = tf.train.Saver(tf.global_variables())
            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            # Restore model
            if args.init_from is not None:
                saver.restore(sess, ckpt.model_checkpoint_path)

            # Start training
            print("Start training")
            # create dev and test sets; dropout is disabled for evaluation.
            data_loader.reset_batch_pointer()
            x_batch_dev, y_batch_dev, unk_count_dev, n_sent_dev, _ = \
                data_loader.next_batch_test()
            feed_dict_dev = {
                model.x: x_batch_dev,
                model.y: y_batch_dev,
                model.keep_prob: 1.0
            }
            data_loader.reset_batch_pointer()
            x_batch_test, y_batch_test, unk_count_test, n_sent_test, _ = \
                data_loader.next_batch_test(set_to_choose=1)
            feed_dict_test = {
                model.x: x_batch_test,
                model.y: y_batch_test,
                model.keep_prob: 1.0
            }
            data_loader.reset_batch_pointer()

            steps = 0       # epochs since last dev improvement (patience)
            prev_equal = 0. # best dev accuracy so far
            for epoch in range(args.num_epochs):
                data_loader.reset_batch_pointer()
                for i in range(data_loader.num_batches):
                    start = time.time()
                    x_batch, y_batch, unk_count, n_sent, _ = \
                        data_loader.next_batch()
                    feed_dict = {
                        model.x: x_batch,
                        model.y: y_batch,
                        model.keep_prob: args.keep_prob
                    }
                    _, step, summary, loss, equal = sess.run([
                        train_op, global_step, merged, model.cost, model.equal
                    ], feed_dict)
                    print(
                        "training step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches, loss,
                                np.mean(equal),
                                int(unk_count / args.batch_size), n_sent,
                                time.time() - start))

                # Start dev
                print("Start dev")
                data_loader.reset_batch_pointer()
                accur = []
                for i in range(data_loader.num_batches_test):
                    start = time.time()
                    step, summary, loss, equal = sess.run(
                        [global_step, merged, model.cost, model.equal],
                        feed_dict_dev)
                    print(
                        "dev step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                        .format(step, epoch, i, data_loader.num_batches_test,
                                loss, np.mean(equal),
                                int(unk_count_dev / args.batch_size),
                                n_sent_dev,
                                time.time() - start))
                    accur.append(np.mean(equal))

                eval_acc = np.mean(accur)
                if eval_acc > prev_equal:
                    prev_equal = eval_acc
                    steps = 0  # reset patience on improvement
                    train_writer.add_summary(summary, step)
                    current_step = tf.train.global_step(sess, global_step)
                    checkpoint_path = os.path.join(args.save_dir, 'model.ckpt')
                    path = saver.save(sess, checkpoint_path,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}".format(path))
                elif steps > args.stop_count:
                    print("early stopping")
                    break
                else:
                    steps += 1

            print("Start test")
            data_loader.reset_batch_pointer()
            accur = []
            for i in range(data_loader.num_batches_test):
                start = time.time()
                step, summary, loss, equal = sess.run(
                    [global_step, merged, model.cost, model.equal],
                    feed_dict_test)
                print(
                    "test step {}, epoch {}, batch {}/{}, loss: {:.4f}, accuracy: {:.4f}, avg unk count: {}, avg sent: {}, time/batch: {:.3f}"
                    .format(step, epoch, i, data_loader.num_batches_test, loss,
                            np.mean(equal),
                            int(unk_count_test / args.batch_size),
                            n_sent_test,
                            time.time() - start))
                accur.append(np.mean(equal))
            print('avg test: {:.4f}'.format(np.mean(accur)))
            train_writer.close()