def do_train(args):
    """Train an RNNModel and write dev-set predictions to disk.

    Loads and preprocesses the data via project helpers, builds the model
    inside a fresh TF graph, fits it, and (when no Report object is used)
    writes CoNLL-format predictions plus a human-readable eval file.

    Args:
        args: parsed command-line arguments consumed by Config and the
            data-loading helpers.
    """
    # Set up some parameters.
    config = Config(args)
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]
    # config.use_crf = True
    helper.save(config.output_path)

    # Mirror logging output into the run's log file.
    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(
        logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)

    report = None  # Report(Config.eval_output)

    with tf.Graph().as_default():
        logger.info("Building model...",)
        start = time.time()
        model = RNNModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)
            if report:
                report.log_output(model.output(session, dev_raw))
                report.save()
            else:
                # Save predictions in a text file.
                output = model.output(session, dev_raw)
                sentences, labels, predictions = zip(*output)
                predictions = [[LBLS[l] for l in preds] for preds in predictions]
                # BUG FIX: zip() returns a one-shot iterator on Python 3.
                # Materialize it so that BOTH consumers below (write_conll
                # and the print_sentence loop) see the data; previously the
                # second loop iterated an exhausted iterator and wrote
                # nothing to eval_output.
                output = list(zip(sentences, labels, predictions))
                with open(model.config.conll_output, 'w') as f:
                    write_conll(f, output)
                with open(model.config.eval_output, 'w') as f:
                    for sentence, labels, predictions in output:
                        print_sentence(f, sentence, labels, predictions)
def do_test2(args):
    """Smoke-test the RNNModel: build it and run fit() without crashing.

    No checkpoints are written (saver is None); success is simply that
    training completes.
    """
    logger.info("Testing implementation of RNNModel")
    config = Config(args)
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        build_start = time.time()
        model = RNNModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - build_start)

        init = tf.global_variables_initializer()
        saver = None  # no checkpointing during the smoke test

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)
            logger.info("Model did not crash!")
            logger.info("Passed!")
def do_evaluate(args):
    """Restore a trained NGramModel from disk and print its predictions.

    Reads n-gram input data from args.data, restores the checkpoint at
    model.config.model_output, and writes labeled sentences to args.output.
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_ngrams(args.data)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        build_start = time.time()
        model = NGramModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - build_start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_output)
            for sentence, labels, predictions in model.output(session, input_data):
                # Map numeric label indices back to their string tags.
                predictions = [LBLS[l] for l in predictions]
                print_sentence(args.output, sentence, labels, predictions)
def do_train(args):
    """Train an NGramModel and save the dev-set predictions.

    NOTE(review): this module appears to define do_train more than once;
    the later definition shadows the earlier one — confirm which entry
    point is intended to win.
    """
    config = Config()
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]
    helper.save(config.output_path)

    # Mirror logging output into the run's log file.
    file_handler = logging.FileHandler(config.log_output)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(
        logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(file_handler)

    report = None

    with tf.Graph().as_default():
        logger.info("Building model...",)
        build_start = time.time()
        model = NGramModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - build_start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)
            if report:
                report.log_output(model.output(session, dev_raw))
                report.save()
            else:
                # Write the dev-set predictions to the CoNLL output file.
                output = model.output(session, dev_raw)
                sentences, labels, predictions = zip(*output)
                output = zip(sentences, labels, list(predictions))
                with open(model.config.conll_output, 'w') as f:
                    write_ngrams(f, output)
def do_shell(args):
    """Interactive REPL: read a sentence, run the trained model, print tags.

    Restores the checkpoint at model.config.model_output, then loops
    reading whitespace-tokenized sentences from stdin until EOF (Ctrl-D).
    """
    config = Config(args)
    helper = ModelHelper.load(args.model_path)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        start = time.time()
        model = RNNModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            saver.restore(session, model.config.model_output)

            print("""Welcome! You can use this shell to explore the behavior of your model. Please enter sentences with spaces between tokens, e.g., input> Germany 's representative to the European Union 's veterinary committee . """)

            # BUG FIX: raw_input does not exist on Python 3 (NameError).
            # Resolve the reader once, keeping Python 2 behavior intact.
            try:
                read_line = raw_input  # Python 2
            except NameError:
                read_line = input  # Python 3

            while True:
                # Create simple REPL
                try:
                    sentence = read_line("input> ")
                    tokens = sentence.strip().split(" ")
                    # Feed dummy "O" labels; only the predictions matter here.
                    for sentence, _, predictions in model.output(
                            session, [(tokens, ["O"] * len(tokens))]):
                        predictions = [LBLS[l] for l in predictions]
                        print_sentence(sys.stdout, sentence,
                                       [""] * len(tokens), predictions)
                except EOFError:
                    print("Closing session.")
                    break
saver.save(session, self.config.model_output + "_%d" % (iterTime)) #if(smallIter % 200 == 0): print("Intermediate epoch %d : loss : %f" % (iterTime, np.mean(np.mean(np.array(loss))))) print("epoch %d : loss : %f" % (iterTime, np.mean(np.mean(np.array(loss))))) def __init__(self, config, pretrained_embeddings, report=None): super(RNNModel, self).__init__(config) self.pretrained_embeddings = pretrained_embeddings self.input_placeholder = None self.labels_placeholder = None self.mask_placeholder = None self.dropout_placeholder = None self.build() if __name__ == "__main__": args = "gru" config = Config(args) glove_path = "../data/glove/glove.6B.50d.txt" glove_vector = data_util.load_embeddings(glove_path, config.embed_size) model = RNNModel(config, glove_vector.astype(np.float32)) model.train_model()