def main(_):
    """Train the PTB language model on the Holmes corpus and, when
    FLAGS.model == "test", restore the checkpoint and score the test questions.

    Args:
        _: unused positional argument (tf.app.run passes argv here).

    Side effects:
        Writes checkpoints to "model/model.ckpt" and prints progress to stdout.
    """
    raw_data = reader.load_holmes_data(12001)
    train_data, _, word_to_id = raw_data
    if FLAGS.model == "test":
        test_questions = reader.get_questions(word_to_id, FLAGS.q_path)

    config = get_config()
    eval_config = get_config()
    # Evaluation decodes one sequence at a time with a longer unroll window.
    eval_config.batch_size = 1
    eval_config.num_steps = 30

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config, data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model", reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True, config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        if FLAGS.model == "test":
            with tf.name_scope("Test"):
                test_input = PTBInput(config=eval_config, data=train_data,
                                      name="TestInput")
                with tf.variable_scope("Model", reuse=True,
                                       initializer=initializer):
                    # BUG FIX: the original passed the raw `test_questions`
                    # list as `input_`; the Train branch shows PTBModel is fed
                    # a PTBInput instance, so use the `test_input` that was
                    # built (and previously left unused) here.
                    mtest = PTBModel(is_training=False, config=eval_config,
                                     input_=test_input)

        saver = tf.train.Saver()
        session_config = tf.ConfigProto()
        # Cap GPU memory so multiple jobs can share one device.
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.05

        with tf.Session(config=session_config) as session:
            # BUG FIX: the original ran `session.run(initializer)`, but a
            # tf.random_uniform_initializer object is not a runnable op.
            # Variables must be initialized through the global init op.
            session.run(tf.global_variables_initializer())

            # NOTE(review): the source was collapsed onto one line; saving
            # each epoch inside the loop and restoring/predicting after it is
            # the reconstruction that matches the surrounding statement order.
            for i in range(config.max_max_epoch):
                # Keep the base LR for `max_epoch` epochs, then decay
                # exponentially.
                lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)

                print("Epoch: %d Learning rate: %.3f"
                      % (i + 1, session.run(m.lr)))
                train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                             verbose=True)

                save_path = saver.save(session, "model/model.ckpt")
                print("Model saved in file: %s" % save_path)

            if FLAGS.model == "test":
                # BUG FIX: `mtest` and `test_questions` only exist in test
                # mode; the original called predict() unconditionally, which
                # would raise NameError on a training-only run.
                saver.restore(session, "model/model.ckpt")
                print("Model restored.")
                test_perplexity = predict(session, mtest, test_questions)
def cli(question_file, answers_dir, answer_file, test):
    """Given a set of questions and the answers directory with top 100
    documents for each question, generate the answer file.

    Args:
        question_file: path to the devset question file.
        answers_dir: directory holding the top-100 documents per question.
        answer_file: output path for the generated answers.
        test: when truthy, run the testing endpoint instead of the full flow.
    """
    success('---NLP Project Three: Question Answer---')
    question_filepath = os.path.realpath(question_file)
    answers_dirpath = os.path.realpath(answers_dir)
    answer_filepath = os.path.realpath(answer_file)
    log('Answering: {}\n Using: {}\n Into: {}'.format(
        question_filepath, answers_dirpath, answer_filepath))
    if test:
        warn('Testing, not normal execution...')
        _test_endpoint(question_filepath, answers_dirpath, answer_filepath)
        return
    try:
        questions = get_questions(question_filepath)
        # BUG FIX: the original compared with `is not 232` — an *identity*
        # check that only works by accident of CPython's small-int caching
        # (and never works for ints >= 257). Use value inequality.
        if len(questions) != 232:
            warn('devset has 232 questions (Got {})'.format(len(questions)))
        answers = {}
        for question in questions:
            answers[question.qid] = get_answers(question, answers_dirpath)
        # BUG FIX: same `is not` → `!=` identity-vs-equality fix as above.
        if len(answers) != len(questions):
            warn('Got {} answers for {} questions'.format(
                len(answers), len(questions)))
        write_answers(answers, answer_filepath)
        success('Wrote answers to {}'.format(answer_filepath))
    except NotImplementedError as e:
        error('TODO: {}'.format(e))
def _test_endpoint(question_filepath, answers_dirpath, answer_filepath):
    """Testing endpoint: load the questions, pick qid 214, and print the
    answers produced for it.

    `answer_filepath` is accepted for signature parity with the normal flow
    but is not used here.
    """
    questions = get_questions(question_filepath)
    # CONSISTENCY FIX: the original used Python-2-only `print x` statements;
    # the rest of the file uses `print(...)` calls. Single-argument
    # `print(expr)` prints identically under Python 2.
    print(questions)
    matches = [q for q in questions if q.qid == 214]
    print(matches)
    question = matches[0]
    answers = get_answers(question, answers_dirpath)
    print(answers)