Пример #1
0
def main(_):
    """Train the language model and, in "test" mode, run prediction.

    Loads the data through ``reader.load_holmes_data``, builds the training
    model (plus a second model sharing the same "Model" variable scope when
    ``FLAGS.model == "test"``), trains for ``config.max_max_epoch`` epochs
    while saving a checkpoint to ``model/model.ckpt`` after every epoch, and
    finally, in test mode only, restores that checkpoint and calls
    ``predict`` on the test questions.

    Args:
        _: Unused. Presumably the argv list handed over by the app runner;
            the name is rebound below when the raw data is unpacked.
    """
    raw_data = reader.load_holmes_data(12001)
    train_data, _, word_to_id = raw_data

    # Everything test-related (questions, test model, prediction) is created
    # and used only in this mode, so evaluate the flag once.
    is_test = FLAGS.model == "test"
    if is_test:
        test_questions = reader.get_questions(word_to_id, FLAGS.q_path)

    config = get_config()
    eval_config = get_config()
    # The evaluation model gets its own batch size and step count.
    eval_config.batch_size = 1
    eval_config.num_steps = 30

    with tf.Graph().as_default():
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)

        with tf.name_scope("Train"):
            train_input = PTBInput(config=config,
                                   data=train_data,
                                   name="TrainInput")
            with tf.variable_scope("Model",
                                   reuse=None,
                                   initializer=initializer):
                m = PTBModel(is_training=True,
                             config=config,
                             input_=train_input)
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)

        if is_test:
            with tf.name_scope("Test"):
                test_input = PTBInput(config=eval_config,
                                      data=train_data,
                                      name="TestInput")
                with tf.variable_scope("Model",
                                       reuse=True,
                                       initializer=initializer):
                    # NOTE(review): the model is fed `test_questions`, not the
                    # `test_input` built just above -- confirm this is
                    # intended; `test_input` is otherwise unused.
                    mtest = PTBModel(is_training=False,
                                     config=eval_config,
                                     input_=test_questions)

        # `initializer` only describes how variables get their initial values;
        # it is not an op a session can run. The op that actually initialises
        # every variable in the graph is this one.
        init_op = tf.global_variables_initializer()

        saver = tf.train.Saver()
        session_config = tf.ConfigProto()
        session_config.gpu_options.per_process_gpu_memory_fraction = 0.05
        with tf.Session(config=session_config) as session:
            session.run(init_op)
            for i in range(config.max_max_epoch):
                # Full learning rate for the first `max_epoch` epochs, then
                # decay by `lr_decay` for every epoch after that.
                lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
                m.assign_lr(session, config.learning_rate * lr_decay)
                print("Epoch: %d Learning rate: %.3f" %
                      (i + 1, session.run(m.lr)))
                run_epoch(session,
                          m,
                          eval_op=m.train_op,
                          verbose=True)
                save_path = saver.save(session, "model/model.ckpt")
                print("Model saved in file: %s" % save_path)

            # `mtest` and `test_questions` exist only in test mode, so the
            # restore-and-predict step must be guarded by the same condition.
            if is_test:
                saver.restore(session, "model/model.ckpt")
                print("Model restored.")
                predict(session, mtest, test_questions)
Пример #2
0
def cli(question_file, answers_dir, answer_file, test):
    """Given a set of questions and the answers directory with top 100
    documents for each question, generate the answer file
    """
    # NOTE(review): the docstring above is left untouched in case it doubles
    # as command-line help text -- confirm before rewording it.
    #
    # question_file: path to the questions file (resolved with realpath).
    # answers_dir:   path to the directory passed on to get_answers().
    # answer_file:   path the answers are written to via write_answers().
    # test:          when truthy, run _test_endpoint() instead and return.
    success('---NLP Project Three: Question Answer---')

    question_filepath = os.path.realpath(question_file)
    answers_dirpath = os.path.realpath(answers_dir)
    answer_filepath = os.path.realpath(answer_file)

    log('Answering: {}\n Using: {}\n Into: {}'.format(question_filepath,
                                                      answers_dirpath,
                                                      answer_filepath))

    if test:
        warn('Testing, not normal execution...')
        _test_endpoint(question_filepath, answers_dirpath, answer_filepath)
        return

    try:
        questions = get_questions(question_filepath)
        # Compare counts by value: `is not` tests object identity, which only
        # happens to work for small integers on CPython.
        if len(questions) != 232:
            warn('devset has 232 questions (Got {})'.format(len(questions)))

        # Answers are keyed by question id, so questions sharing a qid
        # collapse into one entry; the count check below reports that.
        answers = {
            question.qid: get_answers(question, answers_dirpath)
            for question in questions
        }
        if len(answers) != len(questions):
            warn('Got {} answers for {} questions'.format(
                len(answers), len(questions)))

        write_answers(answers, answer_filepath)
        success('Wrote answers to {}'.format(answer_filepath))
    except NotImplementedError as e:
        # Unimplemented pipeline stages are reported rather than raised.
        error('TODO: {}'.format(e))
Пример #3
0
def _test_endpoint(question_filepath, answers_dirpath, answer_filepath):
    """Testing endpoint.

    Loads the questions from ``question_filepath``, selects the one whose
    ``qid`` equals 214, fetches its answers from ``answers_dirpath`` and
    prints each intermediate result.

    ``answer_filepath`` is accepted but not used here.

    Raises:
        IndexError: if no loaded question has ``qid == 214``.
    """
    questions = get_questions(question_filepath)
    # Single-argument print(...) prints the same thing under the Python 2
    # print statement and the Python 3 print function.
    print(questions)
    matches = [q for q in questions if q.qid == 214]
    print(matches)
    question = matches[0]
    answers = get_answers(question, answers_dirpath)
    print(answers)