def run_func2(dataset, config):
    """Run the QA model over *dataset* and write one predicted answer per line.

    Args:
        dataset: iterable of (question, context, answer) triples.
        config: project Config with vocab_path, embed_path, hidden_state_size,
            train_dir attributes.

    Side effects: writes 'dev-prediction.txt' in the current directory.
    """
    vocab, rev_vocab = initialize_vocab(config.vocab_path)

    # Transpose the list of triples into three parallel tuples.
    q, c, a = zip(*[[_q, _c, _a] for (_q, _c, _a) in dataset])

    dataset = [q, c, a]

    embeddings = get_trimmed_glove_vectors(config.embed_path)

    encoder = Encoder(config.hidden_state_size)
    decoder = Decoder(config.hidden_state_size)

    qa = QASystem(encoder, decoder, embeddings, config)
    # Synthetic sequential ids stand in for real question uuids.
    # range() replaces xrange(), which does not exist in Python 3.
    question_uuid_data = list(range(len(a)))

    with tf.Session() as sess:
        qa.initialize_model(sess, config.train_dir)
        answers, answers_canonical = generate_answers(sess, qa, dataset,
                                                      question_uuid_data,
                                                      rev_vocab)
        # Write one predicted answer per line to the root dir.
        with io.open('dev-prediction.txt', 'w', encoding='utf-8') as f:
            for i in range(len(a)):
                curr_ans = answers[i]
                # unicode(...) is gone in Python 3; decode only if the model
                # returned bytes, otherwise the answer is already text.
                if isinstance(curr_ans, bytes):
                    curr_ans = curr_ans.decode("utf-8")
                f.write(u"%s\n" % curr_ans)
def run_func():
    """Run the QA model over data/squad/fuse.json and dump the answers as JSON.

    Side effects: writes 'temp/fuse-answer.json' (UTF-8, non-ASCII preserved).
    """
    config = Config()

    # ========= Load Dataset =========
    # You can change this code to load dataset in your own way
    vocab, rev_vocab = initialize_vocab(config.vocab_path)

    dev_path = "data/squad/fuse.json"
    dev_dirname = os.path.dirname(os.path.abspath(dev_path))
    dev_filename = os.path.basename(dev_path)
    context_data, question_data, question_uuid_data = prepare_dev(
        dev_dirname, dev_filename, vocab)

    # Placeholder [start, end] spans; the model produces the real answers.
    # range() replaces xrange(), which does not exist in Python 3.
    ques_len = len(question_data)
    answers = [[0, 0] for _ in range(ques_len)]

    dataset = [question_data, context_data, answers]

    embeddings = get_trimmed_glove_vectors(config.embed_path)

    encoder = Encoder(config.hidden_state_size)
    decoder = Decoder(config.hidden_state_size)

    qa = QASystem(encoder, decoder, embeddings, config)

    with tf.Session() as sess:
        qa.initialize_model(sess, config.train_dir)
        answers, _ = generate_answers(sess, qa, dataset, question_uuid_data,
                                      rev_vocab)
        # json.dumps already returns str in Python 3; the unicode() wrapper
        # from Python 2 is gone and unnecessary.
        with io.open('temp/fuse-answer.json', 'w', encoding='utf-8') as f:
            f.write(json.dumps(answers, ensure_ascii=False))
# Example #3
def run_func():
    """Predict answers for download/squad/test.json and write submission.csv.

    Output format: one "Id,Answer" header line, then one "<uuid>,<answer>"
    line per question.
    """
    config = Config()
    vocab, rev_vocab = initialize_vocab(config.vocab_path)

    dev_path = "download/squad/test.json"
    dev_dirname = os.path.dirname(os.path.abspath(dev_path))
    dev_filename = os.path.basename(dev_path)
    context_data, question_data, question_uuid_data = prepare_dev(
        dev_dirname, dev_filename, vocab)

    # Placeholder [start, end] spans; the model produces the real answers.
    # range() replaces xrange(), which does not exist in Python 3.
    ques_len = len(question_data)
    answers = [[0, 0] for _ in range(ques_len)]

    dataset = [question_data, context_data, answers]

    embeddings = get_trimmed_glove_vectors(config.embed_path)

    encoder = Encoder(config.hidden_state_size)
    decoder = Decoder(config.hidden_state_size)

    qa = QASystem(encoder, decoder, embeddings, config)

    # Collect rows and join once instead of quadratic string +=.
    rows = ["Id,Answer"]

    with tf.Session() as sess:
        qa.initialize_model(sess, config.train_dir)
        answers, _ = generate_answers(sess, qa, dataset, question_uuid_data,
                                      rev_vocab)
        # answers maps question uuid -> answer text; iterate items() instead
        # of re-looking each key up.
        for uuid, ans in answers.items():
            # .replace(" s ", "s ") re-attaches possessive "'s" tokens that
            # were split off by tokenization.
            rows.append(uuid + "," + normalize_answer(ans).replace(" s ", "s "))

    # Text mode: 'wb' would reject str data with a TypeError on Python 3.
    # 'out' avoids shadowing the builtin name 'file'.
    with open('submission.csv', 'w') as out:
        out.write("\n".join(rows) + "\n")
# Example #4
def predict(model_name):
    """Interactive loop: read questions from stdin and print the model's
    top-10 candidate answers with confidence and source document."""
    qa = QASystem(model_name)

    with tf.Session() as sess:
        qa.initialize_model(sess)
        while True:
            question = input("Ask a question: ")
            candidates = answer_question(qa, sess, question, best_n=10)
            for answer, confidence, doc in candidates:
                line = '{:.2f}:\t{}    ({})'.format(confidence, answer, doc)
                print(line)
def run_func(model_name):
    """Train a QASystem on the SQuAD train split, evaluating on the dev split."""
    train = SquadData.load(config.SQUAD_TRAIN_PREFIX, size=config.TRAIN_SIZE)
    dev = SquadData.load(config.SQUAD_DEV_PREFIX, size=config.EVAL_SIZE)

    qa = QASystem(model_name)

    with tf.Session() as sess:
        # Restore a pretrained checkpoint if one exists; otherwise start fresh.
        qa.initialize_model(sess)
        qa.train(sess, [train, dev])
# Example #6
def run_func():
    """Interactive QA loop: read a question and a context from stdin, run the
    trained model, and print the predicted answer span.

    Type 'exit' at either prompt to stop.
    """
    config = Config()
    train = squad_dataset(config.question_train, config.context_train,
                          config.answer_train)
    dev = squad_dataset(config.question_dev, config.context_dev,
                        config.answer_dev)

    vocab, rev_vocab = initialize_vocab(config.vocab_path)
    embeddings = get_trimmed_glove_vectors(config.embed_path)

    encoder = Encoder(config.hidden_size)
    decoder = Decoder(config.hidden_size)

    qa = QASystem(encoder, decoder, embeddings, config)

    # Out-of-vocabulary tokens map to id 2 (presumably the <UNK> id — confirm
    # against the vocab file).
    UNK_ID = 2

    with tf.Session() as sess:
        # ====== Load a pretrained model if it exists or create a new one if no pretrained available ======
        qa.initialize_model(sess, config.train_dir)
        # train process
        # qa.train(sess, [train, dev], config.train_dir)
        # em = qa.evaluate_model(sess, dev)

        # run process
        while True:
            question = input('please input question: ')
            if question == 'exit':
                break
            raw_context = input('please input context: ')
            if raw_context == 'exit':
                break
            # dict.get replaces the "x in vocab.keys()" double lookup
            # (an O(n) list scan on Python 2).
            question_ids = [vocab.get(x, UNK_ID) for x in question.split()]
            context_ids = [vocab.get(x, UNK_ID) for x in raw_context.split()]
            # Dummy [1, 2] answer span satisfies the dataset layout.
            test = [[question_ids], [context_ids], [[1, 2]]]
            a_s, a_e = qa.answer(sess, test)
            tokens = raw_context.split()
            if a_e == a_s:
                print("answer: ", tokens[a_s[0]])
            else:
                print("answer: ",
                      ' '.join(tokens[a_s[0]:a_e[0] + 1]))
def evaluate(model_name, n=None):
    """Evaluate *model_name* on the TREC question set; report top-1/5/10 hit rates.

    Args:
        model_name: name of a saved model directory under config.MODELS_DIR.
        n: number of questions to evaluate; falsy means the whole set.

    Side effects: writes the candidate answers per question to
    <MODELS_DIR>/<model_name>/trec.csv and prints running accuracies.
    """
    data = []
    with open(config.TREC_PATH, 'r') as f:
        reader = csv.reader(f, delimiter='\t')
        # The first 199 rows are skipped — presumably a reserved/dev prefix;
        # confirm against the TREC file layout.
        for row in list(reader)[199:]:
            data.append((row[2].strip(), row[3].strip()))

    if not n:
        n = len(data)

    qa = QASystem(model_name)

    top_count = 0
    top_5_count = 0
    top_10_count = 0
    with tf.Session() as sess:
        qa.initialize_model(sess)

        with open(os.path.join(config.MODELS_DIR, model_name, 'trec.csv'),
                  'w') as f:
            writer = csv.writer(f)
            # enumerate(start=1) replaces the hand-rolled i += 1 counter.
            for i, (question, answer_pattern) in enumerate(tqdm(data[:n]), 1):
                answers = [
                    answer for answer, confidence, doc in answer_question(
                        qa, sess, question, 10)
                ]
                writer.writerow(answers)
                # One bool per candidate: does it match the TREC answer regex?
                correct = [
                    bool(re.search(answer_pattern, answer))
                    for answer in answers
                ]
                # any(correct[:k]) == (True in correct[:k]) and stays safe
                # when fewer than k candidates come back.
                if any(correct[:1]):
                    top_count += 1
                if any(correct[:5]):
                    top_5_count += 1
                if any(correct[:10]):
                    top_10_count += 1
                print('{}: {}, {}, {}'.format(i,
                                              float(top_count) / i,
                                              float(top_5_count) / i,
                                              float(top_10_count) / i))
    print('Top match: {}'.format(float(top_count) / n))
    print('Top 5 match: {}'.format(float(top_5_count) / n))
    print('Top 10 match: {}'.format(float(top_10_count) / n))
# Example #8
def run_func():
    """Build the encoder/decoder QA model and train it on SQuAD train/dev."""
    config = Config()

    train = squad_dataset(config.question_train, config.context_train,
                          config.answer_train)
    dev = squad_dataset(config.question_dev, config.context_dev,
                        config.answer_dev)

    vocab, rev_vocab = initialize_vocab(config.vocab_path)
    embeddings = get_trimmed_glove_vectors(config.embed_path)

    qa = QASystem(Encoder(config.hidden_state_size),
                  Decoder(config.hidden_state_size),
                  embeddings, config)

    with tf.Session() as sess:
        # Restore a pretrained checkpoint when available; otherwise a new
        # model is created.
        qa.initialize_model(sess, config.train_dir)
        qa.train(sess, [train, dev], config.train_dir)