def get_padded_batch(seq_batch, vocab):
    """Turn a batch of raw sentences into a rectangular array of vocab ids.

    Every sentence is tokenised, wrapped in the start- and end-of-sequence
    ids, and right-padded with the padding id up to the length of the longest
    encoded sentence in the batch. Tokens that are not keys of ``vocab`` are
    replaced by the out-of-vocabulary id.

    Args:
        seq_batch: iterable of sentences accepted by ``tokenise``.
        vocab: mapping from token to integer id; must contain the special
            tokens named by ``loader.SOS``, ``loader.EOS``, ``loader.OOV``
            and (when padding is needed) ``loader.PAD``.

    Returns:
        The padded id sequences passed through ``np.asarray``.

    Raises:
        ValueError: if ``seq_batch`` is empty (there is no longest sequence).
    """
    def encode(sentence):
        # <SOS> tok_1 ... tok_n <EOS>, with unknown tokens mapped to <OOV>.
        ids = [vocab[loader.SOS]]
        for tok in tokenise(sentence, asbytes=False):
            if tok in vocab.keys():
                ids.append(vocab[tok])
            else:
                ids.append(vocab[loader.OOV])
        ids.append(vocab[loader.EOS])
        return ids

    encoded = [encode(sentence) for sentence in seq_batch]
    longest = max(len(ids) for ids in encoded)

    rows = []
    for ids in encoded:
        padding = [vocab[loader.PAD] for _ in range(longest - len(ids))]
        rows.append(ids + padding)
    return np.asarray(rows)
Exemplo n.º 2
0
    def get_q(self, ctxt, ans, ans_pos):
        """Build a templated question for ``ans`` from nearby context tokens.

        The context is first passed through ``preprocessing.filter_context``
        and then through ``self.nlp``. The answer's type is the most common
        ``ent_type_`` among the tokens at the answer position. The closest
        non-answer noun or named-entity token and the closest ``VBG``/``VBN``
        token are then handed to ``self.format_q``.

        Args:
            ctxt: context text containing the answer.
            ans: answer text.
            ans_pos: character position of the answer in ``ctxt``.

        Returns:
            Whatever ``self.format_q`` returns for the translated answer type,
            translated entity type, entity tokens and nearest verb.
        """
        ctxt_filt, ans_pos = preprocessing.filter_context(ctxt, ans_pos, 0, 30)
        ans_toks = preprocessing.tokenise(ans, asbytes=False)
        doc = self.nlp(ctxt_filt)
        ctxt_toks = [str(tok).lower() for tok in doc]

        # Prefer the first literal occurrence of the answer's first token;
        # otherwise derive the word index from the character position.
        if ans_toks[0] in ctxt_toks:
            ans_ix = ctxt_toks.index(ans_toks[0])
        else:
            ans_ix = preprocessing.char_pos_to_word(ctxt_filt, ctxt_toks, ans_pos, asbytes=False)

        ans_len = len(ans_toks)
        ans_end = min(ans_ix + ans_len, len(doc))
        type_counts = Counter(doc[i].ent_type_ for i in range(ans_ix, ans_end))
        ans_type = type_counts.most_common()[0][0]

        type_distances = []
        verb_distances = []
        last_ix = len(doc) - 1
        for offset in range(len(ctxt_toks)):
            tok = doc[offset]
            if str(tok).lower() in ans_toks:
                continue

            # Token distance to the answer span, measured from whichever side
            # of the span the token lies on.
            dist = max(offset - ans_ix - ans_len + 1, ans_ix - offset)

            if tok.pos_ == 'NOUN':
                type_distances.append((dist, 'THING', tok, offset))

            if tok.ent_type_ != '':
                # Skip an entity-initial token whose successor is an answer token.
                leads_into_answer = (
                    tok.ent_iob_ == 'B'
                    and str(doc[min(offset + 1, last_ix)]).lower() in ans_toks
                )
                if not leads_into_answer and self.type_translate(tok.ent_type_) != 'CARDINAL':
                    type_distances.append((dist, tok.ent_type_, tok, offset))

            if tok.tag_ in ['VBG', 'VBN']:
                verb_distances.append((dist, tok.tag_, tok, offset))

        if len(verb_distances) > 0:
            # min() returns the first of equally-near candidates.
            nearest_verb = min(verb_distances, key=lambda cand: cand[0])
        else:
            nearest_verb = (0, 'VBG', 'is', 0)

        if len(type_distances) > 0:
            nearest_entity = min(type_distances, key=lambda cand: cand[0])
            first_ix = nearest_entity[3]
            final_ix = first_ix
            # Extend over the following tokens marked as inside an entity.
            while final_ix + 1 < len(doc) and doc[final_ix + 1].ent_iob_ == 'I':
                final_ix += 1
            entity_toks = [str(tok) for tok in doc[first_ix:final_ix + 1]]
            entity_type = nearest_entity[1]
        else:
            entity_toks = ["thing"]
            entity_type = "THING"

        return self.format_q(
            self.type_translate(ans_type),
            self.type_translate(entity_type),
            entity_toks,
            nearest_verb[2],
        )
Exemplo n.º 3
0
 def get_ans(self, contexts, questions):
     """Predict an answer string for each (context, question) pair.

     Contexts and questions are converted to padded id batches with
     ``self.get_padded_batch`` and fed to the model's ``pred_span`` op. Each
     predicted span is treated as inclusive token indices into the tokenised
     context, and the covered tokens are joined with single spaces.

     Args:
         contexts: sequence of context strings.
         questions: sequence of question strings, aligned with ``contexts``.

     Returns:
         List of answer strings, one per predicted span.
     """
     context_tokens = [tokenise(text, asbytes=False) for text in contexts]
     context_ids = self.get_padded_batch(contexts)
     question_ids = self.get_padded_batch(questions)

     feed = {
         self.model.context_in: context_ids,
         self.model.question_in: question_ids,
     }
     spans = self.sess.run(self.model.pred_span, feed_dict=feed)

     answers = []
     for i, span in enumerate(spans):
         first, last = span[0], span[1]
         # The end index is inclusive, hence the +1 on the slice bound.
         answers.append(" ".join(context_tokens[i][first:last + 1]))
     return answers
Exemplo n.º 4
0
def main(_):
    """Smoke-test the saved QA model on a few hand-written questions.

    Loads the SQuAD training triples (only their count is reported), restores
    the ``saved/qatest`` checkpoint and prints, for each question, the context
    tokens selected by the span that ``qa.get_ans`` returns.
    """
    train_data = loader.load_squad_triples("./data/", False)

    print('Loaded SQuAD with ',len(train_data),' triples')
    train_contexts, train_qs, train_as,train_a_pos = zip(*train_data)

    qa = MpcmQaInstance()
    qa.load_from_chkpt(FLAGS.model_dir+'saved/qatest')
    vocab = qa.vocab

    questions = [
        "What colour is the car?",
        "When was the car made?",
        "Where was the date?",
        "What was the dog called?",
        "Who was the oldest cat?",
    ]
    # Every question is asked against the same context.
    shared_context = "The car is green, and was built in 1985. This sentence should make it less likely to return the date, when asked about a cat. The oldest cat was called creme puff and lived for many years!"
    contexts = [shared_context for _ in questions]

    spans = qa.get_ans(contexts, questions)
    print(contexts[0])
    for i, question in enumerate(questions):
        toks = tokenise(contexts[i], asbytes=False)
        print(question, "->", toks[spans[i,0]:spans[i,1]])
Exemplo n.º 5
0
def main(_):
    """Evaluate a trained question-generation checkpoint on SQuAD.

    Restores the model selected by ``FLAGS.model_type`` and
    ``FLAGS.eval_model_id``, decodes ``FLAGS.num_eval_samples`` examples in
    batches of ``FLAGS.eval_batch_size`` and scores the predicted questions
    against the gold questions with F1, BLEU, NLL and sum-of-word-embedding
    (SOWE) cosine similarity. When ``FLAGS.eval_metrics`` is set, QA, LM and
    discriminator scores are computed as well.

    Side effects: writes an HTML preview of the first batch and a JSON file
    with aggregate metrics and per-example results under
    ``FLAGS.log_directory``, and prints the aggregate metrics.
    """
    model_type = FLAGS.model_type
    disc_path = FLAGS.model_dir+'saved/discriminator-trained-latent'
    chkpt_path = FLAGS.model_dir+'qgen/'+ model_type+'/'+FLAGS.eval_model_id

    # load dataset
    dev_data = loader.load_squad_triples(FLAGS.data_path, dev=FLAGS.eval_on_dev, test=FLAGS.eval_on_test)

    if len(dev_data) < FLAGS.num_eval_samples:
        exit('***ERROR*** Eval dataset is smaller than the num_eval_samples flag!')
    if len(dev_data) > FLAGS.num_eval_samples:
        print('***WARNING*** Eval dataset is larger than the num_eval_samples flag!')

    # Keep the unfiltered contexts and answer positions: the auxiliary models
    # and the results file use the original text, not the cropped window.
    dev_contexts_unfilt, _, _, dev_a_pos_unfilt = zip(*dev_data)

    if FLAGS.filter_window_size_before > -1:
        dev_data = preprocessing.filter_squad(dev_data, window_size_before=FLAGS.filter_window_size_before, window_size_after=FLAGS.filter_window_size_after, max_tokens=FLAGS.filter_max_tokens)

    print('Loaded SQuAD dev set with ',len(dev_data),' triples')

    # Check the checkpoint directory before reading anything from it, so a
    # missing directory produces this message rather than a file error.
    if not os.path.exists(chkpt_path):
        exit('Checkpoint path doesnt exist! '+chkpt_path)

    with open(chkpt_path+'/vocab.json') as f:
        vocab = json.load(f)

    with SquadStreamer(vocab, FLAGS.eval_batch_size, 1, shuffle=False) as dev_data_source:

        glove_embeddings = loader.load_glove(FLAGS.data_path)

        def sum_of_word_embeddings(text):
            """Sum the GloVe vectors of the tokens in ``text``; unknown tokens add zeros."""
            return np.sum(np.asarray([
                glove_embeddings[w] if w in glove_embeddings else np.zeros((FLAGS.embedding_size,))
                for w in preprocessing.tokenise(text, asbytes=False)
            ]), axis=0)

        # Create model
        if model_type[:7] == "SEQ2SEQ":
            model = Seq2SeqModel(vocab, training_mode=False)
        elif model_type[:2] == "RL":
            # TEMP - no need to spin up the LM or QA model at eval time
            FLAGS.qa_weight = 0
            FLAGS.lm_weight = 0
            model = RLModel(vocab, training_mode=False)
        else:
            exit("Unrecognised model type: "+model_type)

        with model.graph.as_default():
            saver = tf.train.Saver()

        if FLAGS.eval_metrics:
            lm = LstmLmInstance()
            qa = QANetInstance()

            lm.load_from_chkpt(FLAGS.model_dir+'saved/lmtest')
            qa.load_from_chkpt(FLAGS.model_dir+'saved/qanet2')

            discriminator = DiscriminatorInstance(trainable=False, path=disc_path)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
        with tf.Session(graph=model.graph, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            saver.restore(sess, tf.train.latest_checkpoint(chkpt_path))

            num_steps = FLAGS.num_eval_samples//FLAGS.eval_batch_size

            # Initialise the dataset
            dev_data_source.initialise(dev_data)

            f1s = []
            bleus = []
            qa_scores = []
            lm_scores = []
            nlls = []
            disc_scores = []
            sowe_similarities = []
            copy_probs = []

            qgolds = []
            qpreds = []
            res = []

            fetches = [
                model.q_hat_beam_string, model.q_hat_full_beam_str, model.q_hat_full_beam_lens,
                model.q_hat_beam_ids, model.q_hat_beam_lens,
                model.question_raw, model.question_length, model.question_ids,
                model.context_raw, model.context_length,
                model.answer_locs, model.answer_length,
                model.nll, model.mean_copy_prob,
            ]
            for e in range(1):
                for i in tqdm(range(num_steps), desc='Epoch '+str(e)):
                    dev_batch, curr_batch_size = dev_data_source.get_batch()
                    (pred_batch, pred_beam, pred_beam_lens, pred_ids, pred_lens,
                     gold_batch, gold_lens, gold_ids, ctxt, ctxt_len,
                     ans, ans_len, nll, copy_prob) = sess.run(
                         fetches,
                         feed_dict={model.input_batch: dev_batch ,model.is_training:False})

                    unfilt_ctxt_batch = [dev_contexts_unfilt[ix] for ix in dev_batch[3]]
                    a_text_batch = ops.byte_token_array_to_str(dev_batch[2][0], dev_batch[2][2], is_array=False)
                    unfilt_apos_batch = [dev_a_pos_unfilt[ix] for ix in dev_batch[3]]

                    # subtract 1 to remove the "end sent token"
                    pred_q_batch = [q.replace(' </Sent>',"").replace(" <PAD>","") for q in ops.byte_token_array_to_str(pred_batch, pred_lens-1)]

                    copy_probs.extend(copy_prob.tolist())

                    # Keep this batch's values as lists so each example below
                    # reads its own entry rather than the tail of the running list.
                    nll_batch = nll.tolist()
                    nlls.extend(nll_batch)
                    pred_ids_batch = pred_ids.tolist()

                    if FLAGS.eval_metrics:
                        gold_ans = a_text_batch
                        qa_pred = qa.get_ans(unfilt_ctxt_batch, ops.byte_token_array_to_str(pred_batch, pred_lens))

                        qa_score_batch = [metrics.f1(metrics.normalize_answer(gold_ans[b]), metrics.normalize_answer(qa_pred[b])) for b in range(curr_batch_size)]
                        lm_score_batch = lm.get_seq_perplexity(pred_q_batch).tolist()
                        disc_score_batch = discriminator.get_pred(unfilt_ctxt_batch, pred_q_batch, gold_ans, unfilt_apos_batch).tolist()

                        # Accumulate once per batch so every example is counted
                        # exactly once in the aggregate means.
                        qa_scores.extend(qa_score_batch)
                        lm_scores.extend(lm_score_batch)
                        disc_scores.extend(disc_score_batch)

                    for b, _ in enumerate(pred_batch):
                        pred_str = pred_q_batch[b].replace(' </Sent>',"").replace(" <PAD>","")
                        gold_str = tokens_to_string(gold_batch[b][:gold_lens[b]-1])
                        f1s.append(metrics.f1(gold_str, pred_str))
                        bleus.append(metrics.bleu(gold_str, pred_str))
                        qgolds.append(gold_str)
                        qpreds.append(pred_str)

                        # calc cosine similarity between sums of word embeddings
                        pred_sowe = sum_of_word_embeddings(pred_str)
                        gold_sowe = sum_of_word_embeddings(gold_str)
                        this_similarity = np.inner(pred_sowe, gold_sowe)/np.linalg.norm(pred_sowe, ord=2)/np.linalg.norm(gold_sowe, ord=2)

                        sowe_similarities.append(this_similarity)

                        this_metric_dict = {
                            'f1': f1s[-1],
                            'bleu': bleus[-1],
                            'nll': nll_batch[b],
                            'sowe': sowe_similarities[-1]
                        }
                        if FLAGS.eval_metrics:
                            this_metric_dict = {
                                **this_metric_dict,
                                'qa': qa_score_batch[b],
                                'lm': lm_score_batch[b],
                                'disc': disc_score_batch[b]
                            }

                        res.append({
                            'c': unfilt_ctxt_batch[b],
                            'q_pred': pred_str,
                            'q_gold': gold_str,
                            'a_pos': unfilt_apos_batch[b],
                            'a_text': a_text_batch[b],
                            'metrics': this_metric_dict,

                            'q_pred_ids': pred_ids_batch[b],
                            'q_gold_ids': dev_batch[1][1][b].tolist()
                        })

                    # Quick output
                    if i==0:
                        gold_str = tokens_to_string(gold_batch[0][:gold_lens[0]-1])
                        print(qpreds[0])
                        print(gold_str)

                        title = chkpt_path
                        out_str = output_eval(title,pred_batch,  pred_ids, pred_lens, gold_batch, gold_lens, ctxt, ctxt_len, ans, ans_len)
                        with open(FLAGS.log_directory+'out_eval_'+model_type+'.htm', 'w', encoding='utf-8') as fp:
                            fp.write(out_str)

            metric_dict = {
                'f1': np.mean(f1s),
                'bleu': metrics.bleu_corpus(qgolds, qpreds),
                'nll': np.mean(nlls),
                'sowe': np.mean(sowe_similarities)
            }
            if FLAGS.eval_metrics:
                metric_dict = {
                    **metric_dict,
                    'qa': np.mean(qa_scores),
                    'lm': np.mean(lm_scores),
                    'disc': np.mean(disc_scores)
                }

            with open(FLAGS.log_directory+'out_eval_'+model_type+("_test" if FLAGS.eval_on_test else "")+("_train" if (not FLAGS.eval_on_dev and not FLAGS.eval_on_test) else "")+'.json', 'w', encoding='utf-8') as fp:
                json.dump({"metrics":metric_dict, "results": res}, fp)

            print("F1: ", metric_dict['f1'])
            print("BLEU: ", metric_dict['bleu'])
            print("NLL: ", metric_dict['nll'])
            print("SOWE: ", metric_dict['sowe'])

            print("Copy prob: ", np.mean(copy_probs))
            if FLAGS.eval_metrics:
                print("QA: ", metric_dict['qa'])
                print("LM: ", metric_dict['lm'])
                print("Disc: ", metric_dict['disc'])
Exemplo n.º 6
0
# QA:  0.06118585070207543
# LM:  534.2029482313792

# Filtered candidates by LM
# F1:  0.1523949117692406
# BLEU:  0.0015981573389975351
# QA:  0.060554504435173614
# LM:  480.1121001809438

# For every training triple, send the filtered context to the CoreNLP server
# and look up the NER label of the entity mention that contains the answer.
for i in tqdm(range(len(train_data))):
    triple = train_data[i]

    ctxt, q, ans, ans_pos = triple

    ctxt_filt, ans_pos = preprocessing.filter_context(ctxt, ans_pos, 0, 30)
    ctxt_toks = preprocessing.tokenise(ctxt, asbytes=False)

    response = requests.post(url, data=ctxt_filt.encode('utf-8'))
    if response.status_code != 200:
        exit("There was a problem connecting to the CoreNLP server!")

    res = response.json()
    candidates = []

    # Run NER to get question word.
    # Default to "UNK" so `ner` is always bound, then take the first entity
    # mention whose text contains the answer. A substring test is used because
    # str.find() returns -1 (truthy) on a miss and 0 (falsy) on a match at
    # the start, which makes it unusable as a boolean.
    ner = "UNK"
    for ent in res['sentences'][0]['entitymentions']:
        if ans in ent['text']:
            ner = ent['ner']
            break
def main(_):
    """Evaluate the saved MPCM QA checkpoint on the first 500 SQuAD dev triples.

    Restores ``saved/qatest`` and runs the dev set through the model in full
    batches of ``FLAGS.batch_size``, logging the eval summary for each batch.
    Token-level F1 and exact match are accumulated between gold and predicted
    answer spans. Every ``FLAGS.eval_freq`` batches the current batch is
    dumped to ``out_qa_eval.htm`` under ``FLAGS.log_dir``. Mean F1 and EM are
    printed at the end.
    """
    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, True)[:500]

    chkpt_path = FLAGS.model_dir + 'saved/qatest'

    print('Loaded SQuAD with ', len(train_data), ' triples')
    dev_contexts, dev_qs, dev_as, dev_a_pos = zip(*dev_data)

    with open(chkpt_path + '/vocab.json') as f:
        vocab = json.load(f)

    model = MpcmQa(vocab, training_mode=False)
    with model.graph.as_default():
        saver = tf.train.Saver()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
    with tf.Session(graph=model.graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if not os.path.exists(chkpt_path):
            os.makedirs(chkpt_path)
        summary_writer = tf.summary.FileWriter(
            FLAGS.log_dir + 'qa/' + str(int(time.time())), sess.graph)

        saver.restore(sess, chkpt_path + '/model.checkpoint')

        # Only full batches are evaluated; a trailing partial batch is dropped.
        num_steps = len(dev_data) // FLAGS.batch_size

        f1s = []
        exactmatches = []
        for e in range(1):
            for i in tqdm(range(num_steps), desc='Epoch ' + str(e)):
                batch_slice = slice(i * FLAGS.batch_size,
                                    (i + 1) * FLAGS.batch_size)
                batch_contexts = dev_contexts[batch_slice]
                batch_questions = dev_qs[batch_slice]
                batch_ans_text = dev_as[batch_slice]
                batch_answer_charpos = dev_a_pos[batch_slice]

                # Tokenise each context once and reuse the tokens below.
                batch_ctxt_toks = [
                    tokenise(ctxt, asbytes=False) for ctxt in batch_contexts
                ]

                # Convert character-level answer positions to [start, end)
                # token spans.
                batch_answers = []
                for j, ctxt in enumerate(batch_contexts):
                    ans_start = char_pos_to_word(
                        ctxt.encode(),
                        [t.encode() for t in batch_ctxt_toks[j]],
                        batch_answer_charpos[j])
                    ans_len = len(tokenise(batch_ans_text[j], asbytes=False))
                    batch_answers.append((ans_start, ans_start + ans_len))

                summ, pred = sess.run(
                    [model.eval_summary, model.pred_span],
                    feed_dict={
                        model.context_in:
                        get_padded_batch(batch_contexts, vocab),
                        model.question_in:
                        get_padded_batch(batch_questions, vocab),
                        model.answer_spans_in: batch_answers,
                        model.is_training: False
                    })

                summary_writer.add_summary(summ,
                                           global_step=(e * num_steps + i))

                gold_toks = []
                pred_toks = []
                for b in range(FLAGS.batch_size):
                    gold_toks.append(
                        batch_ctxt_toks[b]
                        [batch_answers[b][0]:batch_answers[b][1]])
                    pred_toks.append(
                        batch_ctxt_toks[b][pred[b][0]:pred[b][1]])
                gold_str = [" ".join(toks) for toks in gold_toks]
                pred_str = [" ".join(toks) for toks in pred_toks]

                batch_f1s = [
                    f1(gold_str[b], pred_str[b])
                    for b in range(FLAGS.batch_size)
                ]
                # np.prod over the element-wise comparison is 1 only when both
                # span ends match. (np.product was removed in NumPy 2.0.)
                batch_ems = [
                    np.prod(pred[b] == batch_answers[b]) * 1.0
                    for b in range(FLAGS.batch_size)
                ]
                f1s.extend(batch_f1s)
                exactmatches.extend(batch_ems)

                if i % FLAGS.eval_freq == 0:
                    parts = ["<h1>" + "Eval - Dev set" + "</h1>"]
                    for b in range(FLAGS.batch_size):
                        parts.append(batch_contexts[b] + '<br/>')
                        parts.append(batch_questions[b] + '<br/>')
                        parts.append(
                            str(batch_answers[b]) + str(gold_toks[b]) +
                            '<br/>')
                        parts.append(
                            str(pred[b]) + str(pred_toks[b]) + '<br/>')
                        parts.append(batch_ans_text[b] + '<br/>')
                        parts.append(pred_str[b] + '<br/>')
                        parts.append("F1: " + str(batch_f1s[b]) + '<br/>')
                        parts.append("EM: " + str(batch_ems[b]))
                        parts.append("<hr/>")
                    # Explicit UTF-8: contexts may contain non-ASCII text.
                    with open(FLAGS.log_dir + 'out_qa_eval.htm', 'w',
                              encoding='utf-8') as fp:
                        fp.write("".join(parts))
        print("F1: ", np.mean(f1s))
        print("EM: ", np.mean(exactmatches))
# Number of gold questions containing each question word in q_words.
counts = {k: 0 for k in q_words}

# Per-result question-word labels for the gold and predicted questions.
# Entries stay "other" unless a question word matches in the loop below.
word_gold = ["other" for i in range(len(results))]
word_pred = ["other" for i in range(len(results))]

# Per-result metrics, kept in the same order as `results`.
gold_pred_bleu = []
gold_pred_f1 = []
nlls = []
x = []  # token length of each gold question
for i, res in enumerate(results):
    qpred, qgold, ctxt, answer, a_pos = res['q_pred'], res['q_gold'], res[
        'c'], res['a_text'], res['a_pos']
    gold_pred_bleu.append(metrics.bleu(qgold, qpred))
    nlls.append(res['metrics']['nll'])
    # x.append(metrics.f1(ctxt, qpred))
    x.append(len(preprocessing.tokenise(qgold, asbytes=False)))
    gold_pred_f1.append(metrics.f1(qgold, qpred))

    # Label both questions by case-insensitive substring match against each
    # question word. When several words match, the last one in q_words wins.
    triggered = False
    for q in q_words:
        if q != "other" and q in qpred.lower():
            # scores[q].append(metrics.bleu(qgold, qpred))
            # scores[q].append(res['metrics']['qa'])
            # counts[q] += 1
            word_pred[i] = q

        if q != "other" and q in qgold.lower():
            counts[q] += 1
            word_gold[i] = q
            triggered = True
    # NOTE(review): the body of this branch is missing from this excerpt.
    if not triggered:
Exemplo n.º 9
0
# glove_short = list(loader.get_glove_vocab('./data/', size=2000, d=200).keys())[4:]

# Accumulators for corpus statistics over every SQuAD triple.
squad_vocab = set()
squad_count = Counter()

start = time()
max_context_len = 0
max_pos = None
debugstr = ""

# Token counts of each context and each question, in dataset order.
c_lens = []
q_lens = []
for i, triple in enumerate(squad):
    # triple[0] is the context text, triple[1] the question text.
    c_toks = preprocessing.tokenise(triple[0], asbytes=False)
    q_toks = preprocessing.tokenise(triple[1], asbytes=False)

    # Context tokens are counted first, then question tokens.
    squad_count.update(c_toks)
    c_lens.append(len(c_toks))

    squad_count.update(q_toks)
    q_lens.append(len(q_toks))
end = time()
def main(_):
    """Score the pre-generated BASELINE questions against SQuAD gold questions.

    For every SQuAD entry the baseline question is looked up by id through
    ``FileLoaderModel``; entries without a prediction are counted as missing
    and skipped. Each prediction is scored with F1, BLEU and
    sum-of-word-embedding (SOWE) cosine similarity, plus QA, LM and
    discriminator scores when ``FLAGS.eval_metrics`` is set. NLL is reported
    as 0 because the baseline provides no likelihood.

    Side effects: writes aggregate metrics and per-example results as JSON
    under ``FLAGS.log_dir`` and prints the aggregate metrics.
    """
    model = FileLoaderModel('./models/BASELINE')
    squad = loader.load_squad_triples(FLAGS.data_path, True, as_dict=True)

    disc_path = FLAGS.model_dir + 'saved/discriminator-trained-latent'

    glove_embeddings = loader.load_glove(FLAGS.data_path)

    def sum_of_word_embeddings(text):
        """Sum the GloVe vectors of the tokens in ``text``; unknown tokens add zeros."""
        return np.sum(np.asarray([
            glove_embeddings[w] if w in glove_embeddings else np.zeros(
                (FLAGS.embedding_size, ))
            for w in preprocessing.tokenise(text, asbytes=False)
        ]),
                      axis=0)

    if FLAGS.eval_metrics:
        lm = LstmLmInstance()
        qa = QANetInstance()

        lm.load_from_chkpt(FLAGS.model_dir + 'saved/lmtest')
        qa.load_from_chkpt(FLAGS.model_dir + 'saved/qanet')

        discriminator = DiscriminatorInstance(trainable=False, path=disc_path)

    f1s = []
    bleus = []
    qa_scores = []
    lm_scores = []
    disc_scores = []
    sowe_similarities = []

    res = []

    missing = 0

    # `qid` rather than `id`, to avoid shadowing the builtin.
    for qid, el in tqdm(squad.items()):
        # el is (context, gold question, answer text, answer position).
        unfilt_ctxt_batch = [el[0]]
        a_text_batch = [el[2]]
        a_pos_batch = [el[3]]

        pred_str = model.get_q(qid)

        if pred_str is None:
            missing += 1
            continue
        gold_str = el[1]

        if FLAGS.eval_metrics:
            qa_pred = qa.get_ans(unfilt_ctxt_batch, [pred_str])

            qa_score = metrics.f1(el[2].lower(), qa_pred[0].lower())
            # Both helpers are called with a one-element batch; take the single
            # entry so each per-example metric is a scalar.
            lm_score = lm.get_seq_perplexity([pred_str]).tolist()[0]
            disc_score = discriminator.get_pred(unfilt_ctxt_batch, [pred_str],
                                                a_text_batch,
                                                a_pos_batch).tolist()[0]

        f1s.append(metrics.f1(gold_str, pred_str))
        bleus.append(metrics.bleu(gold_str, pred_str))

        # calc cosine similarity between sums of word embeddings
        pred_sowe = sum_of_word_embeddings(pred_str)
        gold_sowe = sum_of_word_embeddings(gold_str)
        this_similarity = np.inner(pred_sowe, gold_sowe) / np.linalg.norm(
            pred_sowe, ord=2) / np.linalg.norm(gold_sowe, ord=2)

        sowe_similarities.append(this_similarity)

        this_metric_dict = {
            'f1': f1s[-1],
            'bleu': bleus[-1],
            'nll': 0,
            'sowe': sowe_similarities[-1]
        }
        if FLAGS.eval_metrics:
            this_metric_dict = {
                **this_metric_dict, 'qa': qa_score,
                'lm': lm_score,
                'disc': disc_score
            }
            qa_scores.append(qa_score)
            lm_scores.append(lm_score)
            disc_scores.append(disc_score)

        res.append({
            'c': el[0],
            'q_pred': pred_str,
            'q_gold': gold_str,
            'a_pos': el[3],
            'a_text': el[2],
            'metrics': this_metric_dict
        })

    metric_dict = {
        'f1': np.mean(f1s),
        'bleu': np.mean(bleus),
        'nll': 0,
        'sowe': np.mean(sowe_similarities)
    }
    if FLAGS.eval_metrics:
        metric_dict = {
            **metric_dict, 'qa': np.mean(qa_scores),
            'lm': np.mean(lm_scores),
            'disc': np.mean(disc_scores)
        }

    with open(FLAGS.log_dir + 'out_eval_BASELINE' +
              ("_train" if not FLAGS.eval_on_dev else "") + '.json',
              'w',
              encoding='utf-8') as fp:
        json.dump({"metrics": metric_dict, "results": res}, fp)

    print("F1: ", metric_dict['f1'])
    print("BLEU: ", metric_dict['bleu'])
    print("NLL: ", 0)
    print("SOWE: ", metric_dict['sowe'])
    if FLAGS.eval_metrics:
        print("QA: ", metric_dict['qa'])
        print("LM: ", metric_dict['lm'])
        print("Disc: ", metric_dict['disc'])

    print(missing, " ids were missing")
Exemplo n.º 11
0
    print("LM loaded")
    qa.load_from_chkpt(FLAGS.model_dir + 'saved/qatest')
    print("QA loaded")

    lm_vocab = lm.vocab
    qa_vocab = qa.vocab

    f1s = []
    bleus = []
    qa_scores = []
    lm_scores = []

    for i in tqdm(range(len(train_data))):
        triple = train_data[i]
        ctxt, q, ans, ans_pos = triple
        ctxt_toks = preprocessing.tokenise(ctxt, asbytes=False)

        # print(triple[0])
        gen_q = model.get_q(triple[0], triple[2], triple[3])
        gen_q_toks = preprocessing.tokenise(gen_q, asbytes=False)

        f1s.append(metrics.f1(triple[1], gen_q))
        bleus.append(metrics.bleu(triple[1], gen_q))

        qhat_for_lm = preprocessing.lookup_vocab(gen_q_toks,
                                                 lm_vocab,
                                                 do_tokenise=False,
                                                 asbytes=False)
        ctxt_for_lm = preprocessing.lookup_vocab(ctxt_toks,
                                                 lm_vocab,
                                                 do_tokenise=False,
Exemplo n.º 12
0
def _encode_padded_batch(sents, vocab):
    """Turn raw sentences into a rectangular array of vocabulary ids.

    Each sentence is tokenised, wrapped in the SOS/EOS ids and right-padded
    with the PAD id up to the length of the longest sequence in the batch.
    Tokens that are not in ``vocab`` are mapped to the OOV id.

    Args:
        sents: non-empty sequence of sentence strings.
        vocab: dict from token to integer id; must contain the
            ``loader.SOS``, ``loader.EOS``, ``loader.OOV`` and ``loader.PAD``
            entries.

    Returns:
        A 2-D numpy array with one row of ids per sentence.
    """
    sos_id = vocab[loader.SOS]
    eos_id = vocab[loader.EOS]
    oov_id = vocab[loader.OOV]
    pad_id = vocab[loader.PAD]

    id_seqs = [
        [sos_id]
        + [vocab.get(tok, oov_id) for tok in tokenise(sent, asbytes=False)]
        + [eos_id]
        for sent in sents
    ]
    max_seq_len = max(len(seq) for seq in id_seqs)
    return np.asarray(
        [seq + [pad_id] * (max_seq_len - len(seq)) for seq in id_seqs])


def main(_):
    """Train the LSTM language model on SQuAD questions.

    Builds a vocabulary from the training questions, trains ``LstmLm`` on the
    de-duplicated questions for ``FLAGS.lm_num_epochs`` epochs, and after each
    epoch evaluates perplexity on the dev questions. A checkpoint is written
    to ``<model_dir>/lm/<run_id>`` whenever the mean dev perplexity improves.

    Args:
        _: unused positional argument (the function never reads it).
    """
    run_id = str(int(time.time()))
    chkpt_path = FLAGS.model_dir + 'lm/' + run_id

    # Created once up front: vocab.json is written here before the session
    # exists, and checkpoints are saved here later.
    os.makedirs(chkpt_path, exist_ok=True)

    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, True)

    np.random.shuffle(train_data)

    print('Loaded SQuAD with ', len(train_data), ' triples')
    # Only the question field of each (context, question, answer, answer_pos)
    # tuple is used for language modelling.
    _, train_qs, _, _ = zip(*train_data)
    _, dev_qs, _, _ = zip(*dev_data)
    vocab = loader.get_vocab(train_qs, tf.app.flags.FLAGS.lm_vocab_size)

    with open(chkpt_path + '/vocab.json', 'w') as outfile:
        json.dump(vocab, outfile)

    unique_sents = list(set(train_qs))
    print(len(unique_sents), " unique sentences")

    # Create model

    model = LstmLm(vocab, num_units=FLAGS.lm_units)
    with model.graph.as_default():
        saver = tf.train.Saver()

    batch_size = FLAGS.batch_size
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
    with tf.Session(graph=model.graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        summary_writer = tf.summary.FileWriter(
            FLAGS.log_directory + 'lm/' + run_id, sess.graph)
        try:
            if FLAGS.restore:
                # NOTE(review): in this branch no variables are restored or
                # initialised here - confirm whether training can proceed.
                # saver.restore(sess, chkpt_path+ '/model.checkpoint')
                print('Loading not implemented yet')
            else:
                print("Building graph, loading glove")
                sess.run(tf.global_variables_initializer())

            # Integer division: a trailing partial batch is dropped.
            num_steps = len(unique_sents) // batch_size
            num_steps_dev = len(dev_qs) // batch_size

            best_perp = 1e6

            for e in range(FLAGS.lm_num_epochs):
                np.random.shuffle(unique_sents)
                for i in tqdm(range(num_steps), desc='Epoch ' + str(e)):
                    seq_batch = unique_sents[i * batch_size:(i + 1) *
                                             batch_size]
                    padded_batch = _encode_padded_batch(seq_batch, vocab)

                    # The extra fetches are not used below; they are kept so
                    # the set of tensors run per step is unchanged.
                    summ, *_ = sess.run(
                        [
                            model.train_summary, model.optimise, model.preds,
                            model.tgt_output, model.input_seqs
                        ],
                        feed_dict={model.input_seqs: padded_batch})
                    summary_writer.add_summary(
                        summ, global_step=(e * num_steps + i))

                perps = []
                for i in tqdm(range(num_steps_dev), desc="Eval"):
                    seq_batch = dev_qs[i * batch_size:(i + 1) * batch_size]
                    padded_batch = _encode_padded_batch(seq_batch, vocab)

                    perp = sess.run(
                        model.perplexity,
                        feed_dict={model.input_seqs: padded_batch})
                    perps.extend(perp)

                if not perps:
                    # Fewer dev questions than one batch: there is nothing to
                    # average, so skip logging and checkpoint selection
                    # instead of dividing by zero.
                    print("No complete dev batch - skipping evaluation")
                    continue

                mean_perp = np.mean(perps)
                perpsummary = tf.Summary(value=[
                    tf.Summary.Value(tag="dev_perf/perplexity",
                                     simple_value=float(mean_perp))
                ])

                summary_writer.add_summary(perpsummary,
                                           global_step=((e + 1) * num_steps))

                if mean_perp < best_perp:
                    print(mean_perp, " Saving!")
                    saver.save(sess, chkpt_path + '/model.checkpoint')
                    best_perp = mean_perp
        finally:
            # Flush buffered events to disk even if training is interrupted.
            summary_writer.close()
Exemplo n.º 13
0
# QA:  0.06118585070207543
# LM:  534.2029482313792

# Filtered candidates by LM
# F1:  0.1523949117692406
# BLEU:  0.0015981573389975351
# QA:  0.060554504435173614
# LM:  480.1121001809438

for i in tqdm(range(len(train_data))):
    triple=train_data[i]

    ctxt,q,ans,ans_pos = triple

    ctxt_filt, ans_pos = preprocessing.filter_context(ctxt, ans_pos, 0, 30)
    ctxt_toks = preprocessing.tokenise(ctxt, asbytes=False)


    response = requests.post(url, data=ctxt_filt.encode('utf-8'))
    if response.status_code != 200:
        exit("There was a problem connecting to the CoreNLP server!")

    res = response.json()
    # print(ctxt_filt)
    candidates=[]

    # Run NER to get question word
    for ent in res['sentences'][0]['entitymentions']:
        if ent['text'].find(ans):
            ner = ent['ner']
        else:
Exemplo n.º 14
0
def _scalar_summary(tag, value):
    """Wrap one scalar in a ``tf.Summary`` proto under ``tag``."""
    return tf.Summary(
        value=[tf.Summary.Value(tag=tag, simple_value=value)])


def _answer_token_span(ctxt, ans_text, ans_charpos):
    """Return the inclusive (start, end) token span of an answer in ``ctxt``.

    The start index comes from ``char_pos_to_word`` applied to the UTF-8
    encoded context and tokens; the end is the start plus the number of
    tokens in ``ans_text``, minus one.
    """
    ctxt_toks = [t.encode() for t in tokenise(ctxt, asbytes=False)]
    start = char_pos_to_word(ctxt.encode(), ctxt_toks, ans_charpos)
    return (start, start + len(tokenise(ans_text, asbytes=False)) - 1)


def main(_):
    """Train the MPCM question-answering model on SQuAD.

    Trains ``MpcmQa`` for ``FLAGS.qa_num_epochs`` epochs, logging train F1/EM
    every ``FLAGS.eval_freq`` steps. After each epoch it evaluates on a random
    subset of the dev set and saves a checkpoint to ``<model_dir>/qa/<run_id>``
    whenever the mean dev NLL improves.

    Args:
        _: unused positional argument (the function never reads it).
    """
    if FLAGS.testing:
        print('TEST MODE - reducing model size')
        FLAGS.qa_encoder_units = 32
        FLAGS.qa_match_units = 32
        FLAGS.qa_batch_size = 16
        FLAGS.embedding_size = 50

    run_id = str(int(time.time()))

    chkpt_path = FLAGS.model_dir + 'qa/' + run_id
    # NOTE(review): the run to restore from is hard-coded.
    restore_path = FLAGS.model_dir + 'qa/1529056867'

    os.makedirs(chkpt_path, exist_ok=True)

    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, dev=True, ans_list=True)

    train_data = filter_squad(train_data, window_size=FLAGS.filter_window_size, max_tokens=FLAGS.filter_max_tokens)
    # dev_data = filter_squad(dev_data, window_size=FLAGS.filter_window_size, max_tokens=FLAGS.filter_max_tokens)

    if FLAGS.testing:
        train_data = train_data[:1000]
        num_dev_samples = 100
    else:
        num_dev_samples = 3000

    print('Loaded SQuAD with ', len(train_data), ' triples')
    train_contexts, train_qs, _, _ = zip(*train_data)

    if FLAGS.restore:
        with open(restore_path + '/vocab.json') as f:
            vocab = json.load(f)
    else:
        vocab = loader.get_vocab(train_contexts + train_qs, tf.app.flags.FLAGS.qa_vocab_size)
    # Always store the vocab next to the checkpoints of this run, so a run
    # that started from a restored model also has it in its own directory.
    with open(chkpt_path + '/vocab.json', 'w') as outfile:
        json.dump(vocab, outfile)

    model = MpcmQa(vocab)
    with model.graph.as_default():
        saver = tf.train.Saver()

    batch_size = FLAGS.qa_batch_size

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit, allow_growth=True)
    with tf.Session(graph=model.graph, config=tf.ConfigProto(gpu_options=gpu_options)) as sess:

        summary_writer = tf.summary.FileWriter(FLAGS.log_directory + 'qa/' + run_id, sess.graph)
        try:
            if FLAGS.restore:
                saver.restore(sess, restore_path + '/model.checkpoint')
                # NOTE(review): starting epoch is hard-coded for the restored run.
                start_e = 40  # FLAGS.qa_num_epochs
                print('Loaded model')
            else:
                print("Building graph, loading glove")
                start_e = 0
                sess.run(tf.global_variables_initializer())

            # Integer division: trailing partial batches are dropped. The dev
            # step count is bounded by the real dev size so every eval batch
            # is full.
            num_steps_train = len(train_data) // batch_size
            num_steps_dev = min(num_dev_samples, len(dev_data)) // batch_size

            # Seed the dev curves with a zero point at the starting step.
            summary_writer.add_summary(_scalar_summary("dev_perf/f1", 0.0), global_step=start_e * num_steps_train)
            summary_writer.add_summary(_scalar_summary("dev_perf/em", 0.0), global_step=start_e * num_steps_train)

            best_oos_nll = 1e6

            for e in range(start_e, start_e + FLAGS.qa_num_epochs):
                np.random.shuffle(train_data)

                for i in tqdm(range(num_steps_train), desc='Epoch ' + str(e)):
                    batch = train_data[i * batch_size:(i + 1) * batch_size]
                    batch_contexts, batch_questions, batch_ans_text, batch_answer_charpos = zip(*batch)

                    batch_answers = [
                        _answer_token_span(ctxt, batch_ans_text[j], batch_answer_charpos[j])
                        for j, ctxt in enumerate(batch_contexts)
                    ]

                    _, summ, pred = sess.run(
                        [model.optimizer, model.train_summary, model.pred_span],
                        feed_dict={model.context_in: get_padded_batch(batch_contexts, vocab),
                                   model.question_in: get_padded_batch(batch_questions, vocab),
                                   model.answer_spans_in: batch_answers,
                                   model.is_training: True})

                    step = e * num_steps_train + i
                    summary_writer.add_summary(summ, global_step=step)

                    if i % FLAGS.eval_freq == 0:
                        # Score the batch that was just trained on.
                        f1s = []
                        exactmatches = []
                        for b, ctxt in enumerate(batch_contexts):
                            ctxt_toks = tokenise(ctxt, asbytes=False)
                            gold_str = " ".join(ctxt_toks[batch_answers[b][0]:batch_answers[b][1] + 1])
                            pred_str = " ".join(ctxt_toks[pred[b][0]:pred[b][1] + 1])
                            f1s.append(f1(gold_str, pred_str))
                            # Exact match needs both span ends to agree.
                            exactmatches.append(float(np.all(pred[b] == batch_answers[b])))

                        summary_writer.add_summary(
                            _scalar_summary("train_perf/f1", sum(f1s) / len(f1s)), global_step=step)
                        summary_writer.add_summary(
                            _scalar_summary("train_perf/em", sum(exactmatches) / len(exactmatches)), global_step=step)

                f1s = []
                exactmatches = []
                nlls = []

                np.random.shuffle(dev_data)
                dev_subset = dev_data[:num_dev_samples]
                for i in tqdm(range(num_steps_dev), desc='Eval ' + str(e)):
                    batch = dev_subset[i * batch_size:(i + 1) * batch_size]
                    batch_contexts, batch_questions, batch_ans_text, batch_answer_charpos = zip(*batch)

                    # Dev entries carry a list of answers; the first one is
                    # used as the span fed to the model.
                    batch_answers = [
                        _answer_token_span(ctxt, batch_ans_text[j][0], batch_answer_charpos[j][0])
                        for j, ctxt in enumerate(batch_contexts)
                    ]

                    pred, nll = sess.run(
                        [model.pred_span, model.nll],
                        feed_dict={model.context_in: get_padded_batch(batch_contexts, vocab),
                                   model.question_in: get_padded_batch(batch_questions, vocab),
                                   model.answer_spans_in: batch_answers,
                                   model.is_training: False})

                    for b, ctxt in enumerate(batch_contexts):
                        pred_str = normalize_answer(
                            " ".join(tokenise(ctxt, asbytes=False)[pred[b][0]:pred[b][1] + 1]))
                        golds = [normalize_answer(ans) for ans in batch_ans_text[b]]
                        # Score against every listed answer and keep the best.
                        f1s.append(max(f1(gold, pred_str) for gold in golds))
                        exactmatches.append(max(1.0 * (gold == pred_str) for gold in golds))
                    nlls.extend(nll.tolist())

                if not f1s:
                    # Dev set smaller than one batch: nothing to average.
                    print("No complete dev batch - skipping evaluation")
                    continue

                mean_nll = np.mean(nlls)
                epoch_end_step = (e + 1) * num_steps_train
                summary_writer.add_summary(
                    _scalar_summary("dev_perf/f1", sum(f1s) / len(f1s)), global_step=epoch_end_step)
                summary_writer.add_summary(
                    _scalar_summary("dev_perf/em", sum(exactmatches) / len(exactmatches)), global_step=epoch_end_step)
                summary_writer.add_summary(
                    _scalar_summary("dev_perf/nll", mean_nll), global_step=epoch_end_step)

                if mean_nll < best_oos_nll:
                    print("New best NLL! ", mean_nll, " Saving... F1: ", np.mean(f1s))
                    best_oos_nll = mean_nll
                    saver.save(sess, chkpt_path + '/model.checkpoint')
                else:
                    print("NLL not improved ", mean_nll)
        finally:
            # Flush buffered events to disk even if training is interrupted.
            summary_writer.close()
Exemplo n.º 15
0
def bleu(gold, prediction, order=4):
    """Score one predicted sentence against one gold sentence.

    Both strings are tokenised and handed to ``compute_bleu`` as a
    one-sentence corpus with a single reference, unsmoothed, using n-grams up
    to ``order``. The first element of ``compute_bleu``'s result is returned
    (presumably the BLEU score itself - confirm against ``compute_bleu``).
    """
    reference_toks = tokenise(gold, asbytes=False)
    hypothesis_toks = tokenise(prediction, asbytes=False)
    result = compute_bleu([[reference_toks]], [hypothesis_toks],
                          smooth=False,
                          max_order=order)
    return result[0]
Exemplo n.º 16
0
def bleu_corpus(golds, preds, order=4):
    """Score a list of predictions against their gold sentences as a corpus.

    Every gold string becomes a single-reference list for the prediction at
    the same position. ``compute_bleu`` is called unsmoothed with n-grams up
    to ``order`` and the first element of its result is returned (presumably
    the corpus-level BLEU score - confirm against ``compute_bleu``).
    """
    references = []
    for gold in golds:
        references.append([tokenise(gold, asbytes=False)])

    hypotheses = []
    for pred in preds:
        hypotheses.append(tokenise(pred, asbytes=False))

    result = compute_bleu(references, hypotheses,
                          smooth=False,
                          max_order=order)
    return result[0]