def get_q(self, ctxt, ans, ans_pos):
    """Return a question for `ans` built from the entity and verb nearest to it.

    The context is reduced with ``preprocessing.filter_context`` and parsed
    with ``self.nlp``.  Every token that is not itself an answer token is a
    candidate: nouns are recorded as generic 'THING' entities, tokens carrying
    a named-entity type are recorded under that type (unless it translates to
    'CARDINAL' or the token opens an entity that continues into the answer),
    and VBG/VBN tokens are recorded as verbs.  The closest entity and closest
    verb are handed to ``self.format_q`` together with the answer's dominant
    entity type.

    NOTE(review): the three candidate checks are assumed to sit inside the
    "token is not part of the answer" guard; the source this was rebuilt from
    had lost its indentation - confirm against the intended layout.

    Args:
        ctxt: context text containing the answer.
        ans: answer text.
        ans_pos: character position of the answer in `ctxt`.

    Returns:
        Whatever ``self.format_q`` returns for the selected types and tokens.
    """
    ctxt_filt, ans_pos = preprocessing.filter_context(ctxt, ans_pos, 0, 30)
    ans_toks = preprocessing.tokenise(ans, asbytes=False)
    doc = self.nlp(ctxt_filt)
    ctxt_toks = [str(tok).lower() for tok in doc]
    n_ans = len(ans_toks)
    n_doc = len(doc)

    # Locate the answer by its first token; fall back to the character
    # offset when that token does not appear verbatim in the parsed context.
    first_ans_tok = ans_toks[0]
    if first_ans_tok in ctxt_toks:
        ans_ix = ctxt_toks.index(first_ans_tok)
    else:
        ans_ix = preprocessing.char_pos_to_word(
            ctxt_filt, ctxt_toks, ans_pos, asbytes=False)

    # Most frequent entity label across the answer span.
    ans_span_end = min(ans_ix + n_ans, n_doc)
    ans_type_counts = Counter(
        doc[i].ent_type_ for i in range(ans_ix, ans_span_end))
    ans_type = ans_type_counts.most_common()[0][0]

    # Candidates are (distance, type, token, index) tuples.
    entity_candidates = []
    verb_candidates = []
    for offset, tok_lower in enumerate(ctxt_toks):
        if tok_lower in ans_toks:
            continue

        tok = doc[offset]
        # Tokens to the nearer edge of the answer span.
        dist = max(offset - ans_ix - n_ans + 1, ans_ix - offset)

        if tok.pos_ == 'NOUN':
            entity_candidates.append((dist, 'THING', tok, offset))

        next_tok_lower = ctxt_toks[min(offset + 1, n_doc - 1)]
        if (tok.ent_type_ != ''
                and not (tok.ent_iob_ == 'B' and next_tok_lower in ans_toks)
                and self.type_translate(tok.ent_type_) != 'CARDINAL'):
            entity_candidates.append((dist, tok.ent_type_, tok, offset))

        if tok.tag_ in ['VBG', 'VBN']:
            verb_candidates.append((dist, tok.tag_, tok, offset))

    if verb_candidates:
        nearest_verb = min(verb_candidates, key=lambda cand: cand[0])
    else:
        nearest_verb = (0, 'VBG', 'is', 0)

    if entity_candidates:
        nearest_entity = min(entity_candidates, key=lambda cand: cand[0])
        start_ix = nearest_entity[3]
        # Extend over the following tokens marked as inside an entity.
        end_ix = start_ix
        while end_ix + 1 < n_doc and doc[end_ix + 1].ent_iob_ == 'I':
            end_ix += 1
        entity_toks = [str(tok) for tok in doc[start_ix:end_ix + 1]]
        entity_type = nearest_entity[1]
    else:
        entity_toks = ["thing"]
        entity_type = "THING"

    return self.format_q(
        self.type_translate(ans_type),
        self.type_translate(entity_type),
        entity_toks,
        nearest_verb[2])
def main(_):
    """Evaluate (and optionally fine-tune) the saved QANet model on SQuAD.

    Loads SQuAD triples from ``./data/``, restores the checkpoint under
    ``./models/saved/qanet2/``, keeps only examples whose context and question
    are shorter than ``FLAGS.test_para_limit`` / ``FLAGS.test_ques_limit``
    tokens, and prints exact-match and F1 on the dev set.  Each dev example
    carries a list of reference answers; a prediction is scored against every
    reference and the best score counts.

    Training is controlled by the hard-coded ``trainable`` switch below.  When
    enabled, the loss is logged every 50 steps, dev F1 every 1000 steps, and a
    checkpoint is written whenever dev F1 improves.

    Args:
        _: unused positional argument.
    """
    from tqdm import tqdm

    FLAGS = tf.app.flags.FLAGS
    batch_size = FLAGS.batch_size

    # False: only evaluate the restored checkpoint.
    trainable = False

    squad_train_full = loader.load_squad_triples(path="./data/")
    squad_dev_full = loader.load_squad_triples(path="./data/", dev=True, ans_list=True)

    para_limit = FLAGS.test_para_limit
    ques_limit = FLAGS.test_ques_limit

    qa = QANetInstance()
    qa.load_from_chkpt("./models/saved/qanet2/", trainable=trainable)

    def within_limits(example):
        """True when the example's context and question are under the token limits."""
        return (len(word_tokenize(example[0])) < para_limit
                and len(word_tokenize(example[1])) < ques_limit)

    squad_train = [x for x in squad_train_full if within_limits(x)]
    squad_dev = [x for x in squad_dev_full if within_limits(x)]

    num_train_steps = len(squad_train) // batch_size
    num_eval_steps = len(squad_dev) // batch_size

    def evaluate_dev(verbose=False):
        """Score the dev set; return per-example (F1 list, EM list)."""
        f1s = []
        ems = []
        for step in tqdm(range(num_eval_steps)):
            batch = squad_dev[step * batch_size:(step + 1) * batch_size]
            spans = qa.get_ans([x[0] for x in batch], [x[1] for x in batch])
            for example, span in zip(batch, spans):
                pred = metrics.normalize_answer(span)
                # example[2] is a list of acceptable answers (ans_list=True).
                golds = [metrics.normalize_answer(gold) for gold in example[2]]
                f1s.append(max(metrics.f1(gold, pred) for gold in golds))
                ems.append(max(1.0 * (gold == pred) for gold in golds))
            if verbose and step == 0:
                print(f1s, ems)
                print(batch[0])
                print(spans[0])
        return f1s, ems

    best_f1 = 0
    if trainable:
        run_id = str(int(time.time()))
        chkpt_path = FLAGS.model_dir + 'qanet/' + run_id
        if not os.path.exists(chkpt_path):
            os.makedirs(chkpt_path)
        summary_writer = tf.summary.FileWriter(
            FLAGS.log_directory + 'qanet/' + run_id, qa.model.graph)

        # Rows of the identity matrix serve as one-hot position targets;
        # build it once instead of twice per example.
        one_hot = np.eye(para_limit)

        for i in tqdm(range(FLAGS.qa_num_epochs * num_train_steps)):
            # Index batches by the step within the current epoch: using the
            # global step would run off the end of the data after epoch one.
            step = i % num_train_steps
            if step == 0:
                print('Shuffling training set')
                np.random.shuffle(squad_train)

            this_batch = squad_train[step * batch_size:(step + 1) * batch_size]
            batch_contexts, batch_questions, batch_ans_text, batch_ans_charpos = zip(
                *this_batch)

            batch_answers = []
            for ctxt, ans_text, ans_charpos in zip(
                    batch_contexts, batch_ans_text, batch_ans_charpos):
                start_ix = char_pos_to_word(
                    ctxt.encode(),
                    [t.encode() for t in word_tokenize(ctxt)],
                    ans_charpos)
                end_ix = start_ix + len(word_tokenize(ans_text)) - 1
                batch_answers.append((one_hot[start_ix], one_hot[end_ix]))

            this_loss = qa.train_step(batch_contexts, batch_questions, batch_answers)

            if i % 50 == 0:
                losssummary = tf.Summary(value=[
                    tf.Summary.Value(tag="train_loss/loss",
                                     simple_value=np.mean(this_loss))
                ])
                summary_writer.add_summary(losssummary, global_step=i)

            if i > 0 and i % 1000 == 0:
                dev_f1s, _ = evaluate_dev()
                mean_f1 = np.mean(dev_f1s)
                f1summary = tf.Summary(value=[
                    tf.Summary.Value(tag="dev_perf/f1", simple_value=mean_f1)
                ])
                summary_writer.add_summary(f1summary, global_step=i)
                if mean_f1 > best_f1:
                    print("New best F1! ", mean_f1, " Saving...")
                    best_f1 = mean_f1
                    qa.saver.save(qa.sess, chkpt_path + '/model.checkpoint')

    qa_f1s, qa_em = evaluate_dev(verbose=True)
    print('EM: ', np.mean(qa_em))
    print('F1: ', np.mean(qa_f1s))
def main(_):
    """Evaluate a saved MPCM QA checkpoint on the first 500 SQuAD dev triples.

    Restores the vocabulary and model from ``FLAGS.model_dir + 'saved/qatest'``,
    runs the dev examples through the model in full batches, writes the
    model's eval summary for each batch, periodically dumps an HTML report of
    one batch to ``FLAGS.log_dir + 'out_qa_eval.htm'`` (overwritten each
    time), and finally prints mean F1 and exact match.

    Answer spans are ``(start, end)`` word indices with an INCLUSIVE end,
    which is the convention the training script uses when it feeds
    ``answer_spans_in`` and decodes ``pred_span``.

    NOTE(review): this script reads ``FLAGS.log_dir`` and ``FLAGS.batch_size``
    whereas the training script reads ``FLAGS.log_directory`` and
    ``FLAGS.qa_batch_size`` - confirm both flag sets are defined.

    Args:
        _: unused positional argument.
    """
    from html import escape

    # The training split is only loaded for the size reported below.
    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, True)[:500]
    chkpt_path = FLAGS.model_dir + 'saved/qatest'
    print('Loaded SQuAD with ', len(train_data), ' triples')

    dev_contexts, dev_qs, dev_as, dev_a_pos = zip(*dev_data)

    with open(chkpt_path + '/vocab.json') as f:
        vocab = json.load(f)

    model = MpcmQa(vocab, training_mode=False)
    with model.graph.as_default():
        saver = tf.train.Saver()

    batch_size = FLAGS.batch_size
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit)
    with tf.Session(graph=model.graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        summary_writer = tf.summary.FileWriter(
            FLAGS.log_dir + 'qa/' + str(int(time.time())), sess.graph)
        saver.restore(sess, chkpt_path + '/model.checkpoint')

        # Only full batches are evaluated; a trailing partial batch is dropped.
        num_steps = len(dev_data) // batch_size
        f1s = []
        exactmatches = []

        for i in tqdm(range(num_steps), desc='Epoch 0'):
            lo = i * batch_size
            hi = lo + batch_size
            batch_contexts = dev_contexts[lo:hi]
            batch_questions = dev_qs[lo:hi]
            batch_ans_text = dev_as[lo:hi]
            batch_answer_charpos = dev_a_pos[lo:hi]

            # Tokenise each context once; reused for span lookup and decoding.
            batch_ctxt_toks = [tokenise(ctxt, asbytes=False) for ctxt in batch_contexts]

            batch_answers = []
            for ctxt, toks, ans_text, charpos in zip(
                    batch_contexts, batch_ctxt_toks, batch_ans_text,
                    batch_answer_charpos):
                start_ix = char_pos_to_word(
                    ctxt.encode(), [t.encode() for t in toks], charpos)
                # Inclusive end index of the answer's last token.
                end_ix = start_ix + len(tokenise(ans_text, asbytes=False)) - 1
                batch_answers.append((start_ix, end_ix))

            summ, pred = sess.run(
                [model.eval_summary, model.pred_span],
                feed_dict={
                    model.context_in: get_padded_batch(batch_contexts, vocab),
                    model.question_in: get_padded_batch(batch_questions, vocab),
                    model.answer_spans_in: batch_answers,
                    model.is_training: False
                })
            summary_writer.add_summary(summ, global_step=i)

            gold_toks = [toks[span[0]:span[1] + 1]
                         for toks, span in zip(batch_ctxt_toks, batch_answers)]
            pred_toks = [toks[span[0]:span[1] + 1]
                         for toks, span in zip(batch_ctxt_toks, pred)]
            gold_str = [" ".join(toks) for toks in gold_toks]
            pred_str = [" ".join(toks) for toks in pred_toks]

            batch_f1 = [f1(gold, guess) for gold, guess in zip(gold_str, pred_str)]
            # Exact match: predicted start and end both equal the gold span.
            batch_em = [float(np.array_equal(guess, gold))
                        for guess, gold in zip(pred, batch_answers)]
            f1s.extend(batch_f1)
            exactmatches.extend(batch_em)

            if i % FLAGS.eval_freq == 0:
                parts = ["<h1>" + "Eval - Dev set" + "</h1>"]
                for b in range(len(batch_answers)):
                    # Dataset text may contain '<' or '&'; escape it so the
                    # report renders as written.
                    rows = [
                        batch_contexts[b],
                        batch_questions[b],
                        str(batch_answers[b]) + str(gold_toks[b]),
                        str(pred[b]) + str(pred_toks[b]),
                        batch_ans_text[b],
                        pred_str[b],
                        "F1: " + str(batch_f1[b]),
                    ]
                    parts.extend(escape(row, quote=False) + '<br/>' for row in rows)
                    parts.append("EM: " + str(batch_em[b]))
                    parts.append("<hr/>")
                with open(FLAGS.log_dir + 'out_qa_eval.htm', 'w') as fp:
                    fp.write("".join(parts))

        print("F1: ", np.mean(f1s))
        print("EM: ", np.mean(exactmatches))
def main(_):
    """Train the MPCM QA model on SQuAD, checkpointing on best dev NLL.

    Creates a run directory ``FLAGS.model_dir + 'qa/' + <timestamp>``, loads
    and filters the training triples, builds (or, with ``FLAGS.restore``,
    reloads) the vocabulary and model, then for each epoch:

    * shuffles the training data and runs one optimiser step per full batch,
      logging the model's train summary every step and train F1/EM for the
      current batch every ``FLAGS.eval_freq`` steps;
    * scores a random dev subset (best F1/EM over each example's reference
      answers, NLL against the first reference) and logs dev F1/EM/NLL;
    * saves a checkpoint when the mean dev NLL improves.

    Answer spans fed to the model are ``(start, end)`` word indices with an
    inclusive end.

    Args:
        _: unused positional argument.
    """
    if FLAGS.testing:
        print('TEST MODE - reducing model size')
        FLAGS.qa_encoder_units = 32
        FLAGS.qa_match_units = 32
        FLAGS.qa_batch_size = 16
        FLAGS.embedding_size = 50

    # Read after the test-mode override above.
    batch_size = FLAGS.qa_batch_size

    run_id = str(int(time.time()))
    chkpt_path = FLAGS.model_dir + 'qa/' + run_id
    restore_path = FLAGS.model_dir + 'qa/1529056867'
    if not os.path.exists(chkpt_path):
        os.makedirs(chkpt_path)

    train_data = loader.load_squad_triples(FLAGS.data_path, False)
    dev_data = loader.load_squad_triples(FLAGS.data_path, dev=True, ans_list=True)

    train_data = filter_squad(train_data,
                              window_size=FLAGS.filter_window_size,
                              max_tokens=FLAGS.filter_max_tokens)

    if FLAGS.testing:
        train_data = train_data[:1000]
        num_dev_samples = 100
    else:
        num_dev_samples = 3000

    print('Loaded SQuAD with ', len(train_data), ' triples')
    if not dev_data:
        # Fail before training rather than at the first dev evaluation.
        raise ValueError('dev set is empty')

    train_contexts, train_qs, train_as, train_a_pos = zip(*train_data)

    if FLAGS.restore:
        with open(restore_path + '/vocab.json') as f:
            vocab = json.load(f)
    else:
        vocab = loader.get_vocab(train_contexts + train_qs,
                                 tf.app.flags.FLAGS.qa_vocab_size)
    # Always store the vocab next to the checkpoints this run will write, so
    # the run directory can be restored on its own.
    with open(chkpt_path + '/vocab.json', 'w') as outfile:
        json.dump(vocab, outfile)

    model = MpcmQa(vocab)
    with model.graph.as_default():
        saver = tf.train.Saver()

    def encode_batch(contexts, answer_texts, answer_charpos):
        """Return (token list per context, inclusive (start, end) word spans)."""
        all_toks = [tokenise(ctxt, asbytes=False) for ctxt in contexts]
        spans = []
        for ctxt, toks, ans_text, charpos in zip(
                contexts, all_toks, answer_texts, answer_charpos):
            start_ix = char_pos_to_word(
                ctxt.encode(), [t.encode() for t in toks], charpos)
            end_ix = start_ix + len(tokenise(ans_text, asbytes=False)) - 1
            spans.append((start_ix, end_ix))
        return all_toks, spans

    def scalar_summary(tag, value):
        """Wrap a single scalar in a tf.Summary."""
        return tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_limit,
                                allow_growth=True)
    with tf.Session(graph=model.graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        summary_writer = tf.summary.FileWriter(
            FLAGS.log_directory + 'qa/' + run_id, sess.graph)

        if FLAGS.restore:
            saver.restore(sess, restore_path + '/model.checkpoint')
            # NOTE(review): hard-coded resume epoch for the restored run.
            start_e = 40
            print('Loaded model')
        else:
            print("Building graph, loading glove")
            start_e = 0
            sess.run(tf.global_variables_initializer())

        num_steps_train = len(train_data) // batch_size

        # Seed the dev curves with a zero point at the starting step.
        summary_writer.add_summary(scalar_summary("dev_perf/f1", 0.0),
                                   global_step=start_e * num_steps_train)
        summary_writer.add_summary(scalar_summary("dev_perf/em", 0.0),
                                   global_step=start_e * num_steps_train)

        best_oos_nll = float("inf")

        for e in range(start_e, start_e + FLAGS.qa_num_epochs):
            np.random.shuffle(train_data)
            train_contexts, train_qs, train_as, train_a_pos = zip(*train_data)

            for i in tqdm(range(num_steps_train), desc='Epoch ' + str(e)):
                lo = i * batch_size
                hi = lo + batch_size
                batch_contexts = train_contexts[lo:hi]
                batch_questions = train_qs[lo:hi]
                batch_toks, batch_answers = encode_batch(
                    batch_contexts, train_as[lo:hi], train_a_pos[lo:hi])

                _, summ, pred = sess.run(
                    [model.optimizer, model.train_summary, model.pred_span],
                    feed_dict={
                        model.context_in: get_padded_batch(batch_contexts, vocab),
                        model.question_in: get_padded_batch(batch_questions, vocab),
                        model.answer_spans_in: batch_answers,
                        model.is_training: True
                    })
                global_step = e * num_steps_train + i
                summary_writer.add_summary(summ, global_step=global_step)

                if i % FLAGS.eval_freq == 0:
                    # Train F1/EM on the batch that was just used for the update.
                    train_f1s = [
                        f1(" ".join(toks[gold[0]:gold[1] + 1]),
                           " ".join(toks[guess[0]:guess[1] + 1]))
                        for toks, gold, guess in zip(batch_toks, batch_answers, pred)
                    ]
                    train_ems = [float(np.array_equal(guess, gold))
                                 for guess, gold in zip(pred, batch_answers)]
                    summary_writer.add_summary(
                        scalar_summary("train_perf/f1",
                                       sum(train_f1s) / len(train_f1s)),
                        global_step=global_step)
                    summary_writer.add_summary(
                        scalar_summary("train_perf/em",
                                       sum(train_ems) / len(train_ems)),
                        global_step=global_step)

            # ---- dev evaluation on a fresh random subset ----
            f1s = []
            exactmatches = []
            nlls = []

            np.random.shuffle(dev_data)
            dev_subset = dev_data[:num_dev_samples]
            dev_contexts, dev_qs, dev_as, dev_a_pos = zip(*dev_subset)
            # Use the real subset size so a small dev set never yields a
            # short or empty batch.
            num_steps_dev = len(dev_subset) // batch_size

            for i in tqdm(range(num_steps_dev), desc='Eval ' + str(e)):
                lo = i * batch_size
                hi = lo + batch_size
                batch_contexts = dev_contexts[lo:hi]
                batch_questions = dev_qs[lo:hi]
                batch_ans_text = dev_as[lo:hi]
                batch_answer_charpos = dev_a_pos[lo:hi]

                # The model is fed the span of the FIRST reference answer.
                batch_toks, batch_answers = encode_batch(
                    batch_contexts,
                    [answers[0] for answers in batch_ans_text],
                    [positions[0] for positions in batch_answer_charpos])

                pred, nll = sess.run(
                    [model.pred_span, model.nll],
                    feed_dict={
                        model.context_in: get_padded_batch(batch_contexts, vocab),
                        model.question_in: get_padded_batch(batch_questions, vocab),
                        model.answer_spans_in: batch_answers,
                        model.is_training: False
                    })

                for toks, guess, answers in zip(batch_toks, pred, batch_ans_text):
                    pred_norm = normalize_answer(
                        " ".join(toks[guess[0]:guess[1] + 1]))
                    golds = [normalize_answer(answer) for answer in answers]
                    # Best score over all reference answers.
                    f1s.append(max(f1(gold, pred_norm) for gold in golds))
                    exactmatches.append(
                        max(1.0 * (gold == pred_norm) for gold in golds))
                nlls.extend(nll.tolist())

            dev_step = (e + 1) * num_steps_train
            mean_nll = np.mean(nlls)
            summary_writer.add_summary(
                scalar_summary("dev_perf/f1", sum(f1s) / len(f1s)),
                global_step=dev_step)
            summary_writer.add_summary(
                scalar_summary("dev_perf/em", sum(exactmatches) / len(exactmatches)),
                global_step=dev_step)
            summary_writer.add_summary(
                scalar_summary("dev_perf/nll", mean_nll),
                global_step=dev_step)

            if mean_nll < best_oos_nll:
                print("New best NLL! ", mean_nll, " Saving... F1: ", np.mean(f1s))
                best_oos_nll = mean_nll
                saver.save(sess, chkpt_path + '/model.checkpoint')
            else:
                print("NLL not improved ", mean_nll)