def evaluate_policy_docs():
    opt = make_options()
    dataset = data.Dataset()
    feeder = data.Feeder(dataset)
    model, _ = models.load_or_create_models(opt, False)
    translator = Translator(model, opt.beam_size, opt.min_length, opt.max_length)
    docs = data.load_policy_documents()
    for doc in docs:
        data.parse_paragraphs(doc)
    lines = []
    for doc in docs:
        # keep paragraphs of a reasonable length only
        paras = [p for p in doc.paragraphs if 50 <= len(p) <= 400]
        if not paras:
            continue
        lines.append('=================================')
        lines.append(doc.title)
        if len(paras) > 16:
            paras = random.sample(paras, 16)
        # longest paragraphs first
        paras = sorted(paras, key=lambda x: -len(x))
        pids = [feeder.sent_to_ids(p) for p in paras]
        pids = data.align2d(pids)
        src = nu.tensor(pids)
        lengths = (src != data.NULL_ID).sum(-1)
        # transpose (batch, time) -> (time, batch) for the translator
        tgt = translator.translate(src.transpose(0, 1), lengths, opt.best_k_questions)
        questions = [[feeder.ids_to_sent(t) for t in qs] for qs in tgt]
        for p, qs in zip(paras, questions):
            lines.append('--------------------------------')
            lines.append(p)
            for k, q in enumerate(qs):
                lines.append('predict {}: {}'.format(k, q))
    utils.write_all_lines(opt.output_file, lines)
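# For reference, align2d pads the ragged list of id sequences into a rectangle
# so it can become a tensor. A minimal sketch of what it is assumed to do,
# given that (src != data.NULL_ID).sum(-1) must recover the original lengths
# (the real data.align2d may differ):
def align2d_sketch(rows, pad_id=0):
    # pad every row with pad_id up to the longest row's length
    width = max(len(row) for row in rows)
    return [row + [pad_id] * (width - len(row)) for row in rows]

# e.g. align2d_sketch([[3, 5], [7]]) -> [[3, 5], [7, 0]]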
def create_vocab(filename):
    # character-level vocabulary with frequency counts
    char_vocab = defaultdict(lambda: 0)
    for line in utils.read_all_lines(filename):
        for word in line.split(' '):
            for char in word:
                char_vocab[char] += 1
    # most frequent characters first
    char_vocab = sorted(char_vocab.items(), key=lambda x: -x[1])
    utils.write_all_lines(config.vocab_file,
                          ['{}:{}'.format(w, n) for w, n in char_vocab])
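# All of the vocab writers below emit one 'token:count' entry per line.
# A minimal sketch of the inverse (load_vocab is a hypothetical helper,
# not part of the codebase); rsplit on ':' so tokens that themselves
# contain ':' survive the round trip:
def load_vocab(filename):
    vocab = {}
    for line in utils.read_all_lines(filename):
        token, count = line.rsplit(':', 1)
        vocab[token] = int(count)
    return vocab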
def create_question_vocab(filename):
    # word-level vocabulary built from the segmented questions
    vocab = defaultdict(lambda: 0)
    for line in utils.read_all_lines(filename):
        sample = json.loads(line)
        question = sample['segmented_question']
        for word in question:
            vocab[word] += 1
    vocab = sorted(vocab.items(), key=lambda x: -x[1])
    utils.write_all_lines(config.question_vocab_file,
                          ['{}:{}'.format(w, c) for w, c in vocab])
def create_answer_vocab(filename):
    # word-level vocabulary built from all segmented document paragraphs
    vocab = defaultdict(lambda: 0)
    for line in utils.read_all_lines(filename):
        sample = json.loads(line)
        for doc in sample['documents']:
            for answer in doc['segmented_paragraphs']:
                for word in answer:
                    vocab[word] += 1
    vocab = sorted(vocab.items(), key=lambda x: -x[1])
    utils.write_all_lines(config.answer_vocab_file,
                          ['{}:{}'.format(w, c) for w, c in vocab])
def prepare_dataset_with_question_answers(source, target):
    lines = []
    for line in utils.read_all_lines(source):
        sample = json.loads(line)
        question = sample['question']
        for answer in sample['answers']:
            # keep answers that are substantially longer than the question
            if len(answer) > len(question) * 2 and len(answer) >= 20:
                lines.append(answer)
                lines.append(question)
                lines.append('<P>')
    utils.write_all_lines(target, lines)
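# The file written above is a flat sequence of answer, question, '<P>'
# triplets. A minimal sketch of a matching reader (iter_answer_question
# is a hypothetical helper named here for illustration):
def iter_answer_question(filename):
    lines = list(utils.read_all_lines(filename))
    for i in range(0, len(lines) - 2, 3):
        answer, question, sep = lines[i], lines[i + 1], lines[i + 2]
        assert sep == '<P>', 'record separator expected every third line'
        yield answer, question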
def export(doc):
    lines = ['url', doc.url, ''] + ['title', doc.title, ''] + ['content'] + doc.paragraphs + ['']
    for k, v in doc.classified_paragraphs.items():
        lines.append(k)
        lines += v
        lines.append('')
    lines.append('qas')
    for qa in doc.qas:
        lines += [
            '------------------------',
            'q:' + qa.question,
            'a:' + qa.answer,
        ]
    utils.write_all_lines('./generate/{}.txt'.format(doc.title), lines)
def prepare_dataset_with_document(source, target):
    lines = []
    for line in utils.read_all_lines(source):
        sample = json.loads(line)
        documents = [doc for doc in sample['documents'] if doc['is_selected']]
        questions = [doc['title'] for doc in documents]
        para_indices = [doc['most_related_para'] for doc in documents]
        answers = [doc['paragraphs'][k] for doc, k in zip(documents, para_indices)]
        for q, a in zip(questions, answers):
            lines.append(rip_marks(a))
            lines.append(rip_marks(q))
            lines.append('<P>')
    utils.write_all_lines(target, lines)
def evaluate_accuracy(model, dataset, batch_size=32, char_limit=16, size=None,
                      output_file='./output/evaluate.txt', profile='dev'):
    model.eval()
    feeder = data.TrainFeeder(dataset, batch_size, char_limit)
    feeder.prepare(profile)
    size = size or feeder.size
    feeder.sort(size)
    lines = []
    total_em, total_f1, total = 0, 0, 0
    while feeder.cursor < size:
        ids, cs, qs, chs, qhs, y1s, y2s, ct, qt = feeder.next(batch_size)
        logits1, logits2 = model(func.tensor(cs), func.tensor(qs),
                                 func.tensor(chs), func.tensor(qhs), ct, qt)
        y1p, y2p = model.calc_span(logits1, logits2)
        for pids, qids, label_start, label_end, predict_start, predict_end in zip(
                cs, qs, y1s, y2s, y1p, y2p):
            lines.append('--------------------------------')
            lines.append(feeder.ids_to_sent(pids))
            lines.append('question: ' + feeder.ids_to_sent(qids))
            lines.append('reference: ' + feeder.ids_to_sent(pids[label_start:label_end + 1]))
            lines.append('predict: ' + feeder.ids_to_sent(pids[predict_start:predict_end + 1]))
        em, f1, bs = evaluate_batch(feeder, ids, y1p.tolist(), y2p.tolist())
        total_em += em
        total_f1 += f1
        total += bs
        print('{}/{}'.format(feeder.cursor, size))
    exact_match = total_em / total * 100
    f1 = total_f1 / total * 100
    message = 'EM: {:>.4f}, F1: {:>.4f}, Total: {}'.format(exact_match, f1, total)
    lines.append(message)
    utils.write_all_lines(output_file, lines)
    print('evaluation finished with ' + message)
    return exact_match, f1
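# evaluate_batch itself is not shown here; under the usual SQuAD-style
# convention, its per-example scores would look roughly like this
# (a minimal sketch, not the project's actual implementation):
from collections import Counter

def em_f1(prediction_tokens, reference_tokens):
    # exact match: 1.0 only when the predicted span equals the reference
    em = float(prediction_tokens == reference_tokens)
    # token-level F1 over the multiset intersection of the two spans
    common = Counter(prediction_tokens) & Counter(reference_tokens)
    overlap = sum(common.values())
    if overlap == 0:
        return em, 0.0
    precision = overlap / len(prediction_tokens)
    recall = overlap / len(reference_tokens)
    return em, 2 * precision * recall / (precision + recall)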
def prepare_dataset_with_document(source, target):
    lines = []
    for line in utils.read_all_lines(source):
        sample = json.loads(line)
        documents = sample['documents']
        questions = [sample['segmented_question']] + [doc['segmented_title'] for doc in documents]
        question_words = set(questions[0]) - stop_words
        if not question_words:
            continue  # avoid division by zero on all-stop-word questions
        questions = [' '.join(question) for question in questions]
        for doc in documents:
            for passage in doc['segmented_paragraphs']:
                passage_words = set(passage) - stop_words
                common = question_words & passage_words
                passage = rip_marks(' '.join(passage))
                # keep passages that share enough keywords with the question
                # and are long enough to carry an answer
                if len(common) / len(question_words) > 0.3 and len(passage) > 2 * len(questions[0]):
                    lines.append(passage)
                    lines += list(set(questions))
                    lines.append('<P>')
    utils.write_all_lines(target, lines)
def prepare_dataset_with_document(source, target):
    aqs = []
    total = 0  # renamed from 'all', which shadowed the builtin
    for line in utils.read_all_lines(source):
        sample = json.loads(line)
        question = sample['segmented_question']
        question_words = set(question) - stop_words
        if not question_words:
            continue  # avoid division by zero on all-stop-word questions
        for doc in sample['documents']:
            for answer in doc['segmented_paragraphs']:
                answer_words = set(answer) - stop_words
                common = question_words & answer_words
                if len(common) / len(question_words) > 0.3:
                    a = rip_marks(str.join(' ', answer))
                    q = rip_marks(str.join(' ', question))
                    if len(a) > 2 * len(q):
                        aqs.append((a, q))
                total += 1
    print('{}: {}/{} preprocessed'.format(source, len(aqs), total))
    #utils.save_json(target, [{'q': q, 'a': a} for a, q in aqs])
    utils.write_all_lines(target, ['{}\n{}\n'.format(q, a) for a, q in aqs])
    return aqs
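# Both prepare_dataset_with_document variants use the same keyword-overlap
# filter: the share of the question's non-stop-word types that also occur
# in the passage must exceed 0.3. Factored out here for illustration
# (keyword_overlap is a hypothetical helper, not in the codebase):
def keyword_overlap(question_tokens, passage_tokens, stop_words):
    question_words = set(question_tokens) - stop_words
    if not question_words:
        return 0.0
    passage_words = set(passage_tokens) - stop_words
    return len(question_words & passage_words) / len(question_words)

# usage: keep the pair when keyword_overlap(question, answer, stop_words) > 0.3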
def evaluate_accuracy(model, dataset, batch_size=20, beam_size=5, min_length=5,
                      max_length=20, best_k_questions=3, size=None,
                      output_file=config.evaluate_output_file):
    model.eval()
    feeder = data.TrainFeeder(dataset)
    feeder.prepare('dev', batch_size)
    translator = Translator(model, beam_size, min_length, max_length)
    size = size or feeder.size
    feeder.sort(size)
    lines = []
    correct = 0
    total = 0
    while feeder.cursor < size:
        x, _, lengths, pids, qids = data.next(feeder, batch_size)
        tgt = translator.translate(x, lengths, best_k_questions)
        passages = [feeder.ids_to_sent(t) for t in pids]
        questions = [[feeder.ids_to_sent(t) for t in qs] for qs in tgt]
        gtruths = [feeder.ids_to_sent(t) for t in qids]
        for p, qs, g in zip(passages, questions, gtruths):
            lines.append('--------------------------------')
            lines.append(p)
            lines.append('reference: ' + g)
            for k, q in enumerate(qs):
                lines.append('predict {}: {}'.format(k, q))
            # character-set overlap between the reference and the top prediction
            correct += len(set(g) & set(qs[0]))
            total += len(set(qs[0]))
        print('{}/{}'.format(feeder.cursor, size))
    accuracy = correct / total * 100
    lines.append('correct: {}/{}, accuracy: {}'.format(correct, total, accuracy))
    print('evaluation finished with accuracy: {:>.2f}'.format(accuracy))
    utils.write_all_lines(output_file, lines)
    return accuracy
def create_vocab(filename):
    qv = defaultdict(lambda: 0)
    av = defaultdict(lambda: 0)
    qset = set()
    aset = set()
    for q, a in data.load_qa(filename):
        sq = str.join('', q)
        sa = str.join('', a)
        # count words only once per distinct question/answer string
        if sq not in qset:
            for word in q:
                qv[word] += 1
            qset.add(sq)
        if sa not in aset:
            for word in a:
                av[word] += 1
            aset.add(sa)
    qv = sorted(qv.items(), key=lambda x: -x[1])
    av = sorted(av.items(), key=lambda x: -x[1])
    utils.write_all_lines(config.question_vocab_file,
                          ['{}:{}'.format(w, c) for w, c in qv])
    utils.write_all_lines(config.answer_vocab_file,
                          ['{}:{}'.format(w, c) for w, c in av])
    utils.write_all_lines('./generate/questions.txt', qset)
    utils.write_all_lines('./generate/answers.txt', aset)
def append_source(span):
    part = line[span[0]:span[1]]
    for c in part:
        source.append(c)

def append_target(span, source_span):
    slen = source_span[1] - source_span[0]
    tag = line[span[0]:span[1]].upper()
    global target
    if slen == 1:
        target.append('S-' + tag)
    else:
        # BMES scheme: B- begins a multi-character span, M- continues it,
        # E- ends it (the decoder below expects B-, not S-, at the start)
        target += ['B-' + tag] + ['M-' + tag] * (slen - 2) + ['E-' + tag]

def join(tp):
    return '$'.join(tp)

last_pos = 0
for m in re.finditer(r'<(.*?)>(.*?)</.*?>', line):
    start, end = m.span(0)
    process_others(last_pos, start)
    last_pos = end
    append_source(m.span(2))
    append_target(m.span(1), m.span(2))
process_others(last_pos, len(line))
lines.append(join(source))
lines.append(join(target))
utils.write_all_lines('eval.postprocessed.txt', lines)
# rebuild inline <TAG>...</TAG> markup from alternating source/target lines
lines = list(utils.read_all_lines('./eval.csv'))[100:200]
source = []
target = []
for line in lines:
    line = line.split('$')
    if len(source) == len(target):
        source.append(line)
    else:
        target.append(line)
assert len(source) == len(target)
lines = []
for s, t in zip(source, target):
    assert len(s) == len(t)
    line = ''
    for x, y in zip(s, t):
        if y.startswith('S-'):
            line += f'<{y[2:]}>{x}</{y[2:]}>'
        elif y.startswith('B-'):
            line += f'<{y[2:]}>{x}'
        elif y.startswith('E-'):
            line += f'{x}</{y[2:]}>'
        else:
            # M- tags and untagged characters pass through unchanged
            line += x
    lines.append(line)
utils.write_all_lines('./eval.txt', lines)
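# A worked example of the round trip, assuming the BMES encoding above:
# the encoder turns '<NUM>42</NUM>km' into the two '$'-joined lines
#     4$2$k$m
#     B-NUM$E-NUM$O$O
# (here 'O' stands for whatever tag process_others assigns to untagged
# characters, which is not shown in this excerpt), and the decoder
# reconstructs '<NUM>42</NUM>km' from them: B-NUM opens the tag on '4',
# E-NUM closes it after '2', and the remaining characters pass through.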
def __call__(self, message):
    # log to stdout and persist the full history on every call
    print(message)
    self.lines.append(message)
    utils.write_all_lines(self.output_file, self.lines)
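# __call__ above references self.lines and self.output_file; a minimal
# sketch of the enclosing logger class they imply (the actual class is not
# shown in this excerpt, so the class name and constructor are assumptions):
class Logger:
    def __init__(self, output_file):
        self.output_file = output_file
        self.lines = []

    def __call__(self, message):
        print(message)
        self.lines.append(message)
        utils.write_all_lines(self.output_file, self.lines)

# usage: log = Logger('./output/train.log'); log('epoch 1 started')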