def main(args):
    """Score decoded summaries with ROUGE or METEOR and persist the report.

    Reads the decoded split from <decode_dir>/log.json, evaluates
    <decode_dir>/output against <data_dir>/refs/<split>, prints the report
    and writes it to <decode_dir>/<metric>.txt.
    """
    decoded = join(args.decode_dir, 'output')
    # The decode log records which dataset split was decoded.
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    references = join(args.data_dir, 'refs', split)
    assert exists(references)
    if args.rouge:
        metric = 'rouge'
        report = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references,
                            rouge_path=args.rouge_path)
    else:
        metric = 'meteor'
        report = eval_meteor('[0-9]+.dec', decoded, '[0-9]+.ref', references,
                             meteor_path=args.meteor_path)
    print(report)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as out:
        out.write(report)
def main(dec_dir, ref_dir):
    """Run ROUGE over test-<id>.txt decode/reference pairs and save the report.

    The report is printed and written to <dec_dir>/rouge.txt.
    """
    metric = 'rouge'
    report = eval_rouge(r'test-(\d+).txt', dec_dir, 'test-#ID#.txt', ref_dir)
    print(report)
    with open(join(dec_dir, '{}.txt'.format(metric)), 'w') as out:
        out.write(report)
def upper_bound(data_root='/home/yhj/dataset/emnlp',
                out_root='/home/yhj/emnlp/baseline',
                splits=('val', 'test')):
    """Build extractive-oracle (upper-bound) summaries and ROUGE-score them.

    For each split, writes two decode directories under
    ``<out_root>/upperbound_<split>``:

      * ``output_rerank`` -- oracle sentences in the order stored in each
        paper's ``extracted`` list;
      * ``output_order``  -- the same sentences re-sorted by article position;

    then evaluates both against ``<data_root>/refs/<split>`` and saves the
    ROUGE reports as ``rouge_rerank.txt`` / ``rouge_order.txt``.

    Args:
        data_root: dataset root containing ``<split>/`` and ``refs/<split>/``
            (default keeps the original hard-coded location).
        out_root: root directory for the upper-bound outputs.
        splits: dataset splits to process.
    """
    for split in splits:
        print(split)
        out_base = os.path.join(out_root, 'upperbound_%s' % split)
        dec_rerank = os.path.join(out_base, 'output_rerank')
        dec_order = os.path.join(out_base, 'output_order')
        data_path = os.path.join(data_root, split)
        os.makedirs(dec_rerank, exist_ok=True)
        os.makedirs(dec_order, exist_ok=True)
        for file in iter_files(data_path):
            # Use a context manager so the data file is closed promptly
            # (the original `json.load(open(file))` leaked the handle).
            with open(file) as f:
                paper = json.load(f)
            name, _ = os.path.splitext(os.path.basename(file))
            # 'rerank': keep the extractor's own ordering of the oracle ids.
            sents = [paper['article'][i] for i in paper['extracted']]
            with open(os.path.join(dec_rerank, name + '.dec'), 'w') as f:
                f.write('\n'.join(sents))
            # 'order': same sentences, restored to article order.
            sents = [paper['article'][i] for i in sorted(paper['extracted'])]
            with open(os.path.join(dec_order, name + '.dec'), 'w') as f:
                f.write('\n'.join(sents))
        ref_dir = os.path.join(data_root, 'refs', split)
        dec_pattern = r'(\d+).dec'
        ref_pattern = '#ID#.ref'
        # Evaluate both variants with identical settings.
        for label, dec_dir in (('rerank', dec_rerank), ('order', dec_order)):
            output = eval_rouge(dec_pattern, dec_dir, ref_pattern, ref_dir)
            print('%s %s:' % (split, label))
            print(output)
            report = os.path.join(out_base, 'rouge_%s.txt' % label)
            with open(report, 'w') as f:
                f.write(output)
def main(args):
    """Evaluate a decode directory against the fixed reference set.

    Uses ROUGE when args.rouge is set, METEOR otherwise; the report is only
    printed, not persisted.
    """
    decoded = args.decode_dir
    references = join(_REF_DIR, 'reference')
    if args.rouge:
        report = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references)
    else:
        report = eval_meteor('[0-9]+.dec', decoded, '[0-9]+.ref', references)
    print(report)
def main(args):
    """ROUGE-score <decode_dir>/output against args.reference_dir.

    Prints the report and writes it to <decode_dir>/rouge.txt.
    """
    decoded = join(args.decode_dir, 'output')
    references = args.reference_dir
    assert exists(references)
    metric = 'rouge'
    report = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references)
    print(report)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as out:
        out.write(report)
def main(args):
    """Evaluate a single decode file against a single reference file.

    Chooses ROUGE or METEOR from args.rouge, prints the report and writes it
    to <metric>.txt in the current working directory.
    """
    dec_path = args.decode_file
    ref_path = args.ref_file
    assert exists(ref_path)
    if args.rouge:
        metric = 'rouge'
        report = eval_rouge(dec_path, ref_path)
    else:
        metric = 'meteor'
        report = eval_meteor(dec_path, ref_path)
    print(report)
    with open('{}.txt'.format(metric), 'w') as out:
        out.write(report)
def main(args):
    """Evaluate a decode directory with ROUGE, METEOR or novel n-gram stats.

    The split is read from <decode_dir>/log.json; references come from
    <_DATA_DIR>/refs/<split>. Exactly one of args.rouge / args.meteor /
    args.novel_ngrams must be set. The report is printed and saved to
    <decode_dir>/<metric>.txt.
    """
    decoded = join(args.decode_dir, 'output')
    # The decode log records which dataset split was decoded.
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    references = join(_DATA_DIR, 'refs', split)
    assert exists(references)
    if args.rouge:
        metric = 'rouge'
        output = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references,
                            force=args.force)
    elif args.meteor:
        metric = 'meteor'
        output = eval_meteor('[0-9]+.dec', decoded, '[0-9]+.ref', references,
                             force=args.force)
    elif args.novel_ngrams:
        metric = 'novel-ngrams'
        stats = eval_novel_ngrams(
            data_dir=join(_DATA_DIR, split),
            data_pattern='[0-9]+.json',
            dec_dir=decoded,
            dec_pattern='[0-9]+.dec',
            ref_dir=references,
            ref_pattern='[0-9]+.ref',
        )
        output = json.dumps(stats, indent=4, ensure_ascii=False)
    else:
        raise NotImplementedError()
    print(output)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as out:
        out.write(output)
def main(args):
    """Score <decode_dir>/output with ROUGE or METEOR and save the report.

    The split is read from <decode_dir>/log.json; references come from
    <_DATA_DIR>/refs/<split>. The report is printed and written to
    <decode_dir>/<metric>.txt.
    """
    decoded = join(args.decode_dir, 'output')
    # The decode log records which dataset split was decoded.
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    references = join(_DATA_DIR, 'refs', split)
    assert exists(references)
    if args.rouge:
        metric = 'rouge'
        report = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references)
    else:
        metric = 'meteor'
        report = eval_meteor('[0-9]+.dec', decoded, '[0-9]+.ref', references)
    print(report)
    with open(join(args.decode_dir, '{}.txt'.format(metric)), 'w') as out:
        out.write(report)
def calc_official_rouge(dec_dir, name):
    """Run the official ROUGE evaluation for *dec_dir* and return F-scores.

    Args:
        dec_dir: directory of ``<id>.dec`` system summaries.
        name: dataset name; ``'val'`` scores against ``cm.REF04``, anything
            else against ``cm.REF11``.

    Returns:
        dict mapping 'R-1', 'R-2', 'R-L' and 'R-SU4' to the ``Average_F``
        values parsed from the ROUGE report.

    Raises:
        KeyError: if the report is missing one of the expected rows.
            (The original code crashed with an unbound-local NameError in
            that case; a KeyError naming the missing metric is explicit.)
    """
    ref_dir = cm.REF04 if name == 'val' else cm.REF11
    print(f'{name}: ref_dir={ref_dir}')
    output = eval_rouge(r'(\d+).dec', dec_dir, '#ID#.[A-Z].ref', ref_dir)
    # Map the report's "Average_F" row prefixes onto the keys we return.
    # ROUGE-1.5.5 rows look like: "1 ROUGE-1 Average_F: 0.40000 (...)",
    # so the score is the 4th whitespace-separated token.
    prefixes = {
        '1 ROUGE-1 Average_F': 'R-1',
        '1 ROUGE-2 Average_F': 'R-2',
        '1 ROUGE-L Average_F': 'R-L',
        '1 ROUGE-SU4 Average_F': 'R-SU4',
    }
    scores = {}
    for line in output.split('\n'):
        for prefix, key in prefixes.items():
            if line.startswith(prefix):
                scores[key] = float(line.split()[3])
    # Build the result explicitly so a missing row fails loudly (KeyError)
    # instead of the original unbound-variable NameError.
    R = {key: scores[key] for key in ('R-1', 'R-2', 'R-L', 'R-SU4')}
    print(R, '\n')
    return R
def main(args):
    """Evaluate top-n extractive summaries, building them first if missing.

    Looks for <decode_dir>/output_top<n_ext>; if absent, calls
    make_summaries to create it. Then scores it with ROUGE or METEOR against
    <_DATA_DIR>/refs/<split> and writes <decode_dir>/top<n>_<metric>.txt.
    """
    decoded = join(args.decode_dir, 'output_top{}'.format(args.n_ext))
    if not exists(decoded):
        make_summaries(args.decode_dir, args.n_ext)
    # The decode log records which dataset split was decoded.
    with open(join(args.decode_dir, 'log.json')) as log_file:
        split = json.loads(log_file.read())['split']
    references = join(_DATA_DIR, 'refs', split)
    assert exists(references)
    if args.rouge:
        metric = 'rouge'
        report = eval_rouge(r'(\d+).dec', decoded, '#ID#.ref', references)
    else:
        metric = 'meteor'
        report = eval_meteor('[0-9]+.dec', decoded, '[0-9]+.ref', references)
    print(report)
    out_name = 'top{}_{}.txt'.format(args.n_ext, metric)
    with open(join(args.decode_dir, out_name), 'w') as out:
        out.write(report)
0: textrank_summarizer(text, stemmer, language, sentences_count), 1: lexrank_summarizer(text, stemmer, language, sentences_count), 2: luhn_summarizer(text, stemmer, language, sentences_count), 3: reduction_summarizer(text, stemmer, language, sentences_count), 4: sumbasic_summarizer(text, stemmer, language, sentences_count), 5: kl_summarizer(text, stemmer, language, sentences_count), 6: edmundson_summarizer(text, stemmer, language, sentences_count) } return switcher.get(sum_index) return switch(sum_index) if __name__ == "__main__": DATA_DIR = '../ProcessData/vietnews2/test' REF_DIR = '../ProcessData/vietnews2/refs' DECODE_DIR = 'decode/reduction' decoded_dir = os.path.join(DECODE_DIR, 'decoded') if not os.path.exists(DECODE_DIR): os.makedirs(DECODE_DIR) if not os.path.exists(decoded_dir): os.makedirs(decoded_dir) for filename in os.listdir(DATA_DIR): with open(os.path.join(DATA_DIR, filename), 'r') as f: data = json.load(f) text = '\n'.join(data['article']) summ = summarize(text, sum_index=3) with open(os.path.join(decoded_dir, filename.split('.')[0] + '.dec'), 'w') as f: f.write(summ) rouge = eval_rouge(decoded_dir, REF_DIR) print(rouge) with open(os.path.join(DECODE_DIR, 'rouge.txt'), 'w') as f: f.write(rouge)
def test(args, split):
    """Decode *split* with the top checkpoints under args.path and ROUGE-score each.

    For up to five checkpoints (as ordered by sort_ckpt), writes one .dec file
    per article under <args.path>/decode/<ckpt>/ and the ROUGE report to
    <args.path>/ROUGE/<ckpt>.txt.

    NOTE(review): sort_ckpt, DecodeDataset, Extractor, tokenize, DATA_DIR and
    eval_rouge are project-level names defined elsewhere in this codebase.
    """
    ext_dir = args.path
    ckpts = sort_ckpt(ext_dir)

    # setup loader
    def coll(batch):
        # Drop falsy (empty) articles from the batch.
        articles = list(filter(bool, batch))
        return articles
    dataset = DecodeDataset(split)
    n_data = len(dataset)
    loader = DataLoader(
        dataset, batch_size=args.batch, shuffle=False, num_workers=4,
        collate_fn=coll
    )

    # decode and evaluate top 5 models
    # (os.mkdir raises if these already exist -- a rerun needs a clean dir)
    os.mkdir(join(args.path, 'decode'))
    os.mkdir(join(args.path, 'ROUGE'))
    for i in range(min(5, len(ckpts))):
        print('Start loading checkpoint {} !'.format(ckpts[i]))
        cur_ckpt = torch.load(
            join(ext_dir, 'ckpt/{}'.format(ckpts[i]))
        )['state_dict']
        extractor = Extractor(ext_dir, cur_ckpt, args.emb_type,
                              cuda=args.cuda)
        save_path = join(args.path, 'decode/{}'.format(ckpts[i]))
        os.mkdir(save_path)

        # decoding: collect the extracted sentence indices for every article
        ext_list = []
        cur_idx = 0
        start = time()
        with torch.no_grad():
            for raw_article_batch in loader:
                tokenized_article_batch = map(
                    tokenize(None, args.emb_type), raw_article_batch)
                for raw_art_sents in tokenized_article_batch:
                    ext_idx = extractor(raw_art_sents)
                    ext_list.append(ext_idx)
                    cur_idx += 1
                    # '\r' keeps the progress line in place on the terminal
                    print('{}/{} ({:.2f}%) decoded in {} seconds\r'.format(
                        cur_idx, n_data, cur_idx/n_data*100,
                        timedelta(seconds=int(time()-start))
                    ), end='')
        print()

        # write files: one .dec per article, indexed the same as the dataset
        for file_idx, ext_ids in enumerate(ext_list):
            dec = []
            data_path = join(DATA_DIR, '{}/{}.json'.format(split, file_idx))
            with open(data_path) as f:
                data = json.loads(f.read())
            # CNN articles get 2 extracted sentences, others 3 --
            # capped by the article length.
            n_ext = 2 if data['source'] == 'CNN' else 3
            n_ext = min(n_ext, len(data['article']))
            for j in range(n_ext):
                sent_idx = ext_ids[j]
                dec.append(data['article'][sent_idx])
            with open(join(save_path, '{}.dec'.format(file_idx)), 'w') as f:
                for sent in dec:
                    print(sent, file=f)

        # evaluate current model
        print('Starting evaluating ROUGE !')
        dec_path = save_path
        ref_path = join(DATA_DIR, 'refs/{}'.format(split))
        ROUGE = eval_rouge(dec_path, ref_path)
        print(ROUGE)
        with open(join(args.path, 'ROUGE/{}.txt'.format(ckpts[i])), 'w') as f:
            print(ROUGE, file=f)
from evaluate import eval_rouge
import os


if __name__ == "__main__":
    # ROUGE-score both decode runs of the long-summarization baseline and
    # drop a rouge.txt report next to each 'decoded' directory.
    for each in ['decode_baseline', 'decode_introduction']:
        print(each)
        dec_dir = '/home/yhj/long-summarization/logroot/%s/decoded' % each
        ref_dir = '/home/yhj/long-summarization/logroot/%s/reference' % each
        report = eval_rouge(r'(\d+).decoded', dec_dir,
                            '#ID#.reference', ref_dir)
        print(report)
        report_path = os.path.join(os.path.dirname(dec_dir), 'rouge.txt')
        with open(report_path, 'w') as f:
            f.write(report)