def test_sentence_score_L(self):
    """Check that ``Rouge.sentence_score(mode='L')`` matches ``RougeCalculator.rouge_l``."""
    reference = 'It \'s my living town'
    hypothesis = 'I went to the Mars from my living town.'
    scorer = rouge.Rouge()
    baseline = RougeCalculator(stopwords=False, lang="en")
    # RougeCalculator takes (summary, reference); sentence_score takes
    # (reference, hypothesis) -- the argument order differs on purpose.
    expected = baseline.rouge_l(hypothesis, reference)
    actual = scorer.sentence_score(reference, hypothesis, mode='L')
    eq_(expected, actual)
def test_sentence_score_N_2(self):
    """Check that ``Rouge.sentence_score(n=2)`` matches ``RougeCalculator.rouge_n`` with n=2."""
    reference = 'I went to Mars'
    hypothesis = 'I went to the Mars from my living town.'
    scorer = rouge.Rouge()
    baseline = RougeCalculator(stopwords=False, lang="en")
    # RougeCalculator takes (summary, reference); sentence_score takes
    # (reference, hypothesis).
    expected = baseline.rouge_n(hypothesis, reference, n=2)
    actual = scorer.sentence_score(reference, hypothesis, n=2)
    eq_(expected, actual)
def evaluate(net, criterion, vocab, data_iter, train_next=True):
    """Run ``net`` over ``data_iter`` and report loss and ROUGE-1/2/L.

    The network is switched to eval mode, every batch is decoded greedily
    (argmax over the last dimension), and each decoded summary is scored
    against the matching target text.  The entries selected by the
    module-level ``example_idx`` are printed.  When ``train_next`` is false,
    every (review, reference, summary) triple is also written to
    ``args.output_dir + args.load_model``; when it is true, the network is put
    back into train mode before returning.

    Returns:
        ``(loss, r1, r2, rl)`` -- loss averaged over ``len(data_iter)``
        batches, ROUGE scores averaged over the number of decoded summaries.
    """
    net.eval()
    reviews, refs, sums = [], [], []
    loss = r1 = r2 = rl = .0
    rouge = RougeCalculator(stopwords=False, lang="en")

    for batch in tqdm(data_iter):
        (src, trg, src_embed, trg_embed, src_mask,
         src_lens, trg_lens, src_text, trg_text) = vocab.read_batch(batch)
        pre_output = net(src, trg, src_embed, trg_embed, vocab.word_num,
                         src_mask, src_lens, trg_lens, test=True)

        # The small epsilon keeps log() finite for zero entries.
        flat_log = torch.log(pre_output.view(-1, pre_output.size(-1)) + 1e-20)
        flat_target = trg.view(-1)
        loss += criterion(flat_log, flat_target).data.item() / len(src_lens)

        reviews.extend(src_text)
        refs.extend(trg_text)

        # Index 3 can never win the argmax below.
        # NOTE(review): presumably the UNK token id -- confirm against vocab.
        pre_output[:, :, 3] = float('-inf')
        decoded = torch.argmax(pre_output, dim=-1).tolist()

        for row, token_ids in enumerate(decoded):
            words = ['']
            for token_id in token_ids:
                if token_id == vocab.EOS_IDX:
                    break
                words.append(vocab.id_word(token_id))
            text = ' '.join(words).strip()
            if not text:
                # Placeholder so an empty decode is still scored and written.
                text = '<EMP>'
            sums.append(text)

            target = trg_text[row]
            r1 += rouge.rouge_n(text, target, n=1)
            r2 += rouge.rouge_n(text, target, n=2)
            rl += rouge.rouge_l(text, target)

    for i in example_idx:
        print('> %s' % reviews[i])
        print('= %s' % refs[i])
        print('< %s\n' % sums[i])

    if not train_next:
        # The path is built by plain string concatenation, without a separator.
        with open(args.output_dir + args.load_model, 'w') as f:
            for review, ref, summary in zip(reviews, refs, sums):
                f.write('> %s\n' % review)
                f.write('= %s\n' % ref)
                f.write('< %s\n\n' % summary)

    loss /= len(data_iter)
    n_summaries = len(sums)
    r1 /= n_summaries
    r2 /= n_summaries
    rl /= n_summaries

    if train_next:
        net.train()
    return loss, r1, r2, rl
def __init__(self, vocabulary, model, optimizer, criterion, phi):
    """Keep the given collaborators on the instance and create the ROUGE scorer.

    The scorer is an English ``RougeCalculator`` with stop-word removal and
    stemming both turned off.
    """
    self.model, self.optimizer = model, optimizer
    self.criterion, self.vocabulary = criterion, vocabulary
    self.rouge = RougeCalculator(stopwords=False, lang="en", stemming=False)
    self.phi = phi
def cal_rouge(summa, refer):
    """Print ROUGE-1, ROUGE-2 and ROUGE-L of ``summa`` against ``refer``, one per line.

    Scores come from an English ``RougeCalculator`` with ``stopwords=True``.
    Nothing is returned.
    """
    scorer = RougeCalculator(stopwords=True, lang="en")
    # ROUGE-1 gets the reference as given; the other two get it wrapped in a list.
    score_1 = scorer.rouge_n(summary=summa, references=refer, n=1)
    score_2 = scorer.rouge_n(summary=summa, references=[refer], n=2)
    score_l = scorer.rouge_l(summary=summa, references=[refer])

    report = "ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}".format(score_1, score_2, score_l)
    print(report.replace(", ", "\n"))
def evaluate_rouge_n(summary, references, n=2):
    """Score ``summary`` against ``references`` with a Japanese ROUGE calculator.

    Args:
        summary: Candidate summary passed through to the calculator.
        references: Reference(s) passed through to the calculator.
        n: An integer selects ROUGE-N of that order; the string ``"l"``
            selects ROUGE-L and ``"be"`` selects ROUGE-BE.

    Returns:
        The selected score, or ``None`` when ``n`` is none of the above.
    """
    rouge = RougeCalculator(stopwords=True, lang="ja")

    # bool is a subclass of int; it is excluded so True/False still fall
    # through to ``None`` rather than being read as n=1 / n=0.
    if isinstance(n, int) and not isinstance(n, bool):
        return rouge.rouge_n(summary=summary, references=references, n=n)
    if n == "l":
        return rouge.rouge_l(summary=summary, references=references)
    if n == "be":
        return rouge.rouge_be(summary=summary, references=references)
    return None
def calc_rouge(original_sentences, predict_sentences):
    """Accumulate ROUGE-1 and ROUGE-2 over paired reference/prediction sentences.

    ``"<PAD>"`` markers are removed and surrounding whitespace stripped from
    both sentences before scoring.

    Args:
        original_sentences: Reference sentences.
        predict_sentences: Predicted sentences, paired positionally with
            ``original_sentences`` (pairing stops at the shorter input).

    Returns:
        ``(rouge_1, rouge_2)`` -- the *sums* of the per-pair scores (they are
        not divided by the number of pairs).
    """
    # One calculator serves every pair; building it inside the loop only
    # repeated the same construction.
    rouge = RougeCalculator(stopwords=True, lang="en")
    rouge_1 = 0.0
    rouge_2 = 0.0
    for original, predict in zip(original_sentences, predict_sentences):
        # Remove padding
        reference = original.replace("<PAD>", "").strip()
        summary = predict.replace("<PAD>", "").strip()
        rouge_1 += rouge.rouge_1(summary=summary, references=reference)
        rouge_2 += rouge.rouge_2(summary=summary, references=reference)
    return rouge_1, rouge_2
def __init__(self,
             hugging_face_model_name: str = "distilbert-base-uncased-distilled-squad",
             tokenizer_name: str = "distilbert-base-uncased",
             cuda_is_available: bool = True):
    """Create the transformers reader and the ROUGE calculator used by this object.

    Args:
        hugging_face_model_name: Model identifier handed to ``TransformersReader``.
        tokenizer_name: Tokenizer identifier handed to ``TransformersReader``.
        cuda_is_available: Translated to the integer passed as ``use_gpu``:
            ``0`` when true, ``-1`` when false.
    """
    gpu_setting = 0 if cuda_is_available else -1
    self.__reader = TransformersReader(
        model=hugging_face_model_name,
        tokenizer=tokenizer_name,
        context_window_size=512,
        use_gpu=gpu_setting,
    )
    self.__rouge = RougeCalculator(stopwords=False)
def test_rouge(self):
    """ROUGE-1/2 from the ``zh`` calculator must agree with the reference ``rouge_n`` function."""
    data = self.load_test_data()
    calculator = RougeCalculator(stopwords=False, lang="zh")
    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for order in [1, 2]:
            for summary in summaries:
                actual = calculator.rouge_n(
                    self._compress(summary), self._compress(references), order)
                split_refs = [self._split(r) for r in references]
                expected = rouge_n(self._split(summary), split_refs, order, 0.5)
                self.assertLess(abs(expected - actual), 1e-5)
def evaluate_rouge(summary, references, n=1, lang="zh"):
    """Score each summary against its references and average the results.

    Args:
        summary: Sequence of candidate summaries.
        references: Sequence of reference entries, one per summary.
        n: N-gram order for ROUGE-N, or ``'l'`` for ROUGE-L.
        lang: Language code given to ``RougeCalculator``.

    Returns:
        ``(average, scores)`` -- the mean score and the list of per-item scores.

    Raises:
        AssertionError: If the two sequences differ in length.
        ZeroDivisionError: If both sequences are empty.
    """
    rouge_calc = RougeCalculator(stopwords=True, lang=lang)
    assert len(summary) == len(references), "number of summary and references should be equal"

    pairs = zip(summary, references)
    if n == 'l':
        rouges = [rouge_calc.rouge_l(s, rs) for s, rs in pairs]
    else:
        rouges = [rouge_calc.rouge_n(s, rs, n) for s, rs in pairs]
    return sum(rouges) / len(rouges), rouges
def score_model(test_pairs, model, model_id):
    """Print the mean ROUGE-1/2/L of the model's beam output over the predicted results.

    Predictions come from ``predict_from_data`` restricted to ``_range=(0, 5000)``.
    Both the beam output and the reference are cut at the first ``'EOS'``
    before scoring.  The printed line holds ``model_id.split("@")`` followed
    by the three means rounded to three decimals.
    """
    total_1 = total_2 = total_l = 0
    rouge_calc = RougeCalculator(stopwords=False, lang="en")
    results = predict_from_data(test_pairs, _range=(0, 5000), model=model)

    for key in results:
        entry = results[key]
        hypothesis = entry['beam'].split('EOS')[0]
        reference = entry['ref'].split('EOS')[0]
        total_1 += rouge_calc.rouge_1(hypothesis, reference)
        total_2 += rouge_calc.rouge_2(hypothesis, reference)
        total_l += rouge_calc.rouge_l(hypothesis, reference)

    print(model_id.split("@"),
          round(total_1 / len(results), 3),
          round(total_2 / len(results), 3),
          round(total_l / len(results), 3))
def calculate(summary, reference):
    """Return nine ROUGE scores for ``summary`` against ``reference``.

    Order of the returned list: ROUGE-1 at alpha 1, 0, 0.5; then ROUGE-2 at
    the same three alphas; then ROUGE-L at the same three alphas.
    """
    rouge = RougeCalculator(lang="en")
    scores = [
        rouge.rouge_n(summary=summary, references=reference, n=order, alpha=weight)
        for order in [1, 2]
        for weight in [1, 0, 0.5]
    ]
    scores.extend(
        rouge.rouge_l(summary=summary, references=reference, alpha=weight)
        for weight in [1, 0, 0.5]
    )
    return scores
def myeval(valid_x, valid_y, vocab, model):
    """Evaluate ``model`` on a fixed number of validation minibatches.

    For each of ``limit`` minibatches the loss from ``run_batch`` is
    accumulated, predictions are decoded with ``greedy``, and every
    prediction/reference pair is written to ``tmp/systems/<ckpt>.txt`` and
    ``tmp/models/ref_<ckpt>.txt`` and scored with ROUGE-1/2/L and BLEU.
    The averaged metrics and ``2 ** avg_loss`` (reported as perplexity) are
    printed.  The model is put in eval mode on entry and back in train mode
    on exit.

    Args:
        valid_x: Batch source exposing ``next_batch()`` for the inputs.
        valid_y: Batch source exposing ``next_batch()`` for the targets.
        vocab: Mapping from token to id.
        model: The model to evaluate.
    """
    rouge = RougeCalculator(stopwords=True, lang="zh")
    bleu_ch = BLEUCalculator(lang="zh")
    model.eval()

    eval_batch_num = 0
    sum_rouge_1 = 0
    sum_rouge_2 = 0
    sum_rouge_L = 0
    score_ch = 0
    sum_loss = 0
    limit = 63
    logging.info('Evaluating on %d minibatches...' % limit)

    # Inverse vocabulary: id -> token.
    i2w = {index: token for token, index in vocab.items()}
    # End and padding tokens are dropped from both hypothesis and reference.
    skipped_ids = (vocab[config.end_tok], vocab[config.pad_tok])

    ckpt_file = args.ckpt_file[9:]
    pred_path = os.path.join('tmp/systems', '%s.txt' % ckpt_file)
    ref_path = os.path.join('tmp/models', 'ref_%s.txt' % ckpt_file)

    # ``with`` guarantees both files are closed even if scoring raises.
    with open(pred_path, "w") as fout_pred, open(ref_path, "w") as fout_y:
        while eval_batch_num < limit:
            with torch.no_grad():
                loss = run_batch(valid_x, valid_y, model)
                sum_loss += loss
                _, x = valid_x.next_batch()
                pred = greedy(model, x, vocab)
                _, y = valid_y.next_batch()
                # Drop the first target column.
                # NOTE(review): presumably the start token -- confirm.
                y = y[:, 1:].tolist()
                for idx, pred_ids in enumerate(pred):
                    pred_text = " ".join(
                        i2w[tok] for tok in pred_ids if tok not in skipped_ids)
                    ref_text = " ".join(
                        i2w[tok] for tok in y[idx] if tok not in skipped_ids)
                    fout_pred.write(pred_text + "\n")
                    fout_y.write(ref_text + "\n")
                    sum_rouge_1 += rouge.rouge_n(references=ref_text, summary=pred_text, n=1)
                    sum_rouge_2 += rouge.rouge_n(references=ref_text, summary=pred_text, n=2)
                    sum_rouge_L += rouge.rouge_l(references=ref_text, summary=pred_text)
                    # Keyword arguments keep the hypothesis as ``summary`` and
                    # the gold text as ``references``, matching the ROUGE calls.
                    score_ch += bleu_ch.bleu(summary=pred_text, references=ref_text)
            eval_batch_num += 1

    # The denominator uses the size of the last batch for every batch.
    # NOTE(review): this is only exact when all batches have the same size.
    sample_count = len(pred) * limit
    avg_rouge_1 = sum_rouge_1/sample_count
    avg_rouge_2 = sum_rouge_2/sample_count
    avg_rouge_L = sum_rouge_L/sample_count
    avg_bleu_ch = score_ch/sample_count
    avg_loss = sum_loss/limit

    print("ROUGE_1 = ",avg_rouge_1)
    print("ROUGE_2 = ",avg_rouge_2)
    print("ROUGE_L = ",avg_rouge_L)
    print("BLEU = ", avg_bleu_ch)
    print("Perplexity = ", math.pow(2, avg_loss))
    model.train()
class RougeNCalc:
    """Callable that returns the ROUGE-1 score from a Japanese ``RougeCalculator``."""

    def __init__(self):
        """Build the calculator once (``stopwords=True``, ``lang="ja"``) for reuse across calls."""
        self.rouge = RougeCalculator(stopwords=True, lang="ja")

    def __call__(self, summary, reference):
        """Return ROUGE-N with ``n=1`` for ``summary`` against ``reference``."""
        return self.rouge.rouge_n(summary, reference, n=1)
def avg_rouge(ref_dir, dec_dir, n):
    """Average a ROUGE score over every reference/decoded file pair.

    Reference files are those matching ``*reference.txt`` in ``ref_dir``.
    The text before the first ``_`` in the file name selects the matching
    ``<number>_decoded.txt`` in ``dec_dir``.  Both files are read with
    ``gen_sentence`` and each item is joined with a trailing space before
    scoring.

    Args:
        ref_dir: Directory containing the reference files.
        dec_dir: Directory containing the decoded files.
        n: ``1`` or ``2`` for ROUGE-N, ``'l'`` for ROUGE-L.  Any other value
            scores nothing.

    Returns:
        The mean of the collected scores.

    Raises:
        ZeroDivisionError: If no score was collected (no matching reference
            files, or an unsupported ``n``).
    """
    # One calculator is enough for all files.
    rouge = RougeCalculator(stopwords=True, lang="zh")
    scores_list = []

    for ref_file in glob.glob(os.path.join(ref_dir, "*reference.txt")):
        number = os.path.basename(ref_file).split("_")[0]
        dec_file = os.path.join(dec_dir, "{}_decoded.txt".format(number))

        dec_cont = gen_sentence(dec_file)
        ref_cont = gen_sentence(ref_file)
        # Every item is followed by a single space, including the last one.
        dec_cont = ''.join(i + ' ' for i in dec_cont)
        ref_cont = ''.join(i + ' ' for i in ref_cont)

        if n == 1:
            scores_list.append(rouge.rouge_n(dec_cont, ref_cont, n=1))
        elif n == 2:
            scores_list.append(rouge.rouge_n(dec_cont, ref_cont, n=2))
        elif n == 'l':
            scores_list.append(rouge.rouge_l(dec_cont, ref_cont))

    return sum(scores_list) / len(scores_list)
def main(args):
    """Print corpus-level ROUGE-1, ROUGE-2 and ROUGE-L for a system output file.

    ``args.system_out`` and ``args.reference`` are read line by line in
    parallel.  When ``args.keyword`` is truthy, both lines first go through
    a ``KeywordRemover`` built from it.  ROUGE-1 and ROUGE-L use a calculator
    with ``stopwords=True``; ROUGE-2 uses one with ``stopwords=False``.  The
    per-line scores are averaged with ``np.average`` and printed.
    """
    rouge4one = RougeCalculator(stopwords=True, lang=args.lang)
    rouge4other = RougeCalculator(stopwords=False, lang=args.lang)
    rougeone_list, rougetwo_list, rougel_list = [], [], []

    if args.keyword:
        kr = KeywordRemover(args.keyword)

    with open(args.system_out) as sf, open(args.reference) as rf:
        for i, (system_line, reference_line) in enumerate(zip(sf, rf)):
            # Progress counter, overwritten in place.
            print(i, end='\r', flush=True)
            if args.keyword:
                system_line = kr(system_line)
                reference_line = kr(reference_line)

            score_1 = rouge4one.rouge_1(
                summary=system_line, references=reference_line, alpha=args.alpha)
            rougeone_list.append(score_1)
            score_2 = rouge4other.rouge_2(
                summary=system_line, references=reference_line, alpha=args.alpha)
            rougetwo_list.append(score_2)
            score_l = rouge4one.rouge_l(
                summary=system_line, references=reference_line, alpha=args.alpha)
            rougel_list.append(score_l)

    print('ROUGE-1\t%.6f' % (np.average(rougeone_list)))
    print('ROUGE-2\t%.6f' % (np.average(rougetwo_list)))
    print('ROUGE-L\t%.6f' % (np.average(rougel_list)))
def test_rouge_with_stop_words(self):
    """With stop-word removal on, ``rouge_n`` must match the reference function on filtered tokens."""
    data = self.load_test_data()
    calculator = RougeCalculator(stopwords=True, lang="zh")

    def content_words(text):
        # Tokenize, then drop whatever the calculator's language treats as a stop word.
        return [
            word for word in self._split(text)
            if not calculator._lang.is_stop_word(word)
        ]

    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for order in [1, 2]:
            for summary in summaries:
                actual = calculator.rouge_n(summary, references, order)
                filtered_refs = [content_words(r) for r in references]
                expected = rouge_n(content_words(summary), filtered_refs, order, 0.5)
                self.assertLess(abs(expected - actual), 1e-5)
def __init__(self, metrics: List[str] = [
        "rouge_1", "rouge_2", "rouge_l", "rouge_be", "bleu"
], lang: str = "en", stopwords: bool = True, stemming: bool = True,
        use_porter=True):
    """Create the ROUGE and BLEU calculators and record the requested metrics.

    Args:
        metrics: Metric names to compute; stored sorted in ``self.metrics``.
            The shared default list is never mutated here because
            ``sorted`` returns a new list.
        lang: Language code given to ``BLEUCalculator``.
            NOTE(review): the ROUGE calculator does not receive ``lang``; it
            always uses ``"en-porter"`` or ``"en"`` -- confirm this is intended.
        stopwords: Passed to ``RougeCalculator``.
        stemming: Passed to ``RougeCalculator``.
        use_porter: Selects ``"en-porter"`` (true) or ``"en"`` (false) as the
            ROUGE language.
    """
    rouge_lang = "en-porter" if use_porter else "en"
    self.rouge = RougeCalculator(stopwords=stopwords,
                                 stemming=stemming,
                                 lang=rouge_lang)
    self.bleu = BLEUCalculator(lang=lang)
    self.metrics = sorted(metrics)
def calc_rouge(machine_summery, reference_summery, debug_print=False):
    """Return ``(rouge_1, rouge_2, rouge_l)`` for a machine summary against a reference.

    Uses an English ``RougeCalculator`` with ``stopwords=True``.  When
    ``debug_print`` is true the three scores are also printed, one per line,
    under a header line.
    """
    scorer = RougeCalculator(stopwords=True, lang="en")
    shared = dict(summary=machine_summery, references=reference_summery)
    score_1 = scorer.rouge_n(n=1, **shared)
    score_2 = scorer.rouge_n(n=2, **shared)
    score_l = scorer.rouge_l(**shared)

    if debug_print:
        template = "current sentences results:\nROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}"
        print(template.format(score_1, score_2, score_l).replace(", ", "\n"))

    return score_1, score_2, score_l
def test_custom_lang(self):
    """A user-defined ``BaseLang`` with a custom tokenizer must work for both ROUGE and BLEU."""

    class SlashLang(BaseLang):
        """Language registered as ``"cs"`` whose tokens are separated by ``/``."""

        def __init__(self):
            super().__init__("cs")

        def tokenize(self, text):
            return text.split("/")

    lang = SlashLang()

    rouge_score = RougeCalculator(lang=lang).rouge_n(
        summary="I/went/to/the/Mars/from/my/living/town.",
        references="I/went/to/Mars",
        n=1)
    bleu_score = BLEUCalculator(lang=lang).bleu(
        "I/am/waiting/on/the/beach", "He/is/walking/on/the/beach")

    self.assertGreater(rouge_score, 0)
    self.assertGreater(bleu_score, 0)
def __init__(self, metrics=None, stats=None, stem=False, remove_stop=False):
    """Configure which metrics and stats to report and build the calculator.

    Args:
        metrics: Metric names; ``Rouge.DEFAULT_METRICS`` when ``None``.
        stats: Stat names; ``Rouge.DEFAULT_STATS`` when ``None``.
        stem: Passed to ``RougeCalculator`` as ``stemming``.
        remove_stop: Passed to ``RougeCalculator`` as ``stopwords``.

    Raises:
        ValueError: If a metric is not in ``Rouge.AVAILABLE_METRICS`` or a
            stat is not in ``Rouge.AVAILABLE_STATS``.
    """
    self.metrics = Rouge.DEFAULT_METRICS if metrics is None else metrics
    self.stats = Rouge.DEFAULT_STATS if stats is None else stats
    self.rouge_calc = RougeCalculator(stopwords=remove_stop,
                                      stemming=stem,
                                      lang="en")

    # Validation runs after the calculator is built; metrics are checked
    # before stats.
    for metric in self.metrics:
        if metric not in Rouge.AVAILABLE_METRICS:
            raise ValueError("Unknown metric '%s'" % metric)
    for stat in self.stats:
        if stat not in Rouge.AVAILABLE_STATS:
            raise ValueError("Unknown stat '%s'" % stat)
def evaluate():
    """Print ROUGE-1, ROUGE-2, ROUGE-L and ROUGE-BE for a fixed example sentence.

    The calculator is created with ``stopwords=True`` and ``lang="zh"``.
    ROUGE-1 is scored against a single reference string; the other three are
    scored against a list of two references.
    """
    from sumeval.metrics.rouge import RougeCalculator

    scorer = RougeCalculator(stopwords=True, lang="zh")
    candidate = "I went to the Mars from my living town."

    score_1 = scorer.rouge_n(summary=candidate, references="I went to Mars", n=1)
    score_2 = scorer.rouge_n(
        summary=candidate,
        references=["I went to Mars", "It's my living town"],
        n=2)
    score_l = scorer.rouge_l(
        summary=candidate,
        references=["I went to Mars", "It's my living town"])
    # ROUGE-BE depends on spaCy being installed.
    score_be = scorer.rouge_be(
        summary=candidate,
        references=["I went to Mars", "It's my living town"])

    report = "ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format(
        score_1, score_2, score_l, score_be)
    print(report.replace(", ", "\n"))
def eval_rouges(refrence_summary, model_summary):
    """Return ROUGE-1, ROUGE-2, ROUGE-L, ROUGE-BE and BLEU for one summary pair.

    Args:
        refrence_summary: The reference text.
        model_summary: The generated text.

    Returns:
        ``(rouge_1, rouge_2, rouge_l, rouge_be, bleu_score)``.
    """
    scorer = RougeCalculator(stopwords=True, lang="en")

    # ROUGE-1 gets the bare reference; every other metric gets it in a list.
    score_1 = scorer.rouge_n(summary=model_summary, references=refrence_summary, n=1)
    score_2 = scorer.rouge_n(summary=model_summary, references=[refrence_summary], n=2)
    score_l = scorer.rouge_l(summary=model_summary, references=[refrence_summary])
    # ROUGE-BE depends on spaCy being installed.
    score_be = scorer.rouge_be(summary=model_summary, references=[refrence_summary])

    bleu_score = BLEUCalculator().bleu(summary=model_summary,
                                       references=[refrence_summary])
    return score_1, score_2, score_l, score_be, bleu_score
def main(conf):
    """Score a system output file with ROUGE and print overall and per-keyword-count results.

    Reads ``conf.system_out``, ``conf.reference`` and ``conf.test_src`` (which
    must have equal line counts).  Without a keyword setting, each
    system/reference line is expected to start with an integer ID followed
    by whitespace; that ID also selects the source line whose ``'</@>'``
    occurrences give the keyword count.  Prints the IDs of (up to) the 100
    lowest ROUGE-1 lines, the corpus averages, and the averages for keyword
    counts 0-4.  When ``conf.output`` is set, per-line scores are written
    there as ``id, r1, r2, rl``.

    Raises:
        AssertionError: If the three input files differ in line count.
    """
    id_list = []
    rougeone_list = []
    rougetwo_list = []
    rougel_list = []
    rouge4one = RougeCalculator(stopwords=True, lang=conf.rouge.lang)
    rouge4other = RougeCalculator(stopwords=False, lang=conf.rouge.lang)
    keynum_counter = defaultdict(
        lambda: {'count': 0, 'rouge1_lis': [], 'rouge2_lis': [], 'rougel_lis': []})

    use_keyword = bool(conf.rouge.get('keyword', None))
    if use_keyword:
        kr = KeywordRemover(conf.rouge.keyword)

    # Read each file once; the same lines serve the length check and the scoring.
    with open(conf.system_out) as sf, \
            open(conf.reference) as rf, \
            open(conf.test_src) as tsrcf:
        system_lines = sf.readlines()
        reference_lines = rf.readlines()
        tsrcs = tsrcf.readlines()
    assert len(system_lines) == len(reference_lines) == len(tsrcs)

    for i, (so, ref) in enumerate(zip(system_lines, reference_lines)):
        print(i, end='\r', flush=True)
        if use_keyword:
            so = kr(so)
            ref = kr(ref)
            # Keyword mode parses no ID column, so the 0-based line number
            # stands in for it (the files are line-aligned per the assert).
            # NOTE(review): confirm keyword-mode files carry no ID prefix.
            idx = i
        else:
            idx, so = so.split(None, 1)
            idx, ref = ref.split(None, 1)
        idx = int(idx)
        id_list.append(idx)

        rouge1 = rouge4one.rouge_1(summary=so, references=ref, alpha=conf.alpha)
        rouge2 = rouge4other.rouge_2(summary=so, references=ref, alpha=conf.alpha)
        rougel = rouge4one.rouge_l(summary=so, references=ref, alpha=conf.alpha)
        rougeone_list.append(rouge1)
        rougetwo_list.append(rouge2)
        rougel_list.append(rougel)

        keynum = tsrcs[idx].count('</@>')
        bucket = keynum_counter[keynum]
        bucket['count'] += 1
        bucket['rouge1_lis'].append(rouge1)
        bucket['rouge2_lis'].append(rouge2)
        bucket['rougel_lis'].append(rougel)

    # argpartition needs kth < len(array); with 100 or fewer samples every
    # sample is listed instead of raising.
    if len(rougeone_list) > 100:
        lowest_idids = np.argpartition(rougeone_list, 100)[:100]
    else:
        lowest_idids = np.arange(len(rougeone_list))
    print(f"Lowest IDs\t{' '.join(map(str, np.array(id_list)[lowest_idids]))}")

    print('ROUGE-1\t%.6f'%(np.average(rougeone_list)))
    print('ROUGE-2\t%.6f'%(np.average(rougetwo_list)))
    print('ROUGE-L\t%.6f'%(np.average(rougel_list)))

    if conf.output:
        with open(conf.output, 'w') as of:
            for idx, r1, r2, rl in zip(id_list, rougeone_list, rougetwo_list, rougel_list):
                of.write(f'{idx}, {r1}, {r2}, {rl}\n')

    for keynum in range(5):
        print(keynum)
        print(f'count: {keynum_counter[keynum]["count"]}')
        print('ROUGE-1\t%.6f'%(np.average(keynum_counter[keynum]['rouge1_lis'])))
        print('ROUGE-2\t%.6f'%(np.average(keynum_counter[keynum]['rouge2_lis'])))
        print('ROUGE-L\t%.6f'%(np.average(keynum_counter[keynum]['rougel_lis'])))
def train_rl(self, data, val_data, nb_epochs, batch_size, optimizer, lr,
             tf_ratio, stop_criterion, use_cuda, print_evry):
    """Train with ``train_batch_rl_mc`` for the remaining epochs, validating after each.

    On first use (``self.logger is None``) the encoder/decoder optimizers,
    the NLL criterion and the training logger are created.  Training resumes
    at epoch ``len(self.logger.log)``.  After each epoch the validation loss
    is logged and the model is saved when it is the first epoch or the
    validation loss dropped below the previous epoch's.

    Args:
        data: Training samples.
        val_data: Validation samples.
        nb_epochs: Total number of epochs to reach.
        batch_size: Batch size for bucketing and validation slicing.
        optimizer: Optimizer class/factory called with parameters, ``lr``
            and ``weight_decay``.
        lr: Learning rate.
        tf_ratio: Not used in this method.
        stop_criterion: Not used in this method.
        use_cuda: Not used in this method; ``self.use_cuda`` is used instead.
        print_evry: A sample prediction is printed every this many batches.
    """
    if self.logger is None:
        self.encoder_optimizer = optimizer(self.encoder.parameters(), lr=lr,
                                           weight_decay=0.0000001)
        self.decoder_optimizer = optimizer(self.decoder.parameters(), lr=lr,
                                           weight_decay=0.0000001)
        self.criterion = nn.NLLLoss()
        self.logger = TrainingLogger(nb_epochs, batch_size, len(data), len(val_data))
        print("Optimizers compiled for RL training")

    rouge_calc = RougeCalculator(stopwords=False, lang="en")

    for epoch in range(len(self.logger.log), nb_epochs):
        self.logger.init_epoch(epoch)
        batches = utils.sort_and_shuffle_data(data, nb_buckets=100,
                                              batch_size=batch_size, rnd=True)

        for b in range(len(batches)):
            loss, _time = self.train_batch_rl_mc(samples=batches[b],
                                                 use_cuda=self.use_cuda,
                                                 rouge=rouge_calc)
            self.logger.add_iteration(b + 1, loss, _time)
            if b % print_evry == 0:
                preds = self.predict([data[b * batch_size]],
                                     self.config['target_length'],
                                     False, self.use_cuda)
                print('\n', " ".join([str(t[0]['word']) for t in preds]))

        for b in range(int(len(val_data) / batch_size)):
            try:
                loss, _time = self.train_batch(
                    val_data[b * batch_size:(b + 1) * batch_size],
                    self.use_cuda, backprop=False)
                self.logger.add_val_iteration(b + 1, loss, _time)
            except Exception:
                # Validation stays best-effort, but KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                print("\n", "Error during validation!")

        if epoch == 0 or self.logger.log[epoch][
                "val_loss"] < self.logger.log[epoch - 1]["val_loss"]:
            self.save_model(self.config['model_path'],
                            self.config['model_id'],
                            epoch=epoch,
                            loss=self.logger.log[epoch]["val_loss"])
def test_rouge_l(self):
    """ROUGE-L must agree with both the ``rouge_l`` function and the Pythonrouge baseline."""
    data = self.load_test_data()
    calculator = RougeCalculator(stopwords=True)
    for eval_id in data:
        entry = data[eval_id]
        summaries = entry["summaries"]
        references = entry["references"]
        for summary in summaries:
            baseline = Pythonrouge(summary_file_exist=False,
                                   summary=[[summary]],
                                   reference=[[[r] for r in references]],
                                   n_gram=1,
                                   recall_only=False,
                                   ROUGE_L=True,
                                   length_limit=True,
                                   length=50,
                                   stemming=False,
                                   stopwords=True)
            perl_scores = baseline.calc_score()
            function_score = rouge_l(
                calculator.tokenize(summary),
                [calculator.tokenize(r) for r in references],
                0.5)
            actual = calculator.rouge_l(summary, references)
            self.assertLess(abs(function_score - actual), 1e-5)
            self.assertLess(abs(perl_scores["ROUGE-L-F"] - actual), 1e-5)
def rouge_eval(ref_dir, dec_dir):
    """Average ROUGE-1/2/3/L (scaled to 0-100) over all files in ``ref_dir``.

    The first six characters of each reference file name select the matching
    ``<id>_decoded.txt`` in ``dec_dir``.  The lines of each file are joined
    with a single space (line endings are kept) before scoring.

    Args:
        ref_dir: Directory of reference summaries.
        dec_dir: Directory of decoded summaries.

    Returns:
        Dict with keys ``'1'``, ``'2'``, ``'3'`` and ``'l'`` holding the mean
        scores multiplied by 100.

    Raises:
        ZeroDivisionError: If ``ref_dir`` is empty.
    """
    rouge = RougeCalculator(stopwords=True, lang="en")
    rouge_1 = 0
    rouge_2 = 0
    rouge_3 = 0
    rouge_l = 0

    # List the directory once so the count and the loop see the same files.
    filenames = os.listdir(ref_dir)
    num_files = len(filenames)

    for filename in filenames:
        file_id = filename[:6]
        ref_file = os.path.join(ref_dir, filename)
        dec_file = os.path.join(dec_dir, '%s_decoded.txt' % file_id)

        # ``with`` closes each handle promptly rather than leaving it to
        # garbage collection.
        with open(ref_file, 'r') as handle:
            ref_sum = ' '.join(handle.readlines())
        with open(dec_file, 'r') as handle:
            dec_sum = ' '.join(handle.readlines())

        rouge_1 += rouge.rouge_n(dec_sum, ref_sum, n=1)
        rouge_2 += rouge.rouge_n(dec_sum, ref_sum, n=2)
        rouge_3 += rouge.rouge_n(dec_sum, ref_sum, n=3)
        rouge_l += rouge.rouge_l(dec_sum, ref_sum)

    return {
        '1': 100 * rouge_1 / num_files,
        '2': 100 * rouge_2 / num_files,
        '3': 100 * rouge_3 / num_files,
        'l': 100 * rouge_l / num_files
    }
class LanguageMetrics(object):
    """Static helpers that score a summary against one or more references.

    A single BLEU calculator and a single ROUGE calculator are created at
    class-definition time and shared by every call.
    """

    # Shared calculators, both built with a ``SimpleTokenizer``.
    bleu = BLEUCalculator(tokenizer=SimpleTokenizer())
    rouge = RougeCalculator(stopwords=True, lang="en", tokenizer=SimpleTokenizer())

    @staticmethod
    def _computeScore(summary, refs, criteria):
        """Call ``criteria(summary=..., references=...)``, wrapping a lone string reference in a list."""
        if isinstance(refs, str):
            refs = [refs]
        score = criteria(summary=summary, references=refs)
        return score

    @staticmethod
    def blue_score(summary, refs):
        """Return the BLEU score of ``summary`` against ``refs``."""
        score = LanguageMetrics._computeScore(summary, refs, LanguageMetrics.bleu.bleu)
        return score

    @staticmethod
    def rouge_1_score(summary, refs):
        """Return ROUGE-1 of ``summary`` against ``refs``."""
        score = LanguageMetrics._computeScore(summary, refs, LanguageMetrics.rouge.rouge_1)
        return score

    @staticmethod
    def rouge_2_score(summary, refs):
        """Return ROUGE-2 of ``summary`` against ``refs``."""
        score = LanguageMetrics._computeScore(summary, refs, LanguageMetrics.rouge.rouge_2)
        return score

    @staticmethod
    def rouge_l_score(summary, refs):
        """Return ROUGE-L of ``summary`` against ``refs``."""
        score = LanguageMetrics._computeScore(summary, refs, LanguageMetrics.rouge.rouge_l)
        return score

    @staticmethod
    def rouge_be_score(summary, refs):
        """Return ROUGE-BE of ``summary`` against ``refs``."""
        score = LanguageMetrics._computeScore(summary, refs, LanguageMetrics.rouge.rouge_be)
        return score

    @staticmethod
    def rouge_n_score(summary, refs, n):
        """Return ROUGE-N of order ``n`` for ``summary`` against ``refs``."""
        # functools.partial takes the callable as its first positional
        # argument; passing it as ``func=`` raises TypeError.
        rouge_n = partial(LanguageMetrics.rouge.rouge_n, n=n)
        score = LanguageMetrics._computeScore(summary, refs, rouge_n)
        return score
def __init__(self, rouge_variants, remove_stopwords=False, stem=False):
    """Select the ROUGE scoring functions named in ``rouge_variants``.

    Args:
        rouge_variants: Iterable of variant names.  Recognized names are
            ``"ROUGE-1-F"``, ``"ROUGE-2-F"`` and ``"ROUGE-L-F"``; any other
            name is reported on stdout and skipped.
        remove_stopwords: Passed to ``RougeCalculator`` as ``stopwords``.
        stem: Passed to ``RougeCalculator`` as ``stemming``.
    """
    self.rouge_variants = rouge_variants
    self.rouge_calc = RougeCalculator(stopwords=remove_stopwords,
                                      lang="en",
                                      stemming=stem)
    calc = self.rouge_calc
    self.availeble_calcs = {
        "ROUGE-1-F": calc.rouge_1,
        "ROUGE-2-F": calc.rouge_2,
        "ROUGE-L-F": calc.rouge_l,
    }

    self.calcs_to_use = []
    for variant in self.rouge_variants:
        if variant not in self.availeble_calcs:
            print("Rouge variant not useable", variant)
            continue
        self.calcs_to_use.append(self.availeble_calcs[variant])
def test_rouge_be_hm(self):
    """ROUGE-BE for types HM and HMR must equal ROUGE-1 over the BE-derived words."""
    calculator = RougeCalculator(stopwords=False, lang="ja")
    summaries = ["私はきれいな花が好きで、きれいな花には目がない。"]
    references = ["きれいな花が好きだ", "私はきれいな花に目がない"]

    # The references are parsed once; only the word conversion depends on the BE type.
    reference_bes = [calculator.parse_to_be(ref) for ref in references]

    for be_type in ["HM", "HMR"]:
        reference_words = [self._bes_to_words(bes, be_type) for bes in reference_bes]
        for summary in summaries:
            summary_words = self._bes_to_words(calculator.parse_to_be(summary), be_type)
            expected = calculator.rouge_n(summary_words, reference_words, n=1)
            actual = calculator.rouge_be(summary, references, be_type)
            self.assertLess(abs(expected - actual), 1e-5)