Exemplo n.º 1
0
def eval_rouge(reference, candidate, log_path):
    """Dump reference/candidate texts to files and score them with PythonROUGE.

    Each ``reference[i]`` / ``candidate[i]`` pair is written to its own
    UTF-8 file (``<log_path>reference/reference<i>`` and
    ``<log_path>candidate/candidate<i>``); the resulting file paths are then
    handed to ``PythonROUGE`` with ``ngram_order=2``.

    Args:
        reference: Sequence of reference items. Every item is passed through
            ``"".join``, so it must be a string or an iterable of strings.
        candidate: Sequence of candidate items; must have the same length as
            ``reference`` and the same item type.
        log_path: Path prefix for the output directories. It is concatenated
            verbatim with ``'reference/'`` and ``'candidate/'``, so it should
            end with a path separator when it names a directory.

    Returns:
        dict: keys ``'recall'``, ``'precision'`` and ``'F_measure'``, each
        mapping to the list of scores returned by ``PythonROUGE`` multiplied
        by 100 and rounded to two decimals.

    Raises:
        AssertionError: If ``reference`` and ``candidate`` differ in length
            (not raised when Python runs with ``-O``).
    """
    assert len(reference) == len(candidate)

    # The marker contains a literal backslash followed by "s". It is spelled
    # with an escaped backslash because "\s" is an invalid escape sequence in
    # a regular string literal; the runtime value is unchanged.
    # NOTE(review): looks like a sentence-boundary token - confirm with the
    # code that produces the inputs.
    sentence_sep = " <\\s> "

    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    for directory in (ref_dir, cand_dir):
        # exist_ok=True avoids the check-then-create race of
        # os.path.exists + os.mkdir and also creates missing parents.
        os.makedirs(directory, exist_ok=True)

    reference_set = []  # one single-element list of paths per example
    candidate_set = []  # one path per example
    for i, (ref, cand) in enumerate(zip(reference, candidate)):
        ref_path = ref_dir + 'reference' + str(i)
        cand_path = cand_dir + 'candidate' + str(i)
        with codecs.open(ref_path, 'w', 'utf-8') as f:
            f.write("".join(ref).replace(sentence_sep, "\n") + '\n')
        with codecs.open(cand_path, 'w', 'utf-8') as f:
            f.write("".join(cand).replace(sentence_sep, "\n") + '\n')
        reference_set.append([ref_path])
        candidate_set.append(cand_path)

    recall, precision, F_measure = PythonROUGE(candidate_set,
                                               reference_set,
                                               ngram_order=2)

    # Report the scores scaled by 100 with two decimals.
    recall = [round(score * 100, 2) for score in recall]
    precision = [round(score * 100, 2) for score in precision]
    F_measure = [round(score * 100, 2) for score in F_measure]

    return {'recall': recall, 'precision': precision, 'F_measure': F_measure}
Exemplo n.º 2
0
def eval_multi_rouge(reference, candidate, log_path, ref_paths=None):
    """Score candidates against several line-aligned reference files.

    Every ``candidate[i]`` is written to ``<log_path>candidate/candidate<i>``.
    Each file in ``ref_paths`` is read, stripped and split on newlines;
    line ``j`` of reference file ``k`` is written to
    ``<log_path>reference/reference_<k>_<j>`` and registered as a reference
    for candidate ``j``. The file paths are then handed to ``PythonROUGE``
    with ``ngram_order=2`` and ``byte=75``.

    Args:
        reference: Unused; kept so existing callers keep working.
        candidate: Sequence of candidate items. Every item is passed through
            ``"".join``, so it must be a string or an iterable of strings.
        log_path: Path prefix for the output directories. It is concatenated
            verbatim with ``'reference/'`` and ``'candidate/'``, so it should
            end with a path separator when it names a directory.
        ref_paths: Optional sequence of UTF-8 reference files, one reference
            per line. Defaults to the four ``task1_ref<k>.txt`` files
            (k = 0..3) under
            ``/home/mashuming/data/summarization/giga/DUC2004/``.

    Returns:
        dict: keys ``'recall'``, ``'precision'`` and ``'F_measure'``, each
        mapping to the list of scores returned by ``PythonROUGE`` multiplied
        by 100 and rounded to two decimals.

    Raises:
        IndexError: If a reference file contains more lines than there are
            candidates.
    """
    if ref_paths is None:
        ref_paths = [
            '/home/mashuming/data/summarization/giga/DUC2004/task1_ref'
            + str(k) + '.txt'
            for k in range(4)
        ]

    # The marker contains a literal backslash followed by "s". It is spelled
    # with an escaped backslash because "\s" is an invalid escape sequence in
    # a regular string literal; the runtime value is unchanged.
    sentence_sep = " <\\s> "

    ref_dir = log_path + 'reference/'
    cand_dir = log_path + 'candidate/'
    for directory in (ref_dir, cand_dir):
        # exist_ok=True avoids the check-then-create race of
        # os.path.exists + os.mkdir and also creates missing parents.
        os.makedirs(directory, exist_ok=True)
    ref_file = ref_dir + 'reference'
    cand_file = cand_dir + 'candidate'

    candidate_set = []
    for i, cand in enumerate(candidate):
        cand_path = cand_file + str(i)
        with codecs.open(cand_path, 'w', 'utf-8') as f:
            f.write("".join(cand).replace(sentence_sep, "\n") + '\n')
        candidate_set.append(cand_path)

    # reference_set[j] collects the paths of all references for candidate j.
    # A candidate keeps an empty list when the reference files have fewer
    # lines than there are candidates.
    reference_set = [[] for _ in candidate_set]
    for k, path in enumerate(ref_paths):
        # Context manager so the reference file handle is always closed.
        with codecs.open(path, 'r', 'utf8') as src:
            lines = src.read().strip().split('\n')
        for j, line in enumerate(lines):
            ref_path = ref_file + '_' + str(k) + '_' + str(j)
            with codecs.open(ref_path, 'w', 'utf8') as f:
                f.write(line + '\n')
            reference_set[j].append(ref_path)

    recall, precision, F_measure = PythonROUGE(candidate_set, reference_set,
                                               ngram_order=2, byte=75)

    # Report the scores scaled by 100 with two decimals.
    recall = [round(score * 100, 2) for score in recall]
    precision = [round(score * 100, 2) for score in precision]
    F_measure = [round(score * 100, 2) for score in F_measure]

    return {'recall': recall, 'precision': precision, 'F_measure': F_measure}