Пример #1
0
def sentence_rouge(reflex, genlex):
    """Return the ``'ROUGE-L-F'`` score of one generated text against one reference.

    Args:
        reflex: The reference text; passed to ROUGE as the only sentence of
            the only reference summary.
        genlex: The generated text; passed to ROUGE as the only sentence of
            the only system summary.

    Returns:
        The value stored under ``'ROUGE-L-F'`` in the dictionary returned by
        ``Pythonrouge.eval_rouge``.

    Note:
        ``ROUGE_path`` and ``data_path`` are read from the enclosing module
        scope; they are not defined inside this function.
    """
    scorer = Pythonrouge(
        n_gram=2,
        ROUGE_SU4=True,
        ROUGE_L=True,
        stemming=True,
        stopwords=True,
        word_level=True,
        length_limit=True,
        length=50,
        use_cf=False,
        cf=95,
        scoring_formula="average",
        resampling=True,
        samples=1000,
        favor=True,
        p=0.5,
    )

    # Wrap the bare inputs into the nested-list layout handed to setting():
    # two levels of nesting for the system side, three for the reference side.
    system_side = [[genlex]]
    reference_side = [[[reflex]]]

    config = scorer.setting(files=False,
                            summary=system_side,
                            reference=reference_side)
    scores = scorer.eval_rouge(config,
                               recall_only=False,
                               ROUGE_path=ROUGE_path,
                               data_path=data_path)
    return scores['ROUGE-L-F']
def evaluate_rouge_scores(evaluation_file_name):
    """Compute ROUGE scores for the predictions stored in a gzipped JSON file.

    The decoded JSON is iterated entry by entry; each entry is indexed with
    ``'prediction'`` (one model output) and ``'label'`` (an iterable of
    human-written references). Every text is passed through ``remove_tags``,
    UTF-8 encoded and split on whitespace before being handed to ROUGE.

    Args:
        evaluation_file_name: Path of the gzip-compressed JSON file to score.

    Returns:
        Whatever ``Pythonrouge.eval_rouge`` returns for the collected
        summaries and references (called with ``recall_only=False`` and
        ``f_measure_only=False``).

    Note:
        ``ROUGE_PATH`` and ``ROUGE_DATA`` are read from module scope.
    """
    with gzip.open(evaluation_file_name) as json_file:
        data = json.loads(json_file.read())
    print("%d entries..." % len(data))

    summaries = []   # model-generated, one token list per entry
    references = []  # human-generated, a list of token lists per entry
    for example in data:
        prediction_tokens = remove_tags(
            example['prediction']).encode('utf-8').split()
        summaries.append(prediction_tokens)

        label_tokens = [
            remove_tags(label).encode('utf-8').split()
            for label in example['label']
        ]
        references.append(label_tokens)
    print("%d entries are used for evaluation." % len(summaries))

    rouge_options = dict(
        n_gram=2,
        ROUGE_SU4=False,
        ROUGE_L=True,
        stemming=False,
        stopwords=False,
        word_level=True,
        length_limit=False,
        length=50,
        use_cf=True,
        cf=95,
        scoring_formula="average",
        resampling=False,
        samples=500,
        favor=False,
        p=0.5,
    )
    rouge = Pythonrouge(**rouge_options)

    setting_file = rouge.setting(
        files=False, summary=summaries, reference=references)
    return rouge.eval_rouge(
        setting_file,
        recall_only=False,
        ROUGE_path=ROUGE_PATH,
        data_path=ROUGE_DATA,
        f_measure_only=False,
    )
Пример #3
0
def evaluate(system_summary,
             reference_summaries,
             stemming=False,
             stopwords=False,
             use_cf=False,
             ngram=2):
    """Score one system summary against its reference summaries with ROUGE.

    Args:
        system_summary: The system output for a single article; it becomes
            the only entry of the summary list given to ROUGE.
        reference_summaries: Iterable of reference summaries for the same
            article; each one is wrapped in its own single-element list.
        stemming: Forwarded to ``Pythonrouge`` as ``stemming``.
        stopwords: Forwarded to ``Pythonrouge`` as ``stopwords``.
        use_cf: Forwarded to ``Pythonrouge`` as ``use_cf``.
        ngram: Forwarded to ``Pythonrouge`` as ``n_gram``.

    Returns:
        The result of ``Pythonrouge.eval_rouge`` for this summary/reference
        pair.
    """
    ROUGE_path = "rouge_files/ROUGE-1.5.5/ROUGE-1.5.5.pl"
    data_path = "rouge_files/ROUGE-1.5.5/data/"

    # Configure ROUGE-N up to `ngram`; SU4 and L are switched off.
    rouge = Pythonrouge(n_gram=ngram,
                        ROUGE_SU4=False,
                        ROUGE_L=False,
                        stemming=stemming,
                        stopwords=stopwords,
                        word_level=True,
                        length_limit=True,
                        length=100,
                        use_cf=use_cf,
                        cf=95,
                        scoring_formula="average",
                        resampling=True,
                        samples=1000,
                        favor=True,
                        p=0.5)

    # system summary: list of summaries, where each summary is a list of sentences
    summary = [system_summary]

    # reference summaries: list of (list of summaries per article).
    # The loop variable is deliberately NOT called `summary`: under Python 2
    # a list-comprehension variable leaks into the enclosing scope and would
    # overwrite the system summary built just above with the last reference.
    reference = [[[ref] for ref in reference_summaries]]

    setting_file = rouge.setting(files=False,
                                 summary=summary,
                                 reference=reference,
                                 temp_root='')

    result = rouge.eval_rouge(setting_file,
                              ROUGE_path=ROUGE_path,
                              data_path=data_path)

    return result
Пример #4
0
from __future__ import print_function
import sys
from pythonrouge.pythonrouge import Pythonrouge


if __name__ == '__main__':
    ROUGE_path = "./pythonrouge/RELEASE-1.5.5/ROUGE-1.5.5.pl"
    data_path = "./pythonrouge/RELEASE-1.5.5/data"
    summary_dir = "./sample/summary/"
    reference_dir = "./sample/reference/"
    # setting rouge options
    rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stemming=True, stopwords=True, word_level=True, length_limit=True, length=50, use_cf=False, cf=95, scoring_formula="average", resampling=True, samples=1000, favor=True, p=0.5)
    print("evaluate sumamry & reference in these dir\nsummary: {}\nreference: {}".format(summary_dir, reference_dir))
    print("\nAll metric")
    setting_file = rouge.setting(files=True, summary_path=summary_dir, reference_path=reference_dir)
    print(rouge.eval_rouge(setting_file, ROUGE_path=ROUGE_path, data_path=data_path))
    print("\nRecall Only and save setting.xml")
    setting_file = rouge.setting(files=True, summary_path=summary_dir, reference_path=reference_dir, delete=False)
    print(rouge.eval_rouge(setting_file, recall_only=True, ROUGE_path=ROUGE_path, data_path=data_path))
    print("\nEvaluate ROUGE based on sentecnce lists")
    summary = [["Great location, very good selection of food for breakfast buffet.",
                "Stunning food, amazing service.",
                "The food is excellent and the service great."],
                ["The keyboard, more than 90% standard size, is just large enough .",
                "Surprisingly readable screen for the size .",
                "Smaller size videos   play even smoother ."]]
    reference = [[["Food was excellent with a wide range of choices and good services.", "It was a bit expensive though."],
               ["Food can be a little bit overpriced, but is good for a hotel."],
               ["The food in the hotel was a little over priced but excellent in taste and choice.",
               "There were also many choices to eat in the near vicinity of the hotel."],
               ["The food is good, the service great.",
Пример #5
0
# Initialize ROUGE settings: evaluate ROUGE-1, 2, SU4 and L with the
# stopwords option enabled.
rouge = Pythonrouge(n_gram=2, ROUGE_SU4=True, ROUGE_L=True, stopwords=True)

path = "../Summaries"
modelGeneratedSummariesPath = path + "/modelGenerated/1.txt"
goldReferenceSummariesPath = path + "/goldReference/1.txt"
avgRougeScoreCounter = Counter({})

# Both files hold one document summary per line; lines are paired by position.
with open(modelGeneratedSummariesPath) as f:
    modelLines = f.readlines()
with open(goldReferenceSummariesPath) as f:
    goldLines = f.readlines()

noOfDocs = 0

# The builtin zip works on both Python 2 and Python 3 (izip is Python 2 only).
for modelDocSummary, goldDocSummary in zip(modelLines, goldLines):
    # A model line of length 1 is skipped (presumably a bare newline, i.e.
    # an empty summary -- confirm against the data files).
    if len(modelDocSummary) == 1:
        continue
    modelSummaryInputToRouge = [[modelDocSummary]]
    goldSummaryInputToRouge = [[[goldDocSummary]]]
    setting_file = rouge.setting(files=False,
                                 summary=modelSummaryInputToRouge,
                                 reference=goldSummaryInputToRouge)
    rougeScore = rouge.eval_rouge(setting_file, recall_only=True)
    # update() accumulates like `+=` but keeps entries whose running total is
    # zero; `Counter += Counter` silently drops non-positive totals, which
    # made metrics scoring 0.0 disappear from the final report.
    avgRougeScoreCounter.update(rougeScore)
    noOfDocs += 1

# Per-metric mean over the scored documents. When nothing was scored the
# counter is empty, so no division by zero happens.
avgRougeScore = {
    k: v / float(noOfDocs) for k, v in avgRougeScoreCounter.items()
}
print(avgRougeScore)
Пример #6
0
                    ROUGE_SU4=True,
                    ROUGE_L=True,
                    stemming=True,
                    stopwords=True,
                    word_level=True,
                    length_limit=True,
                    length=50,
                    use_cf=False,
                    cf=95,
                    scoring_formula="average",
                    resampling=True,
                    samples=1000,
                    favor=True,
                    p=0.5)

# One system summary and one reference summary, each a single sentence.
summary = [
    [" Tokyo is the one of the biggest city in the world."],
]
reference = [
    [
        ["The capital of Japan, Tokyo, is the center of Japanese economy."],
    ],
]

# files=False is used when the summaries are given as sentence lists, as here.
setting_file = rouge.setting(summary=summary, reference=reference, files=False)

# recall_only=True asks for only the recall of the ROUGE metrics.
result = rouge.eval_rouge(
    setting_file,
    ROUGE_path=ROUGE_path,
    data_path=data_path,
    recall_only=True,
)
print(result)
Пример #7
0
import sys
# Four positional command-line arguments are required; fail with a usage
# message instead of a bare IndexError when any is missing.
if len(sys.argv) < 5:
    sys.exit("usage: %s ROUGE-1.5.5.pl DATA_DIR SUMMARY_DIR REFERENCE_DIR"
             % sys.argv[0])

ROUGE_path = sys.argv[1]  # ROUGE-1.5.5.pl
data_path = sys.argv[2]  # data folder in RELEASE-1.5.5
summary_dir = sys.argv[3]
reference_dir = sys.argv[4]

# Initialize ROUGE settings: evaluate ROUGE-1~4, SU4 and L.
rouge = Pythonrouge(n_gram=4,
                    ROUGE_SU4=True,
                    ROUGE_L=True,
                    stemming=True,
                    stopwords=True,
                    word_level=True,
                    length_limit=True,
                    length=50,
                    use_cf=False,
                    cf=95,
                    scoring_formula="average",
                    resampling=True,
                    samples=1000,
                    favor=True,
                    p=0.5)

# Build the setting file with files=True because the summaries and references
# have already been saved as files in the given directories.
setting_file = rouge.setting(files=True,
                             summary_path=summary_dir,
                             reference_path=reference_dir)
result = rouge.eval_rouge(setting_file,
                          ROUGE_path=ROUGE_path,
                          data_path=data_path)
# Parenthesised form prints identically on Python 2 and is valid on Python 3.
print(result)