def compute_rouge_abstract_score(sentence, abstract):
    """
    Scores a single sentence against a paper's abstract using ROUGE.
    :param sentence: the sentence to score, given as a list of words.
    :param abstract: the abstract to score against, given as a list of strings.
    :return: the value Rouge.calc_score produces for the sentence and the abstract.
    """
    scorer = Rouge()

    # The scorer is handed the candidate as a one-element list holding a single string, so the words are
    # joined with spaces first.
    candidate = " ".join(sentence)
    return scorer.calc_score([candidate], abstract)
Пример #2
0
# Sanity checks: both inputs must be non-empty, their first entries must be tuples, and indexing
# [0][0][0] into either input must yield a str.
assert len(golds) > 0
assert len(generateds) > 0
assert type(golds[0]) is tuple
assert type(generateds[0]) is tuple
assert type(golds[0][0][0]) is str
assert type(generateds[0][0][0]) is str

r = Rouge()
summaries_and_scores = []

for gold, ref in zip(golds, generateds):
    # Score each generated sentence on its own against the gold summary.
    sentence_scores = [r.calc_score([sent], gold[0]) for sent in ref[0]]

    # Mean sentence score; a generated summary with no sentences scores 0.
    if sentence_scores:
        avg_score = sum(sentence_scores) / len(sentence_scores)
    else:
        avg_score = 0

    summaries_and_scores.append((gold[0], ref[0], avg_score, gold[1]))

# Order the results from the lowest average score to the highest.
summaries_and_scores.sort(key=itemgetter(2))

for item in summaries_and_scores:
    print("\n")
    print("SCORE: ", item[2])
    print()
Пример #3
0
            # Join each abstract sentence into a single string; these strings are the references that every
            # sentence is scored against below (presumably each sentence is a list of words — TODO confirm).
            abstract = paper["ABSTRACT"]
            abstract_join = [" ".join(x) for x in abstract]

            # NOTE(review): nothing in the visible code reads this list — confirm whether it is still needed.
            sentences = []

            # Iterate over every section of the paper. No section is excluded, so the abstract itself is also
            # scored against the abstract.
            # NOTE(review): dict.iteritems() exists only on Python 2; Python 3 needs items().
            for section, sents in paper.iteritems():

                # Running total of the section's sentence scores and the number of sentences scored.
                section_avg_score = 0
                i = 0

                # Iterate over each sentence in the section
                for sentence in sents:

                    # Score the sentence against the abstract and add the score to the section's running total
                    r_score = r.calc_score([" ".join(sentence)], abstract_join)
                    section_avg_score += r_score
                    i += 1

                # Turn the total into a mean; a section with no sentences averages to 0 instead of dividing by zero.
                if i > 0:
                    section_avg_score /= i
                else:
                    section_avg_score = 0

                # Record this paper's average for the section.
                # Assumes rouge_by_section already maps every section title to a list — TODO confirm.
                rouge_by_section[section].append(section_avg_score)

            # Periodic checkpoint: count is maintained outside this excerpt (presumably the number of papers
            # processed so far). Opening with "wb" overwrites the previous checkpoint file each time.
            if count % 1000 == 0:
                print("\nWriting data...")
                write_dir = BASE_DIR + "/Data/Generated_Data/Rouge_By_Section_Abstract/"
                with open(write_dir + "rouge_by_section_abstract_list.pkl", "wb") as f:
                    pickle.dump(rouge_by_section, f)
Пример #4
0
    def prepare_data(self):
        """
        Builds a balanced set of labelled sentences for every paper found in PAPER_SOURCE.

        Every sentence outside the abstract is scored with ROUGE against the paper's highlights. The
        self.num_summary best-scoring sentences are labelled 1 and an equal number of the worst-scoring
        sentences are labelled 0. A paper is skipped when fewer sentences remain after the positives are
        taken than there are positives. The data gathered so far is pickled to disk whenever a retained
        paper brings the count of paper files seen to a multiple of 1000.
        :return: a list with one dictionary per retained paper, with the keys "filename", "title", "gold",
                 "abstract", "sentences" (shuffled (sentence, section, classification) tuples) and
                 "description".
        """

        # Number of paper files seen so far; drives the loading bar and the periodic checkpoint.
        count = 0

        # One dictionary per retained paper.
        data = []

        r = Rouge()

        # Iterate over every file in the paper directory
        for filename in os.listdir(PAPER_SOURCE):

            # Ignores files which are not papers e.g. hidden files
            if not filename.endswith(".txt"):
                continue

            # Display a loading bar of progress
            useful_functions.loading_bar(self.loading_section_size, count,
                                         self.number_of_papers)
            count += 1

            # Opens the paper as a dictionary, with keys corresponding to section titles and values corresponding
            # to the text in that section. The text is given as a list of lists, each list being a list of words
            # corresponding to a sentence.
            paper = useful_functions.read_in_paper(filename,
                                                   sentences_as_lists=True)

            # The highlights, joined into one string per highlight, are the ROUGE references.
            highlights = paper["HIGHLIGHTS"]
            highlights_join = [" ".join(x) for x in highlights]
            abstract = paper["ABSTRACT"]

            # (sentence, ROUGE score, section) for every scored sentence.
            scored = []

            # items() is used instead of iteritems() because iteritems() does not exist on Python 3, while
            # items() behaves the same for this loop on both Python 2 and Python 3.
            for section, sents in paper.items():

                # We don't want to calculate ROUGE for the abstract
                # NOTE(review): only the abstract is excluded, so the highlights' own sentences are scored
                # against the highlights as well — confirm that this is intended.
                if section == "ABSTRACT":
                    continue

                for sentence in sents:
                    r_score = r.calc_score([" ".join(sentence)],
                                           highlights_join)
                    scored.append((sentence, r_score, section))

            # Best score first. An ascending sort followed by a reversal is kept (rather than reverse=True)
            # so that sentences with equal scores end up in the same relative order as they always have.
            scored.sort(key=itemgetter(1))
            scored.reverse()
            ranked = [(sentence, section) for sentence, _, section in scored]

            sents_pos = ranked[:self.num_summary]
            sents_neg = ranked[self.num_summary:]

            # Not enough remaining sentences to balance the positives: skip this paper.
            if len(sents_neg) < len(sents_pos):
                continue

            # Negatives are taken from the bottom of the ranking, as many as there are positives.
            sents_neg = sents_neg[::-1][:len(sents_pos)]

            sents_pos = [(sentence, section, 1) for sentence, section in sents_pos]
            sents_neg = [(sentence, section, 0) for sentence, section in sents_neg]
            sents_class = sents_pos + sents_neg
            random.shuffle(sents_class)

            # Each item in the sentence list has form (sentence, section, classification)
            data.append({
                "filename": filename,
                "title": paper["MAIN-TITLE"],
                "gold": highlights,
                "abstract": abstract,
                "sentences": sents_class,
                "description": "All text data is given in the form of a list of words.",
            })

            # Periodic checkpoint; opening with "wb" overwrites the previous checkpoint file.
            if count % 1000 == 0:
                print("\nWriting data...")
                write_dir = BASE_DIR + "/Data/Generated_Data/Sentences_And_SummaryBool/Abstract_Neg/AbstractNet/"
                with open(write_dir + "data.pkl", "wb") as f:
                    pickle.dump(data, f)
                print("Done")

        return data
Пример #5
0
            # Join each highlight into a single string; these strings are the references that every sentence
            # is scored against below. highlights is defined outside this excerpt.
            highlights_join = [" ".join(x) for x in highlights]
            # NOTE(review): abstract is not read anywhere in the visible code — confirm whether it is needed.
            abstract = paper["ABSTRACT"]

            # NOTE(review): nothing in the visible code reads this list — confirm whether it is still needed.
            sentences = []

            # Iterate over every section of the paper; no section is excluded from scoring.
            # NOTE(review): dict.iteritems() exists only on Python 2; Python 3 needs items().
            for section, sents in paper.iteritems():

                # Running total of the section's sentence scores and the number of sentences scored.
                section_avg_score = 0
                i = 0

                # Iterate over each sentence in the section
                for sentence in sents:

                    # Score the sentence against the highlights and add the score to the section's running total
                    r_score = r.calc_score([" ".join(sentence)],
                                           highlights_join)
                    section_avg_score += r_score
                    i += 1

                # Turn the total into a mean; a section with no sentences averages to 0 instead of dividing by zero.
                if i > 0:
                    section_avg_score /= i
                else:
                    section_avg_score = 0

                # Record this paper's average for the section.
                # Assumes rouge_by_section already maps every section title to a list — TODO confirm.
                rouge_by_section[section].append(section_avg_score)

            # Periodic checkpoint: count is maintained outside this excerpt (presumably the number of papers
            # processed so far). Opening with "wb" overwrites the previous checkpoint file each time.
            if count % 1000 == 0:
                print("\nWriting data...")
                write_dir = BASE_DIR + "/Data/Generated_Data/Rouge_By_Section/"
                with open(write_dir + "rouge_by_section_list.pkl", "wb") as f:
                    pickle.dump(rouge_by_section, f)