Пример #1
0
    def test_order_information(self):
        """
        Check the order of MEAD-selected content after information ordering.

        Runs content selection and then information ordering on the fixture
        documents, and compares the first three selected sentences with the
        expected sequence (positions 1, 3, 2 of the same document).
        :return:
        """
        doc_id = 'TST_ENG_20190101.0001'
        # (text, position) pairs listed in the order the generator should emit them.
        expected_specs = (
            ('Puppies love playing fetch.', 1),
            ("He loves playing so he liked to run around with the other dogs playing fetch.", 3),
            ('They all ran around with their tails wagging '
             'and their tongues hanging out having loads of fun in the sun.', 2),
        )
        expected_order = [
            Sentence(text, position, doc_id)
            for text, position in expected_specs
        ]

        # Shared word mapping and frequency vectors must exist before selection runs.
        WordMap.word_set = self.w_set
        WordMap.create_mapping()
        Vectors().create_freq_vectors(self.topics)

        generator = MeadSummaryGenerator(
            self.doc_list,
            MeadContentSelector(),
            self.args,
        )
        generator.select_content(self.idf)
        generator.order_information()

        self.assertListEqual(
            expected_order,
            generator.content_selector.selected_content[:3],
        )
Пример #2
0
    def test_select_content(self):
        """Check that the lead selector picks the expected sentence from each document."""
        first_doc_id = 'TST_ENG_20190101.0001'
        second_doc_id = 'TST_ENG_20190101.0002'
        first_lead = 'In a park somewhere, a bunch of puppies played fetch with their owners today.'
        second_lead = 'I took my small puppy to the dog park today.'

        selector = LeadSentenceSelector()
        documents = [Document(doc_id) for doc_id in (first_doc_id, second_doc_id)]
        expected = [
            Sentence(first_lead, 1, first_doc_id),
            Sentence(second_lead, 1, second_doc_id),
        ]

        selector.select_content(documents, [])

        # Order of the selected sentences is not part of this check.
        self.assertCountEqual(expected, selector.selected_content)
Пример #3
0
    def test_get_sentence_position(self):
        """Check the position score for sentences at positions 0 and 50, with 100 as second argument."""
        selector = MeadContentSelector()
        at_start = Sentence("Here is a test sentence.", 0)
        at_fifty = Sentence("Here is another one.", 50)

        # Compute both scores first, then compare, as the assertions are independent.
        score_at_start = selector.get_sentence_position(at_start, 100)
        score_at_fifty = selector.get_sentence_position(at_fifty, 100)

        self.assertEqual(1, score_at_start)
        self.assertEqual(50 / 100, score_at_fifty)
Пример #4
0
    def test_remove_parens(self):
        """Check that compression drops a parenthesised aside."""
        original = Sentence("The puppy (aka Mr. Mayor) was the cutest.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "The puppy was the cutest.",
        )
Пример #5
0
    def test_remove_initial_conj(self):
        """Check that a leading "But," is removed and the result is capitalised."""
        original = Sentence("But, puppies are great.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "Puppies are great.",
        )
Пример #6
0
    def test_remove_adverbs(self):
        """Check that the adverbs "quickly" and "loudly" are removed by compression."""
        original = Sentence("Puppies love running quickly and playing loudly.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "Puppies love running and playing.",
        )
Пример #7
0
    def test_remove_junk(self):
        """Check that a "Seattle, WA ---" style prefix is stripped by compression."""
        original = Sentence("Seattle, WA --- Puppies are great.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "Puppies are great.",
        )
Пример #8
0
    def test_remove_npadvmod(self):
        """Check that "Joe said Saturday that" is removed, leaving only the reported clause."""
        original = Sentence("Joe said Saturday that the park was full of puppies.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "The park was full of puppies.",
        )
Пример #9
0
    def test_remove_appositives(self):
        """Check that a comma-delimited appositive phrase is removed by compression."""
        original = Sentence("Dennis, the cutest puppy in the park, ran towards the ball.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "Dennis ran towards the ball.",
            "\n".join(item.compressed for item in compressed),
        )
Пример #10
0
    def test_remove_temporal_mod(self):
        """Check that the leading time phrase "By 8 a.m. on Saturday" is removed."""
        original = Sentence("By 8 a.m. on Saturday the park was full of puppies.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "The park was full of puppies.",
        )
Пример #11
0
    def test_remove_attributions(self):
        """Check that the attribution "Julia said that" is removed by compression."""
        original = Sentence("Julia said that puppies are cute.", 1)
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "Puppies are cute.",
        )
Пример #12
0
    def test_remove_attribution_phrases(self):
        """Check that a multi-word attribution with a prepositional phrase is removed."""
        original = Sentence(
            "Seattle State Bureau of Animal Rating said in a press release that puppies are cute.",
            1,
        )
        compressed = self.realizer.compress_sentences([original])

        self.assertEqual(
            "\n".join(item.compressed for item in compressed),
            "Puppies are cute.",
        )
Пример #13
0
    def test_order_information(self):
        """Check the order of lead-selected sentences after information ordering."""
        later_doc_id = 'TST20190201.0001'
        later_text = 'Puppies are cute because many of them are small.'
        earlier_doc_id = 'TST_ENG_20190101.0001'
        earlier_text = 'In a park somewhere, a bunch of puppies played fetch with their owners today.'

        # The sentence from 'TST_ENG_20190101.0001' is expected to come first.
        expected_order = [
            Sentence(earlier_text, 1, earlier_doc_id),
            Sentence(later_text, 1, later_doc_id),
        ]

        documents = [
            Document(doc_id)
            for doc_id in ('TST_ENG_20190101.0001', 'TST20190201.0001')
        ]
        generator = LeadSummaryGenerator(documents, LeadSentenceSelector(), [])
        generator.select_content()
        generator.order_information()

        selected = generator.content_selector.selected_content
        self.assertListEqual(expected_order, selected)
Пример #14
0
    def test_bad(self):
        """Check compression of a long sentence combining attribution, time phrase, appositives and measurements."""
        # The input is kept on one line so that its exact spacing is preserved.
        long_text = "Heilongjiang Provincial Bureau of Environmental Protection said in a press release that by 6 a.m. on Saturday, concentration of nitrobenzene monitored at Sujiatun upstream Sifangtai, one major water intake spot of Harbin, capital of northeast China's Heilongjiang Province, fell to 0.0793 mg per liter, but above the state safety standard of 0.017 mg per liter, but the density of benzene stood at 0.0011 mg per liter, which is within   the state safety benchmark."
        original = Sentence(long_text, 1)

        compressed = self.realizer.compress_sentences([original])
        summary = "\n".join(item.compressed for item in compressed)

        expected = (
            "Concentration of nitrobenzene monitored at Sujiatun upstream Sifangtai fell, "
            "but above the state safety standard, but the density of benzene stood, "
            "which is within the state safety benchmark."
        )
        self.assertEqual(summary, expected)
Пример #15
0
    def test_get_centroid_score(self):
        """Check the centroid score of one sentence against the cluster centroid ('mean' threshold)."""
        selector = MeadContentSelector()
        sentence = Sentence("Puppies love playing fetch.", 0)
        self.args.c_threshold = 'mean'

        # Install the fixture word set / mapping, then build the frequency vectors.
        WordMap.word_set = self.w_set
        WordMap.word_to_id = self.w_map
        Vectors().create_freq_vectors(self.topics)

        centroid = selector.get_cluster_centroid(
            self.doc_list,
            self.idf,
            self.args.c_threshold,
        )
        actual_score = selector.get_centroid_score(sentence, centroid)

        # Compared to one decimal place only.
        self.assertAlmostEqual(6.3, actual_score, 1)
    def test_process_sentence(self):
        """
        Check the features a Sentence exposes after preprocessing.

        Builds one Sentence at position 0 and compares, in order, its tokens,
        word count, first-sentence flag, position and document id with the
        expected values.
        """
        Preprocessor.load_models()
        test_sentence = "In a park somewhere, a bunch of puppies played fetch with their owners today."
        doc_id = "TST_ENG_20190101.0001"
        s = Sentence(test_sentence, 0, doc_id)

        features = [
            s.tokenized(),
            s.word_count(),
            s.is_first_sentence(),
            s.position(),
            s.document_id(),
        ]
        expected_features = [
            ['park', 'somewhere', 'bunch', 'puppy', 'play', 'fetch', 'owner', 'today'],
            14,
            True,
            0,
            'TST_ENG_20190101.0001',
        ]

        # The list is positional (each slot is a different feature), so it must
        # be compared in order. assertCountEqual would accept swapped slots and,
        # since True == 1 and 0 == False, could hide a flag/position mix-up.
        self.assertListEqual(features, expected_features)
Пример #17
0
class MeldaSentenceCompressionTests(unittest.TestCase):
    """
    Tests for sentence compression via MeldaContentRealizer.compress_sentences.

    Each test feeds a single sentence through the realizer and compares the
    newline-joined ``compressed`` text of the result with the expected string.
    """
    # Executed while the class body runs (i.e. at import time), before the
    # fixture sentences below are built.
    # NOTE(review): presumably Sentence construction needs the loaded models;
    # confirm before moving this into setUpClass.
    Preprocessor.load_models()

    s0 = Sentence(
        "In a park somewhere, a bunch of puppies played fetch with their owners today.",
        1)
    s1 = Sentence("I took my small puppy to the dog park today.", 1)
    s2 = Sentence(
        "He loves playing so he liked to run around with the other dogs playing fetch.",
        1)
    s3 = Sentence("Puppies love playing fetch.", 1)

    input_summary = [s0, s1, s2, s3]

    args = parse_args(['test_data/test_topics.xml', 'test'])
    args.n = 1

    # Shared realizer used by every test in this class.
    realizer = MeldaContentRealizer()

    def _compress(self, text):
        """
        Compress one sentence and return the resulting text.

        :param text: raw sentence text; wrapped in ``Sentence(text, 1)``
        :return: the ``compressed`` attribute of every returned sentence,
                 joined with newlines
        """
        compressed = self.realizer.compress_sentences([Sentence(text, 1)])
        return "\n".join(item.compressed for item in compressed)

    def test_remove_adverbs(self):
        """Check that the adverbs "quickly" and "loudly" are removed."""
        summary = self._compress("Puppies love running quickly and playing loudly.")

        expected = "Puppies love running and playing."
        self.assertEqual(summary, expected)

    def test_remove_initial_conj(self):
        """Check that a leading "But," is removed and the result is capitalised."""
        summary = self._compress("But, puppies are great.")

        expected = "Puppies are great."
        self.assertEqual(summary, expected)

    def test_remove_parens(self):
        """Check that a parenthesised aside is removed."""
        summary = self._compress("The puppy (aka Mr. Mayor) was the cutest.")

        expected = "The puppy was the cutest."
        self.assertEqual(summary, expected)

    def test_remove_appositives(self):
        """Check that a comma-delimited appositive phrase is removed."""
        summary = self._compress(
            "Dennis, the cutest puppy in the park, ran towards the ball.")

        expected = "Dennis ran towards the ball."
        self.assertEqual(expected, summary)

    def test_remove_junk(self):
        """Check that a "Seattle, WA ---" style prefix is stripped."""
        summary = self._compress("Seattle, WA --- Puppies are great.")

        expected = "Puppies are great."
        self.assertEqual(summary, expected)

    def test_remove_attributions(self):
        """Check that the attribution "Julia said that" is removed."""
        summary = self._compress("Julia said that puppies are cute.")

        expected = "Puppies are cute."
        self.assertEqual(summary, expected)

    def test_remove_attribution_phrases(self):
        """Check that a multi-word attribution with a prepositional phrase is removed."""
        summary = self._compress(
            "Seattle State Bureau of Animal Rating said "
            "in a press release that puppies are cute.")

        expected = "Puppies are cute."
        self.assertEqual(summary, expected)

    def test_remove_temporal_mod(self):
        """Check that the leading time phrase "By 8 a.m. on Saturday" is removed."""
        summary = self._compress("By 8 a.m. on Saturday the park was full of puppies.")

        expected = "The park was full of puppies."
        self.assertEqual(summary, expected)

    def test_remove_mod_rel(self):
        """Check that an attribution followed by a time phrase is removed as a whole."""
        summary = self._compress(
            "Joe said that by 8 a.m. on Saturday the park was full of puppies.")

        expected = "The park was full of puppies."
        self.assertEqual(summary, expected)

    def test_bad(self):
        """Check compression of a long sentence combining attribution, time phrase, appositives and measurements."""
        # The input is kept on one line so that its exact spacing is preserved.
        summary = self._compress(
            "Heilongjiang Provincial Bureau of Environmental Protection said in a press release that by 6 a.m. on Saturday, concentration of nitrobenzene monitored at Sujiatun upstream Sifangtai, one major water intake spot of Harbin, capital of northeast China's Heilongjiang Province, fell to 0.0793 mg per liter, but above the state safety standard of 0.017 mg per liter, but the density of benzene stood at 0.0011 mg per liter, which is within   the state safety benchmark.")

        expected = "Concentration of nitrobenzene monitored at Sujiatun upstream Sifangtai fell, " \
                   "but above the state safety standard, but the density of benzene stood, " \
                   "which is within the state safety benchmark."
        self.assertEqual(summary, expected)

    def test_remove_npadvmod(self):
        """Check that "Joe said Saturday that" is removed, leaving only the reported clause."""
        summary = self._compress("Joe said Saturday that the park was full of puppies.")

        expected = "The park was full of puppies."
        self.assertEqual(summary, expected)