示例#1
0
 def test_extract_content_small(self):
     """Check ``extract_content`` on empty and single-token inputs."""
     # Each case: (expected slot contents, template text, text to extract from).
     cases = [
         ([], "", ""),
         ([""], "[SLOT]", ""),
         (["", "", ""], "[SLOT] [SLOT] [SLOT]", ""),
         (["a"], "[SLOT]", "a"),
         (["[SLOT]"], "[SLOT]", "[SLOT]"),
     ]
     for slot_values, pattern_text, filled_text in cases:
         expected = _to_templates(slot_values)
         pattern = Template.from_string(pattern_text)
         actual = pattern.extract_content(Template.from_string(filled_text))
         self.assertEqual(expected, actual)
示例#2
0
def calculate_merged_string(string1, string2):
    """Return the flat string of the first merge produced for two strings.

    Both inputs are parsed with ``[SLOT]`` as the slot token and handed to
    ``Template.merge_templates_wagner_fischer`` with
    ``allow_longer_template=False``. The first item the merge yields is
    rendered through ``to_flat_string`` using a space-joining detokenizer.
    """

    def join_with_spaces(x):
        return " ".join(x)

    first_template = Template.from_string(string1, slot_token="[SLOT]")
    second_template = Template.from_string(string2, slot_token="[SLOT]")
    candidates = Template.merge_templates_wagner_fischer(
        first_template,
        second_template,
        allow_longer_template=False,
    )
    first_merge = next(candidates)
    return first_merge.to_flat_string(detokenizer=join_with_spaces)
示例#3
0
    def test_3_line_learner(self):
        """Learn from three two-word lines and compare with the expected tree."""
        learner = TemplateLatticeLearner(minimal_variables=True)
        lines = ["hello world", "hi world", "hello universe"]
        learned_tree = learner.learn(lines)

        def leaves(*sentences):
            # One childless TemplateTree per sentence, in the given order.
            return [TemplateTree(Template.from_string(text)) for text in sentences]

        expected_tree = TemplateTree(
            Template.from_string("[SLOT]"),
            [
                TemplateTree(
                    Template.from_string("[SLOT] world"),
                    leaves("hello world", "hi world"),
                ),
                TemplateTree(
                    Template.from_string("hello [SLOT]"),
                    leaves("hello world", "hello universe"),
                ),
            ],
        )
        rendered = template_tree_visualiser.render_tree_string(learned_tree)
        print(rendered)
        self.assertEqual(expected_tree, learned_tree)
示例#4
0
    def test_learn_hello_world_tree_larger(self):
        """Learn a tree from the generated strings, prune it, and check its top and leaves."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, words_per_leaf_slot=2
        )
        sentences = list(
            self.hello_world_and_world_adjective.generate_all_string()
        )
        full_tree = learner.learn(sentences)
        print(template_tree_visualiser.render_tree_string(full_tree))

        pruned_tree = full_tree.prune_redundant_abstractions()
        rendered_pruned = template_tree_visualiser.render_tree_string(pruned_tree)
        print("pruned\n", rendered_pruned)

        # The children of the pruned root must be exactly these two templates.
        actual_top = {
            child.get_template() for child in pruned_tree.get_children()
        }
        expected_top = {
            Template.from_string("The [SLOT] is [SLOT]"),
            Template.from_string("[SLOT], [SLOT]!"),
        }
        self.assertEqual(expected_top, actual_top)

        # The flat strings of the descendant leaves must match the input set.
        leaf_strings = {
            leaf.get_template().to_flat_string()
            for leaf in pruned_tree.get_descendant_leaves()
        }
        self.assertEqual(set(sentences), leaf_strings)
示例#5
0
    def test_collapse_same_children(self):
        """Collapsing a tree whose children share a template should merge those children."""

        def leaf(text):
            return TemplateTree(Template.from_string(text))

        def slotted(text):
            return Template.from_string(text, slot_token="[SLOT]")

        leaf_1 = leaf("a b c c d")
        leaf_2 = leaf("c b e e d")
        leaf_3 = leaf("h h h b f d")
        leaf_4 = leaf("i i i b g d")
        leaf_5 = leaf("j k l l d")

        # Input: two sibling nodes that carry the same "[SLOT] b [SLOT] d" template.
        split_first = TemplateTree(slotted("[SLOT] b [SLOT] d"), [leaf_1, leaf_2])
        split_second = TemplateTree(slotted("[SLOT] b [SLOT] d"), [leaf_3, leaf_4])
        split_root = TemplateTree(
            slotted("[SLOT] d"), [split_first, split_second, leaf_5]
        )

        # Expected: a single node with that template holding all four leaves.
        merged_node = TemplateTree(
            slotted("[SLOT] b [SLOT] d"),
            [leaf_1, leaf_2, leaf_3, leaf_4],
        )
        expected_root = TemplateTree(slotted("[SLOT] d"), [merged_node, leaf_5])

        collapsed = split_root.collapse()
        self.assertEqual(expected_root, collapsed)
示例#6
0
    def test_min_empty_sequence_disallow_empty_longer_2(self):
        """Best merge of two templates sharing the prefix ``x y z`` with empty strings disallowed."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, allow_empty_string=False
        )
        seven_tokens = Template.from_string("x y z a a b c")
        six_tokens = Template.from_string("x y z b c d")
        candidate = learner._get_best_merge_candidate(seven_tokens, six_tokens)

        expected = Template.from_string("x y z [SLOT]")
        self.assertEqual(expected, candidate.get_merged_template())
示例#7
0
    def test_2_line_learner(self):
        """Learn from two lines ending in ``world`` and check the root template and whole tree."""
        learner = TemplateLatticeLearner(minimal_variables=True)
        lines = ["hello world", "hi world"]
        learned_tree = learner.learn(lines)

        root_template = Template.from_string("[SLOT] world")
        leaf_trees = [TemplateTree(Template.from_string(line)) for line in lines]
        expected_tree = TemplateTree(root_template, leaf_trees)

        print(template_tree_visualiser.render_tree_string(learned_tree))
        self.assertEqual(root_template, learned_tree.get_template())
        self.assertEqual(expected_tree, learned_tree)
示例#8
0
    def test_extract_content_all_ambiguous_2(self):
        """Check every slot assignment of ``a [SLOT] a [SLOT]`` on ``a a a a`` and the chosen one."""
        pattern = Template.from_string("a [SLOT] a [SLOT]", slot_token="[SLOT]")
        target = Template.from_string("a a a a")

        candidate_fillings = (["", "a a"], ["a a", ""], ["a", "a"])
        expected_all = {_to_templates(filling) for filling in candidate_fillings}
        self.assertEqual(expected_all, pattern.extract_content_all(target))

        # The assignment with the lowest slot-length variance should be picked.
        expected_best = _to_templates(["a", "a"])
        self.assertEqual(expected_best, pattern.extract_content(target))
示例#9
0
    def test_extract_content_all_ambiguous(self):
        """Check every slot assignment of ``[SLOT] [SLOT]`` on ``a b`` and the chosen one."""
        pattern = Template.from_string("[SLOT] [SLOT]", slot_token="[SLOT]")
        target = Template.from_string("a b")

        candidate_fillings = (["a", "b"], ["a b", ""], ["", "a b"])
        expected_all = {_to_templates(filling) for filling in candidate_fillings}
        self.assertEqual(expected_all, pattern.extract_content_all(target))

        # The assignment with the lowest slot-length variance should be picked.
        expected_best = _to_templates(["a", "b"])
        self.assertEqual(expected_best, pattern.extract_content(target))
示例#10
0
    def test_min_empty_sequence_longer(self):
        """Best merge of two longer questions when empty slot content is allowed."""
        learner = TemplateLatticeLearner(
            minimal_variables=True,
            words_per_leaf_slot=2,
            allow_empty_string=True,
        )
        with_everywhere = Template.from_string(
            "who sang i want to be with you everywhere"
        )
        with_only = Template.from_string("who sang i only want to be with you")

        candidate = learner._get_best_merge_candidate(with_everywhere, with_only)
        expected = Template.from_string(
            "who sang i [SLOT] want to be with you [SLOT]"
        )
        self.assertEqual(expected, candidate.get_merged_template())
示例#11
0
    def test_equals_new_leaves(self):
        """Trees rebuilt from scratch with new leaves should equal the fixture trees."""

        def leaf(text):
            return TemplateTree(Template.from_string(text))

        def slotted(text):
            return Template.from_string(text, slot_token="[SLOT]")

        new_s1 = leaf("a b c d")
        new_s2 = leaf("a b e d")
        new_s3 = leaf("a b f d")
        new_s4 = leaf("g b h d")
        new_u1 = TemplateTree(slotted("a b [SLOT] d"), [new_s1, new_s2])
        new_u2 = TemplateTree(slotted("a b [SLOT] d"), [new_s3, new_u1])
        # Same as new_u2, but reusing the fixture's s3 leaf instead of the new one.
        new_u2_fixture_s3 = TemplateTree(
            slotted("a b [SLOT] d"), [self.s3, new_u1]
        )
        new_u3 = TemplateTree(slotted("[SLOT] b [SLOT] d"), [new_s4, new_u2])

        # Leaves first, then the inner nodes built on top of them.
        self.assertEqual(self.s1, new_s1)
        self.assertEqual(self.s2, new_s2)
        self.assertEqual(self.s3, new_s3)
        self.assertEqual(self.s4, new_s4)
        self.assertEqual(self.u1, new_u1)
        self.assertEqual(self.u2, new_u2_fixture_s3)
        self.assertEqual(self.u2, new_u2)
        self.assertEqual(self.u3, new_u3)
示例#12
0
    def test_get_best_merge_candidate_hello_world(self):
        """Merging two templates with no shared words should give ``[SLOT]`` at distance 4."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, words_per_leaf_slot=2
        )
        two_words = Template.from_string("hello world")
        three_words = Template.from_string("hi solar system")

        candidate = learner._get_best_merge_candidate(two_words, three_words)

        expected_template = Template.from_string("[SLOT]")
        merged = candidate.get_merged_template(minimal_variables=True)
        self.assertEqual(expected_template, merged)
        self.assertEqual(4, candidate.get_distance())
示例#13
0
    def test_reoccuring_slot(self):
        """Induce a grammar from two sentences and check that one slot is reused in the top template."""
        sentences = ["I like cats and dogs", "I like dogs and chickens"]
        grammar = grammar_induction.induce_grammar_using_template_trees(
            sentences,
            relative_similarity_threshold=0.1,
            minimal_variables=True,
        )

        slots = grammar.get_slots()
        self.assertEqual(2, len(slots))
        # The one slot that is not the start symbol.
        non_start_slots = [
            slot for slot in slots if slot is not grammar.get_start()
        ]
        word_slot = non_start_slots[0]

        # Exactly one template hangs off the start symbol.
        start_templates = grammar.get_content_for(grammar.get_start())
        self.assertEqual(1, len(start_templates))

        # That template must have the shape "I like [SLOT] and [SLOT]".
        start_template: Template = start_templates[0]
        expected_shape = Template.from_string("I like [SLOT] and [SLOT]")
        self.assertTrue(expected_shape.has_same_shape(start_template))

        # Both slot positions must refer to the same slot.
        distinct_slots = set(start_template.get_slots())
        self.assertEqual(1, len(distinct_slots))

        # The shared slot must hold the merged values from both sentences.
        slot_values = {
            content.to_flat_string()
            for content in grammar.get_content_for(word_slot)
        }
        self.assertEqual({"cats", "dogs", "chickens"}, slot_values)
示例#14
0
    def test_get_best_merge_candidate(self):
        """Check merged template and distance for four template pairs, two of them with a trailing period."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, words_per_leaf_slot=2
        )
        solar_is = Template.from_string("The solar system is [SLOT]")
        solar_is_period = Template.from_string("The solar system is [SLOT].")
        slot_solar = Template.from_string("[SLOT], solar system!")

        earth_is = Template.from_string("The earth is [SLOT]")
        earth_is_period = Template.from_string("The earth is [SLOT].")

        def check_merge(left, right, expected_text, expected_distance):
            # Merge the pair, then compare merged template and distance in turn.
            candidate = learner._get_best_merge_candidate(left, right)
            self.assertEqual(
                Template.from_string(expected_text),
                candidate.get_merged_template(minimal_variables=True),
            )
            self.assertEqual(expected_distance, candidate.get_distance())

        check_merge(solar_is, slot_solar, "[SLOT] solar system [SLOT]", 3)
        check_merge(solar_is, earth_is, "The [SLOT] is [SLOT]", 3)

        # Same pairs, using the variants that end with a period.
        check_merge(solar_is_period, slot_solar, "[SLOT] solar system [SLOT]", 4)
        check_merge(solar_is_period, earth_is_period, "The [SLOT] is [SLOT].", 3)
示例#15
0
 def test_slot_parsing(self):
     """Parse ``a [SLOT] c d`` and check that only the second element is a slot."""
     template = Template.from_string("a [SLOT] c d", slot_token="[SLOT]")
     self.assertEqual(4, template.get_number_of_elements())

     # Expected is_slot() result per element position.
     expected_slot_flags = [False, True, False, False]
     for position, should_be_slot in enumerate(expected_slot_flags):
         check = self.assertTrue if should_be_slot else self.assertFalse
         check(template._elements[position].is_slot())
示例#16
0
 def test_disallow_empty_string_hard(self):
     """Learn two sentence families with empty strings disallowed and compare with the expected tree."""
     sentences = [
         "I saw her on the quiet hill",
         "I saw her on the tall hill",
         "I saw her on the hill",
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
     ]
     learner = TemplateLatticeLearner(
         minimal_variables=True, allow_empty_string=False
     )
     learned_tree = learner.learn(sentences)

     def leaves(*texts):
         # One childless TemplateTree per text, in the given order.
         return [TemplateTree(Template.from_string(text)) for text in texts]

     expected_tree = TemplateTree(
         Template.from_string("[SLOT]"),
         [
             TemplateTree(
                 Template.from_string("He likes [SLOT]"),
                 [
                     TemplateTree(
                         Template.from_string("He likes [SLOT] cats"),
                         leaves("He likes cute cats", "He likes nice cats"),
                     ),
                     TemplateTree(Template.from_string("He likes cats")),
                 ],
             ),
             TemplateTree(
                 Template.from_string("I saw her on the [SLOT]"),
                 [
                     TemplateTree(
                         Template.from_string("I saw her on the [SLOT] hill"),
                         leaves(
                             "I saw her on the tall hill",
                             "I saw her on the quiet hill",
                         ),
                     ),
                     TemplateTree(
                         Template.from_string("I saw her on the hill")
                     ),
                 ],
             ),
         ],
     )
     rendered = template_tree_visualiser.render_tree_string(learned_tree)
     print(rendered)
     self.assertEqual(expected_tree, learned_tree)
示例#17
0
 def test_equals(self):
     """Exercise TemplateTree equality and inequality against the fixture trees."""
     slotted_template = Template.from_string(
         "a b [SLOT] d", slot_token="[SLOT]"
     )
     rebuilt_u1 = TemplateTree(slotted_template, [self.s1, self.s2])

     # Equal: same template and children as the fixture, and each tree to itself.
     self.assertEqual(rebuilt_u1, self.u1)
     self.assertEqual(rebuilt_u1, rebuilt_u1)
     self.assertEqual(self.t3, self.t3)

     # Not equal: other fixture trees.
     self.assertNotEqual(rebuilt_u1, self.u2)
     self.assertNotEqual(rebuilt_u1, self.t1)
示例#18
0
 def test_disallow_empty_string_simple_2(self):
     """Learn three ``He likes ...`` sentences plus one unrelated sentence with empty strings disallowed."""
     sentences = [
         "He likes cute cats",
         "He likes nice cats",
         "He likes cats",
         "This is another sentence",
     ]
     learner = TemplateLatticeLearner(
         minimal_variables=True, allow_empty_string=False
     )
     learned_tree = learner.learn(sentences)

     def leaves(*texts):
         # One childless TemplateTree per text, in the given order.
         return [TemplateTree(Template.from_string(text)) for text in texts]

     expected_tree = TemplateTree(
         Template.from_string("[SLOT]"),
         [
             TemplateTree(
                 Template.from_string("He likes [SLOT]"),
                 [
                     TemplateTree(
                         Template.from_string("He likes [SLOT] cats"),
                         leaves("He likes cute cats", "He likes nice cats"),
                     ),
                     TemplateTree(Template.from_string("He likes cats")),
                 ],
             ),
             TemplateTree(Template.from_string("This is another sentence")),
         ],
     )
     rendered = template_tree_visualiser.render_tree_string(learned_tree)
     print(rendered)
     self.assertEqual(expected_tree, learned_tree)
示例#19
0
    def test_disallow_empty_string_simple(self):
        """Learn three ``I am a ... human`` lines with ``allow_empty_string=False`` and check the tree."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, allow_empty_string=False
        )
        sentences = ["I am a human", "I am a nice human", "I am a bad human"]
        learned_tree = learner.learn(sentences)

        def leaves(*texts):
            # One childless TemplateTree per text, in the given order.
            return [TemplateTree(Template.from_string(text)) for text in texts]

        expected_tree = TemplateTree(
            Template.from_string("I am a [SLOT]"),
            [
                TemplateTree(
                    Template.from_string("I am a [SLOT] human"),
                    leaves("I am a nice human", "I am a bad human"),
                ),
                TemplateTree(Template.from_string("I am a human")),
            ],
        )
        rendered = template_tree_visualiser.render_tree_string(learned_tree)
        print(rendered)
        self.assertEqual(expected_tree, learned_tree)
示例#20
0
    def test_4_line_learner_longer_second(self):
        """Learn four greeting lines (two with a two-word ending) and compare with the expected tree."""
        learner = TemplateLatticeLearner(
            minimal_variables=True, words_per_leaf_slot=2
        )
        lines = [
            "hello world",
            "hi world",
            "hello solar system",
            "hi solar system",
        ]
        learned_tree = learner.learn(lines)

        # Each expected branch: (branch template text, leaf sentences under it).
        branches = [
            ("[SLOT] world", ["hello world", "hi world"]),
            ("[SLOT] solar system", ["hello solar system", "hi solar system"]),
            ("hello [SLOT]", ["hello world", "hello solar system"]),
            ("hi [SLOT]", ["hi world", "hi solar system"]),
        ]
        expected_tree = TemplateTree(
            Template.from_string("[SLOT]"),
            [
                TemplateTree(
                    Template.from_string(branch_text),
                    [
                        TemplateTree(Template.from_string(sentence))
                        for sentence in members
                    ],
                )
                for branch_text, members in branches
            ],
        )
        rendered = template_tree_visualiser.render_tree_string(learned_tree)
        print(rendered)
        self.assertEqual(expected_tree, learned_tree)
示例#21
0
    def test_named_slot_parsing(self):
        """Parse ``a <A> c d <B>`` and compare with a Template built element by element."""
        parsed = Template.from_string("a <A> c d <B>")

        expected_elements = [
            TemplateString("a"),
            NamedTemplateSlot("A"),
            TemplateString("c"),
            TemplateString("d"),
            NamedTemplateSlot("B"),
        ]
        self.assertEqual(Template(expected_elements), parsed)
示例#22
0
    def test_fill(self):
        """Fill slots through ``fill`` and ``fill_with_strings`` and compare with parsed strings."""
        a, b, c = self.a, self.b, self.c
        slot1, slot2 = self.slot1, self.slot2

        def single(element):
            # Template wrapping exactly one element.
            return Template([element])

        # One slot, at different positions in the template.
        self.assertEqual(
            Template.from_string("b"),
            Template([slot1]).fill(SlotAssignment({slot1: single(b)})),
        )
        self.assertEqual(
            Template.from_string("a b"),
            Template([a, slot1]).fill(SlotAssignment({slot1: single(b)})),
        )
        self.assertEqual(
            Template.from_string("a b c"),
            Template([a, slot1, c]).fill(SlotAssignment({slot1: single(b)})),
        )

        # Two slots separated by a fixed element.
        self.assertEqual(
            Template.from_string("a b c a"),
            Template([a, slot1, c, slot2]).fill(
                SlotAssignment({slot1: single(b), slot2: single(a)})
            ),
        )

        # Two adjacent slots; the second call lists the assignment keys in reverse.
        self.assertEqual(
            Template.from_string("a b a c"),
            Template([a, slot1, slot2, c]).fill(
                SlotAssignment({slot1: single(b), slot2: single(a)})
            ),
        )
        self.assertEqual(
            Template.from_string("a b a c"),
            Template([a, slot1, slot2, c]).fill(
                SlotAssignment({slot2: single(a), slot1: single(b)})
            ),
        )

        # Same template filled from a list of plain strings.
        self.assertEqual(
            Template.from_string("a b a c"),
            Template([a, slot1, slot2, c]).fill_with_strings(["b", "a"]),
        )
示例#23
0
    def test_covers_string(self):
        """Check ``covers_string`` of ``a b [SLOT] d`` against covered and non-covered strings."""
        template = Template.from_string("a b [SLOT] d", slot_token="[SLOT]")

        # Strings the template is expected to cover.
        covered = [
            "a b c d",
            "a b c e d",
            "a b c e f d",
            "a b c d d",
            "a b d",
            "a b d d d d",
        ]
        for text in covered:
            self.assertTrue(template.covers_string(text))

        # Strings the template is expected not to cover.
        not_covered = ["a a d d", "a b d c", "a b c", "d"]
        for text in not_covered:
            self.assertFalse(template.covers_string(text))
示例#24
0
    def test_extract_content_one(self):
        """Check ``extract_content`` for single-slot templates with the slot at the end, start and middle."""
        slot_last = Template.from_string("a [SLOT]", slot_token="[SLOT]")
        two_tokens_a = Template.from_string("a 1")
        self.assertEqual(
            _to_templates(["1"]), slot_last.extract_content(two_tokens_a)
        )

        slot_first = Template.from_string("[SLOT] c", slot_token="[SLOT]")
        two_tokens_c = Template.from_string("2 c")
        self.assertEqual(
            _to_templates(["2"]), slot_first.extract_content(two_tokens_c)
        )

        slot_middle = Template.from_string("a [SLOT] c", slot_token="[SLOT]")
        three_tokens = Template.from_string("a 3 c")
        self.assertEqual(
            _to_templates(["3"]), slot_middle.extract_content(three_tokens)
        )
        # The two-element templates applied to the three-token input.
        self.assertEqual(
            _to_templates(["3 c"]), slot_last.extract_content(three_tokens)
        )
        self.assertEqual(
            _to_templates(["a 3"]), slot_first.extract_content(three_tokens)
        )
示例#25
0
 def test_get_slot_content(self):
     """Compare ``get_slot_contents_tuples`` of the u1, u2 and t1 fixtures with expected sets."""

     def one_tuples(*texts):
         # Set of 1-tuples, each holding the Template parsed from one text.
         return {(Template.from_string(text),) for text in texts}

     self.assertEqual(
         one_tuples("c", "e"), self.u1.get_slot_contents_tuples()
     )
     self.assertEqual(
         one_tuples("[SLOT]", "f"), self.u2.get_slot_contents_tuples()
     )
     self.assertEqual(
         one_tuples("c", "e", "f"), self.t1.get_slot_contents_tuples()
     )
示例#26
0
    def test_covers_slotted(self):
        """Check ``covers`` of ``a b [SLOT] d`` against other slotted templates."""
        template = Template.from_string("a b [SLOT] d", slot_token="[SLOT]")

        def covers_slotted(slotted_string):
            other = Template.from_string(slotted_string, slot_token="[SLOT]")
            return template.covers(other)

        # Slotted strings the template is expected to cover.
        for text in ("a b [SLOT] d", "a b [SLOT] [SLOT] d"):
            self.assertTrue(covers_slotted(text))

        # Slotted strings the template is expected not to cover.
        not_covered = (
            "[SLOT]",
            "a [SLOT] [SLOT] d",
            "[SLOT] [SLOT] [SLOT] [SLOT]",
            "a b c d [SLOT]",
            "a b d [SLOT]",
            "[SLOT] a b c d",
            "[SLOT] a b d",
            "a b [SLOT] d [SLOT]",
        )
        for text in not_covered:
            self.assertFalse(covers_slotted(text))
示例#27
0
    def test_get_descendent_leaves_slot_content(self):
        """Compare ``get_descendent_leaves_slot_content_tuples`` of u1, t1 and u2 with expected sets."""

        def one_tuples(*texts):
            # Set of 1-tuples, each holding the Template parsed from one text.
            return {(Template.from_string(text),) for text in texts}

        # Same expectations as the non-recursive slot-content test.
        self.assertEqual(
            one_tuples("c", "e"),
            self.u1.get_descendent_leaves_slot_content_tuples(),
        )
        self.assertEqual(
            one_tuples("c", "e", "f"),
            self.t1.get_descendent_leaves_slot_content_tuples(),
        )

        # New expectation for u2.
        self.assertEqual(
            one_tuples("c", "e", "f"),
            self.u2.get_descendent_leaves_slot_content_tuples(),
        )
示例#28
0
def _to_templates(strings: List[str]):
    """Parse every string with ``Template.from_string`` and return the results as a tuple."""
    return tuple(Template.from_string(text) for text in strings)
示例#29
0
def _to_templates(lines: Collection[str]) -> List[Template]:
    """Strip each line and parse it into a Template using ``word_tokenize`` as tokenizer."""
    return [
        Template.from_string(raw_line.strip(), tokenizer=word_tokenize)
        for raw_line in lines
    ]
示例#30
0
 def test_extract_content_two(self):
     """Check ``extract_content`` for a two-slot template with one-word and two-word first slots."""
     pattern = Template.from_string(
         "a [SLOT] c [SLOT] e", slot_token="[SLOT]"
     )

     single_b = Template.from_string("a b c d e")
     self.assertEqual(
         _to_templates(["b", "d"]), pattern.extract_content(single_b)
     )

     double_b = Template.from_string("a b b c d e")
     self.assertEqual(
         _to_templates(["b b", "d"]), pattern.extract_content(double_b)
     )