Exemplo n.º 1
0
    def test_named_slot_parsing(self):
        """Parsing "a <A> c d <B>" yields strings and named slots in order."""
        parsed = Template.from_string("a <A> c d <B>")

        # Words become TemplateString elements, <NAME> becomes a named slot
        expected_elements = [
            TemplateString("a"),
            NamedTemplateSlot("A"),
            TemplateString("c"),
            TemplateString("d"),
            NamedTemplateSlot("B"),
        ]
        self.assertEqual(Template(expected_elements), parsed)
Exemplo n.º 2
0
 def test_get_possible_isomorphic_nt_replacements(self):
     """Verify the replacements returned by get_isomorphic_replacements."""
     # self.simple compared with itself: "origin" maps onto "origin"
     identity_mapping = {NamedTemplateSlot("origin"): NamedTemplateSlot("origin")}
     expected_identity = {SlotReplacements(identity_mapping)}
     found_identity = set(self.simple.get_isomorphic_replacements(self.simple))
     self.assertEqual(expected_identity, found_identity)

     # The two hello-world fixtures: "hello" -> "a" and "world" -> "b"
     slot_name_pairs = [("origin", "origin"), ("hello", "a"), ("world", "b")]
     renaming = {
         NamedTemplateSlot(source): NamedTemplateSlot(target)
         for source, target in slot_name_pairs
     }
     expected_renaming = {SlotReplacements(renaming)}
     found_renaming = set(
         self.hello_world_single_a.get_isomorphic_replacements(
             self.hello_world_single_b
         )
     )
     self.assertEqual(expected_renaming, found_renaming)
Exemplo n.º 3
0
    def setUp(self) -> None:
        """Seed the RNG and build the string, slot and template fixtures."""
        random.seed(123)

        # Plain string elements
        self.a, self.b, self.c = (TemplateString(text) for text in ("a", "b", "c"))

        # Two anonymous slots, then three named ones
        self.slot1, self.slot2 = TemplateSlot(), TemplateSlot()
        self.slot_x, self.slot_y, self.slot_z = (
            NamedTemplateSlot(name) for name in ("x", "y", "z")
        )

        # Single-element templates wrapping each string element
        self.at, self.bt, self.ct = (
            Template([element]) for element in (self.a, self.b, self.c)
        )
Exemplo n.º 4
0
 def test_from_string(self):
     """ContextFreeGrammar.from_string parses a dict of template strings."""
     # "<B>, world" is expected to split into the slot B, "," and "world"
     b_comma_world = Template(
         [NamedTemplateSlot("B"), TemplateString(","), TemplateString("world")]
     )
     rules_for_a = [b_comma_world, Template([TemplateString("hi")])]
     rules_for_b = [Template([TemplateString("hello")])]
     expected_grammar = ContextFreeGrammar(
         {NamedTemplateSlot("A"): rules_for_a, NamedTemplateSlot("B"): rules_for_b}
     )

     parsed_grammar = ContextFreeGrammar.from_string(
         {"A": ["<B>, world", "hi"], "B": ["hello"]}
     )
     self.assertEqual(expected_grammar, parsed_grammar)
Exemplo n.º 5
0
    def setUp(self) -> None:
        """Seed the RNG and build named slots plus single-word templates."""
        random.seed(123)

        self.a, self.b, self.c, self.d = (
            NamedTemplateSlot(name) for name in ("A", "B", "C", "D")
        )

        def single_word(word):
            # One-element template holding just the given word
            return Template([TemplateString(word)])

        # Greeting templates and the subsets used by the tests
        self.e1, self.e2, self.e3 = map(single_word, ("hello", "hi", "hey"))
        self.e12 = {self.e1, self.e2}
        self.e23 = {self.e2, self.e3}
        self.e123 = {self.e1, self.e2, self.e3}

        # Addressee templates and their full set
        self.e4, self.e5, self.e6 = map(single_word, ("world", "earth", "everyone"))
        self.e456 = {self.e4, self.e5, self.e6}
Exemplo n.º 6
0
    def test_get_slot_content_mappings(self):
        """Check get_slot_content_mappings on hand-built and fixture trees."""
        self.assertEqual(set(), self.s1.get_slot_content_mappings())

        slot_x = NamedTemplateSlot("x")
        slot_y = NamedTemplateSlot("y")
        str_a, str_b, str_c = (TemplateString(text) for text in ("a", "b", "c"))

        # One slot: [a, <x>] over the single child [a, b]
        one_slot_tree = TemplateTree(
            Template([str_a, slot_x]), [TemplateTree(Template([str_a, str_b]), [])]
        )
        one_slot_mappings = one_slot_tree.get_slot_content_mappings()

        self.assertEqual(1, len(one_slot_mappings))
        (only_mapping,) = one_slot_mappings
        # Membership is checked both on the mapping itself and on its keys view
        self.assertIn(slot_x, only_mapping)
        self.assertIn(slot_x, only_mapping.keys())
        self.assertEqual(Template([str_b]), only_mapping[slot_x])

        self.assertEqual(
            {SlotAssignment({slot_x: Template([str_b])})}, one_slot_mappings
        )

        # Two slots: [<x>, b, <y>] over the single child [a, b, c]
        two_slot_tree = TemplateTree(
            Template([slot_x, str_b, slot_y]),
            [TemplateTree(Template([str_a, str_b, str_c]), [])],
        )
        expected_two_slot = {
            SlotAssignment({slot_x: Template([str_a]), slot_y: Template([str_c])})
        }
        self.assertEqual(expected_two_slot, two_slot_tree.get_slot_content_mappings())

        # Fixture tree: its first slot is expected to be filled by "c" and by "e"
        fixture_slot = self.u1.get_template().get_slots()[0]
        expected_fixture = {
            SlotAssignment({fixture_slot: Template([TemplateString("c")])}),
            SlotAssignment({fixture_slot: Template([TemplateString("e")])}),
        }
        self.assertEqual(expected_fixture, self.u1.get_slot_content_mappings())
Exemplo n.º 7
0
def _create_large_slotvalues(
        nb_template_elements: int, nb_slots: int,
        max_elements_per_slot: int) -> Tuple[SlotValues, List[Template]]:
    """Build a SlotValues holding `nb_slots` randomly filled named slots.

    Contents come from `_create_contents(nb_template_elements)`. Each slot is
    named by the next name from `alphabetic_slot_name_iterator()` and receives
    `_shuffled_subset(contents, 0, k)`, where `k` is drawn uniformly from
    1..max_elements_per_slot (inclusive) with `random.randint`.

    Returns the populated SlotValues together with the generated contents.
    """
    contents = _create_contents(nb_template_elements)
    named_slots = map(NamedTemplateSlot, alphabetic_slot_name_iterator())
    slot_values = SlotValues()
    for _ in range(nb_slots):
        # Draw the random bound first, then pick the subset, then take the
        # next slot name: the same evaluation order as a single assignment.
        subset_bound = random.randint(1, max_elements_per_slot)
        chosen = _shuffled_subset(contents, 0, subset_bound)
        slot_values[next(named_slots)] = chosen
    return slot_values, contents
Exemplo n.º 8
0
    def test_collapse_using_slot_values(self):
        """Collapse a tree whose slots A-D and F all merge into slot E."""
        hello, hey, hi = (TemplateString(word) for word in ("hello", "hey", "hi"))

        # Leaves: fully concrete two-word templates
        leaf_hello_hello = TemplateTree(Template([hello, hello]))
        leaf_hey_hello = TemplateTree(Template([hey, hello]))
        leaf_hello_hi = TemplateTree(Template([hello, hi]))
        leaf_hi_hello = TemplateTree(Template([hi, hello]))
        leaf_hi_hi = TemplateTree(Template([hi, hi]))
        leaves = [
            leaf_hello_hello,
            leaf_hey_hello,
            leaf_hello_hi,
            leaf_hi_hello,
            leaf_hi_hi,
        ]

        single_words = {Template([hello]), Template([hi]), Template([hey])}

        slot_a, slot_b, slot_c, slot_d, slot_e, slot_f = (
            NamedTemplateSlot(name) for name in ("A", "B", "C", "D", "E", "F")
        )

        # Intermediate trees with one slot each, under a two-slot root
        hello_then_a = TemplateTree(
            Template([hello, slot_a]), [leaf_hello_hello, leaf_hello_hi]
        )
        b_then_hello = TemplateTree(
            Template([slot_b, hello]),
            [leaf_hello_hello, leaf_hey_hello, leaf_hi_hello],
        )
        c_then_hi = TemplateTree(Template([slot_c, hi]), [leaf_hello_hi, leaf_hi_hi])
        hi_then_d = TemplateTree(Template([hi, slot_d]), [leaf_hi_hello, leaf_hi_hi])
        root = TemplateTree(
            Template([slot_e, slot_f]),
            [hello_then_a, b_then_hello, c_then_hi, hi_then_d],
        )

        # After merging, E keeps the words and every other slot refers to E
        expected_values = SlotValues(
            {
                slot: (single_words if slot is slot_e else {Template([slot_e])})
                for slot in (slot_a, slot_b, slot_c, slot_d, slot_e, slot_f)
            }
        )
        merged_values = root.get_slot_values().merge_slots(
            relative_similarity_threshold=0.01
        )
        self.assertEqual(expected_values, merged_values)

        # Rename every merged slot to E, then collapse with the merged values
        rename_to_e = {
            slot: slot_e for slot in (slot_a, slot_b, slot_c, slot_d, slot_f)
        }
        renamed_tree = root.name_template_slots(rename_to_e)
        collapsed_tree = renamed_tree.collapse_using_slot_values(expected_values)

        self.assertEqual(Template([slot_e, slot_e]), collapsed_tree.get_template())
        leaf_templates = {leaf.get_template() for leaf in leaves}
        child_templates = {
            child.get_template() for child in collapsed_tree.get_children()
        }
        self.assertEqual(leaf_templates, child_templates)
Exemplo n.º 9
0
    def from_string(
        content: str,
        named_slot_regex=default_named_slot_regex,
        tokenizer: Callable[[str], List[str]] = word_tokenize,
        slot_token: str = "[SLOT]",
    ) -> "Template":
        """Parse `content` into a Template of string, named-slot and slot elements.

        :param content: the text to parse
        :param named_slot_regex: compiled pattern locating named slots; the slot
            name is the first `findall` result on the matched text
        :param tokenizer: splits slot-free text into tokens, each of which
            becomes a TemplateString
        :param slot_token: literal marker; every occurrence becomes an
            anonymous TemplateSlot
        :return: Template holding the elements in their order of appearance
        """
        has_slot_token = slot_token in content
        if not has_slot_token and not named_slot_regex.search(content):
            # No slots of either kind: every token is a plain string element
            return Template([TemplateString(t) for t in tokenizer(content)])

        tokens = []
        # Split on the anonymous slot marker; a TemplateSlot goes between parts
        parts = content.split(slot_token)
        last_index = len(parts) - 1
        for i, part in enumerate(parts):
            # Consume the named slots of this part from left to right
            match = named_slot_regex.search(part)
            while match:
                # Text before the named slot is tokenized into strings
                text_before = part[:match.start()]
                tokens += [TemplateString(t) for t in tokenizer(text_before)]

                # The matched text itself yields the slot name
                named_slot_name = named_slot_regex.findall(match.group(0))[0]
                tokens.append(NamedTemplateSlot(named_slot_name))

                # Continue with the remainder; stop once only whitespace is left
                part = part[match.end():]
                match = named_slot_regex.search(part) if part.strip() else None

            # Whatever trails the last named slot is plain text
            if part.strip():
                tokens += [TemplateString(t) for t in tokenizer(part)]

            # Add the anonymous slot between consecutive parts
            if i < last_index:
                tokens.append(TemplateSlot())
        return Template(tokens)
Exemplo n.º 10
0
    def name_slots_automatically(
        self, slot_name_generator: Iterator[str] = None
    ):
        """Give every unnamed slot in this tree a fresh, unused name.

        :param slot_name_generator: iterator supplying candidate slot names.
            When omitted (None), a new `alphabetic_slot_name_iterator()` is
            created for this call, so results do not depend on earlier calls.
        :return: the result of `self.name_template_slots` applied to the
            mapping from each unnamed slot to its new NamedTemplateSlot
        """
        # A default created in the signature would be built once and shared
        # (and gradually consumed) by every call; build it per call instead.
        if slot_name_generator is None:
            slot_name_generator = alphabetic_slot_name_iterator()

        all_slots = self.get_all_descendent_slots_breadth_first()
        slot_names = {s.get_name() for s in all_slots if s.is_named()}
        unnamed_slots = [s for s in all_slots if not s.is_named()]

        named_slots_map = dict()
        for unnamed_slot in unnamed_slots:

            # Make sure the new slot name is not being used in the template tree already
            new_slot_name = None
            while new_slot_name is None or new_slot_name in slot_names:
                new_slot_name = next(slot_name_generator)

            # Create new slot with this name
            named_slots_map[unnamed_slot] = NamedTemplateSlot(new_slot_name)

        return self.name_template_slots(named_slots_map)
Exemplo n.º 11
0
    def test_collapse_using_slot_values(self):
        """Collapse differently shaped trees onto one [A, B] tree with four leaves."""
        hello, hey, world, universe = (
            TemplateString(word) for word in ("hello", "hey", "world", "universe")
        )

        # The four concrete greeting/place combinations
        hello_world = TemplateTree(Template([hello, world]))
        hey_world = TemplateTree(Template([hey, world]))
        hello_universe = TemplateTree(Template([hello, universe]))
        hey_universe = TemplateTree(Template([hey, universe]))

        slot_a, slot_b, slot_c = (NamedTemplateSlot(name) for name in ("A", "B", "C"))

        collapsed_expected = TemplateTree(
            Template([slot_a, slot_b]),
            [hello_world, hey_world, hello_universe, hey_universe],
        )
        expected_values = SlotValues(
            {
                slot_a: {Template([hello]), Template([hey])},
                slot_b: {Template([world]), Template([universe])},
            }
        )

        # Case 1: subtrees fix the first element and leave B open
        hello_b = Template([hello, slot_b])
        hello_b_tree = TemplateTree(hello_b, [hello_world, hello_universe])
        hey_b = Template([hey, slot_b])
        hey_b_tree = TemplateTree(hey_b, [hey_world, hey_universe])
        greeting_template = Template([slot_a, slot_b])
        greeting_tree = TemplateTree(greeting_template, [hello_b_tree, hey_b_tree])

        for narrower in (hey_b, hello_b):
            self.assertTrue(greeting_template.encompasses(narrower, expected_values))
        self.assertFalse(hello_b.encompasses(greeting_template, expected_values))

        greeting_values = greeting_tree.calculated_merged_independent_slot_values()
        self.assertEqual(expected_values, greeting_values)

        greeting_collapsed = greeting_tree.collapse_using_slot_values(expected_values)
        self.assertEqual(collapsed_expected, greeting_collapsed)

        # Case 2: subtrees fix the second element and leave A open
        a_world = Template([slot_a, world])
        a_world_tree = TemplateTree(a_world, [hello_world, hey_world])
        a_universe = Template([slot_a, universe])
        a_universe_tree = TemplateTree(a_universe, [hello_universe, hey_universe])
        place_template = Template([slot_a, slot_b])
        place_tree = TemplateTree(place_template, [a_world_tree, a_universe_tree])

        place_values = place_tree.calculated_merged_independent_slot_values()
        self.assertEqual(expected_values, place_values)

        place_collapsed = place_tree.collapse_using_slot_values(expected_values)
        self.assertEqual(collapsed_expected, place_collapsed)

        # Case 3: a mix of both subtree kinds plus a bare leaf
        mixed_tree = TemplateTree(
            place_template, [a_world_tree, hey_b_tree, hello_universe]
        )

        mixed_values = mixed_tree.calculated_merged_independent_slot_values()
        self.assertEqual(expected_values, mixed_values)

        mixed_collapsed = mixed_tree.collapse_using_slot_values(expected_values)
        self.assertEqual(collapsed_expected, mixed_collapsed)

        # Case 4: an unrelated "noise" leaf next to the greeting tree, under slot C
        noise_template = Template([TemplateString("noise")])
        noise_leaf = TemplateTree(noise_template)

        c_only = Template([slot_c])
        noisy_tree = TemplateTree(c_only, [greeting_tree, noise_leaf])

        noisy_values = SlotValues(
            {
                slot_a: {Template([hello]), Template([hey])},
                slot_b: {Template([world]), Template([universe])},
                slot_c: {Template([slot_a, slot_b]), noise_template},
            }
        )

        # Collapse first, then compare the calculated values (original order)
        noisy_collapsed = noisy_tree.collapse_using_slot_values(noisy_values)

        self.assertEqual(
            noisy_values, noisy_tree.calculated_merged_independent_slot_values()
        )
        expected_noisy_collapsed = TemplateTree(
            Template([slot_c]), [collapsed_expected, noise_leaf]
        )
        self.assertEqual(expected_noisy_collapsed, noisy_collapsed)
Exemplo n.º 12
0
    def test_merge_large(self):
        """Exercise merge_slots on 26 disjoint slots plus overlapping extras.

        Slots "a".."z" each hold two consecutive entries of `contents`. Extra
        slots overlapping several of them are then added one at a time, and the
        merge result is checked at various thresholds.
        """
        contents = _create_contents(100)
        alphabet = "abcdefghijklmnopqrstuvwxyz"

        # "a" -> contents[0:2], "b" -> contents[2:4], ..., "z" -> contents[50:52]
        slot_values = SlotValues({
            NamedTemplateSlot(letter): set(contents[2 * i:2 * i + 2])
            for i, letter in enumerate(alphabet)
        })

        # Disjoint slots are left untouched at every threshold tried here
        self.assertEqual(slot_values, slot_values.merge_slots())
        self.assertEqual(slot_values, slot_values.merge_slots(0.1))
        self.assertEqual(slot_values, slot_values.merge_slots(0.001))

        # Value expected for a slot that has been merged into "a" / into "f"
        ref_a = {Template([NamedTemplateSlot("a")])}
        ref_f = {Template([NamedTemplateSlot("f")])}

        # Now add something that overlaps
        extra_slot_1 = NamedTemplateSlot("zzz-extra")
        slot_values[extra_slot_1] = set(contents[0:9])
        self.assertEqual(slot_values, slot_values.merge_slots())

        # Threshold 0.112: a-d and the extra slot merge, "e" stays on its own
        merged_011 = slot_values.merge_slots(0.112)
        self.assertEqual(set(contents[0:9]),
                         merged_011[NamedTemplateSlot("a")])
        for merged_slot in (NamedTemplateSlot("b"), NamedTemplateSlot("c"),
                            NamedTemplateSlot("d"), extra_slot_1):
            self.assertEqual(ref_a, merged_011[merged_slot])
        self.assertEqual(
            set(contents[8:10]),
            merged_011[NamedTemplateSlot("e")],
        )

        # Threshold 0.1: "e" is merged as well
        merged_01 = slot_values.merge_slots(0.1)
        self.assertEqual(set(contents[0:10]),
                         merged_01[NamedTemplateSlot("a")])
        # The extra slot is looked up in merged_01, the result under test here
        for merged_slot in (NamedTemplateSlot("d"), extra_slot_1,
                            NamedTemplateSlot("e")):
            self.assertEqual(ref_a, merged_01[merged_slot])

        extra_slot_2 = NamedTemplateSlot("zzz-extra-2")
        slot_values[extra_slot_2] = set(contents[11:52])
        self.assertEqual(slot_values, slot_values.merge_slots())

        # Threshold 0.05: the second extra slot is not merged yet
        merged2_005 = slot_values.merge_slots(0.05)
        self.assertEqual(set(contents[0:10]),
                         merged2_005[NamedTemplateSlot("a")])
        for merged_slot in (NamedTemplateSlot("d"), extra_slot_1,
                            NamedTemplateSlot("e")):
            self.assertEqual(ref_a, merged2_005[merged_slot])
        self.assertEqual(set(contents[11:52]), merged2_005[extra_slot_2])
        self.assertEqual(set(contents[10:12]),
                         merged2_005[NamedTemplateSlot("f")])
        self.assertEqual(set(contents[14:16]),
                         merged2_005[NamedTemplateSlot("h")])

        # Threshold 0.023: f.. and the second extra slot merge into "f"
        merged2_0023 = slot_values.merge_slots(0.023)
        self.assertEqual(set(contents[0:10]),
                         merged2_0023[NamedTemplateSlot("a")])
        for merged_slot in (NamedTemplateSlot("d"), extra_slot_1,
                            NamedTemplateSlot("e")):
            self.assertEqual(ref_a, merged2_0023[merged_slot])
        self.assertEqual(set(contents[10:52]),
                         merged2_0023[NamedTemplateSlot("f")])
        for merged_slot in (NamedTemplateSlot("g"), NamedTemplateSlot("h"),
                            extra_slot_2):
            self.assertEqual(ref_f, merged2_0023[merged_slot])

        # A third extra slot bridges the two groups: at 0.01 all merge into "a"
        extra_slot_3 = NamedTemplateSlot("zzz-extra-3")
        slot_values[extra_slot_3] = set(contents[9:11])
        self.assertEqual(slot_values, slot_values.merge_slots())

        a = {Template([NamedTemplateSlot("a")])}
        expected_all_merged = {NamedTemplateSlot("a"): set(contents[0:52])}
        expected_all_merged.update(
            {NamedTemplateSlot(letter): a for letter in alphabet[1:]}
        )
        expected_all_merged.update(
            {extra_slot_1: a, extra_slot_2: a, extra_slot_3: a}
        )
        self.assertEqual(
            SlotValues(expected_all_merged),
            slot_values.merge_slots(0.01),
        )