def test_merge_relative_overlap_values(self):
    """Check at which relative similarity thresholds slot B is merged into A.

    A holds every created content element; B holds whatever
    ``_shuffled_subset(contents, 0, 2)`` returns.  The test expects the
    values to stay untouched for the default threshold and for 1, 0.9 and
    0.5, and expects B to become a reference to A for 0.2 and 0.1.
    """
    pool = _create_contents(10)
    original = SlotValues({
        self.a: set(pool),
        self.b: _shuffled_subset(pool, 0, 2),
    })

    # Default threshold: nothing is merged.
    self.assertEqual(original, original.merge_slots())

    # Thresholds above 0.2: still nothing is merged.
    for threshold in (1, 0.9, 0.5):
        unmerged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(original, unmerged)

    # Thresholds of 0.2 and below: B is replaced by a reference to A.
    b_into_a = SlotValues({
        self.a: set(pool),
        self.b: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        merged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(b_into_a, merged)
def test_get_all_slot_assignments(self):
    """Check the assignments enumerated for slot C.

    A holds a reference to C next to ``self.e1``; C holds ``self.e12``.
    Requesting all possible assignments for ``[C]`` is expected to yield
    exactly the assignments C -> e1 and C -> e2.
    """
    values = SlotValues({
        self.a: {Template([self.c]), self.e1},
        self.c: self.e12,
    })

    wanted = {
        SlotAssignment({self.c: self.e1}),
        SlotAssignment({self.c: self.e2}),
    }
    found = set(values.get_all_possible_assignments([self.c]))

    self.assertEqual(wanted, found)
def test_merge_small_overlap(self):
    """Check that A (``e12``) and B (``e23``) merge at threshold 0.3.

    Expected outcome: B is recorded as replaced by A, A holds ``e123`` and
    B only holds a reference to A.
    """
    before = SlotValues({self.a: self.e12, self.b: self.e23})
    after = before.merge_slots(relative_similarity_threshold=0.3)

    # The replacement map must say "B is replaced by A".
    wanted_replacements = hashabledict({self.b: self.a})
    self.assertEqual(wanted_replacements, after.get_replacements())

    # A takes over the combined values; B points at A.
    wanted_values = SlotValues({
        self.a: self.e123,
        self.b: {Template([self.a])},
    })
    self.assertEqual(wanted_values, after)
def test_encompasses(self):
    """Check ``Template.encompasses`` for ``[slot_x, a, slot_y]``.

    Each case pairs a candidate template with the slot values handed to
    ``encompasses``.  Four cases are expected to be accepted and one
    (slot_y only allowing ``a`` while the candidate ends in ``b``) is
    expected to be rejected.
    """
    def bindings(x_element, y_element):
        # One single-element template per slot of the general template.
        return SlotValues({
            self.slot_x: [Template([x_element])],
            self.slot_y: [Template([y_element])],
        })

    general = Template([self.slot_x, self.a, self.slot_y])
    with_b = Template([self.slot_x, self.a, self.b])
    with_c_and_b = Template([self.c, self.a, self.b])
    with_z_and_b = Template([self.slot_z, self.a, self.b])

    # (candidate, element allowed in slot_x, element allowed in slot_y)
    accepted = (
        (general, self.slot_x, self.slot_y),
        (with_b, self.slot_x, self.b),
        (with_c_and_b, self.c, self.b),
        (with_z_and_b, self.slot_z, self.b),
    )
    for candidate, x_element, y_element in accepted:
        self.assertTrue(
            general.encompasses(candidate, bindings(x_element, y_element)))

    # slot_y may only be ``a`` here, but the candidate ends in ``b``.
    self.assertFalse(
        general.encompasses(with_b, bindings(self.slot_x, self.a)))
def test_collapse_using_slot_values(self):
    """Check collapsing a tree whose slots all merge into one slot E.

    Builds five two-word leaves, four intermediate trees with one slot
    each (A-D) and a root ``[E, F]``.  The test expects that merging the
    tree's slot values at threshold 0.01 maps A, B, C, D and F onto E, and
    that after renaming those slots to E the collapsed tree has template
    ``[E, E]`` with the five leaf templates as its children's templates.
    """
    def points_to_e():
        # Fresh set per slot, holding only a reference to slot E.
        return {Template([slot_e])}

    hello, hey, hi = (
        TemplateString(word) for word in ("hello", "hey", "hi"))

    word_pairs = (
        (hello, hello), (hey, hello), (hello, hi), (hi, hello), (hi, hi))
    leaves = [
        TemplateTree(Template([first, second]))
        for first, second in word_pairs
    ]
    h1, h2, h3, h4, h5 = leaves

    hello_t, hey_t, hi_t = Template([hello]), Template([hey]), Template([hi])

    slot_a, slot_b, slot_c, slot_d, slot_e, slot_f = (
        NamedTemplateSlot(letter) for letter in "ABCDEF")

    t1 = TemplateTree(Template([hello, slot_a]), [h1, h3])
    t2 = TemplateTree(Template([slot_b, hello]), [h1, h2, h4])
    t3 = TemplateTree(Template([slot_c, hi]), [h3, h5])
    t4 = TemplateTree(Template([hi, slot_d]), [h4, h5])
    root = TemplateTree(Template([slot_e, slot_f]), [t1, t2, t3, t4])

    merged_values = SlotValues({
        slot_a: points_to_e(),
        slot_b: points_to_e(),
        slot_c: points_to_e(),
        slot_d: points_to_e(),
        slot_e: {hello_t, hi_t, hey_t},
        slot_f: points_to_e(),
    })
    self.assertEqual(
        merged_values,
        root.get_slot_values().merge_slots(
            relative_similarity_threshold=0.01),
    )

    # Rename every slot except E to E, then collapse.
    rename_to_e = dict.fromkeys(
        (slot_a, slot_b, slot_c, slot_d, slot_f), slot_e)
    renamed = root.name_template_slots(rename_to_e)
    collapsed = renamed.collapse_using_slot_values(merged_values)

    self.assertEqual(Template([slot_e, slot_e]), collapsed.get_template())
    self.assertEqual(
        {leaf.get_template() for leaf in leaves},
        {child.get_template() for child in collapsed.get_children()},
    )
def test_merge_containing_slot(self):
    """Check merging when A already lists a reference to B.

    A holds a reference to B together with e1, e2 and e3; B holds
    ``e123``; C holds ``e456``.  With the default threshold the test
    expects A to be replaced by B and C to stay as it is.
    """
    before = SlotValues({
        self.a: {Template([self.b]), self.e1, self.e2, self.e3},
        self.b: self.e123,
        self.c: self.e456,
    })
    after = before.merge_slots()

    wanted_replacements = hashabledict({self.a: self.b})
    self.assertEqual(wanted_replacements, after.get_replacements())

    wanted_values = SlotValues({
        self.a: {Template([self.b])},
        self.b: self.e123,
        self.c: self.e456,
    })
    self.assertEqual(wanted_values, after)
def test_merge_basic(self):
    """Check merging of two slots that hold the same values.

    A and B both hold ``e123`` while C holds ``e456``.  With the default
    threshold the test expects B to be replaced by A and C to stay as it
    is.
    """
    before = SlotValues({
        self.a: self.e123,
        self.b: self.e123,
        self.c: self.e456,
    })
    after = before.merge_slots()

    wanted_replacements = hashabledict({self.b: self.a})
    self.assertEqual(wanted_replacements, after.get_replacements())

    wanted_values = SlotValues({
        self.a: self.e123,
        self.b: {Template([self.a])},
        self.c: self.e456,
    })
    self.assertEqual(wanted_values, after)
def _create_large_slotvalues(
        nb_template_elements: int,
        nb_slots: int,
        max_elements_per_slot: int,
) -> Tuple[SlotValues, List[Template]]:
    """Build a randomly filled ``SlotValues`` for large-input tests.

    Creates ``nb_template_elements`` content elements via
    ``_create_contents`` and fills ``nb_slots`` slots, named by
    ``alphabetic_slot_name_iterator``, each with
    ``_shuffled_subset(contents, 0, k)`` where ``k`` is drawn with
    ``random.randint(1, max_elements_per_slot)``.

    Returns the filled slot values together with the created contents.
    """
    contents = _create_contents(nb_template_elements)
    # Lazily wraps each generated name in a NamedTemplateSlot.
    fresh_slots = map(NamedTemplateSlot, alphabetic_slot_name_iterator())
    slot_values = SlotValues()
    for _ in range(nb_slots):
        # Draw the subset before taking the next slot, as in a plain
        # ``mapping[key] = value`` statement (value is evaluated first).
        subset = _shuffled_subset(
            contents, 0, random.randint(1, max_elements_per_slot))
        slot_values[next(fresh_slots)] = subset
    return slot_values, contents
def test_merge_relative_overlap_values_three_variables_1(self):
    """Check threshold-dependent merging of two sub-slots into A.

    A holds all ten content elements, B holds ``contents[0:2]`` and C
    holds ``contents[5:8]``.  Expected outcomes:

    * default threshold, 1 and 0.5: nothing is merged;
    * 0.3: only C becomes a reference to A;
    * 0.2 and 0.1: both B and C become references to A.
    """
    pool = _create_contents(10)
    original = SlotValues({
        self.a: set(pool),
        self.b: set(pool[0:2]),
        self.c: set(pool[5:8]),
    })

    # Default threshold and thresholds of 0.5 and above: no merge.
    self.assertEqual(original, original.merge_slots())
    for threshold in (1, 0.5):
        unmerged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(original, unmerged)

    # At 0.3 only C is expected to be merged into A.
    merged_c_only = original.merge_slots(relative_similarity_threshold=0.3)
    c_into_a = SlotValues({
        self.a: set(pool),
        self.b: set(pool[0:2]),
        self.c: {Template([self.a])},
    })
    self.assertEqual(c_into_a, merged_c_only)

    # At 0.2 and below B is expected to be merged into A as well.
    everything_into_a = SlotValues({
        self.a: set(pool),
        self.b: {Template([self.a])},
        self.c: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        merged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(everything_into_a, merged)
def test_merge_containing_multiple_slots_complely(self):
    """Check merging when A references two slots with equal values.

    A holds references to B and C plus e1 and e2; B and C both hold
    ``e123``.  With the default threshold the test expects both A and C
    to be replaced by B.
    """
    before = SlotValues({
        self.a: {Template([self.b]), Template([self.c]), self.e1, self.e2},
        self.b: self.e123,
        self.c: self.e123,
    })
    after = before.merge_slots()

    wanted_replacements = hashabledict({
        self.a: self.b,
        self.c: self.b,
    })
    self.assertEqual(wanted_replacements, after.get_replacements())

    wanted_values = SlotValues({
        self.a: {Template([self.b])},
        self.b: self.e123,
        self.c: {Template([self.b])},
    })
    self.assertEqual(wanted_values, after)
def get_slot_values(self) -> SlotValues:
    """Gather the slot values for every slot of this tree's template.

    For each child, the values obtained by mapping this template onto the
    child's template are added first, followed by the values gathered
    recursively from the child itself.  Slots are assumed to have
    independent content.
    """
    collected = SlotValues()
    for subtree in self._children:
        # Values this child contributes to the slots of our own template.
        collected.add_all_slot_values(
            self._template.create_slot_values_mapping(subtree._template))
        # Values found further down in the child's own subtree.
        collected.add_all_slot_values(subtree.get_slot_values())
    return collected
def test_collapse_using_slot_values(self):
    """Check that differently nested trees collapse to the same flat tree.

    The flat target is ``[A, B]`` with the four greeting leaves as
    children, where A may be hello/hey and B may be world/universe.  The
    same expectation is checked for a tree split on the first word, a
    tree split on the second word, and a mixed tree.  Finally a root
    ``[C]`` with an unrelated "noise" leaf is collapsed; the greeting
    subtree is expected to be flattened below it.
    """
    def assert_collapses_to_flat(tree):
        # Both the merged slot values and the collapsed tree must match.
        self.assertEqual(
            flat_values, tree.calculated_merged_independent_slot_values())
        self.assertEqual(
            flat_tree, tree.collapse_using_slot_values(flat_values))

    hello, hey, world, universe = (
        TemplateString(word)
        for word in ("hello", "hey", "world", "universe"))

    h1 = TemplateTree(Template([hello, world]))
    h2 = TemplateTree(Template([hey, world]))
    h3 = TemplateTree(Template([hello, universe]))
    h4 = TemplateTree(Template([hey, universe]))

    slot_a, slot_b, slot_c = (
        NamedTemplateSlot(letter) for letter in "ABC")

    flat_tree = TemplateTree(Template([slot_a, slot_b]), [h1, h2, h3, h4])
    flat_values = SlotValues({
        slot_a: {Template([hello]), Template([hey])},
        slot_b: {Template([world]), Template([universe])},
    })

    # Tree split on the first word.
    hello_t = Template([hello, slot_b])
    hello_tt = TemplateTree(hello_t, [h1, h3])
    hey_t = Template([hey, slot_b])
    hey_tt = TemplateTree(hey_t, [h2, h4])
    greeting_t = Template([slot_a, slot_b])
    greeting_tt = TemplateTree(greeting_t, [hello_tt, hey_tt])

    self.assertTrue(greeting_t.encompasses(hey_t, flat_values))
    self.assertTrue(greeting_t.encompasses(hello_t, flat_values))
    self.assertFalse(hello_t.encompasses(greeting_t, flat_values))
    assert_collapses_to_flat(greeting_tt)

    # Tree split on the second word.
    world_t = Template([slot_a, world])
    world_tt = TemplateTree(world_t, [h1, h2])
    universe_t = Template([slot_a, universe])
    universe_tt = TemplateTree(universe_t, [h3, h4])
    place_t = Template([slot_a, slot_b])
    place_tt = TemplateTree(place_t, [world_tt, universe_tt])
    assert_collapses_to_flat(place_tt)

    # Children taken from both splits plus a bare leaf.
    mix_tt = TemplateTree(place_t, [world_tt, hey_tt, h3])
    assert_collapses_to_flat(mix_tt)

    # An unrelated leaf next to the greeting subtree, below a [C] root.
    noise = Template([TemplateString("noise")])
    noise_tt = TemplateTree(noise)
    noise_t = Template([slot_c])
    full_noise_tt = TemplateTree(noise_t, [greeting_tt, noise_tt])
    noise_values = SlotValues({
        slot_a: {Template([hello]), Template([hey])},
        slot_b: {Template([world]), Template([universe])},
        slot_c: {Template([slot_a, slot_b]), noise},
    })

    # Collapse first, then compare the calculated values (original order).
    collapsed_full_noise = full_noise_tt.collapse_using_slot_values(
        noise_values)
    self.assertEqual(
        noise_values,
        full_noise_tt.calculated_merged_independent_slot_values(),
    )
    self.assertEqual(
        TemplateTree(Template([slot_c]), [flat_tree, noise_tt]),
        collapsed_full_noise,
    )
def test_merge_large(self):
    """Check ``merge_slots`` on 26 disjoint slots plus overlapping extras.

    Slots "a".."z" each hold a disjoint pair of content elements
    (``contents[2*i:2*i+2]`` for the i-th letter).  Three extra slots that
    overlap several of them are added one after another, and after each
    addition the merge result is checked for a number of thresholds.

    The expected values asserted here are the contract of this test; the
    thresholds are taken over unchanged from the original test.
    """
    def slot(name):
        # Slots are looked up by an equal, freshly built key each time.
        return NamedTemplateSlot(name)

    def refers_to(name):
        # Value set of a slot that was merged into slot ``name``.
        return {Template([slot(name)])}

    letters = "abcdefghijklmnopqrstuvwxyz"
    contents = _create_contents(100)
    slot_values = SlotValues({
        slot(letter): set(contents[2 * index:2 * index + 2])
        for index, letter in enumerate(letters)
    })

    # Fully disjoint slots: no threshold tried here merges anything.
    self.assertEqual(slot_values, slot_values.merge_slots())
    self.assertEqual(slot_values, slot_values.merge_slots(0.1))
    self.assertEqual(slot_values, slot_values.merge_slots(0.001))

    # First extra slot: overlaps a-d completely and e by one element.
    extra_slot_1 = slot("zzz-extra")
    slot_values[extra_slot_1] = set(contents[0:9])
    self.assertEqual(slot_values, slot_values.merge_slots())

    merged_011 = slot_values.merge_slots(0.112)
    self.assertEqual(set(contents[0:9]), merged_011[slot("a")])
    for letter in ("b", "c", "d"):
        self.assertEqual(refers_to("a"), merged_011[slot(letter)])
    self.assertEqual(refers_to("a"), merged_011[extra_slot_1])
    self.assertEqual(set(contents[8:10]), merged_011[slot("e")])

    merged_01 = slot_values.merge_slots(0.1)
    self.assertEqual(set(contents[0:10]), merged_01[slot("a")])
    self.assertEqual(refers_to("a"), merged_01[slot("d")])
    # Fixed: this used to re-check ``merged_011`` (copy-paste slip), so
    # the 0.1 result was never verified for the extra slot.
    self.assertEqual(refers_to("a"), merged_01[extra_slot_1])
    self.assertEqual(refers_to("a"), merged_01[slot("e")])

    # Second extra slot: overlaps f by one element and g-z completely.
    extra_slot_2 = slot("zzz-extra-2")
    slot_values[extra_slot_2] = set(contents[11:52])
    self.assertEqual(slot_values, slot_values.merge_slots())

    merged2_005 = slot_values.merge_slots(0.05)
    self.assertEqual(set(contents[0:10]), merged2_005[slot("a")])
    self.assertEqual(refers_to("a"), merged2_005[slot("d")])
    self.assertEqual(refers_to("a"), merged2_005[extra_slot_1])
    self.assertEqual(refers_to("a"), merged2_005[slot("e")])
    self.assertEqual(set(contents[11:52]), merged2_005[extra_slot_2])
    self.assertEqual(set(contents[10:12]), merged2_005[slot("f")])
    self.assertEqual(set(contents[14:16]), merged2_005[slot("h")])

    merged2_0023 = slot_values.merge_slots(0.023)
    self.assertEqual(set(contents[0:10]), merged2_0023[slot("a")])
    self.assertEqual(refers_to("a"), merged2_0023[slot("d")])
    self.assertEqual(refers_to("a"), merged2_0023[extra_slot_1])
    self.assertEqual(refers_to("a"), merged2_0023[slot("e")])
    self.assertEqual(set(contents[10:52]), merged2_0023[slot("f")])
    self.assertEqual(refers_to("f"), merged2_0023[slot("g")])
    self.assertEqual(refers_to("f"), merged2_0023[slot("h")])
    self.assertEqual(refers_to("f"), merged2_0023[extra_slot_2])

    # Third extra slot: bridges the "a" group and the "f" group.
    extra_slot_3 = slot("zzz-extra-3")
    slot_values[extra_slot_3] = set(contents[9:11])
    self.assertEqual(slot_values, slot_values.merge_slots())

    # At 0.01 everything is expected to end up in slot "a".
    into_a = refers_to("a")
    everything_in_a = {slot("a"): set(contents[0:52])}
    everything_in_a.update({slot(letter): into_a for letter in letters[1:]})
    everything_in_a.update({
        extra_slot_1: into_a,
        extra_slot_2: into_a,
        extra_slot_3: into_a,
    })
    self.assertEqual(
        SlotValues(everything_in_a),
        slot_values.merge_slots(0.01),
    )
def test_merge_relative_overlap_values_three_variables_2(self):
    """Check step-wise merging of three partially overlapping slots.

    A holds ``contents[1:5]``, B holds ``contents[0:2]`` and C holds
    ``contents[2:6]``.  Expected outcomes:

    * default threshold, 1 and 0.61: nothing is merged;
    * 0.6 and 0.21: C becomes a reference to A, which then holds
      ``contents[1:6]``;
    * 0.2 and 0.1: B is merged as well and A holds ``contents[0:6]``.
    """
    pool = _create_contents(10)
    original = SlotValues({
        self.a: set(pool[1:5]),
        self.b: set(pool[0:2]),
        self.c: set(pool[2:6]),
    })

    # Default threshold and thresholds above 0.6: no merge.
    self.assertEqual(original, original.merge_slots())
    for threshold in (1, 0.61):
        unmerged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(original, unmerged)

    # From 0.6 down to 0.21: only C is merged into A.
    c_into_a = SlotValues({
        self.a: set(pool[1:6]),
        self.b: set(pool[0:2]),
        self.c: {Template([self.a])},
    })
    for threshold in (0.6, 0.21):
        merged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(c_into_a, merged)

    # At 0.2 and below: B is merged into A too.
    everything_into_a = SlotValues({
        self.a: set(pool[0:6]),
        self.b: {Template([self.a])},
        self.c: {Template([self.a])},
    })
    for threshold in (0.2, 0.1):
        merged = original.merge_slots(
            relative_similarity_threshold=threshold)
        self.assertEqual(everything_into_a, merged)