def convert_template_elements_from_wagner_fischer(
    elements: Tuple[TemplateElement],
    alignment: List[str],
    minimal_variables=True,
    merge_named_slots=False,
    allow_empty_string=True,
) -> List[TemplateElement]:
    """Turn ``elements`` plus an edit script into a new list of template elements.

    ``alignment`` is processed left to right. Each entry is one operation:

    * ``"M"`` (keep): copy the current element of ``elements`` to the output
      and advance to the next element.
    * ``"S"`` (substitute): emit a fresh ``TemplateSlot`` in place of the
      current element and advance.
    * ``"D"`` (delete): emit a ``TemplateSlot`` (unless the output already
      ends in one) and advance.
    * ``"I"`` (insert): emit a ``TemplateSlot`` (unless the output already
      ends in one) without advancing.

    Args:
        elements: The elements the alignment was computed for.
        alignment: Sequence of ``"M"``/``"S"``/``"D"``/``"I"`` operations.
        minimal_variables: When true, a kept slot or a substitution does not
            add a slot if ``_has_ending_slot`` reports that the output
            already ends in one.
        merge_named_slots: Forwarded to ``_has_ending_slot`` for kept slots;
            also enables replacing the trailing output slot by an incoming
            kept slot when the last element of ``elements`` is named.
        allow_empty_string: When false, a kept element that directly follows
            a "D" or "I" step is not copied to the output (presumably so the
            slot created by that step always covers at least one element —
            TODO confirm), and an "I" at the very end of ``alignment``
            removes the last output element before adding its slot.

    Returns:
        The newly built list of elements.
    """
    # NOTE(review): this function was reconstructed from source whose line
    # structure was flattened. The flag updates below are applied once per
    # operation (not nested under the preceding ``if``); verify this nesting
    # against version control, it only matters for allow_empty_string=False.
    resulting_elements = []
    elements_index = 0

    # Keep track if last was empty, as to merge slots if minimal_variables=True
    has_dangling_empty_slot = False
    # Check if last was a delete
    last_was_new_delete_slot = False

    last_operation_index = len(alignment) - 1
    for i, operation in enumerate(alignment):
        if operation == "M":
            # KEEP
            new_element: TemplateElement = elements[elements_index]
            if (
                not new_element.is_slot()
                or not minimal_variables
                or not _has_ending_slot(resulting_elements, merge_named_slots)
            ):
                # Remove last slot if it is named and there is a new slot
                # coming in. Only pop when the output really ends in a slot:
                # popping unconditionally raised IndexError on an empty
                # output and could silently drop a non-slot element.
                if (
                    new_element.is_slot()
                    and len(elements) > 1
                    and merge_named_slots
                    and elements[-1].is_named()
                    and resulting_elements
                    and resulting_elements[-1].is_slot()
                ):
                    resulting_elements.pop()
                if allow_empty_string or not (
                    has_dangling_empty_slot or last_was_new_delete_slot
                ):
                    resulting_elements.append(new_element)
            last_was_new_delete_slot = False
            has_dangling_empty_slot = False
            elements_index += 1
        elif operation == "S":
            # SUBSTITUTE -> add slot
            if not minimal_variables or not _has_ending_slot(
                resulting_elements, False
            ):
                resulting_elements.append(TemplateSlot())
            has_dangling_empty_slot = False
            last_was_new_delete_slot = False
            elements_index += 1
        elif operation == "D":
            # DELETE -> skip element
            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())
            has_dangling_empty_slot = True
            last_was_new_delete_slot = True
            elements_index += 1
        elif operation == "I":
            # INSERT -> add slot & stay
            # When allow_empty_string=False a slot may not simply be added at
            # the very end: replace the last output element by the slot
            # instead. Nothing to replace if the output is still empty.
            if (
                not allow_empty_string
                and i == last_operation_index
                and resulting_elements
            ):
                resulting_elements.pop()
            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())
            has_dangling_empty_slot = True
            last_was_new_delete_slot = False

    return resulting_elements
def setUp(self) -> None:
    """Seed the RNG and create the string, slot and template fixtures."""
    random.seed(123)

    # Plain string elements
    self.a, self.b, self.c = (
        TemplateString(text) for text in ("a", "b", "c")
    )

    # Anonymous slots
    self.slot1, self.slot2 = TemplateSlot(), TemplateSlot()

    # Named slots
    self.slot_x, self.slot_y, self.slot_z = (
        NamedTemplateSlot(name) for name in ("x", "y", "z")
    )

    # Single-element templates wrapping each string element
    self.at, self.bt, self.ct = (
        Template([element]) for element in (self.a, self.b, self.c)
    )
def convert_template_elements_from_wagner_fischer(
    elements: Tuple[TemplateElement],
    alignment: List[str],
    minimal_variables=True,
    merge_named_slots=False,
) -> List[TemplateElement]:
    """Turn ``elements`` plus an edit script into a new list of template elements.

    ``alignment`` is processed left to right. Each entry is one operation:

    * ``"M"`` (keep): copy the current element of ``elements`` to the output
      and advance to the next element.
    * ``"S"`` (substitute): emit a fresh ``TemplateSlot`` in place of the
      current element and advance.
    * ``"D"`` (delete): emit a ``TemplateSlot`` (unless the output already
      ends in one) and advance.
    * ``"I"`` (insert): emit a ``TemplateSlot`` (unless the output already
      ends in one) without advancing.

    Args:
        elements: The elements the alignment was computed for.
        alignment: Sequence of ``"M"``/``"S"``/``"D"``/``"I"`` operations.
        minimal_variables: When true, a kept slot or a substitution does not
            add a slot if ``_has_ending_slot`` reports that the output
            already ends in one.
        merge_named_slots: Forwarded to ``_has_ending_slot`` for kept slots;
            also enables replacing the trailing output slot by an incoming
            kept slot when the last element of ``elements`` is named.

    Returns:
        The newly built list of elements.
    """
    resulting_elements = []
    elements_index = 0

    for operation in alignment:
        if operation == "M":
            # KEEP
            new_element: TemplateElement = elements[elements_index]
            if (
                not new_element.is_slot()
                or not minimal_variables
                or not _has_ending_slot(resulting_elements, merge_named_slots)
            ):
                # Remove last slot if it is named and there is a new slot
                # coming in. Only pop when the output really ends in a slot:
                # popping unconditionally raised IndexError on an empty
                # output and could silently drop a non-slot element.
                if (
                    new_element.is_slot()
                    and len(elements) > 1
                    and merge_named_slots
                    and elements[-1].is_named()
                    and resulting_elements
                    and resulting_elements[-1].is_slot()
                ):
                    resulting_elements.pop()
                resulting_elements.append(new_element)
            elements_index += 1
        elif operation == "S":
            # SUBSTITUTE -> add slot
            if not minimal_variables or not _has_ending_slot(
                resulting_elements, False
            ):
                resulting_elements.append(TemplateSlot())
            elements_index += 1
        elif operation == "D":
            # DELETE -> skip element
            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())
            elements_index += 1
        elif operation == "I":
            # INSERT -> add slot & stay
            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())

    return resulting_elements
def from_string(
    content: str,
    named_slot_regex=default_named_slot_regex,
    tokenizer: Callable[[str], List[str]] = word_tokenize,
    slot_token: str = "[SLOT]",
) -> "Template":
    """Parse ``content`` into a ``Template``.

    ``content`` is split on every occurrence of ``slot_token``; a
    ``TemplateSlot`` is placed between consecutive pieces. Inside each piece,
    every match of ``named_slot_regex`` becomes a ``NamedTemplateSlot`` and
    the text around the matches is tokenized into ``TemplateString`` elements.
    If ``content`` contains neither ``slot_token`` nor a regex match, it is
    simply tokenized as a whole.

    Args:
        content: The text to parse.
        named_slot_regex: Compiled pattern locating named slots. Calling
            ``findall`` on the matched text must yield the slot name as its
            first result.
        tokenizer: Callable splitting a piece of text into string tokens.
        slot_token: Literal marker that denotes an anonymous slot.

    Returns:
        A ``Template`` built from the parsed elements.
    """
    if slot_token not in content and not named_slot_regex.search(content):
        # No slot markers at all: plain tokenization
        return Template([TemplateString(t) for t in tokenizer(content)])

    # A slot token is present: split on it and add slots in between
    parts = content.split(slot_token)
    last_part_index = len(parts) - 1
    tokens = []
    for i, part in enumerate(parts):
        last_match: Match = named_slot_regex.search(part)
        while last_match:
            # Text before the named slot. Blank text is skipped, exactly as
            # is done for the trailing text below, so that a tokenizer which
            # returns tokens for blank input cannot inject empty elements.
            text_before = part[:last_match.start()]
            if text_before.strip():
                tokens += [TemplateString(t) for t in tokenizer(text_before)]

            # The named slot itself
            matched_text = part[last_match.start():last_match.end()]
            named_slot_name = named_slot_regex.findall(matched_text)[0]
            tokens.append(NamedTemplateSlot(named_slot_name))

            # Continue with whatever follows the match
            part = part[last_match.end():]
            last_match = named_slot_regex.search(part) if part.strip() else None

        # Text after the last named slot (or the whole piece if none matched)
        if part.strip():
            tokens += [TemplateString(t) for t in tokenizer(part)]

        # Add variable token in between
        if i < last_part_index:
            tokens.append(TemplateSlot())

    return Template(tokens)
def from_string_tokens(elements: List[str], slot_token: str = None) -> "Template":
    """Build a ``Template`` from already tokenized strings.

    Every entry equal to ``slot_token`` becomes a ``TemplateSlot``; every
    other entry is wrapped in a ``TemplateString``.
    """
    converted = []
    for token in elements:
        if token == slot_token:
            converted.append(TemplateSlot())
        else:
            converted.append(TemplateString(token))
    return Template(converted)