Пример #1
0
def convert_template_elements_from_wagner_fischer(
    elements: Tuple[TemplateElement],
    alignment: List[str],
    minimal_variables: bool = True,
    merge_named_slots: bool = False,
    allow_empty_string: bool = True,
) -> List[TemplateElement]:
    """Replay an edit alignment over ``elements`` and return the new element list.

    ``alignment`` is consumed left to right; each entry is one operation code:

    * ``"M"`` (keep): take the current element of ``elements`` and advance.
    * ``"S"`` (substitute): append a fresh ``TemplateSlot`` and advance.
    * ``"D"`` (delete): append a fresh ``TemplateSlot`` and advance.
    * ``"I"`` (insert): append a fresh ``TemplateSlot`` without advancing.

    Any other operation code is ignored (nothing is appended, nothing advances).
    Whether a slot is actually appended depends on ``_has_ending_slot`` --
    presumably a check that the result already ends in a slot; its exact
    semantics are defined outside this block.

    Args:
        elements: Source elements the alignment refers to.
        alignment: Sequence of operation codes as described above.
        minimal_variables: When true, a kept slot or a substitution is skipped
            if ``_has_ending_slot`` reports that the result already ends in one.
        merge_named_slots: Forwarded to ``_has_ending_slot`` for kept elements,
            and enables the "pop before appending a kept slot" step.
        allow_empty_string: When false, a kept element that directly follows a
            slot freshly created by a delete/insert is not appended, and a
            trailing insert first pops the last result element.

    Returns:
        The newly built list of elements.

    Raises:
        IndexError: If the alignment advances past the end of ``elements``, or
            if one of the ``pop()`` calls runs while the result is still empty.
    """
    resulting_elements = []
    elements_index = 0

    # Set when a delete/insert has just appended a fresh slot; cleared again by
    # a substitute, or by a keep that passes the redundancy check below.
    has_dangling_empty_slot = False

    # Set when a delete has just appended a fresh slot; cleared by substitute,
    # insert, or a keep that passes the redundancy check below.
    # NOTE(review): this flag is only ever set together with
    # has_dangling_empty_slot and never outlives it, so its test in the
    # allow_empty_string condition appears implied -- confirm before simplifying.
    last_was_new_delete_slot = False

    for (i, operation) in enumerate(alignment):
        if operation == "M":  # KEEP
            new_element: TemplateElement = elements[elements_index]
            # Non-slot elements always pass; a slot passes unless
            # minimal_variables is on and the result already ends in a slot.
            if not new_element.is_slot() or (
                    not minimal_variables or not _has_ending_slot(
                        resulting_elements, merge_named_slots)):
                # Remove last slot if it is named and there is a new slot coming in
                # NOTE(review): is_named() is evaluated on the last element of
                # the *input* `elements`, while the pop removes the last element
                # of `resulting_elements` -- confirm this is intended.
                if (new_element.is_slot() and len(elements) > 1
                        and merge_named_slots
                        and elements[len(elements) - 1].is_named()):
                    resulting_elements.pop()

                # With allow_empty_string=False the element right after a
                # freshly created delete/insert slot is dropped, so that slot
                # stands in for at least this element.
                if allow_empty_string or (not has_dangling_empty_slot
                                          and not last_was_new_delete_slot):
                    resulting_elements.append(new_element)
                last_was_new_delete_slot = False
                has_dangling_empty_slot = False

            # The source position advances even when the element was skipped.
            elements_index += 1
        elif operation == "S":  # SUBSTITUTE -> add slot
            if not minimal_variables or not _has_ending_slot(
                    resulting_elements, False):
                resulting_elements.append(TemplateSlot())
            has_dangling_empty_slot = False
            last_was_new_delete_slot = False
            elements_index += 1
        elif operation == "D":  # DELETE -> skip element
            # Flags are only raised when a new slot was actually appended.
            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())
                has_dangling_empty_slot = True
                last_was_new_delete_slot = True
            elements_index += 1
        elif operation == "I":  # INSERT -> add slot & stay
            # Check if it is the last operation when allow_empty_string=False, because if so, it is disallowed to insert
            # a new slot at the end! Pop the last element and insert a slot!
            if not allow_empty_string and i == len(alignment) - 1:
                resulting_elements.pop()

            if not _has_ending_slot(resulting_elements, False):
                resulting_elements.append(TemplateSlot())
                has_dangling_empty_slot = True
            last_was_new_delete_slot = False

    return resulting_elements
Пример #2
0
    def setUp(self) -> None:
        """Seed the RNG and create the elements and templates shared by the tests."""
        # Seed the module-level random generator with a fixed value.
        random.seed(123)

        # Plain string elements "a", "b" and "c".
        self.a, self.b, self.c = (TemplateString(text) for text in "abc")

        # Two separate anonymous slots, then three named slots.
        self.slot1, self.slot2 = TemplateSlot(), TemplateSlot()
        self.slot_x, self.slot_y, self.slot_z = (
            NamedTemplateSlot(name) for name in "xyz")

        # One single-element template per string element.
        self.at, self.bt, self.ct = (
            Template([element]) for element in (self.a, self.b, self.c))
Пример #3
0
def convert_template_elements_from_wagner_fischer(
    elements: Tuple[TemplateElement],
    alignment: List[str],
    minimal_variables=True,
    merge_named_slots=False,
) -> List[TemplateElement]:
    """Replay an edit alignment over ``elements`` and return the new element list.

    Each entry of ``alignment`` is an operation code: ``"M"`` keeps the current
    source element, ``"S"`` and ``"D"`` replace/skip it with a slot, and ``"I"``
    adds a slot without consuming a source element. Unknown codes are ignored.

    Args:
        elements: Source elements the alignment refers to.
        alignment: Operation codes, processed left to right.
        minimal_variables: When true, a kept slot or a substitution is skipped
            if ``_has_ending_slot`` reports the result already ends in a slot.
        merge_named_slots: Forwarded to ``_has_ending_slot`` for kept elements,
            and enables popping the last result element before a kept slot.

    Returns:
        The newly built list of elements.
    """
    converted = []
    cursor = 0

    def _close_with_slot():
        # Append a fresh slot unless the result already ends in one.
        if not _has_ending_slot(converted, False):
            converted.append(TemplateSlot())

    for op in alignment:
        if op == "M":  # KEEP
            incoming: TemplateElement = elements[cursor]
            incoming_is_slot = incoming.is_slot()
            # A kept slot is redundant when variables are minimised and the
            # result already ends in a (mergeable) slot.
            redundant_slot = (incoming_is_slot and minimal_variables
                              and _has_ending_slot(converted,
                                                   merge_named_slots))
            if not redundant_slot:
                # Drop the last result element when a slot comes in, merging is
                # enabled and the final source element is named.
                if (incoming_is_slot and merge_named_slots
                        and len(elements) > 1 and elements[-1].is_named()):
                    converted.pop()
                converted.append(incoming)
            cursor += 1
        elif op == "S":  # SUBSTITUTE -> add slot
            if not (minimal_variables
                    and _has_ending_slot(converted, False)):
                converted.append(TemplateSlot())
            cursor += 1
        elif op == "D":  # DELETE -> skip element
            _close_with_slot()
            cursor += 1
        elif op == "I":  # INSERT -> add slot & stay
            _close_with_slot()

    return converted
Пример #4
0
    def from_string(
        content: str,
        named_slot_regex=default_named_slot_regex,
        tokenizer: Callable[[str], List[str]] = word_tokenize,
        slot_token: str = "[SLOT]",
    ) -> "Template":
        """Parse ``content`` into a ``Template`` of strings and slots.

        Takes no ``self``; presumably a static factory whose decorator lies
        outside this block.

        Args:
            content: Text to convert.
            named_slot_regex: Compiled pattern locating named slots; the first
                ``findall`` result on the matched text becomes the slot name.
            tokenizer: Splits a piece of plain text into token strings.
            slot_token: Literal marker that stands for an anonymous slot.

        Returns:
            A ``Template`` whose elements follow the order of ``content``.
        """
        has_markup = slot_token in content or named_slot_regex.search(content)
        if not has_markup:
            # No slots of either kind: every token is a plain string element.
            return Template([TemplateString(t) for t in tokenizer(content)])

        collected = []
        segments = content.split(slot_token)
        final_index = len(segments) - 1

        for index, remainder in enumerate(segments):
            # Peel named slots off the front of the segment one at a time.
            found = named_slot_regex.search(remainder)
            while found:
                leading_text = remainder[:found.start()]
                matched_text = remainder[found.start():found.end()]

                # Text before the named slot becomes plain string tokens.
                collected.extend(
                    [TemplateString(t) for t in tokenizer(leading_text)])

                # The matched text itself yields the slot name.
                slot_name = named_slot_regex.findall(matched_text)[0]
                collected.append(NamedTemplateSlot(slot_name))

                # Continue with what follows; stop once only whitespace is left.
                remainder = remainder[found.end():]
                if remainder.strip():
                    found = named_slot_regex.search(remainder)
                else:
                    found = None

            # Tokenize any non-blank text left after the last named slot.
            if remainder.strip():
                collected.extend(
                    [TemplateString(t) for t in tokenizer(remainder)])

            # An anonymous slot sits between consecutive segments.
            if index < final_index:
                collected.append(TemplateSlot())

        return Template(collected)
Пример #5
0
 def from_string_tokens(elements: List[str],
                        slot_token: str = None) -> "Template":
     """Build a ``Template`` from already-tokenized strings.

     Each entry equal to ``slot_token`` becomes a ``TemplateSlot``; every other
     entry is wrapped in a ``TemplateString``.
     """

     def as_element(token):
         # Map one raw token onto its template element.
         if token == slot_token:
             return TemplateSlot()
         return TemplateString(token)

     return Template([as_element(token) for token in elements])