Exemplo n.º 1
0
    def test_slot_copy_copies_value_type_and_attributes(self):
        # A copy has to expose the same value, slot type and attributes as the
        # slot it was made from.
        original = Slot(self.to_value, self.attributes, self.fact)
        duplicate = original.copy()

        self.assertEqual(original.value, duplicate.value)
        self.assertEqual(original.slot_type, duplicate.slot_type)
        self.assertEqual(original.attributes, duplicate.attributes)
Exemplo n.º 2
0
    def test_slot_copy_changes_to_copy_do_not_reflect_to_original(self):
        # Mutating the copy (both its attributes and its value) must leave the
        # original slot untouched.
        original = Slot(self.to_value, self.attributes, self.fact)
        duplicate = original.copy()

        duplicate.attributes["new_key"] = "new_val"
        duplicate.value = LiteralSource("new literal")

        self.assertNotEqual(original.value, duplicate.value)
        self.assertNotEqual(original.attributes, duplicate.attributes)
Exemplo n.º 3
0
    def test_template_move_slot_backwards(self):
        # Append a third component, then move it from index 2 to index 1.
        appended = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, appended)
        self.template.move_slot(2, 1)

        expected_order = [self.slot, appended, self.literal]
        self.assertListEqual(self.template.components, expected_order)
Exemplo n.º 4
0
    def test_template_move_slot_forwards(self):
        # TODO: This fails, when it shouldn't
        # Append a third component, then move the first component to index 1.
        appended = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(2, appended)
        self.template.move_slot(0, 1)

        expected_order = [self.literal, self.slot, appended]
        self.assertListEqual(self.template.components, expected_order)
Exemplo n.º 5
0
    def test_slot_creation_with_default_values(self):
        # Only the value source is supplied; everything else must fall back to
        # the constructor defaults.
        default_slot = Slot(self.to_value)

        self.assertEqual(default_slot.value, "some literal")
        self.assertEqual(default_slot.slot_type, "literal")

        attrs = default_slot.attributes
        self.assertIsInstance(attrs, dict)
        self.assertEqual(len(attrs), 0)

        self.assertIsNone(default_slot.fact)
Exemplo n.º 6
0
    def realize(self, slot: Slot, random: Generator, language: str) -> Tuple[bool, List[TemplateComponent]]:
        """
        Realize a slot whose string value encodes a list of entities.

        The slot value must fully match ``self.regex``. The first extracted group is split on
        ``self.separator``; each resulting element is formatted into a randomly chosen template from
        ``self.templates`` and split into tokens, each of which becomes a copy of the original slot.
        Between entities a ``Literal(",")`` is inserted, except before the final entity, which is
        preceded by ``Literal(self.combiner)``.

        :param slot: the slot to realize; only string-valued slots are handled
        :param random: random generator used to pick a template for each entity
        :param language: not used by this method
        :return: ``(False, [])`` if the slot is not handled by this realizer, otherwise ``(True, components)``
        """
        # We can only parse the slot contents with a regex if the slot contents are a string
        if not isinstance(slot.value, str):
            return False, []

        match = re.fullmatch(self.regex, slot.value)
        if not match:
            return False, []

        groups = [match.group(i) for i in self.extracted_groups]

        # Check that the requirements placed on the groups are fulfilled
        if self.group_requirements is not None and not self.group_requirements(*groups):
            return False, []

        # Check that the requirements placed on the slot are fulfilled
        if self.slot_requirements is not None and not self.slot_requirements(slot):
            return False, []

        components = []
        entities = groups[0].split(self.separator)
        for entity_idx, element in enumerate(entities):
            remaining = len(entities) - entity_idx - 1

            template = random.choice(self.templates)
            log.info("Template: {}".format(template))

            string_realization = template.format(element)
            log.info("String realization: {}".format(string_realization))

            # A distinct index name is used here so the entity index above is not shadowed.
            produced_token = False
            for token_idx, realization_token in enumerate(self.split_to_tokens(string_realization)):
                produced_token = True
                new_slot = slot.copy(include_fact=True)

                # By default, copy copies the attributes too. In case attach_attributes_to was set,
                # we need to explicitly reset the attributes for all those slots NOT explicitly mentioned
                if token_idx not in self.attach_attributes_to:
                    new_slot.attributes = {}

                # An ugly hack that ensures the lambda correctly binds to the value of realization_token at this
                # time. Without this, all the lambdas bind to the final value of the realization_token variable, ie.
                # the final value at the end of the loop.  See https://stackoverflow.com/a/10452819
                new_slot.value = lambda f, realization_token=realization_token: realization_token
                components.append(new_slot)

            # The separator belongs between entities, so it is emitted once per entity rather than
            # after every token of a multi-token realization. Entities that produced no tokens get
            # no separator.
            if produced_token:
                if remaining > 1:
                    components.append(Literal(","))
                elif remaining == 1:
                    components.append(Literal(self.combiner))

            log.info("Components: {}".format([str(c) for c in components]))

        return True, components
Exemplo n.º 7
0
    def realize(self, slot: Slot, random: Generator, language: str) -> Tuple[bool, List[TemplateComponent]]:
        """
        Realize a numeric slot as a locale-formatted number.

        The slot value is converted to ``float``. Whole numbers are formatted as integers; other
        values are formatted with two more decimals than the first precision (0-4 decimals) at which
        the rounded value is non-zero. Formatting is deferred: ``slot.value`` is replaced by a
        callable that invokes ``babel.numbers.format_decimal`` when called.

        :param slot: the slot to realize; if its ``abs`` attribute is truthy the absolute value is used
        :param random: not used by this method
        :param language: locale string; only the part before the first ``-`` is used
        :return: ``(False, [])`` if the value is not a finite number, otherwise ``(True, [slot])``.
            A value that still rounds to zero at four decimals leaves the slot unmodified.
        """
        import math

        language = language.split("-")[0]

        # Anything float() cannot handle (None, arbitrary objects, non-numeric strings, huge ints)
        # means this realizer does not apply, rather than being an error.
        try:
            value = float(slot.value)
        except (TypeError, ValueError, OverflowError):
            return False, []

        # NaN and infinities cannot be converted to int or sensibly rounded.
        if not math.isfinite(value):
            return False, []

        if slot.attributes.get("abs"):
            value = abs(value)

        if value.is_integer():
            slot.value = lambda x: babel.numbers.format_decimal(int(value), locale=language)
            return True, [slot]

        for rounding in range(5):
            if round(value, rounding) != 0:
                # Returning straight away keeps `rounding` fixed for the closure below.
                slot.value = lambda x: babel.numbers.format_decimal(round(value, rounding + 2), locale=language)
                return True, [slot]

        return True, [slot]
Exemplo n.º 8
0
    def resolve_surface_form(self, registry: Registry, random: Generator,
                             language: str, slot: Slot, entity: str,
                             entity_type: str) -> None:
        """
        Replace the slot's value with the surface form of ``entity``.

        The realizer is looked up in ``self.realizers`` by language, then entity type, then the
        slot's ``name_type`` attribute. If none is found an error is logged and the slot is left
        unchanged. ``registry`` is not used by this method.
        """
        realizers_for_type = self.realizers.get(language, {}).get(entity_type, {})
        realizer = realizers_for_type.get(slot.attributes.get("name_type"))

        if realizer is not None:
            realization = realizer.resolve(random, entity)
            slot.value = lambda x: realization
            log.debug('Realizer entity "{}" of type "{}" as "{}"'.format(
                entity, entity_type, realization))
            return

        log.error(
            "No entity name resolver component for language {} and entity_type {}!"
            .format(language, entity_type))
    def setUp(self):
        """Build a filled two-component template and the realizer under test."""
        fact = Fact("1", "_", "_", "_", "_", "_", "_", "kissa", "_")
        message = Message(fact)
        self.fact = fact
        self.message = message

        expr = FactField("corpus")
        matcher = Matcher(expr, "=", "1")
        self.expr = expr
        self.matcher = matcher
        self.rules = [([matcher], [0])]

        slot = Slot(FactFieldSource("result_value"))
        literal = LiteralSlot("sana")
        self.slot = slot
        self.literal = literal
        self.components = [slot, literal]

        template = Template(self.components, self.rules)
        self.template = template
        template.fill(message, [message])

        self.realizer = FinnishUralicNLPMorphologicalRealizer()
Exemplo n.º 10
0
    def setUp(self):
        """Create two facts with their messages, one rule, and an unfilled two-component template."""
        first_fact = Fact("1", "_", "_", "_", "_", "_", "_", "_", "_")
        second_fact = Fact("2", "_", "_", "_", "_", "_", "_", "_", "_")
        self.fact1 = first_fact
        self.fact2 = second_fact

        self.message1 = Message(first_fact)
        self.message2 = Message(second_fact)

        expr = FactField("corpus")
        matcher = Matcher(expr, "=", "1")
        self.expr = expr
        self.matcher = matcher
        self.rules = [([matcher], [0])]

        slot = Slot(FactFieldSource("corpus"))
        literal = LiteralSlot("literal")
        self.slot = slot
        self.literal = literal
        self.components = [slot, literal]

        self.template = Template(self.components, self.rules)
Exemplo n.º 11
0
 def test_template_add_slot(self):
     # A slot added to the template must be among its components and be
     # retrievable through get_slot under its field name.
     appended = Slot(FactFieldSource("timestamp_from"))
     self.template.add_slot(2, appended)

     self.assertIn(appended, self.template.components)
     looked_up = self.template.get_slot("timestamp_from")
     self.assertEqual(looked_up, appended)
Exemplo n.º 12
0
def read_template_group(
    template_spec: List[str], current_language: Optional[str] = None, warn_on_old_format: bool = True
):
    """
    Parse a template group: one block that shares fact constraints and may specify multiple templates
    (for different languages, or the same).

    :param template_spec: text of block, split into lines (a single string is also accepted and is
        split into lines)
    :param current_language: default language to start with
    :param warn_on_old_format: output warnings when the old template format is used. This is the default,
        since the old format is deprecated when using this function, but if you know you're reading an old
        file, you can suppress the warnings
    :return: 3-tuple of (dict of language -> template list, new default language after group,
        set of the ``what_type`` values seen in the fact constraints). The same shape is returned on
        every path; groups that yield no templates return an empty dict and an empty set.
    :raises TemplateReadingError: if a template line is malformed
    """
    # Allow either a string (block) or a list of lines as input
    if isinstance(template_spec, str):
        template_spec = template_spec.splitlines()

    # For readability, lines may be spread over multiple lines of the file, indenting after the first
    # This applies to the template text and fact constraints
    lines = list(group_indented_lines(template_spec))

    # Allow changing the current language without any templates
    # This is mostly used to specify a monolingual set of templates, defining the language and letting it carry through
    lang_name, colon, rest = "".join(lines).strip().partition(":")
    if colon and not rest:
        # Return no templates and update the current language
        return {}, lang_name, set()

    # Detect whether this template is specified in the new or old format. Templates using the old format are
    # ignored.
    if not any(line.startswith(RULE_PREFIX) for line in lines):
        # If there are no fact constraint lines (now explicitly marked), assume this is the old format
        if warn_on_old_format:
            warnings.warn("no fact constraint lines found in template: assuming old format")
        return {}, current_language, set()

    # Something has to be specified
    # NOTE(review): this check cannot trigger, because an empty `lines` is already handled by the
    # old-format branch above. It is left in place (rather than moved up) so that blocks which are
    # currently skipped do not start raising.
    if len(lines) == 0:
        raise TemplateReadingError("empty template definition", raw_text="\n".join(template_spec))

    # Split up the lines into template lines (potentially with a language specifier, but not necessarily) and
    # constraint lines
    template_lines = [line for line in lines if not line.startswith(RULE_PREFIX)]
    # The rest of the lines each define a fact associated with the templates, along with constraints
    constraint_lines = [line[len(RULE_PREFIX) :].lstrip() for line in lines if line.startswith(RULE_PREFIX)]

    # FACT CONSTRAINTS
    # Read in the fact constraints first to get a list of rules that will be associated with the template
    rules = []  # type: List[List[Matcher]]
    seen_what_types = []
    for constraint_line in constraint_lines:
        # Every part of this line represents a constraint on the facts that may match
        matchers = []
        for lhs, op, value in parse_matcher_expr(constraint_line):
            if "what_type" in lhs.field_name:
                # Keep track of all what_types we've seen for reference
                # ToDo: What to do with the regexes?
                if isinstance(value, set):
                    seen_what_types.extend(value)
                else:
                    seen_what_types.append(value)

            matchers.append(Matcher(lhs, op, value))
        rules.append(matchers)

    # Every template is associated with at least one rule
    # If no constraints were given (very usual), create one rule with no constraints
    if not rules:
        rules.append([])

    # TEMPLATES
    # Now we parse the template lines themselves
    templates = {}
    for template_line in template_lines:
        # Work out what language this template is for
        lang_id_match = lang_spec_re.match(template_line)
        if lang_id_match is not None:
            language, template_line = lang_id_match.groups()
            # Make language specifiers case insensitive
            language = language.lower()
            # If empty language spec, use default language (and strip away the colon prefix)
            if language:
                # Otherwise, switch the current language, so it gets used for this template and becomes the default
                current_language = language
        # No language spec for this template: the default language is used as is

        # Allow alternative versions of a template to be specified using the [] notation for optional parts
        for expanded_template_line in expand_alternatives(template_line):
            components = []  # type: List['TemplateComponent']

            # Generate list for mapping rules into template Slots: one (initially empty) index list per rule
            rule_to_slot = [[] for _ in rules]  # type: List[List[int]]

            rest = expanded_template_line.strip()
            while rest.strip():
                # Look for the next opening brace marking a substitution
                literal_part, __, rest = rest.partition("{")
                # Everything up to the brace is a literal
                if literal_part:
                    # To make life easier for the aggregator, literals are split on whitespace here
                    for literal in literal_part.split():
                        components.append(Literal(literal))
                # If no brace was found, we're done
                if rest:
                    # Look for the closing brace
                    subst, closer, rest = rest.partition("}")
                    if not closer:
                        raise TemplateReadingError("closing brace missing in {}".format(expanded_template_line))
                    # Split up the substitution spec on commas, to allow various attributes and filters to be included
                    subst_parts = [p.strip() for p in subst.split(",")]

                    # An empty base value (e.g. "{}" or "{, case=gen}") cannot be interpreted; report it
                    # as a template error instead of failing on the indexing below
                    if not subst_parts[0]:
                        raise TemplateReadingError(
                            "empty substitution in {}".format(expanded_template_line)
                        )

                    # First check if the first part is actually a literal.
                    if subst_parts[0][0] in ['"', "'"]:
                        # A lone quote character is its own first and last character, so the length
                        # has to be checked as well
                        if len(subst_parts[0]) < 2 or subst_parts[0][-1] != subst_parts[0][0]:
                            raise TemplateReadingError("closing quote missing in {}".format(expanded_template_line))
                        field_name = subst_parts[0]
                        rule_ref = None
                    else:
                        # The first thing is the base value to substitute, which should be one of the fact fields
                        # or the new {time} slot, which refers to both when-fields
                        field_name = subst_parts[0]

                        # It may specify which of the facts it's referring to, though this is not required
                        # (default to first)
                        if "." in field_name:
                            rule_ref, __, field_name = field_name.partition(".")
                            # Use 1-indexed fact numbering in templates: makes more sense for anyone but
                            # computer scientists
                            try:
                                rule_ref = int(rule_ref) - 1
                            except ValueError:
                                raise TemplateReadingError(
                                    "invalid rule reference '{}' used in substitution ({})".format(rule_ref, subst)
                                )
                            if rule_ref < 0:
                                raise TemplateReadingError(
                                    "Rule references use 1-index numbering. Found reference to rule "
                                    "0: did you mean 1?"
                                )
                        else:
                            # Default to referring to the first rule, since there's usually only one
                            rule_ref = 0

                        # Map alternative field names to their canonical form used internally
                        try:
                            field_name = FACT_FIELD_MAP[field_name]
                        except KeyError:
                            raise TemplateReadingError(
                                "unknown fact field '{}' used in substitution ({})".format(field_name, subst)
                            )

                        # Only some of the field names are allowed to be used in templates
                        # TODO: Remove or reinstate with allowed things received as params from "somewhere"
                        if field_name not in FACT_FIELDS:
                            raise TemplateReadingError(
                                "invalid field name '{}' for use in a template: {}".format(
                                    field_name, expanded_template_line
                                )
                            )

                        if rule_ref >= len(rules):
                            raise TemplateReadingError(
                                "Substitution '{}' refers to rule {}, but template only has {} "
                                "rules".format(subst, rule_ref + 1, len(rules))
                            )

                    attributes = {}
                    # Read each of the attribute specifications
                    for subst_part in subst_parts[1:]:
                        if "=" in subst_part:
                            # Attributes specify things like case, to be used in realisation
                            att, __, val = subst_part.partition("=")
                            attributes[att.strip()] = val.strip()
                        else:
                            raise TemplateReadingError(
                                "Found an attribute with no value specified. "
                                "Possibly a leftover old style filter? {}".format(subst_part)
                            )

                    if field_name[0] in ["'", '"']:
                        # Quoted literal: drop the surrounding quotes
                        to_value = LiteralSource(field_name[1:-1])
                    elif field_name == "time":
                        to_value = TimeSource()
                    else:
                        to_value = FactFieldSource(field_name)

                    # Postprocess attributes
                    attributes = process_attributes(attributes)

                    # len(components) is the index for the next component to be added
                    # Quoted literals have no rule reference and are therefore not mapped to any rule
                    if rule_ref is not None:
                        rule_to_slot[rule_ref].append(len(components))
                    new_slot = Slot(to_value, attributes=attributes)
                    components.append(new_slot)

            template = Template(components, list(zip(rules, rule_to_slot)))
            # Add this template to the list for the relevant language
            templates.setdefault(current_language, []).append(template)

    return templates, current_language, set(seen_what_types)
Exemplo n.º 13
0
    def test_slot_value_setter(self):
        # Assigning a new source through the setter changes what the value
        # property reports.
        target = Slot(self.to_value)
        replacement = LiteralSource("another literal")
        target.value = replacement

        self.assertEqual(target.value, "another literal")
Exemplo n.º 14
0
    def test_slot_value_setter_updates_slot_type(self):
        # Swapping in a fact-field source must be reflected by slot_type.
        target = Slot(self.to_value)
        replacement = FactFieldSource("a fake type")
        target.value = replacement

        self.assertEqual(target.slot_type, "a fake type")
Exemplo n.º 15
0
    def test_slot_copy_does_not_copy_fact(self):
        # copy() called without arguments must not carry the fact over.
        original = Slot(self.to_value, self.attributes, self.fact)
        duplicate = original.copy()

        self.assertIsNone(duplicate.fact)
Exemplo n.º 16
0
 def test_template_added_slot_is_last_component(self):
     # Adding at index 2 of a two-component template appends the slot.
     appended = Slot(FactFieldSource("timestamp_from"))
     self.template.add_slot(2, appended)

     expected_order = [self.slot, self.literal, appended]
     self.assertListEqual(self.template.components, expected_order)
Exemplo n.º 17
0
    def test_slot_creation_without_defaults(self):
        # Explicitly supplied attributes and fact must be stored on the slot.
        explicit_slot = Slot(self.to_value, self.attributes, self.fact)

        self.assertEqual(explicit_slot.attributes, self.attributes)
        self.assertEqual(explicit_slot.fact, self.fact)