class TestRealization(TestCase):
    """Tests for realizing template components with ``FinnishUralicNLPMorphologicalRealizer``.

    The fixture fills a two-component template (a slot reading the
    ``result_value`` fact field and a literal slot) from a single message.
    """

    def setUp(self):
        """Build the filled template and the realizer used by every test."""
        fact = Fact("1", "_", "_", "_", "_", "_", "_", "kissa", "_")
        message = Message(fact)
        self.fact, self.message = fact, message

        corpus_field = FactField("corpus")
        corpus_matcher = Matcher(corpus_field, "=", "1")
        self.expr, self.matcher = corpus_field, corpus_matcher
        self.rules = [([corpus_matcher], [0])]

        value_slot = Slot(FactFieldSource("result_value"))
        word_literal = LiteralSlot("sana")
        self.slot, self.literal = value_slot, word_literal
        self.components = [value_slot, word_literal]

        self.template = Template(self.components, self.rules)
        self.template.fill(message, [message])

        self.realizer = FinnishUralicNLPMorphologicalRealizer()

    def test_no_attrs_slot_left_as_is(self):
        """A slot without attributes realizes to its plain value."""
        realized = self.realizer.realize(self.slot)
        self.assertEqual("kissa", realized)

    def test_no_attrs_literal_left_as_is(self):
        """A literal without attributes realizes to its plain value."""
        realized = self.realizer.realize(self.literal)
        self.assertEqual("sana", realized)

    def test_gen_slot_realized_correctly(self):
        """A slot with ``case=genitive`` realizes to the expected inflected form."""
        self.slot.attributes["case"] = "genitive"
        realized = self.realizer.realize(self.slot)
        self.assertEqual("kissan", realized)

    def test_gen_literal_realized_correctly(self):
        """A literal with ``case=genitive`` realizes to the expected inflected form."""
        self.literal.attributes["case"] = "genitive"
        realized = self.realizer.realize(self.literal)
        self.assertEqual("sanan", realized)
Exemplo n.º 2
0
    def _combine(self, registry: Registry, language: str, first: Message,
                 second: Message) -> Message:
        """
        Merge two messages into one by joining their templates with a conjunction.

        The combined template consists of every component of ``first``, one
        literal conjunction, and the components of ``second`` that follow the
        prefix returned by ``_get_combinable_prefix``.

        :param registry: Registry from which the "CONJUNCTIONS" resource is read
        :param language: Key used to pick the conjunction dictionary for a language
        :param first: Message whose template components come first
        :param second: Message whose components after the shared prefix are appended
        :return: A new message holding the facts of ``first`` followed by the facts
                 of ``second`` not already present, the importance coefficient of
                 ``first``, the combined template, and ``prevent_aggregation`` set
                 to True
        """
        log.debug("Combining {} and {}".format(
            [c.value for c in first.template.components],
            [c.value for c in second.template.components]))

        shared_prefix = self._get_combinable_prefix(first, second)
        log.debug(f"Shared prefix is {[e.value for e in shared_prefix]}")

        combined = list(first.template.components)

        conjunctions = registry.get("CONJUNCTIONS").get(language, None)
        if conjunctions:
            conjunction = conjunctions.get("default_combiner",
                                           "MISSING-DEFAULT-CONJUCTION")
        else:
            # No conjunction dictionary for this language: emit a visible
            # placeholder. The earlier fallback wrapped a defaultdict in a
            # tuple, so the subsequent ``.get`` call raised AttributeError.
            conjunction = "NO-CONJUNCTION-DICT"
        combined.append(Literal(conjunction))

        # Skip the part of the second template that duplicates the shared prefix
        combined.extend(second.template.components[len(shared_prefix):])
        log.debug("Combined thing is {}".format([c.value for c in combined]))

        new_message = Message(
            facts=first.facts +
            [fact for fact in second.facts if fact not in first.facts],
            importance_coefficient=first.importance_coefficient,
        )
        new_message.template = Template(combined)
        new_message.prevent_aggregation = True
        return new_message
    def setUp(self):
        """Build a filled slot/literal template and a Finnish realizer for each test."""
        fact = Fact("1", "_", "_", "_", "_", "_", "_", "kissa", "_")
        message = Message(fact)
        self.fact, self.message = fact, message

        # A single rule: the "corpus" field must equal "1"; it maps to component 0
        corpus_field = FactField("corpus")
        corpus_matcher = Matcher(corpus_field, "=", "1")
        self.expr, self.matcher = corpus_field, corpus_matcher
        self.rules = [([corpus_matcher], [0])]

        value_slot = Slot(FactFieldSource("result_value"))
        word_literal = LiteralSlot("sana")
        self.slot, self.literal = value_slot, word_literal
        self.components = [value_slot, word_literal]

        self.template = Template(self.components, self.rules)
        self.template.fill(message, [message])

        self.realizer = FinnishUralicNLPMorphologicalRealizer()
Exemplo n.º 4
0
    def setUp(self):
        """Create two facts/messages and an unfilled slot/literal template for each test."""
        first_fact = Fact("1", "_", "_", "_", "_", "_", "_", "_", "_")
        second_fact = Fact("2", "_", "_", "_", "_", "_", "_", "_", "_")
        self.fact1, self.fact2 = first_fact, second_fact

        self.message1 = Message(first_fact)
        self.message2 = Message(second_fact)

        # A single rule: the "corpus" field must equal "1"; it maps to component 0
        corpus_field = FactField("corpus")
        corpus_matcher = Matcher(corpus_field, "=", "1")
        self.expr, self.matcher = corpus_field, corpus_matcher
        self.rules = [([corpus_matcher], [0])]

        corpus_slot = Slot(FactFieldSource("corpus"))
        plain_literal = LiteralSlot("literal")
        self.slot, self.literal = corpus_slot, plain_literal
        self.components = [corpus_slot, plain_literal]

        self.template = Template(self.components, self.rules)
    def _add_template_to_message(message: Message, template_original: Template, all_messages: List[Message]) -> None:
        """
        Attach a filled copy of a template to a message and store the facts the template used.

        The template is copied before it is filled. When filling yields no facts, an error is logged and an
        empty ``DefaultTemplate`` is attached instead. In both cases ``message.facts`` is set to whatever
        ``fill`` returned.

        :param message: The message that receives the template
        :param template_original: The template to copy and fill for the message
        :param all_messages: The other available messages, passed on to ``fill`` for matching further rules
        :return: Nothing
        """
        candidate = template_original.copy()
        used_facts = candidate.fill(message, all_messages)

        if not used_facts:
            # Filling failed: report it and swap in the fallback template
            log.error(
                "Chosen template '{}' for fact '{}' could not be used! "
                "Falling back to default templates".format(candidate.display_template(), message.main_fact)
            )
            candidate = DefaultTemplate("")
        else:
            log.debug("Successfully linked template to message")

        message.template = candidate
        message.facts = used_facts
Exemplo n.º 6
0
def read_template_group(
    template_spec: List[str], current_language: Optional[str] = None, warn_on_old_format: bool = True
):
    """
    Parse a template group: one block that shares fact constraints and may specify multiple templates
    (for different languages, or the same).

    :param template_spec: text of block, either already split into lines or as a single string
    :param current_language: default language to start with
    :param warn_on_old_format: output warnings when the old template format is used. This is the default,
        since the old format is deprecated when using this function, but if you know you're reading an old
        file, you can suppress the warnings
    :return: 3-tuple of (dict of language -> template list, new default language after group, set of the
        values seen in ``what_type`` constraints). The dict and the set are empty when the block only switches
        the language or is in the old format, so every code path can be unpacked the same way.
    :raises TemplateReadingError: if a substitution is empty, unclosed, has an unterminated quoted literal,
        refers to an unknown/invalid field or rule, or has an attribute without a value
    """
    # Allow either a string (block) or a list of lines as input
    if isinstance(template_spec, str):
        template_spec = template_spec.splitlines()

    # For readability, lines may be spread over multiple lines of the file, indenting after the first
    # This applies to the template text and fact constraints
    lines = list(group_indented_lines(template_spec))

    # Allow changing the current language without any templates
    # This is mostly used to specify a monolingual set of templates, defining the language and letting it carry through
    lang_name, colon, rest = "".join(lines).strip().partition(":")
    if colon and not rest:
        # Return no templates and update the current language
        return {}, lang_name, set()

    # Detect whether this template is specified in the new or old format. Templates using the old format are
    # ignored.
    if not any(line.startswith(RULE_PREFIX) for line in lines):
        # If there are no fact constraint lines (now explicitly marked), assume this is the old format
        if warn_on_old_format:
            warnings.warn("no fact constraint lines found in template: assuming old format")
        return {}, current_language, set()

    # Something has to be specified
    # NOTE(review): this check cannot trigger here, because an empty ``lines`` already took the old-format
    # return above. Kept in place so that empty blocks keep being ignored rather than starting to raise.
    if len(lines) == 0:
        raise TemplateReadingError("empty template definition", raw_text="\n".join(template_spec))

    # Split up the lines into template lines (potentially with a language specifier, but not necessarily) and
    # constraint lines
    template_lines = [line for line in lines if not line.startswith(RULE_PREFIX)]
    # The rest of the lines each define a fact associated with the templates, along with constraints
    constraint_lines = [line[len(RULE_PREFIX) :].lstrip() for line in lines if line.startswith(RULE_PREFIX)]

    # FACT CONSTRAINTS
    # Read in the fact constraints first to get a list of rules that will be associated with the template
    rules = []  # type: List[List[Matcher]]
    seen_what_types = []
    for constraint_line in constraint_lines:
        # Every part of this line represents a constraint on the facts that may match
        matchers = []
        for lhs, op, value in parse_matcher_expr(constraint_line):
            if "what_type" in lhs.field_name:
                # Keep track of all what_types we've seen for reference
                # ToDo: What to do with the regexes?
                if isinstance(value, set):
                    seen_what_types.extend(value)
                else:
                    seen_what_types.append(value)

            matchers.append(Matcher(lhs, op, value))
        rules.append(matchers)

    # Every template is associated with at least one rule
    # If no constraints were given (very usual), create one rule with no constraints
    if not rules:
        rules.append([])

    # TEMPLATES
    # Now we parse the template lines themselves
    templates = {}
    for template_line in template_lines:
        # Work out what language this template is for
        lang_id_match = lang_spec_re.match(template_line)
        # Without a language spec, the current default language is used as it is
        if lang_id_match is not None:
            language, template_line = lang_id_match.groups()
            # Make language specifiers case insensitive
            language = language.lower()
            # If empty language spec, use default language (and strip away the colon prefix)
            if language:
                # Otherwise, switch the current language, so it gets used for this template and becomes the default
                current_language = language

        # Allow alternative versions of a template to be specified using the [] notation for optional parts
        for expanded_template_line in expand_alternatives(template_line):
            components = []  # type: List['TemplateComponent']

            # For every rule, the indices of the components (slots) that refer to it
            rule_to_slot = [[] for _ in rules]  # type: List[List[int]]

            rest = expanded_template_line.strip()
            while rest.strip():
                # Look for the next opening brace marking a substitution
                literal_part, __, rest = rest.partition("{")
                # Everything up to the brace is a literal
                # To make life easier for the aggregator, literals are split on whitespace here
                # (splitting an empty string yields nothing, so no separate emptiness check is needed)
                components.extend(Literal(literal) for literal in literal_part.split())

                # If no brace was found, we're done
                if not rest:
                    continue

                # Look for the closing brace
                subst, closer, rest = rest.partition("}")
                if not closer:
                    raise TemplateReadingError("closing brace missing in {}".format(expanded_template_line))
                # Split up the substitution spec on commas, to allow various attributes and filters to be included
                subst_parts = [p.strip() for p in subst.split(",")]
                base = subst_parts[0]

                # An empty substitution such as "{}" would otherwise fail below with an IndexError
                if not base:
                    raise TemplateReadingError("empty substitution in {}".format(expanded_template_line))

                # First check if the first part is actually a literal.
                if base[0] in ['"', "'"]:
                    # A lone quote character is both the first and the last character, so require two characters
                    if len(base) < 2 or base[-1] != base[0]:
                        raise TemplateReadingError("closing quote missing in {}".format(expanded_template_line))
                    field_name = base
                    rule_ref = None
                else:
                    # The first thing is the base value to substitute, which should be one of the fact fields
                    # or the new {time} slot, which refers to both when-fields
                    field_name = base

                    # It may specify which of the facts it's referring to, though this is not required
                    # (default to first)
                    if "." in field_name:
                        rule_ref, __, field_name = field_name.partition(".")
                        # Use 1-indexed fact numbering in templates: makes more sense for anyone but
                        # computer scientists
                        try:
                            rule_ref = int(rule_ref) - 1
                        except ValueError as err:
                            raise TemplateReadingError(
                                "invalid rule reference '{}' used in substitution ({})".format(rule_ref, subst)
                            ) from err
                        if rule_ref < 0:
                            raise TemplateReadingError(
                                "Rule references use 1-index numbering. Found reference to rule "
                                "0: did you mean 1?"
                            )
                    else:
                        # Default to referring to the first rule, since there's usually only one
                        rule_ref = 0

                    # Map alternative field names to their canonical form used internally
                    try:
                        field_name = FACT_FIELD_MAP[field_name]
                    except KeyError as err:
                        raise TemplateReadingError(
                            "unknown fact field '{}' used in substitution ({})".format(field_name, subst)
                        ) from err

                    # Only some of the field names are allowed to be used in templates
                    # TODO: Remove or reinstate with allowed things received as params from "somewhere"
                    if field_name not in FACT_FIELDS:
                        raise TemplateReadingError(
                            "invalid field name '{}' for use in a template: {}".format(
                                field_name, expanded_template_line
                            )
                        )

                    if rule_ref >= len(rules):
                        raise TemplateReadingError(
                            "Substitution '{}' refers to rule {}, but template only has {} "
                            "rules".format(subst, rule_ref + 1, len(rules))
                        )

                attributes = {}
                # Read each of the attribute specifications
                for subst_part in subst_parts[1:]:
                    if "=" not in subst_part:
                        raise TemplateReadingError(
                            "Found an attribute with no value specified. "
                            "Possibly a leftover old style filter? {}".format(subst_part)
                        )
                    # Attributes specify things like case, to be used in realisation
                    att, __, val = subst_part.partition("=")
                    attributes[att.strip()] = val.strip()

                if field_name[0] in ["'", '"']:
                    # Quoted literal: drop the surrounding quotes
                    to_value = LiteralSource(field_name[1:-1])
                elif field_name == "time":
                    to_value = TimeSource()
                else:
                    to_value = FactFieldSource(field_name)

                # Postprocess attributes
                attributes = process_attributes(attributes)

                # len(components) is the index for the next component to be added
                if rule_ref is not None:
                    rule_to_slot[rule_ref].append(len(components))
                components.append(Slot(to_value, attributes=attributes))

            template = Template(components, list(zip(rules, rule_to_slot)))
            # Add this template to the list for the relevant language
            templates.setdefault(current_language, []).append(template)

    return templates, current_language, set(seen_what_types)
Exemplo n.º 7
0
class TestTemplate(TestCase):
    """Unit tests for ``Template``: construction, slot handling, ``check`` and ``fill``."""

    def setUp(self):
        """Create two facts/messages and an unfilled slot/literal template for each test."""
        first_fact = Fact("1", "_", "_", "_", "_", "_", "_", "_", "_")
        second_fact = Fact("2", "_", "_", "_", "_", "_", "_", "_", "_")
        self.fact1, self.fact2 = first_fact, second_fact

        self.message1 = Message(first_fact)
        self.message2 = Message(second_fact)

        # A single rule: the "corpus" field must equal "1"; it maps to component 0
        corpus_field = FactField("corpus")
        corpus_matcher = Matcher(corpus_field, "=", "1")
        self.expr, self.matcher = corpus_field, corpus_matcher
        self.rules = [([corpus_matcher], [0])]

        corpus_slot = Slot(FactFieldSource("corpus"))
        plain_literal = LiteralSlot("literal")
        self.slot, self.literal = corpus_slot, plain_literal
        self.components = [corpus_slot, plain_literal]

        self.template = Template(self.components, self.rules)

    def _add_timestamp_slot(self, index):
        """Create a ``timestamp_from`` slot, add it to the template at ``index`` and return it."""
        added = Slot(FactFieldSource("timestamp_from"))
        self.template.add_slot(index, added)
        return added

    def test_template_constructs(self):
        """A new template keeps its components and starts with an empty fact list."""
        self.assertListEqual(self.template.components, self.components)
        self.assertIsInstance(self.template.facts, list)
        self.assertEqual(len(self.template.facts), 0)

    def test_template_sets_parent_to_components(self):
        """Each component's ``parent`` is the template it was given to."""
        for component in (self.slot, self.literal):
            self.assertEqual(component.parent, self.template)

    def test_template_get_slot(self):
        """``get_slot`` finds existing components by name and raises ``KeyError`` otherwise."""
        for name, expected in (("corpus", self.slot), ("literal", self.literal)):
            self.assertEqual(self.template.get_slot(name), expected)

        with self.assertRaises(KeyError):
            self.template.get_slot("no such")

    def test_template_add_slot(self):
        """An added slot is among the components and retrievable via ``get_slot``."""
        added = self._add_timestamp_slot(2)
        self.assertIn(added, self.template.components)
        self.assertEqual(self.template.get_slot("timestamp_from"), added)

    def test_template_added_slot(self):
        """Adding at index 1 places the slot between the two original components."""
        added = self._add_timestamp_slot(1)
        expected_order = [self.slot, added, self.literal]
        self.assertListEqual(self.template.components, expected_order)

    def test_template_added_slot_is_last_component(self):
        """Adding at index 2 places the slot after the two original components."""
        added = self._add_timestamp_slot(2)
        expected_order = [self.slot, self.literal, added]
        self.assertListEqual(self.template.components, expected_order)

    def test_template_move_slot_forwards(self):
        """Moving the component at index 0 to index 1 swaps the first two components."""
        # TODO: This fails, when it shouldn't
        added = self._add_timestamp_slot(2)

        self.template.move_slot(0, 1)
        expected_order = [self.literal, self.slot, added]
        self.assertListEqual(self.template.components, expected_order)

    def test_template_move_slot_backwards(self):
        """Moving the component at index 2 to index 1 swaps the last two components."""
        added = self._add_timestamp_slot(2)

        self.template.move_slot(2, 1)
        expected_order = [self.slot, added, self.literal]
        self.assertListEqual(self.template.components, expected_order)

    def test_template_check_success(self):
        """``check`` returns exactly the matching fact for a matching message."""
        matched = self.template.check(self.message1, [self.message1])
        self.assertEqual(len(matched), 1)
        self.assertIn(self.fact1, matched)

    def test_template_check_success_does_not_fill(self):
        """A successful ``check`` leaves the message, the template and the slot unfilled."""
        self.template.check(self.message1, [self.message1])
        self.assertIsNone(self.message1.template)
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)

    def test_template_check_failure(self):
        """``check`` returns no facts for a message that does not match."""
        matched = self.template.check(self.message2, [self.message2])
        self.assertEqual(len(matched), 0)
        self.assertNotIn(self.fact2, matched)

    def test_template_check_failure_does_not_fill(self):
        """A failed ``check`` leaves the template and the slot unfilled."""
        self.template.check(self.message2, [self.message2])
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)

    def test_template_fill_success(self):
        """``fill`` returns exactly the matching fact for a matching message."""
        filled_with = self.template.fill(self.message1, [self.message1])
        self.assertEqual(len(filled_with), 1)
        self.assertIn(self.fact1, filled_with)

    def test_template_fill_success_fills(self):
        """A successful ``fill`` stores the fact on the template and on the slot."""
        self.template.fill(self.message1, [self.message1])
        self.assertEqual(len(self.template.facts), 1)
        self.assertIn(self.fact1, self.template.facts)
        self.assertEqual(self.slot.fact, self.fact1)

    def test_template_fill_failure(self):
        """``fill`` returns no facts for a message that does not match."""
        filled_with = self.template.fill(self.message2, [self.message2])
        self.assertEqual(len(filled_with), 0)
        self.assertNotIn(self.fact2, filled_with)

    def test_template_fill_failure_does_not_fill(self):
        """A failed ``fill`` leaves the template and the slot unfilled."""
        self.template.fill(self.message2, [self.message2])
        self.assertEqual(len(self.template.facts), 0)
        self.assertIsNone(self.slot.fact)