Exemplo n.º 1
0
 def process_kwargs(self, mapping):
     ''' Fill in missing option defaults, apply each option to the rules in
     the order the options appear in self.kwargs, then attach a match
     pattern to every rule.

     :param mapping: list of rule dicts (the length sort reads their "in" key)
     :return: the processed list of rules, each carrying a "match_pattern"
     '''
     # Options the caller did not supply are appended after the supplied ones,
     # so they are also applied after them in the loop below.
     option_defaults = (
         ('as_is', False),
         ('case_sensitive', True),
         ('escape_special', False),
         ('norm_form', 'NFD'),
         ('reverse', False),
     )
     for option, fallback in option_defaults:
         self.kwargs.setdefault(option, fallback)

     # Walk the options in dict (i.e. received) order
     for option, value in self.kwargs.items():
         if option == 'as_is':
             if not value:
                 # longest "in" first
                 mapping = sorted(mapping,
                                  key=lambda rule: len(rule["in"]),
                                  reverse=True)
         elif option == 'escape_special':
             if value:
                 mapping = [escape_special_characters(rule) for rule in mapping]
         elif option == 'case_sensitive':
             if not value:
                 mapping = self.lower_mappings(mapping)
         elif option == 'norm_form':
             if value:
                 # Normalize every string field of every rule in place
                 for rule in mapping:
                     for field, content in rule.items():
                         if isinstance(content, str):
                             rule[field] = normalize(content, self.kwargs['norm_form'])
         elif option == 'reverse':
             if value:
                 mapping = self.reverse_mappings(mapping)

     # Only once every transformation has run are the regexes built
     for rule in mapping:
         rule['match_pattern'] = self.rule_to_regex(rule)
     self.processed = True
     return mapping
Exemplo n.º 2
0
    def process_kwargs(self, mapping):
        ''' Apply kwargs in the order they are provided. kwargs are ordered as of python 3.6

        Steps, in order:
          1. translate the deprecated "as_is" option into "rule_ordering"
             (logging a warning),
          2. validate "rule_ordering" and fill in defaults for missing options,
          3. apply the per-option transformations in kwargs order,
          4. attach "intermediate_form" (when feeding is prevented) and
             "match_pattern" to every rule,
          5. drop the rules whose match pattern is empty.

        :param mapping: list of rule dicts
        :return: a list of the processed rules that have a non-empty
                 "match_pattern"
        '''

        if 'as_is' in self.kwargs:
            as_is = self.kwargs['as_is']
            if as_is:
                appropriate_setting = "as-written"
            else:
                appropriate_setting = "apply-longest-first"

            self.kwargs["rule_ordering"] = appropriate_setting

            LOGGER.warning(
                f"mapping from {self.kwargs.get('in_lang')} to {self.kwargs.get('out_lang')} "
                'is using the deprecated parameter "as_is"; '
                f"replace `as_is: {as_is}` with `rule_ordering: {appropriate_setting}`"
            )

        # Add defaults
        if 'rule_ordering' in self.kwargs:
            # right now, "rule-ordering" is a more explicit alias of the "as-is" option.
            ordering = self.kwargs["rule_ordering"]
            if ordering not in ("as-written", "apply-longest-first"):
                # An invalid value is only reported; processing continues.
                LOGGER.error(
                    f"mapping from {self.kwargs.get('in_lang')} to {self.kwargs.get('out_lang')} "
                    f"has invalid value '{ordering}' for rule_ordering parameter; "
                    "rule_ordering must be one of "
                    '"as-written" or "apply-longest-first"')
        else:
            self.kwargs["rule_ordering"] = "as-written"
        # setdefault appends missing options in this order, so they are also
        # processed after any user-supplied options in the loop below.
        for option, default in (('case_sensitive', True),
                                ('escape_special', False),
                                ('norm_form', 'NFD'),
                                ('reverse', False),
                                ('prevent_feeding', False),
                                ('in_lang', 'und'),
                                ('out_lang', 'und')):
            self.kwargs.setdefault(option, default)

        # Process kwargs in order received
        for kwarg, val in self.kwargs.items():
            if kwarg == 'rule_ordering' and self.wants_rules_sorted():
                # sort by reverse len
                mapping = sorted(mapping,
                                 key=lambda x: len(x["in"]),
                                 reverse=True)
            elif kwarg == 'escape_special' and val:
                mapping = [escape_special_characters(x) for x in mapping]
            elif kwarg == 'norm_form' and val:
                for io in mapping:
                    for k, v in io.items():
                        if isinstance(v, str):
                            io[k] = normalize(v, self.kwargs['norm_form'])
            elif kwarg == 'reverse' and val:
                mapping = self.reverse_mappings(mapping)

        # After all processing is done, turn into regex.
        # enumerate() supplies each rule's position directly instead of a
        # linear mapping.index() lookup on every iteration.
        for i, io in enumerate(mapping):
            if self.kwargs['prevent_feeding'] or io.get('prevent_feeding'):
                io['intermediate_form'] = self._string_to_pua(io['out'], i)
            io['match_pattern'] = self.rule_to_regex(io)

        # Remove rules with an empty match pattern in a separate pass: removing
        # from the list while iterating over it skips the rule that follows
        # each removed one, leaving it unprocessed and unfiltered.
        mapping = [io for io in mapping if io['match_pattern']]

        self.processed = True
        return mapping
Exemplo n.º 3
0
 def test_escape_special(self):
     ''' escape_special_characters should backslash-escape "?" in the "in" field. '''
     # Raw string: '\?' is not a recognised escape sequence, so the non-raw
     # spelling triggers an invalid-escape warning. The value is unchanged.
     self.assertEqual(utils.escape_special_characters({'in': '?'}), {'in': r'\?'})
Exemplo n.º 4
0
    def process_kwargs(self, mapping):
        """ Apply the options in self.kwargs to the rules, in the order the
        options were provided (dicts keep insertion order as of python 3.6).

        The deprecated "as_is" option is replaced by the matching
        "rule_ordering" value (with a warning), missing options get defaults,
        each option's transformation is run, every rule gets a
        "match_pattern" (and an "intermediate_form" when feeding is
        prevented), and rules with an empty match pattern are dropped.

        :param mapping: list of rule dicts
        :return: list of processed rules with a non-empty "match_pattern"
        """

        if "as_is" in self.kwargs:
            # Swap the deprecated flag for its rule_ordering equivalent
            as_is = self.kwargs.pop("as_is")
            appropriate_setting = "as-written" if as_is else "apply-longest-first"
            self.kwargs["rule_ordering"] = appropriate_setting

            LOGGER.warning(
                f"mapping from {self.kwargs.get('in_lang')} to {self.kwargs.get('out_lang')} "
                'is using the deprecated parameter "as_is"; '
                f"replace `as_is: {as_is}` with `rule_ordering: {appropriate_setting}`"
            )

        # Defaults: rule_ordering is validated when present, otherwise set
        if "rule_ordering" not in self.kwargs:
            self.kwargs["rule_ordering"] = "as-written"
        else:
            # for now, "rule_ordering" is a more explicit alias of "as_is"
            ordering = self.kwargs["rule_ordering"]
            if ordering not in ("as-written", "apply-longest-first"):
                # an invalid value is only logged; processing carries on
                LOGGER.error(
                    f"mapping from {self.kwargs.get('in_lang')} to {self.kwargs.get('out_lang')} "
                    f"has invalid value '{ordering}' for rule_ordering parameter; "
                    "rule_ordering must be one of "
                    '"as-written" or "apply-longest-first"')

        # The remaining defaults are appended in this fixed order
        remaining_defaults = (
            ("case_sensitive", True),
            ("escape_special", False),
            ("norm_form", "NFD"),
            ("reverse", False),
            ("prevent_feeding", False),
            ("in_lang", "und"),
            ("out_lang", "und"),
        )
        for option, fallback in remaining_defaults:
            self.kwargs.setdefault(option, fallback)

        # Run each option's transformation in the order received
        for option, value in self.kwargs.items():
            if option == "rule_ordering":
                if self.wants_rules_sorted():
                    # longest "in" first
                    mapping = sorted(mapping,
                                     key=lambda rule: len(rule["in"]),
                                     reverse=True)
            elif option == "escape_special":
                if value:
                    mapping = [escape_special_characters(rule) for rule in mapping]
            elif option == "norm_form":
                if value:
                    # normalize every string field of every rule in place
                    for rule in mapping:
                        for field, content in rule.items():
                            if isinstance(content, str):
                                rule[field] = normalize(content, self.kwargs["norm_form"])
            elif option == "reverse":
                if value:
                    mapping = self.reverse_mappings(mapping)

        # With all transformations done, build the regex for each rule
        for position, rule in enumerate(mapping):
            if self.kwargs["prevent_feeding"] or rule.get("prevent_feeding"):
                rule["intermediate_form"] = self._string_to_pua(rule["out"], position)
            rule["match_pattern"] = self.rule_to_regex(rule)

        # Lastly, discard rules whose match pattern is empty (typically empty rules)
        mapping = [rule for rule in mapping if rule["match_pattern"]]

        self.processed = True
        return mapping