Пример #1
0
 def _lang2dict_alias2server(cls, lang):
     """Build a flat alias -> server lookup for ``lang``.

     Every server from ``cls.list_all()`` is paired with each alias that
     ``cls.server_langs2aliases`` yields for the languages returned by
     ``HenriqueLocale.lang2langs_recognizable(lang)``.

     :param lang: language code handed to
         ``HenriqueLocale.lang2langs_recognizable``.
     :return: the result of ``merge_dicts`` over one ``{alias: server}``
         dict per alias, with ``vwrite_no_duplicate_key`` as write policy
         (presumably rejecting an alias claimed by two servers -- confirm
         against its definition).
     """
     recognizable = HenriqueLocale.lang2langs_recognizable(lang)

     single_entry_dicts = []
     for server in cls.list_all():
         for alias in cls.server_langs2aliases(server, recognizable):
             single_entry_dicts.append({alias: server})

     return merge_dicts(single_entry_dicts, vwrite=vwrite_no_duplicate_key)
Пример #2
0
    def lang2pattern(cls, lang):
        """Compile a case-insensitive pattern matching any entry listed for ``lang``.

        Entries are read from ``cls.j_yaml()`` under each language returned
        by ``HenriqueLocale.lang2langs_recognizable(lang)``; a language with
        no key contributes nothing.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a compiled ``re`` pattern (``re.I``) built from
            ``RegexTool.rstr_iter2or`` over the regex-escaped entries.
        """
        yaml_data = cls.j_yaml()
        recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        surface_forms = []
        for lang_code in recognizable:
            surface_forms.extend(yaml_data.get(lang_code, []))

        # Escape each entry so it is matched literally inside the alternation.
        alternation = RegexTool.rstr_iter2or(map(re.escape, surface_forms))
        return re.compile(alternation, re.I)
Пример #3
0
    def _lang2dict_alias2codename(cls, lang):
        """Build a normalized-alias -> codename lookup for ``lang``.

        Aliases are taken from ``cls.dict_lang2codename2texts()`` for every
        language returned by ``HenriqueLocale.lang2langs_recognizable(lang)``
        and normalized with ``cls.text2norm`` before being used as keys.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: the result of ``merge_dicts`` over one
            ``{normalized_alias: codename}`` dict per alias, with
            ``vwrite_no_duplicate_key`` as the write policy.
        """
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        h_lang2codename2texts = cls.dict_lang2codename2texts()

        h_alias2codename = merge_dicts(
            [{cls.text2norm(alias): codename}
             for _lang in langs
             # A recognizable language may have no entry (or an empty one);
             # treat it as contributing no aliases instead of failing on
             # ``None.items()``.
             for codename, aliases in (h_lang2codename2texts.get(_lang) or {}).items()
             for alias in aliases],
            vwrite=vwrite_no_duplicate_key)
        return h_alias2codename
Пример #4
0
    def lang2matcher(cls, lang):
        """Create a ``GazetteerMatcher`` over port codenames and their aliases.

        For every port in ``Port.list_all()`` the codename
        (``Port.port2codename``) is mapped to the aliases
        ``Port.port_langs2aliases`` returns for the languages recognizable
        from ``lang``.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a ``GazetteerMatcher`` configured with ``cls.text2norm`` as
            normalizer and ``HenriqueEntity.texts2pattern_port_tradegood`` as
            the texts-to-pattern hook.
        """
        recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        per_port_dicts = []
        for port in Port.list_all():
            codename = Port.port2codename(port)
            aliases = Port.port_langs2aliases(port, recognizable)
            per_port_dicts.append({codename: aliases})
        codename2aliases = merge_dicts(per_port_dicts,
                                       vwrite=vwrite_no_duplicate_key)

        config_key = GazetteerMatcher.Config.Key
        matcher_config = {
            config_key.NORMALIZER: cls.text2norm,
            config_key.TEXTS2PATTERN: HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(codename2aliases, matcher_config)
Пример #5
0
    def lang2matcher(cls, lang):
        """Create a ``GazetteerMatcher`` over server codenames and their aliases.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a ``GazetteerMatcher`` built from the merged
            codename -> aliases dict, using ``cls.text2norm`` as normalizer.
        """
        recognizable = HenriqueLocale.lang2langs_recognizable(lang)
        servers = Server.list_all()

        def iter_single_server_dicts():
            # Yield one {codename: aliases} dict per server, lazily, so that
            # merge_dicts receives a single-pass iterable.
            for server in servers:
                aliases = Server.server_langs2aliases(server, recognizable)
                yield {Server.server2codename(server): aliases}

        codename2aliases = merge_dicts(iter_single_server_dicts(),
                                       vwrite=vwrite_no_duplicate_key)
        assert_is_not_none(codename2aliases)

        matcher_config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(codename2aliases, matcher_config)
Пример #6
0
    def text2element_list(cls, text_in, lang):
        """Extract "<number><unit>" elements from ``text_in``.

        Numbers are found with ``cls.pattern_number()`` and units with the
        matcher from ``TimedeltaEntityUnit.langs2matcher``. The two span
        lists are paired by
        ``ContextfreeTool.spans_list2reducible_indextuple_list``, using
        ``StringTool.str_span2match_blank_or_nullstr`` on ``text_in`` as the
        gap validator.

        :param text_in: text to scan.
        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: list of dicts keyed by ``cls.Field.QUANTITY`` (int),
            ``cls.Field.UNIT`` and ``cls.Field.SPAN``; the span runs from the
            start of the number to the end of the unit.
        """
        logger = HenriqueLogger.func_level2logger(cls.text2element_list,
                                                  logging.DEBUG)

        langs = HenriqueLocale.lang2langs_recognizable(lang)
        logger.debug({"langs": langs})

        number_matches = list(cls.pattern_number().finditer(text_in))
        number_spans = [m.span() for m in number_matches]

        unit_matcher = TimedeltaEntityUnit.langs2matcher(langs)
        unit_span_values = list(unit_matcher.text2span_value_iter(text_in))
        unit_spans = [span_value[0] for span_value in unit_span_values]

        is_valid_gap = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
            [number_spans, unit_spans], is_valid_gap)

        elements = []
        for number_index, unit_index in index_pairs:
            unit_span_value = unit_span_values[unit_index]

            quantity = int(number_matches[number_index].group())
            unit = unit_span_value[1]

            # Combined span: number start .. unit end.
            span = (number_spans[number_index][0], unit_span_value[0][1])

            elements.append({
                cls.Field.QUANTITY: quantity,
                cls.Field.UNIT: unit,
                cls.Field.SPAN: span,
            })
        return elements
Пример #7
0
    def lang2matcher(cls, lang):
        """Create a ``GazetteerMatcher`` over ``cls.codenames()`` and their aliases.

        For each codename, aliases are collected from
        ``cls.dict_lang2codename2aliases()`` under every language
        recognizable from ``lang``; (language, codename) pairs with no
        aliases are skipped.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a ``GazetteerMatcher`` using ``cls.text2norm`` as normalizer.
        """
        langs = HenriqueLocale.lang2langs_recognizable(lang)
        lang2codename2aliases = cls.dict_lang2codename2aliases()

        codename2texts = {}
        for codename in cls.codenames():
            texts = []
            for _lang in langs:
                aliases = JsonTool.down(lang2codename2aliases,
                                        [_lang, codename])
                if aliases:
                    texts.extend(aliases)
            codename2texts[codename] = texts

        matcher_config = {GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm}
        return GazetteerMatcher(codename2texts, matcher_config)
Пример #8
0
    def lang2matcher(cls, lang):
        """Create a ``GazetteerMatcher`` over trade-good-type codenames.

        Each item of ``Tradegoodtype.list_all()`` is mapped from its codename
        to the concatenation of its aliases in every language recognizable
        from ``lang``.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a ``GazetteerMatcher`` using ``cls.text2norm`` as normalizer.
        """
        tgt_list = Tradegoodtype.list_all()
        recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        def collect_aliases(tgt):
            # Aliases of one trade-good type across all recognizable languages.
            return [
                alias
                for _lang in recognizable
                for alias in Tradegoodtype.tradegoodtype_lang2aliases(tgt, _lang)
            ]

        single_entry_dicts = []
        for tgt in tgt_list:
            codename = Tradegoodtype.tradegoodtype2codename(tgt)
            single_entry_dicts.append({codename: collect_aliases(tgt)})
        value2aliases = merge_dicts(single_entry_dicts,
                                    vwrite=vwrite_no_duplicate_key)

        matcher_config = {
            GazetteerMatcher.Config.Key.NORMALIZER: cls.text2norm,
            # GazetteerMatcher.Config.Key.TEXTS2PATTERN: HenriqueEntity.texts2rstr_word_with_cardinal_suffix,
        }
        return GazetteerMatcher(value2aliases, matcher_config)
Пример #9
0
    def lang2matcher(cls, lang):
        """Create a ``GazetteerMatcher`` over trade-good codenames.

        Each item of ``Tradegood.list_all()`` is mapped from its codename to
        the concatenation of its aliases in every language recognizable from
        ``lang``.

        :param lang: language code handed to
            ``HenriqueLocale.lang2langs_recognizable``.
        :return: a ``GazetteerMatcher`` configured with ``cls.text2norm`` as
            normalizer and ``HenriqueEntity.texts2pattern_port_tradegood`` as
            the texts-to-pattern hook.
        """
        tg_list = Tradegood.list_all()
        recognizable = HenriqueLocale.lang2langs_recognizable(lang)

        def collect_aliases(tg):
            # Aliases of one trade good across all recognizable languages.
            return [
                alias
                for _lang in recognizable
                for alias in Tradegood.tradegood_lang2aliases(tg, _lang)
            ]

        single_entry_dicts = []
        for tg in tg_list:
            codename = Tradegood.tradegood2codename(tg)
            single_entry_dicts.append({codename: collect_aliases(tg)})
        value2aliases = merge_dicts(single_entry_dicts,
                                    vwrite=vwrite_no_duplicate_key)

        config_key = GazetteerMatcher.Config.Key
        matcher_config = {
            config_key.NORMALIZER: cls.text2norm,
            config_key.TEXTS2PATTERN: HenriqueEntity.texts2pattern_port_tradegood,
        }
        return GazetteerMatcher(value2aliases, matcher_config)
Пример #10
0
    def text2entity_list(cls, text_in, config=None):
        """Split each ``cls.pattern_ko()`` match into per-character trade-good entities.

        A match must be 2 or 3 characters long (asserted). Its characters are
        emitted, in order, as entities valued "Nutmeg", "Mace" and -- only
        for 3-character matches -- "Cloves". Nothing is produced unless
        "ko" is among the languages recognizable from the configured locale.

        :param text_in: text to scan.
        :param config: passed to ``HenriqueEntity.Config.config2locale``;
            ``HenriqueLocale.DEFAULT`` is used when that yields a falsy
            locale.
        :return: list of entity dicts keyed by ``FoxylibEntity.Field.SPAN``,
            ``TEXT``, ``VALUE`` and ``TYPE``.
        """
        locale = (HenriqueEntity.Config.config2locale(config)
                  or HenriqueLocale.DEFAULT)
        lang = LocaleTool.locale2lang(locale)
        if "ko" not in HenriqueLocale.lang2langs_recognizable(lang):
            return []

        def span_value2entity(span, value):
            # One entity covering ``span`` of ``text_in`` with the given value.
            return {
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: TradegoodEntity.entity_type(),
            }

        def match2entity_list(match):
            span = match.span()
            assert_in(SpanTool.span2len(span), (2, 3))
            start = span[0]

            values = ["Nutmeg", "Mace"]
            if SpanTool.span2len(span) == 3:
                values.append("Cloves")

            # Each value occupies exactly one character of the match.
            return [
                span_value2entity((start + offset, start + offset + 1), value)
                for offset, value in enumerate(values)
            ]

        entities = []
        for match in list(cls.pattern_ko().finditer(text_in)):
            entities.extend(match2entity_list(match))
        return entities