Пример #1
0
    def lang2pattern(cls, lang):
        """Build the compiled, case-insensitive pattern of this entity for ``lang``.

        ``cls.rstr()`` is passed to ``RegexTool.rstr2right_bounded`` together
        with a list of right bounds assembled from two parts, in this order:

        1. the result of ``RegexTool.bounds2prefixed`` applied to the right
           word bounds and the trend regex of ``lang``
           (``TrendEntity.lang2rstr``) -- presumably so that a trend word may
           directly follow the match; confirm against ``RegexTool``.
        2. the plain right word bounds.

        :param lang: language key forwarded to ``TrendEntity.lang2rstr``.
        :return: the pattern compiled with ``re.I``.
        """
        # Function-scope import (presumably avoids a circular import -- TODO confirm).
        from henrique.main.document.price.trend.trend_entity import TrendEntity

        logger = HenriqueLogger.func_level2logger(cls.lang2pattern, logging.DEBUG)

        trend_rstr = TrendEntity.lang2rstr(lang)

        trend_prefixed_bounds = RegexTool.bounds2prefixed(
            RegexTool.right_wordbounds(), trend_rstr)
        plain_bounds = RegexTool.right_wordbounds()
        bounds = lchain(trend_prefixed_bounds, plain_bounds)

        bounded_rstr = RegexTool.rstr2right_bounded(cls.rstr(), bounds)
        logger.debug({"rstr_rightbound": bounded_rstr})

        return re.compile(bounded_rstr, re.I)
Пример #2
0
    def pattern_hour(cls):
        """Return the compiled pattern used to find an hour number.

        The core expression is one or more digits (``\\d+``). It is wrapped by
        ``RegexTool.rstr2bounded`` with the left word bounds on one side and,
        on the other side, the right word bounds extended with a literal
        colon.

        :return: the pattern compiled with ``re.I``.
        """
        bounds_before = RegexTool.left_wordbounds()
        bounds_after = lchain(RegexTool.right_wordbounds(), [r":"])

        bounded_digits = RegexTool.rstr2bounded(r"\d+", bounds_before, bounds_after)
        return re.compile(bounded_digits, re.I)
Пример #3
0
    def test_03(self):
        """Check one-sided bounding of a literal with the default word bounds.

        A right-bounded pattern must match when the literal ends the text and
        must not match when more word characters follow it; a left-bounded
        pattern behaves the other way round.
        """
        literal = "asdf"
        text_ending_with_literal = "ijilijasdf"
        text_starting_with_literal = "asdfuhuef"

        # Right side bounded only.
        right_only = RegexTool.rstr2right_bounded(literal, RegexTool.right_wordbounds())
        self.assertTrue(re.search(right_only, text_ending_with_literal))
        self.assertFalse(re.search(right_only, text_starting_with_literal))

        # Left side bounded only.
        left_only = RegexTool.rstr2left_bounded(literal, RegexTool.left_wordbounds())
        self.assertFalse(re.search(left_only, text_ending_with_literal))
        self.assertTrue(re.search(left_only, text_starting_with_literal))
Пример #4
0
    def pattern_number(cls):
        """Return the compiled pattern for a one- or two-digit number.

        ``\\d{1,2}`` is first left-bounded with the left word bounds. The
        result is then right-bounded with the right word bounds plus every
        value list of ``TimedeltaEntityUnit.gazetteer_all()`` flattened into
        one list (presumably the unit words, so a unit may directly follow the
        number -- confirm against ``TimedeltaEntityUnit``).

        :return: the pattern compiled with ``re.I``.
        """
        digits_left_bounded = RegexTool.rstr2left_bounded(
            r"\d{1,2}",
            RegexTool.left_wordbounds(),
        )

        word_bounds = RegexTool.right_wordbounds()
        gazetteer_entries = lchain(*TimedeltaEntityUnit.gazetteer_all().values())
        bounds_after = lchain(word_bounds, gazetteer_entries)

        fully_bounded = RegexTool.rstr2right_bounded(digits_left_bounded, bounds_after)
        return re.compile(fully_bounded, re.I)
Пример #5
0
    def pattern_suffix(cls):
        """Return the compiled pattern for a number bounded on both sides.

        Right side: ``\\d+`` is right-bounded with the plain right word bounds
        plus each right word bound passed through
        ``RegexTool.bound2prefixed(bound, "시")`` ("시" is presumably the Korean
        hour marker -- confirm).

        Left side: the result is left-bounded with every left word bound and,
        for each of them, the same bound followed by the quantifier ``{1,2}``.

        Fix over the previous version: the left-bounding step referenced the
        undefined name ``rstr_rightbound`` (``NameError`` on every call); it
        now uses ``rstr_rightbounded``. The unused ``left_bounds`` local was
        dropped.

        :return: the compiled pattern (no flags).
        """
        right_bounds = lchain(
            RegexTool.right_wordbounds(),
            [
                RegexTool.bound2prefixed(b, r"시")
                for b in RegexTool.right_wordbounds()
            ],
        )
        rstr_rightbounded = RegexTool.rstr2right_bounded(r"\d+", right_bounds)

        def bound_iter_left():
            # Yield each left bound as-is, then again with a {1,2} quantifier.
            for b in RegexTool.left_wordbounds():
                yield b
                yield r"{}{}".format(b, r"{1,2}")

        bound_list_left = list(bound_iter_left())
        rstr_bound = RegexTool.rstr2left_bounded(rstr_rightbounded,
                                                 bound_list_left)

        return re.compile(rstr_bound)
Пример #6
0
    def lang2pattern(cls, lang):
        """Build the compiled, case-insensitive pattern of this entity for ``lang``.

        ``cls.lang2rstr(lang)`` is wrapped by ``RegexTool.rstr2bounded``:

        * left bounds: ``RateEntity.rstr_last_char()`` or a whitespace
          character (``\\s``);
        * right bounds: ``RegexTool.right_wordbounds()``.

        :param lang: language key forwarded to ``cls.lang2rstr``.
        :return: the pattern compiled with ``re.I``.
        """
        # Function-scope import (presumably avoids a circular import -- TODO confirm).
        from henrique.main.document.price.rate.rate_entity import RateEntity

        logger = HenriqueLogger.func_level2logger(cls.lang2pattern, logging.DEBUG)

        bounds_before = [RateEntity.rstr_last_char(), r"\s"]
        bounds_after = RegexTool.right_wordbounds()

        bounded_rstr = RegexTool.rstr2bounded(
            cls.lang2rstr(lang),
            bounds_before,
            bounds_after,
        )

        logger.debug({"left_bounds": bounds_before, "rstr": bounded_rstr})
        return re.compile(bounded_rstr, re.I)
Пример #7
0
        def texts2pattern(texts):
            """Compile a case-insensitive pattern matching any of ``texts``.

            Each text is escaped with ``re.escape`` and the alternatives are
            combined by ``RegexTool.rstr_iter2or``. The alternation is then
            wrapped by ``RegexTool.rstr2bounded`` with:

            * left bounds: the left word bounds passed through
              ``RegexTool.bounds2suffixed(..., r"\\d")`` (presumably allowing
              a digit right before the match -- confirm), followed by the
              plain left word bounds;
            * right bounds: ``RegexTool.right_wordbounds()``.

            ``logger`` is taken from the enclosing scope.

            :param texts: iterable of literal strings to match.
            :return: the pattern compiled with ``re.I``.
            """
            rstr_raw = RegexTool.rstr_iter2or(map(re.escape, texts))

            left_bounds = lchain(
                # Raw string: "\d" in a normal literal is an invalid escape
                # sequence (SyntaxWarning on recent Pythons); the value is
                # the same two characters either way.
                RegexTool.bounds2suffixed(RegexTool.left_wordbounds(), r"\d"),
                RegexTool.left_wordbounds(),
            )
            right_bounds = RegexTool.right_wordbounds()

            rstr = RegexTool.rstr2bounded(rstr_raw, left_bounds, right_bounds)
            logger.debug({
                "rstr": rstr,
                "rstr_raw": rstr_raw,
            })
            return re.compile(rstr, re.I)
Пример #8
0
 def texts2pattern_word(cls, texts):
     """Compile a word-bounded pattern for ``texts``.

     The regex produced by ``cls.texts2regex(texts)`` is wrapped by
     ``RegexTool.rstr2bounded`` using the left and right word bounds.

     :param texts: forwarded unchanged to ``cls.texts2regex``.
     :return: the compiled pattern (no flags).
     """
     inner_regex = cls.texts2regex(texts)
     bounds_before = RegexTool.left_wordbounds()
     bounds_after = RegexTool.right_wordbounds()
     bounded_regex = RegexTool.rstr2bounded(inner_regex, bounds_before, bounds_after)
     # No re.I here: case handling can be dealt with by the normalizer.
     return re.compile(bounded_regex)