示例#1
0
    def test_01(self):
        """Exercise ``spans_list2reducible_indextuple_list`` on "a b c d e".

        Every case feeds a list of span lists plus a gap predicate built from
        ``StringTool.str_span2match_blank`` and compares the materialized
        result with the expected list of index tuples.
        """
        is_valid_gap = partial(StringTool.str_span2match_blank, "a b c d e")

        def reducible(spans_list):
            # Materialize the result so it can be compared with a plain list.
            return list(
                ContextfreeTool.spans_list2reducible_indextuple_list(
                    spans_list, is_valid_gap))

        # Case 1: two span lists.
        case1 = [[(0, 1), (4, 5)], [(2, 3)]]
        self.assertEqual(reducible(case1), [(0, 0)])

        # Case 2: longer span lists, same expected index tuple.
        case2 = [[(0, 1), (6, 7), (8, 9)], [(2, 3), (4, 5)]]
        self.assertEqual(reducible(case2), [(0, 0)])

        # Case 3: the two lists of case 2 in swapped order.
        case3 = [[(2, 3), (4, 5)], [(0, 1), (6, 7), (8, 9)]]
        self.assertEqual(reducible(case3), [(1, 1)])

        # Case 4: as case 3, but the second list is not sorted by position.
        case4 = [[(2, 3), (4, 5)], [(8, 9), (0, 1), (6, 7)]]
        self.assertEqual(reducible(case4), [(1, 2)])

        # Case 5: three span lists, so the result holds index triples.
        case5 = [[(2, 3), (6, 7)], [(8, 9), (0, 1), (4, 5)], [(6, 7)]]
        self.assertEqual(reducible(case5), [(0, 2, 0)])
示例#2
0
    def data2entity_list(cls, data):
        """Build time entities from an hour match paired with an AM/PM match.

        The hour and AM/PM matches are read from ``data``; their spans are
        handed to ``ContextfreeTool.spans_list2reducible_indextuple_list``
        with a gap predicate that accepts blank or empty gaps (per the name
        of ``StringTool.str_span2match_blank_or_nullstr``).

        :param data: input object understood by ``TimeEntity.Data``.
        :return: list of entity dicts; index pairs whose hour or AM/PM cannot
            be normalized are dropped.
        """
        text_in = TimeEntity.Data.data2text_in(data)

        hour_matches = TimeEntity.Data.data2match_list_hour(data)
        hour_spans = [m.span() for m in hour_matches]

        ampm_matches = TimeEntity.Data.data2match_list_ampm(data)
        ampm_spans = [m.span() for m in ampm_matches]

        is_valid_gap = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
            [hour_spans, ampm_spans], is_valid_gap)

        def build_entity(index_pair):
            hour_idx, ampm_idx = index_pair
            hour_match = hour_matches[hour_idx]
            ampm_match = ampm_matches[ampm_idx]

            normalized_hour = TimeTool.hour2norm(int(hour_match.group()))
            if normalized_hour is None:
                return None

            hour, ampm = AMPM.hour_ampm2normalized(
                normalized_hour, AMPM.match2value(ampm_match))
            if hour is None or ampm is None:
                return None

            # The entity covers everything from the hour start to the AM/PM end.
            start = hour_match.span()[0]
            end = ampm_match.span()[1]
            return {
                FoxylibEntity.Field.FULLTEXT: text_in,
                FoxylibEntity.Field.TYPE: TimeEntity.entity_type(),
                FoxylibEntity.Field.SPAN: (start, end),
                FoxylibEntity.Field.VALUE: {
                    TimeEntity.Value.Field.HOUR: hour,
                    TimeEntity.Value.Field.MINUTE: 0,
                    TimeEntity.Value.Field.AMPM: ampm,
                },
            }

        candidates = (build_entity(pair) for pair in index_pairs)
        return lfilter(is_not_none, candidates)
示例#3
0
    def _text2entity_list_multiday(cls, str_in):
        """Yield day-of-week span entities shaped as <day> <delimiter> <day>.

        Single-day entities and delimiter matches are located in ``str_in``;
        index triples (day, delimiter, day) come from
        ``ContextfreeTool.spans_list2reducible_indextuple_list`` using
        ``cls.str_span2is_gap`` as the gap predicate.

        This is a generator: nothing runs until it is iterated.

        :param str_in: text to scan.
        :return: generator of entity dicts whose value is the pair of the two
            single-day entity values.
        """
        logger = FoxylibLogger.func_level2logger(
            cls._text2entity_list_multiday, logging.DEBUG)

        entity_list_1day = DayofweekEntityKoSingle.text2entity_list(str_in)
        m_list_delim = list(cls.pattern_delim().finditer(str_in))

        # Day spans appear twice: once as the left operand, once as the right.
        left_day_spans = lmap(FoxylibEntity.entity2span, entity_list_1day)
        delim_spans = lmap(MatchTool.match2span, m_list_delim)
        right_day_spans = lmap(FoxylibEntity.entity2span, entity_list_1day)

        def is_gap(span):
            return cls.str_span2is_gap(str_in, span)

        j_tuple_list = list(
            ContextfreeTool.spans_list2reducible_indextuple_list(
                [left_day_spans, delim_spans, right_day_spans], is_gap))

        logger.debug({
            "j_tuple_list": j_tuple_list,
            "entity_list_1day": entity_list_1day,
            "m_list_delim": m_list_delim,
        })

        # The delimiter index is only needed for matching, not for the output.
        for left_idx, _, right_idx in j_tuple_list:
            entity_pair = (entity_list_1day[left_idx],
                           entity_list_1day[right_idx])
            logger.debug({
                "j1": left_idx,
                "j3": right_idx,
                "entity_pair": entity_pair,
            })

            start = FoxylibEntity.entity2span(entity_pair[0])[0]
            end = FoxylibEntity.entity2span(entity_pair[1])[1]
            yield {
                FoxylibEntity.Field.TYPE: DayofweekSpanEntity.entity_type(),
                FoxylibEntity.Field.SPAN: (start, end),
                FoxylibEntity.Field.FULLTEXT: str_in,
                FoxylibEntity.Field.VALUE:
                tmap(FoxylibEntity.entity2value, entity_pair),
            }
示例#4
0
    def _text2entity_list(cls, text_in, lang):
        """Combine a sign match with a following timedelta entity.

        Sign matches come from ``cls.Sign.pattern()`` and timedelta entities
        from ``TimedeltaEntity._text2entity_list``. Index pairs come from
        ``ContextfreeTool.spans_list2reducible_indextuple_list`` with a gap
        predicate built from ``StringTool.str_span2match_blank_or_nullstr``.

        :param text_in: text to scan.
        :param lang: language passed through to the timedelta extractor.
        :return: list of entity dicts, each carrying the sign text and the
            timedelta entity as its value.
        """
        sign_matches = list(cls.Sign.pattern().finditer(text_in))
        sign_spans = [m.span() for m in sign_matches]

        timedelta_entities = TimedeltaEntity._text2entity_list(text_in, lang)
        timedelta_spans = lmap(FoxylibEntity.entity2span, timedelta_entities)

        is_valid_gap = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
            [sign_spans, timedelta_spans], is_valid_gap)

        def build_entity(index_pair):
            sign_idx, timedelta_idx = index_pair

            value = {
                cls.Value.Field.SIGN: sign_matches[sign_idx].group(),
                cls.Value.Field.TIMEDELTA: timedelta_entities[timedelta_idx],
            }

            # From the start of the sign to the end of the timedelta.
            span = (sign_spans[sign_idx][0],
                    timedelta_spans[timedelta_idx][1])
            return {
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.TEXT:
                StringTool.str_span2substr(text_in, span),
                FoxylibEntity.Field.VALUE: value,
                FoxylibEntity.Field.TYPE: cls.entity_type(),
            }

        return lmap(build_entity, index_pairs)
示例#5
0
    def text2element_list(cls, text_in, lang):
        """Extract (quantity, unit) elements such as a number followed by a unit.

        Numbers are found with ``cls.pattern_number()`` and units with the
        matcher returned by ``TimedeltaEntityUnit.langs2matcher``. Index pairs
        come from ``ContextfreeTool.spans_list2reducible_indextuple_list``
        with a gap predicate built from
        ``StringTool.str_span2match_blank_or_nullstr``.

        :param text_in: text to scan.
        :param lang: language used to pick the recognizable languages.
        :return: list of dicts with quantity, unit and span fields.
        """
        logger = HenriqueLogger.func_level2logger(cls.text2element_list,
                                                  logging.DEBUG)

        langs = HenriqueLocale.lang2langs_recognizable(lang)
        logger.debug({"langs": langs})

        number_matches = list(cls.pattern_number().finditer(text_in))
        number_spans = [m.span() for m in number_matches]

        unit_matcher = TimedeltaEntityUnit.langs2matcher(langs)
        # Each item is indexed as (span, value) below.
        unit_span_values = list(unit_matcher.text2span_value_iter(text_in))
        unit_spans = lmap(ig(0), unit_span_values)

        is_valid_gap = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
            [number_spans, unit_spans], is_valid_gap)

        def build_element(index_pair):
            number_idx, unit_idx = index_pair
            unit_span, unit_value = (unit_span_values[unit_idx][0],
                                     unit_span_values[unit_idx][1])

            quantity = int(number_matches[number_idx].group())
            return {
                cls.Field.QUANTITY: quantity,
                cls.Field.UNIT: unit_value,
                # From the start of the number to the end of the unit.
                cls.Field.SPAN: (number_spans[number_idx][0], unit_span[1]),
            }

        return lmap(build_element, index_pairs)
示例#6
0
        def text2entity_list_hm():
            """Build hour/minute entities from an hour match and a minute match.

            Reads ``span_list_hour``, ``span_list_minute``, ``m_list_hour``,
            ``m_list_minute``, ``gap2valid``, ``entity_type`` and ``text_in``
            from the enclosing scope.

            :return: list of entity dicts; pairs with an invalid hour or
                minute are dropped.
            """
            index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
                [span_list_hour, span_list_minute], gap2valid)

            def build_entity(index_pair):
                hour_idx, minute_idx = index_pair
                # From the start of the hour to the end of the minute.
                span = (span_list_hour[hour_idx][0],
                        span_list_minute[minute_idx][1])

                hour = int(m_list_hour[hour_idx].group())
                minute = int(m_list_minute[minute_idx].group())

                # The minute is only checked once the hour is known to be valid.
                if not (TimeTool.hour2is_valid(hour)
                        and TimeTool.minute2is_valid(minute)):
                    return None

                return {
                    FoxylibEntity.Field.TYPE: entity_type,
                    FoxylibEntity.Field.FULLTEXT: text_in,
                    FoxylibEntity.Field.SPAN: span,
                    FoxylibEntity.Field.VALUE: {
                        TimeEntity.Value.Field.HOUR: hour,
                        TimeEntity.Value.Field.MINUTE: minute,
                    },
                }

            # Rejected pairs come back as None and are removed here.
            return lfilter(bool, map(build_entity, index_pairs))
示例#7
0
        def text2entity_list_hms():
            """Extend hour/minute entities with a seconds field where one follows.

            Hour/minute entities come from ``text2entity_list_hm()``. Seconds
            candidates reuse the minute spans (``span_list_minute``) and their
            matches (``m_list_minute``) from the enclosing scope.

            :return: list with one item per hour/minute entity, in the same
                order. Each item is the original entity when no seconds span
                follows it, a new entity with HOUR/MINUTE/SECOND when a valid
                seconds value follows, or ``None`` when the seconds value is
                invalid.
            """
            entity_list_hm = text2entity_list_hm()
            span_list_hm = lmap(FoxylibEntity.entity2span, entity_list_hm)

            # Seconds are matched by the same matches as minutes, so the span
            # list and the match list are index-aligned, as in
            # text2entity_list_hm.
            span_list_second = span_list_minute
            m_list_second = m_list_minute
            spans_list = [span_list_hm, span_list_second]

            indextuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
                spans_list, gap2valid)
            # Maps an hour/minute entity index to the index of its seconds match.
            h_i2j = dict(indextuple_list)

            def i2entity(i):
                if i not in h_i2j:
                    return entity_list_hm[i]

                j = h_i2j[i]

                span = (span_list_hm[i][0], span_list_second[j][1])
                # Fix: read the match object, not the span tuple; a span tuple
                # has no .group(), so the old lookup raised AttributeError.
                entity_hm, m2 = entity_list_hm[i], m_list_second[j]
                value_hm = FoxylibEntity.entity2value(entity_hm)

                hour, minute = TimeEntity.Value.value2hm(value_hm)
                second = int(m2.group())

                if not TimeTool.second2is_valid(second):
                    # NOTE(review): this None is kept in the returned list;
                    # confirm that callers filter it out.
                    return None

                value = {
                    TimeEntity.Value.Field.HOUR: hour,
                    TimeEntity.Value.Field.MINUTE: minute,
                    TimeEntity.Value.Field.SECOND: second,
                }

                entity = {
                    FoxylibEntity.Field.TYPE: entity_type,
                    FoxylibEntity.Field.FULLTEXT: text_in,
                    FoxylibEntity.Field.SPAN: span,
                    FoxylibEntity.Field.VALUE: value,
                }
                return entity

            return lmap(i2entity, range(len(entity_list_hm)))
示例#8
0
    def text2entity_list(cls, str_in, config=None):
        """Yield hour entities shaped as a cardinal number followed by a suffix.

        Cardinal entities are kept only when
        ``StringTool.str_span2is_wordbound_prefixed`` accepts their span.
        Suffix matches come from ``cls.pattern_suffix()``. Index pairs come
        from ``ContextfreeTool.spans_list2reducible_indextuple_list`` with
        ``StringTool.str_span2match_blank_or_nullstr`` as the gap predicate.

        This is a generator: nothing runs until it is iterated.

        :param str_in: text to scan.
        :param config: accepted for interface compatibility; not used here.
        :return: generator of entity dicts whose value is the cardinal
            entity's value.
        """
        def entity2is_wordbound_prefixed(entity):
            return StringTool.str_span2is_wordbound_prefixed(
                str_in, FoxylibEntity.entity2span(entity))

        cardinal_entity_list = lfilter(entity2is_wordbound_prefixed,
                                       CardinalEntity.text2entity_list(str_in))

        # Fix: materialize the matches. finditer() returns a one-shot iterator
        # that the lmap below would exhaust and that cannot be indexed with
        # m_list_suffix[j2] afterwards.
        m_list_suffix = list(cls.pattern_suffix().finditer(str_in))

        span_ll = [
            lmap(FoxylibEntity.entity2span, cardinal_entity_list),
            lmap(MatchTool.match2span, m_list_suffix),
        ]

        def f_span2is_gap(span):
            return StringTool.str_span2match_blank_or_nullstr(str_in, span)

        j_tuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
            span_ll, f_span2is_gap)

        for j1, j2 in j_tuple_list:
            cardinal_entity = cardinal_entity_list[j1]
            m_suffix = m_list_suffix[j2]

            # From the start of the number to the end of the suffix.
            span = (FoxylibEntity.entity2span(cardinal_entity)[0],
                    MatchTool.match2span(m_suffix)[1])
            j_entity = {
                FoxylibEntity.Field.TYPE:
                HourEntity.entity_type(),
                FoxylibEntity.Field.SPAN:
                span,
                FoxylibEntity.Field.FULLTEXT:
                str_in,
                FoxylibEntity.Field.VALUE:
                FoxylibEntity.entity2value(cardinal_entity),
            }
            yield j_entity
示例#9
0
    def _text_config2skill_code(cls, text_in, config):
        """Return the codename of the skill entity that follows a prefix match.

        :param text_in: text to scan.
        :param config: passed through to ``SkillEntity.text2entity_list``.
        :return: the skill codename, or ``None`` when there is no prefix
            match, no skill entity, or no (prefix, entity) index pair.
        """
        pattern_prefix = cls.pattern_prefix()
        match_list_prefix = list(pattern_prefix.finditer(text_in))
        if not match_list_prefix:
            return None

        entity_list = SkillEntity.text2entity_list(text_in, config=config)
        if not entity_list:
            return None

        spans_list = [
            lmap(lambda m: m.span(), match_list_prefix),
            lmap(FoxylibEntity.entity2span, entity_list),
        ]
        gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)

        # Materialize before calling len(): other call sites wrap this result
        # in list(), which suggests it may be lazy. A no-op if it is already
        # a list.
        indextuple_list = list(
            ContextfreeTool.spans_list2reducible_indextuple_list(
                spans_list, gap2is_valid))

        # At most one (prefix, entity) pair is expected.
        assert_in(len(indextuple_list), [0, 1])

        if not indextuple_list:
            return None

        # Position 1 of the index pair selects the skill entity.
        index_entity = l_singleton2obj(indextuple_list)[1]
        entity = entity_list[index_entity]
        return SkillEntity.entity2skill_codename(entity)
示例#10
0
    def entity_list2ampm_suffixed(
        cls,
        data,
        entity_list_in,
    ):
        """Attach a following AM/PM match to each time entity where one exists.

        Input entity spans and AM/PM match spans are paired through
        ``ContextfreeTool.spans_list2reducible_indextuple_list`` with a gap
        predicate built from ``StringTool.str_span2match_blank_or_nullstr``.

        :param data: input object understood by ``cls.Data``.
        :param entity_list_in: time entities; each must have the type
            ``TimeEntity.entity_type()``.
        :return: list with one entity per input entity, in the same order.
            An entity with no paired AM/PM match is returned unchanged;
            otherwise a new entity is built whose span runs to the end of the
            AM/PM match and whose value holds the normalized hour and AM/PM
            (fields that are ``None`` are omitted).
        """
        logger = FoxylibLogger.func_level2logger(cls.entity_list2ampm_suffixed,
                                                 logging.DEBUG)

        text_in = cls.Data.data2text_in(data)
        ampm_matches = cls.Data.data2match_list_ampm(data)

        entity_spans = lmap(FoxylibEntity.entity2span, entity_list_in)
        ampm_spans = [m.span() for m in ampm_matches]

        is_valid_gap = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        index_pairs = ContextfreeTool.spans_list2reducible_indextuple_list(
            [entity_spans, ampm_spans], is_valid_gap)
        # Maps an input entity index to the index of its AM/PM match.
        ampm_index_by_entity = dict(index_pairs)

        def suffix_entity(entity_idx, original):
            original_type = FoxylibEntity.entity2type(original)
            assert_equal(original_type, TimeEntity.entity_type())

            if entity_idx not in ampm_index_by_entity:
                return original

            ampm_idx = ampm_index_by_entity[entity_idx]
            ampm_match = ampm_matches[ampm_idx]
            # From the start of the entity to the end of the AM/PM match.
            span = (entity_spans[entity_idx][0], ampm_spans[ampm_idx][1])

            hour, minute, second = TimeEntity.Value.value2hms(
                FoxylibEntity.entity2value(original))
            hour_adjusted, ampm_adjusted = AMPM.hour_ampm2normalized(
                hour, AMPM.match2value(ampm_match))

            all_fields = {
                TimeEntity.Value.Field.HOUR: hour_adjusted,
                TimeEntity.Value.Field.MINUTE: minute,
                TimeEntity.Value.Field.SECOND: second,
                TimeEntity.Value.Field.AMPM: ampm_adjusted,
            }
            value = DictTool.filter(lambda k, v: v is not None, all_fields)

            return {
                FoxylibEntity.Field.TYPE: FoxylibEntity.entity2type(original),
                FoxylibEntity.Field.FULLTEXT: text_in,
                FoxylibEntity.Field.SPAN: span,
                FoxylibEntity.Field.VALUE: value,
            }

        return [
            suffix_entity(idx, entity_list_in[idx])
            for idx in range(len(entity_list_in))
        ]