def text2norm(cls, text_in):
    """Normalize text: strip the whole string, then strip each line.

    Falsy input (None or empty string) is returned unchanged.
    """
    if not text_in:
        return text_in
    stripped = StringTool.str2strip(text_in)
    return StringTool.str2strip_eachline(stripped)
def j2valid_trend(j):
    # True iff index j terminates a valid "portlike/tradegood, rate, trend"
    # run: the trend at j must be immediately preceded (index-wise) by a rate
    # at j-1 and a portlike-or-tradegood at j-2, each resolving to exactly one
    # entity, with only blank text between them in the original string.
    nonlocal entities_list
    # Need at least indices j-2, j-1, j before the trend.
    if j < 3:
        return False
    # Latest index per parameter type at or before j; any missing type fails.
    j_tuple = j_param_types2j_latest(j, Param.Type.list())
    if any(map(is_none, j_tuple)):
        return False
    # NOTE: the inner `j` shadows the outer one only inside this comprehension.
    entities_tuple = [
        entities_list[j] if j is not None else None
        for j in j_tuple
    ]
    # Each index must map to exactly one entity (no ambiguity).
    if any(map(lambda x: len(x) != 1, entities_tuple)):
        return False
    j_portlike, j_tradegood, j_rate, j_trend = j_tuple
    assert_equal(j_trend, j)
    # The rate must sit directly before the trend, ...
    if j_rate != j - 1:
        return False
    # ... and either the portlike or the tradegood directly before the rate.
    if j - 2 not in {j_portlike, j_tradegood}:
        return False
    entity_portlike, entity_tradegood, entity_rate, entity_trend = map(
        l_singleton2obj, entities_tuple)
    if FoxylibEntity.entity2type(
            entity_portlike) != PortEntity.entity_type():  # not culture
        return False
    # Whichever of portlike/tradegood appears later in the text must be
    # separated from the rate by blank text only.
    entity_latter = max([entity_portlike, entity_tradegood],
                        key=FoxylibEntity.entity2span)
    span_latter, span_rate, span_trend = lmap(
        FoxylibEntity.entity2span,
        [entity_latter, entity_rate, entity_trend])
    span_latter_rate = SpanTool.span_pair2between(span_latter, span_rate)
    str_between_latter_rate = StringTool.str_span2substr(
        text, span_latter_rate)
    if not RegexTool.pattern_str2match_full(
            RegexTool.pattern_blank_or_nullstr(), str_between_latter_rate):
        return False
    # Likewise only blank text between the rate and the trend.
    span_rate_trend = SpanTool.span_pair2between(span_rate, span_trend)
    str_between_rate_trend = StringTool.str_span2substr(
        text, span_rate_trend)
    if not RegexTool.pattern_str2match_full(
            RegexTool.pattern_blank_or_nullstr(), str_between_rate_trend):
        return False
    return True
def entity_pair2is_appendable(
        cls,
        text,
        entity_pair,
):
    """Return True iff the two entities can be merged into one parameter.

    Both entities must map to the same parameter type, which must be either
    PORTLIKE or TRADEGOOD, and the text between their spans must fully
    match the parameter delimiter pattern.
    """
    Param = PriceSkillParameter
    allowed_types = {Param.Type.PORTLIKE, Param.Type.TRADEGOOD}

    param_type_pair = [
        Param.Type.entity_type2parameter_type(FoxylibEntity.entity2type(entity))
        for entity in entity_pair
    ]
    # Both must be mergeable parameter types ...
    if any(param_type not in allowed_types for param_type in param_type_pair):
        return False
    # ... and of the same type.
    if param_type_pair[0] != param_type_pair[1]:
        return False

    span_first, span_second = [
        FoxylibEntity.entity2span(entity) for entity in entity_pair
    ]
    text_between = StringTool.str_span2substr(
        text, SpanTool.span_pair2between(span_first, span_second))
    # The gap between the entities must be exactly a delimiter.
    if not RegexTool.pattern_str2match_full(
            Param.pattern_delim(), text_between):
        return False
    return True
def indextuple2entity(indextuple):
    """Merge the i-th sign match and the j-th timedelta entity into one entity.

    The resulting span runs from the sign's start to the timedelta's end.
    """
    i, j = indextuple
    sign = match_list_sign[i].group()
    entity_timedelta = entity_list_timedelta[j]
    # Span covers sign start through timedelta end.
    merged_span = (span_list_sign[i][0], span_list_timedelta[j][1])
    return {
        FoxylibEntity.Field.SPAN: merged_span,
        FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, merged_span),
        FoxylibEntity.Field.VALUE: {
            cls.Value.Field.SIGN: sign,
            cls.Value.Field.TIMEDELTA: entity_timedelta,
        },
        FoxylibEntity.Field.TYPE: cls.entity_type(),
    }
def match2entity_list(match):
    """Split a 2- or 3-character match into per-character tradegood entities.

    Each character maps positionally to a tradegood: first -> "Nutmeg",
    second -> "Mace", optional third -> "Cloves". Returns the list of
    single-character entity dicts.
    """
    span = match.span()
    span_len = SpanTool.span2len(span)
    assert_in(span_len, (2, 3))

    start = span[0]
    # Positional tradegood names; the three original copy-pasted dict
    # builders collapsed into one loop (behavior unchanged).
    tradegood_values = ("Nutmeg", "Mace", "Cloves")

    entity_list = []
    for offset in range(span_len):
        span_one = (start + offset, start + offset + 1)
        entity_list.append({
            FoxylibEntity.Field.SPAN: span_one,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span_one),
            FoxylibEntity.Field.VALUE: tradegood_values[offset],
            FoxylibEntity.Field.TYPE: TradegoodEntity.entity_type(),
        })
    return entity_list
def _zzz( cls, str_PREV, kwarg_KEY, h_KWARG, ): str_ARGs = None #str_PREV = str2strip(LexerTool.token_list_DELIM2str_DELIM(token_list_DELIM[:iSTART_INFIX])) if not kwarg_KEY: if str_PREV: str_ARGs = str_PREV else: if not str_PREV: raise cls.SyntacticError() h_KWARG[StringTool.quoted2stripped( kwarg_KEY.strip())] = StringTool.quoted2stripped( str_PREV.strip()) return str_ARGs
def match2entity(m):
    """Convert a regex match over text_in into an entity dict of this type."""
    matched_span = m.span()
    return {
        FoxylibEntity.Field.SPAN: matched_span,
        FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, matched_span),
        FoxylibEntity.Field.VALUE: cls.match2value(m),
        FoxylibEntity.Field.TYPE: cls.entity_type(),
    }
def index2token(index):
    """Build the token dict for the index-th span of str_in.

    NORM holds the normalized text when a normalizer is given, otherwise
    the raw substring.
    """
    token_span = span_list[index]
    raw_text = StringTool.str_span2substr(str_in, token_span)
    norm_text = normalizer(raw_text) if normalizer else raw_text
    return {
        cls.Token.Field.INDEX: index,
        cls.Token.Field.SPAN: token_span,
        cls.Token.Field.TEXT: raw_text,
        cls.Token.Field.NORM: norm_text,
    }
def match2entity(match):
    """Map a matched language alias in text_in to an entity with its codename."""
    alias_span = match.span()
    alias_text = StringTool.str_span2substr(text_in, alias_span)
    return {
        FoxylibEntity.Field.VALUE: cls.lang_alias2codename(lang, alias_text),
        FoxylibEntity.Field.TEXT: alias_text,
        FoxylibEntity.Field.SPAN: alias_span,
        FoxylibEntity.Field.TYPE: cls.entity_type(),
    }
def cospan2match(cospan):
    """Expand a (ref, hyp) token cospan into a SubphraseMatch dict.

    For each side, resolves the token span to a character span and extracts
    the corresponding substring (reference text for ref, `text` for hyp).
    """
    tokenspan_ref = Cospan.cospan2span_ref(cospan)
    charspan_ref = cls.Token.token_list_span2charspan(
        token_list_ref, tokenspan_ref)
    text_ref = StringTool.str_span2substr(
        cls.Config.config2reference(matcher.config), charspan_ref)

    tokenspan_hyp = Cospan.cospan2span_hyp(cospan)
    charspan_hyp = cls.Token.token_list_span2charspan(
        token_list_hyp, tokenspan_hyp)
    text_hyp = StringTool.str_span2substr(text, charspan_hyp)

    return {
        SubphraseMatch.Field.TOKENSPAN_REF: tokenspan_ref,
        SubphraseMatch.Field.CHARSPAN_REF: charspan_ref,
        SubphraseMatch.Field.TEXT_REF: text_ref,
        SubphraseMatch.Field.TOKENSPAN_HYP: tokenspan_hyp,
        SubphraseMatch.Field.CHARSPAN_HYP: charspan_hyp,
        SubphraseMatch.Field.TEXT_HYP: text_hyp,
    }
def text2entity_list(cls, text_in, config=None):
    """Extract entities of this type from text_in.

    The matcher is selected by the language of the locale carried in config.
    """
    locale = HenriqueEntity.Config.config2locale(config)
    matcher = cls.lang2matcher(LocaleTool.locale2lang(locale))

    def span_value2entity(span, value):
        return {
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
            FoxylibEntity.Field.VALUE: value,
            FoxylibEntity.Field.TYPE: cls.entity_type(),
        }

    return [span_value2entity(span, value)
            for span, value in matcher.text2span_value_iter(text_in)]
def indexes2entity(indexes):
    """Merge the elements at the given indexes into a single entity.

    The span runs from the first indexed element's start to the last's end;
    the value is the filtered list of elements.
    """
    merged_span = (
        span_list_element[indexes[0]][0],
        span_list_element[indexes[-1]][1],
    )
    return {
        FoxylibEntity.Field.SPAN: merged_span,
        FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, merged_span),
        FoxylibEntity.Field.VALUE: ListTool.indexes2filtered(element_list, indexes),
        FoxylibEntity.Field.TYPE: cls.entity_type(),
    }
def _text2entity_list_names(cls, text_in):
    """Extract name-based entities of this type from text_in."""
    matcher_names = cls.matcher_names()

    def span_value2entity(span, value):
        return {
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
            FoxylibEntity.Field.VALUE: value,
            FoxylibEntity.Field.TYPE: cls.entity_type(),
        }

    return [span_value2entity(span, value)
            for span, value in matcher_names.text2span_value_iter(text_in)]
def text2entity_list(cls, str_in, config=None):
    """Yield hour entities: a word-bound cardinal followed by an hour suffix.

    A cardinal entity and a suffix match are joined when only blank text
    (or nothing) separates them; the yielded entity spans from the
    cardinal's start to the suffix's end and carries the cardinal's value.
    """
    def entity2is_wordbound_prefixed(entity):
        return StringTool.str_span2is_wordbound_prefixed(
            str_in, FoxylibEntity.entity2span(entity))

    cardinal_entity_list = lfilter(entity2is_wordbound_prefixed,
                                   CardinalEntity.text2entity_list(str_in))

    # BUGFIX: finditer() returns a one-shot iterator. It was consumed by the
    # span extraction below and then indexed (m_list_suffix[j2]), which raises
    # TypeError. Materialize it once so both uses work.
    m_list_suffix = list(cls.pattern_suffix().finditer(str_in))

    span_ll = [
        lmap(FoxylibEntity.entity2span, cardinal_entity_list),
        lmap(MatchTool.match2span, m_list_suffix),
    ]

    def span2is_gap(span):
        # A valid gap between cardinal and suffix is blank or empty text.
        return StringTool.str_span2match_blank_or_nullstr(str_in, span)

    j_tuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
        span_ll, span2is_gap)

    for j1, j2 in j_tuple_list:
        cardinal_entity = cardinal_entity_list[j1]
        m_suffix = m_list_suffix[j2]
        span = (FoxylibEntity.entity2span(cardinal_entity)[0],
                MatchTool.match2span(m_suffix)[1])
        yield {
            FoxylibEntity.Field.TYPE: HourEntity.entity_type(),
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.FULLTEXT: str_in,
            FoxylibEntity.Field.VALUE: FoxylibEntity.entity2value(cardinal_entity),
        }
def str2token_list(cls, str_in):
    """Return the token substrings of str_in, one per token span."""
    tokens = []
    for token_span in cls.str2token_span_list(str_in):
        tokens.append(StringTool.str_span2substr(str_in, token_span))
    return tokens
def _match2matchstring_hyp(_match):
    """Return the hypothesis-side substring covered by the match's char span."""
    return StringTool.str_span2substr(
        cls.match2fulltext_hyp(_match), cls.match2charspan_hyp(_match))
def _match2matchstring_ref(_match):
    """Return the reference-side substring covered by the match's char span."""
    return StringTool.str_span2substr(
        cls.match2fulltext_ref(_match), cls.match2charspan_ref(_match))
def text2sub(self, text):
    """Return text with every matched span replaced by its associated value."""
    replacements = list(self.text2span_value_iter(text))
    return StringTool.str_spans2replace_all(text, replacements)
def entity2is_wordbound_prefixed(entity):
    """True iff the entity's span begins at a word boundary within str_in."""
    entity_span = FoxylibEntity.entity2span(entity)
    return StringTool.str_span2is_wordbound_prefixed(str_in, entity_span)
def str_span2is_gap(cls, str_in, span):
    """Treat a span as a gap when it covers only blank (or empty) text.

    Returns the underlying match result, which is truthy for a gap.
    """
    return StringTool.str_span2match_blank_or_nullstr(str_in, span)