def postgres2j_iter(cls):
    logger = HenriqueLogger.func_level2logger(cls.postgres2j_iter, logging.DEBUG)

    with HenriquePostgres.cursor() as cursor:
        sql = SQL("SELECT * from {}").format(Identifier(TradegoodTable.NAME))
        cursor.execute(sql)

        for t in PostgresTool.fetch_iter(cursor):
            j = t[TradegoodTable.index_json()]
            # logger.debug({"j": j})

            h_lang2names = {}
            for lang, name in j["name"].items():
                h_lang2names[lang] = lchain(h_lang2names.get(lang, []), [name])

            for lang, nickname_list in j.get("nicknames", {}).items():
                h_lang2names[lang] = lchain(h_lang2names.get(lang, []), nickname_list)

            j[TradegoodDoc.F.NAMES] = {lang: luniq(name_list)
                                       for lang, name_list in h_lang2names.items()}

            for k in ["name", "nicknames"]:
                j.pop(k, None)

            # logger.debug({'j["names"]': j["names"]})
            j[TradegoodDoc.F.KEY] = j["names"]["en"][0]
            yield j
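# A minimal, standalone sketch of the name-merging step above, assuming
# TradegoodDoc.F.NAMES == "names" and a hypothetical sample row; lchain/luniq
# are replaced with setdefault/extend and an order-preserving dedup.
def _sketch_merge_names(j):
    h_lang2names = {}
    for lang, name in j["name"].items():
        h_lang2names.setdefault(lang, []).append(name)
    for lang, nicknames in j.get("nicknames", {}).items():
        h_lang2names.setdefault(lang, []).extend(nicknames)
    # dedup while keeping first-seen order, which is presumably what luniq does
    return {lang: list(dict.fromkeys(names)) for lang, names in h_lang2names.items()}

assert _sketch_merge_names(
    {"name": {"en": "Nutmeg"}, "nicknames": {"en": ["nutmeg", "Nutmeg"]}}
) == {"en": ["Nutmeg", "nutmeg"]}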
def table_titles2subbed(cls, table, title_list):
    n_col = iter2singleton(map(len, table))
    assert_equal(n_col, len(title_list) + cls.COUNT_COLHEAD)

    l_row_top = lchain(table[0][:cls.COUNT_COLHEAD], title_list)
    return lchain([l_row_top], table[1:])
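# A standalone sketch of the header substitution above, with COUNT_COLHEAD
# assumed to be 1 (i.e. the first column is a row-header kept as-is).
def _sketch_titles2subbed(table, title_list, count_colhead=1):
    row_top = table[0][:count_colhead] + title_list
    return [row_top] + table[1:]

assert _sketch_titles2subbed(
    [["", "a", "b"], ["r1", "1", "2"]], ["A", "B"]
) == [["", "A", "B"], ["r1", "1", "2"]]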
def pattern_number(cls):
    rstr_leftbound = RegexTool.rstr2left_bounded(r"\d{1,2}", RegexTool.left_wordbounds())

    rstr_bound_right_list = lchain(
        RegexTool.right_wordbounds(),
        lchain(*TimedeltaEntityUnit.gazetteer_all().values()),
    )
    rstr_bound = RegexTool.rstr2right_bounded(rstr_leftbound, rstr_bound_right_list)
    return re.compile(rstr_bound, re.I)
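# A rough standalone approximation of the bounding above using plain
# lookarounds; RegexTool's actual bound strings may differ, and the timedelta
# unit gazetteer is reduced to hypothetical single-letter units ("d", "h").
import re

_p_number = re.compile(r"(?<!\w)\d{1,2}(?=\W|$|d|h)", re.I)
assert _p_number.search("in 12h").group() == "12"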
def _entities2dict_part(cls, entities):
    Param = PriceSkillParameter
    param_type = Param.Type.entity_group2parameter_type(entities)
    field = cls.parameter_type2field(param_type)

    if param_type == Param.Type.PORTLIKE:
        port_codenames = lchain(*map(Portlike.entity_portlike2port_codenames, entities))
        return {field: port_codenames}

    if param_type == Param.Type.TRADEGOOD:
        tradegood_codenames = lmap(FoxylibEntity.entity2value, entities)
        return {field: tradegood_codenames}

    if param_type == Param.Type.RATE:
        entity = l_singleton2obj(entities)
        rate = FoxylibEntity.entity2value(entity)
        return {field: rate}

    if param_type == Param.Type.TREND:
        entity = l_singleton2obj(entities)
        trend = FoxylibEntity.entity2value(entity)
        return {field: trend}

    raise Exception({"param_type": param_type, "entities": entities})
def text2entity_list(cls, str_in):
    logger = FoxylibLogger.func_level2logger(cls.text2entity_list, logging.DEBUG)

    entity_list_1day_raw = DayofweekEntityKo.text2entity_list(str_in)
    entity_list_multiday = cls._text2entity_list_multiday(str_in)
    span_list_multiday = lmap(FoxylibEntity.entity2span, entity_list_multiday)

    def entity_1day2is_not_covered(entity_1day):
        span_1day = FoxylibEntity.entity2span(entity_1day)
        for span_multiday in span_list_multiday:
            if SpanTool.covers(span_multiday, span_1day):
                return False
        return True

    entity_list_1day_uncovered = lfilter(entity_1day2is_not_covered, entity_list_1day_raw)

    entity_list = lchain(
        lmap(cls._entity_1day2multiday, entity_list_1day_uncovered),
        entity_list_multiday,
    )
    return entity_list
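# A standalone sketch of the "drop single-day entities covered by a multi-day
# span" filter above; spans are hypothetical (start, end) index pairs, and
# SpanTool.covers is assumed to be plain interval containment.
def _sketch_uncovered(spans_1day, spans_multiday):
    def covers(outer, inner):
        return outer[0] <= inner[0] and inner[1] <= outer[1]
    return [s for s in spans_1day
            if not any(covers(m, s) for m in spans_multiday)]

assert _sketch_uncovered([(0, 1), (5, 6)], [(4, 8)]) == [(0, 1)]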
def text2entity_list(cls, text_in, config=None):
    entity_list_matcher = cls.text2entity_list_matcher(text_in, config=config)
    entity_list_specialcase = TradegoodEntitySpecialcase.text2entity_list(text_in, config=config)
    return lchain(entity_list_matcher, entity_list_specialcase)
def list_spans_func2processed(cls, l_in, span_list, func, f_list2chain=None):
    if f_list2chain is None:
        f_list2chain = lambda ll: lchain(*ll)

    if not span_list:
        return l_in

    ll = []
    n = len(span_list)
    for i in range(n):
        s_this, e_this = span_list[i]
        e_prev = span_list[i - 1][1] if i > 0 else 0

        if s_this > e_prev:
            ll.append(l_in[e_prev:s_this])

        l_in_this = l_in[s_this:e_this]
        l_out_this = func(l_in_this)
        ll.append(l_out_this)

    e_last = span_list[-1][1]
    if e_last < len(l_in):
        ll.append(l_in[e_last:])

    l_out = f_list2chain(ll)
    return l_out
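# A standalone usage sketch: apply func to the listed spans and keep everything
# outside them unchanged. The spans and the uppercasing func here are made up.
def _sketch_spans2processed(l_in, span_list, func):
    out, prev_end = [], 0
    for start, end in span_list:
        out.extend(l_in[prev_end:start])   # untouched gap before the span
        out.extend(func(l_in[start:end]))  # processed span
        prev_end = end
    out.extend(l_in[prev_end:])            # untouched tail
    return out

assert _sketch_spans2processed(
    list("abcdef"), [(1, 3)], lambda l: [c.upper() for c in l]
) == list("aBCdef")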
def lang2pattern(cls, lang):
    from henrique.main.document.price.trend.trend_entity import TrendEntity

    logger = HenriqueLogger.func_level2logger(cls.lang2pattern, logging.DEBUG)

    # rstr_suffix = format_str("{}?", RegexTool.rstr2wrapped(TrendEntity.lang2rstr(lang)))

    ### may be concatenated with port/tradegood name
    # rstr_prefixed = RegexTool.rstr2rstr_words_prefixed(cls.rstr())
    # raise Exception({"rstr_suffix": rstr_suffix})

    rstr_trend = TrendEntity.lang2rstr(lang)

    # bound_right_list_raw = RegexTool.right_wordbounds()
    right_bounds = lchain(
        RegexTool.bounds2prefixed(RegexTool.right_wordbounds(), rstr_trend),
        RegexTool.right_wordbounds(),
    )
    rstr_rightbound = RegexTool.rstr2right_bounded(cls.rstr(), right_bounds)

    logger.debug({
        # "rstr_trend": rstr_trend,
        # "right_bounds": right_bounds,
        "rstr_rightbound": rstr_rightbound,
    })

    # rstr_suffixed = RegexTool.rstr2rstr_words_suffixed(cls.rstr(), rstr_suffix=rstr_suffix)
    # raise Exception({"rstr_trend": rstr_trend, "rstr_suffixed": rstr_suffixed})
    # return re.compile(RegexTool.rstr2wordbounded(cls.rstr()))
    return re.compile(rstr_rightbound, re.I)
def _node_parents2name(cls, node, ancestors):
    l = lchain(ancestors, [node])
    return cls.node_list2groupname(l)
def _node2rstr_unnamed(cls, node, ancestors, args=None, kwargs=None):
    _args = args or []
    _kwargs = kwargs or {}

    # logger.debug({"node": node, "args": args, "kwargs": kwargs,
    #               "type": cls.node2type(node),
    #               "h_node2ak": h_node2ak,
    #               })

    if cls.node2type(node) == cls.Type.RSTR_NODE:
        rstr = node.rstr(*_args, **_kwargs)
        return rstr

    subnode_list = node.subnode_list()
    ancestors_and_me = lchain(ancestors, [node])

    rstr_list_subnode = [cls._node2rstr_named(sn, ancestors_and_me, args=args, kwargs=kwargs)
                         for sn in subnode_list]

    str_format = node.rformat(*_args, **_kwargs)
    rstr = format_str(str_format, *rstr_list_subnode)
    return rstr
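# A toy sketch of the recursion above: leaf nodes yield their own regex
# fragment, internal nodes format their children into a template. The tuple
# node shape here is hypothetical, not the real node classes.
def _sketch_node2rstr(node):
    kind, payload = node
    if kind == "rstr":              # leaf: literal regex fragment
        return payload
    str_format, subnodes = payload  # internal node: (format string, children)
    return str_format.format(*[_sketch_node2rstr(sn) for sn in subnodes])

assert _sketch_node2rstr(
    ("fmt", (r"(?:{0}\s+{1})", [("rstr", r"\d+"), ("rstr", r"px")]))
) == r"(?:\d+\s+px)"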
def _i2j_list2l_out(cls, row, j_list):
    col_count = len(row)
    l_prefix = row[:cls.COUNT_COLHEAD]
    l_data = lmap(lambda j: row[j] if j in j_list else "",
                  range(cls.COUNT_COLHEAD, col_count))
    l_out = lchain(l_prefix, l_data)
    return l_out
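# Standalone sketch of the column masking above, with COUNT_COLHEAD assumed 1:
# cells whose column index is not in j_list are blanked out.
def _sketch_mask_row(row, j_list, count_colhead=1):
    return row[:count_colhead] + [row[j] if j in j_list else ""
                                  for j in range(count_colhead, len(row))]

assert _sketch_mask_row(["r1", "a", "b", "c"], {1, 3}) == ["r1", "a", "", "c"]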
def warmup(self, target_list=None):
    logger = FoxylibLogger.func_level2logger(self.warmup, logging.DEBUG)
    cls = self.__class__

    if target_list is None:
        target_list = lchain([self.module], ModuleTool.module2classes_within(self.module))

    cls._dict2warmup(self.h, target_list)
def index_jq_list2j_msearch(cls, index_jq_list):
    j_msearch = lchain(*[[{'index': index}, jq]
                         for index, jq in index_jq_list])
    return j_msearch
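# Standalone sketch of the flattening above: Elasticsearch's _msearch API takes
# an alternating sequence of header and query objects, which is what the lchain
# produces. The index names and queries here are made up.
from itertools import chain

def _sketch_msearch_body(index_jq_list):
    return list(chain.from_iterable(
        [{"index": index}, jq] for index, jq in index_jq_list))

assert _sketch_msearch_body([("ports", {"query": {"match_all": {}}})]) == [
    {"index": "ports"},
    {"query": {"match_all": {}}},
]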
def pattern_hour(cls):
    left_bounds = RegexTool.left_wordbounds()
    right_bounds = lchain(
        RegexTool.right_wordbounds(),
        [r":"],
    )
    rstr = RegexTool.rstr2bounded(r"\d+", left_bounds, right_bounds)
    return re.compile(rstr, re.I)
def vwrite_out(h, k, v_in):
    if f_attr2is_appendable(k):
        v_new = " ".join(lchain(
            h.get(k, "").split(),
            v_in.split(),
        ))
        return DictTool.VWrite.overwrite(h, k, v_new)

    return vwrite_in(h, k, v_in)
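# Standalone sketch of the "appendable attribute" merge above: existing and
# incoming values are split on whitespace and rejoined, so repeated writes
# append tokens instead of overwriting. The dict and key are hypothetical.
def _sketch_vwrite_append(h, k, v_in):
    h[k] = " ".join(h.get(k, "").split() + v_in.split())
    return h

assert _sketch_vwrite_append({"class": "btn"}, "class", "btn-primary") == \
    {"class": "btn btn-primary"}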
def table2beamed(cls, table, i_pivot, beam):
    ij_list = list(cls._str_ll_i2ij_iter(table, i_pivot, beam))
    i2j_list = gb_tree_global(ij_list, [ig(0)], leaf_func=lambda l: lmap(ig(1), l))

    table_filtered = lchain(
        [table[0]],
        [cls._i2j_list2l_out(table[i], j_list) for i, j_list in i2j_list],
    )
    return table_filtered
def lexer_args(cls):
    s_delim_eaters = ","
    s_ANY_EXs = lchain(s_delim_eaters, DelimLexer.specialchars())

    r_TUPLE = (
        r"\s+",
        r"(?:(?:[^\s{0}])|(?:{1}))".format(
            "".join(map(re.escape, s_ANY_EXs)),
            r"\s*(?:{0})\s*".format("|".join(map(re.escape, s_delim_eaters))),
        ),
    )
    lexer = cls.r_TUPLE2lexer(r_TUPLE)
    return lexer
def text2entity_list(cls, text_in):
    data = {cls.Data.Field.TEXT_IN: text_in}

    from foxylib.tools.entity.calendar.time.hour_ampm.hour_ampm_time_entity import HourAMPMTimeEntity
    entity_list_hour_ampm = HourAMPMTimeEntity.data2entity_list(data)

    from foxylib.tools.entity.calendar.time.coloned.coloned_time_entity import ColonedTimeEntity
    entity_list_coloned = ColonedTimeEntity.data2entity_list(data)

    entity_list = lchain(entity_list_hour_ampm, entity_list_coloned)
    return entity_list
def table_labels2repeat(cls, table, label_list):
    n = len(table)
    p = len(label_list)

    for i in range(cls.COUNT_ROWHEAD):
        yield table[i]

    for i in range(cls.COUNT_ROWHEAD, n):
        row_in = table[i]
        r = (i - cls.COUNT_ROWHEAD) % p
        row_out = lchain(row_in[:cls.COLINDEX_GROUP],
                         [label_list[r]],
                         row_in[cls.COLINDEX_GROUP + 1:])
        yield row_out
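# Standalone sketch of the label cycling above, with COUNT_ROWHEAD assumed 1
# and COLINDEX_GROUP assumed 0: body rows get label_list written round-robin
# into the group column.
def _sketch_labels2repeat(table, label_list, count_rowhead=1, colindex_group=0):
    for i, row in enumerate(table):
        if i < count_rowhead:
            yield row
        else:
            label = label_list[(i - count_rowhead) % len(label_list)]
            yield row[:colindex_group] + [label] + row[colindex_group + 1:]

assert list(_sketch_labels2repeat(
    [["grp", "v"], ["", "1"], ["", "2"], ["", "3"]], ["A", "B"]
)) == [["grp", "v"], ["A", "1"], ["B", "2"], ["A", "3"]]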
def texts2pattern(texts):
    rstr_raw = RegexTool.rstr_iter2or(map(re.escape, texts))

    left_bounds = lchain(
        RegexTool.bounds2suffixed(RegexTool.left_wordbounds(), r"\d"),
        RegexTool.left_wordbounds(),
    )
    right_bounds = RegexTool.right_wordbounds()

    rstr = RegexTool.rstr2bounded(rstr_raw, left_bounds, right_bounds)
    # NB: logger is expected to come from the enclosing scope
    logger.debug({"rstr": rstr, "rstr_raw": rstr_raw})
    return re.compile(rstr, re.I)
def ports_tradegoods2price_list_latest(cls, server_codename, port_codenames, tradegood_codenames):
    port_codename_list = list(port_codenames)
    tradegood_codename_list = list(tradegood_codenames)

    # https://stackoverflow.com/a/29368862
    collection = MarketpriceCollection.collection()

    mongo_query = {
        cls.Field.PORT: {"$in": port_codename_list},
        cls.Field.TRADEGOOD: {"$in": tradegood_codename_list},
        cls.Field.SERVER: server_codename,
    }

    mongo_group_id = {
        "_id": {
            cls.Field.PORT: "${}".format(cls.Field.PORT),
            cls.Field.TRADEGOOD: "${}".format(cls.Field.TRADEGOOD),
        },
    }
    fields_others = cls.Field.set() - {cls.Field.PORT, cls.Field.TRADEGOOD}
    mongo_group_list = lchain(
        [mongo_group_id],
        [{field: {"$last": "${}".format(field)}} for field in fields_others],
    )
    mongo_group = merge_dicts(mongo_group_list, vwrite=vwrite_no_duplicate_key)

    mongo_pipeline = [
        {"$match": mongo_query},
        {"$group": mongo_group},
    ]

    def item2doc(item):
        port_codename = JsonTool.down(item, ["_id", cls.Field.PORT])
        tradegood_codename = JsonTool.down(item, ["_id", cls.Field.TRADEGOOD])

        price = merge_dicts([
            DictTool.keys2filtered(item, fields_others),
            {
                cls.Field.PORT: port_codename,
                cls.Field.TRADEGOOD: tradegood_codename,
            },
        ], vwrite=vwrite_no_duplicate_key)
        return price

    item_list = list(collection.aggregate(mongo_pipeline))
    # raise Exception({"item_list": item_list})
    doc_list = lmap(item2doc, item_list)
    return doc_list
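# A standalone sketch of the pipeline shape built above, with the field names
# hardcoded ("port", "tradegood", "server", plus a hypothetical
# "price"/"created_at" payload) instead of cls.Field. "$last" per field keeps
# the most recent document per (port, tradegood) pair, relying on document
# order as the linked Stack Overflow answer describes.
def _sketch_latest_price_pipeline(server, ports, tradegoods):
    return [
        {"$match": {"port": {"$in": ports},
                    "tradegood": {"$in": tradegoods},
                    "server": server}},
        {"$group": {"_id": {"port": "$port", "tradegood": "$tradegood"},
                    "price": {"$last": "$price"},
                    "created_at": {"$last": "$created_at"}}},
    ]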
def table_spans2summed_row_iter(cls, table, colspan_list):
    def row_span2str(row, span):
        s, e = span
        v = sum(map(lambda x: int(x) if x else 0, row[s:e]))
        str_out = "" if not v else str(v)
        return str_out

    # n_row = len(table)
    for l_row in table:
        # l_row = table[i]
        l_header = l_row[:cls.COUNT_COLHEAD]
        l_body = [row_span2str(l_row, colspan) for colspan in colspan_list]
        l_out = lchain(l_header, l_body)
        yield l_out
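# Standalone sketch of the per-row column-span summing above, COUNT_COLHEAD
# assumed 1; empty cells count as 0, and a zero total renders as "".
def _sketch_sum_spans(row, colspan_list, count_colhead=1):
    def span2str(s, e):
        v = sum(int(x) if x else 0 for x in row[s:e])
        return str(v) if v else ""
    return row[:count_colhead] + [span2str(s, e) for s, e in colspan_list]

assert _sketch_sum_spans(["r1", "1", "", "2", "0"], [(1, 3), (3, 5)]) == ["r1", "1", "2"]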
def packet2response(cls, packet):
    chatroom = Chatroom.codename2chatroom(KhalaPacket.packet2chatroom(packet))
    locale = Chatroom.chatroom2locale(chatroom)
    entity_classes = cls.target_entity_classes()

    text_in = KhalaPacket.packet2text(packet)
    config = {HenriqueEntity.Config.Field.LOCALE: locale}

    entity_list_raw = lchain(*[c.text2entity_list(text_in, config=config)
                               for c in entity_classes])
    entity_list = sorted(entity_list_raw, key=FoxylibEntity.entity2span)

    blocks = [cls.entity2response_block(packet, entity) for entity in entity_list]
    return Rowsblock.blocks2text(blocks)
def table2percentage(cls, table, v):
    logger = FoxylibLogger.func_level2logger(cls.table2percentage, logging.DEBUG)

    count_col = iter2singleton(map(len, table))
    h_j2col_sum = {j: sum(map(int, filter(bool, map(ig(j), table[1:]))))
                   for j in range(cls.COUNT_COLHEAD, count_col)}

    for i, l in enumerate(table):
        if i == 0:
            yield l
            continue

        # logger.debug({"l": l})
        l_head = l[:cls.COUNT_COLHEAD]
        l_right = ["{:.02f}".format(int(l[j]) * 100 / h_j2col_sum[j]) if l[j] else l[j]
                   for j in range(cls.COUNT_COLHEAD, count_col)]
        yield lchain(l_head, l_right)
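# Standalone sketch of the column-percentage conversion above, COUNT_COLHEAD
# assumed 1: each body cell becomes its share of the column total, formatted
# to two decimals; empty cells pass through unchanged.
def _sketch_table2percentage(table, count_colhead=1):
    n_col = len(table[0])
    col_sums = {j: sum(int(r[j]) for r in table[1:] if r[j])
                for j in range(count_colhead, n_col)}
    yield table[0]
    for row in table[1:]:
        yield row[:count_colhead] + [
            "{:.02f}".format(int(row[j]) * 100 / col_sums[j]) if row[j] else row[j]
            for j in range(count_colhead, n_col)]

assert list(_sketch_table2percentage([["", "x"], ["a", "1"], ["b", "3"]])) == \
    [["", "x"], ["a", "25.00"], ["b", "75.00"]]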
def pattern_suffix(cls):
    left_bounds = RegexTool.left_wordbounds()
    right_bounds = lchain(
        RegexTool.right_wordbounds(),
        [RegexTool.bound2prefixed(b, r"시") for b in RegexTool.right_wordbounds()],
    )
    rstr_rightbounded = RegexTool.rstr2right_bounded(r"\d+", right_bounds)

    def bound_iter_left():
        b_list_raw = RegexTool.left_wordbounds()
        for b in b_list_raw:
            yield b
            yield r"{}{}".format(b, r"{1,2}")

    bound_list_left = list(bound_iter_left())
    rstr_bound = RegexTool.rstr2left_bounded(rstr_rightbounded, bound_list_left)
    return re.compile(rstr_bound)
def text2entity_list(cls, str_in):
    logger = FoxylibLogger.func_level2logger(cls.text2entity_list, logging.DEBUG)

    p = cls.pattern()
    m_list = list(p.finditer(str_in))
    logger.debug({"m_list": m_list})

    def match2entity_list(m):
        s, e = m.span()
        text = m.group()
        n = len(text)

        l = [{FoxylibEntity.Field.SPAN: (s + i, s + i + 1),
              FoxylibEntity.Field.VALUE: DayofweekEntityKo.str2value(text[i]),
              FoxylibEntity.Field.TEXT: text[i],
              FoxylibEntity.Field.FULLTEXT: str_in,
              FoxylibEntity.Field.TYPE: DayofweekEntity.entity_type(),
              }
             for i in range(n)
             if text[i] != "," and not text[i].isspace()]

        # logger.debug({"s": s, "e": e, "m": m, "text": text, "n": n, "l": l})
        return l

    return lchain(*lmap(match2entity_list, m_list))
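# Standalone sketch of the per-character expansion above: a match like "월,수"
# becomes one single-character entity per weekday letter, with commas and
# whitespace skipped. Spans are absolute offsets into the full text; the
# dict fields here are reduced to plain strings.
def _sketch_match2entities(fulltext, start, matched):
    return [{"span": (start + i, start + i + 1), "text": ch}
            for i, ch in enumerate(matched)
            if ch != "," and not ch.isspace()]

assert _sketch_match2entities("월,수 요일", 0, "월,수") == [
    {"span": (0, 1), "text": "월"},
    {"span": (2, 3), "text": "수"},
]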
def pattern_variation(cls):
    l = lchain(cls.string_singlequote(), cls.string_doublequote())
    rstr = RegexTool.rstr_iter2or(map(re.escape, l))
    p = re.compile(rstr)
    return p
def _text2entity_list(cls, text_in, lang):
    return lchain(
        cls._text2entity_list_me(text_in, lang),
        cls._text2entity_list_names(text_in),
    )
class ParameterLexer:
    # List of token names. This is always required
    tokens = (
        'ANY',
        # 'WHITESPACE',
        'DQ',
        'SQ',
        'BACKSLASH',
        'EQUAL',
        'DELIM_PREFIX',
    )

    l_PREFIX = ["+", "-", "*", "?"]

    t_DQ = r'"'
    t_SQ = r"'"
    t_BACKSLASH = r'\\'
    t_EQUAL = r'='
    t_DELIM_PREFIX = r'[{0}]'.format("".join(lmap(re.escape, l_PREFIX)))

    l_VAR = lchain(
        l_PREFIX,
        ["\\", "=", "'", '"'],
    )
    t_ANY = r'(?:[^\s{0}]+)|(?:\s+)'.format("".join(lmap(re.escape, l_VAR)))
    # t_WHITESPACE = r'\W+'

    # Error handling rule
    def t_error(self, t):
        raise Exception("Illegal character '%s'" % t.value[0])

    # Build the lexer
    def build(self, **kwargs):
        self.lexer = lex.lex(module=self, **kwargs)

    DELIM_TYPE_PREFIX = "P"
    DELIM_TYPE_INFIX = "I"

    @classmethod
    def r_prefix(cls):
        return r"(?:{0})".format("|".join(lmap(re.escape, cls.l_PREFIX)))

    @classmethod
    def stt_delim2type(cls, tok):
        is_PREFIX = (tok.value in cls.l_PREFIX)
        is_INFIX = (tok.value in ["="])
        if len(lfilter(bool, [is_PREFIX, is_INFIX])) != 1:
            raise Exception()

        if is_PREFIX:
            return cls.DELIM_TYPE_PREFIX
        if is_INFIX:
            return cls.DELIM_TYPE_INFIX
        raise Exception()

    @classmethod
    def delim_infix2iStart(cls, token_list_DELIM, tt_list_DELIM):
        if not token_list_DELIM:
            return None

        tok_LAST = token_list_DELIM[-1]
        if tok_LAST.type != "ANY":
            return None

        if len(token_list_DELIM) <= 1:
            return -1

        tok_2PREV = token_list_DELIM[-2]
        if tok_2PREV.type not in tt_list_DELIM:
            return -1

        delim_type = cls.stt_delim2type(tok_2PREV)
        if delim_type == cls.DELIM_TYPE_INFIX:
            return None  # Wrong syntax
        if delim_type == cls.DELIM_TYPE_PREFIX:
            return -2
        raise Exception()

    @classmethod
    def is_delim_infix_valid(cls, token_list_DELIM):
        if not token_list_DELIM:
            return False

        tok_LAST = token_list_DELIM[-1]
        if tok_LAST.type != "ANY":
            return False

        return True

    @classmethod
    def lexer2str_DELIM_list(cls, lexer, s_IN, maxsplit=None):
        # delim_rule = LexerTool.DELIM_AS_PREFIX
        lexer.input(s_IN)

        tt_list_ESCAPE = ["BACKSLASH"]
        tt_list_STATE = ["SQ", "DQ"]
        tt_list_DELIM = ["DELIM_PREFIX", "EQUAL"]

        str_DELIM_list = []
        token_list_DELIM = []

        state_INITIAL = "INITIAL"
        l_state = [state_INITIAL]

        while True:
            tok = lexer.token()
            if not tok:
                break
            # print(tok, tok.type, file=sys.stderr)

            state_CUR = l_state[-1]
            stop_split = (maxsplit is not None) and (len(str_DELIM_list) >= maxsplit)
            stt = LexerTool.tok2semantic_token_type(
                tok,
                token_list_DELIM,
                [tt_list_ESCAPE, tt_list_STATE, tt_list_DELIM],
                stop_split,
                state_CUR,
                state_INITIAL,
            )

            is_append_BEFORE = stt not in [LexerTool.STT_DELIM]
            is_append_BEFORE_and_done = (stt in [LexerTool.STT_ANY])

            if is_append_BEFORE:
                token_list_DELIM.append(tok)
            if is_append_BEFORE_and_done:
                continue

            if stt == LexerTool.STT_DELIM:
                delim_type = cls.stt_delim2type(tok)
                if delim_type == cls.DELIM_TYPE_INFIX:
                    iSTART_INFIX = cls.delim_infix2iStart(token_list_DELIM, tt_list_DELIM)
                    if iSTART_INFIX is None:
                        # raise Exception()
                        return None  # Syntactically wrong

                    if iSTART_INFIX < -1:
                        if len(token_list_DELIM) != 2:
                            raise Exception()
                    else:
                        token_list_PREV = token_list_DELIM[:iSTART_INFIX]
                        str_DELIM_list.append(
                            LexerTool.token_list_DELIM2str_DELIM(token_list_PREV))
                        token_list_DELIM = token_list_DELIM[iSTART_INFIX:]
                    # print(tok, token_list_DELIM, str_DELIM_list, iSTART_INFIX, file=sys.stderr)
                elif delim_type == cls.DELIM_TYPE_PREFIX:
                    token_list_PREV = token_list_DELIM
                    str_DELIM_list.append(
                        LexerTool.token_list_DELIM2str_DELIM(token_list_PREV))
                    token_list_DELIM = []
                else:
                    raise Exception()

                token_list_DELIM.append(tok)
                continue

            if stt == LexerTool.STT_START:
                l_state.append(tok.type)
                continue

            if stt == LexerTool.STT_END:
                if l_state[-1] != tok.type:
                    raise Exception()
                l_state.pop()
                continue

        if len(l_state) > 1:
            return None
        if l_state[0] != state_INITIAL:
            return None

        if token_list_DELIM:
            str_DELIM_list.append(
                LexerTool.token_list_DELIM2str_DELIM(token_list_DELIM))

        return str_DELIM_list

    @classmethod
    def str2l_token(cls, s, maxsplit=None, include_tokens=None):
        if include_tokens is None:
            include_tokens = True

        m = cls()
        m.build()

        tok_groups = (
            ["ANY", "SINGLEQUOTE", "DOUBLEQUOTE"],
            ["DELIM"],
        )
        l = LexerTool.str2str_token_list(
            m.lexer, s, tok_groups,
            maxsplit=maxsplit,
            include_tokens=include_tokens,
        )
        return l

    @classmethod
    def create_lexer(cls):
        m = cls()
        m.build()
        lexer = m.lexer
        return lexer

    @classmethod
    def str2args_kwargs_pair(cls, s_IN, maxsplit=None):
        str_PARAM_list = cls.lexer2str_DELIM_list(cls.create_lexer(), s_IN, maxsplit=maxsplit)
        if not str_PARAM_list:
            return (None, str_PARAM_list)

        return (
            str2strip(str_PARAM_list[0]),
            lmap(str2strip, str_PARAM_list[1:]),
        )

    @classmethod
    def str2args_kwargs_pair_NEW(cls, s_IN, split_ARG_str):
        str_PARAM_list = cls.lexer2str_DELIM_list(cls.create_lexer(), s_IN)
        if not str_PARAM_list:
            return (None, str_PARAM_list)

        return (
            str2strip(str_PARAM_list[0]),
            lmap(str2strip, str_PARAM_list[1:]),
        )
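# A minimal, self-contained PLY lexer in the same build()/token() style as
# ParameterLexer above, assuming the standard ply.lex API; the token set here
# is made up for illustration.
import ply.lex as lex

class _SketchLexer:
    tokens = ('WORD', 'EQUAL')
    t_WORD = r'[^\s=]+'
    t_EQUAL = r'='
    t_ignore = ' \t'

    def t_error(self, t):
        raise Exception("Illegal character '%s'" % t.value[0])

    def build(self, **kwargs):
        self.lexer = lex.lex(module=self, **kwargs)

_m = _SketchLexer()
_m.build()
_m.lexer.input("key=value")
assert [tok.type for tok in iter(_m.lexer.token, None)] == ['WORD', 'EQUAL', 'WORD']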
def doc2sheet_name_ko_row(doc):
    key = PortDoc.doc2key(doc)
    name_list_ko = PortDoc.doc_lang2text_list(doc, "ko")
    return lchain([key], name_list_ko)