def groupby_and_sum(df, value):
    # df = pd.DataFrame(data)
    groupby_list = list(df.columns)
    groupby_list = get_trimed_list(value, groupby_list)  # the value column itself is not grouped
    groupby_list = get_trimed_list("zb", groupby_list)  # the zb column is not grouped
    groupby_list = get_trimed_list("tb", groupby_list)  # the tb column is not grouped
    groupby_list = get_trimed_list("hb", groupby_list)  # the hb column is not grouped
    # Two grouping cases:
    # 1. the value column is numeric: group by groupby_list and sum value
    # 2. the value column is text: group by groupby_list and concatenate value
    if df[value].iloc[0] is None:  # positional access; the index may not start at 0
        return df
    if np.issubdtype(df[value].dtype, np.number):  # is this column numeric?
        if groupby_list:
            grouped = df.groupby(groupby_list, as_index=False)[value].sum()
        else:
            grouped = df[value].sum()
    else:
        if groupby_list:
            grouped = df.groupby(groupby_list, as_index=True)[value].apply(
                lambda x: get_unilist(x.str.cat(sep=",").split(","))).reset_index()
        else:
            # no grouping keys left: group on a constant column, then drop it
            df["temp"] = 1
            grouped = df.groupby(["temp"], as_index=True)[value].apply(
                lambda x: get_unilist(x.str.cat(sep=",").split(","))).reset_index()
            grouped = grouped.drop(columns=["temp"])
    return grouped
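# --- Example: groupby_and_sum (illustrative sketch) ---------------------------
# Assumes the module-level `get_trimed_list` / `get_unilist` helpers behave as
# "drop one key" / order-preserving dedupe; data below is made up.
def _demo_groupby_and_sum():
    import pandas as pd
    df = pd.DataFrame({"qh": ["北京市", "北京市", "上海市"], "cnt": [1, 2, 3]})
    # numeric value column -> group by every remaining column and sum:
    print(groupby_and_sum(df, "cnt"))
    # -> qh=上海市: cnt=3, qh=北京市: cnt=3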
def qh_godown(self, *args, **kwargs):
    # waittodo: handle the "full" parameter
    table = self.apis_copy.get("table", "")
    qh = self.apis_copy.get("Cqh", "")
    from utils.qh_processor import get_qh_sub
    from utils.get_unilist import get_unilist
    child_qh = get_unilist(get_qh_sub(qh, "next"))
    if "shej" in table or "shij" in table or "xj" in table:
        from utils.qh_processor import get_qh_godown
        qh_child_level = get_qh_godown(qh)
        update_dict = {"name": qh_child_level}
        g.modified_initialization.update({qh_child_level: child_qh})
    elif "qh" in table:
        update_dict = {"name": "qh", "IN-Cqh": ",".join(child_qh)}
        g.modified_initialization.update({"qh": child_qh})
        # waittodo: the "full" case
    elif table == "":
        # no table given: the transformer handles it, treated the same as a "qh" table
        update_dict = {"name": "qh", "IN-Cqh": ",".join(child_qh)}
        g.modified_initialization.update({"qh": child_qh})
    else:
        return self
    self.apis_copy.update(update_dict)
    return self
def after_search(self):
    """
    self.db_results: [db_results[0][0]]
    :return:
    """
    # compute the result
    self.apis_copy["value"] = "wxzb"
    df_wxzb = Extension.groupby_and_sum(self.db_results[0][0], self.value)
    groupby = f"{self.apis_copy['name']},{self.apis_copy['stack']}"
    from utils.get_unilist import get_unilist
    groupby = get_unilist([i for i in groupby.split(",") if i])  # drop empty keys
    if groupby:
        # turn value into its share within each group
        df_wxzb[self.value] = (
            df_wxzb[self.value] /
            df_wxzb.groupby(groupby)[self.value].transform("sum"))
        df_wxzb = df_wxzb.loc[df_wxzb["xfxs"] == "网信", :]
        # astype returns a copy, so assign it back
        df_wxzb[self.value] = df_wxzb[self.value].astype(float)
        df_wxzb = df_wxzb.rename(columns={self.value: "wxzb"})
        df_wxzb = df_wxzb.drop(["xfxs"], axis=1)
        self.df = df_wxzb
    else:
        self.df = (df_wxzb[self.value][df_wxzb["xfxs"] == "网信"] /
                   df_wxzb[self.value].sum())
        self.df = pd.DataFrame({"wxzb": self.df})
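# --- Example: the per-group share computation used in after_search ------------
# Self-contained toy (data made up): transform('sum') turns the value column
# into a within-group proportion before the 网信 rows are kept.
def _demo_wxzb_share():
    import pandas as pd
    df = pd.DataFrame({"qh": ["A", "A", "B", "B"],
                       "xfxs": ["网信", "其他", "网信", "其他"],
                       "cnt": [1, 3, 2, 2]})
    df["cnt"] = df["cnt"] / df.groupby(["qh"])["cnt"].transform("sum")
    print(df[df["xfxs"] == "网信"])  # 网信 share: A -> 0.25, B -> 0.50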
def add_global_info(data_frame_list, results):
    from utils.get_unilist import get_unilist
    table_list = []
    req_list = []
    for dataframe in data_frame_list:
        table_list.extend(dataframe["table"])
        req_list.append(dataframe["req"])
    results["table"] = get_unilist(table_list)  # every table touched, deduplicated
    # results["req"] = get_unilist(req_list)
    return results
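# --- Example: add_global_info --------------------------------------------------
# Hypothetical input shape: each dataframe dict carries the tables it touched
# and the request it answered (names made up).
def _demo_add_global_info():
    data_frame_list = [{"table": ["t_a", "t_b"], "req": {"name": "qh"}},
                       {"table": ["t_b"], "req": {"name": "xfbm"}}]
    print(add_global_info(data_frame_list, {}))
    # -> {'table': ['t_a', 't_b']} (assuming get_unilist preserves order)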
def get_initdict_from_sql_mode(results, format_pool_sql):
    """
    "full": {"name": "{INITIALIZATION}['xfxs']", "value": [0], "query": "{INITIALIZATION}['xfxs']"}
    """
    full = results.get("full", {})
    init_dicts = {}  # final field -> value-list mapping used for the Cartesian product
    # fields below skip the Cartesian product and copy another field's values,
    # e.g. {"query": "$name"} means query takes the same values as name
    followers = {}
    for k, v in full.items():  # e.g. {"name": "xfxs", "query": "xfxs"}
        if isinstance(v, list):
            init_dicts[k] = v
        elif isinstance(v, str):
            from app import app
            if v.startswith("$"):
                followers.setdefault(v[1:], []).append(k)
            elif v.startswith("fx_db_sql:"):
                sql = v.replace("fx_db_sql:", "").format(**format_pool_sql)
                from utils.db_connection import fx_engine
                try:
                    res = fx_engine.execute(sql)
                except Exception:
                    return 400, f"PluginSQLError: There must be some error in the sql {sql}", {}
                from utils.get_unilist import get_unilist
                init_dicts[k] = get_unilist([i[0] for i in res if i[0]])
            elif v.startswith("zb_db_sql:"):
                sql = v.replace("zb_db_sql:", "").format(**format_pool_sql)
                from utils.db_connection import zb_engine
                try:
                    res = zb_engine.execute(sql)
                except Exception:
                    return 400, f"PluginSQLError: There must be some error in the sql {sql}", {}
                from utils.get_unilist import get_unilist
                init_dicts[k] = get_unilist([i[0] for i in res if i[0]])
            elif v not in app.config.get("INITIALIZATION"):
                return 400, f"PluginSQLModeError: full params point to wrong direction {v}", {}
            else:
                init_dicts[k] = app.config.get("INITIALIZATION").get(v)
        else:
            return 400, f"PluginSQLModeError: full params contains invalid key {k}", {}
    return 200, "success", {"init_dicts": init_dicts, "followers": followers}
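# --- Example: the "full" value kinds handled above ------------------------------
# Hypothetical config (keys and values made up); the list and "$" branches are
# exercised here, the fx_db_sql:/zb_db_sql: branches would hit a real engine.
def _demo_get_initdict():
    results = {"full": {"xfxs": ["信访", "网信"],  # literal list -> init_dicts
                        "query": "$name",          # follower of "name"
                        "name": ["A", "B"]}}       # literal list -> init_dicts
    code, msg, out = get_initdict_from_sql_mode(results, {})
    print(code, out)
    # -> 200 {'init_dicts': {'xfxs': ['信访', '网信'], 'name': ['A', 'B']},
    #         'followers': {'name': ['query']}}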
def qh_include_sub(self, *args, **kwargs):
    # waittodo: handle the "full" parameter
    qh = self.apis_copy.get("Cqh", "")
    table = self.apis_copy.get("table", "")
    if qh and "qh" in table:
        from utils.qh_processor import get_qh_include_sub
        from utils.get_unilist import get_unilist
        qh_include_sub = get_unilist(get_qh_include_sub(qh))
        update_dict = {"IN-Cqh": ",".join(qh_include_sub)}
        self.apis_copy.update(update_dict)
    return self
def get_waiting_for_search(cls, before_waiting_for_search):
    """
    :param before_waiting_for_search: [
        {"table": table_my, "columns": columns_my, "conditions": [apis_copy]},
        {"table": table_bmy, "columns": columns_bmy, "conditions": [apis_copy]},
        {"table": table_jbmy, "columns": columns_jbmy, "conditions": [apis_copy]}
    ]
    :return: code, msg, waiting_for_search:
        [{"table": "", "ex_table": "", "columns": [], "conditions": []}, {}]
    """
    for_search_done = []
    for waiting_for_search_each in before_waiting_for_search:
        for_search_each_done = {}
        # handle table
        table = waiting_for_search_each["table"]
        for_search_each_done["table"] = table
        # handle ex_table: resolve it from the db engine when not supplied
        ex_table = waiting_for_search_each.get("ex_table")
        if ex_table is None:
            db_engine = waiting_for_search_each.get("db_engine", "zb_db")
            code, msg, real_table = cls.get_real_table(
                {"table": table, "db_engine": db_engine})
            if code != 200:
                return code, msg, {}
            ex_table = real_table["ex_table"]
        for_search_each_done["ex_table"] = ex_table
        # handle columns: split, drop empties, dedupe
        columns = waiting_for_search_each["columns"]
        columns = [i for i in columns.split(",") if i]
        from utils.get_unilist import get_unilist
        for_search_each_done["columns"] = get_unilist(columns)
        # handle conditions
        conditions_done = []
        conditions = waiting_for_search_each["conditions"]
        for condition in conditions:
            if not isinstance(condition, dict):  # each condition group must be a dict
                return 400, "ExtensionError: conditions must be a list of dicts", {}
            if isinstance(ex_table, str):  # resolution left only an error string behind
                return 200, "ExtensionError: NoSuchTable", {}
            code, msg, parsed_condition = cls.get_conditions(ex_table, condition)
            if code != 200:
                return code, msg, {}
            conditions_done.append(parsed_condition)
        for_search_each_done["conditions"] = conditions_done
        for_search_done.append(for_search_each_done)
    return 200, "success", for_search_done
def djjg_qhauth(self, *args, **kwargs):
    """
    Region-level access control for the registration-authority (djjg) pie chart:
        national: only province level can view
        province: only province level can view
        city: province/city levels can view; a province sees every city,
              a city sees itself
        county: province/city/county levels can view; a province sees every county,
                a city sees its own counties, a county sees itself
    """
    from utils.qh_processor import get_qh_include_sub
    from utils.get_unilist import get_unilist
    if g.get("level_auth_name"):
        qh_list = get_unilist(get_qh_include_sub(g.get("level_auth_name")))
        qh_list = [f"'{i}'" for i in qh_list]
        self.apis_copy["sql_qhauth_other"] = {
            "djjg_qh": f"qh in ({','.join(qh_list)})"
        }
    table = self.apis_copy.get("table", "")
    qh = self.apis_copy.get("Cqh", "")
    if "djjg" in table and "qh" in table:
        self.apis_copy["djjg"] = args[0]
        self.apis_copy["IN-Cqh"] = ",".join(
            get_unilist(get_qh_include_sub(qh)))
    return self
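# --- Example: the SQL fragment built by djjg_qhauth -----------------------------
# Isolated sketch of the quoting/join step (region names made up):
def _demo_qhauth_fragment():
    qh_list = [f"'{i}'" for i in ["北京市", "海淀区", "朝阳区"]]
    print(f"qh in ({','.join(qh_list)})")  # qh in ('北京市','海淀区','朝阳区')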
def xfbm_godown(self, *args, **kwargs):
    # drill down one level of the xfbm hierarchy:
    # province dept -> all city depts, city dept -> its county depts,
    # county dept -> itself
    xfbm = PT._init_file_reader("shejxfbm+shijxfbm+xjxfbm")
    target = self.apis_copy.get("xfbm")
    if target:
        childxfbm = []
        if target in xfbm["shejxfbm"].tolist():
            childxfbm = xfbm["shijxfbm"].tolist()
        elif target in xfbm["shijxfbm"].tolist():
            childxfbm = xfbm["xjxfbm"][xfbm["shijxfbm"] == target].tolist()
        elif target in xfbm["xjxfbm"].tolist():
            childxfbm = [target]
        from utils.get_unilist import get_unilist
        childxfbm = get_unilist(childxfbm)
        g.modified_initialization.update({"xfbm": childxfbm})
        self.apis_copy.update({
            "name": "xfbm",
            "IN-xfbm": ",".join(childxfbm)
        })
    return self
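# --- Example: the drill-down lookup inside xfbm_godown ---------------------------
# Toy hierarchy (column names from the source, data made up):
def _demo_xfbm_lookup():
    import pandas as pd
    xfbm = pd.DataFrame({"shejxfbm": ["省局", "省局"],
                         "shijxfbm": ["市局A", "市局B"],
                         "xjxfbm": ["县所A1", "县所B1"]})
    target = "市局A"
    print(xfbm["xjxfbm"][xfbm["shijxfbm"] == target].tolist())  # ['县所A1']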
def get_relation_valuelist(re_col_file, dataframe):
    from app import app
    INITIALIZATION_FILE_PATH = app.config["INITIALIZATION_FILE_PATH"]
    # e.g. [[["北京市", "海淀区"], ["北京市", "朝阳区"]], [["体育", "足球"], ["体育", "篮球"]]]
    re_list = []
    re_cols = []  # related column names, order preserved, possibly spanning several files
    for file_name, re_df_cols in re_col_file.items():
        re_df = pd.read_csv(os.path.join(INITIALIZATION_FILE_PATH, file_name),
                            sep=app.config.get("INITIALIZATION_FILE_SEP", "\t"))
        re_cols.extend(re_df_cols)
        # keep only the rows matching the current dataframe's values
        filter_cols = [i for i in dataframe if i in re_df.columns]
        for col in filter_cols:
            re_df[re_df_cols] = re_df[re_df_cols][re_df[col] == dataframe[col]]
        res = re_df[re_df_cols].dropna()
        # deduplicate the required columns' value rows and add them to the global list
        tmp_re_list = [list(row) for _, row in res.iterrows()]
        from utils.get_unilist import get_unilist
        re_list.append(get_unilist(tmp_re_list))
    return re_list, re_cols
def convert2nsv(dataframe, groupid=0):
    df = dataframe["df"]
    name = dataframe.get("name")
    value = dataframe.get("value")
    stack = dataframe.get("stack")
    main_name = dataframe.get("main_name")
    # np.float was removed in NumPy 1.24; use the abstract scalar types instead
    if isinstance(df, (np.integer, np.floating, int, float)):
        df = pd.DataFrame({value: [df]})
    data = df.to_dict(orient='records')
    # handle name and value (covers both a bare number and name/value pairs)
    if name:  # rename the name column
        [i.update({"name": i.pop(name)}) for i in data]
    if value:  # rename the value column
        [i.update({"value": i.pop(value)}) for i in data]
    # handle main_name
    if main_name and not stack and not name:  # main_name without stack means there is only a name
        data[0]["name"] = main_name
    if stack:  # handle stacking
        data_stack = []
        [i.update({stack: i.get(stack, i.get("name"))}) for i in data]
        stack_list = [i.get(stack) for i in data]  # inner (nested) values
        from utils.get_unilist import get_unilist
        stack_uni = get_unilist(stack_list)  # inner values, deduplicated
        main_list = [i.get("name") for i in data]  # outer (wrapper) values
        main_uni = get_unilist(main_list)  # outer values, deduplicated
        if name == stack:
            mapping = {f"value_g{groupid}_1": main_name}
        else:
            mapping = {
                f"value_g{groupid}_{stack_index + 1}": stack_uni[stack_index]
                for stack_index in range(len(stack_uni))
            }
        for main_one in main_uni:
            dic_one = {"name": main_one}
            for value_index, stack_one in enumerate(stack_uni):
                if name == stack:
                    # handles data that was plain name&value (no stacking) but an
                    # extra_index was specified, so stacking is still required: even
                    # flat key/value pairs must be converted to the nested form
                    value_index = 0
                    stack_value = [
                        tar_one.get("value") for tar_one in data
                        # core: find the value whose name and stack both match
                        if tar_one.get("name") == main_one
                        and tar_one.get(name) == stack_one
                    ]
                else:  # the normal case
                    stack_value = [
                        tar_one.get("value") for tar_one in data
                        # core: find the value whose name and stack both match
                        if tar_one.get("name") == main_one
                        and tar_one.get(stack) == stack_one
                    ]
                if stack_value:
                    dic_one[f"value_g{groupid}_{value_index + 1}"] = stack_value[0]
            data_stack.append(dic_one)
        return {
            "code": 200,
            "msg": "success",
            "map": mapping,
            "data": data_stack
        }
    return {"code": 200, "msg": "success", "data": data}
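# --- Example: convert2nsv on a plain name/value frame ----------------------------
# Minimal sketch, no stacking (column names made up):
def _demo_convert2nsv():
    import pandas as pd
    df = pd.DataFrame({"qh": ["A", "B"], "cnt": [1, 2]})
    out = convert2nsv({"df": df, "name": "qh", "value": "cnt"})
    print(out["data"])  # [{'name': 'A', 'value': 1}, {'name': 'B', 'value': 2}]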
def _get_none_df(apis_copy):
    # build a one-row placeholder frame of Nones over the requested columns
    columns = [apis_copy.get("name", ""),
               apis_copy.get("stack", ""),
               apis_copy.get("value", "")]
    from utils.get_unilist import get_unilist
    columns = get_unilist(columns)
    nonedf = [[pd.DataFrame([[None] * len(columns)], columns=columns)]]
    return nonedf
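# --- Example: _get_none_df --------------------------------------------------------
# Placeholder result for an empty search (keys made up):
def _demo_get_none_df():
    nonedf = _get_none_df({"name": "qh", "stack": "xfxs", "value": "cnt"})
    print(nonedf[0][0])
    # one row, all None, columns: qh / xfxs / cnt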