예제 #1
0
def groupby_and_sum(df, value):
    """Aggregate the ``value`` column of ``df`` over all remaining columns.

    Every column of ``df`` is a grouping key except ``value`` itself and the
    columns named ``zb``, ``tb`` and ``hb`` (these are removed from the key
    list with ``get_trimed_list``).

    Two aggregation modes are used:

    1. ``value`` is numeric: group by the keys and sum ``value``. With no
       grouping keys the plain scalar sum of the column is returned.
    2. ``value`` is not numeric: group by the keys, join the strings of each
       group with ``","``, split on ``","`` again and de-duplicate the pieces
       with ``get_unilist``.

    :param df: input ``pandas.DataFrame``; it is never modified.
    :param value: name of the column to aggregate.
    :return: ``df`` itself when it has no rows or when the first entry of
        ``value`` is ``None``; otherwise the aggregated frame (or a scalar
        for the numeric case without grouping keys).
    """
    groupby_list = list(df.columns)
    # The aggregated column and the zb / tb / hb columns are never group keys.
    for excluded in (value, "zb", "tb", "hb"):
        groupby_list = get_trimed_list(excluded, groupby_list)

    # Look at the first row by position: a label lookup (``df[value][0]``)
    # raises when the frame is empty or its index does not contain 0.
    if df.empty or df[value].iloc[0] is None:
        return df

    if np.issubdtype(df[value].dtype, np.number):  # numeric column -> sum
        if groupby_list:
            return df.groupby(groupby_list, as_index=False)[value].sum()
        return df[value].sum()

    def _join_unique(series):
        # Merge all comma-separated entries of one group, then de-duplicate.
        return get_unilist((series.str.cat(sep=',')).split(","))

    if groupby_list:
        return df.groupby(
            groupby_list, as_index=True)[value].apply(_join_unique).reset_index()

    # No grouping keys: group on a constant helper column so the same
    # aggregation path can be used. ``assign`` works on a copy, so the
    # caller's frame does not gain a stray "temp" column.
    keyed = df.assign(temp=1)
    grouped = keyed.groupby(
        ["temp"], as_index=True)[value].apply(_join_unique).reset_index()
    return grouped.drop(columns=["temp"])
예제 #2
0
 def qh_godown(self, *args, **kwargs):
     """Rewrite the request so it targets the children of the current ``Cqh``.

     The children are ``get_unilist(get_qh_sub(qh, "next"))`` where ``qh`` is
     ``apis_copy["Cqh"]``. What happens next depends on ``apis_copy["table"]``:

     * contains ``"shej"``, ``"shij"`` or ``"xj"``: ``name`` becomes the value
       returned by ``get_qh_godown(qh)`` and the children are registered in
       ``g.modified_initialization`` under that same key.
     * contains ``"qh"``, or is empty (no table given, handled the same way):
       ``name`` becomes ``"qh"``, ``IN-Cqh`` becomes the comma-joined children
       and they are registered in ``g.modified_initialization["qh"]``.
     * anything else: the request is left untouched.

     :return: ``self``, so calls can be chained.
     """
     # TODO (carried over from the original): handling of "full" is unresolved.
     table = self.apis_copy.get("table", "")
     qh = self.apis_copy.get("Cqh", "")
     from utils.qh_processor import get_qh_sub
     from utils.get_unilist import get_unilist
     child_qh = get_unilist(get_qh_sub(qh, "next"))

     if "shej" in table or "shij" in table or "xj" in table:
         from utils.qh_processor import get_qh_godown
         qh_child_level = get_qh_godown(qh)
         update_dict = {"name": qh_child_level}
         g.modified_initialization.update({qh_child_level: child_qh})
     elif "qh" in table or table == "":
         # An empty table goes through the transformer and is treated exactly
         # like a "qh" table; child_qh computed above is reused instead of
         # being recomputed.
         update_dict = {"name": "qh", "IN-Cqh": ",".join(child_qh)}
         g.modified_initialization.update({"qh": child_qh})
     else:
         return self
     self.apis_copy.update(update_dict)
     return self
예제 #3
0
    def after_search(self):
        """Compute the share of rows with ``xfxs == "网信"`` and store it in ``self.df``.

        Reads the first result frame, ``self.db_results[0][0]``, and aggregates
        it with ``Extension.groupby_and_sum``.

        * With grouping columns (``apis_copy["name"]`` / ``apis_copy["stack"]``):
          each value is divided by the sum of its group, only the ``"网信"``
          rows are kept, the value column is cast to float and renamed to
          ``wxzb``, and the ``xfxs`` column is dropped.
        * Without grouping columns: the ``"网信"`` values are divided by the
          overall sum and wrapped in a single-column ``wxzb`` frame.

        :return: None; the result is written to ``self.df``.
        """
        # NOTE(review): self.value is read right after apis_copy["value"] is
        # set; presumably it is derived from apis_copy - confirm.
        self.apis_copy["value"] = "wxzb"
        df_wxzb = Extension.groupby_and_sum(self.db_results[0][0], self.value)

        groupby = f"{self.apis_copy['name']},{self.apis_copy['stack']}"
        from utils.get_unilist import get_unilist
        groupby = get_unilist(groupby.split(","))
        if groupby:
            df_wxzb[self.value] = (
                df_wxzb[self.value] /
                df_wxzb.groupby(groupby)[self.value].transform('sum'))
            df_wxzb = df_wxzb.loc[df_wxzb["xfxs"] == "网信", :]
            # astype returns a new object; the original discarded the result,
            # so the float conversion never took effect.
            df_wxzb = df_wxzb.astype({self.value: float})
            df_wxzb = df_wxzb.rename(columns={self.value: "wxzb"})
            df_wxzb = df_wxzb.drop(["xfxs"], axis=1)

            self.df = df_wxzb
        else:
            self.df = df_wxzb[self.value][df_wxzb["xfxs"] == "网信"] / df_wxzb[
                self.value].sum()
            self.df = pd.DataFrame({"wxzb": self.df})
예제 #4
0
def add_global_info(data_frame_list, results):
    """Store the de-duplicated table names of all frames in ``results["table"]``.

    :param data_frame_list: iterable of mappings, each providing a ``"table"``
        iterable and a ``"req"`` entry.
    :param results: mapping that receives the ``"table"`` key; it is modified
        in place.
    :return: the same ``results`` object.
    """
    all_tables = []
    all_reqs = []  # collected, but currently not written into results
    for frame in data_frame_list:
        all_tables += frame["table"]
        all_reqs += [frame["req"]]
    results["table"] = get_unilist(all_tables)
    return results
예제 #5
0
def get_initdict_from_sql_mode(results, format_pool_sql):
    """Resolve ``results["full"]`` into the candidate values of every field.

    Each entry of ``full`` maps a field name to one of:

    * a list - used directly as the field's values;
    * ``"$other"`` - the field follows ``other`` (same value, not part of the
      cartesian product); recorded in ``followers["other"]``;
    * ``"fx_db_sql:<sql>"`` / ``"zb_db_sql:<sql>"`` - ``<sql>`` is filled in
      with ``str.format(**format_pool_sql)`` and executed on ``fx_engine`` /
      ``zb_engine``; the truthy first-column values of the rows are
      de-duplicated with ``get_unilist``;
    * any other string - a key of ``app.config["INITIALIZATION"]``.

    Example entry: ``"full": {"name": "xfxs", "query": "$name"}``.

    :param results: mapping that may contain the ``"full"`` spec.
    :param format_pool_sql: mapping of placeholders for the SQL templates.
    :return: ``(code, msg, payload)``. On success ``code`` is 200 and
        ``payload`` is ``{"init_dicts": ..., "followers": ...}``; on failure
        ``code`` is 400 and ``payload`` is ``{}``.
    """
    full = results.get("full", {})
    init_dicts = {}  # field -> values that take part in the cartesian product
    followers = {}  # leader field -> fields that copy the leader's value
    for k, v in full.items():
        if isinstance(v, list):
            init_dicts[k] = v
            continue
        if not isinstance(v, str):
            return 400, f"PluginSQLModeError: full params contains invalid key {k}", {}

        from app import app
        if v.startswith("$"):
            followers.setdefault(v[1:], []).append(k)
        elif v.startswith(("fx_db_sql:", "zb_db_sql:")):
            prefix = "fx_db_sql:" if v.startswith("fx_db_sql:") else "zb_db_sql:"
            # NOTE(review): the statement is assembled with str.format; make
            # sure format_pool_sql never carries untrusted input.
            sql = v.replace(prefix, "").format(**format_pool_sql)
            if prefix == "fx_db_sql:":
                from utils.db_connection import fx_engine as engine
            else:
                from utils.db_connection import zb_engine as engine
            try:
                res = engine.execute(sql)
            except Exception:
                # A bare ``except:`` would also trap KeyboardInterrupt and
                # SystemExit; only real errors are reported as bad SQL.
                return 400, f"PluginSQLError: There must be some error in the sql {sql}", {}
            from utils.get_unilist import get_unilist
            init_dicts[k] = get_unilist([i[0] for i in res if i[0]])
        elif v not in app.config.get("INITIALIZATION"):
            return 400, f"PluginSQLModeError: full params point to wrong direction {v}", {}
        else:
            init_dicts[k] = app.config.get("INITIALIZATION").get(v)
    return 200, "success", {"init_dicts": init_dicts, "followers": followers}
예제 #6
0
 def qh_include_sub(self, *args, **kwargs):
     """Set ``IN-Cqh`` to ``Cqh`` expanded by ``get_qh_include_sub``.

     Nothing is changed unless ``apis_copy["Cqh"]`` is non-empty and
     ``apis_copy["table"]`` contains ``"qh"``. Otherwise the result of
     ``get_qh_include_sub(Cqh)`` is de-duplicated with ``get_unilist`` and
     stored comma-joined under ``apis_copy["IN-Cqh"]``.

     :return: ``self``, so calls can be chained.
     """
     # TODO (carried over from the original): handling of "full" is unresolved.
     current_qh = self.apis_copy.get("Cqh", "")
     table_name = self.apis_copy.get("table", "")
     if not current_qh or "qh" not in table_name:
         return self

     from utils.qh_processor import get_qh_include_sub
     from utils.get_unilist import get_unilist
     expanded = get_unilist(get_qh_include_sub(current_qh))
     self.apis_copy.update({"IN-Cqh": ",".join(expanded)})
     return self
예제 #7
0
    def get_waiting_for_search(cls, before_waiting_for_search):
        """Normalise raw search specs into fully resolved search descriptions.

        :param before_waiting_for_search: list of dicts such as
            ``{"table": table, "columns": "a,b", "conditions": [apis_copy]}``.
            Optional keys: ``"ex_table"`` (skips the table lookup) and
            ``"db_engine"`` (defaults to ``"zb_db"``).
        :return: ``(code, msg, waiting_for_search)``. On success ``code`` is
            200 and the third item is a list of
            ``{"table", "ex_table", "columns", "conditions"}`` dicts. When a
            lookup or a condition fails, the code and message of the failing
            step are returned with ``{}``.
        """
        prepared = []
        for spec in before_waiting_for_search:
            table = spec["table"]

            # Resolve the table through get_real_table unless the caller
            # already supplied one.
            ex_table = spec.get("ex_table")
            if ex_table is None:
                engine_name = spec.get("db_engine", "zb_db")
                code, msg, real_table = cls.get_real_table({"table": table, "db_engine": engine_name})
                if code != 200:
                    return code, msg, {}
                ex_table = real_table["ex_table"]

            # Comma-separated column string -> de-duplicated list, blanks dropped.
            column_names = [part for part in spec["columns"].split(",") if part]
            from utils.get_unilist import get_unilist
            column_names = get_unilist(column_names)

            parsed_conditions = []
            for condition in spec["conditions"]:
                if not isinstance(condition, dict):  # every condition must be a dict
                    return 400, "ExtensionError: The condition must be list with some dicts", {}
                if isinstance(ex_table, str):
                    # A plain string is reported as a missing table; note that
                    # the status code is 200.
                    return 200, "ExtensionError: NoSuchTable", {}
                code, msg, parsed = cls.get_conditions(ex_table, condition)
                if code != 200:
                    return code, msg, {}
                parsed_conditions.append(parsed)

            prepared.append({
                "table": table,
                "ex_table": ex_table,
                "columns": column_names,
                "conditions": parsed_conditions,
            })

        return 200, "success", prepared
예제 #8
0
    def djjg_qhauth(self, *args, **kwargs):
        """Apply the qh permission filter for the djjg pie chart.

        Permission rules (translated from the original notes):
            national level:   visible to province-level users only
            province level:   visible to province-level users only
            city level:       visible to province and city users; a province
                              sees all cities, a city sees itself
            county level:     visible to province, city and county users; a
                              province sees all counties, a city sees its own
                              counties, a county sees itself

        If ``g`` carries ``level_auth_name``, a ``qh in (...)`` clause over
        ``get_qh_include_sub(level_auth_name)`` is stored in
        ``apis_copy["sql_qhauth_other"]["djjg_qh"]``. If the table name
        contains both ``"djjg"`` and ``"qh"``, ``apis_copy["djjg"]`` is set to
        the first positional argument and ``IN-Cqh`` to the comma-joined
        expansion of ``Cqh``.

        :return: ``self``, so calls can be chained.
        """
        from utils.qh_processor import get_qh_include_sub
        from utils.get_unilist import get_unilist

        auth_name = g.get("level_auth_name")
        if auth_name:
            allowed = get_unilist(get_qh_include_sub(auth_name))
            quoted = ",".join([f"'{code}'" for code in allowed])
            self.apis_copy["sql_qhauth_other"] = {"djjg_qh": f"qh in ({quoted})"}

        table_name = self.apis_copy.get("table", "")
        current_qh = self.apis_copy.get("Cqh", "")
        if "djjg" in table_name and "qh" in table_name:
            self.apis_copy["djjg"] = args[0]
            expanded = get_unilist(get_qh_include_sub(current_qh))
            self.apis_copy["IN-Cqh"] = ",".join(expanded)
        return self
예제 #9
0
 def xfbm_godown(self, *args, **kwargs):
     """Replace the requested ``xfbm`` by its children one level down.

     The lookup table comes from
     ``PT._init_file_reader("shejxfbm+shijxfbm+xjxfbm")``. Depending on the
     column in which ``apis_copy["xfbm"]`` is found:

     * ``shejxfbm``: the children are all ``shijxfbm`` values;
     * ``shijxfbm``: the children are the ``xjxfbm`` values of matching rows;
     * ``xjxfbm``: the target itself is the only child;
     * not found: the children are empty.

     The de-duplicated children are registered in
     ``g.modified_initialization["xfbm"]`` and written to ``apis_copy`` as
     ``name="xfbm"`` and a comma-joined ``IN-xfbm``. Without a target nothing
     is changed.

     :return: ``self``, so calls can be chained.
     """
     lookup = PT._init_file_reader("shejxfbm+shijxfbm+xjxfbm")
     target = self.apis_copy.get("xfbm")
     if not target:
         return self

     if target in lookup["shejxfbm"].tolist():
         # NOTE(review): unlike the shijxfbm branch, this is not filtered by
         # the target row - confirm that returning every shijxfbm is intended.
         children = lookup["shijxfbm"].tolist()
     elif target in lookup["shijxfbm"].tolist():
         children = lookup["xjxfbm"][lookup["shijxfbm"] == target].tolist()
     elif target in lookup["xjxfbm"].tolist():
         children = [target]
     else:
         children = []

     from utils.get_unilist import get_unilist
     children = get_unilist(children)
     g.modified_initialization.update({"xfbm": children})
     self.apis_copy.update({"name": "xfbm", "IN-xfbm": ",".join(children)})
     return self
예제 #10
0
def get_relation_valuelist(re_col_file, dataframe):
    """Collect the related value combinations stored in the relation files.

    :param re_col_file: mapping ``file name -> list of related column names``.
        Files are read from ``app.config["INITIALIZATION_FILE_PATH"]`` with
        the separator ``app.config["INITIALIZATION_FILE_SEP"]`` (tab if unset).
    :param dataframe: mapping ``column -> value``; every key that is also a
        column of a relation file restricts that file to the matching rows.
    :return: ``(re_list, re_cols)``. ``re_list`` holds, per file, the
        de-duplicated rows of the related columns, e.g.
        ``[[["北京市", "海淀区"], ["北京市", "朝阳区"]], [["体育", "足球"], ["体育", "篮球"]]]``.
        ``re_cols`` is the ordered list of all related column names (it may
        span several files).
    """
    from app import app
    init_dir = app.config["INITIALIZATION_FILE_PATH"]
    relation_values = []
    relation_cols = []
    for file_name, wanted_cols in re_col_file.items():
        relation_df = pd.read_csv(
            os.path.join(init_dir, file_name),
            sep=app.config.get("INITIALIZATION_FILE_SEP", "\t"))
        relation_cols.extend(wanted_cols)

        # Blank out (NaN) the related columns of every row that does not match
        # a filter value; dropna below then removes those rows.
        shared_cols = [key for key in dataframe if key in relation_df.columns]
        for col in shared_cols:
            relation_df[wanted_cols] = relation_df[wanted_cols][
                relation_df[col] == dataframe[col]]
        kept = relation_df[wanted_cols].dropna()

        # De-duplicate the surviving rows and add them to the overall result.
        rows = []
        for _, row in kept.iterrows():
            rows.append(list(row))
        from utils.get_unilist import get_unilist
        relation_values.append(get_unilist(rows))

    return relation_values, relation_cols
예제 #11
0
def convert2nsv(dataframe, groupid=0):
    """Convert a result into the name / stack / value response payload.

    :param dataframe: mapping with the key ``"df"`` (a ``pandas.DataFrame`` or
        a single number) and the optional keys ``"name"``, ``"value"``,
        ``"stack"`` (column names) and ``"main_name"``.
    :param groupid: number embedded in the generated ``value_g<groupid>_<n>``
        keys.
    :return: without ``stack``:
        ``{"code": 200, "msg": "success", "data": [...]}`` where the ``name``
        / ``value`` columns have been renamed to ``"name"`` / ``"value"``.
        With ``stack``: one record per distinct name holding
        ``value_g<groupid>_<n>`` entries, plus ``"map"`` which translates
        those keys back to the stack values.
    """
    df = dataframe["df"]
    name = dataframe.get("name")
    value = dataframe.get("value")
    stack = dataframe.get("stack")
    main_name = dataframe.get("main_name")
    # A bare number is wrapped into a one-cell frame. ``np.float`` no longer
    # exists in current NumPy releases, so the abstract scalar types are used.
    if isinstance(df, (int, float, np.integer, np.floating)):
        df = pd.DataFrame({value: [df]})
    data = df.to_dict(orient='records')

    # Rename the configured columns to the fixed "name" / "value" keys.
    if name:
        for row in data:
            row["name"] = row.pop(name)
    if value:
        for row in data:
            row["value"] = row.pop(value)

    # main_name without stack and name: label the first record with it.
    # The ``data`` check avoids an IndexError on an empty result.
    if main_name and not stack and not name and data:
        data[0]["name"] = main_name

    if not stack:
        return {"code": 200, "msg": "success", "data": data}

    # Rows without a stack entry fall back to their name.
    for row in data:
        row[stack] = row.get(stack, row.get("name"))

    from utils.get_unilist import get_unilist
    stack_uni = get_unilist([row.get(stack) for row in data])  # inner values
    main_uni = get_unilist([row.get("name") for row in data])  # outer values

    # name == stack: plain name/value data that still has to be returned in
    # the nested form (an extra_index was requested), so every value goes
    # into slot 1.
    single_slot = name == stack
    if single_slot:
        mapping = {f"value_g{groupid}_{1}": main_name}
    else:
        mapping = {
            f"value_g{groupid}_{stack_index + 1}": stack_one
            for stack_index, stack_one in enumerate(stack_uni)
        }

    data_stack = []
    for main_one in main_uni:
        dic_one = {"name": main_one}
        for stack_index, stack_one in enumerate(stack_uni):
            # Values of the rows that carry this name and this stack entry.
            stack_value = [
                row.get("value") for row in data
                if row.get("name") == main_one and row.get(stack) == stack_one
            ]
            if stack_value:
                slot = 1 if single_slot else stack_index + 1
                dic_one[f"value_g{groupid}_{slot}"] = stack_value[0]
        data_stack.append(dic_one)

    return {
        "code": 200,
        "msg": "success",
        "map": mapping,
        "data": data_stack
    }
예제 #12
0
 def _get_none_df(apis_copy):
     """Build a placeholder result: one all-``None`` row.

     The columns are the ``name``, ``stack`` and ``value`` entries of
     ``apis_copy`` (``""`` when missing), de-duplicated with ``get_unilist``.

     :return: ``[[DataFrame]]`` - the single-row frame wrapped in two lists.
     """
     wanted = [apis_copy.get(key, "") for key in ("name", "stack", "value")]
     from utils.get_unilist import get_unilist
     wanted = get_unilist(wanted)
     empty_row = [None] * len(wanted)
     placeholder = pd.DataFrame([empty_row], columns=wanted)
     return [[placeholder]]