Пример #1
0
    def conflu_manager(cls):
        """Merge all manager streams and de-duplicate the result by fund id.

        The streams are merged with ``Confluence``; the merged result is then
        passed through ``transform.DropDuplicate`` with the fund id column as
        the subset.

        Returns:
            Confluence wrapping the de-duplicated stream.
        """
        d_xoi = cls._d_oi_manager()

        # Streams must be listed in the order required by the business rules:
        # the higher the priority, the earlier the stream appears.
        prioritized = [cls.stream_manager_000001()]
        prioritized.extend([
            cls.stream_manager_010002_(d_xoi),
            cls.stream_manager_010005_(d_xoi),
            cls.stream_manager_010004_(d_xoi),
            cls.stream_manager_010003_(d_xoi),
        ])
        prioritized.extend([
            cls.stream_manager_010002(d_xoi),
            cls.stream_manager_010005(d_xoi),
            cls.stream_manager_010004(d_xoi),
            cls.stream_manager_010003(d_xoi),
        ])

        merged = Confluence(*prioritized)
        dedupe = transform.DropDuplicate(subset=[FundOrgMapping.fund_id.name])
        return Confluence(Stream(merged, transform=[dedupe]))
Пример #2
0
 def confluence(cls):
     """Merge the investment-years, graduate-school, resume and gf streams on ``person_id``.

     Returns:
         Confluence of the eight streams, in the order listed below.
     """
     sources = (
         cls.stream_020001_investment_years(),
         cls.stream_020002_investment_years(),
         cls.stream_020003_graduate_school(),
         cls.stream_020003_investment_years(),
         cls.stream_020001_resume(),
         cls.stream_020002_resume(),
         cls.stream_020003_resume(),
         cls.stream_gf(),
     )
     return Confluence(*sources, on=["person_id"])
Пример #3
0
    def conflu_place(cls):
        """Build the stream that derives region, prov, city and area from ``address``.

        Rows with a non-null ``address`` are read from ``TEST_TABLE``. The result
        of ``cls._clean_place(address)`` is stored in a temporary ``__tmp`` column
        and its items 0..3 become the region, prov, city and area columns.

        Returns:
            Confluence wrapping the resulting stream.
        """
        query = "SELECT org_id, address FROM {tb_test} WHERE address IS NOT NULL".format(tb_test=TEST_TABLE)
        source = MysqlInput(ENGINE_RD, query)

        # Step 1: clean the raw address once and park the result in "__tmp".
        parse_address = transform.ValueMap({
            "__tmp": (lambda address: cls._clean_place(address), "address")
        })

        # Step 2: fan the parsed value out into the four target columns.
        split_parts = transform.ValueMap({
            OrgInfo.region.name: (lambda parts: parts[0], "__tmp"),
            OrgInfo.prov.name: (lambda parts: parts[1], "__tmp"),
            OrgInfo.city.name: (lambda parts: parts[2], "__tmp"),
            OrgInfo.area.name: (lambda parts: parts[3], "__tmp"),
        })

        # Step 3: select only the key and the derived columns (no renaming).
        kept_columns = [
            OrgInfo.org_id.name,
            OrgInfo.region.name,
            OrgInfo.prov.name,
            OrgInfo.city.name,
            OrgInfo.area.name,
        ]
        select_keys = transform.MapSelectKeys(dict.fromkeys(kept_columns))

        return Confluence(Stream(source, transform=[parse_address, split_parts, select_keys]))
Пример #4
0
    def conflu_is_reg_now(cls):
        """Build the stream that computes ``is_reg_now`` for each org.

        The query RIGHT JOINs onto the orgs of ``TEST_TABLE`` whose category is
        '私募基金管理公司', so ``org_id_`` is NULL for orgs without a matching
        ``x_org_info`` row at a version from the last 8 days. ``is_reg_now`` is
        "否" when ``org_id_`` is None and "是" otherwise.

        Returns:
            Confluence wrapping the resulting stream.
        """
        # Lower bound for x_org_info.version: now minus 8 days, as YYYYMMDDHH.
        since_version = (dt.datetime.now() - dt.timedelta(8)).strftime("%Y%m%d%H")

        template = (
            "SELECT xoi.org_id as org_id_, oi.org_id "
            "FROM crawl_private.x_org_info xoi "
            "JOIN (SELECT DISTINCT org_id, version FROM crawl_private.x_org_info WHERE version >= {ver} ) tmp "
            "ON xoi.org_id = tmp.org_id AND xoi.version = tmp.version "
            "JOIN (SELECT matched_id, source_id FROM base.id_match WHERE id_type = 2 AND source = '010001' AND is_used = 1) im "
            "ON xoi.org_id = im.source_id "
            "RIGHT JOIN (SELECT org_id FROM {tb_test} WHERE org_category = '私募基金管理公司') oi "
            "ON im.matched_id = oi.org_id "
        )
        query = template.format(tb_test=TEST_TABLE, ver=since_version)
        source = MysqlInput(ENGINE_RD, query)

        flag_registered = transform.ValueMap({
            OrgInfo.is_reg_now.name: (lambda matched_id: "是" if matched_id is not None else "否", "org_id_")
        })

        select_keys = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            OrgInfo.is_reg_now.name: None
        })

        return Confluence(Stream(source, [flag_registered, select_keys]))
Пример #5
0
    def conflu_is_member(cls):
        """Build the stream that normalises ``is_member`` and ``member_type``.

        Rules applied per row read from ``TEST_TABLE``:

        * ``is_member`` is forced to "否" when ``is_reg_now`` is "否".
        * ``member_type`` is "注销备案资格" or "未备案" for orgs with
          ``is_reg_now`` "否" (depending on whether ``org_id`` starts with "P"),
          "尚未取得会员资格" for orgs with ``is_reg_now`` "是" and ``is_member``
          "否", and left unchanged otherwise.

        Returns:
            Confluence wrapping the resulting stream.
        """
        query = "SELECT org_id, is_reg_now, is_member, member_type FROM {tb_test} ".format(tb_test=TEST_TABLE)
        source = MysqlInput(ENGINE_RD, query)

        def resolve_member_type(is_reg, is_member, member_type, org_id):
            # is_reg_now == "否": the label depends on the org_id prefix.
            if is_reg == "否":
                return "注销备案资格" if org_id[0] == "P" else "未备案"
            # is_reg_now == "是" but is_member == "否".
            if is_reg == "是" and is_member == "否":
                return "尚未取得会员资格"
            return member_type

        normalise = transform.ValueMap({
            OrgInfo.is_member.name: (
                lambda is_reg, is_member: "否" if is_reg == "否" else is_member,
                "is_reg_now", "is_member",
            ),
            OrgInfo.member_type.name: (
                resolve_member_type,
                "is_reg_now", "is_member", "member_type", "org_id",
            ),
        })

        select_keys = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            OrgInfo.is_member.name: None,
            OrgInfo.member_type.name: None,
        })

        return Confluence(Stream(source, [normalise, select_keys]))
Пример #6
0
    def conflu1(cls, fund_ids=None):
        """Merge the y/x/d fund-info streams on ``fund_id`` with per-field source priorities.

        Args:
            fund_ids: Passed through unchanged to every stream factory.

        Returns:
            Confluence of the nine streams, built with the ``prio_l1`` mapping below.
        """
        # The 020008 stream (stream_d_fund_info_020008) is currently not included.
        sources = [
            cls.stream_y_fund_info(fund_ids),
            cls.stream_x_fund_info_010002(fund_ids),
            cls.stream_x_fund_info_010003(fund_ids),
            cls.stream_x_fund_info_010004(fund_ids),
            cls.stream_x_fund_info_010005(fund_ids),
            cls.stream_d_fund_info_020001(fund_ids),
            cls.stream_d_fund_info_020002(fund_ids),
            cls.stream_d_fund_info_020003(fund_ids),
            cls.stream_d_fund_info_020005(fund_ids),
        ]

        # level -> field -> (column, value) identifying the source for that field.
        field_priority = {
            0: {
                "fund_name": ("source_id", "000001"),
                "fund_full_name": ("source_id", "010002"),
                "reg_time": ("source_id", "010003"),
                "reg_code": ("source_id", "010002"),
            },
            1: {
                "reg_code": ("source_id", "010003"),
                "fund_full_name": ("source_id", "010003"),
            },
            2: {
                "fund_full_name": ("source_id", "010004"),
            },
            3: {
                "fund_full_name": ("source_id", "010005"),
            }
        }
        return Confluence(*sources, on=["fund_id"], prio_l1=field_priority)
Пример #7
0
    def conflu_master_strategy(cls):
        """Build the stream that fills ``master_strategy`` for each org.

        The query yields (org_id, stype_name) rows; ``DropDuplicate`` on
        ``org_id`` is applied before ``stype_name`` is mapped to the
        ``master_strategy`` column.

        Returns:
            Confluence wrapping the resulting stream.
        """
        # The outer query left-joins from the org_info table so that every
        # primary key gets updated, which covers changes in the source tables.
        # NOTE(review): picking the most frequent strategy appears to rely on
        # the derived table's ORDER BY surviving the outer join and on
        # DropDuplicate keeping the first row per org_id -- confirm both.
        template = (
            "SELECT t1.org_id, t2.stype_name FROM {tb_test} t1 "
            "LEFT JOIN (SELECT oi.org_id, ftm.stype_name "
            "FROM {tb_test} oi "
            "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id "
            "JOIN (SELECT fund_id, stype_code, stype_name FROM fund_type_mapping "
            "WHERE typestandard_code = 601 AND stype_code <> 6010901 AND flag = 1) ftm ON fom.fund_id = ftm.fund_id "
            "WHERE fom.org_type_code = 1 "
            "GROUP BY oi.org_id, ftm.stype_code "
            "ORDER BY org_id ASC, COUNT(fom.fund_id) DESC) t2 ON t1.org_id = t2.org_id "
        )
        source = MysqlInput(ENGINE_RD, template.format(tb_test=TEST_TABLE))

        one_per_org = transform.DropDuplicate(subset=["org_id"])
        rename = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            "stype_name": OrgInfo.master_strategy.name
        })

        return Confluence(Stream(source, [one_per_org, rename]))
Пример #8
0
 def confluence(cls):
     """Merge ``fund_security_data0507`` and stream 020001 on the five-column key below.

     Returns:
         Confluence of the two streams.
     """
     join_keys = ['fund_id', 'statistic_date', 'id', 'source_id', 'security_category']
     sources = (cls.fund_security_data0507(), cls.stream_020001())
     return Confluence(*sources, on=join_keys)
Пример #9
0
 def confluence(cls):
     """Merge streams 020001, 020002 and 020003 on ``org_id`` and ``fund_id``.

     Returns:
         Confluence of the three streams.
     """
     sources = (cls.stream_020001(), cls.stream_020002(), cls.stream_020003())
     return Confluence(*sources, on=["org_id", "fund_id"])
Пример #10
0
def main():
    """Merge the five bond-info streams on the bond id and write the result via ``io.to_sql``."""
    sources = (
        stream_000001(),
        stream_010001(),
        stream_020001(),
        stream_020002(),
        stream_020003(),
    )
    merged = Confluence(*sources, on=[BondInfo.bond_id.name])
    io.to_sql(BondInfo.__tablename__, engine_w, merged.dataframe)
Пример #11
0
 def confluence(cls):
     """Merge streams 010xxx, 020001, 020002 and 020003 on ``fund_id`` and ``statistic_date``.

     Returns:
         Confluence of the four streams.
     """
     sources = (
         cls.stream_010xxx(),
         cls.stream_020001(),
         cls.stream_020002(),
         cls.stream_020003(),
     )
     return Confluence(*sources, on=["fund_id", "statistic_date"])
Пример #12
0
 def confluence(cls):
     """Merge the fundaccount, securities, futures and private streams on ``fund_id``.

     Returns:
         Confluence of the four streams.
     """
     sources = (
         cls.stream_fundaccount(),
         cls.stream_securities(),
         cls.stream_futures(),
         cls.stream_private(),
     )
     return Confluence(*sources, on=["fund_id"])
Пример #13
0
    def confluence(cls):
        """Merge streams 020001, 020002 and 020003 on ``fund_id``.

        ``purchase_status`` gets a ``prio_l1`` mapping that lists source 020002
        at level 0 and source 020001 at level 1.

        Returns:
            Confluence of the three streams.
        """
        purchase_status_priority = {
            level: {"purchase_status": ("source_id", source)}
            for level, source in enumerate(("020002", "020001"))
        }
        sources = (cls.stream_020001(), cls.stream_020002(), cls.stream_020003())
        return Confluence(*sources, on=["fund_id"], prio_l1=purchase_status_priority)
Пример #14
0
    def conflu(cls):
        """Merge streams 000001 and 010001 on the org id, then apply ``DropKeys(["source_id"])``.

        ``org_full_name`` gets a ``prio_l1`` mapping that lists source 010001
        at level 0 and source 000001 at level 1.

        Returns:
            Confluence wrapping the merged stream after the ``DropKeys`` transform.
        """
        org_name_priority = {
            0: {
                "org_full_name": ("source_id", "010001"),
            },
            1: {
                "org_full_name": ("source_id", "000001"),
            },
        }
        first, second = cls.stream_000001(), cls.stream_010001()
        merged = Confluence(first, second, on=[OrgInfo.org_id.name], prio_l1=org_name_priority)

        drop_source = transform.DropKeys(["source_id"])
        return Confluence(Stream(merged, transform=[drop_source]))
Пример #15
0
 def confluence(cls):
     """Merge ``y_org_description`` with streams 020001, 020002, 020003 and 010001 on ``org_id``.

     Returns:
         Confluence of the five streams.
     """
     sources = (
         cls.y_org_description(),
         cls.stream_020001(),
         cls.stream_020002(),
         cls.stream_020003(),
         cls.stream_010001(),
     )
     return Confluence(*sources, on=["org_id"])
Пример #16
0
 def confluence(cls):
     """Merge the y_person_info stream with the 01/02 source streams on ``person_id``.

     Returns:
         Confluence of the six streams, in the order listed below.
     """
     sources = (
         cls.stream_y_person_info(),
         cls.stream_010001(),
         cls.stream_020001(),
         cls.stream_020002(),
         cls.stream_020001_op(),
         cls.stream_020003(),
     )
     return Confluence(*sources, on=["person_id"])
Пример #17
0
 def confluence(cls):
     """Merge streams 030001, 020001, 020002 and the three resume streams on ``person_id``.

     Returns:
         Confluence of the six streams, in the order listed below.
     """
     sources = (
         cls.stream_030001(),
         cls.stream_020001(),
         cls.stream_020002(),
         cls.stream_resume_020001(),
         cls.stream_resume_020002(),
         cls.stream_resume_020003(),
     )
     return Confluence(*sources, on=["person_id"])
Пример #18
0
    def confluence(cls):
        """Merge the six 0200xx streams on ``fund_id`` with per-field source priorities.

        The ``prio_l1`` mapping has the shape
        level -> field -> (column, value), naming a source for each field at
        each level.

        Returns:
            Confluence of the six streams.
        """
        sources = (
            cls.stream_020001(),
            cls.stream_020002(),
            cls.stream_020008(),
            cls.stream_020003(),
            cls.stream_020005(),
            cls.stream_020004(),
        )

        field_priority = {
            0: {
                "locked_time_limit": ("source_id", "020001"),
                "min_purchase_amount": ("source_id", "020001"),
                "min_append_amount": ("source_id", "020001"),
                "min_append_amount_remark": ("source_id", "020001"),
                "fee_subscription": ("source_id", "020001"),
                "fee_redeem": ("source_id", "020001"),
            },
            1: {
                "locked_time_limit": ("source_id", "020002"),
                "min_purchase_amount": ("source_id", "020002"),
                "min_append_amount": ("source_id", "020008"),
                "min_append_amount_remark": ("source_id", "020002"),
                "fee_subscription": ("source_id", "020002"),
                "fee_redeem": ("source_id", "020002"),
            },
            2: {
                "locked_time_limit": ("source_id", "020005"),
                "min_purchase_amount": ("source_id", "020008"),
                "min_append_amount": ("source_id", "020003"),
                "min_append_amount_remark": ("source_id", "020008"),
                "fee_subscription": ("source_id", "020008"),
                "fee_redeem": ("source_id", "020008"),
            },
            3: {
                "min_purchase_amount": ("source_id", "020003"),
                # NOTE(review): 020002 is already listed for locked_time_limit
                # at level 1 -- confirm this repetition is intended.
                "locked_time_limit": ("source_id", "020002"),
                "min_append_amount": ("source_id", "020004"),
                "min_append_amount_remark": ("source_id", "020003"),
                "fee_subscription": ("source_id", "020003"),
                "fee_redeem": ("source_id", "020003"),
            },
            4: {
                "min_purchase_amount": ("source_id", "020004"),
                "min_append_amount_remark": ("source_id", "020004"),
                "fee_subscription": ("source_id", "020004"),
                "fee_redeem": ("source_id", "020004"),
            }
        }

        return Confluence(*sources, on=["fund_id"], prio_l1=field_priority)
Пример #19
0
    def conflu_1(cls):
        """
            Merge the 010001, 020001 (op), 020002 (op) and y source streams
            on ``person_id``, ``org_id`` and ``duty``.

        Returns:
            base.Confluence

        """
        sources = (
            cls.stream_010001(),
            cls.stream_op_020001(),
            cls.stream_op_020002(),
            cls.stream_y(),
        )
        return Confluence(*sources, on=["person_id", "org_id", "duty"])
Пример #20
0
    def conflu_2(cls):
        """
            Merge the 010101 source stream with confluence 1 on ``org_id`` and
            ``person_id``, then drop the rows whose ``duty`` is missing.

        Returns:
            The merged confluence's ``dataframe`` after
            ``dropna(subset=['duty'])`` -- a dataframe, not a Confluence.

        """
        first_confluence = cls.conflu_1()
        stream_010101 = cls.stream_010101()

        merged = Confluence(stream_010101, first_confluence, on=["org_id", "person_id"])
        return merged.dataframe.dropna(subset=['duty'])
Пример #21
0
    def conflu_fund_num(cls):
        """Build the stream carrying ``fund_num`` and ``fund_total_num`` per org.

        Two queries are run against ``TEST_TABLE``: one counts the funds whose
        ``fund_status`` is '运行中', the other counts all funds
        (``org_type_code = 1`` in both). The two results are outer-joined on
        ``org_id`` and missing counts are replaced by 0.

        Returns:
            Confluence wrapping the resulting stream.
        """
        # The outer query left-joins from the org_info table so that every
        # primary key gets updated, which covers changes in the source tables.
        # An inner JOIN here would silently skip orgs that no longer have any
        # matching fund, leaving their old counts untouched.
        sql_operating = (
            "SELECT t1.org_id, t2.fund_num FROM {tb_test} t1 "
            "LEFT JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_num FROM {tb_test} oi "
            "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id "
            "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id "
            "WHERE fom.org_type_code = 1 AND fi.fund_status = '运行中' "
            "GROUP BY fom.org_id) t2 "
            "ON t1.org_id = t2.org_id "
        ).format(tb_test=TEST_TABLE)

        sql_total = (
            "SELECT t1.org_id, t2.fund_total_num FROM {tb_test} t1 "
            "LEFT JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_total_num FROM {tb_test} oi "
            "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id "
            "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id "
            "WHERE fom.org_type_code = 1 "
            "GROUP BY fom.org_id) t2 "
            "ON t1.org_id = t2.org_id"
        ).format(tb_test=TEST_TABLE)

        inp = MysqlInput(ENGINE_RD, sql_operating)
        inp_total = MysqlInput(ENGINE_RD, sql_total)

        jn = transform.Join(inp_total, how="outer", on="org_id")

        def missing_to_zero(x):
            # A missing count may arrive as None or NaN; np.isnan(None) would
            # raise TypeError, so None is checked first.
            return 0 if x is None or np.isnan(x) else x

        vm = transform.ValueMap({
            "fund_num": missing_to_zero,
            "fund_total_num": missing_to_zero,
        })

        sk = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            "fund_num": OrgInfo.fund_num.name,
            "fund_total_num": OrgInfo.fund_total_num.name
        })

        s = Stream(inp, [jn, vm, sk])
        return Confluence(s)
Пример #22
0
def main():
    """Merge the four bond-info streams with per-field source priorities and write the result.

    The merged dataframe is written via ``io.to_sql`` after its source-id
    column is dropped.
    """
    sources = (
        stream_010001(),
        stream_020001(),
        stream_020002(),
        stream_020003(),
    )

    source_col = DBondInfo.source_id.name

    # 122577.SH, par_value
    # level -> field -> (column, value) identifying the source for that field.
    key_priority = {
        0: {
            BondInfo.issue_price.name: (source_col, "020002"),
            BondInfo.issue_amount.name: (source_col, "010001"),
            BondInfo.coupon_rate.name: (source_col, "020001"),
            BondInfo.maturity_date.name: (source_col, "010001"),
            BondInfo.value_date.name: (source_col, "020003"),
        },
        1: {
            BondInfo.issue_price.name: (source_col, "020001"),
            BondInfo.issue_amount.name: (source_col, "020001"),
            BondInfo.coupon_rate.name: (source_col, "020002"),
            BondInfo.maturity_date.name: (source_col, "020002"),
        },
        2: {
            BondInfo.issue_price.name: (source_col, "010001"),
            BondInfo.issue_amount.name: (source_col, "020002"),
            BondInfo.coupon_rate.name: (source_col, "010001"),
            BondInfo.maturity_date.name: (source_col, "020003"),
        },
        3: {
            BondInfo.maturity_date.name: (source_col, "020001"),
        },
    }

    merged = Confluence(*sources, on=BondInfo.bond_id.name, prio_l1=key_priority)
    result = merged.dataframe.drop(DBondInfo.source_id.name, axis=1)
    io.to_sql(BondInfo.__tablename__, engine_w, result)
Пример #23
0
    def conflu_pinyin(cls):
        """Build the stream that derives ``org_name_py`` from ``org_name``.

        Parenthesised parts are removed from ``org_name`` first; the result is
        passed to ``py(..., style=py_style.FIRST_LETTER)`` and the first item
        of every returned element is joined and upper-cased.

        Returns:
            Confluence wrapping the resulting stream.
        """
        sql = "SELECT org_id, org_name FROM {tb_test}".format(tb_test=TEST_TABLE)
        inp = MysqlInput(ENGINE_RD, sql)

        # Remove parenthesised parts, written with either full-width （…） or
        # ASCII (...) parentheses. The former pattern "(.*)|\(.*\)" used an
        # unescaped group that matched the whole name and blanked it.
        parenthesised = re.compile(r"（.*）|\(.*\)")

        vm1 = transform.ValueMap({
            OrgInfo.org_name.name: lambda name: parenthesised.sub("", name)
        })

        vm2 = transform.ValueMap({
            OrgInfo.org_name_py.name: (
                lambda name: "".join(
                    [item[0] for item in py(name, style=py_style.FIRST_LETTER)]
                ).upper(),
                OrgInfo.org_name.name
            )
        })

        sk = transform.MapSelectKeys({
            OrgInfo.org_id.name: None,
            OrgInfo.org_name_py.name: None
        })
        s = Stream(inp, transform=(vm1, vm2, sk))

        return Confluence(s)
Пример #24
0
def conflu1(fund_ids=None):
    """Merge the y/x/d fund-info streams on ``fund_id`` with per-field source priorities.

    Args:
        fund_ids: Passed through unchanged to every stream factory.

    Returns:
        Confluence of the seven streams, built with the ``prio_l1`` mapping below.
    """
    sources = [
        stream_y_fund_info(fund_ids),
        stream_x_fund_info_010002(fund_ids),
        stream_x_fund_info_010003(fund_ids),
        stream_x_fund_info_010004(fund_ids),
        stream_x_fund_info_010005(fund_ids),
        stream_d_fund_info_020001(fund_ids),
        stream_d_fund_info_020002(fund_ids),
    ]

    # level -> field -> (column, value) identifying the source for that field.
    field_priority = {
        0: {
            "fund_name": ("source_id", "000001"),
            "fund_full_name": ("source_id", "010002"),
            "reg_time": ("source_id", "010003"),
            "reg_code": ("source_id", "010002"),
        },
        1: {
            "reg_code": ("source_id", "010003"),
            "fund_full_name": ("source_id", "010003"),
        },
        2: {
            "fund_full_name": ("source_id", "010004"),
        },
        3: {
            "fund_full_name": ("source_id", "010005"),
        }
    }
    return Confluence(*sources, on=["fund_id"], prio_l1=field_priority)
Пример #25
0
    def conflu_total_asset_mgt_scale_(cls):
        """Build the stream that fills ``total_asset_mgt_scale`` for each org.

        For every org the query sums ``asset_scale`` over its funds
        (``org_type_code = 1``), using each fund's row with the latest
        ``statistic_date``.

        Returns:
            Confluence wrapping the resulting stream.
        """
        # The outer query left-joins from the org_info table so that every
        # primary key gets updated, which covers changes in the source tables.
        template = (
            "SELECT t1.org_id, t2.total_asset_mgt_scale "
            "FROM {tb_test} t1 "
            "LEFT JOIN (SELECT oi.org_id, SUM(fas.asset_scale) total_asset_mgt_scale FROM {tb_test} oi "
            "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id "
            "JOIN base.fund_asset_scale fas ON fom.fund_id = fas.fund_id "
            "JOIN (SELECT fund_id, MAX(statistic_date) md FROM base.fund_asset_scale GROUP BY fund_id ) fas_latest "
            "ON fas.fund_id = fas_latest.fund_id AND fas.statistic_date = fas_latest.md "
            "WHERE fom.org_type_code = 1 "
            "GROUP BY org_id) t2 "
            "ON t1.org_id = t2.org_id"
        )
        source = MysqlInput(ENGINE_RD, template.format(tb_test=TEST_TABLE))

        rename = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            "total_asset_mgt_scale": OrgInfo.total_asset_mgt_scale.name
        })

        return Confluence(Stream(source, [rename]))
Пример #26
0
 def confluence(cls):
     """Merge streams 020001 and 020003 on the four-column key below.

     Returns:
         Confluence of the two streams.
     """
     join_keys = ["fund_id", "data_source", "statistic_date", "subject_id"]
     return Confluence(cls.stream_020001(), cls.stream_020003(), on=join_keys)
Пример #27
0
 def confluence(cls):
     """Wrap the single 000001 stream in a Confluence keyed on ``fund_id``.

     Returns:
         Confluence of the one stream.
     """
     return Confluence(cls.stream_000001(), on=["fund_id"])
Пример #28
0
 def confluence_2(cls):
     """Wrap the single ``stream_name`` stream in a Confluence keyed on ``org_id``.

     Returns:
         Confluence of the one stream.
     """
     return Confluence(cls.stream_name(), on=["org_id"])
Пример #29
0
 def confluence(cls):
     """Merge the 03xxxx type1/type2 and 04xxxx type2 streams on ``fund_id`` and ``org_type_code``.

     Returns:
         Confluence of the three streams.
     """
     sources = (
         cls.stream_03xxxx_type1(),
         cls.stream_03xxxx_type2(),
         cls.stream_04xxxx_type2(),
     )
     return Confluence(*sources, on=["fund_id", "org_type_code"])
Пример #30
0
def main():
    """Wrap stream 020004 in a Confluence and write its dataframe via ``io.to_sql`` with ``chunksize=500``."""
    merged = Confluence(stream_020004())
    io.to_sql(BondRating.__tablename__, engine_w, merged.dataframe, chunksize=500)