Exemplo n.º 1
0
    def stream_manager_010003(cls, d_xoi=None):
        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, type_name_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_private xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_private GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010003' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi " \
                     "ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010003' " \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        def clean_org_type(mng_tp, tp):
            # 私募证券投资基金 = 证券投资基金; 其他私募投资基金 = 其他投资基金
            if mng_tp == "顾问管理":
                if tp in {"私募证券投资基金", "证券投资基金"}:
                    return "基金管理人"

            elif mng_tp == "受托管理":
                if tp in {"私募证券投资基金", "证券投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金", "股权投资基金"}:
                    return "基金管理人"

            elif mng_tp == "自我管理":
                if tp in {"私募证券投资基金", "证券投资基金", "股权投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金"}:
                    return "基金管理人"

        if d_xoi is None:
            d_xoi = cls._d_oi_manager()

        vm = transform.ValueMap(
            OrderedDict([
                ("fund_issue_org_amac", lambda x: x.strip()),
                ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
                (FundOrgMapping.org_type.name,
                 (lambda mng_tp, tp: clean_org_type(mng_tp, tp), "manage_type_amac", "type_name_amac")),
                (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
            ])
        )

        jn = transform.Join(inp2, how="left", on="org_id")

        dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        vm2 = transform.ValueMap({
            FundOrgMapping.org_type_code.name: (lambda x: 2 if x == "基金管理人" else None, FundOrgMapping.org_type.name),
        })

        sk = transform.MapSelectKeys(cls.FIELDS)

        s = Stream(inp, transform=(vm, jn, dn, vm2, sk))
        return s
Exemplo n.º 2
0
def stream_020002():
    DATA_SOURCE = "020002"
    session = dbsession(bind=ENGINE)
    stmt = session.query(IdMatch).join(
        DOrgPerson,
        and_(IdMatch.source_id == DOrgPerson.person_id,
             IdMatch.data_source == DOrgPerson.data_source,
             IdMatch.id_type == 3, IdMatch.is_used == 1,
             IdMatch.data_source == DATA_SOURCE)).join(
                 PersonInfo,
                 and_(IdMatch.matched_id == PersonInfo.person_id,
                      IdMatch.id_type == 3, IdMatch.is_used == 1,
                      IdMatch.data_source == DATA_SOURCE)).with_entities(
                          IdMatch.matched_id.label(
                              OrgPersonMapping.person_id.name),
                          PersonInfo.person_name, DOrgPerson.org_id,
                          DOrgPerson.tenure_date, DOrgPerson.dimission_date,
                          DOrgPerson.is_current, DOrgPerson.duty)

    inp = MysqlInput(session.bind, stmt)

    fi = stream_oi(DATA_SOURCE)

    jn = transform.Join(fi,
                        left_on=DOrgPerson.org_id.name,
                        right_on=IdMatch.source_id.name)

    sk = transform.MapSelectKeys({
        OrgPersonMapping.person_id.name:
        None,
        PersonInfo.person_name.name:
        OrgPersonMapping.person_name.name,
        IdMatch.matched_id.name:
        OrgPersonMapping.org_id.name,
        DOrgPerson.duty.name:
        OrgPersonMapping.duty.name,
        OrgInfo.org_name.name:
        OrgPersonMapping.org_name.name,
        DOrgPerson.tenure_date.name:
        OrgPersonMapping.tenure_date.name,
        DOrgPerson.dimission_date.name:
        OrgPersonMapping.dimission_date.name,
        DOrgPerson.is_current.name:
        OrgPersonMapping.is_current.name
    })

    s = base.Stream(inp, (jn, sk))
    return s
Exemplo n.º 3
0
    def stream_consultant_010004(cls, d_xoi=None):
        """
            清洗010004源(证券公司)发行产品的投顾公司配对信息;

        Args:
            d_xoi:

        Returns:

        """

        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_securities xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_securities GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010004' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010004'" \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        if d_xoi is None:
            d_xoi = cls._d_oi_consultant()

        vm = transform.ValueMap(
            OrderedDict([
                ("fund_issue_org_amac", lambda x: x.strip()),
                ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
                (FundOrgMapping.org_type.name, (lambda x: {"主动管理": "投资顾问"}.get(x), "manage_type_amac")),
                (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
            ])
        )

        jn = transform.Join(inp2, how="left", on="org_id")

        dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        vm2 = transform.ValueMap({
            FundOrgMapping.org_type_code.name: (lambda x: 1 if x == "投资顾问" else None, FundOrgMapping.org_type.name),
        })

        dd = transform.DropDuplicate(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        sk = transform.MapSelectKeys(cls.FIELDS)
        s = Stream(inp, transform=[vm, jn, dn, vm2, dd, sk])  # 先去空, 避免空值产生, 导致整形列变成浮点型
        return s
Exemplo n.º 4
0
    def stream_manager_010005(cls, d_xoi=None):
        """
            清洗010005源(期货公司)发行产品的基金管理人配对信息;

        Args:
            d_xoi:

        Returns:

        """

        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_futures xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_futures GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010005' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi " \
                     "ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010005' " \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        if d_xoi is None:
            d_xoi = cls._d_oi_manager()

        vm = transform.ValueMap(OrderedDict([
            ("fund_issue_org_amac", lambda x: x.strip()),
            ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
            (FundOrgMapping.org_type.name, "基金管理人"),  # 该来源的fund_issue_org_amac都是基金管理人;
            (FundOrgMapping.org_type_code.name, 2),
            (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
        ]))

        jn = transform.Join(inp2, how="left", on="org_id")

        sk = transform.MapSelectKeys(cls.FIELDS)
        dn = transform.Dropna(
            subset=[FundOrgMapping.fund_id.name, FundOrgMapping.org_id.name, FundOrgMapping.org_type_code.name])

        s = Stream(inp, transform=(vm, jn, sk, dn,))
        return s
Exemplo n.º 5
0
    def conflu_fund_num(cls):
        # fund_num, fund_total_num

        # 外层以org_info表左连接查询, 以确保所有主键都会被更新到, 以处理源表数据变动的情况;
        sql_operating = "SELECT t1.org_id, t2.fund_num FROM {tb_test} t1 " \
                        "JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_num FROM {tb_test} oi " \
                        "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id " \
                        "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id " \
                        "WHERE fom.org_type_code = 1 AND fi.fund_status = '运行中'" \
                        "GROUP BY fom.org_id) t2 " \
                        "ON t1.org_id = t2.org_id ".format(tb_test=TEST_TABLE)

        sql_total = "SELECT t1.org_id, t2.fund_total_num FROM {tb_test} t1 " \
                    "JOIN (SELECT oi.org_id, COUNT(fom.fund_id) as fund_total_num FROM {tb_test} oi " \
                    "JOIN base.fund_org_mapping fom ON oi.org_id = fom.org_id " \
                    "JOIN base.fund_info fi ON fom.fund_id = fi.fund_id " \
                    "WHERE fom.org_type_code = 1 " \
                    "GROUP BY fom.org_id) t2 " \
                    "ON t1.org_id = t2.org_id".format(tb_test=TEST_TABLE)

        inp = MysqlInput(ENGINE_RD, sql_operating)

        inp_total = MysqlInput(ENGINE_RD, sql_total)

        jn = transform.Join(inp_total, how="outer", on="org_id")

        vm = transform.ValueMap({
            "fund_num": lambda x: 0 if np.isnan(x) else x,
            "fund_total_num": lambda x: 0 if np.isnan(x) else x,
        })

        sk = transform.MapSelectKeys({
            "org_id": OrgInfo.org_id.name,
            "fund_num": OrgInfo.fund_num.name,
            "fund_total_num": OrgInfo.fund_total_num.name
        })

        s = Stream(inp, [jn, vm, sk])
        return Confluence(s)
Exemplo n.º 6
0
def stream_020001():
    DATA_SOURCE = "020001"
    session = dbsession(bind=ENGINE)
    stmt = session.query(IdMatch).join(
        DOrgPerson,
        and_(IdMatch.source_id == DOrgPerson.person_id,
             IdMatch.data_source == DOrgPerson.data_source,
             IdMatch.id_type == 3, IdMatch.is_used == 1,
             IdMatch.data_source == DATA_SOURCE)).join(
                 PersonInfo,
                 and_(IdMatch.matched_id == PersonInfo.person_id,
                      IdMatch.id_type == 3, IdMatch.is_used == 1,
                      IdMatch.data_source == DATA_SOURCE)).with_entities(
                          IdMatch.matched_id.label(
                              OrgPersonMapping.person_id.name),
                          PersonInfo.person_name, DOrgPerson.org_id,
                          DOrgPerson.tenure_date,
                          DOrgPerson.dimission_date.name,
                          DOrgPerson.is_current, DOrgPerson.duty)

    inp = MysqlInput(session.bind, stmt)

    oi = stream_oi(DATA_SOURCE)

    def clean_duty_detail(duty_detail):
        duty_detail = duty_detail.strip()
        duty_detail = duty_detail.replace("(", "(")
        duty_detail = duty_detail.replace(")", ")")
        duty_detail = duty_detail.replace("基金经理,", "")
        return duty_detail

    vm1 = transform.ValueMap({
        OrgPersonMapping.duty_detail.name:
        (lambda x: clean_duty_detail(x), DOrgPerson.duty.name)
    })

    vm2 = transform.ValueMap(
        {OrgPersonMapping.duty.name: lambda x: x if x == "基金经理" else "高管"})

    jn = transform.Join(oi,
                        left_on=DOrgPerson.org_id.name,
                        right_on=IdMatch.source_id.name)

    sk = transform.MapSelectKeys({
        OrgPersonMapping.person_id.name:
        None,
        PersonInfo.person_name.name:
        OrgPersonMapping.person_name.name,
        IdMatch.matched_id.name:
        OrgPersonMapping.org_id.name,
        OrgInfo.org_name.name:
        OrgPersonMapping.org_name.name,
        OrgPersonMapping.duty.name:
        None,
        OrgPersonMapping.duty_detail.name:
        None,
        DOrgPerson.tenure_date.name:
        OrgPersonMapping.tenure_date.name,
        DOrgPerson.dimission_date.name:
        OrgPersonMapping.dimission_date.name,
        DOrgPerson.is_current.name:
        OrgPersonMapping.is_current.name
    })

    s = base.Stream(inp, (vm1, vm2, jn, sk))
    return s
Exemplo n.º 7
0
    def stream_consultant_010003(cls, d_xoi=None):
        """
            清洗010003源(私募基金公司)发行产品的投顾公司配对信息;

        Args:
            d_xoi:

        Returns:

        """

        sql_maxver = "SELECT im.matched_id, xfi.fund_issue_org_amac, manage_type_amac, type_name_amac, fi.fund_name, fi.foundation_date, fi.end_date, fi.fund_status " \
                     "FROM x_fund_info_private xfi " \
                     "JOIN (SELECT fund_id, MAX(version) latest_ver FROM x_fund_info_private GROUP BY fund_id) tb_latest " \
                     "ON xfi.version = tb_latest.latest_ver AND xfi.fund_id = tb_latest.fund_id " \
                     "JOIN base.id_match im ON im.source_id = xfi.fund_id AND im.id_type = 1 AND im.source = '010003' AND im.is_used = 1 " \
                     "LEFT JOIN base.fund_info fi ON im.matched_id = fi.fund_id AND im.id_type = 1 AND im.source = '010003'" \
                     "WHERE fund_name_amac NOT LIKE '%%信托计划'"

        inp = MysqlInput(ENGINE_C, sql_maxver)

        inp2 = MysqlInput(ENGINE_B, "SELECT org_id, org_name FROM base.org_info")

        def clean_org_type(manage_type, type_name):
            if manage_type == "顾问管理":
                if type_name in {"信托计划", "期货公司及其子公司的资产管理计划", "其他", "基金子公司", "基金专户",
                                 "证券公司及其子公司的资产管理计划", "银行理财产品", "保险公司及其子公司的资产管理计划",
                                 "私募证券投资基金", "私募投资基金"}:
                    return "投资顾问"

            elif manage_type == "受托管理":
                if type_name in {"创业投资基金", "其他私募投资基金", "其他投资基金", "股权投资基金"}:
                    return "投资顾问"

            elif manage_type == "自我管理":
                if type_name in {"私募证券投资基金", "私募投资基金", "股权投资基金", "创业投资基金", "其他私募投资基金", "其他投资基金"}:
                    return "投资顾问"

        if d_xoi is None:
            d_xoi = cls._d_oi_consultant()

        vm = transform.ValueMap(
            OrderedDict([
                ("fund_issue_org_amac", lambda x: x.strip()),
                ("org_id", (lambda x: d_xoi.get(x), "fund_issue_org_amac")),
                (FundOrgMapping.org_type.name,
                 (lambda mng_tp, tp: clean_org_type(mng_tp, tp), "manage_type_amac", "type_name_amac")),
                (FundOrgMapping.is_current.name, (lambda x: cls._clean_iscurrent(x), "fund_status"))
            ])
        )

        jn = transform.Join(inp2, how="left", on="org_id")

        dn = transform.Dropna(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        vm2 = transform.ValueMap({
            FundOrgMapping.org_type_code.name: (lambda x: 1 if x == "投资顾问" else None, FundOrgMapping.org_type.name),
        })

        dd = transform.DropDuplicate(subset=["matched_id", "org_id", FundOrgMapping.org_type.name])

        sk = transform.MapSelectKeys(
            {
                "matched_id": FundOrgMapping.fund_id.name,
                "fund_name": FundOrgMapping.fund_name.name,
                "org_id": FundOrgMapping.org_id.name,
                "org_name": FundOrgMapping.org_name.name,
                FundOrgMapping.org_type_code.name: None,
                FundOrgMapping.org_type.name: None,
            }
        )
        s = Stream(inp, transform=(vm, jn, dn, vm2, dd, sk))
        return s
Exemplo n.º 8
0
    stream = Stream(inp.frame, ())
    return stream


class Confluence:
    def __init__(self, **streams):
        self._streams = streams


s1 = stream_x_fund_info_account()
s1.flow()

s2 = stream_x_fund_info_private()
s2.flow()

s3 = stream_x_fund_info_security()
s3.flow()

s4 = stream_x_fund_info_future()
s4.flow()

im = MysqlInput(engine_b, "\
SELECT matched_id, source_id FROM id_match WHERE id_type = 1 AND source = '010001' AND is_used = 1")
im.frame

j = transform.Join(on="source_id")
j.process(s1.frame, im.frame)
j.process(s2.frame, im.frame)
j.process(s3.frame, im.frame)
j.process(s4.frame, im.frame)