Пример #1
0
def main():
    """Rebuild strategy/issuance/structure type columns on fund_info.

    Reads fund-type mappings (sql_ftm) and fund ids (sql_fi), pivots the three
    type standards into one row per fund, and upserts into fund_info.
    """
    print("fetching get_data...")
    ftm = pd.read_sql(sql_ftm, engine)
    if len(ftm) == 0:
        print("there is no get_data to be updated...")
        return None
    fi = pd.read_sql(sql_fi, engine)

    # typestandard 1: strategy, rendered as "<type>_<subtype>"
    ftm_stra = ftm.loc[ftm["typestandard_code"] == 1,
                       ["fund_id", "type_name", "stype_name"]]
    ftm_stra["fund_type_strategy"] = (
        ftm_stra["type_name"] + "_" + ftm_stra["stype_name"])
    ftm_stra = ftm_stra[["fund_id", "fund_type_strategy"]]

    # typestandard 3: issuance (only the top-level type is kept)
    ftm_issu = ftm.loc[ftm["typestandard_code"] == 3,
                       ["fund_id", "type_name", "stype_name"]]
    ftm_issu = ftm_issu[["fund_id", "type_name"]]
    ftm_issu.columns = ["fund_id", "fund_type_issuance"]

    # typestandard 4: structure
    ftm_stru = ftm.loc[ftm["typestandard_code"] == 4,
                       ["fund_id", "type_name", "stype_name"]]
    # BUG FIX: the original assigned a nested list ([[...]]), which turns the
    # columns into a MultiIndex instead of plain labels and is inconsistent
    # with the flat-list assignments above; the merge on "fund_id" below
    # expects flat column labels.
    ftm_stru.columns = [
        "fund_id", "fund_type_structure", "structure_hierarchy"
    ]

    tmp = fi.copy()
    tmp = tmp[["fund_id"]]
    # right-merges keep every fund that has a type record
    result = pd.merge(tmp, ftm_stra, on="fund_id", how="right")
    result = pd.merge(result, ftm_issu, on="fund_id", how="right")
    result = pd.merge(result, ftm_stru, on="fund_id", how="right")

    print("get_data to db...")

    io.to_sql("fund_info", engine, result, "update")
Пример #2
0
def main():
    """Fill missing or inconsistent org_info.region values parsed from address."""
    sql = "SELECT org_id, address, region FROM base.org_info"

    with engine_rd.connect() as conn:
        df_oi = pd.read_sql(sql, conn)

    # Keep orgs whose region is NOT contained in address (nulls replaced with
    # two distinct sentinels so NaN never matches anything), then drop rows
    # that have no address at all.
    mask = list(
        map(lambda region, address: region not in address,
            df_oi["region"].fillna("__NONE1"),
            df_oi["address"].fillna("__NONE2")))
    df = df_oi.loc[mask].dropna(subset=["address"])

    # For the remaining rows (address present, region missing or inconsistent),
    # re-extract region from the address: city match first, then province.
    df["region_1"] = df["address"].apply(lambda x: re.search(CITIES_re, x))
    df["region_1"] = df["region_1"].apply(lambda x: x.groups()[0]
                                          if x is not None else None)
    df["region_2"] = df["address"].apply(
        lambda x: re.search(PROVINCES_CHN_re, x))
    df["region_2"] = df["region_2"].apply(lambda x: x.groups()[0]
                                          if x is not None else None)
    df["region"] = df["region_1"].fillna(df["region_2"])
    del df["region_1"]
    del df["region_2"]
    del df["address"]
    df = df.dropna(subset=["region"])

    with engine_wt.connect() as conn:
        # FIX: dropped the explicit conn.close(); the with-block already
        # closes the connection on exit, so closing inside it is redundant.
        io.to_sql("org_info", conn, df)
Пример #3
0
def start_cal(oids, freq):
    """Calculate and persist org index series for the given orgs.

    Args:
        oids: iterable of org ids to process.
        freq: "w" (weekly) or "m" (monthly); selects the target tables below.
    """
    # Target tables keyed by frequency, each with a dynamic and a static variant.
    table = {
        "w": {
            "dynamic": "org_weekly_index",
            "static": "org_weekly_index_static"
        },
        "m": {
            "dynamic": "org_monthly_index",
            "static": "org_monthly_index_static"
        }
    }
    oi = OrgIndex(freq, engine_rd, engine_rd)
    tasks = list(oi.loop_by_orgs(oids, CALDATE))
    for task in tasks:
        for results in task:
            # Each result is a (dynamic, static) pair of frames.
            d, s = results
            if d is not None:
                # NOTE(review): only `d` is None-checked; `s` is assumed to be
                # non-None whenever `d` is — confirm against OrgIndex.
                # reset_index is applied twice: once in place, then again to
                # surface the remaining index level as columns — presumably a
                # MultiIndex result; verify before simplifying.
                d.reset_index(inplace=True)
                d = d.reset_index()[[
                    "sample_num", "index_value", "index_id", "index_name",
                    "org_id", "date"
                ]].rename(columns={"date": "statistic_date"})
                io.to_sql(table[freq]["dynamic"], engine_rd, d)

                s.reset_index(inplace=True)
                s = s.reset_index()[[
                    "sample_num", "index_value", "index_id", "index_name",
                    "org_id", "date"
                ]].rename(columns={"date": "statistic_date"})
                io.to_sql(table[freq]["static"], engine_rd, s)
Пример #4
0
 def write(cls):
     """Upsert the confluence result into base_public.fund_info and echo it."""
     merged = cls.confluence()
     frame = merged.dataframe
     io.to_sql("base_public.fund_info", ENGINE_RD, frame, type="update")
     print(frame)
Пример #5
0
def test():
    """Refresh base_test.fund_manager_mapping_test from the confluence stream."""
    c = Streams.conflu()

    # FIX: removed a bare `c.dataframe` expression statement that discarded
    # its value and had no effect (assuming the property has no side effects).
    # Delete existing rows for the funds we are about to rewrite, then insert.
    io.delete("base_test.fund_manager_mapping_test", ENGINE_RD,
              c.dataframe[["fund_id"]].drop_duplicates())
    io.to_sql("base_test.fund_manager_mapping_test", ENGINE_RD, c.dataframe)
Пример #6
0
    def _merge_records(self, table, id_right, ids_wrong, engine):
        """Merge rows belonging to wrong ids into the row of the right id.

        For single-PK tables, the surviving row's empty cells are back-filled
        from the wrong rows and the wrong rows are deleted. For multi-PK
        tables, the wrong ids are rewritten in place with UPDATE IGNORE.

        Args:
            table: target table name.
            id_right: the id that survives the merge.
            ids_wrong: SQL-rendered tuple of ids to fold into id_right.
            engine: SQLAlchemy engine to execute against.
        """
        # NOTE(review): ids are interpolated into SQL strings; assumed to come
        # from trusted internal sources — confirm before exposing externally.
        pks = get_pk(table, engine)
        if len(pks) == 1:
            sql_right = "SELECT * FROM {tb} WHERE {id_col} = '{id_right}'".format(
                tb=table, id_col=self.ID_COLUMN, id_right=id_right)
            sql_wrong = "SELECT * FROM {tb} WHERE {id_col} IN {ids_wrong}".format(
                tb=table, id_col=self.ID_COLUMN, ids_wrong=ids_wrong)
            df_right, df_wrong = [
                pd.read_sql(sql, engine) for sql in (sql_right, sql_wrong)
            ]

            # Treat empty/abnormal strings as nulls so bfill can replace them.
            df_right = df_right.applymap(replace_abnormalstr)
            # FIX: DataFrame.append is deprecated (removed in pandas 2.0) and
            # fillna(method=...) is deprecated; use pd.concat + bfill instead.
            # Back-fill pulls values up from the wrong rows into the first
            # (right) row, then keep only that row.
            df_res = pd.concat([df_right, df_wrong]).bfill()[:1]

            sql_del = "DELETE FROM {tb} WHERE {id_col} IN {ids_wrong}".format(
                tb=table, id_col=self.ID_COLUMN, ids_wrong=ids_wrong)

            io.to_sql(table, engine, df_res, type="update")  # write merged row back
            engine.execute(sql_del)

        else:
            sql_upt = "UPDATE IGNORE {tb} SET {id_col} = '{id_right}' WHERE {id_col} IN {ids_wrong}".format(
                tb=table,
                id_col=self.ID_COLUMN,
                id_right=id_right,
                ids_wrong=ids_wrong)
            engine.execute(sql_upt)
Пример #7
0
def main():
    """Recompute fund_info.data_freq for funds with at least 3 NAV records."""
    sql_ids_used = "SELECT DISTINCT fund_id FROM fund_info \
    WHERE fund_id IN (SELECT fund_id FROM (SELECT fund_id, COUNT(fund_id) cnt FROM fund_nv_data_standard \
    GROUP BY fund_id HAVING cnt >= 3) as T)"

    ids_to_update = [x[0] for x in engine_rd.execute(sql_ids_used).fetchall()]

    fi = get_original(ids_to_update)

    result_m = is_monthly(ids_to_update)
    result_w = is_weekly(ids_to_update)
    result_d = is_daily(ids_to_update)

    # Precedence when a fund matches several frequencies: daily overrides
    # weekly, which overrides monthly.
    result = result_m.copy()
    result.update(result_w)
    result.update(result_d)

    # IDIOM FIX: dict.get(id_) replaces the verbose
    # `result[id_] if id_ in result.keys() else None` pattern.
    fi["data_freq2"] = [result.get(id_) for id_ in fi["fund_id"].tolist()]
    fi["data_freq2"] = fi["data_freq2"].fillna("其他")
    fi["data_freq"] = fi["data_freq2"]

    io.to_sql("fund_info", engine_wt, fi[["fund_id", "data_freq"]], "update")
Пример #8
0
def main():
    """Crawl bond ratings in batches of 500 ids and persist each batch."""
    bid_whole = get_id()
    step = 500
    for start in range(0, len(bid_whole), step):
        batch = bid_whole[start:start + step]
        res = crwal(batch)
        print(len(res), res)
        io.to_sql('d_bond_rating', engine_private, res)
Пример #9
0
 def write(cls):
     """Upsert the confluence result into base_public.org_portfolio_asset."""
     confluence = cls.confluence()
     io.to_sql(
         "base_public.org_portfolio_asset",
         ENGINE_RD,
         confluence.dataframe,
         type="update")
     cls.org_name()
Пример #10
0
def main():
    """Write fund type-source confluences from streams s01 and s02."""
    # s01 streams are built and written first, then the s02 streams, so any
    # ordering dependency between the two batches is preserved.
    first_batch = (s01.conflu_ts104(), s01.conflu_ts103(), s01.conflu_ts102())
    for conflu in first_batch:
        io.to_sql("base.fund_type_source", ENGINE_RD, conflu.dataframe)

    second_batch = (s02.conflu_ts20X(), s02.conflu_ts30X(),
                    s02.conflu_ts40X(), s02.conflu_ts501())
    for conflu in second_batch:
        io.to_sql("base.fund_type_source", ENGINE_RD, conflu.dataframe)
Пример #11
0
def main():
    """Fan out monthly index calculation across CPU cores and persist results."""
    worker_count = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=worker_count)
    frames = pool.map(calculate, range(1, 14))
    pool.close()
    pool.join()
    for frame in frames:
        io.to_sql("fund_month_index_static", engine_rd, frame)
Пример #12
0
 def write(cls):
     """Upsert the confluence result into base_public.org_position_stock."""
     confluence = cls.confluence()
     io.to_sql(
         "base_public.org_position_stock",
         ENGINE_RD,
         confluence.dataframe,
         type="update")
     cls.org_name()
Пример #13
0
def main(export_path=None):
    """Run `calculate` for groups 1..13 in parallel and persist each result.

    Args:
        export_path: optional path handed through to `calculate`.
    """
    job = partial(calculate, export_path=export_path)
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    frames = pool.map(job, range(1, 14))
    pool.close()
    pool.join()
    for frame in frames:
        io.to_sql(table.name, engine_rd, frame)
Пример #14
0
def fetch_nv(fids):
    """Copy org_info rows for the given org ids into the test schema.

    Args:
        fids: iterable of org_id strings.
    """
    # Render ids as a quoted, comma-separated SQL IN-list: 'a','b','c'.
    # (Clearer than the original escaped-quote concatenation; same output.)
    # NOTE(review): ids are interpolated into the SQL string; assumed trusted.
    fids = ",".join("'{}'".format(f) for f in fids)
    print(fids)
    sql = "select org_id, org_name, org_name_en, org_name_py, org_full_name, \
       org_category, found_date, base_date, org_code, reg_code, \
       reg_time, is_reg_now, is_member, member_type, master_strategy, \
       fund_num, manage_type, \
       asset_mgt_scale, property, reg_capital, real_capital, \
       real_capital_proportion, legal_person, is_qualified, \
       qualifying_way, region, prov, city, area, address, \
       reg_address, org_web, final_report_time, currency, \
       initiation_time, law_firm_name, lawyer_name, phone, profile, \
       legal_person_resume, prize, team, major_shareholder, \
       shareholder_structure, special_tips, investment_idea, email, \
       entry_time, update_time FROM org_info WHERE org_id in ({fids})".format(
        fids=fids)
    df = pd.read_sql(sql, engine)
    # NOTE(review): "fund_total_num" is not in the SELECT list above, so this
    # rename is currently a no-op — kept for safety, confirm if still needed.
    df.rename(columns={"fund_total_num": "total_fund_num"}, inplace=True)
    print(df)
    io.to_sql('test_gt.org_info', engine_test_gt, df, type="update")
Пример #15
0
    def _migrate_data(self, ids_to_mig):
        """Migrate rows from wrong ids onto the surviving right id.

        Args:
            ids_to_mig: dict<str: list>
                <id_right: [id_wrong1, id_wrong2, ...,]>

        Returns:
            bool: True on completion.
        """
        # Tables whose id column differs from the default self.ID_COLUMN.
        # (DRY FIX: this mapping replaces two duplicated if/elif chains that
        # differed only in the column name.)
        special_cols = {
            "config_private.sync_source": "pk",
            "id_match": "matched_id",
        }

        for id_right, ids_wrong in ids_to_mig.items():
            ids_wrong = sf.SQL.values4sql(ids_wrong)

            for table in self.MOD_NOT_KEPP_LIST.get(self._id_type, []):
                # Use UPDATE IGNORE: ids that would collide with an existing
                # primary key are skipped and handled later by DelHelper.
                if table in special_cols:
                    sql_upt = "UPDATE IGNORE {tb} SET {id_col} = '{id_right}' WHERE {id_col} IN {ids_wrong}".format(
                        tb=table,
                        id_col="`{}`".format(special_cols[table]),
                        id_right=id_right,
                        ids_wrong=ids_wrong)
                    engine_base.execute(sql_upt)

                else:
                    self._merge_records(table, id_right, ids_wrong,
                                        engine_base)

                print("Mov:", table)

            for table in self.MOD_AND_KEEP_LIST.get(self._id_type, []):
                col = special_cols.get(table,
                                       self.ID_COLUMN.replace("`", ""))

                sql_upt = "SELECT * FROM {tb} WHERE {id_col} IN {ids_wrong}".format(
                    tb=table,
                    id_col=col,
                    ids_wrong=ids_wrong)
                df = pd.read_sql(sql_upt, engine_base)
                df[col] = id_right

                # type="ignore": keep rows that already exist under id_right.
                io.to_sql(table, engine_base, df, type="ignore")

        return True
Пример #16
0
def main():
    """Refresh the CSI H11001 index series in market_index.

    FIX: removed commented-out dead code (the old FTP xls fetch path); the
    informative index-detail URL is kept below.
    """
    # Index details: http://www.csindex.com.cn/zh-CN/indices/index-detail/H11001
    result = fetch_parse_xlx(
        "http://www.csindex.com.cn/uploads/file/autofile/perf/H11001perf.xls")
    io.to_sql("market_index", engine, result, "update")
Пример #17
0
def main():
    """Upsert both fund_info confluence streams into base_public.fund_info."""
    primary = StreamsMain.confluence()
    frame = primary.dataframe.drop(["source_id"], axis=1)
    io.to_sql("base_public.fund_info", ENGINE_RD, frame, type='update')

    secondary = StreamsMain.confluence_2()
    io.to_sql("base_public.fund_info", ENGINE_RD, secondary.dataframe,
              type='update')
Пример #18
0
def sync(fids):
    """Copy rows for the given fund ids from the read DB to the write DB.

    Args:
        fids: iterable of fund_id strings.
    """
    # BUG FIX: str(tuple(fids)) renders a one-element input as "('x',)" —
    # the trailing comma is invalid SQL. Build the IN-list explicitly.
    # NOTE(review): ids are interpolated into the SQL string; assumed trusted.
    in_list = "({})".format(", ".join("'{}'".format(f) for f in fids))
    sql = "SELECT * FROM {tb} WHERE fund_id IN {fids}".format(
        tb=TABLE_TO_SYNC, fids=in_list)
    print("fetching...")
    print(sql)
    df = pd.read_sql(sql, ENGINE_R)
    print("writing...")
    io.to_sql(TABLE_TO_SYNC, ENGINE_W, df)
    print("done")
Пример #19
0
def main(export_path=None):
    """Compute weekly static index tables in parallel and persist them.

    Args:
        export_path: optional path handed through to `calculate`.
    """
    worker = partial(calculate, export_path=export_path)
    pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
    frames = pool.map(worker, range(1, 14))
    pool.close()
    pool.join()

    for frame in frames:
        io.to_sql("fund_weekly_index_static", engine_wt, frame)
Пример #20
0
def main():
    """Merge all bond-info source streams and write the combined frame."""
    streams = (
        stream_000001(),
        stream_010001(),
        stream_020001(),
        stream_020002(),
        stream_020003(),
    )
    merged = Confluence(*streams, on=[BondInfo.bond_id.name])
    io.to_sql(BondInfo.__tablename__, engine_w, merged.dataframe)
Пример #21
0
 def write(cls):
     """Write industry allocations, dropping error rows and dust positions."""
     frame = cls.confluence().dataframe
     keep = (frame["type"] != "ERROR") & (frame["proportion"] >= 0.00001)
     io.to_sql(
         "base_public.org_portfolio_industry",
         ENGINE_RD,
         frame.loc[keep],
         type="update")
     cls.org_name()
Пример #22
0
def main():
    """Run id matching for each crawler data source and persist the result."""
    data_sources = ["020001", "020002", "020003"]
    try:
        for data_source in data_sources:
            print(data_source)
            # FIX: match() was called twice per source (once for the print,
            # once for the write); compute it once and reuse.
            matched = match(data_source)
            print(matched)
            io.to_sql(IdMatch.__tablename__, engine_wt, matched)
    except Exception as e:
        # NOTE(review): a failure in one source aborts the remaining sources;
        # kept as-is to preserve existing behavior.
        print(data_source, e)
Пример #23
0
def main():
    """Compute and store mutual-fund ranks when today maps to a quarter end."""
    from utils.database import io
    from utils.algorithm.ranking.ranking import CalculateHelper

    # FIX: removed a dead `t = dt.date.today()` assignment that was
    # immediately overwritten by the next line.
    t = CalculateHelper.last_rank_date(dt.date.today())
    # Ranks are only produced on quarter-end dates.
    if (t.month, t.day) in {(3, 31), (6, 30), (9, 30), (12, 31)}:
        print("RANK DATE: ", t)
        f = MutRanking(t)
        res = f.rank_all
        io.to_sql("base_public.fund_rank", cfg.load_engine()["2Gb"], res)
Пример #24
0
def main():
    """Merge person-info source streams and write them to the person table."""
    streams = [
        stream_010001(),
        stream_020001(),
        stream_020002(),
        stream_020001_op(),
    ]
    merged = base.Confluence(*streams, on=[PersonInfo.person_id.name])
    io.to_sql(PersonInfo.__schema_table__, ENGINE, merged.dataframe)
Пример #25
0
def main():
    """Recompute swanav (adjusted NAV) for every fund and store it."""
    fund_ids = DataSetup.init_funds(DataSetup, engine)

    for fund_id in fund_ids:
        print(fund_id)
        df_nv, df_dividend, df_split = DataSetup.init_data(
            DataSetup, fund_id, engine)
        calculator = AdjustedNvCalculator(df_nv, df_dividend, df_split)
        adjusted = calculator.cal_by_nv()[["fund_id", "date", "adjusted_nav"]]
        adjusted.columns = ["fund_id", "statistic_date", "swanav"]
        io.to_sql("fund_nv", engine, adjusted)
Пример #26
0
def test1():
    """Back-fill quarter-end fund ranks for 2016 and 2017."""
    from utils.database import io

    for year in range(2016, 2018):
        for month in (3, 6, 9, 12):
            last_day = cld.monthrange(year, month)[1]
            rank_date = dt.date(year, month, last_day)
            print("RANK DATE: ", rank_date)
            ranking = MutRanking(rank_date)
            io.to_sql("base_public.fund_rank", cfg.load_engine()["2Gb"],
                      ranking.rank_all)
Пример #27
0
def cal(start=None, end=None):
    """Compute daily Fama three-factor results over a business-day range.

    Args:
        start: first date (defaults to 2015-04-01).
        end: last date (defaults to today).
    """
    if not start:
        start = dt.date(2015, 4, 1)
    if not end:
        end = dt.date.today()
    business_days = [ts.date()
                     for ts in pd.date_range(start, end, freq=const.bday_chn)]
    for end_date in business_days:
        try:
            prox = resultproxy.Fama3(end_date, "d")
            io.to_sql("factor_style_d", engine, prox.result)
            print(prox.result)
        except DataError:
            # Days without sufficient data are simply skipped.
            continue
Пример #28
0
 def to_db(self, dataframe):
     """Copy rows matching each record of `dataframe` from the new table
     back into the old one.

     Args:
         dataframe: rows whose values build the per-record WHERE condition.
     """
     dataframe = dataframe.dropna(how='all').drop_duplicates()
     # IDIOM FIX: iterate rows directly instead of range(len()) + iloc;
     # row.to_frame().T is the same one-row frame as pd.DataFrame(series).T.
     for _, row in dataframe.iterrows():
         record = row.to_frame().T
         condition = self.generate_condition(record)
         sql = "SELECT * FROM {tb} WHERE {criterion}".format(
             tb=self.new, criterion=condition)
         matched = pd.read_sql(sql, self.engine_new)
         print(matched)
         io.to_sql(self.old, self.engine_old, matched, type="update")
Пример #29
0
def cal(start=None, end=None):
    """Compute weekly Carhart four-factor results for each Friday in range.

    Args:
        start: first date (defaults to 2015-04-01).
        end: last date (defaults to today).
    """
    if not start:
        start = dt.date(2015, 4, 1)
    if not end:
        end = dt.date.today()
    fridays = [ts.date() for ts in pd.date_range(start, end, freq="W-FRI")]
    for end_date in fridays:
        try:
            prox = resultproxy.Carhart4(end_date, "w")
            io.to_sql("factor_style_w", engine, prox.result)
            print(prox.result)
        except DataError:
            # Weeks without sufficient data are simply skipped.
            continue
Пример #30
0
def main():
    """Write the main and derivative org confluence streams to the test table."""
    confluences = [
        StreamsMain.conflu(),
        StreamsDerivative.conflu_place(),
        StreamsDerivative.conflu_pinyin(),
        StreamsDerivative.conflu_is_reg_now(),
        StreamsDerivative.conflu_is_member(),
        StreamsDerivative.conflu_fund_num(),
        # StreamsDerivative.conflu_total_asset_mgt_scale_(),  # disabled upstream
        StreamsDerivative.conflu_master_strategy(),
    ]

    for conflu in confluences:
        io.to_sql(TEST_TABLE, ENGINE_RD, conflu.dataframe)