Exemplo n.º 1
0
def merge_stock_info():
    """
    合并 wind,ifind 数据到对应名称的表中
    :return:
    """
    table_name = 'stock_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df, wind_df, how='outer',
                         left_on='ths_code', right_on='wind_code', indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
        'sec_name': (String(20), prefer_left, {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
        'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
        'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
        'delist_date': (Date, prefer_left, {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
        'ipo_date': (Date, prefer_left, {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
        'pre_name': (Text, prefer_left, {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
        'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
        'exch_city': (String(20), get_value, {'key': 'exch_city'}),
        'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
        'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
        'stock_code': (String(20), prefer_left, {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
        'mkt': (String(20), get_value, {'key': 'mkt'}),
    }

    col_merge_rule_dic = {
        key: (val[1], val[2]) for key, val in col_merge_dic.items()
    }
    dtype = {
        key: val[0] for key, val in col_merge_dic.items()
    }
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Exemplo n.º 2
0
def merge_tushare_daily(ths_code_set: set = None, date_from=None):
    table_name = 'tushare_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`trade_date`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # 獲取各個表格數據
    # daily
    tushare_his_df = get_tushare_daily_df('tushare_stock_daily', date_from)
    tushare_ds_df = get_tushare_daily_df('tushare_daily_basic', date_from)
    tushare_suspend_df = get_tushare_daily_df('tushare_suspend', None)
    # quarterly

    tushare_merge_df = get_tushare_daily_df('tushare_stock_cashflow', None)

    # tushare_balancesheet_df = get_tushare_daily_df('tushare_stock_balancesheet', None)
    # tushare_icome_df = get_tushare_daily_df('tushare_stock_income', None)
    # tushare_indicator_df = get_tushare_daily_df('tushare_stock_fina_indicator', None)
    # tushare_merge_quarterly_df = merge_tushare_quarterly(None)
    # #
    # tushare_merge_df_one = pd.merge(tushare_cashflow_df, tushare_balancesheet_df,
    #                                 how='outer', on=['ts_code', 'end_date'])
    # tushare_merge_df_two = pd.merge(tushare_merge_df_one, tushare_icome_df,
    #                                 how='outer', on=['ts_code', 'end_date'])
    # tushare_merge_df = pd.merge(tushare_merge_df_two, tushare_indicator_df,
    #                             how='outer', on=['ts_code', 'end_date'])

    tushare_his_dis_df = pd.merge(tushare_his_df,
                                  tushare_ds_df,
                                  how='outer',
                                  on=[
                                      'ts_code',
                                      'trade_date',
                                  ])
    tushare_his_ds_df = pd.merge(tushare_his_dis_df,
                                 tushare_suspend_df,
                                 how='outer',
                                 on='ts_code')

    tushare_merge_df_g = tushare_merge_df.groupby('ts_code')
    tushare_his_ds_df_g = tushare_his_ds_df.groupby('ts_code')
    logging.debug('提取数据完成')
    merge_date_dic_dic = {}
    # # 計算財務紕漏事件
    for suspend_date_g in [tushare_merge_df.groupby(['ts_code', 'ann_date'])]:
        for num, ((ts_code, ann_date), date_df) in enumerate(suspend_date_g):
            if ths_code_set is not None and ts_code not in ths_code_set:
                continue
            if is_nan_or_none(ann_date):
                continue
            suspend_date_dic = merge_date_dic_dic.setdefault(ts_code, {})
            if ts_code not in tushare_merge_df_g.size():
                logger.error('hebing 表中不存在 %s 的財務數據', ts_code)
                continue
            tushare_merge_df_temp = tushare_merge_df_g.get_group(ts_code)
            if ann_date not in merge_date_dic_dic:
                tushare_merge_df_temp = tushare_merge_df_temp[
                    tushare_merge_df_temp['f_ann_date'] <= ann_date]
                if tushare_merge_df_temp.shape[0] > 0:
                    suspend_date_dic[
                        ann_date] = tushare_merge_df_temp.sort_values(
                            'f_ann_date').iloc[0]
    # # 設置dtype
    dtype = {'ann_date': Date}

    for dic in [
            DTYPE_TUSHARE_STOCK_DAILY_MD,
            DTYPE_TUSHARE_STOCK_DAILY_BASIC,
            DTYPE_TUSHARE_SUSPEND,
            # DTYPE_TUSHARE_STOCK_INCOME,
            # DTYPE_TUSHARE_STOCK_BALABCESHEET,
            DTYPE_TUSHARE_CASHFLOW
    ]:
        for key, val in dic.items():  # len(dic)12
            dtype[key] = val
    logging.debug("计算财报日期完成")

    # 整理 data_df 数据
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        merge_date_dic_dic)
    try:
        for num, (ts_code, report_date_dic) in enumerate(
                merge_date_dic_dic.items(),
                start=1):  # key:ts_code nan 較多  列明: nan
            # TODO: size暫時使用  以後在驚醒改進
            if ts_code not in tushare_his_ds_df_g.size():
                logger.error('suspend 表中不存在 %s 的財務數據', ts_code)
                continue
            tushare_his_ds_df_cur_ths_code = tushare_his_ds_df_g.get_group(
                ts_code)  # shape[1] 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ts_code,
                         tushare_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(suspend_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in generate_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count, ts_code,
                             date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # 计算有效的日期范围
                if report_date_from is None:
                    is_fit = tushare_his_ds_df_cur_ths_code[
                        'trade_date'] < report_date_to
                elif report_date_to is None:
                    is_fit = tushare_his_ds_df_cur_ths_code[
                        'trade_date'] >= report_date_from
                else:
                    is_fit = (tushare_his_ds_df_cur_ths_code['trade_date'] <
                              report_date_to) & (
                                  tushare_his_ds_df_cur_ths_code['trade_date']
                                  >= report_date_from)
                # 获取日期范围内的数据
                tushare_his_ds_df_segment = tushare_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = tushare_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                #################################################
                for key in (
                        DTYPE_TUSHARE_STOCK_DAILY_MD.keys()
                        and DTYPE_TUSHARE_STOCK_DAILY_BASIC.keys()
                        and DTYPE_TUSHARE_SUSPEND.keys()
                        and DTYPE_TUSHARE_CASHFLOW.keys()
                        # and
                        #  DTYPE_TUSHARE_STOCK_BALABCESHEET.keys() and DTYPE_TUSHARE_STOCK_INCOME.keys()
                ):
                    if key in ('ts_code', 'trade_date'):
                        continue
                    tushare_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                tushare_his_ds_df_segment['ann_date'] = report_date_from
                # 添加数据到列表
                data_df_list.append(tushare_his_ds_df_segment)
                data_count += segment_count

            if DEBUG and len(data_df_list) > 1:
                break

            # 保存数据库
            if data_count > 10000:
                # 保存到数据库
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []

    finally:
        # 保存到数据库   report_date
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Exemplo n.º 3
0
def merge_ifind_stock_daily(ths_code_set: set = None, date_from=None):
    """将ds his 以及财务数据合并为 daily 数据"""
    table_name = 'ifind_stock_daily'
    logging.info("合成 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(`time`),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    # 獲取各個表格數據
    ifind_his_df = get_ifind_daily_df('ifind_stock_daily_his', date_from)
    ifind_ds_df = get_ifind_daily_df('ifind_stock_daily_ds', date_from)
    ifind_report_date_df = get_ifind_report_date_df('ifind_stock_report_date',
                                                    None)
    ifind_fin_df = get_ifind_daily_df('ifind_stock_fin', None)
    ifind_fin_df_g = ifind_fin_df.groupby('ths_code')
    ths_code_set_4_daily = set(ifind_fin_df_g.size().index)
    # 合并 ds his 数据
    ifind_his_ds_df = pd.merge(ifind_his_df,
                               ifind_ds_df,
                               how='outer',
                               on=['ths_code', 'time'])  # 拼接後續有nan,無數據
    ifind_his_ds_df_g = ifind_his_ds_df.groupby('ths_code')
    logger.debug("提取数据完成")
    # 计算 财报披露时间
    report_date_dic_dic = {}
    for report_date_g in [
            ifind_report_date_df.groupby(
                ['ths_code', 'ths_regular_report_actual_dd_stock'])
    ]:
        for num, ((ths_code, report_date), data_df) in enumerate(report_date_g,
                                                                 start=1):
            if ths_code_set is not None and ths_code not in ths_code_set:
                continue
            if is_nan_or_none(report_date):
                continue
            report_date_dic = report_date_dic_dic.setdefault(ths_code, {})
            if ths_code not in ths_code_set_4_daily:
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            ifind_fin_df_temp = ifind_fin_df_g.get_group(ths_code)
            if report_date not in report_date_dic_dic:
                ifind_fin_df_temp = ifind_fin_df_temp[
                    ifind_fin_df_temp['time'] <= report_date]
                if ifind_fin_df_temp.shape[0] > 0:
                    report_date_dic[
                        report_date] = ifind_fin_df_temp.sort_values(
                            'time').iloc[0]

    # # 设置 dtype
    dtype = {'report_date': Date}
    for dic in [
            DTYPE_STOCK_DAILY_DS, DTYPE_STOCK_REPORT_DATE,
            DTYPE_STOCK_DAILY_FIN, DTYPE_STOCK_DAILY_HIS
    ]:
        for key, val in dic.items():
            dtype[key] = val

    logger.debug("计算财报日期完成")
    # 整理 data_df 数据
    tot_data_count, data_count, data_df_list, for_count = 0, 0, [], len(
        report_date_dic_dic)
    try:
        for num, (ths_code,
                  report_date_dic) in enumerate(report_date_dic_dic.items(),
                                                start=1):  # key:ths_code
            # TODO: 檢查判斷 ths_code 是否存在在ifind_fin_df_g 裏面,,size暫時使用  以後在驚醒改進
            if ths_code not in ifind_his_ds_df_g.size():
                logger.error('fin 表中不存在 %s 的財務數據', ths_code)
                continue
            # open low  等 is NAN 2438
            ifind_his_ds_df_cur_ths_code = ifind_his_ds_df_g.get_group(
                ths_code)  # shape[1] 30
            logger.debug('%d/%d) 处理 %s %d 条数据', num, for_count, ths_code,
                         ifind_his_ds_df_cur_ths_code.shape[0])
            report_date_list = list(report_date_dic.keys())
            report_date_list.sort()
            for report_date_from, report_date_to in generate_range(
                    report_date_list):
                logger.debug('%d/%d) 处理 %s [%s - %s]', num, for_count,
                             ths_code, date_2_str(report_date_from),
                             date_2_str(report_date_to))
                # 计算有效的日期范围
                if report_date_from is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] < report_date_to
                elif report_date_to is None:
                    is_fit = ifind_his_ds_df_cur_ths_code[
                        'time'] >= report_date_from
                else:
                    is_fit = (ifind_his_ds_df_cur_ths_code['time'] <
                              report_date_to) & (
                                  ifind_his_ds_df_cur_ths_code['time'] >=
                                  report_date_from)
                # 获取日期范围内的数据
                ifind_his_ds_df_segment = ifind_his_ds_df_cur_ths_code[
                    is_fit].copy()
                segment_count = ifind_his_ds_df_segment.shape[0]
                if segment_count == 0:
                    continue
                fin_s = report_date_dic[
                    report_date_from] if report_date_from is not None else None
                for key in DTYPE_STOCK_DAILY_FIN.keys():
                    if key in ('ths_code', 'time'):
                        continue
                    ifind_his_ds_df_segment[key] = fin_s[
                        key] if fin_s is not None and key in fin_s else None
                ifind_his_ds_df_segment['report_date'] = report_date_from
                # 添加数据到列表
                data_df_list.append(ifind_his_ds_df_segment)
                data_count += segment_count

            if DEBUG and len(data_df_list) > 1:
                break

            # 保存数据库
            if data_count > 10000:
                # 保存到数据库
                data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_count, data_df_list = 0, []

    finally:
        # 保存到数据库
        if len(data_df_list) > 0:
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logger.info('%s 新增或更新记录 %d 条', table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Exemplo n.º 4
0
def merge_stock_daily(date_from=None):
    """
    合并 wind,ifind 数据到对应名称的表中
    :param date_from:
    :return:
    """
    table_name = 'stock_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
    ifind_table_his_name = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_ds_name)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_his_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str,
                              engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_ds_name)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_his_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md,
                                   params=[date_from
                                           ])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md,
                                  params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'

    ifind_df = pd.merge(ifind_his_df,
                        ifind_ds_df,
                        how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time',
            'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x',
            'right_key': 'open_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x',
            'right_key': 'high_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x',
            'right_key': 'low_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        # TODO: 原因不详,wind接口取到的部分 close 数据不准确
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x',
            'right_key': 'close_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x',
            'right_key': 'volume_y',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount',
            'right_key': 'amt',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        # 总股本字段:同花顺的 totalShares 字段以变动日期为准,wind total_shares 以公告日为准
        # 因此出现冲突时应该以 wind 为准
        'total_shares': (DOUBLE, prefer_right, {
            'left_key': 'totalShares',
            'right_key': 'total_shares'
        }),
        # 'susp_days': (Integer, '***', {
        #     'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
        #  'primary_keys': ('ths_code', 'time')}),
        'max_up_or_down': (Integer, max_up_or_down, {
            'ths_key': 'ths_up_and_down_status_stock',
            'wind_key': 'maxupordown',
            'primary_keys': ('ths_code', 'time')
        }),
        'total_capital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'float_capital': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'pct_chg': (DOUBLE, mean_value, {
            'left_key': 'changeRatio',
            'right_key': 'pct_chg',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'float_a_shares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),  # 对应wind float_a_shares
        'free_float_shares': (DOUBLE, get_value, {
            'key': 'free_float_shares'
        }),  # 对应 ths ths_free_float_shares_stock
        # PE_TTM 对应 ths ths_pe_ttm_stock 以财务报告期为基准日,对应 wind pe_ttm 以报告期为准
        # 因此应该如有不同应该以 wind 为准
        'pe_ttm': (DOUBLE, prefer_right, {
            'left_key': 'ths_pe_ttm_stock',
            'right_key': 'pe_ttm',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
    }

    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Exemplo n.º 5
0
def merge_index_info():
    """
    合并 wind,ifind 数据到对应名称的表中
    :return:
    """
    table_name = 'index_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(
        table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(
        table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on='ths_code',
                         right_on='wind_code',
                         indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'sec_name': (String(20), prefer_left, {
            'left_key': 'ths_index_short_name_index',
            'right_key': 'sec_name'
        }),
        'crm_issuer': (String(20), prefer_left, {
            'left_key': 'ths_publish_org_index',
            'right_key': 'crm_issuer'
        }),
        'base_date': (Date, prefer_left, {
            'left_key': 'ths_index_base_period_index',
            'right_key': 'basedate'
        }),
        'basevalue': (DOUBLE, prefer_left, {
            'left_key': 'ths_index_base_point_index',
            'right_key': 'basevalue'
        }),
        'country': (String(20), get_value, {
            'key': 'country'
        }),
        'launchdate': (Date, get_value, {
            'key': 'launchdate'
        }),
        'index_code': (String(20), get_value, {
            'key': 'ths_index_code_index'
        }),
        'index_category': (String(10), get_value, {
            'key': 'ths_index_category_index'
        }),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
                        CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
                        ADD PRIMARY KEY (`unique_code`)""".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)

    return data_df
Exemplo n.º 6
0
def merge_index_daily(date_from=None):
    """
    合并 wind,ifind 数据到对应名称的表中
    :param date_from:
    :return:
    """
    table_name = 'index_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_name_his = 'ifind_{table_name}_his'.format(
        table_name=table_name)
    ifind_table_name_ds = 'ifind_{table_name}_ds'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_his_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_name_his)
        ifind_ds_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_name_ds)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md)  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str,
                              engine_md)  # , index_col='wind_code'
    else:
        ifind_his_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_name_his)
        ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_name_ds)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_his_df = pd.read_sql(ifind_his_sql_str,
                                   engine_md,
                                   params=[date_from
                                           ])  # , index_col='ths_code'
        ifind_ds_df = pd.read_sql(ifind_ds_sql_str,
                                  engine_md,
                                  params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'

    # change (ifind_df, wind_df)  into joined_df

    ifind_df = pd.merge(ifind_his_df,
                        ifind_ds_df,
                        how='outer',
                        on=['ths_code', 'time'])
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')

    # data, columns processing
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time',
            'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x',
            'right_key': 'open_x',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x',
            'right_key': 'high_x',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x',
            'right_key': 'low_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x',
            'right_key': 'close_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean3_value, {
            'left_key': 'volume_x',
            'right_key': 'volume_y',
            'warning_accuracy': 10,
            'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean_value, {
            'left_key': 'amount',
            'right_key': 'amt',
            'warning_accuracy': 100,
            'primary_keys': ('ths_code', 'time')
        }),
        'free_turn': (DOUBLE, mean2_value, {
            'left_key': 'turnoverRatio',
            'right_key': 'free_turn',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'avgPrice': (DOUBLE, get_value, {
            'key': 'avgPrice'
        }),
        'changeRatio': (DOUBLE, get_value, {
            'key': 'changeRatio'
        }),
        'floatCapitalOfAShares': (DOUBLE, get_value, {
            'key': 'floatCapitalOfAShares'
        }),
        'floatCapitalOfBShares': (DOUBLE, get_value, {
            'key': 'floatCapitalOfBShares'
        }),
        'floatSharesOfAShares': (DOUBLE, get_value, {
            'key': 'floatSharesOfAShares'
        }),
        'floatSharesOfBShares': (DOUBLE, get_value, {
            'key': 'floatSharesOfBShares'
        }),
        'pb': (DOUBLE, get_value, {
            'key': 'pb'
        }),
        'pcf': (DOUBLE, get_value, {
            'key': 'pcf'
        }),
        'pe': (DOUBLE, get_value, {
            'key': 'pe'
        }),
        'pe_ttm': (DOUBLE, get_value, {
            'key': 'preClose'
        }),
        'preClose': (DOUBLE, get_value, {
            'key': 'avgPrice'
        }),
        'ps': (DOUBLE, get_value, {
            'key': 'ps'
        }),
        'totalCapital': (DOUBLE, get_value, {
            'key': 'totalCapital'
        }),
        'totalShares': (DOUBLE, get_value, {
            'key': 'totalShares'
        }),
        'transactionAmount': (DOUBLE, get_value, {
            'key': 'transactionAmount'
        }),
        'current_index': (DOUBLE, get_value, {
            'key': 'ths_current_mv_index'
        }),
        'dividend_rate_index': (DOUBLE, get_value, {
            'key': 'ths_dividend_rate_index'
        }),
        'buy_amt_index': (DOUBLE, get_value, {
            'key': 'ths_financing_buy_amt_int_index'
        }),
        'payment_amt_index': (DOUBLE, get_value, {
            'key': 'ths_financing_payment_amt_int_index'
        }),
        'ths_flaot_mv_ratio_index': (DOUBLE, get_value, {
            'key': 'ths_flaot_mv_ratio_index'
        }),
        'ths_float_ashare_index': (DOUBLE, get_value, {
            'key': 'ths_float_ashare_index'
        }),
        'ths_float_ashare_mv_index': (DOUBLE, get_value, {
            'key': 'ths_float_ashare_mv_index'
        }),
        'ths_float_ashare_to_total_shares_index': (DOUBLE, get_value, {
            'key':
            'ths_float_ashare_to_total_shares_index'
        }),
        'ths_float_bshare_index': (DOUBLE, get_value, {
            'key': 'ths_float_bshare_index'
        }),
        'ths_float_bshare_to_total_shares_index': (DOUBLE, get_value, {
            'key':
            'ths_float_bshare_to_total_shares_index'
        }),
        'ths_float_hshare_to_total_shares_index': (DOUBLE, get_value, {
            'key':
            'ths_float_hshare_to_total_shares_index'
        }),
        'ths_limited_ashare_index': (DOUBLE, get_value, {
            'key': 'ths_limited_ashare_index'
        }),
        'ths_margin_trading_amtb_index': (DOUBLE, get_value, {
            'key':
            'ths_margin_trading_amtb_index'
        }),
        'ths_margin_trading_balance_index': (DOUBLE, get_value, {
            'key':
            'ths_margin_trading_balance_index'
        }),
        'ths_margin_trading_repay_amt_index': (DOUBLE, get_value, {
            'key':
            'ths_margin_trading_repay_amt_index'
        }),
        'ths_margin_trading_sell_amt_index': (DOUBLE, get_value, {
            'key':
            'ths_margin_trading_sell_amt_index'
        }),
        'ths_float_hshare_index': (DOUBLE, get_value, {
            'key': 'ths_float_hshare_index'
        }),
        'ths_limited_bshare_index': (DOUBLE, get_value, {
            'key': 'ths_limited_bshare_index'
        }),
        'ths_market_value_index': (DOUBLE, get_value, {
            'key': 'ths_market_value_index'
        }),
        'ths_mt_payment_vol_int_index': (DOUBLE, get_value, {
            'key': 'ths_mt_payment_vol_int_index'
        }),
        'ths_mt_sell_vol_int_index': (DOUBLE, get_value, {
            'key': 'ths_mt_sell_vol_int_index'
        }),
        'ths_mv_csrc_alg_index': (DOUBLE, get_value, {
            'key': 'ths_mv_csrc_alg_index'
        }),
        'ths_pb_index': (DOUBLE, get_value, {
            'key': 'ths_pb_index'
        }),
        'ths_pcf_index': (DOUBLE, get_value, {
            'key': 'ths_pcf_index'
        }),
        'ths_pe_index': (DOUBLE, get_value, {
            'key': 'ths_pe_index'
        }),
        'ths_ps_index': (DOUBLE, get_value, {
            'key': 'ths_ps_index'
        }),
        'ths_short_selling_amtb_index': (DOUBLE, get_value, {
            'key': 'ths_short_selling_amtb_index'
        }),
        'ths_short_selling_payment_vol_index': (DOUBLE, get_value, {
            'key':
            'ths_short_selling_payment_vol_index'
        }),
        'ths_short_selling_sell_vol_index': (DOUBLE, get_value, {
            'key':
            'ths_short_selling_sell_vol_index'
        }),
        'ths_short_selling_vol_balance_index': (DOUBLE, get_value, {
            'key':
            'ths_short_selling_vol_balance_index'
        }),
        'ths_state_owned_lp_shares_index': (DOUBLE, get_value, {
            'key':
            'ths_state_owned_lp_shares_index'
        }),
        'ths_state_owned_shares_index': (DOUBLE, get_value, {
            'key': 'ths_state_owned_shares_index'
        }),
        'ths_total_domestic_lp_shares_index': (DOUBLE, get_value, {
            'key':
            'ths_total_domestic_lp_shares_index'
        }),
        'ths_total_float_shares_index': (DOUBLE, get_value, {
            'key': 'ths_total_float_shares_index'
        }),
        'ths_total_float_shares_ratio_index': (DOUBLE, get_value, {
            'key':
            'ths_total_float_shares_ratio_index'
        }),
        'ths_total_limited_ashare_ratio_index': (DOUBLE, get_value, {
            'key':
            'ths_total_limited_ashare_ratio_index'
        }),
        'ths_total_shares_index': (DOUBLE, get_value, {
            'key': 'ths_total_shares_index'
        }),
        'ths_unfloat_shares_index': (DOUBLE, get_value, {
            'key': 'ths_unfloat_shares_index'
        }),
        'ths_annual_volatility_index': (DOUBLE, get_value, {
            'key': 'ths_annual_volatility_index'
        }),
    }

    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df
Exemplo n.º 7
0
def merge_future_info():
    """
    合并 wind,ifind 数据到对应名称的表中
    :return:
    """
    table_name = 'future_info'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    # ifind_model = TABLE_MODEL_DIC[ifind_table_name]
    # wind_model = TABLE_MODEL_DIC[wind_table_name]
    # with with_db_session(engine_md) as session:
    #     session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
    ifind_sql_str = "select * from {table_name}".format(
        table_name=ifind_table_name)
    wind_sql_str = "select * from {table_name}".format(
        table_name=wind_table_name)
    ifind_df = pd.read_sql(ifind_sql_str, engine_md)  # , index_col='ths_code'
    wind_df = pd.read_sql(wind_sql_str, engine_md)  # , index_col='wind_code'
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on='ths_code',
                         right_on='wind_code',
                         indicator='indicator_column')
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'sec_name': (String(50), prefer_left, {
            'left_key': 'ths_future_short_name_future',
            'right_key': 'sec_name'
        }),
        'dl_month': (String(20), prefer_left, {
            'left_key': 'ths_delivery_month_future',
            'right_key': 'dlmonth'
        }),
        'delivery_date': (Date, prefer_left, {
            'left_key': 'ths_last_delivery_date_future',
            'right_key': 'lastdelivery_date'
        }),
        'prince': (DOUBLE, prefer_left, {
            'left_key': 'ths_mini_chg_price_future',
            'right_key': 'mfprice'
        }),
        'ex_eng': (String(20), get_value, {
            'key': 'ths_exchange_short_name_future'
        }),
        'sc_code': (String(20), prefer_left, {
            'left_key': 'ths_td_variety_future',
            'right_key': 'sccode'
        }),
        'p_unit': (String(20), prefer_left, {
            'left_key': 'ths_pricing_unit_future',
            'right_key': 'punit'
        }),
        'l_prince': (DOUBLE, prefer_left, {
            'left_key': 'ths_listing_benchmark_price_future',
            'right_key': 'lprice'
        }),
        'margin': (DOUBLE, prefer_left, {
            'left_key': 'ths_initial_td_deposit_future',
            'right_key': 'margin'
        }),
        'ratio_lmit': (DOUBLE, get_value, {
            'key': 'ths_chg_ratio_lmit_future'
        }),
        'enshort_name': (String(100), get_value, {
            'key': 'ths_contract_en_short_name_future'
        }),
        'en_name': (String(100), get_value, {
            'key': 'ths_contract_en_name_future'
        }),
        'future_code': (String(20), prefer_left, {
            'left_key': 'ths_future_code_future',
            'right_key': 'trade_code'
        }),
        'last_date': (Date, prefer_left, {
            'left_key': 'ths_last_td_date_future',
            'right_key': 'lasttrade_date'
        }),
        'con_month': (String(60), get_value, {
            'key': 'ths_contract_month_explain_future'
        }),
        'de_date': (String(60), get_value, {
            'key': 'ths_delivery_date_explain_future'
        }),
        'td_date': (String(60), get_value, {
            'key': 'ths_last_td_date_explian_future'
        }),
        'benchmark_': (DOUBLE, get_value, {
            'key': 'ths_listing_benchmark_price_future'
        }),
        'sec_type': (String(20), get_value, {
            'key': 'ths_sec_type_future'
        }),
        'td_time': (String(60), get_value, {
            'key': 'ths_td_time_explain_future'
        }),
        'td_unit': (String(20), get_value, {
            'key': 'ths_td_unit_future'
        }),
        'td_variety': (String(20), get_value, {
            'key': 'ths_td_variety_future'
        }),
        'sec_englishname': (String(50), get_value, {
            'key': 'sec_englishname'
        }),
        'exch_eng': (String(50), get_value, {
            'key': 'exch_eng'
        }),
        'changelt': (DOUBLE, get_value, {
            'key': 'changelt'
        }),
        'contractmultiplier': (String(100), get_value, {
            'key': 'contractmultiplier'
        }),
        'ftmargins': (String(100), get_value, {
            'key': 'ftmargins'
        }),
        'thours': (String(200), get_value, {
            'key': 'thours'
        }),
        'ipo_date': (Date, prefer_left, {
            'left_key': 'ths_start_trade_date_future',
            'right_key': 'ipo_date'
        }),
    }
    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        # build_primary_key([table_name])
        create_pk_str = """ALTER TABLE {table_name}
                        CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
                        ADD PRIMARY KEY (`unique_code`)""".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            session.execute(create_pk_str)

    return data_df
Exemplo n.º 8
0
def merge_future_daily(date_from=None):
    """
    合并 wind,ifind 数据到对应名称的表中
    :param date_from:
    :return:
    """
    table_name = 'future_daily'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if date_from is None and has_table:
        sql_str = "select adddate(max(trade_date),1) from {table_name}".format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            date_from = date_2_str(session.execute(sql_str).scalar())
    ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
    wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
    if date_from is None:
        ifind_sql_str = "select * from {table_name}".format(
            table_name=ifind_table_name)
        wind_sql_str = "select * from {table_name}".format(
            table_name=wind_table_name)
        ifind_df = pd.read_sql(ifind_sql_str,
                               engine_md)  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str,
                              engine_md)  # , index_col='wind_code'
    else:
        ifind_sql_str = "select * from {table_name} where time >= %s".format(
            table_name=ifind_table_name)
        wind_sql_str = "select * from {table_name} where trade_date >= %s".format(
            table_name=wind_table_name)
        ifind_df = pd.read_sql(ifind_sql_str, engine_md,
                               params=[date_from])  # , index_col='ths_code'
        wind_df = pd.read_sql(wind_sql_str, engine_md,
                              params=[date_from])  # , index_col='wind_code'
    # change (ifind_df, wind_df)  into joined_df
    joined_df = pd.merge(ifind_df,
                         wind_df,
                         how='outer',
                         left_on=['ths_code', 'time'],
                         right_on=['wind_code', 'trade_date'],
                         indicator='indicator_column')
    # data, columns processing
    col_merge_dic = {
        'unique_code': (String(20), prefer_left, {
            'left_key': 'ths_code',
            'right_key': 'wind_code'
        }),
        'trade_date': (Date, prefer_left, {
            'left_key': 'time',
            'right_key': 'trade_date'
        }),
        'open': (DOUBLE, mean_value, {
            'left_key': 'open_x',
            'right_key': 'open_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'high': (DOUBLE, mean_value, {
            'left_key': 'high_x',
            'right_key': 'high_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'low': (DOUBLE, mean_value, {
            'left_key': 'low_x',
            'right_key': 'low_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        # TODO: 原因不详,wind接口取到的部分 close 数据不准确
        'close': (DOUBLE, prefer_left, {
            'left_key': 'close_x',
            'right_key': 'close_y',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'volume': (DOUBLE, mean_value, {
            'left_key': 'volume_x',
            'right_key': 'volume_y',
            'warning_accuracy': 10,
            'primary_keys': ('ths_code', 'time')
        }),
        'amount': (DOUBLE, mean2_value, {
            'left_key': 'amount',
            'right_key': 'amt',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        'settle': (DOUBLE, mean_value, {
            'left_key': 'settlement',
            'right_key': 'settle',
            'warning_accuracy': 0.01,
            'primary_keys': ('ths_code', 'time')
        }),
        'position': (DOUBLE, mean_value, {
            'left_key': 'openInterest',
            'right_key': 'position',
            'warning_accuracy': 1,
            'primary_keys': ('ths_code', 'time')
        }),
        'maxupordown': (Integer, get_value, {
            'key': 'maxupordown'
        }),
        'st_stock': (DOUBLE, get_value, {
            'key': 'st_stock'
        }),
        'instrument_id': (String(20), get_value, {
            'key': 'instrument_id'
        }),
        'positionchange': (DOUBLE, get_value, {
            'key': 'positionChange'
        }),
        'preclose': (String(20), get_value, {
            'key': 'preClose'
        }),
        'presettlement': (DOUBLE, get_value, {
            'key': 'preSettlement'
        }),
        'amplitude': (DOUBLE, get_value, {
            'key': 'amplitude'
        }),
        'avgprice': (DOUBLE, get_value, {
            'key': 'avgPrice'
        }),
        'change': (DOUBLE, get_value, {
            'key': 'change'
        }),
        'change_settlement': (DOUBLE, get_value, {
            'key': 'change_settlement'
        }),
        'chg_settlement': (DOUBLE, get_value, {
            'key': 'chg_settlement'
        }),
    }

    col_merge_rule_dic = {
        key: (val[1], val[2])
        for key, val in col_merge_dic.items()
    }
    dtype = {key: val[0] for key, val in col_merge_dic.items()}
    data_df = merge_data(joined_df, col_merge_rule_dic)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name,
                                                  engine_md, dtype)
    logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    return data_df