Exemplo n.º 1
0
def add_data_2_ckdvp(json_indicator,
                     json_param,
                     ths_code_set: set = None,
                     begin_time=None):
    """
    将数据增量保存到 ifind_ckdvp_index 表,code key date value param 五个字段组合成的表 value 为 Varchar(80)
    该表用于存放各种新增加字段的值
    查询语句举例:
    THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
    :param json_indicator:
    :param json_param:
    :param ths_code_set:
    :param begin_time:
    :return: 全部数据加载完成,返回True,否则False,例如数据加载中途流量不够而中断
    """
    all_finished = False
    table_name = 'ifind_ckdvp_index'
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
            select ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                select info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from 
                    ifind_index_info info 
                left outer join
                    (select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name} 
                        where {table_name}.key='{0}' and param='{1}' group by ths_code
                    ) daily
                on info.ths_code = daily.ths_code
            ) tt
            where date_frm <= if(NULL<end_date, NULL, end_date) 
            order by ths_code""".format(json_indicator,
                                        json_param,
                                        table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """
            SELECT ths_code, date_frm, 
                if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code"""

    # 计算每只股票需要获取日线数据的日期区间
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    # 设置 dtype
    dtype = {
        'ths_code': String(20),
        'key': String(80),
        'time': Date,
        'value': String(80),
        'param': String(80),
    }
    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_df['key'] = json_indicator
                data_df['param'] = json_param
                data_df.rename(columns={json_indicator: 'value'}, inplace=True)
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 大于阀值有开始插入
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # 仅调试使用
            if DEBUG and len(data_df_list) > 4:
                break

            all_finished = True
    finally:
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count

        if not has_table:
            alter_table_2_myisam(engine_md, [table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `time` `time` DATE NOT NULL ,
                CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

    return all_finished
Exemplo n.º 2
0
def import_index_daily_ds(chain_param=None,
                          ths_code_set: set = None,
                          begin_time=None):
    """
    通过date_serise接口将历史数据保存到 ifind_index_daily_ds,该数据作为 History数据的补充数据 例如:复权因子af、涨跌停标识、停牌状态、原因等
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS],
        sep=';')
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)

        # 获取每只股票需要获取日线数据的日期区间
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            end_time = date_2_str(end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # 仅调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
            tot_data_count += data_count

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_fin_by_report_date_weekly(chain_param=None,
                                              ths_code_set: set = None,
                                              begin_time=None,
                                              refresh=False):
    """
    通过date_serise接口将历史数据保存到 import_stock_hk_fin
    该数据作为 为周度获取
    以财务报表发布日期为进准,[ 财务报表发布日-14天 ~ 财务报表发布日],周度获取财务数据
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :param refresh: 全部刷新
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date from
        (
        select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date
        union
        select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date
        ) tt
        where report_date is not null
        order by ths_code, report_date"""

    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # 获取报告日-10天到报告日日期范围列表
        table = session.execute(ths_code_report_date_str)
        ths_code_report_date_range_list_dic, ths_code_report_date_range_list_dic_tmp = {}, {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                ths_code_report_date_range_list_dic_tmp.setdefault(
                    ths_code, []).append((date_from, date_to))

        # 获取每只股票需要获取日线数据的日期区间
        if not refresh:
            # 如果全部刷新,则忽略 code_date_range_dic 的日期范围的限制
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min(
                    [date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set
            }

            if TRIAL:
                date_from_min = date.today() - timedelta(days=(365 * 5))
                # 试用账号只能获取近5年数据
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from,
                                   date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to
                }
        else:
            code_date_range_dic = {}

    # 合并重叠的日期
    for ths_code, date_range_list in ths_code_report_date_range_list_dic_tmp.items(
    ):
        if not refresh and ths_code in code_date_range_dic:
            code_date_range = code_date_range_dic[ths_code]
        else:
            code_date_range = None

        # date_range_list 按照 起始日期 顺序排序,下层循环主要作用是将具有重叠日期的日期范围进行合并
        date_range_list_new, date_from_last, date_to_last = [], None, None
        for date_from, date_to in date_range_list:
            if code_date_range is not None:
                # 如果全部刷新,则忽略 code_date_range_dic 的日期范围的限制
                if not refresh and (date_to < code_date_range[0]
                                    or code_date_range[1] < date_from):
                    continue

            if date_from_last is None:
                # 首次循环 设置 date_from_last
                date_from_last = date_from
            elif date_from < date_to_last:
                # 日期重叠,需要合并
                pass
            else:
                # 日期未重叠,保存 range
                date_range_list_new.append((date_from_last, date_to_last))
                date_from_last = date_from

            # 循环底部,设置 date_to_last
            date_to_last = date_to

        # 循环结束,保存 range
        if date_from_last is not None and date_to_last is not None:
            date_range_list_new.append((date_from_last, date_to_last))

        if len(date_range_list_new) > 0:
            ths_code_report_date_range_list_dic[ths_code] = date_range_list_new

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        ths_code_report_date_range_list_dic)
    try:
        for num, (ths_code, date_range_list) in enumerate(
                ths_code_report_date_range_list_dic.items(), start=1):
            for begin_time, end_time in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                             begin_time, end_time)
                data_df = invoker.THS_DateSerial(
                    ths_code, json_indicator, json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W', begin_time,
                    end_time)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # 仅调试使用
                if DEBUG and len(data_df_list) > 0:
                    break

                # 大于阀值有开始插入
                if data_count >= 2000:
                    tot_data_df = pd.concat(data_df_list)
                    # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                    bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                     engine_md,
                                                     DTYPE_STOCK_HK_FIN)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0

    finally:
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_fin_quarterly(chain_param=None,
                                  ths_code_set: set = None,
                                  begin_time=None):
    """
    通过date_serise接口将历史数据保存到 import_stock_hk_fin
    该数据作为 为季度获取
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # jsonparam='2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 仅调试使用
            if DEBUG and len(data_df_list) > 0:
                break

            # 大于阀值有开始插入
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                 engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_report_date(chain_param=None,
                                ths_code_set: set = None,
                                begin_time=None,
                                interval='Q'):
    """
    通过date_serise接口将历史财务数据保存到 ifind_stock_fin,国内财务数据按季度发布,因此获取周期为季度(默认)
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :param interval: Q 季度 M 月 W 周 D 日
    :return:
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    # jsonIndicator='ths_perf_briefing_fore_dsclsr_date_hks;ths_perf_brief_actual_dd_hks;ths_perf_report_foredsclsr_date_hks;ths_perf_report_actual_dd_hks'
    # jsonparam=';'
    json_indicator, json_param = unzip_join(
        [(key, val)
         for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE],
        sep=';')
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                "Days:Tradedays,Fill:Previous,Interval:{interval}".format(
                    interval=interval), begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md,
                    DTYPE_STOCK_HK_REPORT_DATE)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # 仅调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
            tot_data_count += data_count

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_daily_ds(chain_param=None,
                             ths_code_set: set = None,
                             begin_time=None):
    """
    通过date_serise接口将历史数据保存到 ifind_stock_hk_daily_ds,该数据作为 History数据的补充数据 例如:复权因子af、涨跌停标识、停牌状态、原因等
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    # jsonIndicator='ths_pre_close_stock;ths_open_price_stock;ths_high_price_stock;ths_low_stock;ths_close_price_stock;ths_chg_ratio_stock;ths_chg_stock;ths_vol_stock;ths_trans_num_stock;ths_amt_stock;ths_turnover_ratio_stock;ths_vaild_turnover_stock;ths_af_stock;ths_up_and_down_status_stock;ths_trading_status_stock;ths_suspen_reason_stock;ths_last_td_date_stock'
    # jsonparam='100;100;100;100;100;;100;100;;;;;;;;;'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM 
                    {info_table_name} info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
                if(hour(now())<19, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} info 
            ) tt
            WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) 
            ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)

        # 获取每只股票需要获取日线数据的日期区间
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_DateSerial(
                ths_code, json_indicator, json_param,
                'Days:Tradedays,Fill:Previous,Interval:D', begin_time,
                end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # 仅调试使用
            if DEBUG and len(data_df_list) > 0:
                break

            # 大于阀值有开始插入
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                                 engine_md,
                                                 DTYPE_STOCK_HK_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

    finally:
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            bunch_insert_on_duplicate_update(tot_data_df, table_name,
                                             engine_md,
                                             DTYPE_STOCK_HK_DAILY_DS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])