Exemplo n.º 1
0
def import_future_daily_his(chain_param=None,
                            ths_code_set: set = None,
                            begin_time=None):
    """
    更新期货合约日级别行情信息
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_future_daily'
    info_table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    indicator_param_list = [
        ('preClose', String(20)),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amount', DOUBLE),
        ('avgPrice', DOUBLE),
        ('change', DOUBLE),
        ('changeRatio', DOUBLE),
        ('preSettlement', DOUBLE),
        ('settlement', DOUBLE),
        ('change_settlement', DOUBLE),
        ('chg_settlement', DOUBLE),
        ('openInterest', DOUBLE),
        ('positionChange', DOUBLE),
        ('amplitude', DOUBLE),
    ]
    json_indicator = ','.join([key for key, _ in indicator_param_list])
    if has_table:
        # 16 点以后 下载当天收盘数据,16点以前只下载前一天的数据
        # 对于 date_to 距离今年超过1年的数据不再下载:发现有部分历史过于久远的数据已经无法补全,
        # 如:AL0202.SHF AL9902.SHF CU0202.SHF
        # TODO: ths_ksjyr_future 字段需要替换为 ths_contract_listed_date_future 更加合理
        sql_str = """SELECT ths_code, date_frm, 
                if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi LEFT OUTER JOIN
                (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
                ON fi.ths_code = wfd.ths_code
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            AND subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            ORDER BY ths_code""".format(table_name=table_name,
                                        info_table_name=info_table_name)
    else:
        sql_str = """SELECT ths_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM 
            (
            SELECT fi.ths_code, ths_start_trade_date_future date_frm, 
                ths_last_td_date_future lasttrade_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM {info_table_name} fi
            ) tt""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 获取每只股票需要获取日线数据的日期区间
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    # 设置 dtype
    dtype = {key: val for key, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        logger.info("%d future instrument will be handled", code_count)
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
                logging.info("%s 新增数据 %d 条", table_name, data_count)

            # 仅调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            # data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, dtype)
            tot_data_count += data_count
            logging.info("%s 新增数据 %d 条", table_name, data_count)

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
Exemplo n.º 2
0
def import_private_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    导入 daily 数据
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'ifind_private_fund_daily'
    indicator_param_list = [
        ('netAssetValue', '', DOUBLE),
        ('adjustedNAV', '', DOUBLE),
        ('accumulatedNAV', '', DOUBLE),
        ('premium', '', DOUBLE),
        ('premiumRatio', '', DOUBLE),
        ('estimatedPosition', '', DOUBLE)
    ]
    # jsonIndicator='netAssetValue,adjustedNAV,accumulatedNAV,premium,premiumRatio,estimatedPosition'
    # jsonparam=';;;;'
    json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_established_date_sp) date_frm, ths_maturity_date_sp,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_private_fund_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        logger.warning('ifind_private_fund_daily 不存在,仅使用 ifind_private_fund_info 表进行计算日期范围')
        sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_established_date_sp date_frm, ths_maturity_date_sp,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_private_fund_info info 
            ) tt
            WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) 
            ORDER BY ths_code"""

    with with_db_session(engine_md) as session:
        # 计算每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall() if
            ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to}

    # 设置 dtype
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    try:
        for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code,
                json_indicator,
                json_param,
                begin_time, end_time
            )
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 10000:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
            data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table:
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Exemplo n.º 3
0
def import_index_daily_his(chain_param=None,
                           ths_code_set: set = None,
                           begin_time=None):
    """
    通过history接口将历史数据保存到 ifind_index_daily_his
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time: 默认为None,如果非None则代表所有数据更新日期不得晚于该日期
    :return:
    """
    table_name = 'ifind_index_daily_his'
    if begin_time is not None and type(begin_time) == date:
        begin_time = str_2_date(begin_time)
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS],
        sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_index_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM ifind_index_daily_his GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code;"""
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_index_info info 
            ) tt
            WHERE date_frm <= if(NULL<end_date, NULL, end_date) 
            ORDER BY ths_code"""

    # 计算每只股票需要获取日线数据的日期区间
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # 大于阀值有开始插入
            if data_count >= 10000:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # 仅调试使用
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Exemplo n.º 4
0
def import_pub_fund_daily(chain_param=None,
                          ths_code_set: set = None,
                          begin_time=None):
    """
    通过history接口将历史数据保存到 ifind_pub_fund_daily
    :param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
    :param ths_code_set:
    :param begin_time: 默认为None,如果非None则代表所有数据更新日期不得晚于该日期
    :return:
    """
    table_name = 'ifind_pub_fund_daily'
    has_table = engine_md.has_table(table_name)
    if begin_time is not None and type(begin_time) == date:
        begin_time = str_2_date(begin_time)

    indicator_param_list = [('netAssetValue', '', DOUBLE),
                            ('adjustedNAV', '', DOUBLE),
                            ('accumulatedNAV', '', DOUBLE)]
    # THS_HistoryQuotes('600006.SH,600010.SH',
    # 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
    # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
    # '2018-06-30','2018-07-30')
    json_indicator, _ = unzip_join([(key, val)
                                    for key, val, _ in indicator_param_list],
                                   sep=';')
    if has_table:
        sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ifnull(trade_date_max_1, ths_lof_listed_date_fund) date_frm, ths_fund_expiry_date_fund,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM 
                    ifind_pub_fund_info info 
                LEFT OUTER JOIN
                    (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
                ON info.ths_code = daily.ths_code
            ) tt
            WHERE date_frm <= if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) 
            ORDER BY ths_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_pub_fund_info 表进行计算日期范围', table_name)
        sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to
            FROM
            (
                SELECT info.ths_code, ths_lof_listed_date_fund date_frm, ths_fund_expiry_date_fund,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM ifind_pub_fund_info info 
            ) tt
            WHERE date_frm <= if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) 
            ORDER BY ths_code"""

    # 计算每只股票需要获取日线数据的日期区间
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        code_date_range_dic = {
            ths_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        date_from_min = date.today() - timedelta(days=(365 * 5))
        # 试用账号只能获取近5年数据
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to
        }

    # 设置 dtype
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        code_date_range_dic)
    try:
        for num, (ths_code,
                  (begin_time,
                   end_time)) in enumerate(code_date_range_dic.items(),
                                           start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code,
                         begin_time, end_time)
            data_df = invoker.THS_HistoryQuotes(
                ths_code, json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                begin_time, end_time)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df.rename(
                    columns={col: col.lower()
                             for col in data_df.columns},
                    inplace=True)
                data_df_list.append(data_df)

            if DEBUG and len(data_df_list) > 1:
                break

            # 大于阀值有开始插入
            if data_count >= 10000:
                tot_data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        if len(data_df_list) > 0:
            tot_data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                tot_data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])