Пример #1
0
def import_stock_quertarly_hk(chain_param=None,
                              wind_code_set=None,
                              begin_time=None):
    """
    插入股票日线数据到最近一个工作日-1
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_set:
    :param begin_time:
    :return:
    """
    table_name = "wind_stock_quertarly_hk"
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info_hk info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY ths_code) quertarly
               ON info.wind_code = quertarly.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning(
            'wind_stock_quertarly_hk 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围')
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info_hk info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        # 获取date_from,date_to,将date_from,date_to做为value值
        stock_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    data_df_list = []
    logger.info('%d data will been import into %s', len(stock_date_dic),
                table_name)
    # 获取股票量价等行情数据
    field_col_name_list = [
        ('roic_ttm', String(20)),
        ('yoyprofit', String(20)),
        ('ebit', String(20)),
        ('ebit2', String(20)),
        ('ebit2_ttm', String(20)),
        ('surpluscapitalps', String(20)),
        ('undistributedps', String(20)),
        ('stm_issuingdate', Date),
    ]
    # 获取列表属性名
    dtype = {key: val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    wind_indictor_str = ",".join(key for key, _ in field_col_name_list)
    upper_col_2_name_dic = {
        name.upper(): val
        for name, val in field_col_name_list
    }
    # 获取接口数据
    try:
        for stock_num, (wind_code,
                        (date_from,
                         date_to)) in enumerate(stock_date_dic.items()):
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                  date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num,
                               wind_code, date_from, date_to)
                continue
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            # 清理掉期间全空的行
            for trade_date in list(data_df.index):
                is_all_none = data_df.loc[trade_date].apply(
                    lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num,
                        data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            # 仅供调试使用
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)

            # data_df_all.to_sql('wind_stock_quertarly_hk', engine_md, if_exists='append')
            bunch_insert_on_duplicate_update(data_df_all,
                                             table_name,
                                             engine_md,
                                             dtype=dtype)
            logging.info("更新 wind_stock_quertarly_hk 结束 %d 条信息被更新",
                         data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_future_daily(chain_param=None, wind_code_set=None, begin_time=None):
    """
    更新期货合约日级别行情信息
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = "wind_future_daily"
    logger.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    param_list = [("open", DOUBLE), ("high", DOUBLE), ("low", DOUBLE),
                  ("close", DOUBLE), ("volume", DOUBLE), ("amt", DOUBLE),
                  ("dealnum", DOUBLE), ("settle", DOUBLE), ("oi", DOUBLE),
                  ("st_stock", DOUBLE), ('position', DOUBLE),
                  ('instrument_id', String(20)), (
                      'trade_date',
                      Date,
                  )]
    wind_indictor_str = ",".join([key for key, _ in param_list[:10]])

    if has_table:
        sql_str = """
            select wind_code, date_frm, if(lasttrade_date<end_date, lasttrade_date, end_date) date_to
            FROM
            (
            select fi.wind_code, ifnull(trade_date_max_1, ipo_date) date_frm, 
                lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            from wind_future_info fi 
            left outer join
                (select wind_code, adddate(max(trade_date),1) trade_date_max_1 from {table_name} group by wind_code) wfd
            on fi.wind_code = wfd.wind_code
            ) tt
            where date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date) 
            -- and subdate(curdate(), 360) < if(lasttrade_date<end_date, lasttrade_date, end_date) 
            order by wind_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT wind_code, date_frm,
                if(lasttrade_date<end_date,lasttrade_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, lasttrade_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_future_info info
            ) tt
            WHERE date_frm <= if(lasttrade_date<end_date, lasttrade_date, end_date)
            ORDER BY wind_code;
         """
        logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        # 获取date_from,date_to,将date_from,date_to做为value值
        future_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }

    # 设置 dtype
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    data_len = len(future_date_dic)
    try:
        logger.info("%d future instrument will be handled", data_len)
        for num, (wind_code, (date_frm,
                              date_to)) in enumerate(future_date_dic.items(),
                                                     start=1):
            # 暂时只处理 RU 期货合约信息
            # if wind_code.find('RU') == -1:
            #     continue
            if date_frm > date_to:
                continue
            date_frm_str = date_frm.strftime(STR_FORMAT_DATE)
            date_to_str = date_to.strftime(STR_FORMAT_DATE)
            logger.info('%d/%d) get %s between %s and %s', num, data_len,
                        wind_code, date_frm_str, date_to_str)
            # data_df = wsd_cache(w, wind_code, "open,high,low,close,volume,amt,dealnum,settle,oi,st_stock",
            #                         date_frm, date_to, "")
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str,
                                      date_frm_str, date_to_str, "")
            except APIError as exp:
                from tasks.wind import ERROR_CODE_MSG_DIC
                error_code = exp.ret_dic.setdefault('error_code', 0)
                if error_code in ERROR_CODE_MSG_DIC:
                    logger.error("%d/%d) %s 执行异常 error_code=%d, %s", num,
                                 data_len, wind_code, error_code,
                                 ERROR_CODE_MSG_DIC[error_code])
                else:
                    logger.exception("%d/%d) %s 执行异常 error_code=%d", num,
                                     data_len, wind_code, error_code)

                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num,
                               data_len, wind_code, date_frm_str, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', num,
                        data_len, data_df.shape[0], wind_code, date_frm_str,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns={c: str.lower(c)
                                    for c in data_df.columns},
                           inplace=True)
            data_df.rename(columns={'oi': 'position'},
                           inplace=True)  # oi 应该是 open_interest
            data_df['instrument_id'] = wind_code.split('.')[0]
            data_df_list.append(data_df)
            # 仅仅调试时使用
            if DEBUG and len(data_df_list) >= 1:
                break
    finally:
        data_df_count = len(data_df_list)
        if data_df_count > 0:
            logger.info('merge data with %d df', data_df_count)
            data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df,
                                                          table_name,
                                                          engine_md,
                                                          dtype=dtype)
            logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        else:
            logger.info("更新 %s 结束 0 条记录被更新", table_name)
Пример #3
0
def import_stock_daily_hk(chain_param=None,
                          wind_code_set=None,
                          begin_time=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_set:
    :param begin_time:
    :return:
    """
    table_name = 'wind_stock_daily_hk'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('OPEN', DOUBLE),
        ('HIGH', DOUBLE),
        ('LOW', DOUBLE),
        ('CLOSE', DOUBLE),
        ('ADJFACTOR', DOUBLE),
        ('VOLUME', DOUBLE),
        ('AMT', DOUBLE),
        ('PCT_CHG', DOUBLE),
        ('MAXUPORDOWN', Integer),
        ('SWING', DOUBLE),
        ('TURN', DOUBLE),
        ('FREE_TURN', DOUBLE),
        ('TRADE_STATUS', String(20)),
        ('SUSP_DAYS', Integer),
        ('TOTAL_SHARES', DOUBLE),
        ('FREE_FLOAT_SHARES', DOUBLE),
        ('EV2_TO_EBITDA', DOUBLE),
        ('PS_TTM', DOUBLE),
        ('PE_TTM', DOUBLE),
        ('PB_MRQ', DOUBLE),
        ('close_usd', DOUBLE),
        ('pe_lyr', DOUBLE),
        ('pb_lyr', DOUBLE),
    ]
    # 将列表列名转化为小写
    col_name_dic = {
        col_name.upper(): col_name.lower()
        for col_name, _ in param_list
    }
    # 获取列表列名
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    # wind_indictor_str = "open,high,low,close,adjfactor,volume,amt,pct_chg,maxupordown," + \
    #                     "swing,turn,free_turn,trade_status,susp_days," + \
    #                     "total_shares,free_float_shares,ev2_to_ebitda"
    wind_indictor_str = ",".join(col_name_list)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info_hk info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
               ON info.wind_code = daily.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_stock_info_hk 表进行计算日期范围', table_name)
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info_hk info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 获取每只股票需要获取日线数据的日期区间
        stock_date_dic = {
            wind_code:
            (date_from if begin_time is None else min([date_from, begin_time]),
             date_to)
            for wind_code, date_from, date_to in table.fetchall()
            if wind_code_set is None or wind_code in wind_code_set
        }
    # 设置 dtype
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date
    #data_df_list = []
    data_len = len(stock_date_dic)
    # 获取接口数据
    logger.info('%d data will been import into %s', data_len, table_name)
    try:
        for data_num, (wind_code,
                       (date_from,
                        date_to)) in enumerate(stock_date_dic.items()):
            logger.debug('%d/%d) %s [%s - %s]', data_num, data_len, wind_code,
                         date_from, date_to)
            data_df_list = []
            # if wind_code == '0192!1.HK':
            #     logger.warning('%s wind 没有这只股票数据', wind_code)
            #     continue

            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from,
                                      date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num, data_len,
                                 wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', data_num,
                               data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', data_num,
                        data_len, data_df.shape[0], wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df.rename(columns=col_name_dic, inplace=True)
            data_df_list.append(data_df)
            if len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)

                data_count = bunch_insert_on_duplicate_update(data_df_all,
                                                              table_name,
                                                              engine_md,
                                                              dtype=dtype)
                logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
                if not has_table and engine_md.has_table(table_name):
                    alter_table_2_myisam(engine_md, [table_name])
                    build_primary_key([table_name])

            # 仅供调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # 导入数据库 创建
        # if len(data_df_list) > 0:
        #     data_df_all = pd.concat(data_df_list)
        #
        #     data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
        #     logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
        #     if not has_table and engine_md.has_table(table_name):
        #         alter_table_2_myisam(engine_md, [table_name])
        #         build_primary_key([table_name])
        pass
Пример #4
0
def import_stock_quertarly(chain_param=None, wind_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    logging.info("更新 wind_fina_indicator 开始")
    table_name = 'wind_fina_indicator'
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM 
                   wind_stock_info info 
               LEFT OUTER JOIN
                   (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) quertarly
               ON info.wind_code = quertarly.wind_code
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_fina_indicator 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
           SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
           FROM
           (
               SELECT info.wind_code, ipo_date date_frm, delist_date,
               if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
               FROM wind_stock_info info 
           ) tt
           WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
           ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        stock_date_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}
    # 获取股票量价等行情数据
    param_list = [
        ('roic_ttm', DOUBLE),
        ('yoyprofit', DOUBLE),
        ('ebit', DOUBLE),
        ('ebit2', DOUBLE),
        ('ebit2_ttm', DOUBLE),
        ('surpluscapitalps', DOUBLE),
        ('undistributedps', DOUBLE),
        ('stm_issuingdate', DOUBLE),

    ]

    # 获取参数列表
    wind_indictor_str = ",".join(key for key, _ in param_list)
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    logger.info('%d stocks will been import into wind_stock_quertarly', len(stock_date_dic))

    try:
        for stock_num, (wind_code, (date_from, date_to)) in enumerate(stock_date_dic.items()):
            # 获取股票量价等行情数据
            # w.wsd("002122.SZ", "roic_ttm,yoyprofit,ebit,ebit2,ebit2_ttm,surpluscapitalps,undistributedps,stm_issuingdate", "2012-12-31", "2017-12-06", "unit=1;rptType=1;Period=Q")
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;rptType=1;Period=Q")
            if data_df is None:
                logger.warning('%d) %s has no data during %s %s', stock_num, wind_code, date_from, date_to)
                continue
            data_df.rename(columns={c: str(c).lower() for c in data_df.columns}, inplace=True)
            # 清理掉期间全空的行
            for trade_date in list(data_df.index[:10]):
                is_all_none = data_df.loc[trade_date].apply(lambda x: x is None).all()
                if is_all_none:
                    logger.warning("%s %s 数据全部为空", wind_code, trade_date)
                    data_df.drop(trade_date, inplace=True)
            logger.info('%d) %d data of %s between %s and %s', stock_num, data_df.shape[0], wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df.index.rename('trade_date', inplace=True)
            data_df.reset_index(inplace=True)
            data_df_list.append(data_df)
            if DEBUG and len(data_df_list) > 10:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
            logging.info("更新 wind_stock_quertarly 结束 %d 条信息被更新", data_df_all.shape[0])
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
def import_private_fund_nav_daily(chain_param=None, wind_code_list=None):
    """
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :param wind_code_list:
    :return:
    """
    table_name = 'wind_fund_nav_daily'
    # 初始化数据下载端口
    # 初始化数据库engine
    # 链接数据库,并获取fundnav旧表
    # with get_db_session(engine) as session:
    #     table = session.execute('select wind_code, ADDDATE(max(trade_date),1) from wind_fund_nav group by wind_code')
    #     fund_trade_date_begin_dic = dict(table.fetchall())
    # 获取wind_fund_info表信息
    col_name_param_list = [
        ('trade_date', Date),
        ('nav', DOUBLE),
        ('nav_acc', DOUBLE),
        ('nav_date', Date),
    ]
    dtype = {col_name: val for col_name, val in col_name_param_list}
    dtype['wind_code'] = String(200)
    has_table = engine_md.has_table(table_name)
    if has_table:
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code, 
                IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from,
                if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to 
                FROM fund_info fi
                LEFT JOIN
                (
                SELECT wind_code, ADDDATE(max(trade_date),1) trade_date_from FROM wind_fund_nav_daily
                GROUP BY wind_code
                ) wfn
                ON fi.wind_code = wfn.wind_code""", engine_md)
    else:
        fund_info_df = pd.read_sql_query(
            """SELECT DISTINCT fi.wind_code AS wind_code, 
                IFNULL(fund_setupdate, if(trade_date_latest BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1), ADDDATE(trade_date_latest,1) , fund_setupdate) ) date_from,
                if(fund_maturitydate BETWEEN '1900-01-01' AND ADDDATE(CURDATE(), -1),fund_maturitydate,ADDDATE(CURDATE(), -1)) date_to 
                FROM fund_info fi
                ORDER BY wind_code;""", engine_md)

        wind_code_date_frm_to_dic = {
            wind_code: (str_2_date(date_from), str_2_date(date_to))
            for wind_code, date_from, date_to in
            zip(fund_info_df['wind_code'], fund_info_df['date_from'],
                fund_info_df['date_to'])
        }
        fund_info_df.set_index('wind_code', inplace=True)
        if wind_code_list is None:
            wind_code_list = list(fund_info_df.index)
        else:
            wind_code_list = list(
                set(wind_code_list) & set(fund_info_df.index))
        # 结束时间
        date_last_day = date.today() - timedelta(days=1)
        # date_end_str = date_end.strftime(STR_FORMAT_DATE)

        fund_nav_all_df = []
        no_data_count = 0
        code_count = len(wind_code_list)
        # 对每个新获取的基金名称进行判断,若存在 fundnav 中,则只获取部分净值
        wind_code_trade_date_latest_dic = {}
        date_gap = timedelta(days=10)
        try:
            for num, wind_code in enumerate(wind_code_list):
                date_begin, date_end = wind_code_date_frm_to_dic[wind_code]

                # if date_end > date_last_day:
                #     date_end = date_last_day
                if date_begin > date_end:
                    continue
                # 设定数据获取的起始日期
                # wind_code_trade_date_latest_dic[wind_code] = date_to
                # if wind_code in fund_trade_date_begin_dic:
                #     trade_latest = fund_trade_date_begin_dic[wind_code]
                #     if trade_latest > date_end:
                #         continue
                #     date_begin = max([date_begin, trade_latest])
                # if date_begin is None:
                #     continue
                # elif isinstance(date_begin, str):
                #     date_begin = datetime.strptime(date_begin, STR_FORMAT_DATE).date()

                if isinstance(date_begin, date):
                    if date_begin.year < 1900:
                        continue
                    if date_begin > date_end:
                        continue
                    date_begin_str = date_begin.strftime('%Y-%m-%d')
                else:
                    logger.error("%s date_begin:%s", wind_code, date_begin)
                    continue

                if isinstance(date_end, date):
                    if date_begin.year < 1900:
                        continue
                    if date_begin > date_end:
                        continue
                    date_end_str = date_end.strftime('%Y-%m-%d')
                else:
                    logger.error("%s date_end:%s", wind_code, date_end)
                    continue
                # 尝试获取 fund_nav 数据
                for k in range(2):
                    try:
                        fund_nav_tmp_df = invoker.wsd(
                            codes=wind_code,
                            fields='nav,NAV_acc,NAV_date',
                            beginTime=date_begin_str,
                            endTime=date_end_str,
                            options='Fill=Previous')
                        trade_date_latest = datetime.strptime(
                            date_end_str, '%Y-%m-%d').date() - date_gap
                        wind_code_trade_date_latest_dic[
                            wind_code] = trade_date_latest
                        break
                    except APIError as exp:
                        # -40520007z
                        if exp.ret_dic.setdefault('error_code',
                                                  0) == -40520007:
                            trade_date_latest = datetime.strptime(
                                date_end_str, '%Y-%m-%d').date() - date_gap
                            wind_code_trade_date_latest_dic[
                                wind_code] = trade_date_latest
                        logger.error("%s Failed, ErrorMsg: %s" %
                                     (wind_code, str(exp)))
                        continue
                    except Exception as exp:
                        logger.error("%s Failed, ErrorMsg: %s" %
                                     (wind_code, str(exp)))
                        continue
                else:
                    fund_nav_tmp_df = None

                if fund_nav_tmp_df is None:
                    logger.info('%s No data', wind_code)
                    # del wind_code_trade_date_latest_dic[wind_code]
                    no_data_count += 1
                    logger.warning('%d funds no data', no_data_count)
                else:
                    fund_nav_tmp_df.dropna(how='all', inplace=True)
                    df_len = fund_nav_tmp_df.shape[0]
                    if df_len == 0:
                        continue
                    fund_nav_tmp_df['wind_code'] = wind_code
                    # 此处删除 trade_date_latest 之后再加上,主要是为了避免因抛出异常而导致的该条数据也被记录更新
                    # del wind_code_trade_date_latest_dic[wind_code]
                    trade_date_latest = fund_nav_df_2_sql(table_name,
                                                          fund_nav_tmp_df,
                                                          engine_md,
                                                          is_append=True)
                    if trade_date_latest is None:
                        logger.error('%s[%d] data insert failed', wind_code)
                    else:
                        wind_code_trade_date_latest_dic[
                            wind_code] = trade_date_latest
                        logger.info('%d) %s updated, %d funds left', num,
                                    wind_code, code_count - num)

                if DEBUG and num > 1:  # 调试使用
                    break
            #
        finally:
            import_wind_fund_nav_to_nav()
            update_trade_date_latest(wind_code_trade_date_latest_dic)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        return fund_nav_all_df
Пример #6
0
def import_stock_daily(chain_param=None, wind_code_set=None):
    """
    插入股票日线数据到最近一个工作日-1。
    如果超过 BASE_LINE_HOUR 时间,则获取当日的数据
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = 'wind_stock_daily'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('adjfactor', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('pct_chg', DOUBLE),
        ('maxupordown', Integer),
        ('swing', DOUBLE),
        ('turn', DOUBLE),
        ('free_turn', DOUBLE),
        ('trade_status', String(30)),
        ('susp_days', Integer),
        ('total_shares', DOUBLE),
        ('free_float_shares', DOUBLE),
        ('ev2_to_ebitda', DOUBLE),
    ]
    wind_indictor_str = ",".join([key for key, _ in param_list])
    rename_col_dic = {key.upper(): key.lower() for key, _ in param_list}
    has_table = engine_md.has_table(table_name)
    # 进行表格判断,确定是否含有wind_stock_daily
    if has_table:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
            SELECT info.wind_code, ifnull(trade_date, ipo_date) date_frm, delist_date,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                wind_stock_info info 
            LEFT OUTER JOIN
                (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
            ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
              (
                SELECT info.wind_code, ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info 
              ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code"""
        logger.warning('%s 不存在,仅使用 wind_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}
    # 设置 dtype
    dtype = {key: val for key, val in param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list = []
    data_len = len(code_date_range_dic)
    logger.info('%d stocks will been import into wind_stock_daily', data_len)
    # 将data_df数据,添加到data_df_list
    try:
        for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, wind_code, date_from, date_to)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    continue
                else:
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # 仅调试使用
            if DEBUG and len(data_df_list) > 2:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            data_df_all.rename(columns=rename_col_dic, inplace=True)
            # data_df_all.set_index(['wind_code', 'trade_date'], inplace=True)
            # data_df_all.to_sql('wind_stock_daily', engine_md, if_exists='append', dtype=dtype)
            # logging.info("更新 wind_stock_daily 结束 %d 条信息被更新", data_df_all.shape[0])
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
Пример #7
0
def add_data_2_ckdvp(col_name, param, wind_code_set: set = None, begin_time=None):
    """判断表格是否存在,存在则进行表格的关联查询
    :param col_name: 增加的列属性名
    :param param: 参数
    :param wind_code_set: 默认为None
    :param begin_time: 默认为None
    :return:
    """
    table_name = 'wind_ckdvp_stock'
    all_finished = False
    has_table = engine_md.has_table('wind_ckdvp_stock')
    if has_table:
        # 执行语句,表格数据联立
        sql_str = """
            select wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                select info.wind_code,
                    (ipo_date) date_frm, delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                from wind_stock_info info
                left outer join
                    (select wind_code, adddate(max(time),1) from wind_ckdvp_stock
                    where wind_ckdvp_stock.key='{0}' and param='{1}' group by wind_code
                    ) daily
                on info.wind_code = daily.wind_code
            ) tt
            where date_frm <= if(delist_date<end_date,delist_date, end_date)
            order by wind_code""".format(col_name, param)
    else:
        logger.warning('wind_ckdvp_stock 不存在,仅使用 wind_stock_info 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm,
                if(delist_date<end_date,delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code,ipo_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY wind_code"""
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}

        # 设置 dtype
        dtype = {
            'wind_code': String(20),
            'key': String(80),
            'time': Date,
            'value': String(80),
            'param': String(80),
        }
        data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
        try:
            for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code, date_from, date_to)
                data_df = invoker.wsd(
                    wind_code,
                    col_name,
                    date_from,
                    date_to,
                    param
                )
                if data_df is not None and data_df.shape[0] > 0:
                    # 对我们的表格进行规范整理,整理我们的列名,索引更改
                    data_df['key'] = col_name
                    data_df['param'] = param
                    data_df['wind_code'] = wind_code
                    data_df.rename(columns={col_name.upper(): 'value'}, inplace=True)
                    data_df.index.rename('time', inplace=True)
                    data_df.reset_index(inplace=True)
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

                # 大于阀值有开始插入
                if data_count >= 10000:
                    tot_data_df = pd.concat(data_df_list)
                    # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                    data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
                    tot_data_count += data_count
                    data_df_list, data_count = [], 0

                # 仅调试使用
                if DEBUG and len(data_df_list) > 1:
                    break

                all_finished = True
        finally:
            if data_count > 0:
                tot_data_df = pd.concat(data_df_list)
                # tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
                data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count

            if not has_table and engine_md.has_table(table_name):
                create_pk_str = """ALTER TABLE {table_name}
                    CHANGE COLUMN `wind_code` `wind_code` VARCHAR(20) NOT NULL ,
                    CHANGE COLUMN `time` `time` DATE NOT NULL ,
                    CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
                    CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
                    ADD PRIMARY KEY (`wind_code`, `time`, `key`, `param`)""".format(table_name=table_name)
                with with_db_session(engine_md) as session:
                    session.execute(create_pk_str)

            logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
        return all_finished
Пример #8
0
def fill_col_by_wsd(col_name_dic: dict, table_name, top_n=None):
    """补充历史col数据
    :param col_name_dic:
    :param table_name:
    :param top_n:
    :return:
    """
    # 股票列表
    # db_col_name_list = [col_name.lower() for col_name in col_name_dic.values()]
    col_name_list = [col_name.lower() for col_name in col_name_dic.keys()]
    # 获取每只股票ipo 日期 及 最小的交易日前一天
    #     sql_str = """select si.wind_code, td_from, td_to
    # from wind_stock_info si,
    # (select wind_code, min(trade_date) td_from, max(trade_date) td_to from wind_stock_daily where ev2_to_ebitda is null group by wind_code) sd
    # where si.wind_code = sd.wind_code"""
    where_sub_str = ' and '.join([col_name + ' is null' for col_name in col_name_dic.values()])
    sql_str = """
        select wsd.wind_code, min_trade_date, max_trade_date
        from
        (
        select wind_code, min(trade_date) min_trade_date, max(trade_date) max_trade_date
        from wind_convertible_bond_daily
        group by wind_code
        ) wsd
        INNER JOIN
        (
        select wind_code
        from """ + table_name + """ where """ + where_sub_str + """
        group by wind_code
        ) wsd_not_null
        on wsd.wind_code = wsd_not_null.wind_code"""
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        stock_trade_date_range_dic = {content[0]: (content[1], content[2]) for content in table.fetchall()}
    data_df_list = []
    data_count = len(stock_trade_date_range_dic)
    try:
        # for n, (wind_code, (date_from, date_to)) in enumerate(stock_trade_date_range_dic.items()):
        for data_num, (wind_code, (date_from, date_to)) in enumerate(stock_trade_date_range_dic.items(), start=1):
            if top_n is not None and data_num > top_n:
                break
            # 获取股票量价等行情数据
            wind_indictor_str = col_name_list
            data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               data_num, data_count, wind_code, date_from, date_to)
                continue
            logger.info('%d/%d) %d data of %s between %s and %s',
                        data_num, data_count, data_df.shape[0], wind_code, date_from, date_to)
            data_df['wind_code'] = wind_code
            data_df_list.append(data_df)
            # 仅供调试使用
            # if data_num >= 10:
            #     break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_df_all.index.rename('trade_date', inplace=True)
            data_df_all.reset_index(inplace=True)
            # 只有全部列为空的项才需要剔除
            is_na_s = None
            for col_name in col_name_dic.keys():
                col_name = col_name.upper()
                if is_na_s is None:
                    is_na_s = data_df_all[col_name].isna()
                else:
                    is_na_s = is_na_s & data_df_all[col_name].isna()
            data_df_not_null = data_df_all[~is_na_s]
            data_df_not_null.fillna(0, inplace=True)
            if data_df_not_null.shape[0] > 0:
                data_dic_list = data_df_not_null.to_dict(orient='records')
                sql_str = "update %s set " % table_name + \
                          ",".join(
                              ["%s=:%s" % (db_col_name, col_name.upper()) for col_name, db_col_name in
                               col_name_dic.items()]) + \
                          " where wind_code=:wind_code and trade_date=:trade_date"
                with with_db_session(engine_md) as session:
                    session.execute(sql_str, params=data_dic_list)
            logger.info('%d data updated on %s', data_df_not_null.shape[0], table_name)
        else:
            logger.warning('no data for updating on %s', table_name)
Пример #9
0
def import_cb_daily(chain_param=None, wind_code_set: set = None, begin_time=None):
    """
    导入可转债日线数据
    需要补充 转股价格
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return: 
    """
    table_name = "wind_convertible_bond_daily"
    info_table_name = "wind_convertible_bond_info"
    has_table = engine_md.has_table(table_name)
    col_name_param_list = [
        ('outstandingbalance', DOUBLE),
        ('clause_conversion2_bondlot', DOUBLE),
        ('clause_conversion2_bondproportion', DOUBLE),
        ('clause_conversion2_swapshareprice', DOUBLE),
        ('clause_conversion2_conversionproportion', DOUBLE),
        ('convpremium', DOUBLE),
        ('convpremiumratio', DOUBLE),
        ('convvalue', DOUBLE),
        ('convpe', DOUBLE),
        ('convpb', DOUBLE),
        ('underlyingpe', DOUBLE),
        ('underlyingpb', DOUBLE),
        ('diluterate', DOUBLE),
        ('ldiluterate', DOUBLE),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
    ]
    wind_indictor_str = ",".join(col_name for col_name, _ in col_name_param_list)
    logging.info("更新 %s 开始", table_name)

    if has_table:
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
            SELECT info.wind_code, 
                ifnull(trade_date, ipo_date) date_frm, clause_conversion_2_swapshareenddate delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                wind_convertible_bond_info info 
            LEFT OUTER JOIN
                (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
            ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
        sql_str = """
            SELECT wind_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.wind_code, 
                    ipo_date date_frm, clause_conversion_2_swapshareenddate delist_date,
                    if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_convertible_bond_info info 
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) 
            ORDER BY wind_code"""

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        # 获取date_from,date_to,将date_from,date_to做为value值
        code_date_range_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}

    # 设置dtype
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    dtype['trade_date'] = Date

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
    # 获取股票量价等行情数据
    try:
        for num, (wind_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, wind_code, begin_time, end_time)
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, begin_time, end_time, "unit=1")
            except APIError as exp:
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    logger.warning("%d/%d) %s 执行异常 %s", num, code_count, wind_code, exp.ret_dic)
                    continue
                else:
                    logger.exception("%d/%d) %s 执行异常", num, code_count, wind_code)
                    break
            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s',
                               num, code_count, wind_code, begin_time, end_time)
                continue
            if data_df is not None and data_df.shape[0] > 0:
                data_df['wind_code'] = wind_code
                data_df.index.rename('trade_date', inplace=True)
                data_df.reset_index(inplace=True)
                data_df.rename(columns={col: col.lower() for col in data_df.columns}, inplace=True)
                data_df_list.append(data_df)
                data_count += data_df.shape[0]
            # 大于阀值有开始插入
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
                logging.info("更新 %s %d 条信息", table_name, data_count)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # 仅调试使用
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])
Пример #10
0
def import_index_daily(chain_param=None):
    """导入指数数据
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = "wind_index_daily"
    has_table = engine_md.has_table(table_name)
    col_name_param_list = [
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('turn', DOUBLE),
        ('free_turn', DOUBLE),
    ]
    wind_indictor_str = ",".join([key for key, _ in col_name_param_list])
    rename_col_dic = {key.upper(): key.lower() for key, _ in col_name_param_list}
    dtype = {key: val for key, val in col_name_param_list}
    dtype['wind_code'] = String(20)
    # TODO: 'trade_date' 声明为 Date 类型后,插入数据库会报错,目前原因不详,日后再解决
    # dtype['trade_date'] = Date,

    # yesterday = date.today() - timedelta(days=1)
    # date_ending = date.today() - ONE_DAY if datetime.now().hour < BASE_LINE_HOUR else date.today()
    # sql_str = """select wii.wind_code, wii.sec_name, ifnull(adddate(latest_date, INTERVAL 1 DAY), wii.basedate) date_from
    #     from wind_index_info wii left join
    #     (
    #         select wind_code,index_name, max(trade_date) as latest_date
    #         from wind_index_daily group by wind_code
    #     ) daily
    #     on wii.wind_code=daily.wind_code"""
    # with with_db_session(engine_md) as session:
    #     table = session.execute(sql_str)
    #     wind_code_date_from_dic = {wind_code: (sec_name, date_from) for wind_code, sec_name, date_from in table.fetchall()}
    # with with_db_session(engine_md) as session:
    #     # 获取市场有效交易日数据
    #     sql_str = "select trade_date from wind_trade_date where trade_date > '2005-1-1'"
    #     table = session.execute(sql_str)
    #     trade_date_sorted_list = [t[0] for t in table.fetchall()]
    #     trade_date_sorted_list.sort()
    # date_to = get_last(trade_date_sorted_list, lambda x: x <= date_ending)
    # data_len = len(wind_code_date_from_dic)
    if has_table:
        sql_str = """
              SELECT wind_code, date_frm, if(null<end_date, null, end_date) date_to
              FROM
              (
                  SELECT info.wind_code, ifnull(trade_date, basedate) date_frm, null,
                  if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                  FROM 
                      wind_index_info info 
                  LEFT OUTER JOIN
                      (SELECT wind_code, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY wind_code) daily
                  ON info.wind_code = daily.wind_code
              ) tt
              WHERE date_frm <= if(null<end_date, null, end_date) 
              ORDER BY wind_code""".format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 wind_index_info 表进行计算日期范围', table_name)
        sql_str = """
              SELECT wind_code, date_frm, if(null<end_date, null, end_date) date_to
              FROM
              (
                  SELECT info.wind_code, basedate date_frm, null,
                  if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                  FROM wind_index_info info 
              ) tt
              WHERE date_frm <= if(null<end_date, null, end_date) 
              ORDER BY wind_code;"""

    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 获取每只股票需要获取日线数据的日期区间
        begin_time = None
        wind_code_date_from_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}

    data_len = len(wind_code_date_from_dic)

    logger.info('%d indexes will been import', data_len)
    for data_num, (wind_code, (date_from, date_to)) in enumerate(wind_code_date_from_dic.items()):
        if str_2_date(date_from) > date_to:
            logger.warning("%d/%d) %s %s - %s 跳过", data_num, data_len, wind_code, date_from, date_to)
            continue
        try:
            temp = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to)
        except APIError as exp:
            logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code)
            if exp.ret_dic.setdefault('error_code', 0) in (
                    -40520007,  # 没有可用数据
                    -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
            ):
                continue
            else:
                break
        temp.reset_index(inplace=True)
        temp.rename(columns={'index': 'trade_date'}, inplace=True)
        temp.rename(columns=rename_col_dic, inplace=True)
        temp.trade_date = temp.trade_date.apply(str_2_date)
        temp['wind_code'] = wind_code
        bunch_insert_on_duplicate_update(temp, table_name, engine_md, dtype=dtype)
        logger.info('更新指数 %s 至 %s 成功', wind_code, date_2_str(date_to))
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
Пример #11
0
def import_smfund_daily(chain_param=None):
    """
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return:
    """
    table_name = "wind_smfund_daily"
    has_table = engine_md.has_table(table_name)
    col_name_param_list = [
        ('next_pcvdate', Date),
        ('a_nav', DOUBLE),
        ('b_nav', DOUBLE),
        ('a_fs_inc', DOUBLE),
        ('b_fs_inc', DOUBLE),
        ('cur_interest', DOUBLE),
        ('next_interest', DOUBLE),
        ('ptm_year', DOUBLE),
        ('anal_pricelever', DOUBLE),
        ('anal_navlevel', DOUBLE),
        ('t1_premium', DOUBLE),
        ('t2_premium', DOUBLE),
        ('dq_status', String(50)),
        ('tm_type', TEXT),
        ('code_p', String(20)),
        ('trade_date', Date),
        ('open', DOUBLE),
        ('high', DOUBLE),
        ('low', DOUBLE),
        ('close', DOUBLE),
        ('volume', DOUBLE),
        ('amt', DOUBLE),
        ('pct_chg', DOUBLE),
        ('open_a', DOUBLE),
        ('high_a', DOUBLE),
        ('low_a', DOUBLE),
        ('close_a', DOUBLE),
        ('volume_a', DOUBLE),
        ('amt_a', DOUBLE),
        ('pct_chg_a', DOUBLE),
        ('open_b', DOUBLE),
        ('high_b', DOUBLE),
        ('low_b', DOUBLE),
        ('close_b', DOUBLE),
        ('volume_b', DOUBLE),
        ('amt_b', DOUBLE),
        ('pct_chg_b', DOUBLE),
    ]
    # wset的调用参数
    wind_indictor_str = ",".join([key for key, _ in col_name_param_list[:14]])
    # 设置dtype类型
    dtype = {key: val for key, val in col_name_param_list}

    date_ending = date.today() - ONE_DAY if datetime.now(
    ).hour < BASE_LINE_HOUR else date.today()
    date_ending_str = date_ending.strftime('%Y-%m-%d')
    # 对于 表格是否存在进行判断,取值
    if has_table:
        sql_str = """
            SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code
            FROM wind_smfund_info fi LEFT OUTER JOIN
            (SELECT code_p, adddate(max(trade_date), 1) trade_date_max FROM wind_smfund_daily GROUP BY code_p) smd
            ON fi.wind_code = smd.code_p
            WHERE fund_setupdate IS NOT NULL
            AND class_a_code IS NOT NULL
            AND class_b_code IS NOT NULL"""
    else:
        sql_str = """
            SELECT wind_code, ifnull(date, fund_setupdate) date_start, class_a_code, class_b_code
            FROM wind_smfund_info
            WHERE fund_setupdate IS NOT NULL
            AND class_a_code IS NOT NULL
            AND class_b_code IS NOT NULL"""
    df = pd.read_sql(sql_str, engine_md)
    df.set_index('wind_code', inplace=True)

    data_len = df.shape[0]
    logger.info('分级基金数量: %d', data_len)
    index_start = 1
    # 获取data_from
    for data_num, wind_code in enumerate(
            df.index, start=1):  # 可调整 # [100:min([df_count, 200])]
        if data_num < index_start:
            continue
        logger.info('%d/%d) %s start to import', data_num, data_len, wind_code)
        date_from = df.loc[wind_code, 'date_start']
        date_from = str_2_date(date_from)
        if type(date_from) not in (date, datetime, Timestamp):
            logger.info('%d/%d) %s has no fund_setupdate will be ignored',
                        data_num, data_len, wind_code)
            # print(df.iloc[i, :])
            continue
        date_from_str = date_from.strftime('%Y-%m-%d')
        if date_from > date_ending:
            logger.info('%d/%d) %s %s %s 跳过', data_num, data_len, wind_code,
                        date_from_str, date_ending_str)
            continue
        # 设置wsd接口参数
        field = "open,high,low,close,volume,amt,pct_chg"
        # wsd_cache(w, code, field, beginTime, today, "")
        try:
            df_p = invoker.wsd(wind_code, field, date_from_str,
                               date_ending_str, "")
        except APIError as exp:
            logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code)
            if exp.ret_dic.setdefault('error_code', 0) in (
                    -40520007,  # 没有可用数据
                    -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
            ):
                continue
            else:
                break
        if df_p is None:
            continue
        df_p.rename(columns=lambda x: x.swapcase(), inplace=True)
        df_p['code_p'] = wind_code
        code_a = df.loc[wind_code, 'class_a_code']
        if code_a is None:
            print('%d %s has no code_a will be ignored' %
                  (data_num, wind_code))
            # print(df.iloc[i, :])
            continue
        # 获得数据存储到df_a里面
        # df_a = wsd_cache(w, code_a, field, beginTime, today, "")
        df_a = invoker.wsd(code_a, field, date_from_str, date_ending_str, "")
        df_a.rename(columns=lambda x: x.swapcase() + '_a', inplace=True)
        code_b = df.loc[wind_code, 'class_b_code']
        # df_b = wsd_cache(w, code_b, field, beginTime, today, "")
        # 获取接口数据 获得df_b
        df_b = invoker.wsd(code_b, field, date_from_str, date_ending_str, "")
        df_b.columns = df_b.columns.map(lambda x: x.swapcase() + '_b')
        new_df = pd.DataFrame()
        for date_str in df_p.index:
            # time = date_str.date().strftime('%Y-%m-%d')
            field = "date=%s;windcode=%s;field=%s" % (date_str, wind_code,
                                                      wind_indictor_str)
            # wset_cache(w, "leveragedfundinfo", field)
            temp = invoker.wset("leveragedfundinfo", field)
            temp['date'] = date_str
            new_df = new_df.append(temp)
            if DEBUG and len(new_df) > 8:
                break
        # 将获取信息进行表格联立 合并
        new_df['next_pcvdate'] = new_df['next_pcvdate'].map(
            lambda x: str_2_date(x) if x is not None else x)
        new_df.set_index('date', inplace=True)
        one_df = pd.concat([df_p, df_a, df_b, new_df], axis=1)
        one_df.index.rename('trade_date', inplace=True)
        one_df.reset_index(inplace=True)
        #    one_df['date'] = one_df['date'].map(lambda x: x.date())
        one_df.rename(columns={'date': 'trade_date'}, inplace=True)
        # one_df.rename(columns={"index":'trade_date'},inplace=True)
        # one_df.set_index(['code_p', 'trade_date'], inplace=True)
        bunch_insert_on_duplicate_update(one_df,
                                         table_name,
                                         engine_md,
                                         dtype=dtype)
        logger.info('%d/%d) %s import success', data_num, data_len, wind_code)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            # 手动创建主键, 主键不是wind_code
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `code_p` `code_p` VARCHAR(20) NOT NULL ,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL ,
                ADD PRIMARY KEY (`code_p`, `trade_date`)""".format(
                table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
Пример #12
0
def import_pub_fund_daily(wind_code_set,chain_param=None):
    """
    导入公募基金日线数据
    :param chain_param:  在celery 中將前面結果做爲參數傳給後面的任務
    :return: 
    """
    logging.info("更新 wind_pub_fund_daily 开始")
    table_name = "wind_pub_fund_daily"
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
        SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
            (
            SELECT info.wind_code, ifnull(nav_date, fund_setupdate) date_frm, fund_maturitydate,
            if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
            FROM 
                wind_pub_fund_info info 
            LEFT OUTER JOIN
                (SELECT wind_code, adddate(max(nav_date),1) nav_date FROM {table_name} GROUP BY wind_code) daily
            ON info.wind_code = daily.wind_code
            ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) 
            ORDER BY wind_code;""".format(table_name=table_name)
    else:
        logger.warning('wind_pub_fund_daily 不存在,仅使用 wind_pub_fund_info 表进行计算日期范围')
        sql_str = """
            SELECT wind_code, date_frm, if(fund_maturitydate<end_date, fund_maturitydate, end_date) date_to
            FROM
              (
                SELECT info.wind_code, fund_setupdate date_frm, fund_maturitydate,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM wind_pub_fund_info info 
              ) tt
            WHERE date_frm <= if(fund_maturitydate<end_date, fund_maturitydate, end_date) 
            ORDER BY wind_code
        """

    # with with_db_session(engine_md) as session:
    #     # 获取每只股票最新交易日数据
    #     sql_str = 'select wind_code, max(nav_date) from wind_pub_fund_daily group by wind_code'
    #     table = session.execute(sql_str)
    #     trade_date_latest_dic = dict(table.fetchall())
    #     # 获取市场有效交易日数据
    #     sql_str = "select trade_date from wind_trade_date where trade_date > '1997-1-1'"
    #     table = session.execute(sql_str)
    #     trade_date_sorted_list = [t[0] for t in table.fetchall()]
    #     trade_date_sorted_list.sort()
    #     # 获取每只股票上市日期、退市日期
    #     table = session.execute('SELECT wind_code, setup_date, maturity_date FROM wind_pub_fund_info')
    #
    #
    #     wind_code_date_dic = {
    #     wind_code: (setup_date, maturity_date if maturity_date is None or maturity_date > UN_AVAILABLE_DATE else None)
    #     for
    #     wind_code, setup_date, maturity_date in table.fetchall()}
    # date_ending = date.today() - ONE_DAY if datetime.now().hour < BASE_LINE_HOUR else date.today()
    with with_db_session(engine_md) as session:
        # 获取每只股票需要获取日线数据的日期区间
        table = session.execute(sql_str)
        # 计算每只股票需要获取日线数据的日期区间
        begin_time = None
        # 获取date_from,date_to,将date_from,date_to做为value值
        trade_date_latest_dic = {
            wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for wind_code, date_from, date_to in table.fetchall() if
            wind_code_set is None or wind_code in wind_code_set}
    data_df_list = []
    wind_code_date_count = len(trade_date_latest_dic)

    logger.info('%d pub fund will been import into wind_pub_fund_daily', wind_code_date_count)
    # 获取股票量价等行情数据
    field_col_name_list = [
        ('NAV_date', Date),
        ('NAV_acc', String(20)),
        ('netasset_total', String(20)),
    ]
    wind_indictor_str = ",".join(key.lower() for key, _ in field_col_name_list)
    upper_col_2_name_dic = {name.upper(): name.lower() for name, _ in field_col_name_list}
    dtype = {key.lower(): val for key, val in field_col_name_list}
    dtype['wind_code'] = String(20)
    try:
        data_tot = 0
        for data_num, (wind_code, (date_from, date_to)) in enumerate(trade_date_latest_dic.items()):
            # 初次加载阶段全量载入,以后 ipo_date为空的情况,直接warning跳过
            # if setup_date is None:
            #     # date_ipo = DATE_BASE
            #     logging.warning("%d/%d) %s 缺少 ipo date", data_num, wind_code_date_count, wind_code)
            #     continue
            # # 获取 date_from
            # if wind_code in trade_date_latest_dic:
            #     date_latest_t1 = trade_date_latest_dic[wind_code] + ONE_DAY
            #     date_from = max([date_latest_t1, DATE_BASE, setup_date])
            # else:
            #     date_from = max([DATE_BASE, setup_date])
            # date_from = get_first(trade_date_sorted_list, lambda x: x >= date_from)
            # # 获取 date_to
            # if maturity_date is None:
            #     date_to = date_ending
            # else:
            #     date_to = min([maturity_date, date_ending])
            # date_to = get_last(trade_date_sorted_list, lambda x: x <= date_to)
            # if date_from is None or date_to is None or date_from > date_to:
            #     continue
            try:
                data_df = invoker.wsd(wind_code, wind_indictor_str, date_from, date_to, "unit=1;Days=Weekdays")
            except APIError as exp:
                logger.exception("%d/%d) %s 执行异常", data_num, wind_code_date_count, wind_code)
                if exp.ret_dic.setdefault('error_code', 0) in (
                        -40520007,  # 没有可用数据
                        -40521009,  # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月
                ):
                    continue
                else:
                    break

            if data_df is None:
                logger.warning('%d/%d) %s has no ohlc data during %s %s', data_num, wind_code_date_count, wind_code,
                               date_from, date_to)
                continue
            # 对数据进行 清理,整理,整合
            data_df = data_df.drop_duplicates().dropna()
            data_df.rename(columns=upper_col_2_name_dic, inplace=True)
            logger.info('%d/%d) %d data of %s between %s and %s', data_num, wind_code_date_count, data_df.shape[0],
                        wind_code, date_from,
                        date_to)
            data_df['wind_code'] = wind_code
            data_tot += data_df.shape[0]
            data_df_list.append(data_df)
            if data_tot > 10000:
                bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
                data_df_list = []

                data_tot = 0
            # 仅仅调试时使用
            # if DEBUG and len(data_df) > 2000:
            #     break

    finally:
        # 导入数据库
        if len(data_df_list) > 0:
            bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
            logging.info("更新 wind_pub_fund_daily 结束 %d 条信息被更新", len(data_df_list))
            if not has_table and engine_md.has_table(table_name):
                alter_table_2_myisam(engine_md, [table_name])
                build_primary_key([table_name])