def import_edb(chain_param=None, wind_code_set=None): """ 通过wind接口获取并导入EDB数据 :param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務 :return: """ table_name = 'wind_commodity_edb' has_table = engine_md.has_table(table_name) logging.info("更新 %s 开始", table_name) param_list = [ ('close', DOUBLE), ] rename_col_dic = {key.upper(): key.lower() for key, _ in param_list} # info_df = pd.read_sql('wind_commodity_info', engine_md) # 进行表格判断,确定是否含有 wind_commodity_edb if has_table: sql_str = """ SELECT `wind_code`, date_frm, if(end_date<end_date2, end_date, end_date2) date_to FROM ( SELECT info.`wind_code`, ifnull(trade_date, begin_date) date_frm, end_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2 FROM wind_commodity_info info LEFT OUTER JOIN (SELECT `wind_code`, adddate(max(trade_date),1) trade_date FROM {table_name} GROUP BY `wind_code`) daily ON info.`wind_code` = daily.`wind_code` ) tt WHERE date_frm <= if(end_date<end_date2, end_date, end_date2) ORDER BY `wind_code`""".format(table_name=table_name) else: logger.warning('%s 不存在,仅使用 wind_commodity_info 表进行计算日期范围', table_name) sql_str = """ SELECT `wind_code`, date_frm, if(end_date<end_date2, end_date, end_date2) date_to FROM ( SELECT info.`wind_code`, begin_date date_frm, end_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2 FROM wind_commodity_info info ) tt WHERE date_frm <= if(end_date<end_date2, end_date, end_date2) ORDER BY `wind_code`""" with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { wind_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for wind_code, date_from, date_to in table.fetchall() if wind_code_set is None or wind_code in wind_code_set } # 设置 dtype dtype = {key: val for key, val in param_list} dtype['wind_code'] = String(20) dtype['trade_date'] = Date data_df_list = [] data_len = len(code_date_range_dic) logger.info('%d stocks will been import into wind_commodity_edb', data_len) # 将data_df数据,添加到data_df_list try: for num, (wind_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, wind_code, date_from, date_to) try: data_df = invoker.edb(wind_code, date_from, date_to, options='') except APIError as exp: logger.exception("%d/%d) %s 执行异常", num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, wind_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], wind_code, date_from, date_to) data_df['wind_code'] = wind_code data_df.rename(columns={wind_code.upper(): 'value'}, inplace=True) data_df_list.append(data_df) # 仅调试使用 if DEBUG and len(data_df_list) > 2: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_df_all.index.rename('trade_date', inplace=True) data_df_all.reset_index(inplace=True) # data_df_all.rename(columns=rename_col_dic, inplace=True) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `wind_code` wind_code` VARCHAR(20) NOT NULL FIRST, CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `wind_code`, ADD PRIMARY KEY (`wind_code`, `trade_date`)""".format( table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str) logger.info('%s 表 `wind_code` `trade_date` 主键设置完成', table_name)
def import_edb_monthly(): table_name = 'wind_edb_monthly' has_table = engine_md.has_table(table_name) PMI_FIELD_CODE_2_CN_DIC = { "M0017126": ("PMI", date(2005, 1, 1)), "M0017127": ("PMI:生产", date(2005, 1, 1)), "M0017128": ("PMI:新订单", date(2005, 1, 1)), "M0017129": ("PMI:新出口订单", date(2005, 1, 1)), "M0017130": ("PMI:在手订单", date(2005, 1, 1)), "M0017131": ("PMI:产成品库存", date(2005, 1, 1)), "M0017132": ("PMI:采购量", date(2005, 1, 1)), "M0017133": ("PMI:进口", date(2005, 1, 1)), "M5766711": ("PMI:出厂价格", date(2005, 1, 1)), "M0017134": ("PMI:主要原材料购进价格", date(2005, 1, 1)), "M0017135": ("PMI:原材料库存", date(2005, 1, 1)), "M0017136": ("PMI:从业人员", date(2005, 1, 1)), "M0017137": ("PMI:供货商配送时间", date(2005, 1, 1)), "M5207790": ("PMI:生产经营活动预期", date(2005, 1, 1)), "M5206738": ("PMI:大型企业", date(2005, 1, 1)), "M5206739": ("PMI:中型企业", date(2005, 1, 1)), "M5206740": ("PMI:小型企业", date(2005, 1, 1)), "M5407921": ("克强指数:当月值", date(2009, 7, 1)), "M0000612": ("CPI:当月同比", date(1990, 1, 1)), "M0000616": ("CPI:食品:当月同比", date(1990, 1, 1)), "M0000613": ("CPI:非食品:当月同比", date(1990, 1, 1)), "M0000614": ("CPI:消费品:当月同比", date(1990, 1, 1)), "M0000615": ("CPI:服务:当月同比", date(1990, 1, 1)), "M0000705": ("CPI:环比", date(1990, 1, 1)), "M0000706": ("CPI:食品:环比", date(1990, 1, 1)), "M0061581": ("CPI:非食品:环比", date(1990, 1, 1)), "M0061583": ("CPI:消费品:环比", date(1990, 1, 1)), "M0001227": ("PPI:全部工业品:当月同比", date(1996, 10, 1)), "M0061585": ("PPI:全部工业品:环比", date(2002, 1, 1)), "M0001228": ("PPI:生产资料:当月同比", date(1996, 10, 1)), "M0066329": ("PPI:生产资料:环比", date(2011, 1, 1)), "M0001232": ("PPI:生活资料:当月同比", date(1996, 10, 1)), "M0066333": ("PPI:生活资料:环比", date(2011, 1, 1)), } # 设置表属性类型 param_list = [ ('field_name', String(45)), ('trade_date', Date), ('val', DOUBLE), ] dtype = {key: val for key, val in param_list} dtype['field_code'] = String(20) data_len = len(PMI_FIELD_CODE_2_CN_DIC) if has_table: sql_str = """select field_code, max(trade_date) trade_date_max from wind_edb_monthly group by field_code""" else: sql_str = """ CREATE TABLE {table_name } ( `field_code` varchar(20) NOT NULL, `field_name` varchar(45) DEFAULT NULL, `trade_date` date NOT NULL, `val` double DEFAULT NULL, PRIMARY KEY (`field_code`,`trade_date`) ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='保存wind edb 宏观经济数据'; """.format(table_name) # 获取数据库中最大日期 with with_db_session(engine_md) as session: table = session.execute(sql_str) field_date_dic = {row[0]: row[1] for row in table.fetchall()} # 循环更新 for data_num, (wind_code, (field_name, date_from)) in enumerate(PMI_FIELD_CODE_2_CN_DIC.items(), start=1): if wind_code in field_date_dic: date_from = field_date_dic[wind_code] + timedelta(days=1) date_to = date.today() - timedelta(days=1) logger.info('%d/%d) %s %s [%s %s]', data_num, data_len, wind_code, field_name, date_from, date_to) try: data_df = invoker.edb(wind_code, date_from, date_to, "Fill=Previous") except APIError as exp: logger.exception("%d/%d) %s 执行异常", data_num, data_len, wind_code) if exp.ret_dic.setdefault('error_code', 0) in ( -40520007, # 没有可用数据 -40521009, # 数据解码失败。检查输入参数是否正确,如:日期参数注意大小月月末及短二月 ): continue else: break if data_df is None or data_df.shape[0] == 0: continue trade_date_max = str_2_date(max(data_df.index)) if trade_date_max <= date_from: continue data_df.index.rename('trade_date', inplace=True) data_df.reset_index(inplace=True) data_df.rename(columns={wind_code.upper(): 'val'}, inplace=True) data_df['field_code'] = wind_code data_df['field_name'] = field_name # data_df.to_sql('wind_edb_monthly', engine_md, if_exists='append', index=False) bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) build_primary_key([table_name])