def import_index_info(chain_param=None, ths_code=None):
    """
    Load index basic information from iFinD into the ``ifind_index_info`` table.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code: codes passed straight to ``THS_BasicData``; when ``None`` the
        code set returned by ``get_stock_code_set`` for today is joined with commas
    :return: None
    """
    table_name = 'ifind_index_info'
    existed_before = engine_md.has_table(table_name)
    logging.info("更新 ifind_index_info 开始")

    if ths_code is None:
        # No explicit codes given: query the code set valid as of today
        code_set = set()
        fetched_set = get_stock_code_set(date.today())
        if fetched_set is not None:
            code_set |= fetched_set
        ths_code = ','.join(code_set)

    # (indicator name, indicator parameter, column type)
    field_specs = [
        ('ths_index_short_name_index', '', String(20)),
        ('ths_index_code_index', '', String(10)),
        ('ths_index_category_index', '', String(20)),
        ('ths_index_base_period_index', '', Date),
        ('ths_index_base_point_index', '', DOUBLE),
        ('ths_publish_org_index', '', String(20)),
    ]
    indicator, param = unzip_join(
        [(name, arg) for name, arg, _ in field_specs], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 index info 可以更新")
        return

    column_types = {name: col_type for name, _, col_type in field_specs}
    column_types['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(
        data_df, table_name, engine_md, column_types)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)

    # The table was created by the insert above: convert the engine and add the key
    created_now = not existed_before and engine_md.has_table(table_name)
    if created_now:
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table from the info table
    update_from_info_table(table_name)
def import_future_info(chain_param=None):
    """
    Update the futures contract list stored in ``ifind_future_info``.

    For every configured sector the contract list is pulled from
    ``THS_DataPool`` in steps of ``ndays_per_update`` days, starting at the date
    returned by ``get_date_since``. Contracts that are not yet stored are then
    looked up through ``THS_BasicData`` and inserted.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :return: None
    """
    table_name = 'ifind_future_info'
    logger.info("更新 %s 开始", table_name)

    # Contracts already stored: ths_code -> first trade date
    try:
        sql_str = 'SELECT ths_code, ths_start_trade_date_future FROM {table_name}'.format(
            table_name=table_name)
        with with_db_session(engine_md) as session:
            table = session.execute(sql_str)
            code_ipo_date_dic = dict(table.fetchall())
    except Exception:
        # Best effort (e.g. the table does not exist yet). Narrowed from a bare
        # ``except`` so KeyboardInterrupt / SystemExit are no longer swallowed.
        logger.debug("读取 %s 已有合约列表失败,视为无存量数据", table_name, exc_info=True)
        code_ipo_date_dic = {}

    # Sectors whose contract lists are fetched.
    # NOTE(review): the DCE products (焦炭, 焦煤, 铁矿石) use a ``.SHF`` suffix in
    # their regex; confirm against get_date_since and the stored ths_code values.
    future_sectorid_dic_list = [
        {'subject_name': 'CFE 沪深300', 'regex': r"IF\d{4}\.CFE",
         'sectorid': '091004001', 'date_establish': '2010-4-16'},
        {'subject_name': 'CFE 上证50', 'regex': r"IH\d{4}\.CFE",
         'sectorid': '091004003', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 中证500', 'regex': r"IC\d{4}\.CFE",
         'sectorid': '091004004', 'date_establish': '2015-4-16'},
        {'subject_name': 'CFE 5年国债期货', 'regex': r"TF\d{4}\.CFE",
         'sectorid': '091004002', 'date_establish': '2013-09-06'},
        {'subject_name': 'CFE 10年期国债期货', 'regex': r"T\d{4}\.CFE",
         'sectorid': '091004005', 'date_establish': '2015-03-20'},
        {'subject_name': 'SHFE 黄金', 'regex': r"AU\d{4}\.SHF",
         'sectorid': '091002002', 'date_establish': '2008-01-09'},
        {'subject_name': 'SHFE 沪银', 'regex': r"AG\d{4}\.SHF",
         'sectorid': '091002010', 'date_establish': '2012-05-10'},
        {'subject_name': 'SHFE 螺纹钢', 'regex': r"RB\d{4}\.SHF",
         'sectorid': '091002006', 'date_establish': '2009-03-27'},
        {'subject_name': 'SHFE 热卷', 'regex': r"HC\d{4}\.SHF",
         'sectorid': '091002012', 'date_establish': '2014-03-21'},
        {'subject_name': 'DCE 焦炭', 'regex': r"J\d{4}\.SHF",
         'sectorid': '091001004', 'date_establish': '2011-04-15'},
        {'subject_name': 'DCE 焦煤', 'regex': r"JM\d{4}\.SHF",
         'sectorid': '091001010', 'date_establish': '2013-03-22'},
        {'subject_name': '铁矿石', 'regex': r"I\d{4}\.SHF",
         'sectorid': '091001011', 'date_establish': '2013-10-18'},
        {'subject_name': '天然橡胶', 'regex': r"RU\d{4}\.SHF",
         'sectorid': '091002007', 'date_establish': '1997-02-01'},
        {'subject_name': '铜', 'regex': r"CU\d{4}\.SHF",
         'sectorid': '091002003', 'date_establish': '1997-02-01'},
        {'subject_name': '铝', 'regex': r"AL\d{4}\.SHF",
         'sectorid': '091002001', 'date_establish': '1997-02-01'},
        {'subject_name': '锌', 'regex': r"ZN\d{4}\.SHF",
         'sectorid': '091002009', 'date_establish': '2007-03-26'},
        {'subject_name': '铅', 'regex': r"PB\d{4}\.SHF",
         'sectorid': '091002005', 'date_establish': '2011-03-24'},
        {'subject_name': '镍', 'regex': r"NI\d{4}\.SHF",
         'sectorid': '091002014', 'date_establish': '2015-03-27'},
        {'subject_name': '锡', 'regex': r"SN\d{4}\.SHF",
         'sectorid': '091002013', 'date_establish': '2015-03-27'},
        {'subject_name': '白糖', 'regex': r"SR\d{4}\.CZC",
         'sectorid': '091003004', 'date_establish': '2006-01-06'},
        {'subject_name': '棉花', 'regex': r"CF\d{4}\.CZC",
         'sectorid': '091003001', 'date_establish': '2004-06-01'},
        {'subject_name': '鲜苹果', 'regex': r"AP\d{4}\.CZC",
         'sectorid': '091003019', 'date_establish': '2017-12-22'},
    ]

    # (indicator name, indicator parameter, column type)
    indicator_param_list = [
        ('ths_future_short_name_future', '', String(20)),
        ('ths_future_code_future', '', String(20)),
        ('ths_sec_type_future', '', String(20)),
        ('ths_td_variety_future', '', String(20)),
        ('ths_td_unit_future', '', DOUBLE),
        ('ths_pricing_unit_future', '', String(20)),
        ('ths_mini_chg_price_future', '', DOUBLE),
        ('ths_chg_ratio_lmit_future', '', DOUBLE),
        ('ths_td_deposit_future', '', DOUBLE),
        ('ths_start_trade_date_future', '', Date),
        ('ths_last_td_date_future', '', Date),
        ('ths_last_delivery_date_future', '', Date),
        ('ths_delivery_month_future', '', String(10)),
        ('ths_listing_benchmark_price_future', '', DOUBLE),
        ('ths_initial_td_deposit_future', '', DOUBLE),
        ('ths_contract_month_explain_future', '', String(60)),
        ('ths_td_time_explain_future', '', String(80)),
        ('ths_last_td_date_explian_future', '', String(60)),
        ('ths_delivery_date_explain_future', '', String(60)),
        ('ths_exchange_short_name_future', '', String(20)),
        ('ths_contract_en_short_name_future', '', String(20)),
        ('ths_contract_en_name_future', '', String(20)),
    ]
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in indicator_param_list], sep=';')
    # Column types used when writing to the database
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)

    # Collect contract codes per sector, stepping through history
    code_set = set()
    ndays_per_update = 60
    sector_count = len(future_sectorid_dic_list)
    for num, future_sectorid_dic in enumerate(future_sectorid_dic_list, start=1):
        subject_name = future_sectorid_dic['subject_name']
        sector_id = future_sectorid_dic['sectorid']
        regex_str = future_sectorid_dic['regex']
        date_establish = datetime.strptime(
            future_sectorid_dic['date_establish'], STR_FORMAT_DATE).date()
        # First date for which the contract list still has to be fetched
        date_since = get_date_since(code_ipo_date_dic, regex_str, date_establish)
        date_yesterday = date.today() - timedelta(days=1)
        logger.debug('%d/%d) 获取 %s %s [%s - %s] 合约列表',
                     num, sector_count, subject_name, sector_id, date_since, date_yesterday)
        while date_since <= date_yesterday:
            date_since_str = date_since.strftime(STR_FORMAT_DATE)
            pool_param = '%s;%s' % (date_since_str, sector_id)
            try:
                future_info_df = invoker.THS_DataPool(
                    'block', pool_param, 'date:Y,thscode:Y,security_name:Y')
            except APIError:
                logger.exception('THS_DataPool %s 获取失败', pool_param)
                break

            if future_info_df is None or future_info_df.shape[0] == 0:
                break

            code_set |= set(future_info_df['THSCODE'])
            if date_since >= date_yesterday:
                break

            # Advance one step, but always finish exactly on yesterday
            date_since += timedelta(days=ndays_per_update)
            if date_since > date_yesterday:
                date_since = date_yesterday

        # Debug runs stop after the first sector
        if DEBUG:
            break

    # Only contracts that are not stored yet need their basic data fetched
    code_list = [wc for wc in code_set if wc not in code_ipo_date_dic]
    if code_list:
        future_info_df = invoker.THS_BasicData(code_list, json_indicator, json_param)
        if future_info_df is None or future_info_df.shape[0] == 0:
            data_count = 0
            logger.warning("更新 %s 结束 %d 条记录被更新", table_name, data_count)
        else:
            data_count = bunch_insert_on_duplicate_update(
                future_info_df, table_name, engine_md, dtype)
            logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
def import_index_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save index daily quotes from ``THS_HistoryQuotes`` into ``ifind_index_daily_his``.

    The date range of every code is computed in SQL: it starts the day after the
    latest stored ``time`` (or at the index base period when nothing is stored)
    and ends today, or yesterday when run before 16:00.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code_set: when given, only these codes are updated
    :param begin_time: when given, the start date of every code is capped so that
        it is no later than this date; values that are not ``date`` instances are
        converted with ``str_2_date``
    :return: None
    """
    table_name = 'ifind_index_daily_his'
    # The original test was inverted (it only converted values that already were
    # dates), so a string begin_time reached min() below unconverted.
    if begin_time is not None and type(begin_time) is not date:
        begin_time = str_2_date(begin_time)

    json_indicator, _ = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_HIS], sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = (
            "SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_index_info info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 "
            "FROM ifind_index_daily_his GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= if(NULL<end_date, NULL, end_date) "
            "ORDER BY ths_code;"
        )
    else:
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
        sql_str = (
            "SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_index_base_period_index date_frm, NULL, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_index_info info "
            ") tt "
            "WHERE date_frm <= if(NULL<end_date, NULL, end_date) "
            "ORDER BY ths_code"
        )

    # Date range that has to be fetched for every code
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set
        }

    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_to is not None and date_from_min <= date_to
        }

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, (date_from, date_to)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, date_from, date_to)
            data_df = invoker.THS_HistoryQuotes(
                ths_code,
                json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                date_from, date_to)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush once enough rows are buffered. The whole buffer is written:
            # the original passed only the last frame and dropped the others.
            if data_count >= 10000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(
                    data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug runs stop after a few codes
            if DEBUG and len(data_df_list) > 5:
                break
    finally:
        # Write whatever is still buffered, even when the loop failed part-way
        if data_count > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_INDEX_DAILY_HIS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_index_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save index data from ``THS_DateSerial`` into ``ifind_index_daily_ds``.

    This table supplements the History data, e.g. adjust factor, limit-up/down
    flag, suspension status and reason.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code_set: when given, only these codes are updated
    :param begin_time: when given, the start date of every code is capped at this date
    :return: None
    """
    table_name = 'ifind_index_daily_ds'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(name, arg) for name, arg, _ in INDICATOR_PARAM_LIST_INDEX_DAILY_DS], sep=';')

    if not has_table:
        # Nothing stored yet: start every code at its index base period
        sql_str = (
            "SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_index_base_period_index date_frm, NULL, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_index_info info "
            ") tt "
            "WHERE date_frm <= if(NULL<end_date, NULL, end_date) "
            "ORDER BY ths_code;"
        )
        logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围' % table_name)
    else:
        # Continue the day after the latest stored row of every code
        sql_str = (
            "SELECT ths_code, date_frm, if(NULL<end_date, NULL, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_index_info info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= if(NULL<end_date, NULL, end_date) "
            "ORDER BY ths_code"
        ).format(table_name=table_name)

    # Date range to fetch per code
    code_date_range_dic = {}
    with with_db_session(engine_md) as session:
        rows = session.execute(sql_str)
        for code, first_day, last_day in rows.fetchall():
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                first_day = min([first_day, begin_time])
            code_date_range_dic[code] = (first_day, last_day)

    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        limited_dic = {}
        for code, (first_day, last_day) in code_date_range_dic.items():
            if last_day is None or last_day < date_from_min:
                continue
            limited_dic[code] = (max([first_day, date_from_min]), last_day)
        code_date_range_dic = limited_dic

    code_count = len(code_date_range_dic)
    frame_buffer, buffered_rows, tot_data_count = [], 0, 0
    try:
        for num, (code, (first_day, last_day)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, code, first_day, last_day)
            last_day = date_2_str(last_day)
            frame = invoker.THS_DateSerial(
                code,
                json_indicator,
                json_param,
                'Days:Tradedays,Fill:Previous,Interval:D',
                first_day, last_day)
            if frame is not None and frame.shape[0] > 0:
                buffered_rows += frame.shape[0]
                frame_buffer.append(frame)

            # Flush the buffer once it holds enough rows
            if buffered_rows >= 10000:
                merged = pd.concat(frame_buffer)
                buffered_rows = bunch_insert_on_duplicate_update(
                    merged, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
                tot_data_count += buffered_rows
                frame_buffer, buffered_rows = [], 0

            # Debug runs stop early
            if DEBUG and len(frame_buffer) > 1:
                break
    finally:
        # Write whatever is still buffered, even when the loop failed part-way
        if buffered_rows > 0:
            merged = pd.concat(frame_buffer)
            buffered_rows = bunch_insert_on_duplicate_update(
                merged, table_name, engine_md, DTYPE_INDEX_DAILY_DS)
            tot_data_count += buffered_rows

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_fin_by_report_date_weekly(chain_param=None, ths_code_set: set = None, begin_time=None, refresh=False):
    """
    Save HK stock financial data from ``THS_DateSerial`` into ``ifind_stock_hk_fin``,
    fetched at weekly interval.

    The fetch windows are based on the report publish dates stored in
    ``ifind_stock_hk_report_date``: one window [publish date - 14 days, publish date]
    per report date, with overlapping windows merged.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code_set: when given, only these codes are processed
    :param begin_time: when given, the start of every code's stored date range is capped at this date
    :param refresh: refresh everything, i.e. ignore the date range derived from already stored data
    :return: None
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    # Example indicators: ths_cce_hks;ths_total_liab_hks;ths_ebit_ttm_hks
    # Example jsonparam: '2013,100,OC;2013,100,OC;OC,101'
    json_indicator, json_param = unzip_join(
        [(key, val) for key, val, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN], sep=';')
    has_table = engine_md.has_table(table_name)
    # One (ths_code, report_date - 14 days, report_date) row per distinct non-null
    # brief / report publish date, ordered by code and date
    ths_code_report_date_str = """select distinct ths_code, subdate(report_date, 14), report_date from ( select ths_code, ths_perf_brief_actual_dd_hks report_date from ifind_stock_hk_report_date union select ths_code, ths_perf_report_actual_dd_hks report_date from ifind_stock_hk_report_date ) tt where report_date is not null order by ths_code, report_date"""
    if has_table:
        # Start the day after the latest stored row (or at the IPO date), end at
        # the earlier of the stop-listing date and today / yesterday (before 19:00)
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to FROM ( SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks, if(hour(now())<19, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} info LEFT OUTER JOIN (SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily ON info.ths_code = daily.ths_code ) tt WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
    else:
        # Target table missing: the range is computed from the info table only
        sql_str = """SELECT ths_code, date_frm, if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to FROM ( SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks, if(hour(now())<19, subdate(curdate(),1), curdate()) end_date FROM {info_table_name} info ) tt WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) ORDER BY ths_code""".format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    with with_db_session(engine_md) as session:
        # Collect the [report date - 14 days, report date] windows per code
        table = session.execute(ths_code_report_date_str)
        ths_code_report_date_range_list_dic, ths_code_report_date_range_list_dic_tmp = {}, {}
        for ths_code, date_from, date_to in table.fetchall():
            if ths_code_set is None or ths_code in ths_code_set:
                ths_code_report_date_range_list_dic_tmp.setdefault(
                    ths_code, []).append((date_from, date_to))

        # Date range that still has to be fetched for every code
        if not refresh:
            # Skipped on a full refresh, where the stored-data range is ignored
            table = session.execute(sql_str)
            code_date_range_dic = {
                ths_code: (date_from if begin_time is None else min(
                    [date_from, begin_time]), date_to)
                for ths_code, date_from, date_to in table.fetchall()
                if ths_code_set is None or ths_code in ths_code_set
            }

            if TRIAL:
                # Trial accounts can only access the most recent 5 years of data
                date_from_min = date.today() - timedelta(days=(365 * 5))
                code_date_range_dic = {
                    ths_code: (max([date_from, date_from_min]), date_to)
                    for ths_code, (date_from, date_to) in code_date_range_dic.items()
                    if date_from_min <= date_to
                }
        else:
            code_date_range_dic = {}

    # Merge overlapping windows
    for ths_code, date_range_list in ths_code_report_date_range_list_dic_tmp.items():
        if not refresh and ths_code in code_date_range_dic:
            code_date_range = code_date_range_dic[ths_code]
        else:
            # No restricting range for this code: every window is kept
            code_date_range = None

        # date_range_list is ordered by start date (see the ORDER BY of the query);
        # the loop below folds windows that overlap into a single range
        date_range_list_new, date_from_last, date_to_last = [], None, None
        for date_from, date_to in date_range_list:
            if code_date_range is not None:
                # Drop windows that lie completely outside the range still to fetch
                if not refresh and (date_to < code_date_range[0] or code_date_range[1] < date_from):
                    continue

            if date_from_last is None:
                # First kept window: open a new range
                date_from_last = date_from
            elif date_from < date_to_last:
                # Overlaps the open range: merged by extending date_to_last below
                pass
            else:
                # No overlap: store the open range and start a new one
                date_range_list_new.append((date_from_last, date_to_last))
                date_from_last = date_from

            # End of iteration: the open range now ends at this window's end
            date_to_last = date_to

        # After the loop: store the range that is still open
        if date_from_last is not None and date_to_last is not None:
            date_range_list_new.append((date_from_last, date_to_last))

        if len(date_range_list_new) > 0:
            ths_code_report_date_range_list_dic[ths_code] = date_range_list_new

    data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(
        ths_code_report_date_range_list_dic)
    try:
        for num, (ths_code, date_range_list) in enumerate(
                ths_code_report_date_range_list_dic.items(), start=1):
            for begin_time, end_time in date_range_list:
                logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
                data_df = invoker.THS_DateSerial(
                    ths_code,
                    json_indicator,
                    json_param,
                    'Days:Tradedays,Fill:Previous,Interval:W',
                    begin_time, end_time)
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)

            # Debug only: stop as soon as some data has been collected
            if DEBUG and len(data_df_list) > 0:
                break

            # Flush the buffer once it holds enough rows
            if data_count >= 2000:
                tot_data_df = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, even when the loop failed part-way
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # The table was created by this run: convert the engine and add the key
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_report_date(chain_param=None, ths_code_set: set = None, begin_time=None, interval='Q'):
    """
    Save HK stock report-date data from ``THS_DateSerial`` into
    ``ifind_stock_hk_report_date``; the default fetch interval is quarterly.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code_set: when given, only these codes are updated
    :param begin_time: when given, the start date of every code is capped at this date
    :param interval: Q quarter, M month, W week, D day
    :return: None
    """
    table_name = 'ifind_stock_hk_report_date'
    info_table_name = 'ifind_stock_hk_info'
    has_table = engine_md.has_table(table_name)
    json_indicator, json_param = unzip_join(
        [(name, arg) for name, arg, _ in INDICATOR_PARAM_LIST_STOCK_HK_REPORT_DATE], sep=';')

    if not has_table:
        # Nothing stored yet: start every code at its IPO date
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            ") tt "
            "WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        ).format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    else:
        # Continue the day after the latest stored row of every code
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        ).format(table_name=table_name, info_table_name=info_table_name)

    # Date range to fetch per code
    code_date_range_dic = {}
    with with_db_session(engine_md) as session:
        rows = session.execute(sql_str)
        for code, first_day, last_day in rows.fetchall():
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                first_day = min([first_day, begin_time])
            code_date_range_dic[code] = (first_day, last_day)

    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        limited_dic = {}
        for code, (first_day, last_day) in code_date_range_dic.items():
            if date_from_min <= last_day:
                limited_dic[code] = (max([first_day, date_from_min]), last_day)
        code_date_range_dic = limited_dic

    # The request options do not change between codes
    serial_options = "Days:Tradedays,Fill:Previous,Interval:{interval}".format(interval=interval)
    code_count = len(code_date_range_dic)
    frame_buffer, buffered_rows, tot_data_count = [], 0, 0
    try:
        for num, (code, (first_day, last_day)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, code, first_day, last_day)
            frame = invoker.THS_DateSerial(
                code, json_indicator, json_param, serial_options, first_day, last_day)
            if frame is not None and frame.shape[0] > 0:
                buffered_rows += frame.shape[0]
                frame_buffer.append(frame)

            # Flush the buffer once it holds enough rows
            if buffered_rows >= 10000:
                merged = pd.concat(frame_buffer)
                buffered_rows = bunch_insert_on_duplicate_update(
                    merged, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
                tot_data_count += buffered_rows
                frame_buffer, buffered_rows = [], 0

            # Debug runs stop early
            if DEBUG and len(frame_buffer) > 1:
                break
    finally:
        # Write whatever is still buffered, even when the loop failed part-way
        if buffered_rows > 0:
            merged = pd.concat(frame_buffer)
            buffered_rows = bunch_insert_on_duplicate_update(
                merged, table_name, engine_md, DTYPE_STOCK_HK_REPORT_DATE)
            tot_data_count += buffered_rows

        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
def import_stock_hk_fin_quarterly(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Save HK stock financial data from ``THS_DateSerial`` into ``ifind_stock_hk_fin``,
    fetched at quarterly interval.

    :param chain_param: not read by this function
    :param ths_code_set: when given, only these codes are updated
    :param begin_time: when given, the start date of every code is capped at this date
    :return: None
    """
    table_name = 'ifind_stock_hk_fin'
    info_table_name = 'ifind_stock_hk_info'
    json_indicator, json_param = unzip_join(
        [(name, arg) for name, arg, _ in INDICATOR_PARAM_LIST_STOCK_HK_FIN], sep=';')
    has_table = engine_md.has_table(table_name)

    if not has_table:
        # Nothing stored yet: start every code at its IPO date
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            ") tt "
            "WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        ).format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
    else:
        # Continue the day after the latest stored row of every code
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        ).format(table_name=table_name, info_table_name=info_table_name)

    # Date range to fetch per code
    code_date_range_dic = {}
    with with_db_session(engine_md) as session:
        rows = session.execute(sql_str)
        for code, first_day, last_day in rows.fetchall():
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                first_day = min([first_day, begin_time])
            code_date_range_dic[code] = (first_day, last_day)

    if TRIAL:
        # Trial accounts can only access the most recent 5 years of data
        date_from_min = date.today() - timedelta(days=(365 * 5))
        limited_dic = {}
        for code, (first_day, last_day) in code_date_range_dic.items():
            if date_from_min <= last_day:
                limited_dic[code] = (max([first_day, date_from_min]), last_day)
        code_date_range_dic = limited_dic

    code_count = len(code_date_range_dic)
    frame_buffer, buffered_rows, tot_data_count = [], 0, 0
    try:
        for num, (code, (first_day, last_day)) in enumerate(
                code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, code, first_day, last_day)
            frame = invoker.THS_DateSerial(
                code,
                json_indicator,
                json_param,
                'Days:Tradedays,Fill:Previous,Interval:Q',
                first_day, last_day)
            if frame is not None and frame.shape[0] > 0:
                buffered_rows += frame.shape[0]
                frame_buffer.append(frame)

            # Debug only: stop as soon as some data has been collected
            if DEBUG and len(frame_buffer) > 0:
                break

            # Flush the buffer once it holds enough rows
            if buffered_rows >= 2000:
                merged = pd.concat(frame_buffer)
                bunch_insert_on_duplicate_update(merged, table_name, engine_md, DTYPE_STOCK_HK_FIN)
                tot_data_count += buffered_rows
                frame_buffer, buffered_rows = [], 0
    finally:
        # Write whatever is still buffered, even when the loop failed part-way
        if buffered_rows > 0:
            merged = pd.concat(frame_buffer)
            bunch_insert_on_duplicate_update(merged, table_name, engine_md, DTYPE_STOCK_HK_FIN)
            tot_data_count += buffered_rows

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
    """
    Load HK stock basic information from iFinD into ``ifind_stock_hk_info``.

    Rows of the affected codes are deleted from an existing table before the
    fresh data is written.

    :param chain_param: only used to hand parameters along when run inside a ``task.chain``
    :param ths_code: comma separated codes (or an iterable of codes) to update;
        when ``None`` the codes are collected with ``get_stock_hk_code_set``
    :param refresh: when collecting codes, walk from 1991-02-01 to today in
        365-day steps instead of only querying today
    :return: None
    """
    table_name = 'ifind_stock_hk_info'
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect the HK stock codes of the whole market
        if refresh:
            date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()

        date_end = date.today()
        stock_hk_code_set = set()
        while date_fetch < date_end:
            stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
            if stock_hk_code_set_sub is not None:
                stock_hk_code_set |= stock_hk_code_set_sub
            date_fetch += timedelta(days=365)

        stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
        if stock_hk_code_set_sub is not None:
            stock_hk_code_set |= stock_hk_code_set_sub

        code_list = list(stock_hk_code_set)
        if DEBUG:
            # Debug runs are limited to 10 codes
            code_list = code_list[:10]

        ths_code = ','.join(code_list)
    elif isinstance(ths_code, str):
        # Codes supplied by the caller. The original referenced the undefined
        # stock_hk_code_set in the DELETE step below and raised NameError.
        code_list = [code for code in ths_code.split(',') if code]
    else:
        code_list = list(ths_code)

    # (indicator name, indicator parameter, column type)
    indicator_param_list = [
        ('ths_stock_short_name_hks', '', String(40)),
        ('ths_stock_code_hks', '', String(20)),
        ('ths_isin_code_hks', '', String(40)),
        ('ths_corp_ashare_short_name_hks', '', String(10)),
        ('ths_corp_ashare_code_hks', '', String(60)),
        ('ths_stock_varieties_hks', '', String(40)),
        ('ths_ipo_date_hks', '', Date),
        ('ths_listed_exchange_hks', '', String(60)),
        ('ths_stop_listing_date_hks', '', Date),
        ('ths_corp_cn_name_hks', '', String(120)),
        ('ths_corp_name_en_hks', '', String(120)),
        ('ths_established_date_hks', '', Date),
        ('ths_accounting_date_hks', '', String(20)),
        ('ths_general_manager_hks', '', String(40)),
        ('ths_secretary_hks', '', String(40)),
        ('ths_operating_scope_hks', '', Text),
        ('ths_mo_product_name_hks', '', String(200)),
        ('ths_district_hks', '', String(60)),
        ('ths_reg_address_hks', '', String(200)),
        ('ths_office_address_hks', '', String(200)),
        ('ths_corp_tel_hks', '', String(200)),
        ('ths_corp_fax_hks', '', String(200)),
        ('ths_corp_website_hks', '', String(200)),
        ('ths_auditor_hks', '', String(60)),
        ('ths_legal_counsel_hks', '', String(300)),
        ('ths_hs_industry_hks', '', String(40)),
    ]
    indicator, param = unzip_join(
        [(key, val) for key, val, _ in indicator_param_list], sep=';')
    # Appended to the joined parameter string, i.e. to the last indicator's parameter
    param += '100'
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock_hk info 可以更新")
        return

    # Remove the stored rows of the affected codes before writing fresh data
    has_table = engine_md.has_table(table_name)
    if has_table and code_list:  # an empty list would produce an invalid "IN ()"
        placeholders = ','.join([':code%d' % n for n in range(len(code_list))])
        with with_db_session(engine_md) as session:
            session.execute(
                "DELETE FROM {table_name} WHERE ths_code IN (".format(
                    table_name=table_name) + placeholders + ")",
                params={'code%d' % n: val for n, val in enumerate(code_list)})
            session.commit()

    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    # The table was created by the insert above: convert the engine and add the key
    if not has_table and engine_md.has_table(table_name):
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table from the info table
    update_from_info_table(table_name)
def import_stock_hk_daily_ds(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Store Hong Kong stock history fetched through the date-series interface in
    ``ifind_stock_hk_daily_ds``. The data supplements the History data, e.g.
    adjustment factor, limit-up/limit-down flags, suspension status and reason.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code_set: when given, only codes contained in it are processed
    :param begin_time: when given, each code's start date is moved back to this date
        if it would otherwise start later
    :return: None
    """
    table_name = 'ifind_stock_hk_daily_ds'
    info_table_name = 'ifind_stock_hk_info'
    indicator_pairs = [(name, arg) for name, arg, _ in INDICATOR_PARAM_LIST_STOCK_HK_DAILY_DS]
    json_indicator, json_param = unzip_join(indicator_pairs, sep=';')

    has_table = engine_md.has_table(table_name)
    if has_table:
        # Continue from the day after the latest stored row, falling back to the IPO date.
        sql_template = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, "
            "ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 "
            "FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        )
        sql_str = sql_template.format(table_name=table_name, info_table_name=info_table_name)
    else:
        # No daily table yet: the range starts at the IPO date from the info table.
        sql_template = (
            "SELECT ths_code, date_frm, "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks, "
            "if(hour(now())<19, subdate(curdate(),1), curdate()) end_date "
            "FROM {info_table_name} info "
            ") tt "
            "WHERE date_frm <= "
            "if(ths_stop_listing_date_hks<end_date, ths_stop_listing_date_hks, end_date) "
            "ORDER BY ths_code"
        )
        sql_str = sql_template.format(info_table_name=info_table_name)
        logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)

    # Date range that has to be downloaded for every code.
    code_date_range_dic = {}
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        for code, date_from, date_to in table.fetchall():
            if ths_code_set is not None and code not in ths_code_set:
                continue
            if begin_time is not None:
                date_from = min([date_from, begin_time])
            code_date_range_dic[code] = (date_from, date_to)

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        trimmed_range_dic = {}
        for code, (date_from, date_to) in code_date_range_dic.items():
            if date_from_min <= date_to:
                trimmed_range_dic[code] = (max([date_from, date_from_min]), date_to)
        code_date_range_dic = trimmed_range_dic

    data_df_list = []
    data_count = 0
    tot_data_count = 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, (range_start, range_end)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, range_start, range_end)
            data_df = invoker.THS_DateSerial(
                ths_code,
                json_indicator,
                json_param,
                'Days:Tradedays,Fill:Previous,Interval:D',
                range_start, range_end)
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Debug runs stop as soon as one frame has been collected.
            if DEBUG and len(data_df_list) > 0:
                break

            # Flush to the database once enough rows are buffered.
            if data_count >= 2000:
                bunch_insert_on_duplicate_update(
                    pd.concat(data_df_list), table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, even when the loop was interrupted.
        if data_count > 0:
            bunch_insert_on_duplicate_update(
                pd.concat(data_df_list), table_name, engine_md, DTYPE_STOCK_HK_DAILY_DS)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            # The table was created by this run: switch the engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_pub_fund_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import the public fund info table ``ifind_pub_fund_info``.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code: codes to refresh; when None the code list is collected through
        ``get_pub_fund_code_set`` for today's date
    :param refresh: kept for interface compatibility; it currently has no effect because
        the code list is fetched in a single call instead of year by year
    :return: None
    """
    table_name = 'ifind_pub_fund_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # A single call is used for the whole fund list (the original author notes it
        # already includes established and expired funds), so no per-year loop is needed.
        pub_fund_code_set = set()
        pub_fund_code_set_sub = get_pub_fund_code_set(date.today())
        if pub_fund_code_set_sub is not None:
            pub_fund_code_set |= pub_fund_code_set_sub

        ths_code = list(pub_fund_code_set)
        if DEBUG:
            # Keep debugging runs small.
            ths_code = ths_code[:40]

    indicator_param_list = [
        ('ths_fund_short_name_fund', '', String(40)),
        ('ths_fund_code_fund', '', String(40)),
        ('ths_fund_thscode_fund', '', String(40)),
        ('ths_fund_full_name_fund', '', String(80)),
        ('ths_invest_objective_fund', '', String(500)),
        ('ths_invest_socpe_fund', '', Text),
        ('ths_perf_comparative_benchmark_fund', '', Text),
        ('ths_fund_listed_exchange_fund', '', String(40)),
        ('ths_fund_td_currency_fund', '', String(60)),
        ('ths_coupon_value_fund', '', String(40)),
        ('ths_fund_manager_current_fund', '', String(40)),
        ('ths_fund_manager_his_fund', '', String(400)),
        ('ths_fund_supervisor_fund', '', String(40)),
        ('ths_fund_mandator_fund', '', String(20)),
        ('ths_fund_sponsor_related_org_fund', '', String(40)),
        ('ths_fund_type_fund', '', String(10)),
        ('ths_fund_invest_type_fund', '', String(10)),
        ('ths_invest_type_first_classi_fund', '', String(40)),
        ('ths_invest_type_second_classi_fund', '', String(40)),
        ('ths_galaxy_classi_fund', '', String(300)),
        ('ths_hts_classi_fund', '', String(100)),
        ('ths_invest_style_fund', '', String(100)),
        ('ths_fund_duration_fund', '', String(40)),
        ('ths_fund_establishment_date_fund', '', Date),
        ('ths_fund_expiry_date_fund', '', Date),
        ('ths_redemp_sd_fund', '', String(40)),
        ('ths_mandate_sd_fund', '', String(40)),
        ('ths_manage_fee_rate_fund', '', String(40)),
        ('ths_mandate_fee_rate_fund', '', String(40)),
        ('ths_sales_service_fee_fund', '', String(40)),
        ('ths_high_pur_fee_rate_fund', '', String(20)),
        ('ths_high_redemp_fee_rate_fund', '', String(40)),
        ('ths_lof_listed_date_fund', '', Date),
        ('ths_lof_listed_td_share_fund', '', String(40)),
        ('ths_pm_fund_code_fund', '', String(40)),
        ('ths_par_short_name_fund', '', String(40)),
        ('ths_online_cash_sell_code_fund', '', String(40)),
        ('ths_online_cash_pur_sd_fund', '', String(40)),
        ('ths_online_cash_pur_ed_fund', '', String(40)),
        ('ths_online_cash_buy_share_ul_fund', '', String(40)),
        ('ths_online_cash_buy_share_dl_fund', '', String(40)),
        ('ths_offline_cash_pur_sd_fund', '', String(40)),
        ('ths_offline_cash_pur_ed_fund', '', String(40)),
        ('ths_offline_stock_pur_sd_fund', '', String(40)),
        ('ths_offline_stock_pur_ed_fund', '', String(40)),
        ('ths_offline_stock_pur_vol_dl_fund', '', String(40)),
        ('ths_fund_shares_convert_date_fund', '', String(40)),
        ('ths_fund_shares_convert_ratio_fund', '', String(40)),
        ('ths_issue_date_fund', '', Date),
        ('ths_issue_object_fund', '', String(100)),
        ('ths_issue_method_fund', '', String(80)),
        ('ths_fund_reg_and_registrant_fund', '', String(40)),
        ('ths_fund_main_underwrite_fund', '', String(40)),
        ('ths_fund_issue_coordinator_fund', '', String(500)),
        ('ths_fund_sales_agent_fund', '', Text),
        ('ths_fund_listing_recommended_fund', '', String(40))
    ]
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param, max_code_num=3000)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 pub_fund info 可以更新")
        return

    # Existing rows are not deleted up front; the insert helper is relied on to
    # update duplicates.
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # The table was created by this run: switch the engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table.
    update_from_info_table(table_name)
def import_pub_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Store public fund history fetched through the history-quotes interface in
    ``ifind_pub_fund_daily``.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code_set: when given, only codes contained in it are processed
    :param begin_time: default None; when given (a date or a date string), no code's
        update range may start later than this date
    :return: None
    """
    table_name = 'ifind_pub_fund_daily'
    has_table = engine_md.has_table(table_name)
    # A string has to be converted before it is compared with the dates coming from
    # the database. The original check converted only values that already were dates,
    # leaving strings untouched.
    # NOTE(review): assumes str_2_date parses the project's default date format -- confirm.
    if isinstance(begin_time, str):
        begin_time = str_2_date(begin_time)

    indicator_param_list = [
        ('netAssetValue', '', DOUBLE),
        ('adjustedNAV', '', DOUBLE),
        ('accumulatedNAV', '', DOUBLE)
    ]
    # Only the indicator names are needed; the request options are passed literally below.
    json_indicator, _ = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    if has_table:
        # Continue from the day after the latest stored row, falling back to the
        # LOF listing date.
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_lof_listed_date_fund) date_frm, "
            "ths_fund_expiry_date_fund, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_pub_fund_info info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 "
            "FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= "
            "if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) "
            "ORDER BY ths_code"
        ).format(table_name=table_name)
    else:
        logger.warning('%s 不存在,仅使用 ifind_pub_fund_info 表进行计算日期范围', table_name)
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_lof_listed_date_fund date_frm, ths_fund_expiry_date_fund, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_pub_fund_info info "
            ") tt "
            "WHERE date_frm <= "
            "if(ths_fund_expiry_date_fund<end_date, ths_fund_expiry_date_fund, end_date) "
            "ORDER BY ths_code"
        )

    # Date range that has to be downloaded for every code.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}

    # Column types used when writing to the database.
    # NOTE(review): the frames below get lower-cased column names while these keys stay
    # camelCase -- verify that bunch_insert_on_duplicate_update matches them as intended.
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, date_from, date_to)
            data_df = invoker.THS_HistoryQuotes(
                ths_code,
                json_indicator,
                'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
                date_from, date_to
            )
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df.rename(columns={col: col.lower() for col in data_df.columns}, inplace=True)
                data_df_list.append(data_df)

            # Debug runs stop once two frames have been collected.
            if DEBUG and len(data_df_list) > 1:
                break

            # Flush to the database once enough rows are buffered.
            if data_count >= 10000:
                tot_data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # Write whatever is still buffered, even when the loop was interrupted.
        if len(data_df_list) > 0:
            tot_data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        if not has_table and engine_md.has_table(table_name):
            # The table was created by this run: switch the engine and add the primary key.
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_stock_info(chain_param=None, ths_code=None, refresh=False):
    """
    Import the stock info table ``ifind_stock_info``.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code: codes to refresh; when None the code list is collected through
        ``get_stock_code_set``. An explicit value is forwarded unchanged to
        ``invoker.THS_BasicData``.
    :param refresh: only relevant when ``ths_code`` is None. When True the code list is
        sampled every 365 days starting from 1991-02-01 (plus today); otherwise only
        today's list is used.
    :return: None
    """
    table_name = 'ifind_stock_info'
    has_table = engine_md.has_table(table_name)
    # Log the table actually being updated (the message used to name wind_stock_info).
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        # Collect the whole-market stock codes.
        if refresh:
            date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
        else:
            date_fetch = date.today()

        date_end = date.today()
        stock_code_set = set()
        while date_fetch < date_end:
            stock_code_set_sub = get_stock_code_set(date_fetch)
            if stock_code_set_sub is not None:
                stock_code_set |= stock_code_set_sub
            date_fetch += timedelta(days=365)

        # Always include the list as of today.
        stock_code_set_sub = get_stock_code_set(date_end)
        if stock_code_set_sub is not None:
            stock_code_set |= stock_code_set_sub

        ths_code = ','.join(stock_code_set)

    indicator_param_list = [
        ('ths_stock_short_name_stock', '', String(10)),
        ('ths_stock_code_stock', '', String(10)),
        ('ths_stock_varieties_stock', '', String(10)),
        ('ths_ipo_date_stock', '', Date),
        ('ths_listing_exchange_stock', '', String(10)),
        ('ths_delist_date_stock', '', Date),
        ('ths_corp_cn_name_stock', '', String(40)),
        ('ths_corp_name_en_stock', '', String(100)),
        ('ths_established_date_stock', '', Date),
    ]
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的 stock info 可以更新")
        return

    # Existing rows are not deleted up front; the insert helper is relied on to
    # update duplicates.
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # The table was created by this run: switch the engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table.
    update_from_info_table(table_name)
def import_private_fund_info(chain_param=None, ths_code=None, refresh=False):
    """
    Update the private fund base info table ``ifind_private_fund_info``.

    When ``ths_code`` is None and the table already exists (and ``refresh`` is False)
    only the following codes are requested: funds not yet stored, plus funds from the
    '051010005' list (labelled "liquidated funds" by the original author) that do not
    have a maturity date stored yet.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code: codes to refresh; when None the list is built as described above
    :param refresh: when True, codes already stored in the table are requested again
    :return: None
    """
    table_name = 'ifind_private_fund_info'
    has_table = engine_md.has_table(table_name)
    logging.info("更新 %s 开始", table_name)
    if ths_code is None:
        date_end = date.today()
        incremental = not refresh and has_table
        if incremental:
            sql_str = "select ths_code, ths_maturity_date_sp from {table_name}".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                code_in_db_dict = dict(session.execute(sql_str).fetchall())
        else:
            code_in_db_dict = {}

        # Codes to leave out of each list; both sets are empty on a full refresh.
        code_in_db_set = set(code_in_db_dict)
        code_with_maturity_set = {key for key, val in code_in_db_dict.items() if val is not None}

        private_fund_set = set()
        # Newly added funds. The returned sets are not modified in place.
        code_set_exists = get_private_fund_set(date_end)
        if code_set_exists is not None:
            private_fund_set |= set(code_set_exists) - code_in_db_set

        # Liquidated funds whose maturity date is not stored yet.
        code_set_clear = get_private_fund_set(date_end, field='051010005')
        if code_set_clear is not None:
            private_fund_set |= set(code_set_clear) - code_with_maturity_set

        ths_code = list(private_fund_set)
        if DEBUG:
            # Keep debugging runs small.
            ths_code = ths_code[:10]

    if not ths_code:
        # Nothing to request (e.g. no new funds since the last run).
        logging.info("没有可用的数据可以更新")
        return

    indicator_param_list = [
        ('ths_product_short_name_sp', '', String(80)),
        ('ths_product_full_name_sp', '', String(80)),
        ('ths_trust_category_sp', '', String(40)),
        ('ths_is_structured_product_sp', '', String(10)),
        ('ths_threshold_amt_sp', '', Integer),
        ('ths_low_add_amt_sp', '', Integer),
        ('ths_fore_max_issue_scale_sp', '', String(40)),
        ('ths_actual_issue_scale_sp', '', String(40)),
        ('ths_invest_manager_current_sp', '', String(60)),
        ('ths_mendator_sp', '', String(20)),
        ('ths_recommend_sd_sp', '', Date),
        ('ths_introduction_ed_sp', '', Date),
        ('ths_established_date_sp', '', Date),
        ('ths_maturity_date_sp', '', Date),
        # NOTE(review): "found years" stored as Date looks suspicious -- confirm the type.
        ('ths_found_years_sp', '', Date),
        ('ths_duration_y_sp', '', Integer),
        ('ths_remain_duration_d_sp', '', Integer),
        ('ths_float_manage_rate_sp', '', DOUBLE),
        ('ths_mandate_fee_rate_sp', '', DOUBLE),
        ('ths_subscription_rate_explain_sp', '', String(300)),
        ('ths_redemp_rate_explain_sp', '', String(300)),
        ('ths_opening_period_explain_sp', '', String(300)),
        ('ths_close_period_explain_sp', '', String(300)),
        ('ths_trustee_sp', '', String(100)),
        ('ths_secbroker_sp', '', String(40))
    ]
    indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    data_df = invoker.THS_BasicData(ths_code, indicator, param, max_code_num=8000)
    if data_df is None or data_df.shape[0] == 0:
        logging.info("没有可用的数据可以更新")
        return

    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
    logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
    if not has_table and engine_md.has_table(table_name):
        # The table was created by this run: switch the engine and add the primary key.
        alter_table_2_myisam(engine_md, [table_name])
        build_primary_key([table_name])

    # Refresh the code_mapping table.
    update_from_info_table(table_name)
def import_private_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None):
    """
    Import private fund daily data into ``ifind_private_fund_daily``.

    :param chain_param: only used to pass parameters along when run serially in a task.chain
    :param ths_code_set: when given, only codes contained in it are processed
    :param begin_time: when given, each code's start date is moved back to this date
        if it would otherwise start later
    :return: None
    """
    table_name = 'ifind_private_fund_daily'
    indicator_param_list = [
        ('netAssetValue', '', DOUBLE),
        ('adjustedNAV', '', DOUBLE),
        ('accumulatedNAV', '', DOUBLE),
        ('premium', '', DOUBLE),
        ('premiumRatio', '', DOUBLE),
        ('estimatedPosition', '', DOUBLE)
    ]
    json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
    has_table = engine_md.has_table(table_name)
    if has_table:
        # Continue from the day after the latest stored row, falling back to the
        # establishment date.
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ifnull(trade_date_max_1, ths_established_date_sp) date_frm, "
            "ths_maturity_date_sp, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_private_fund_info info "
            "LEFT OUTER JOIN "
            "(SELECT ths_code, adddate(max(time),1) trade_date_max_1 "
            "FROM {table_name} GROUP BY ths_code) daily "
            "ON info.ths_code = daily.ths_code "
            ") tt "
            "WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) "
            "ORDER BY ths_code"
        ).format(table_name=table_name)
    else:
        logger.warning('ifind_private_fund_daily 不存在,仅使用 ifind_private_fund_info 表进行计算日期范围')
        sql_str = (
            "SELECT ths_code, date_frm, "
            "if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) date_to "
            "FROM ( "
            "SELECT info.ths_code, ths_established_date_sp date_frm, ths_maturity_date_sp, "
            "if(hour(now())<16, subdate(curdate(),1), curdate()) end_date "
            "FROM ifind_private_fund_info info "
            ") tt "
            "WHERE date_frm <= if(ths_maturity_date_sp<end_date, ths_maturity_date_sp, end_date) "
            "ORDER BY ths_code"
        )

    # Date range that has to be downloaded for every code.
    with with_db_session(engine_md) as session:
        table = session.execute(sql_str)
        code_date_range_dic = {
            ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ths_code, date_from, date_to in table.fetchall()
            if ths_code_set is None or ths_code in ths_code_set}

    if TRIAL:
        # Trial accounts can only fetch the most recent 5 years.
        date_from_min = date.today() - timedelta(days=(365 * 5))
        code_date_range_dic = {
            ths_code: (max([date_from, date_from_min]), date_to)
            for ths_code, (date_from, date_to) in code_date_range_dic.items()
            if date_from_min <= date_to}

    # Column types used when writing to the database.
    dtype = {key: val for key, _, val in indicator_param_list}
    dtype['ths_code'] = String(20)
    dtype['time'] = Date

    data_df_list, data_count, tot_data_count = [], 0, 0
    code_count = len(code_date_range_dic)
    try:
        for num, (ths_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, date_from, date_to)
            data_df = invoker.THS_HistoryQuotes(
                ths_code,
                json_indicator,
                json_param,
                date_from, date_to
            )
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)

            # Flush to the database once enough rows are buffered.
            if data_count >= 10000:
                tot_data_df = pd.concat(data_df_list)
                data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
                tot_data_count += data_count
                data_df_list, data_count = [], 0

            # Debug runs stop once two frames are buffered.
            if DEBUG and len(data_df_list) > 1:
                break
    finally:
        # Write whatever is still buffered, even when the loop was interrupted.
        if data_count > 0:
            tot_data_df = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
            tot_data_count += data_count

        logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
        # Only post-process a table that this run actually created; when nothing was
        # inserted the table still does not exist and altering it would fail.
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])