def min_to_vnpy_increment(chain_param=None, instrument_types=None):
    """Incrementally export 1-minute bars from wind_future_min into the vnpy dbbardata table."""
    from tasks.config import config
    from tasks.backend import engine_dic
    table_name = 'dbbardata'
    interval = '1m'
    engine_vnpy = engine_dic[config.DB_SCHEMA_VNPY]
    has_table = engine_vnpy.has_table(table_name)
    if not has_table:
        logger.error('当前数据库 %s 没有 %s 表,建议使用 vnpy 先建立相应的数据库表后再进行导入操作',
                     engine_vnpy, table_name)
        return
    sql_increment_str = "select trade_datetime `datetime`, `open` open_price, high high_price, " \
                        "`low` low_price, `close` close_price, volume, position as open_interest " \
                        "from wind_future_min where wind_code = %s and " \
                        "trade_datetime > %s and `close` is not null and `close` <> 0"
    sql_whole_str = "select trade_datetime `datetime`, `open` open_price, high high_price, " \
                    "`low` low_price, `close` close_price, volume, position as open_interest " \
                    "from wind_future_min where wind_code = %s and " \
                    "`close` is not null and `close` <> 0"
    wind_code_list = get_wind_code_list_by_types(instrument_types)
    wind_code_count = len(wind_code_list)
    for n, wind_code in enumerate(wind_code_list, start=1):
        symbol, exchange = wind_code.split('.')
        if exchange in WIND_VNPY_EXCHANGE_DIC:
            exchange_vnpy = WIND_VNPY_EXCHANGE_DIC[exchange]
        else:
            logger.warning('%s exchange: %s 在交易所列表中不存在', wind_code, exchange)
            exchange_vnpy = exchange

        sql_str = f"select max(`datetime`) from {table_name} where symbol=:symbol and `interval`=:interval"
        with with_db_session(engine_vnpy) as session:
            datetime_exist = session.scalar(sql_str, params={'symbol': symbol, 'interval': interval})

        if datetime_exist is not None:
            # read only the minute bars newer than what is already in dbbardata
            df = pd.read_sql(sql_increment_str, engine_md, params=[wind_code, datetime_exist]).dropna()
        else:
            df = pd.read_sql(sql_whole_str, engine_md, params=[wind_code]).dropna()
        df_len = df.shape[0]
        if df_len == 0:
            continue
        df['symbol'] = symbol
        df['exchange'] = exchange_vnpy
        df['interval'] = interval
        datetime_latest = df['datetime'].max().to_pydatetime()
        df.to_sql(table_name, engine_vnpy, if_exists='append', index=False)
        logger.info("%d/%d) %s (%s ~ %s] %d data -> %s interval %s",
                    n, wind_code_count, symbol, datetime_2_str(datetime_exist),
                    datetime_2_str(datetime_latest), df_len, table_name, interval)
def min_to_vnpy(chain_param=None, instrument_types=None):
    """Export 1-minute bars from rqdatac_future_min into the vnpy dbbardata table (full reload per contract)."""
    from tasks.config import config
    from tasks.backend import engine_dic
    interval = '1m'
    table_name = 'dbbardata'
    engine_vnpy = engine_dic[config.DB_SCHEMA_VNPY]
    has_table = engine_vnpy.has_table(table_name)
    if not has_table:
        logger.error('当前数据库 %s 没有 %s 表,建议使用 vnpy 先建立相应的数据库表后再进行导入操作',
                     engine_vnpy, table_name)
        return
    code_list = get_code_list_by_types(instrument_types)
    code_count, do_count = len(code_list), 0
    logger.info("导入分钟级数据到 vnpy 数据库,预计 %d 条记录", code_count)
    data_count = 0
    for n, (order_book_id, exchange, symbol) in enumerate(code_list, start=1):
        # read the minute bars for this contract
        sql_str = "select trade_date `datetime`, `open` open_price, high high_price, " \
                  "`low` low_price, `close` close_price, volume, open_interest " \
                  "from rqdatac_future_min where order_book_id = %s and `close` is not null"
        df = pd.read_sql(sql_str, engine_md, params=[order_book_id]).dropna()
        df_len = df.shape[0]
        if df_len == 0:
            continue
        do_count += 1
        df['symbol'] = symbol
        df['exchange'] = exchange
        df['interval'] = interval
        datetime_latest = df['datetime'].max().to_pydatetime()
        sql_str = f"select max(`datetime`) from {table_name} where symbol=:symbol and `interval`='{interval}'"
        del_sql_str = f"delete from {table_name} where symbol=:symbol and `interval`='{interval}'"
        with with_db_session(engine_vnpy) as session:
            datetime_exist = session.scalar(sql_str, params={'symbol': symbol})
            if datetime_exist is not None:
                if datetime_exist >= datetime_latest:
                    continue
                else:
                    # stale data: delete the existing bars before re-appending the full set
                    session.execute(del_sql_str, params={'symbol': symbol})
                    session.commit()
        df.to_sql(table_name, engine_vnpy, if_exists='append', index=False)
        logger.info(
            "%d/%d) %s %s -> %s %d data have been inserted into table %s",
            n, code_count, symbol, datetime_2_str(datetime_exist), datetime_2_str(datetime_latest),
            df_len, table_name)
        data_count += df_len

    logger.info(f"全部 {do_count:,d} 个合约 {data_count:,d} 条数据插入完成")
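# Usage sketch for the two exporters above (illustrative only): the instrument type codes
# below ('RB', 'I') are assumptions, pass whatever your get_code_list_by_types /
# get_wind_code_list_by_types helpers actually understand.
def _example_export_min_bars_to_vnpy():
    # full reload per contract from rqdatac_future_min
    min_to_vnpy(instrument_types=['RB', 'I'])
    # incremental append from wind_future_min, starting after the latest bar already in dbbardata
    min_to_vnpy_increment(instrument_types=['RB', 'I'])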
def import_tushare_adj_factor(chain_param=None):
    """
    Import Tushare adjustment-factor data for each trading day not yet covered, up to the
    most recent completed trading day (yesterday before 16:00, today afterwards).
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    # check whether the target table already exists
    has_table = engine_md.has_table(table_name)
    # sqlite_file_name = 'eDB_adjfactor.db'
    check_sqlite_db_primary_keys(table_name, primary_keys)
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = [row[0] for row in table.fetchall()]

    trade_date_count, data_count_tot = len(trade_date_list), 0
    try:
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name,
                    dtype=DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR, primary_keys=primary_keys)
                data_count_tot += data_count
                logging.info("%d/%d) %s 表 %s %d 条信息被更新",
                             num, trade_date_count, table_name, trade_date, data_count)
            else:
                logging.info("%d/%d) %s 表 %s 无数据信息可被更新", num, trade_date_count, table_name, trade_date)
    except:
        logger.exception("更新 %s 异常", table_name)
    finally:
        logging.info("%s 表 %d 条记录更新完成", table_name, data_count_tot)
def import_tushare_namechange(chain_param=None):
    """
    Import Tushare stock name-change records, starting from the latest start_date already
    stored (or from the earliest list_date when the table does not exist yet).
    :return:
    """
    table_name = 'tushare_stock_namechange'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """select max(start_date) start_date FROM md_integration.tushare_stock_namechange"""
    else:
        sql_str = """select min(list_date) start_date FROM md_integration.tushare_stock_info"""

    with with_db_session(engine_md) as session:
        # work out the date range to request
        table = session.execute(sql_str)
        start_date = list(row[0] for row in table.fetchall())
        start_date = datetime_2_str(start_date[0], STR_FORMAT_DATE_TS)
        end_date = datetime_2_str(date.today(), STR_FORMAT_DATE_TS)

    try:
        data_df = pro.namechange(
            start_date=start_date, end_date=end_date,
            fields='ts_code,name,start_date,end_date,change_reason')
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_NAMECHANGE)
            logging.info("更新 %s 结束 %d 条上市公司更名信息被更新", table_name, data_count)
        else:
            logging.info("无数据信息可被更新")
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `start_date` `start_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `start_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `start_date` 主键设置完成', table_name)
def import_tushare_suspend(chain_param=None):
    """
    Import Tushare stock suspension records for each trading day not yet covered, up to the
    most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_daily_suspend'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Check whether the target table exists. Be careful which table the inner MAX()
    # sub-query references: it must be this function's own target table, otherwise the
    # extracted date range gets mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(suspend_date) FROM {table_name} ))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = pro.suspend(ts_code='', suspend_date=trade_date, resume_date='', fields='')
            if len(data_df) > 0:
                data_count = bunch_insert_p(
                    data_df, table_name=table_name, dtype=DTYPE_TUSHARE_SUSPEND,
                    primary_keys=['ts_code', 'suspend_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("%s 当日无停牌股票", trade_date)
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_adj_factor(chain_param=None):
    """
    Import Tushare adjustment-factor data for each trading day not yet covered, up to the
    most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_daily_adj_factor'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Check whether tushare_stock_daily_adj_factor exists. Be careful which table the inner
    # MAX() sub-query references: it must be this function's own target table, otherwise
    # the extracted date range gets mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())

    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            data_df = pro.adj_factor(ts_code='', trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_on_duplicate_update(
                    data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_DAILY_ADJ_FACTOR)
                logging.info(" %s 表自 %s 日起的 %d 条信息被更新", table_name, trade_date, data_count)
            else:
                logging.info("无数据信息可被更新")
    finally:
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            # build_primary_key([table_name])
            create_pk_str = """ALTER TABLE {table_name}
                CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
                CHANGE COLUMN `trade_date` `trade_date` DATE NOT NULL AFTER `ts_code`,
                ADD PRIMARY KEY (`ts_code`, `trade_date`)""".format(table_name=table_name)
            with with_db_session(engine_md) as session:
                session.execute(create_pk_str)
            logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
def import_tushare_daily_basic(chain_param=None):
    """
    Import Tushare daily_basic indicators for each trading day not yet covered, up to the
    most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_daily_basic'
    primary_keys = ["ts_code", "trade_date"]
    logging.info("更新 %s 开始", table_name)
    check_sqlite_db_primary_keys(table_name, primary_keys)
    has_table = engine_md.has_table(table_name)
    # Be careful which table the inner MAX() sub-query references: it must be this
    # function's own target table (tushare_stock_daily_basic), otherwise the extracted
    # date range gets mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    try:
        for_count = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_daily_basic(ts_code='', trade_date=trade_date)
            if data_df is not None and data_df.shape[0] > 0:
                data_count = bunch_insert(
                    data_df, table_name=table_name,
                    dtype=DTYPE_TUSHARE_STOCK_DAILY_BASIC, primary_keys=primary_keys)
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, for_count, trade_date, table_name, data_count)
            else:
                logging.info("%d/%d) %s 无数据信息可被更新", num, for_count, trade_date)
    except:
        logger.exception("更新 %s 表异常", table_name)
def _test_account2():
    """Check that the data returned by plot_data matches expectations."""
    n_step = 60
    ohlcav_col_name_list = ["open", "high", "low", "close", "amount", "volume"]
    from ibats_common.example.data import load_data
    md_df = load_data('RB.csv').set_index('trade_date')[ohlcav_col_name_list]
    md_df.index = pd.DatetimeIndex(md_df.index)
    from ibats_common.backend.factor import get_factor, transfer_2_batch
    factors_df = get_factor(md_df, dropna=True)
    df_index, df_columns, data_arr_batch = transfer_2_batch(factors_df, n_step=n_step)
    md_df = md_df.loc[df_index, :]
    shape = [data_arr_batch.shape[0], 5, int(n_step / 5), data_arr_batch.shape[2]]
    data_factors = np.transpose(data_arr_batch.reshape(shape), [0, 2, 3, 1])
    print(data_arr_batch.shape, '->', shape, '->', data_factors.shape)
    # build the Account environment
    env = Account(md_df, data_factors)
    next_observation = env.reset()
    # open a short position
    env.step(2)
    for n in range(int(md_df.shape[0] / 2)):
        env.step(3)
    # open a long position, then hold until the episode ends
    next_observation, reward, done = env.step(1)
    while not done:
        next_observation, reward, done = env.step(3)
    # plot the result
    reward_df = env.plot_data()
    value_s = reward_df.iloc[:, 0]
    from ibats_utils.mess import datetime_2_str
    from datetime import datetime
    dt_str = datetime_2_str(datetime.now(), '%Y-%m-%d %H_%M_%S')
    title = f'test_account_{dt_str}'
    from ibats_common.analysis.plot import plot_twin
    plot_twin(value_s, md_df["close"], name=title)
def import_tdx_tick():
    """
    Download tick data through the pytdx interface for every trading day that is not yet
    covered in pytdx_stock_tick (suspended days are skipped).
    :return:
    """
    table_name = 'pytdx_stock_tick'
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """SELECT md.ts_code, md.trade_date
            FROM tushare_stock_daily_md md
            inner join
            (
                select ts_code, delist_date from tushare_stock_info
                where tushare_stock_info.delist_date is null
            ) info
            on info.ts_code = md.ts_code
            left outer join tushare_stock_daily_suspend suspend
            on md.ts_code =suspend.ts_code
            and md.trade_date =suspend.suspend_date
            left outer join
            (
                select ts_code,max(trade_date) trade_date_max from {table_name} group by ts_code
            ) m
            on md.ts_code = m.ts_code
            where md.trade_date>'2000-01-24'
            and suspend.suspend_date is null
            and (m.trade_date_max is null or md.trade_date>m.trade_date_max)""".format(table_name=table_name)
    else:
        # sql_str = """SELECT ts_code ,trade_date trade_date_list FROM tushare_stock_daily_md where trade_date>'2000-01-24'"""
        sql_str = """
            SELECT md.ts_code, md.trade_date
            FROM tushare_stock_daily_md md
            INNER JOIN
            (
                SELECT ts_code, delist_date FROM tushare_stock_info
                WHERE tushare_stock_info.delist_date IS NULL
            ) info
            ON info.ts_code = md.ts_code
            LEFT OUTER JOIN tushare_stock_daily_suspend suspend
            ON md.ts_code =suspend.ts_code
            AND md.trade_date =suspend.suspend_date
            WHERE md.trade_date>'2000-01-24'
            AND suspend.suspend_date IS NULL """

    with with_db_session(engine_md) as session:
        # for each stock, collect the trading days that still need tick data
        table = session.execute(sql_str)
        code_date_range_dic = {}
        for ts_code, trade_date_list in table.fetchall():
            # trade_date_list.sort()
            code_date_range_dic.setdefault(ts_code, []).append(trade_date_list)

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d stocks will be imported into pytdx_stock_tick', data_len)
    # accumulate data_df into data_df_list and flush in batches
    Cycles = 1
    try:
        for num, (index_code, trade_date_list) in enumerate(code_date_range_dic.items(), start=1):
            trade_date_list_len = len(trade_date_list)
            for i, trade_date in enumerate(trade_date_list):
                # trade_date=trade_date_list[i]
                logger.debug('%d/%d) %d/%d) %s [%s]',
                             num, data_len, i, trade_date_list_len, index_code, trade_date)
                data_df = invoke_tdx_tick(code=index_code[0:6],
                                          date_str=datetime_2_str(trade_date, STR_FORMAT_DATE_TS))
                # accumulate
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # flush once the buffer exceeds the threshold
                if data_count >= 200000:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TDX_STOCK_TICK)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TDX_STOCK_TICK)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
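# import_tdx_tick (and several importers below) buffers per-day DataFrames in data_df_list
# and only writes to MySQL once the buffered row count crosses a threshold, flushing the
# remainder in a finally block. A stripped-down sketch of that pattern, with a hypothetical
# `flush` callback standing in for bunch_insert_on_duplicate_update:
def _buffered_import_sketch(frame_iter, flush, threshold=200_000):
    """Accumulate DataFrames and flush them in bulk once `threshold` rows are buffered.

    `frame_iter` yields DataFrames (possibly None or empty); `flush(df)` is an assumed
    callback that performs the actual bulk insert.
    """
    import pandas as pd
    buffer, buffered_rows, total_rows = [], 0, 0
    try:
        for df in frame_iter:
            if df is None or df.shape[0] == 0:
                continue
            buffer.append(df)
            buffered_rows += df.shape[0]
            if buffered_rows >= threshold:
                flush(pd.concat(buffer))
                total_rows += buffered_rows
                buffer, buffered_rows = [], 0
    finally:
        # always flush the tail, even if the loop raised part-way through
        if buffer:
            flush(pd.concat(buffer))
            total_rows += buffered_rows
    return total_rows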
def import_tushare_fut_wsr(chain_param=None, ts_code_set=None):
    """
    Import Tushare futures warehouse-receipt (fut_wsr) data for each trading day not yet
    covered, up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_fut_wsr'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_future_trade_cal trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                  ) """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_future_trade_cal trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND cal_date>'19950414')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trddate = list(row[0] for row in table.fetchall())

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate)
    logger.info('%d 日的期货仓单数据将被导入数据库', data_len)
    # accumulate data_df into data_df_list and flush in batches
    fields = 'trade_date,symbol,fut_name,warehouse,wh_id,pre_vol,vol,vol_chg,area,year,grade,brand,place,pd,is_ct,unit,exchange'
    try:
        for i in range(len(trddate)):
            trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS)
            data_df = invoke_fut_wsr(trade_date=trade_date, fields=fields)
            logging.info(" 提取 %s 日 %d 条期货仓单数据", trade_date, data_df.shape[0])
            # accumulate
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # flush once the buffer exceeds the threshold
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUTURE_WSR)
                logging.info(" 更新%s表%d条期货仓单数据", table_name, data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUTURE_WSR)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条仓单信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_indicator(chain_param=None, ts_code_set=None):
    """
    Import Tushare financial-indicator (fina_indicator) data for each stock, covering the
    period from the day after the latest ann_date already stored (or the list date) up to
    the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)
    primary_keys = ['ts_code', 'ann_date', 'end_date']
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(ann_date),1) ann_date
                     FROM {table_name} GROUP BY ts_code) fina_indicator
                ON info.ts_code = fina_indicator.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, list_date date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date, delist_date, end_date)
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # for each stock, work out the date range that still needs to be fetched
        table = session.execute(sql_str)
        begin_time = None
        # map ts_code -> (date_from, date_to)
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    fields = (
        'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps',
        'capital_rese_ps', 'surplus_rese_ps', 'undist_profit_ps', 'extra_item', 'profit_dedt',
        'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days',
        'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income',
        'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 'noncurrent_exint',
        'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital',
        'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps',
        'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales',
        'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr',
        'impai_ttm', 'gc_of_gr', 'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta',
        'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
        'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or',
        'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 'dp_assets_to_eqt',
        'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital',
        'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt',
        'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt', 'tangibleasset_to_netdebt',
        'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest',
        'longdebt_to_workingcapital', 'ebitda_to_debt', 'turn_days', 'roa_yearly', 'roa_dp',
        'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit',
        'cash_to_liqdebt', 'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly',
        'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps',
        'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr',
        'q_adminexp_to_gr', 'q_finaexp_to_gr', 'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe',
        'q_dt_roe', 'q_npta', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit',
        'q_salescash_to_or', 'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy',
        'op_yoy', 'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy',
        'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq',
        'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq',
        'equity_yoy', 'rd_exp')

    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d 财务指标信息将被插入 tushare_stock_fin_indicator 表', data_len)
    # accumulate data_df into data_df_list and flush in batches
    Cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_fina_indicator(
                ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS), fields=fields)
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            if data_df is not None and len(data_df) > 0 and data_df['ann_date'].iloc[-1] is not None:
                # the API caps the rows per call: keep paging backwards until date_from is covered
                while try_2_date(data_df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last = try_2_date(data_df['ann_date'].iloc[-1])
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['ann_date'].iloc[-1]) - timedelta(days=1),
                            STR_FORMAT_DATE_TS),
                        fields=fields)
                    if len(df2) > 0 and df2['ann_date'].iloc[-1] is not None:
                        last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                    elif len(df2) <= 0:
                        break

            if data_df is None:
                logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to)
                continue
            elif data_df is not None:
                logger.info('%d/%d) %d 条 %s 财务指标已提取,起止时间 %s 和 %s',
                            num, data_len, data_df.shape[0], ts_code, date_from, date_to)

            # accumulate
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # flush once the buffer exceeds the threshold
            if data_count >= 1000 and len(data_df_list) > 0:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(data_df_all, table_name=table_name,
                                          dtype=DTYPE_STOCK_FINA_INDICATOR, primary_keys=primary_keys)
                all_data_count += data_count
                logger.info('%d 条财务指标将数据插入 %s 表', data_count, table_name)
                data_df_list, data_count = [], 0

            # debugging only
            Cycles = Cycles + 1
            if DEBUG and Cycles > 10:
                break
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all, table_name=table_name,
                                      dtype=DTYPE_STOCK_FINA_INDICATOR, primary_keys=primary_keys)
            all_data_count += data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
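# Because the Tushare fina_indicator endpoint caps the number of rows per call, the loop
# above keeps re-querying with the end date moved to just before the oldest ann_date
# already received, until the requested window reaches date_from or the API stops
# returning older rows. The same idea reduced to a generic helper, with a hypothetical
# fetch(start_date, end_date) callable standing in for invoke_fina_indicator:
def _fetch_all_pages_sketch(fetch, date_from, date_to, date_col='ann_date'):
    """Page backwards through a row-capped API until `date_from` is covered.

    `fetch(start_date, end_date)` is an assumed callable returning a DataFrame sorted
    newest-first (as the Tushare endpoints above do); `date_col` is the column used to
    decide where the next page should end.
    """
    from datetime import timedelta
    import pandas as pd
    pages, prev_oldest = [], None
    end_date = date_to
    while True:
        df = fetch(date_from, end_date)
        if df is None or df.empty:
            break
        oldest = pd.Timestamp(df[date_col].iloc[-1])
        if prev_oldest is not None and oldest >= prev_oldest:
            break  # no progress between pages; stop to avoid looping forever
        pages.append(df)
        prev_oldest = oldest
        if oldest <= pd.Timestamp(date_from):
            break  # the requested window is fully covered
        # the next page ends the day before the oldest row we already have
        end_date = (oldest - timedelta(days=1)).date()
    return pd.concat(pages) if pages else pd.DataFrame()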
def import_tushare_hsgt_top10(chain_param=None):
    """
    Import Tushare HSGT top-10 (hsgt_top10) data for each trading day not yet covered,
    up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_hsgt_top10'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ts_code', String(20)),
        ('name', String(20)),
        ('close', DOUBLE),
        ('change', DOUBLE),
        ('rank', Integer),
        ('market_type', String(20)),
        ('amount', DOUBLE),
        ('net_amount', DOUBLE),
        ('buy', DOUBLE),
        ('sell', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               AND cal_date>='2014-11-17')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    # build the dtype mapping
    dtype = {key: val for key, val in param_list}
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for market_type in list(['1', '3']):
                data_df = invoke_hsgt_top10(trade_date=trade_date, market_type=market_type)
                if len(data_df) > 0:
                    data_count = bunch_insert_p(
                        data_df, table_name=table_name, dtype=dtype,
                        primary_keys=['ts_code', 'trade_date'])
                    logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新",
                                 num, trade_date_list_len, trade_date, table_name, data_count)
                else:
                    logging.info("无数据信息可被更新")
                    break
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_margin(chain_param=None):
    """
    Import Tushare margin-trading summary (margin) data for each trading day not yet
    covered, up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_margin'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('exchange_id', String(20)),
        ('rzye', DOUBLE),
        ('rzmre', DOUBLE),
        ('rzche', DOUBLE),
        ('rqye', DOUBLE),
        ('rqmcl', DOUBLE),
        ('rzrqye', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               AND cal_date>='2010-03-31')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    # build the dtype mapping
    dtype = {key: val for key, val in param_list}
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            for exchange_id in list(['SSE', 'SZSE']):
                data_df = invoke_margin(trade_date=trade_date, exchange_id=exchange_id)
                if len(data_df) > 0:
                    # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                    # logging.info("%s更新 %s %s 结束 %d 条信息被更新", trade_date, table_name, exchange_id, data_count)
                    data_count = bunch_insert(
                        data_df, table_name=table_name, dtype=dtype,
                        primary_keys=['exchange_id', 'trade_date'])
                    logging.info("%d/%d) %s %s 更新 %s 结束 %d 条信息被更新",
                                 num, trade_date_list_len, exchange_id, trade_date, table_name, data_count)
                else:
                    logging.info("%d/%d) %s %s 无数据信息可被更新 %s",
                                 num, trade_date_list_len, exchange_id, trade_date, table_name)
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_stock_fina_indicator(ts_code_set=None):
    """
    Patch import of Tushare financial-indicator (fina_indicator) data: for each stock, pull
    the period from ten years before the list date up to the list date (dates derived from
    tushare_stock_info).
    :return:
    """
    table_name = 'tushare_stock_fin_indicator'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('ts_code', String(20)), ('ann_date', Date), ('end_date', Date),
        ('eps', DOUBLE), ('dt_eps', DOUBLE), ('total_revenue_ps', DOUBLE), ('revenue_ps', DOUBLE),
        ('capital_rese_ps', DOUBLE), ('surplus_rese_ps', DOUBLE), ('undist_profit_ps', DOUBLE),
        ('extra_item', DOUBLE), ('profit_dedt', DOUBLE), ('gross_margin', DOUBLE),
        ('current_ratio', DOUBLE), ('quick_ratio', DOUBLE), ('cash_ratio', DOUBLE),
        ('invturn_days', DOUBLE), ('arturn_days', DOUBLE), ('inv_turn', DOUBLE), ('ar_turn', DOUBLE),
        ('ca_turn', DOUBLE), ('fa_turn', DOUBLE), ('assets_turn', DOUBLE), ('op_income', DOUBLE),
        ('valuechange_income', DOUBLE), ('interst_income', DOUBLE), ('daa', DOUBLE), ('ebit', DOUBLE),
        ('ebitda', DOUBLE), ('fcff', DOUBLE), ('fcfe', DOUBLE), ('current_exint', DOUBLE),
        ('noncurrent_exint', DOUBLE), ('interestdebt', DOUBLE), ('netdebt', DOUBLE),
        ('tangible_asset', DOUBLE), ('working_capital', DOUBLE), ('networking_capital', DOUBLE),
        ('invest_capital', DOUBLE), ('retained_earnings', DOUBLE), ('diluted2_eps', DOUBLE),
        ('bps', DOUBLE), ('ocfps', DOUBLE), ('retainedps', DOUBLE), ('cfps', DOUBLE),
        ('ebit_ps', DOUBLE), ('fcff_ps', DOUBLE), ('fcfe_ps', DOUBLE), ('netprofit_margin', DOUBLE),
        ('grossprofit_margin', DOUBLE), ('cogs_of_sales', DOUBLE), ('expense_of_sales', DOUBLE),
        ('profit_to_gr', DOUBLE), ('saleexp_to_gr', DOUBLE), ('adminexp_of_gr', DOUBLE),
        ('finaexp_of_gr', DOUBLE), ('impai_ttm', DOUBLE), ('gc_of_gr', DOUBLE), ('op_of_gr', DOUBLE),
        ('ebit_of_gr', DOUBLE), ('roe', DOUBLE), ('roe_waa', DOUBLE), ('roe_dt', DOUBLE),
        ('roa', DOUBLE), ('npta', DOUBLE), ('roic', DOUBLE), ('roe_yearly', DOUBLE),
        ('roa2_yearly', DOUBLE), ('roe_avg', DOUBLE), ('opincome_of_ebt', DOUBLE),
        ('investincome_of_ebt', DOUBLE), ('n_op_profit_of_ebt', DOUBLE), ('tax_to_ebt', DOUBLE),
        ('dtprofit_to_profit', DOUBLE), ('salescash_to_or', DOUBLE), ('ocf_to_or', DOUBLE),
        ('ocf_to_opincome', DOUBLE), ('capitalized_to_da', DOUBLE), ('debt_to_assets', DOUBLE),
        ('assets_to_eqt', DOUBLE), ('dp_assets_to_eqt', DOUBLE), ('ca_to_assets', DOUBLE),
        ('nca_to_assets', DOUBLE), ('tbassets_to_totalassets', DOUBLE), ('int_to_talcap', DOUBLE),
        ('eqt_to_talcapital', DOUBLE), ('currentdebt_to_debt', DOUBLE), ('longdeb_to_debt', DOUBLE),
        ('ocf_to_shortdebt', DOUBLE), ('debt_to_eqt', DOUBLE), ('eqt_to_debt', DOUBLE),
        ('eqt_to_interestdebt', DOUBLE), ('tangibleasset_to_debt', DOUBLE),
        ('tangasset_to_intdebt', DOUBLE), ('tangibleasset_to_netdebt', DOUBLE),
        ('ocf_to_debt', DOUBLE), ('ocf_to_interestdebt', DOUBLE), ('ocf_to_netdebt', DOUBLE),
        ('ebit_to_interest', DOUBLE), ('longdebt_to_workingcapital', DOUBLE),
        ('ebitda_to_debt', DOUBLE), ('turn_days', DOUBLE), ('roa_yearly', DOUBLE),
        ('roa_dp', DOUBLE), ('fixed_assets', DOUBLE), ('profit_prefin_exp', DOUBLE),
        ('non_op_profit', DOUBLE), ('op_to_ebt', DOUBLE), ('nop_to_ebt', DOUBLE),
        ('ocf_to_profit', DOUBLE), ('cash_to_liqdebt', DOUBLE),
        ('cash_to_liqdebt_withinterest', DOUBLE), ('op_to_liqdebt', DOUBLE), ('op_to_debt', DOUBLE),
        ('roic_yearly', DOUBLE), ('total_fa_trun', DOUBLE), ('profit_to_op', DOUBLE),
        ('q_opincome', DOUBLE), ('q_investincome', DOUBLE), ('q_dtprofit', DOUBLE),
        ('q_eps', DOUBLE), ('q_netprofit_margin', DOUBLE), ('q_gsprofit_margin', DOUBLE),
        ('q_exp_to_sales', DOUBLE), ('q_profit_to_gr', DOUBLE), ('q_saleexp_to_gr', DOUBLE),
        ('q_adminexp_to_gr', DOUBLE), ('q_finaexp_to_gr', DOUBLE), ('q_impair_to_gr_ttm', DOUBLE),
        ('q_gc_to_gr', DOUBLE), ('q_op_to_gr', DOUBLE), ('q_roe', DOUBLE), ('q_dt_roe', DOUBLE),
        ('q_npta', DOUBLE), ('q_opincome_to_ebt', DOUBLE), ('q_investincome_to_ebt', DOUBLE),
        ('q_dtprofit_to_profit', DOUBLE), ('q_salescash_to_or', DOUBLE), ('q_ocf_to_sales', DOUBLE),
        ('q_ocf_to_or', DOUBLE), ('basic_eps_yoy', DOUBLE), ('dt_eps_yoy', DOUBLE),
        ('cfps_yoy', DOUBLE), ('op_yoy', DOUBLE), ('ebt_yoy', DOUBLE), ('netprofit_yoy', DOUBLE),
        ('dt_netprofit_yoy', DOUBLE), ('ocf_yoy', DOUBLE), ('roe_yoy', DOUBLE), ('bps_yoy', DOUBLE),
        ('assets_yoy', DOUBLE), ('eqt_yoy', DOUBLE), ('tr_yoy', DOUBLE), ('or_yoy', DOUBLE),
        ('q_gr_yoy', DOUBLE), ('q_gr_qoq', DOUBLE), ('q_sales_yoy', DOUBLE), ('q_sales_qoq', DOUBLE),
        ('q_op_yoy', DOUBLE), ('q_op_qoq', DOUBLE), ('q_profit_yoy', DOUBLE), ('q_profit_qoq', DOUBLE),
        ('q_netprofit_yoy', DOUBLE), ('q_netprofit_qoq', DOUBLE), ('equity_yoy', DOUBLE),
        ('rd_exp', DOUBLE),
    ]
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        # for each stock, work out the date range that still needs to be fetched
        table = session.execute(sql_str)
        begin_time = None
        # map ts_code -> (date_from, date_to)
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # build the dtype mapping
    dtype = {key: val for key, val in param_list}
    # dtype['ts_code'] = String(20)
    # dtype['trade_date'] = Date
    fields = (
        'ts_code', 'ann_date', 'end_date', 'eps', 'dt_eps', 'total_revenue_ps', 'revenue_ps',
        'capital_rese_ps', 'surplus_rese_ps', 'undist_profit_ps', 'extra_item', 'profit_dedt',
        'gross_margin', 'current_ratio', 'quick_ratio', 'cash_ratio', 'invturn_days', 'arturn_days',
        'inv_turn', 'ar_turn', 'ca_turn', 'fa_turn', 'assets_turn', 'op_income', 'valuechange_income',
        'interst_income', 'daa', 'ebit', 'ebitda', 'fcff', 'fcfe', 'current_exint', 'noncurrent_exint',
        'interestdebt', 'netdebt', 'tangible_asset', 'working_capital', 'networking_capital',
        'invest_capital', 'retained_earnings', 'diluted2_eps', 'bps', 'ocfps', 'retainedps', 'cfps',
        'ebit_ps', 'fcff_ps', 'fcfe_ps', 'netprofit_margin', 'grossprofit_margin', 'cogs_of_sales',
        'expense_of_sales', 'profit_to_gr', 'saleexp_to_gr', 'adminexp_of_gr', 'finaexp_of_gr',
        'impai_ttm', 'gc_of_gr', 'op_of_gr', 'ebit_of_gr', 'roe', 'roe_waa', 'roe_dt', 'roa', 'npta',
        'roic', 'roe_yearly', 'roa2_yearly', 'roe_avg', 'opincome_of_ebt', 'investincome_of_ebt',
        'n_op_profit_of_ebt', 'tax_to_ebt', 'dtprofit_to_profit', 'salescash_to_or', 'ocf_to_or',
        'ocf_to_opincome', 'capitalized_to_da', 'debt_to_assets', 'assets_to_eqt', 'dp_assets_to_eqt',
        'ca_to_assets', 'nca_to_assets', 'tbassets_to_totalassets', 'int_to_talcap', 'eqt_to_talcapital',
        'currentdebt_to_debt', 'longdeb_to_debt', 'ocf_to_shortdebt', 'debt_to_eqt', 'eqt_to_debt',
        'eqt_to_interestdebt', 'tangibleasset_to_debt', 'tangasset_to_intdebt', 'tangibleasset_to_netdebt',
        'ocf_to_debt', 'ocf_to_interestdebt', 'ocf_to_netdebt', 'ebit_to_interest',
        'longdebt_to_workingcapital', 'ebitda_to_debt', 'turn_days', 'roa_yearly', 'roa_dp',
        'fixed_assets', 'profit_prefin_exp', 'non_op_profit', 'op_to_ebt', 'nop_to_ebt', 'ocf_to_profit',
        'cash_to_liqdebt', 'cash_to_liqdebt_withinterest', 'op_to_liqdebt', 'op_to_debt', 'roic_yearly',
        'total_fa_trun', 'profit_to_op', 'q_opincome', 'q_investincome', 'q_dtprofit', 'q_eps',
        'q_netprofit_margin', 'q_gsprofit_margin', 'q_exp_to_sales', 'q_profit_to_gr', 'q_saleexp_to_gr',
        'q_adminexp_to_gr', 'q_finaexp_to_gr', 'q_impair_to_gr_ttm', 'q_gc_to_gr', 'q_op_to_gr', 'q_roe',
        'q_dt_roe', 'q_npta', 'q_opincome_to_ebt', 'q_investincome_to_ebt', 'q_dtprofit_to_profit',
        'q_salescash_to_or', 'q_ocf_to_sales', 'q_ocf_to_or', 'basic_eps_yoy', 'dt_eps_yoy', 'cfps_yoy',
        'op_yoy', 'ebt_yoy', 'netprofit_yoy', 'dt_netprofit_yoy', 'ocf_yoy', 'roe_yoy', 'bps_yoy',
        'assets_yoy', 'eqt_yoy', 'tr_yoy', 'or_yoy', 'q_gr_yoy', 'q_gr_qoq', 'q_sales_yoy', 'q_sales_qoq',
        'q_op_yoy', 'q_op_qoq', 'q_profit_yoy', 'q_profit_qoq', 'q_netprofit_yoy', 'q_netprofit_qoq',
        'equity_yoy', 'rd_exp')

    data_len = len(code_date_range_dic)
    logger.info('%d data will be imported into %s', data_len, table_name)
    # loop over stocks, paging backwards through the API when necessary
    Cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            df = invoke_fina_indicator(
                ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS), fields=fields)
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            data_df = df
            if data_df is not None and len(data_df) > 0:
                # the API caps the rows per call: keep paging backwards until date_from is covered
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(df['ann_date'].iloc[-1]), None
                    df2 = invoke_fina_indicator(
                        ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS),
                        fields=fields)
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s',
                                           num, data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                    elif len(df2) <= 0:
                        break
            # insert into the database
            data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            data_df = []
            # debugging only
            Cycles = Cycles + 1
            if DEBUG and Cycles > 10:
                break
    finally:
        # insert whatever is left
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, dtype,
                myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date', 'end_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_top_list(chain_param=None):
    """
    Import Tushare dragon-tiger list (top_list) data for each trading day not yet covered,
    up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_top_list'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name} ))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               and cal_date>'2005-05-31')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    # intermediate buffers
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trade_date_list)
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_top_list(trade_date=trade_date)
            # accumulate
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # flush once the buffer exceeds the threshold
            if data_count >= 2000:
                data_df_all = pd.concat(data_df_list)
                data_count = bunch_insert(
                    data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                    primary_keys=['ts_code', 'trade_date', 'reason'])
                logging.info("%d/%d) 更新 %s 结束 ,截至%s日 %d 条信息被更新",
                             num, trade_date_list_len, table_name, trade_date, all_data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    except:
        logger.exception('更新 %s 表异常', table_name)
    finally:
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(
                data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_STOCK_TOP_LIST,
                primary_keys=['ts_code', 'trade_date', 'reason'])
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_repurchase(chain_param=None):
    """
    Import Tushare share-repurchase announcements for each trading day not yet covered,
    up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_repurchase'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # Be careful which table the inner MAX() sub-query references: it must be this
    # function's own target table, otherwise the extracted date range gets mixed up.
    if has_table:
        sql_str = """
            select * from
             (
                select * from tushare_trade_date trddate
                where (cal_date>(SELECT max(ann_date) FROM {table_name} ))
             )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE')
               AND cal_date>'20120605'
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the announcement dates to process
        table = session.execute(sql_str)
        ann_date_list = list(row[0] for row in table.fetchall())

    logging.info("%d 个交易日的回购信息将被更新", len(ann_date_list))
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(ann_date_list)
    try:
        for i in range(len(ann_date_list)):
            ann_date = datetime_2_str(ann_date_list[i], STR_FORMAT_DATE_TS)
            data_df = invoke_repurchase(ann_date=ann_date)
            if data_df is not None and data_df.shape[0] > 0:
                logging.info("提取%s日%d条回购信息", ann_date, data_df.shape[0])
            else:
                logging.info("%s日无股票回购公告", ann_date)
            # accumulate
            if data_df is not None and data_df.shape[0] > 0:
                data_count += data_df.shape[0]
                data_df_list.append(data_df)
            # flush once the buffer exceeds the threshold
            if data_count >= 1000:
                data_df_all = pd.concat(data_df_list)
                bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_REPURCHASE)
                logger.info('%d 条股票回购信息被插入 tushare_stock_repurchase 表', data_count)
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_REPURCHASE)
            all_data_count = all_data_count + data_count
        logging.info("更新 %s 结束 %d 条回购信息被更新", table_name, all_data_count)
def import_tushare_stock_fina_mainbz(chain_param=None, ts_code_set=None):
    """
    Import Tushare main-business composition (fina_mainbz) data for each stock, by product
    ('P') and by district ('D'), from the day after the latest end_date already stored (or
    ten years before the list date) up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_stock_fin_mainbz'
    logging.info("更新 %s 开始", table_name)
    # param_list = [
    #     ('ts_code', String(20)),
    #     ('end_date', Date),
    #     ('bz_item', String(200)),
    #     ('bz_sales', DOUBLE),
    #     ('bz_profit', DOUBLE),
    #     ('bz_cost', DOUBLE),
    #     ('curr_type', String(20)),
    #     ('update_flag', String(20)),
    #     ('market_type', String(20)),
    # ]
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM
                    tushare_stock_info info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(end_date),1) end_date
                     FROM {table_name} GROUP BY ts_code) mainbz
                ON info.ts_code = mainbz.ts_code
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to
            FROM
            (
                SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2
                FROM tushare_stock_info info
            ) tt
            WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2)
            ORDER BY ts_code """
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # for each stock, work out the date range that still needs to be fetched
        table = session.execute(sql_str)
        begin_time = None
        # map ts_code -> (date_from, date_to)
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # dtype = {key: val for key, val in param_list}
    # dtype['ts_code'] = String(20)
    # dtype['trade_date'] = Date
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d data will be imported into %s', data_len, table_name)
    # accumulate data_df into data_df_list and flush in batches
    Cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            for mainbz_type in list(['P', 'D']):
                logger.debug('%d/%d) %s [%s - %s] %s', num, data_len, ts_code, date_from, date_to, mainbz_type)
                data_df = invoke_fina_mainbz(
                    ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                    end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS), type=mainbz_type)
                data_df['market_type'] = mainbz_type
                # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
                # data_df = df
                if data_df is not None and len(data_df) > 0:
                    # the API caps the rows per call: keep paging backwards until date_from is covered
                    while try_2_date(data_df['end_date'].iloc[-1]) > date_from:
                        last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['end_date'].iloc[-1]), None
                        df2 = invoke_fina_mainbz(
                            ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                            end_date=datetime_2_str(try_2_date(data_df['end_date'].iloc[-1]), STR_FORMAT_DATE_TS),
                            type=mainbz_type)
                        df2['market_type'] = mainbz_type
                        if len(df2) > 0:
                            last_date_in_df_cur = try_2_date(df2['end_date'].iloc[-1])
                            if last_date_in_df_cur < last_date_in_df_last:
                                data_df = pd.concat([data_df, df2])
                                # df = df2
                            elif last_date_in_df_cur <= last_date_in_df_last:
                                break
                        elif len(df2) <= 0:
                            break

                if data_df is None:
                    logger.warning('%d/%d) %s 在 %s 到 %s 这段时间无数据', num, data_len, ts_code, date_from, date_to)
                    continue
                elif data_df is not None:
                    logger.info('%d/%d), 提取出%d 条 %s 的主营业务数据,类型为%s,起止时间为 %s 和 %s',
                                num, data_len, data_df.shape[0], ts_code, mainbz_type, date_from, date_to)

                # # 数据插入数据库
                # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
                # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
                # accumulate
                if data_df is not None and data_df.shape[0] > 0:
                    data_count += data_df.shape[0]
                    data_df_list.append(data_df)
                # flush once the buffer exceeds the threshold
                if data_count >= 100 and len(data_df_list) > 0:
                    data_df_all = pd.concat(data_df_list)
                    bunch_insert_on_duplicate_update(
                        data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FINA_MAINBZ,
                        myisam_if_create_table=True,
                        primary_keys=['ts_code', 'end_date', 'bz_item'], schema=config.DB_SCHEMA_MD)
                    all_data_count += data_count
                    data_df_list, data_count = [], 0

                # debugging only
                Cycles = Cycles + 1
                if DEBUG and Cycles > 2:
                    break
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert_on_duplicate_update(
                data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FINA_MAINBZ,
                myisam_if_create_table=True,
                primary_keys=['ts_code', 'end_date', 'bz_item'], schema=config.DB_SCHEMA_MD)
            all_data_count = all_data_count + data_count
        if not has_table and engine_md.has_table(table_name):
            alter_table_2_myisam(engine_md, [table_name])
            build_primary_key([table_name])
def import_tushare_stock_cashflow(ts_code_set=None):
    """
    Patch import of Tushare cash-flow statements: for each stock, pull the period from ten
    years before the list date up to the list date (dates derived from tushare_stock_info).
    :return:
    """
    table_name = 'tushare_stock_cashflow'
    logging.info("更新 %s 开始", table_name)
    sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info;"""
    logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name)
    with with_db_session(engine_md) as session:
        # for each stock, work out the date range that still needs to be fetched
        table = session.execute(sql_str)
        begin_time = None
        # map ts_code -> (date_from, date_to)
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    data_len = len(code_date_range_dic)
    logger.info('%d data will be imported into %s', data_len, table_name)
    # loop over stocks, paging backwards through the API when necessary
    Cycles = 1
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            df = invoke_cashflow(
                ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to)
            data_df = df
            if data_df is not None and len(data_df) > 0:
                # the API caps the rows per call: keep paging backwards until date_from is covered
                while try_2_date(df['ann_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(df['ann_date'].iloc[-1]), None
                    df2 = invoke_cashflow(
                        ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS))
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s',
                                           num, data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                    elif len(df2) <= 0:
                        break
            # insert into the database
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_CASHFLOW,
                myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
            data_df = []
            # debugging only
            Cycles = Cycles + 1
            if DEBUG and Cycles > 10:
                break
    finally:
        # insert whatever is left
        if len(data_df) > 0:
            data_count = bunch_insert_on_duplicate_update(
                data_df, table_name, engine_md, DTYPE_TUSHARE_STOCK_CASHFLOW,
                myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'],
                schema=config.DB_SCHEMA_MD)
            logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_stock_index_daily(chain_param=None, ts_code_set=None):
    """
    Import Tushare index daily bars (index_daily) for each index, from the day after the
    latest trade_date already stored (or the index base_date) up to the most recent
    completed trading day.
    :return:
    """
    table_name = 'tushare_stock_index_daily_md'
    logging.info("更新 %s 开始", table_name)
    has_table = engine_md.has_table(table_name)
    # check whether the target table already exists
    if has_table:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, ifnull(trade_date, base_date) date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM
                    tushare_stock_index_basic info
                LEFT OUTER JOIN
                    (SELECT ts_code, adddate(max(trade_date),1) trade_date
                     FROM {table_name} GROUP BY ts_code) daily
                ON info.ts_code = daily.ts_code
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code""".format(table_name=table_name)
    else:
        sql_str = """
            SELECT ts_code, date_frm, if(exp_date<end_date, exp_date, end_date) date_to
            FROM
            (
                SELECT info.ts_code, base_date date_frm, exp_date,
                if(hour(now())<16, subdate(curdate(),1), curdate()) end_date
                FROM tushare_stock_index_basic info
            ) tt
            WHERE date_frm <= if(exp_date<end_date, exp_date, end_date)
            ORDER BY ts_code"""
        logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # for each index, work out the date range that still needs to be fetched
        table = session.execute(sql_str)
        begin_time = None
        # map ts_code -> (date_from, date_to)
        code_date_range_dic = {
            ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
            for ts_code, date_from, date_to in table.fetchall()
            if ts_code_set is None or ts_code in ts_code_set
        }

    # data_len = len(code_date_range_dic)
    data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic)
    logger.info('%d stocks will be imported into tushare_stock_index_daily_md', data_len)
    # accumulate data_df into data_df_list and flush in batches
    try:
        for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1):
            logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to)
            data_df = invoke_index_daily(
                ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS))
            # data_df = df
            if data_df is not None and data_df.shape[0] > 0:
                # the API caps the rows per call: keep paging backwards until date_from is covered
                while try_2_date(data_df['trade_date'].iloc[-1]) > date_from:
                    last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['trade_date'].iloc[-1]), None
                    df2 = invoke_index_daily(
                        ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS),
                        end_date=datetime_2_str(
                            try_2_date(data_df['trade_date'].iloc[-1]) - timedelta(days=1),
                            STR_FORMAT_DATE_TS))
                    if len(df2) > 0:
                        last_date_in_df_cur = try_2_date(df2['trade_date'].iloc[-1])
                        if last_date_in_df_cur < last_date_in_df_last:
                            data_df = pd.concat([data_df, df2])
                            # df = df2
                        elif last_date_in_df_cur == last_date_in_df_last:
                            break
                        if data_df is None:
                            logger.warning('%d/%d) %s has no data during %s %s',
                                           num, data_len, ts_code, date_from, date_to)
                            continue
                        logger.info('%d/%d) %d data of %s between %s and %s',
                                    num, data_len, data_df.shape[0], ts_code, date_from, date_to)
                    else:
                        break
            # accumulate
            data_count += data_df.shape[0]
            data_df_list.append(data_df)
            # debugging only
            if DEBUG and len(data_df_list) > 5:
                break
            # flush once the buffer exceeds the threshold
            if data_count >= 500:
                data_df_all = pd.concat(data_df_list)
                bunch_insert(data_df_all, table_name=table_name,
                             dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                             primary_keys=["ts_code", "trade_date"])
                all_data_count += data_count
                data_df_list, data_count = [], 0
    finally:
        # insert whatever is left in the buffer
        if len(data_df_list) > 0:
            data_df_all = pd.concat(data_df_list)
            data_count = bunch_insert(data_df_all, table_name=table_name,
                                      dtype=DTYPE_TUSHARE_STOCK_INDEX_DAILY_MD,
                                      primary_keys=["ts_code", "trade_date"])
            all_data_count += data_count
        logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
def import_tushare_moneyflow_hsgt(chain_param=None):
    """
    Import Tushare HSGT money-flow (moneyflow_hsgt) data for each trading day not yet
    covered, up to the most recent completed trading day.
    :return:
    """
    table_name = 'tushare_moneyflow_hsgt'
    logging.info("更新 %s 开始", table_name)
    param_list = [
        ('trade_date', Date),
        ('ggt_ss', DOUBLE),
        ('ggt_sz', DOUBLE),
        ('hgt', DOUBLE),
        ('sgt', DOUBLE),
        ('north_money', DOUBLE),
        ('south_money', DOUBLE),
    ]
    has_table = engine_md.has_table(table_name)
    # Be careful which table the inner MAX() sub-query references: it must be this
    # function's own target table, otherwise the extracted date range gets mixed up.
    if has_table:
        sql_str = """
            select cal_date
            FROM
             (
              select * from tushare_trade_date trddate
              where( cal_date>(SELECT max(trade_date) FROM {table_name}))
            )tt
            where (is_open=1
                   and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
                   and exchange='SSE') """.format(table_name=table_name)
    else:
        sql_str = """
            SELECT cal_date FROM tushare_trade_date trddate
            WHERE (trddate.is_open=1
               AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate())
               AND exchange='SSE'
               AND cal_date>='2014-11-17')
            ORDER BY cal_date"""
        logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name)

    with with_db_session(engine_md) as session:
        # fetch the trading days to process
        table = session.execute(sql_str)
        trade_date_list = list(row[0] for row in table.fetchall())

    # build the dtype mapping
    dtype = {key: val for key, val in param_list}
    try:
        trade_date_list_len = len(trade_date_list)
        for num, trade_date in enumerate(trade_date_list, start=1):
            trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
            data_df = invoke_moneyflow_hsgt(trade_date=trade_date)
            if len(data_df) > 0:
                data_count = bunch_insert_p(data_df, table_name=table_name, dtype=dtype,
                                            primary_keys=['trade_date'])
                logging.info("%d/%d) %s 更新 %s 结束 %d 条信息被更新",
                             num, trade_date_list_len, trade_date, table_name, data_count)
            else:
                logging.info("无数据信息可被更新")
    except:
        logger.exception('更新 %s 表异常', table_name)
def import_tushare_dividend(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_dividend' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ select cal_date ann_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(ann_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """.format(table_name=table_name) else: sql_str = """ SELECT cal_date ann_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trddate = list(row[0] for row in table.fetchall()) #输出数据字段 fields = 'ts_code,end_date,ann_date,div_proc,stk_div,stk_bo_rate,stk_co_rate,cash_div,cash_div_tax,\ record_date,ex_date,pay_date,div_listdate,imp_ann_date,base_date,base_share' data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate) try: for i in range(len(trddate)): ann_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS) data_df = invoke_dividend(ann_date=ann_date, fields=fields) logging.info(" %s 日 提取 %d 条分红送股信息", ann_date, len(data_df)) # if len(data_df) > 0: # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_DIVIDEND) # logging.info(" %s 表 %s 日 %d 条信息被更新", table_name, ann_date, data_count) # else: # logging.info("无数据信息可被更新") # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 仅调试使用 if DEBUG and len(data_df_list) > 5: break # 大于阀值开始插入 if data_count >= 500: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_DIVIDEND) all_data_count += data_count data_df_list, data_count = [], 0 finally: # 导入残余数据到数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_DIVIDEND) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) if not has_table and engine_md.has_table(table_name): alter_table_2_myisam(engine_md, [table_name]) # build_primary_key([table_name]) create_pk_str = """ALTER TABLE {table_name} CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST, CHANGE COLUMN `ann_date` `ann_date` DATE NOT NULL AFTER `ts_code`, ADD PRIMARY KEY (`ts_code`, `ann_date`)""".format( table_name=table_name) with with_db_session(engine_md) as session: session.execute(create_pk_str) logger.info('%s 表 `ts_code`, `trade_date` 主键设置完成', table_name)
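# When bunch_insert_on_duplicate_update creates a table implicitly on the first load it
# has no primary key, which is why the dividend import above converts the new table to
# MyISAM and adds a composite key afterwards. A sketch of that post-create step as a
# reusable helper; the (column, DDL) pairs are assumptions standing in for the
# hard-coded ALTER above, and with_db_session / alter_table_2_myisam are the module's
# own helpers.
def add_composite_primary_key(engine, table_name, key_cols):
    # key_cols: ordered (column_name, column_ddl) pairs,
    # e.g. [('ts_code', 'VARCHAR(20)'), ('ann_date', 'DATE')]
    alter_table_2_myisam(engine, [table_name])
    changes = ", ".join(
        "CHANGE COLUMN `{0}` `{0}` {1} NOT NULL".format(col, ddl) for col, ddl in key_cols)
    pk_cols = ", ".join("`{0}`".format(col) for col, _ in key_cols)
    sql = "ALTER TABLE {0} {1}, ADD PRIMARY KEY ({2})".format(table_name, changes, pk_cols)
    with with_db_session(engine) as session:
        session.execute(sql)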
def import_tushare_stock_top10_holders(ts_code_set=None, chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_top10_holders' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to FROM ( SELECT info.ts_code, ifnull(end_date, subdate(list_date,365*10)) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2 FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) end_date FROM {table_name} GROUP BY ts_code) top10_holders ON info.ts_code = top10_holders.ts_code ) tt WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date2, delist_date, end_date2) date_to FROM ( SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date2 FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date2, delist_date, end_date2) ORDER BY ts_code """ logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set} data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic) logger.info('%d stocks will been import into wind_stock_daily', data_len) # 将data_df数据,添加到data_df_list Cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) data_df = invoke_top10_holders( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) if data_df is not None and len(data_df) > 0 and data_df['ann_date'].iloc[-1] is not None: last_date_in_df_last = try_2_date(data_df['ann_date'].iloc[-1]) while last_date_in_df_last > date_from: df2 = invoke_top10_holders( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(last_date_in_df_last - timedelta(days=1), STR_FORMAT_DATE_TS)) if len(df2) > 0 and df2['ann_date'].iloc[-1] is not None: last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1]) if last_date_in_df_cur != last_date_in_df_last: data_df = pd.concat([data_df, df2]) last_date_in_df_last = try_2_date(data_df['ann_date'].iloc[-1]) elif last_date_in_df_cur == last_date_in_df_last: break elif len(df2) > 0 and df2['ann_date'].iloc[-1] is None: last_date_in_df_cur = try_2_date(df2['end_date'].iloc[-1]) if last_date_in_df_cur != last_date_in_df_last: data_df = pd.concat([data_df, df2]) last_date_in_df_last = try_2_date(data_df['end_date'].iloc[-1]) elif last_date_in_df_cur == last_date_in_df_last: break else: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) elif data_df is not None: logger.info('整体进度:%d/%d), %d 条 %s 前10股东被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code, date_from, 
date_to) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 500 and len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_HOLDERS) all_data_count += data_count data_df_list, data_count = [], 0 # 仅调试使用 Cycles = Cycles + 1 if DEBUG and Cycles > 25: break finally: if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_TOP10_HOLDERS) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
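# invoke_top10_holders can return rows whose ann_date is NULL, which is why the paging
# loop above falls back to end_date as its cursor. A minimal sketch of that cursor
# choice, assuming the returned frame always carries both columns; the result is the
# date to subtract one day from before the next call.
def _top10_page_cursor(df):
    last_ann_date = df['ann_date'].iloc[-1]
    if last_ann_date is not None:
        return try_2_date(last_ann_date)
    return try_2_date(df['end_date'].iloc[-1])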
def import_tushare_stock_fina_audit(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_fin_audit' logging.info("更新 %s 开始", table_name) param_list = [ ('ts_code', String(20)), ('ann_date', Date), ('end_date', Date), ('audit_result', Text), ('audit_fees', DOUBLE), ('audit_agency', String(100)), ('audit_sign', String(100)), ] has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*8)) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) ann_date FROM {table_name} GROUP BY ts_code) fina_audit ON info.ts_code = fina_audit.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code DESC """ logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set} # 设置 dtype dtype = {key: val for key, val in param_list} data_len = len(code_date_range_dic) logger.info('%d data will been import into %s', data_len, table_name) data_df = pd.DataFrame() Cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) df = invoke_fina_audit(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) data_df = df if data_df is not None and len(data_df) > 0: while try_2_date(df['ann_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date(df['ann_date'].iloc[-1]), None df2 = invoke_fina_audit(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if len(df2) > 0: last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) df = df2 elif last_date_in_df_cur == last_date_in_df_last: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) elif len(df2) <= 0: break # 数据插入数据库 data_df_all = data_df data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, dtype, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date', 'end_date'], schema=config.DB_SCHEMA_MD) 
logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count) # 仅调试使用 Cycles = Cycles + 1 if DEBUG and Cycles > 10: break finally: # 导入数据库 if len(data_df) > 0: data_df_all = data_df data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, dtype, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'], schema=config.DB_SCHEMA_MD) logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count)
def import_tushare_fut_holding(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_future_holding' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ select cal_date FROM ( select * from tushare_future_trade_cal trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) ) """.format(table_name=table_name) else: sql_str = """ SELECT cal_date FROM tushare_future_trade_cal trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND cal_date>'20020106') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_cal 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trddate = list(row[0] for row in table.fetchall()) #交易所列表 exchange_list = ['DCE', 'CZCE', 'SHFE', 'CFFEX', 'INE'] # data_len = len(code_date_range_dic) data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(trddate) logger.info('%d 日的每日成交持仓排名数据将被导入数据库', data_len) # 将data_df数据,添加到data_df_list try: for i in range(len(trddate)): trade_date = datetime_2_str(trddate[i], STR_FORMAT_DATE_TS) for exchange in exchange_list: data_df = invoke_fut_holding(trade_date=trade_date, exchange=exchange) logging.info(" 提取%s %s 日 %d 条每日成交持仓排名数据", exchange, trade_date, data_df.shape[0]) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 20000: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUTURE_HOLDING) logging.info(" 更新%s表%d条期货仓单数据", table_name, data_count) all_data_count += data_count data_df_list, data_count = [], 0 finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_FUTURE_HOLDING) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条仓单信息被更新", table_name, all_data_count)
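# The holdings import walks every (trade_date, exchange) pair and only flushes once
# roughly 20,000 rows have accumulated, since a single trading day spans five
# exchanges. A sketch of that iteration order as a generator, assuming the
# invoke_fut_holding wrapper and exchange list used above:
def iter_fut_holding_frames(trade_date_list,
                            exchange_list=('DCE', 'CZCE', 'SHFE', 'CFFEX', 'INE')):
    for trade_date in trade_date_list:
        trade_date_str = datetime_2_str(trade_date, STR_FORMAT_DATE_TS)
        for exchange in exchange_list:
            df = invoke_fut_holding(trade_date=trade_date_str, exchange=exchange)
            if df is not None and df.shape[0] > 0:
                yield trade_date_str, exchange, df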
def import_tushare_stock_forecast(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_forecast' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) ann_date FROM {table_name} GROUP BY ts_code) forecast ON info.ts_code = forecast.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, list_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code DESC """ logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) # ts_code_set = None with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set } data_df_list, data_count, all_data_count, data_len = [], 0, 0, len( code_date_range_dic) logger.info( '%d stock balancesheets will been import into tushare_stock_forcast', data_len) # 将data_df数据,添加到data_df_list Cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) data_df = invoke_forecast( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue elif data_df is not None: logger.info('整体进度:%d/%d), %d 条 %s 业绩预告数据被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 1000 and len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FORECAST, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'], schema=config.DB_SCHEMA_MD) logger.info('%d 条业绩预告数据被插入 %s 表', data_count, table_name) all_data_count += data_count data_df_list, data_count = [], 0 # # 数据插入数据库 # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, # DTYPE_TUSHARE_STOCK_BALABCESHEET) # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) # data_df = [] # 仅调试使用 Cycles = Cycles + 1 if DEBUG and Cycles > 10: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_FORECAST, myisam_if_create_table=True, 
primary_keys=['ts_code', 'ann_date'], schema=config.DB_SCHEMA_MD) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条业绩预告信息被更新", table_name, all_data_count)
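# All of the loaders above share the same buffering scheme: per-code frames are
# collected in a list, bulk-inserted once a row threshold is crossed, and flushed one
# last time in a finally block so whatever was gathered before an exception still
# lands in the database. A stripped-down sketch of that pattern, where `insert` stands
# in for bunch_insert / bunch_insert_on_duplicate_update with the table-specific
# arguments already bound:
def batched_insert(frames, insert, threshold=1000):
    buffer_list, buffered, total = [], 0, 0
    try:
        for df in frames:
            buffer_list.append(df)
            buffered += df.shape[0]
            if buffered >= threshold:
                insert(pd.concat(buffer_list))
                total += buffered
                buffer_list, buffered = [], 0
    finally:
        if buffer_list:
            insert(pd.concat(buffer_list))
            total += buffered
    return total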
def import_tushare_stock_balancesheet(ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_balancesheet' logging.info("更新 %s 开始", table_name) param_list = [ ('ts_code', String(20)), ('ann_date', Date), ('f_ann_date', Date), ('end_date', Date), ('report_type', DOUBLE), ('comp_type', DOUBLE), ('total_share', DOUBLE), ('cap_rese', DOUBLE), ('undistr_porfit', DOUBLE), ('surplus_rese', DOUBLE), ('special_rese', DOUBLE), ('money_cap', DOUBLE), ('trad_asset', DOUBLE), ('notes_receiv', DOUBLE), ('accounts_receiv', DOUBLE), ('oth_receiv', DOUBLE), ('prepayment', DOUBLE), ('div_receiv', DOUBLE), ('int_receiv', DOUBLE), ('inventories', DOUBLE), ('amor_exp', DOUBLE), ('nca_within_1y', DOUBLE), ('sett_rsrv', DOUBLE), ('loanto_oth_bank_fi', DOUBLE), ('premium_receiv', DOUBLE), ('reinsur_receiv', DOUBLE), ('reinsur_res_receiv', DOUBLE), ('pur_resale_fa', DOUBLE), ('oth_cur_assets', DOUBLE), ('total_cur_assets', DOUBLE), ('fa_avail_for_sale', DOUBLE), ('htm_invest', DOUBLE), ('lt_eqt_invest', DOUBLE), ('invest_real_estate', DOUBLE), ('time_deposits', DOUBLE), ('oth_assets', DOUBLE), ('lt_rec', DOUBLE), ('fix_assets', DOUBLE), ('cip', DOUBLE), ('const_materials', DOUBLE), ('fixed_assets_disp', DOUBLE), ('produc_bio_assets', DOUBLE), ('oil_and_gas_assets', DOUBLE), ('intan_assets', DOUBLE), ('r_and_d', DOUBLE), ('goodwill', DOUBLE), ('lt_amor_exp', DOUBLE), ('defer_tax_assets', DOUBLE), ('decr_in_disbur', DOUBLE), ('oth_nca', DOUBLE), ('total_nca', DOUBLE), ('cash_reser_cb', DOUBLE), ('depos_in_oth_bfi', DOUBLE), ('prec_metals', DOUBLE), ('deriv_assets', DOUBLE), ('rr_reins_une_prem', DOUBLE), ('rr_reins_outstd_cla', DOUBLE), ('rr_reins_lins_liab', DOUBLE), ('rr_reins_lthins_liab', DOUBLE), ('refund_depos', DOUBLE), ('ph_pledge_loans', DOUBLE), ('refund_cap_depos', DOUBLE), ('indep_acct_assets', DOUBLE), ('client_depos', DOUBLE), ('client_prov', DOUBLE), ('transac_seat_fee', DOUBLE), ('invest_as_receiv', DOUBLE), ('total_assets', DOUBLE), ('lt_borr', DOUBLE), ('st_borr', DOUBLE), ('cb_borr', DOUBLE), ('depos_ib_deposits', DOUBLE), ('loan_oth_bank', DOUBLE), ('trading_fl', DOUBLE), ('notes_payable', DOUBLE), ('acct_payable', DOUBLE), ('adv_receipts', DOUBLE), ('sold_for_repur_fa', DOUBLE), ('comm_payable', DOUBLE), ('payroll_payable', DOUBLE), ('taxes_payable', DOUBLE), ('int_payable', DOUBLE), ('oth_payable', DOUBLE), ('acc_exp', DOUBLE), ('deferred_inc', DOUBLE), ('st_bonds_payable', DOUBLE), ('payable_to_reinsurer', DOUBLE), ('rsrv_insur_cont', DOUBLE), ('acting_trading_sec', DOUBLE), ('acting_uw_sec', DOUBLE), ('non_cur_liab_due_1y', DOUBLE), ('oth_cur_liab', DOUBLE), ('total_cur_liab', DOUBLE), ('bond_payable', DOUBLE), ('lt_payable', DOUBLE), ('specific_payables', DOUBLE), ('estimated_liab', DOUBLE), ('defer_tax_liab', DOUBLE), ('defer_inc_non_cur_liab', DOUBLE), ('oth_ncl', DOUBLE), ('total_ncl', DOUBLE), ('depos_oth_bfi', DOUBLE), ('deriv_liab', DOUBLE), ('depos', DOUBLE), ('agency_bus_liab', DOUBLE), ('oth_liab', DOUBLE), ('prem_receiv_adva', DOUBLE), ('depos_received', DOUBLE), ('ph_invest', DOUBLE), ('reser_une_prem', DOUBLE), ('reser_outstd_claims', DOUBLE), ('reser_lins_liab', DOUBLE), ('reser_lthins_liab', DOUBLE), ('indept_acc_liab', DOUBLE), ('pledge_borr', DOUBLE), ('indem_payable', DOUBLE), ('policy_div_payable', DOUBLE), ('total_liab', DOUBLE), ('treasury_share', DOUBLE), ('ordin_risk_reser', DOUBLE), ('forex_differ', DOUBLE), ('invest_loss_unconf', DOUBLE), ('minority_int', DOUBLE), ('total_hldr_eqy_exc_min_int', DOUBLE), 
('total_hldr_eqy_inc_min_int', DOUBLE), ('total_liab_hldr_eqy', DOUBLE), ('lt_payroll_payable', DOUBLE), ('oth_comp_income', DOUBLE), ('oth_eqt_tools', DOUBLE), ('oth_eqt_tools_p_shr', DOUBLE), ('lending_funds', DOUBLE), ('acc_receivable', DOUBLE), ('st_fin_payable', DOUBLE), ('payables', DOUBLE), ('hfs_assets', DOUBLE), ('hfs_sales', DOUBLE), ] # 进行表格判断,确定是否含有tushare_stock_daily sql_str = """SELECT ts_code,subdate(list_date,365*10) date_frm,list_date date_to FROM tushare_stock_info""" logger.warning('%s 打补丁,使用 tushare_stock_info 表进行计算需要补充提取的日期范围', table_name) # ts_code_set = None with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set} # 设置 dtype dtype = {key: val for key, val in param_list} # dtype['ts_code'] = String(20) # dtype['trade_date'] = Date data_len = len(code_date_range_dic) logger.info('%d stocks will been import into wind_stock_daily', data_len) # 将data_df数据,添加到data_df_list Cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) df = invoke_balancesheet(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to) data_df = df if data_df is not None and len(data_df) > 0: while try_2_date(df['ann_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date(df['ann_date'].iloc[-1]), None df2 = invoke_balancesheet(ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if len(df2) > 0: last_date_in_df_cur = try_2_date(df2['ann_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) df = df2 elif last_date_in_df_cur == last_date_in_df_last: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) elif len(df2) <= 0: break # 数据插入数据库 data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) data_df = [] # 仅调试使用 Cycles = Cycles + 1 if DEBUG and Cycles > 10: break finally: # 导入数据库 if len(data_df) > 0: data_count = bunch_insert_on_duplicate_update( data_df, table_name, engine_md, dtype, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date'], schema=config.DB_SCHEMA_MD) logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
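# This balance-sheet pass is a back-fill: for every stock it re-requests the window
# from ten years before the listing date up to the listing date itself, which is what
# the SQL's subdate(list_date, 365*10) expresses. The same window in Python, assuming
# list_date is a datetime.date:
def backfill_window(list_date, years=10):
    from datetime import timedelta
    return list_date - timedelta(days=365 * years), list_date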
def import_tushare_tmt_twincome(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_tmt_twincome' logging.info("更新 %s 开始", table_name) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm start_date, end_date FROM ( SELECT info.ts_code, ifnull(date, start_date) date_frm, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_tmt_twincome_info info LEFT OUTER JOIN (SELECT item, adddate(max(date),1) date FROM {table_name} GROUP BY item ) income ON info.ts_code = income.item ) tt order by ts_code""".format(table_name=table_name) else: sql_str = """SELECT ts_code, start_date , if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_tmt_twincome_info info """ logger.warning('%s 不存在,仅使用 tushare_tmt_twincome_info 表进行计算日期范围', table_name) # ts_code_set = None with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time,ts_code_set = None,None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set} data_df_list, data_count, all_data_count, data_len = [], 0, 0, len(code_date_range_dic) logger.info('%d Taiwan TMT information will been import into tushare_tmt_twincome', data_len) # 将data_df数据,添加到data_df_list Cycles = 1 try: for num, (ts_code, (start_date, end_date)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, start_date, end_date) data_df = invoke_tmt_twincome(item=ts_code, start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS), end_date=datetime_2_str(end_date, STR_FORMAT_DATE_TS)) # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, start_date, date_to) if len(data_df) > 0 and data_df['date'] is not None: while try_2_date(data_df['date'].iloc[-1]) > try_2_date(start_date): last_date_in_df_last, last_date_in_df_cur = try_2_date(data_df['date'].iloc[-1]), None df2 = invoke_tmt_twincome(item=ts_code, start_date=datetime_2_str(start_date, STR_FORMAT_DATE_TS), end_date=datetime_2_str(try_2_date(data_df['date'].iloc[-1]) - timedelta(days=1),STR_FORMAT_DATE_TS)) if len(df2) > 0 and df2['date'] is not None: last_date_in_df_cur = try_2_date(df2['date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) elif last_date_in_df_cur == last_date_in_df_last: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, start_date, end_date) continue logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], ts_code, start_date, end_date) elif len(df2) <= 0: break # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 1000: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME) all_data_count += data_count data_df_list, data_count = [], 0 finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, DTYPE_TUSHARE_TMT_TWINCOME) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 
%d 条信息被更新", table_name, all_data_count)
def import_tushare_stock_cashflow(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_cashflow' primary_keys = ['ts_code', 'ann_date', 'end_date'] logging.info("更新 %s 开始", table_name) check_sqlite_db_primary_keys(table_name, primary_keys) has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(ann_date, list_date) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) ann_date FROM {table_name} GROUP BY ts_code) cashflow ON info.ts_code = cashflow.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, list_date date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""" logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set } data_df_list, data_count, all_data_count, data_len = [], 0, 0, len( code_date_range_dic) logger.info('%d data will been import into %s', data_len, table_name) # 将data_df数据,添加到data_df_list cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) df = invoke_cashflow( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) # logger.info(' %d data of %s between %s and %s', df.shape[0], ts_code, date_from, date_to) data_df = df if data_df is not None and len(data_df) > 0: while try_2_date(df['ann_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date( df['ann_date'].iloc[-1]), None df2 = invoke_cashflow( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if len(df2) > 0: last_date_in_df_cur = try_2_date( df2['ann_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) df = df2 elif last_date_in_df_cur == last_date_in_df_last: break elif len(df2) <= 0: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue elif data_df is not None: logger.info('%d/%d) %d 条 %s 的现金流被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) # 把数据攒起来 if data_df is not None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阀值有开始插入 if data_count >= 1000 and len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert(data_df_all, 
table_name=table_name, dtype=DTYPE_TUSHARE_CASHFLOW, primary_keys=primary_keys) logger.info('%d 条现金流数据已插入 %s 表', data_count, table_name) all_data_count += data_count data_df_list, data_count = [], 0 # # 数据插入数据库 # data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, DTYPE_TUSHARE_CASHFLOW) # logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count) # 仅调试使用 cycles = cycles + 1 if DEBUG and cycles > 10: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert(data_df_all, table_name=table_name, dtype=DTYPE_TUSHARE_CASHFLOW, primary_keys=primary_keys) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)
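# The cash-flow table is keyed on (ts_code, ann_date, end_date), and the concatenated
# pages are handed to bunch_insert as-is. If repeated keys ever show up across pages,
# a defensive de-duplication on the key columns before the bulk insert avoids
# redundant duplicate-key updates; a sketch under that assumption:
def dedup_on_keys(df, primary_keys=('ts_code', 'ann_date', 'end_date')):
    return df.drop_duplicates(subset=list(primary_keys), keep='first')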
def import_tushare_block_trade(chain_param=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_block_trade' logging.info("更新 %s 开始", table_name) param_list = [ ('trade_date', Date), ('ts_code', String(20)), ('price', DOUBLE), ('vol', DOUBLE), ('amount', DOUBLE), ('buyer', String(100)), ('seller', String(100)), ] has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有 table_name if has_table: sql_str = f"""select cal_date FROM ( select * from tushare_trade_date trddate where( cal_date>(SELECT max(trade_date) FROM {table_name})) )tt where (is_open=1 and cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) and exchange='SSE') """ else: # 2003-08-02 大宗交易制度开始实施 sql_str = """SELECT cal_date FROM tushare_trade_date trddate WHERE (trddate.is_open=1 AND cal_date <= if(hour(now())<16, subdate(curdate(),1), curdate()) AND exchange='SSE' AND cal_date>='2003-08-02') ORDER BY cal_date""" logger.warning('%s 不存在,仅使用 tushare_trade_date 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取交易日数据 table = session.execute(sql_str) trade_date_list = list(row[0] for row in table.fetchall()) # 设置 dtype dtype = {key: val for key, val in param_list} try: trade_date_list_len = len(trade_date_list) for num, trade_date in enumerate(trade_date_list, start=1): trade_date = datetime_2_str(trade_date, STR_FORMAT_DATE_TS) data_df = invoke_block_trade(trade_date=trade_date) if len(data_df) > 0: # 当前表不设置主键,由于存在重复记录,因此无法设置主键 # 例如:002325.SZ 2014-11-17 华泰证券股份有限公司沈阳光荣街证券营业部 两笔完全相同的大宗交易 data_count = bunch_insert( data_df, table_name=table_name, dtype=dtype) logging.info("%d/%d) %s更新 %s 结束 %d 条信息被更新", num, trade_date_list_len, trade_date, table_name, data_count) else: logging.info("%d/%d) %s 无数据信息可被更新", num, trade_date_list_len, trade_date) except: logger.exception('更新 %s 表异常', table_name)
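# As the comment above notes, genuine duplicate block trades exist (the 002325.SZ
# 2014-11-17 example), so the table is deliberately left without a primary key: an
# upsert keyed on the obvious columns would silently collapse real rows. When a plain
# key-less append is all that is needed, pandas alone suffices; a sketch assuming the
# module's engine_md connection:
def append_block_trades(data_df, table_name='tushare_block_trade'):
    data_df.to_sql(table_name, engine_md, if_exists='append', index=False)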
def import_tushare_stock_income(chain_param=None, ts_code_set=None): """ 插入股票日线数据到最近一个工作日-1。 如果超过 BASE_LINE_HOUR 时间,则获取当日的数据 :return: """ table_name = 'tushare_stock_income' logging.info("更新 %s 开始", table_name) # wind_indictor_str = ",".join([key for key, _ in param_list]) # rename_col_dic = {key.upper(): key.lower() for key, _ in param_list} has_table = engine_md.has_table(table_name) # 进行表格判断,确定是否含有tushare_stock_daily if has_table: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, ifnull(ann_date, subdate(list_date,365*10)) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info LEFT OUTER JOIN (SELECT ts_code, adddate(max(ann_date),1) ann_date FROM {table_name} GROUP BY ts_code) income ON info.ts_code = income.ts_code ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code""".format(table_name=table_name) else: sql_str = """ SELECT ts_code, date_frm, if(delist_date<end_date, delist_date, end_date) date_to FROM ( SELECT info.ts_code, subdate(list_date,365*10) date_frm, delist_date, if(hour(now())<16, subdate(curdate(),1), curdate()) end_date FROM tushare_stock_info info ) tt WHERE date_frm <= if(delist_date<end_date, delist_date, end_date) ORDER BY ts_code DESC """ logger.warning('%s 不存在,仅使用 tushare_stock_info 表进行计算日期范围', table_name) with with_db_session(engine_md) as session: # 获取每只股票需要获取日线数据的日期区间 table = session.execute(sql_str) # 计算每只股票需要获取日线数据的日期区间 begin_time = None # 获取date_from,date_to,将date_from,date_to做为value值 code_date_range_dic = { ts_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to) for ts_code, date_from, date_to in table.fetchall() if ts_code_set is None or ts_code in ts_code_set } data_df_list, data_count, all_data_count, data_len = [], 0, 0, len( code_date_range_dic) logger.info('%d stocks will been import into wind_stock_daily', data_len) # 将data_df数据,添加到data_df_list Cycles = 1 try: for num, (ts_code, (date_from, date_to)) in enumerate(code_date_range_dic.items(), start=1): logger.debug('%d/%d) %s [%s - %s]', num, data_len, ts_code, date_from, date_to) df = invoke_income(ts_code=ts_code, start_date=datetime_2_str( date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str(date_to, STR_FORMAT_DATE_TS)) data_df = df if data_df is not None and len(data_df) > 0: while try_2_date(df['ann_date'].iloc[-1]) > date_from: last_date_in_df_last, last_date_in_df_cur = try_2_date( df['ann_date'].iloc[-1]), None df2 = invoke_income( ts_code=ts_code, start_date=datetime_2_str(date_from, STR_FORMAT_DATE_TS), end_date=datetime_2_str( try_2_date(df['ann_date'].iloc[-1]) - timedelta(days=1), STR_FORMAT_DATE_TS)) if df2 is not None and df2.shape[0] > 0: last_date_in_df_cur = try_2_date( df2['ann_date'].iloc[-1]) if last_date_in_df_cur < last_date_in_df_last: data_df = pd.concat([data_df, df2]) df = df2 elif last_date_in_df_cur == last_date_in_df_last: break elif df2 is None or df2.shape[0] <= 0: break if data_df is None: logger.warning('%d/%d) %s has no data during %s %s', num, data_len, ts_code, date_from, date_to) continue elif data_df is not None: logger.info('%d/%d), %d 条 %s 的利润表数据被提取,起止时间为 %s 和 %s', num, data_len, data_df.shape[0], ts_code, date_from, date_to) # # 数据插入数据库 # data_df_all = data_df # data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, # DTYPE_TUSHARE_STOCK_INCOME) # logging.info("成功更新 %s 结束 %d 条信息被更新", table_name, data_count) # 把数据攒起来 if data_df is not 
None and data_df.shape[0] > 0: data_count += data_df.shape[0] data_df_list.append(data_df) # 大于阈值后开始插入 if data_count >= 1000 and len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INCOME, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date', 'end_date'], schema=config.DB_SCHEMA_MD) logger.info('%d 条利润表数据被插入 %s 表', data_count, table_name) all_data_count += data_count data_df_list, data_count = [], 0 # 仅调试使用 Cycles = Cycles + 1 if DEBUG and Cycles > 10: break finally: # 导入数据库 if len(data_df_list) > 0: data_df_all = pd.concat(data_df_list) data_count = bunch_insert_on_duplicate_update( data_df_all, table_name, engine_md, DTYPE_TUSHARE_STOCK_INCOME, myisam_if_create_table=True, primary_keys=['ts_code', 'ann_date', 'end_date'], schema=config.DB_SCHEMA_MD) all_data_count = all_data_count + data_count logging.info("更新 %s 结束 %d 条信息被更新", table_name, all_data_count)