import datetime
import json
import os
from os.path import join
from functools import partial

import click
import pandas as pd
from numpy import searchsorted
from sqlalchemy import create_engine

# Engine, fetch_symbols, fetch_single_equity, reindex_to_calendar,
# get_meta_from_bars, fetch_splits_and_dividends, fillna, Base, logger and the
# DATE_DIR / SESSION_BAR_DB / SESSION_BAR_TABLE constants are assumed to be
# provided by the surrounding bundle module.


def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    symbols = fetch_symbols(eg, assets)
    metas = []

    def gen_symbols_data(symbol_map, freq='1d'):
        for index, symbol in symbol_map.iteritems():
            data = reindex_to_calendar(
                calendar,
                fetch_single_equity(eg, symbol, freq),
                freq=freq,
            )
            if freq == '1d':
                metas.append(get_meta_from_bars(data))
            yield int(symbol), data

    symbol_map = symbols.symbol

    # note: shadows the 'assets' parameter from here on
    assets = set([int(s) for s in symbol_map])
    daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"),
                           assets=assets,
                           show_progress=show_progress)
    if ingest_minute:
        with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                               label="Merging minute equity files:",
                               length=len(assets),
                               item_show_func=lambda e: e if e is None else str(e[0]),
                               ) as bar:
            minute_bar_writer.write(bar, show_progress=False)

    symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
    splits, dividends = fetch_splits_and_dividends(eg, symbols)
    symbols.set_index('symbol', drop=False, inplace=True)
    asset_db_writer.write(symbols)
    adjustment_writer.write(
        splits=splits,
        dividends=dividends
    )
    eg.exit()
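
# A minimal registration sketch, assuming this bundle targets zipline's
# standard bundle API (which passes exactly the eleven writer/session
# arguments): the extra leading parameters (assets, ingest_minute) are bound
# in advance with functools.partial. The bundle name 'tdx', the calendar
# alias 'SHSZ', and the bound values below are illustrative assumptions,
# not taken from the code above.
from functools import partial

from zipline.data.bundles import register

register(
    'tdx',                             # hypothetical bundle name
    partial(tdx_bundle, None, False),  # placeholder assets; minute ingest off
    calendar_name='SHSZ',              # hypothetical A-share calendar alias
)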

def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               fundamental,  # whether to ingest fundamental data, default False
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               fundamental_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    symbols = fetch_symbols(eg, assets)
    metas = []

    today = pd.to_datetime('today', utc=True)
    distance = calendar.session_distance(start_session, today)

    # resume bookkeeping: last ingested session per frequency and symbol
    dates_path = join(output_dir, DATE_DIR)
    if os.path.isfile(dates_path):
        with open(dates_path, 'r') as f:
            dates_json = json.load(f)
    else:
        dates_json = {'1d': {}, '1m': {}}

    # local sqlite cache of daily bars for incremental runs
    session_bars = create_engine('sqlite:///' + join(output_dir, SESSION_BAR_DB))

    def gen_symbols_data(symbol_map, freq='1d'):
        if not session_bars.has_table(SESSION_BAR_TABLE):
            Base.metadata.create_all(
                session_bars.connect(),
                checkfirst=True,
                tables=[Base.metadata.tables[SESSION_BAR_TABLE]])
        func = partial(fetch_single_equity, eg)
        now = pd.to_datetime('now', utc=True)
        if end_session >= now.normalize():
            end = now.normalize()
            # before 15:05 Shanghai time today's session is still incomplete
            if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                end = end - pd.Timedelta('1 D')
        else:
            end = end_session
        if freq == '1m':
            if distance >= 100:
                func = eg.get_k_data
        for index, symbol in symbol_map.iteritems():
            try:
                # resume the day after the last checkpointed session
                start = pd.to_datetime(dates_json[freq][symbol], utc=True) + pd.Timedelta('1 D')
                if start >= end:
                    continue
            except KeyError:
                start = start_session
            data = reindex_to_calendar(
                calendar,
                func(symbol, start, end, freq),
                freq=freq,
            )
            if freq == '1d':
                data.to_sql(SESSION_BAR_TABLE, session_bars.connect(),
                            if_exists='append', index_label='day')
                if symbol in dates_json[freq]:
                    # re-read the full history so the writer sees old + new bars
                    data = pd.read_sql(
                        "select * from {} where id = {} order by day ASC".format(
                            SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    data.index = pd.to_datetime(data.index)
            dates_json[freq][symbol] = end.strftime('%Y%m%d')
            yield int(symbol), data

        with open(dates_path, 'w') as f:
            json.dump(dates_json, f)

    symbol_map = symbols.symbol

    assets = set([int(s) for s in symbol_map])
    daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"),
                           assets=assets,
                           show_progress=show_progress)
    if ingest_minute:
        with click.progressbar(
                gen_symbols_data(symbol_map, freq="1m"),
                label="Merging minute equity files:",
                length=len(assets),
                item_show_func=lambda e: e if e is None else str(e[0]),
        ) as bar:
            minute_bar_writer.write(bar, show_progress=False)

    splits, dividends, shares = fetch_splits_and_dividends(
        eg, symbols, start_session, end_session)

    # asset metadata now comes from the sqlite cache rather than in-memory metas
    metas = pd.read_sql(
        "select id as symbol,min(day) as start_date,max(day) as end_date from bars group by id;",
        session_bars,
        parse_dates=['start_date', 'end_date'])
    metas['symbol'] = metas['symbol'].apply(lambda x: format(x, '06'))
    metas['first_traded'] = metas['start_date']
    metas['auto_close_date'] = metas['end_date']

    symbols = symbols.set_index('symbol', drop=False).join(
        metas.set_index('symbol'), how='inner')

    asset_db_writer.write(symbols)
    adjustment_writer.write(splits=splits, dividends=dividends, shares=shares)

    if fundamental:
        logger.info("writing fundamental data:")
        try:
            fundamental_writer.write(start_session, end_session)
        except Exception as e:
            # don't abort the whole ingest, but surface the failure
            logger.error("failed to write fundamental data: {}".format(e))

    eg.exit()
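
# A standalone sketch of the resume logic the checkpoint file drives, using
# only pandas. The file maps frequency -> symbol -> last ingested session as
# '%Y%m%d'; the symbol and dates below are made up for illustration.
import pandas as pd

dates_json = {'1d': {'000001': '20180105'}, '1m': {}}
end = pd.Timestamp('2018-01-10', tz='utc')

try:
    # resume the day after the last checkpointed session
    start = pd.to_datetime(dates_json['1d']['000001'], utc=True) + pd.Timedelta('1 D')
    if start >= end:
        start = None  # nothing new to fetch; the real code skips the symbol
except KeyError:
    start = pd.Timestamp('2017-01-03', tz='utc')  # first run: full history

print(start)  # 2018-01-06 00:00:00+00:00 -> only the missing tail is fetched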

def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               overwrite,
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=8)
    eg.connect()

    symbols = fetch_symbols(eg, assets)
    metas = []

    today = pd.to_datetime('today', utc=True)
    distance = calendar.session_distance(start_session, today)
    # unless --overwrite is given, clamp minute-bar history to the last 3 years
    if ingest_minute and not overwrite and (start_session < today - pd.DateOffset(years=3)):
        minute_start = calendar.all_sessions[
            searchsorted(calendar.all_sessions, today - pd.DateOffset(years=3))]
        logger.warning(
            "overriding start_session for minute bars to {} (3 years ago);"
            " to fetch minute data before that, please add '--overwrite True'".format(minute_start))
    else:
        minute_start = start_session

    def gen_symbols_data(symbol_map, freq='1d'):
        func = partial(fetch_single_equity, eg)
        start = start_session
        end = end_session
        if freq == '1m':
            if distance >= 100:
                func = eg.get_k_data
            start = minute_start

        for index, symbol in symbol_map.iteritems():
            data = reindex_to_calendar(
                calendar,
                func(symbol, start, end, freq),
                freq=freq,
            )
            if freq == '1d':
                metas.append(get_meta_from_bars(data))
            yield int(symbol), data

    symbol_map = symbols.symbol

    assets = set([int(s) for s in symbol_map])
    daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"),
                           assets=assets,
                           show_progress=show_progress)
    if ingest_minute:
        with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                               label="Merging minute equity files:",
                               length=len(assets),
                               item_show_func=lambda e: e if e is None else str(e[0]),
                               ) as bar:
            minute_bar_writer.write(bar, show_progress=False)

    symbols = pd.concat([symbols, pd.DataFrame(data=metas)], axis=1)
    splits, dividends = fetch_splits_and_dividends(eg, symbols)
    symbols.set_index('symbol', drop=False, inplace=True)
    asset_db_writer.write(symbols)
    adjustment_writer.write(
        splits=splits,
        dividends=dividends
    )
    eg.exit()
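
# How the 3-year clamp lands on a real session: searchsorted returns the
# position of the first session at or after the cutoff. A standalone
# illustration with a plain business-day DatetimeIndex standing in for
# calendar.all_sessions (the real code calls numpy's searchsorted; the
# index method used here behaves the same for this case).
import pandas as pd

sessions = pd.date_range('2014-01-02', '2018-01-02', freq='B', tz='utc')
today = pd.Timestamp('2018-01-02', tz='utc')
cutoff = today - pd.DateOffset(years=3)
minute_start = sessions[sessions.searchsorted(cutoff)]
print(minute_start)  # first session at or after 2015-01-02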

def tdx_bundle(assets,
               ingest_minute,  # whether to ingest minute data, default False
               fundamental,  # whether to ingest fundamental data, default False
               environ,
               asset_db_writer,
               minute_bar_writer,
               daily_bar_writer,
               adjustment_writer,
               fundamental_writer,
               calendar,
               start_session,
               end_session,
               cache,
               show_progress,
               output_dir):
    eg = Engine(auto_retry=True, multithread=True, best_ip=True, thread_num=1)
    eg.connect()

    symbols = fetch_symbols(eg, assets)
    metas = []

    today = pd.to_datetime('today', utc=True)
    distance = calendar.session_distance(start_session, today)

    # resume bookkeeping: last ingested session per frequency and symbol
    dates_path = join(output_dir, DATE_DIR)
    if os.path.isfile(dates_path):
        with open(dates_path, 'r') as f:
            dates_json = json.load(f)
    else:
        dates_json = {
            '1d': {},
            '1m': {}
        }

    # local sqlite cache of daily bars for incremental runs
    session_bars = create_engine('sqlite:///' + join(output_dir, SESSION_BAR_DB))

    def gen_symbols_data(symbol_map, freq='1d'):
        if not session_bars.has_table(SESSION_BAR_TABLE):
            Base.metadata.create_all(session_bars.connect(), checkfirst=True,
                                     tables=[Base.metadata.tables[SESSION_BAR_TABLE]])
        func = partial(fetch_single_equity, eg)
        now = pd.to_datetime('now', utc=True)
        if end_session >= now.normalize():
            end = now.normalize()
            # before 15:05 Shanghai time today's session is still incomplete
            if now.tz_convert('Asia/Shanghai').time() < datetime.time(15, 5):
                end = end - pd.Timedelta('1 D')
        else:
            end = end_session
        # snap end back onto an actual trading session
        end_idx = calendar.all_sessions.searchsorted(end)
        if calendar.all_sessions[end_idx] > end:
            end = calendar.all_sessions[end_idx - 1]

        for index, symbol in symbol_map.iteritems():
            try:
                # resume the day after the last checkpointed session,
                # snapped forward onto an actual trading session
                start = pd.to_datetime(dates_json[freq][symbol], utc=True) + pd.Timedelta('1 D')
                start = calendar.all_sessions[calendar.all_sessions.searchsorted(start)]
                if start > end:
                    # already up to date: re-serve cached daily history, if any
                    if freq == '1d' and symbol in dates_json[freq]:
                        data = pd.read_sql(
                            "select * from {} where id = {} order by day ASC".format(
                                SESSION_BAR_TABLE, int(symbol)),
                            session_bars, index_col='day')
                        data.index = pd.to_datetime(data.index)
                        yield int(symbol), data
                    else:
                        yield int(symbol), pd.DataFrame()
                    continue
            except KeyError:
                start = start_session

            if freq == '1m':
                single_distance = calendar.session_distance(start, end)
                if single_distance >= 100:
                    # note: once switched, func stays get_k_data for the
                    # remaining symbols in this run
                    func = eg.get_k_data

            data = reindex_to_calendar(
                calendar,
                func(symbol, start, end, freq),
                start_session=start,
                end_session=end,
                freq=freq,
            )

            if data is None or data.empty:
                if freq == '1d' and symbol in dates_json[freq]:
                    data = pd.read_sql(
                        "select * from {} where id = {} order by day ASC".format(
                            SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    data.index = pd.to_datetime(data.index)
                    yield int(symbol), data
                continue

            if freq == '1d':
                if data.close.isnull()[0]:
                    # the first close is NaN, so padding would fill garbage;
                    # seed it from the last cached bar instead
                    data2 = pd.read_sql(
                        "select * from {} where id = {} order by day desc limit 1".format(
                            SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    if data2.empty:
                        data = data[data.close.notnull()]
                    else:
                        data["close"][0] = data2["close"][0]
                        fillna(data)
                data.to_sql(SESSION_BAR_TABLE, session_bars.connect(),
                            if_exists='append', index_label='day')
                if symbol in dates_json[freq]:
                    # re-read the full history so the writer sees old + new bars
                    data = pd.read_sql(
                        "select * from {} where id = {} order by day ASC".format(
                            SESSION_BAR_TABLE, int(symbol)),
                        session_bars, index_col='day')
                    data.index = pd.to_datetime(data.index)
            dates_json[freq][symbol] = data.index[-1].strftime('%Y%m%d')
            yield int(symbol), data

        with open(dates_path, 'w') as f:
            json.dump(dates_json, f)

    symbol_map = symbols.symbol

    assets = set([int(s) for s in symbol_map])
    daily_bar_writer.write(gen_symbols_data(symbol_map, freq="1d"),
                           assets=assets,
                           show_progress=show_progress)

    splits, dividends, shares = fetch_splits_and_dividends(
        eg, symbols, start_session, end_session)

    # asset metadata comes from the sqlite cache
    metas = pd.read_sql(
        "select id as symbol,min(day) as start_date,max(day) as end_date from bars group by id;",
        session_bars,
        parse_dates=['start_date', 'end_date'])
    metas['symbol'] = metas['symbol'].apply(lambda x: format(x, '06'))
    metas['first_traded'] = metas['start_date']
    metas['auto_close_date'] = metas['end_date']

    symbols = symbols.set_index('symbol', drop=False).join(
        metas.set_index('symbol'), how='inner')

    asset_db_writer.write(symbols)
    adjustment_writer.write(
        splits=splits,
        dividends=dividends,
        shares=shares
    )

    if fundamental:
        logger.info("writing fundamental data:")
        try:
            fundamental_writer.write(start_session, end_session)
        except Exception as e:
            # don't abort the whole ingest, but surface the failure
            logger.error("failed to write fundamental data: {}".format(e))

    # minute ingest now runs last, after assets and adjustments are written
    if ingest_minute:
        with click.progressbar(gen_symbols_data(symbol_map, freq="1m"),
                               label="Merging minute equity files:",
                               length=len(assets),
                               item_show_func=lambda e: e if e is None else str(e[0]),
                               ) as bar:
            minute_bar_writer.write(bar, show_progress=False)

    eg.exit()
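
# Once the bundle is registered (e.g. from ~/.zipline/extension.py in a
# stock zipline install -- an assumption, this project may wire it up
# differently), ingestion can be triggered from the CLI:
#
#     zipline ingest -b tdx
#
# or programmatically; a minimal sketch reusing the hypothetical 'tdx' name
# from the registration example above:
from zipline.data import bundles

bundles.ingest('tdx', show_progress=True)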