def get_tradeable(universe, start_time, end_time):
    '''
    Fetch the trading status of stocks.

    Notes
    -----
    A stock is treated as non-tradeable when its turnover volume is 0 or its
    high price equals its low price. In the result, 1 means normal trading,
    0 means non-tradeable, and NA means not yet listed (hence non-tradeable).
    '''
    sql = '''
        SELECT S.TradingDay, S.TurnoverVolume, S.HighPrice, S.LowPrice, M.Secucode
        FROM QT_DailyQuote S, SecuMain M
        WHERE
            S.InnerCode = M.InnerCode AND
            M.SecuMarket in (83, 90) AND
            S.TradingDay <= CAST('{end_time}' as datetime) AND
            S.TradingDay >= CAST('{start_time}' as datetime) AND
            M.SecuCategory = 1
        ORDER BY S.TradingDay ASC, M.Secucode ASC
        '''
    raw = fdgetter.get_db_data(sql, cols=('time', 'vol', 'high', 'low', 'code'),
                               start_time=start_time, end_time=end_time,
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    # Normalize the volume column into a 0/1 tradeable flag: positive volume
    # marks a trading day, unless high == low (treated as non-tradeable).
    traded = raw.vol > 0
    raw.loc[traded, 'vol'] = 1
    raw.loc[traded & (raw.high == raw.low), 'vol'] = 0
    res = raw.pivot_table('vol', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def _inner(universe, start_time, end_time):
    '''
    Fetch report data, compute the seasonal value per stock, and map it
    onto the trading days of [start_time, end_time].
    '''
    # Extend the query window backwards so enough report history exists
    # for the season offset computation.
    lookback = 100 * n + 250
    query_start = pd.to_datetime(start_time) - pd.Timedelta('%d day' % lookback)
    df = fdgetter.get_db_data(sql, start_time=query_start, end_time=end_time,
                              cols=('update_time', 'rpt_date', 'code', 'data'),
                              add_stockcode=False)
    df['code'] = df.code.apply(datatoolkits.add_suffix)
    # Per stock: keep only observable rows, then derive the season value
    df = df.groupby('code').apply(fdmutils.get_observabel_data).reset_index(drop=True)
    df = df.groupby(['code', 'obs_time']).apply(fdmutils.cal_season,
                                                col_name='data', offset=n)
    df = df.reset_index().rename(columns={'obs_time': 'time'})
    trading_days = dateshandle.get_tds(start_time, end_time)
    df = df.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                  fillna={'code': lambda x: x.code.iloc[0]})
    df = df.reset_index(drop=True)
    res = df.pivot_table('data', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    return res
def get_adjfactor(universe, start_time, end_time):
    '''
    Fetch the price-adjusting factors of stocks, mapped onto trading days.
    '''
    sql = '''
        SELECT A.ExDiviDate, A.RatioAdjustingFactor, M.SecuCode
        FROM QT_AdjustingFactor A, SecuMain M
        WHERE
            A.InnerCode = M.InnerCode AND
            M.secuMarket in (83, 90) AND
            M.SECUCATEGORY = 1
        ORDER BY M.SecuCode ASC, A.ExDiviDate ASC
        '''
    raw = fdgetter.get_db_data(sql, cols=('time', 'data', 'code'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    trading_days = dateshandle.get_tds(start_time, end_time)
    mapped = raw.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                       fromNowOn=True,
                                       fillna={'code': lambda x: x.code.iloc[0],
                                               'data': lambda x: 1})
    mapped = mapped.reset_index(drop=True)
    res = mapped.pivot_table('data', index='time', columns='code')
    # Newly listed stocks usually have no dividend record yet -> factor of 1
    res = res.loc[:, sorted(universe)].fillna(1)
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def get_ctargetprice(universe, start_time, end_time):
    '''
    Fetch consensus target price data.
    '''
    sql = '''
        SELECT TARGET_PRICE, CON_DATE, STOCK_CODE
        FROM CON_FORECAST_SCHEDULE
        WHERE
            CON_DATE >= CAST('{start_time}' AS datetime) AND
            CON_DATE <= CAST('{end_time}' AS datetime)
        '''
    raw = fdgetter.get_db_data(sql, start_time=start_time, end_time=end_time,
                               cols=('data', 'time', 'code'), db=fdgetter.zyyx,
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    res = raw.pivot_table('data', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def _inner(universe, start_time, end_time):
    '''
    Fetch quote data and pivot it into a time-by-code table.
    '''
    df = fdgetter.get_db_data(transed_sql, cols=cols, start_time=start_time,
                              end_time=end_time, add_stockcode=False)
    df['code'] = df.code.apply(datatoolkits.add_suffix)
    if len(df.columns) == 4:
        # A price of 0 is replaced by the previous close
        # (presumably suspended days — TODO confirm against the SQL template)
        df.loc[df.data == 0, 'data'] = df['prevclose']
        df.drop('prevclose', inplace=True, axis=1)
    res = df.pivot_table('data', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def _inner(universe, start_time, end_time):
    '''
    Shanghai Composite Index (index price broadcast across the universe).
    '''
    query = sql_tmplate.replace('code', index_code).replace('price_type', price_type)
    raw = fdgetter.get_db_data(query, cols=('close', 'time'),
                               start_time=start_time, end_time=end_time,
                               add_stockcode=False)
    # Broadcast the single index close series to one column per stock
    values = np.repeat([raw.close.values], len(universe), axis=0).T
    res = pd.DataFrame(values, index=raw.time, columns=sorted(universe))
    checkdata_completeness(res, start_time, end_time)
    check_indexorder(res)
    return res
def _inner(universe, start_time, end_time):
    '''
    Fetch index constituent flags (1 = constituent) mapped onto trading days.
    '''
    # Pad the query window so constituents announced before start_time are seen
    padded_start = pd.to_datetime(start_time) - pd.Timedelta('60 days')
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['INDEX_CONSTITUENTS'],
                               code=index_code, cols=('code', 'time'),
                               add_stockcode=False, start_time=padded_start,
                               end_time=end_time)
    raw = raw.assign(is_constituent=1)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    pivoted = raw.pivot_table('is_constituent', index='time', columns='code')
    trading_days = dateshandle.get_tds(padded_start, end_time)
    pivoted = pivoted.loc[:, sorted(universe)].reset_index()
    mapped = datatoolkits.map_data(pivoted, days=trading_days, fromNowOn=True)
    mapped = mapped.set_index('time').dropna(axis=0, how='all')
    # Trim the lookback padding back to the requested window
    res = mapped.loc[(mapped.index >= start_time) & (mapped.index <= end_time)]
    return res
def inner(universe, start_time, end_time):
    '''
    Fetch index constituent weights (as fractions) mapped onto trading days.
    '''
    padded_start = tds_shift(start_time, 130)
    raw = fdgetter.get_db_data(sql, code=index_code,
                               cols=('code', 'weight', 'time'),
                               add_stockcode=False, start_time=padded_start,
                               end_time=end_time)
    raw.code = raw.code.apply(datatoolkits.add_suffix)
    weights = raw.pivot_table('weight', index='time', columns='code')
    padded_tds = dateshandle.get_tds(padded_start, end_time)
    weights = datatoolkits.map_data(weights.reset_index(), days=padded_tds,
                                    fromNowOn=True)
    weights = weights.set_index('time')
    requested_tds = dateshandle.get_tds(start_time, end_time)
    weights = weights.reindex(requested_tds)
    # Weights are stored in percent -> convert to fractions
    res = weights.loc[:, sorted(universe)] / 100
    assert check_indexorder(res), "Mixed index order"
    return res
def _inner(universe, start_time, end_time):
    '''
    Fetch data and forward-map it onto trading days, one column per stock.
    '''
    df = fdgetter.get_db_data(transed_sql, cols=('data', 'time', 'code'),
                              add_stockcode=False)
    df['code'] = df.code.apply(datatoolkits.add_suffix)
    # Assumes rows with identical (code, time) carry identical values,
    # so exact duplicates can simply be dropped
    df = df.drop_duplicates().sort_values(['code', 'time'])
    trading_days = dateshandle.get_tds(start_time, end_time)
    df = df.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                  fromNowOn=True,
                                  fillna={'code': lambda x: x.code.iloc[0]})
    df = df.reset_index(drop=True)
    res = df.pivot_table('data', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def _inner(universe, start_time, end_time):
    '''
    Fetch report data, compute yearly values via _handle_dbdata, and map
    them onto the trading days of [start_time, end_time].
    '''
    # Look back n years (366-day years) plus a margin for reporting lag
    lookback = 366 * n + 180
    query_start = pd.to_datetime(start_time) - pd.Timedelta('%d day' % lookback)
    raw = fdgetter.get_db_data(sql, start_time=query_start, end_time=end_time,
                               cols=('update_time', 'rpt_date', 'code', 'data'),
                               add_stockcode=False)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    res = _handle_dbdata(raw, start_time, end_time, fdmutils.cal_yr,
                         col_name='data', offset=n)
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def get_zxind(universe, start_time, end_time):
    '''
    Fetch the CITIC (ZhongXin) industry classification of stocks and map it
    onto every trading day in [start_time, end_time].
    '''
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ZX_IND'],
                               cols=('ind', 'time', 'code'),
                               add_stockcode=False)
    raw['ind'] = raw.ind.map(ZXIND_TRANS_DICT)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    trading_days = dateshandle.get_tds(start_time, end_time)
    mapped = raw.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                       fillna={'ind': lambda x: NaS,
                                               'code': lambda x: x.code.iloc[0]})
    mapped = mapped.reset_index(drop=True).set_index(['time', 'code'])
    res = mapped.loc[:, 'ind'].unstack()
    # Missing industries are filled with the NaS sentinel
    res = res.loc[:, sorted(universe)].dropna(axis=0, how='all').fillna(NaS)
    return res
def update_universe(path=UNIVERSE_FILE_PATH):
    '''
    Fetch the latest universe and compare it with the one stored on disk;
    if it changed, log and print the difference. The latest universe is then
    stored in the given file as a tuple (universe, update_time).

    Parameter
    ---------
    path: str, default UNIVERSE_FILE_PATH
        file in which the universe data is stored

    Return
    ------
    universe: list
        the current, up-to-date universe

    Notes
    -----
    Do not call this function directly just to obtain the universe — it may
    return a universe inconsistent with the factor data's universe. Use
    fmanger.factors.utils.get_universe instead.
    '''
    # BUG FIX: the original used __name__.split()[0] — str.split() without
    # arguments splits on whitespace and is a no-op for module names, so the
    # intended top-level-package logger was never selected for modules inside
    # a package. split('.') selects the package root logger.
    logger = logging.getLogger(__name__.split('.')[0])
    new_universe = fdgetter.get_db_data(fdgetter.BASIC_SQLs['A_UNIVERSE'],
                                        cols=('code', ),
                                        add_stockcode=False)
    new_universe['code'] = new_universe.code.apply(datatoolkits.add_suffix)
    new_universe = new_universe.code.tolist()
    try:
        universe_save = datatoolkits.load_pickle(path)
        universe, _ = universe_save
        nu_set = set(new_universe)
        ou_set = set(universe)
        if nu_set != ou_set:
            add_diff = list(nu_set.difference(ou_set))
            minus_diff = list(ou_set.difference(nu_set))
            msg = 'Warning: universe UPDATED, {drop} are DROPED, {add} are ADDED'.\
                format(drop=minus_diff, add=add_diff)
            logger.info(msg)
            print(msg)
    except FileNotFoundError:
        # First run: no stored universe to compare against
        pass
    data = (new_universe, dt.datetime.now())
    datatoolkits.dump_pickle(data, path)
    return new_universe
def get_st(universe, start_time, end_time):
    '''
    Fetch the special-treatment (ST) status of stocks, mapped onto the
    trading days of [start_time, end_time]. The numeric tag grows with risk:
    0 normal, 1 ST, 2 *ST, 3 delisting period, 4 high-risk warning, 5 PT.
    '''
    st_data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ST_TAG'],
                                   cols=('time', 'abbr', 'ms', 'code'),
                                   add_stockcode=False)

    def _assign_st(row):
        # Map the announcement type to a numeric risk tag
        map_dict = {'ST': 1, 'PT': 5, '撤销ST': 0, '*ST': 2, '撤消*ST并实行ST': 1,
                    '从ST变为*ST': 2, '撤销*ST': 0, '退市整理期': 3, '高风险警示': 4}
        if row.ms in map_dict:
            return map_dict[row.ms]
        else:
            assert row.ms == '撤销PT', "Error, cannot handle tag '{tag}'".format(tag=row.ms)
            # BUG FIX: test '*ST' before 'ST'. Any abbreviation containing
            # '*ST' also contains 'ST', so the original order ('ST' first)
            # made the '*ST' branch unreachable and mis-tagged *ST stocks as
            # plain ST (1 instead of 2).
            if '*ST' in row.abbr:
                return 2
            elif 'ST' in row.abbr:
                return 1
            else:
                return 0

    st_data = st_data.assign(tag=lambda x: x.apply(_assign_st, axis=1))
    st_data['code'] = st_data.code.apply(datatoolkits.add_suffix)
    # Deduplicate (code, time) pairs keeping the largest tag, i.e. the
    # highest-risk record for that day
    st_data = st_data.sort_values(['code', 'time', 'tag'])
    by_codentime = st_data.groupby(['code', 'time'])
    st_data = by_codentime.apply(lambda x: x.tail(1).iloc[0])
    st_data = st_data.reset_index(drop=True)
    tds = dateshandle.get_tds(start_time, end_time)
    by_code = st_data.groupby('code')
    st_data = by_code.apply(datatoolkits.map_data, days=tds,
                            fillna={'code': lambda x: x.code.iloc[0]},
                            fromNowOn=True)
    st_data = st_data.reset_index(drop=True)
    st_data = st_data.pivot_table('tag', index='time',
                                  columns='code').dropna(axis=0, how='all')
    # Stocks with no ST record are treated as normal (tag 0)
    st_data = st_data.loc[:, sorted(universe)].fillna(0)
    assert checkdata_completeness(st_data, start_time, end_time), "Error, data missed!"
    return st_data
def _inner(universe, start_time, end_time):
    '''
    Fetch report data, compute trailing-twelve-month (TTM) values per stock,
    and map them onto the trading days of [start_time, end_time].
    '''
    # 540 days of lookback so the prior reports needed by cal_ttm are present
    query_start = pd.to_datetime(start_time) - pd.Timedelta('540 day')
    df = fdgetter.get_db_data(sql_template, start_time=query_start,
                              end_time=end_time,
                              cols=('update_time', 'rpt_date', 'code', 'data'),
                              add_stockcode=False)
    df['code'] = df.code.apply(datatoolkits.add_suffix)
    df = df.groupby('code').apply(fdmutils.get_observabel_data).reset_index(drop=True)
    df = df.groupby(['code', 'obs_time']).apply(fdmutils.cal_ttm, col_name='data')
    df = df.reset_index().rename(columns={'obs_time': 'time'})
    trading_days = dateshandle.get_tds(start_time, end_time)
    df = df.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                  fillna={'code': lambda x: x.code.iloc[0]})
    df = df.reset_index(drop=True)
    res = df.pivot_table('data', index='time', columns='code')
    res = res.loc[:, sorted(universe)]
    assert check_indexorder(res), 'Error, data order is mixed!'
    assert checkdata_completeness(res, start_time, end_time), "Error, data missed!"
    return res
def get_liststatus(universe, start_time, end_time):
    '''
    Fetch the listing status of stocks, mapped onto trading days.
    '''
    raw = fdgetter.get_db_data(fdgetter.BASIC_SQLs['LIST_STATUS'],
                               cols=('code', 'list_status', 'time'),
                               add_stockcode=False)
    # DB codes: 1 listed, 2 suspended, 3 resumed, 4 delisted, 6 delisting period
    status_map = {1: 1, 2: 2, 3: 1, 4: 4, 6: 3}
    raw['list_status'] = raw.list_status.map(status_map)
    raw['code'] = raw.code.apply(datatoolkits.add_suffix)
    trading_days = dateshandle.get_tds(start_time, end_time)
    mapped = raw.groupby('code').apply(datatoolkits.map_data, days=trading_days,
                                       fillna={'code': lambda x: x.code.iloc[0]},
                                       fromNowOn=True)
    mapped = mapped.reset_index(drop=True)
    res = mapped.pivot_table('list_status', index='time',
                             columns='code').dropna(axis=0, how='all')
    res = res.loc[:, sorted(universe)]
    return res