def get_universe(start_time, end_time): sql = ''' SELECT SecuCode FROM SecuMain WHERE SecuCategory = 1 AND SecuMarket in (83, 90) AND ListedState != 9 ''' if not check_jydb_update_state(end_time): raise ValueError('JYDB has not been updated!') tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time) data = fetch_db_data(jydb, sql, ['symbol']) symbols = sorted(add_stock_suffix(s) for s in data.symbol) out = DataFrame(1, index=tds, columns=symbols) return out
def cshift_db(symbol, date, data_name, tab_name, offset): # 计算流量类报表偏移后的数据 # symbol: 股票代码 # date: 测试数据时间 # data_name: 数据名称(查询聚源) # tab_name: 报表名称(查询聚源) # offset: 数据时间偏移量,以季为单位 symbol = drop_suffix(symbol) date = trans_date(date) sql = ''' SELECT m.SecuCode, s.{data}, s.enddate, s.infopubldate FROM (SELECT EndDate, InfoPublDate, {data}, CompanyCode, BulletinType, IfAdjusted, IfMerged, ROW_NUMBER() OVER(PARTITION BY COMPANYCODE, ENDDATE ORDER BY INFOPUBLDATE DESC) as rnum FROM {tab_name} WHERE BulletinType != 10 AND InfoPublDate < \'{date:%Y-%m-%d}\' AND IfAdjusted NOT IN (4, 5) AND IfMerged = 1) s, SecuMain M where s.rnum = 1 AND S.CompanyCode = M.CompanyCode AND M.SecuCode = \'{symbol}\' AND M.SecuCategory = 1 AND M.SecuMarket IN (83, 90) AND S.EndDate >= (SELECT TOP(1) S2.CHANGEDATE FROM LC_ListStatus S2 WHERE S2.INNERCODE = M.INNERCODE AND S2.ChangeType = 1) ORDER BY S.EndDate ASC'''.format(data=data_name, symbol=symbol, date=date, tab_name=tab_name) data = fetch_db_data(jydb, sql, ['symbol', 'data', 'rpt_date', 'update_time'], {'data': 'float64'}) # pdb.set_trace() if len(data) < offset or get_calendar('stock.sse').count( data.iloc[-1, 3], date) > 120: return np.nan raw_data = data.set_index('rpt_date').data seasonly_data = raw_data - raw_data.shift(1) seasonly_data.loc[seasonly_data.index.month == 3] = np.nan seasonly_data = seasonly_data.fillna(raw_data) return seasonly_data.iloc[-offset]
def process_fundamental_data(data, cols, start_time, end_time, max_hist_num, func, **kwargs): ''' 处理从数据库中取出的基本面数据,按照给定的函数计算基本面数据,并将基本面数据映射到交易日中 Parameter --------- data: pandas.DataFrame 原始数据,必须包含cols参数中的列名 cols: iterable 元素为列名,长度为4,依次表示的意思是[证券代码列, 数据列, 更新时间列, 报告期列] start_time: datetime like 数据的开始时间 end_time: datetime like 数据的结束时间 max_hist_num: int 每个更新日期保存的历史最新数据的最大数量 func: function 格式为function(pandas.DataFrame)-> value,该函数接受的参数为每个股票在每个更新日期 的最新历史数据,包含cols参数中的列 kwargs: dict 以字典形式传给function的参数 Return ------ out: pandas.DataFrame 处理后生成的二维表,index为时间列,columns为股票代码列 ''' start_time, end_time = trans_date(start_time, end_time) symbol_col, data_col, ut_col, rpt_col = cols data = data.sort_values([symbol_col, ut_col]) tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time) def handler_per_symbol(df): # 对单只证券进行处理 obs_data, obs_flag = expand_data(df, ut_col, rpt_col, max_hist_num) res = obs_data.groupby(obs_flag).apply(func, **kwargs) res = map2td(res, tds).reset_index().rename(columns={ obs_flag: 'time', 0: 'data' }) return res out = data.groupby(symbol_col).apply(handler_per_symbol).reset_index() out = out.pivot_table('data', index='time', columns=symbol_col) return out
def inner(start_time, end_time): start_time, end_time = trans_date(start_time, end_time) nonlocal sql sql = sql.format(start_time=start_time, end_time=end_time) data = fetch_db_data(jydb, sql, cols, dtypes=dtypes) data.symbol = data.symbol.apply(add_stock_suffix) if len(cols) == 4: # 当前数据为需要使用前收盘填充的数据 data.loc[data.data == 0, 'data'] = data['prevclose'] data = data.drop('prevclose', axis=1) data = data.pivot_table('data', index='time', columns='symbol') latest_td = get_calendar('stock.sse').latest_tradingday( end_time, 'PAST') universe = sorted( pitcache_getter('UNIVERSE', 10).get_csdata(latest_td).index) data = data.reindex(columns=universe).sort_index(ascending=True) if not check_completeness(data.index, start_time, end_time): raise ValueError('Data missed!') return data
def check_completeness(time_index, start_time, end_time): ''' 检查数据在时间轴上的完整性 Parameter --------- time_index: iterable 数据的时间轴 start_time: datetime like 设定的数据的开始时间 end_time: datetime like 设定的数据的结束时间 Return ------ result: boolean 若数据完整,返回True ''' expect = get_calendar('stock.sse').get_tradingdays(start_time, end_time) return sorted(time_index) == expect
def inner(start_time, end_time): start_time, end_time = trans_date(start_time, end_time) cache_size = 100 offset = major_mul * minor_mul threshold = 1e-5 start_time_shifted = get_calendar('stock.sse').\ shift_tradingdays(start_time, -offset - 20) to_data = pitcache_getter('TO_RATE', cache_size).\ get_tsdata(start_time_shifted, end_time) data = to_data.rolling(offset, min_periods=offset).sum().dropna(how='all') data[data <= threshold] = np_nan data = data / major_mul data = np_log(data) data = data.loc[(data.index >= start_time) & (data.index <= end_time)] if start_time > trans_date(DATA_START_DATE): if not check_completeness(data.index, start_time, end_time): raise ValueError('Data missed!') return data
def check_jydb_update_state(check_time): """ 检测剧院数据库是否已经更新到给定时间的最近的一个交易日(往前推) 即给定的日期时候在数据库最新日期之前 Parameter --------- check_time: datetime like 需要检测的时间 Return ------ is_updated: boolean True表示是最新 """ db_update_time = get_db_update_time() latest_td = get_calendar('stock.sse').latest_tradingday(check_time, 'PAST') if latest_td.date() <= db_update_time.date(): return True else: return False
def ttm_processor(raw_data, cols, start_time, end_time): """ 将原始的数据处理成TTM,仅支持流量类的数据,如利润表、现金流量表 Parameter --------- raw_data: pandas.DataFrame 待处理的原始报表数据(即季报、半年报和年报) cols: iterable 数据列名,元素依次为[证券代码, 数据, 更新时间, 报告期时间] start_time: datetime like 计算结果的开始时间 end_time: datetime like 计算结果的结束时间 Return ------ out: pandas.DataFrame index为时间,columns为证券代码轴 """ raw_data = calc_seasonly_data(raw_data, cols) # pdb.set_trace() raw_data.symbol = raw_data.symbol.apply(add_stock_suffix) data = process_fundamental_data(raw_data, cols, start_time, end_time, 6, calc_tnm, data_col='data', period_flag_col='rpt_date') last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST') universe = sorted( pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index) data = data.reindex(columns=universe) if not check_completeness(data.index, start_time, end_time): raise ValueError('Data missed!') return data
def inner(start_time, end_time): if table_name in ['ISY', 'CFSY'] and freq == 2: raise ValueError('Incompatible parameters(table_name={tn}, freq={f}'.\ format(tn=table_name, f=freq)) start_time, end_time = trans_date(start_time, end_time) start_time_shifted = get_calendar('stock.sse').\ shift_tradingdays(start_time, -(66 * (offset + 2) * freq)) cols = ['symbol', 'data', 'update_date', 'rpt_date'] data_fetcher = data_fetcher_factory( sql, cols, jydb, { 'data': data_name_sql, 'table_name_sql': table_map[table_name], 'if_adjusted': if_adjusted }, {'data': 'float64'}) raw_data = data_fetcher(start_time_shifted, end_time) raw_data = raw_data.drop_duplicates( ['symbol', 'update_date', 'rpt_date']) if table_name in ['ISY', 'CFSY']: # 当前利润表和现金流量表计算季度偏移时仅会计算单季度数据 is_signal_season = True else: is_signal_season = False return shift_processor(raw_data, cols, start_time, end_time, offset, freq, is_signal_season)
# -*- coding:utf-8 """ Author: Hao Li Email: [email protected] Github: https://github.com/SAmmer0 Created: 2018/3/28 """ import numpy as np from fmanager import query from tdtools import get_calendar from datautils.datacache.cachecore import DataView test_dates = [('2015-04-16', '2015-05-07'), ('2016-06-01', '2017-01-01'), ('2017-11-01', '2018-03-01'), ('2014-01-01', '2014-05-01')] def get_df(start_time, end_time): return query('CLOSE', (start_time, end_time)) dv_df = DataView(get_df, get_calendar('stock.sse')) for date in test_dates: tmp = dv_df.get_tsdata(*date).fillna(-1000) data_cpr = query('CLOSE', date).fillna(-1000) assert np.all(np.all(np.isclose(tmp, data_cpr), axis=1)) print(dv_df._cache_start, dv_df._cache_end, dv_df._extendable) assert len(dv_df._data_cache) == get_calendar('stock.sse').count( dv_df._cache_start, dv_df._cache_end)
Parameter --------- rtime: datetime or the like 需要判断的时间 Return ------ result: boolean True表示时间符合条件 ''' return self._time_condition(rtime) ssetd_scheduler = RSchedule( get_calendar('stock.sse').is_tradingday) # 股票交易日时间计划表 sseme_scheduler = RSchedule(lambda t: get_calendar( 'stock.sse').is_cycle_target(t, 'MONTHLY', 'LAST')) # 股票交易日月末计划表 # -------------------------------------------------------------------------------------------------- # Rule class Rule(object): ''' 标的筛选规则类 提供以下功能: on_time: 在给定的时间,对给定的标的池进行筛选,然后返回筛选结果和筛选状态(用于表明当前筛选是否启用) Parameter --------- datasources: pitdata.DataGetterCollection
def get_st_status(start_time, end_time): ''' 获取股票特殊处理的情况 ''' sql = ''' SELECT S.SpecialTradeTime, S.SecurityAbbr, C.MS, M.SecuCode FROM LC_SpecialTrade S, SecuMain M, CT_SystemConst C WHERE S.InnerCode = M.InnerCode AND M.SecuMarket in (83, 90) AND S.SpecialTradeType = C.DM AND C.LB = 1185 AND M.SecuCategory = 1 ''' data = fetch_db_data(jydb, sql, ['time', 'abbr', 'ms', 'symbol']) def _assign_st(row): map_dict = { 'ST': 1., 'PT': 5., '撤销ST': 0., '*ST': 2., '撤消*ST并实行ST': 1., '从ST变为*ST': 2., '撤销*ST': 0., '退市整理期': 3., '高风险警示': 4. } if row.ms in map_dict: return map_dict[row.ms] else: assert row.ms == '撤销PT', "Error, cannot handle tag '{tag}'".format( tag=row.ms) if 'ST' in row.abbr: return 1 elif '*ST' in row.abbr: return 2 else: return 0 data = data.assign(tag=lambda x: x.apply(_assign_st, axis=1)) data['symbol'] = data.symbol.apply(add_stock_suffix) # 剔除日期重复项,因为数字越大表示越风险越高,因而只保留数字大的 data = data.sort_values(['symbol', 'time', 'tag']) by_snt = data.groupby(['symbol', 'time']) data = by_snt.tail(1) data = data.reset_index(drop=True) tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time) by_symbol = data.groupby('symbol') data = by_symbol.apply(map2td, days=tds, timecol='time', fillna={'symbol': lambda x: x.symbol.iloc[0]}) data = data.pivot_table('tag', index='time', columns='symbol').dropna(axis=0, how='all') last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST') universe = sorted( pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index) data = data.reindex(columns=universe).fillna(0) if not check_completeness(data.index, start_time, end_time): raise ValueError('Error, data missed!') return data
def plot_candle_line(axes, data, columns=None, width=0.8, color_up='red', color_down='green', alpha=1, majortick_locator=lambda x: get_calendar('stock.sse'). is_cycle_target(x, 'MONTHLY', 'LAST'), majortick_format='{:%Y-%m-%d}', minortick_locator=None, minortick_format=None, ticklabel_rotation=0): ''' 用于绘制K线 Parameter --------- axes: matplotlib.axes.Axes 子图 data: pandas.DataFrame 价格数据,index为时间 columns: tuple, default None 各个价格对应的数据列名,依次为(open, close, high, low),默认为('open', 'close', 'high', 'low') width: float, defaut 0.8 k线宽度 color_up: string, default red 上涨K线颜色 color_down: string, default green 下跌K线颜色 alpha: float(0, 1),default 1 k线透明度,0表示透明,1表示不透明 majortick_locator: function(date)->boolean 主刻度定位函数,默认为每个月最后一个交易日 majortick_format: string, default {:%Y-%m-%d} 主刻度格式,必须有.format方法 mainortick_locator: function(date)->boolean, default None 副刻度定位函数,默认没有,即不标识副刻度 minortick_format: string, default None 副刻度格式,必须有.format方法 ticklabel_rotation: float, default 0 刻度标签旋转度数 ''' if columns is None: columns = ('open', 'close', 'high', 'low') dates = data.index data = [data[c] for c in columns] majortick_pos, majortick_labels = date_ticker(dates, majortick_locator, majortick_format) axes.set_xticks(majortick_pos, minor=False) axes.set_xticklabels(majortick_labels, minor=False, rotation=ticklabel_rotation) if minortick_locator is not None: if minortick_format is None: minortick_format = '' minortick_pos, minortick_labels = date_ticker(dates, minortick_locator, minortick_format, add_header=False) # 剔除副刻度中与主刻度重复的项 # pdb.set_trace() minortick_pos, minortick_labels = zip( *[(pos, label) for pos, label in zip(minortick_pos, minortick_labels) if pos not in majortick_pos]) axes.set_xticks(minortick_pos, minor=True) if minortick_format != '': axes.set_xticklabels(minortick_labels, minor=True, rotation=ticklabel_rotation) candlestick2_ochl(axes, *data, width=width, colorup=color_up, colordown=color_down, alpha=alpha)
def generate_dv(factor_name): return DataView(generate_getter(factor_name), get_calendar('stock.sse'))