def test_pdutil():
    """Smoke-test the pandas helpers exposed through ``jutil``.

    Exercises ``fillinf`` (the null count is expected to grow from 3 to 5
    once the two infinite cells are handled), calls ``to_quantile`` along
    ``axis=1``, and checks that ``group_df_to_dict`` keys its result by the
    distinct values of the grouping column.
    """
    frame = pd.DataFrame(np.random.rand(4, 20))

    # Plant three NaN cells.
    for row, col in ((1, 2), (3, 4), (1, 4)):
        frame.iloc[row, col] = np.nan
    assert frame.isnull().sum().sum() == 3

    # Infinite values are not counted as null by pandas.
    frame.iloc[2, 11] = np.inf
    frame.iloc[2, 12] = -np.inf
    assert frame.isnull().sum().sum() == 3

    cleaned = jutil.fillinf(frame)
    assert cleaned.isnull().sum().sum() == 5

    # Only verifies that the call completes; the result is not inspected.
    jutil.to_quantile(frame, 5, axis=1)

    with_group = frame.copy()
    with_group['group'] = ['a', 'a', 'b', 'a']
    by_group = jutil.group_df_to_dict(with_group, by='group')
    assert set(by_group.keys()) == {'a', 'b'}
def get_signal_data(self, signal):
    """Build the stacked signal / return / quantile table for ``signal``.

    Reads ``self.mask``, ``self.forward``, ``self.period``,
    ``self.n_quantiles``, ``self.signal_ret`` and ``self.group``.

    Returns
    -------
    res : pd.DataFrame
        Index is pd.MultiIndex ['trade_date', 'symbol'],
        columns = ['signal', 'return', 'upside_ret(N)','downside_ret(N)','quantile']
    """
    self._judge(signal)  # check that signal is consistent with the other key parameters
    self._cal_ret()  # compute the signal returns

    signal = jutil.fillinf(signal)
    signal = signal.shift(1)  # avoid forward-looking bias
    if not self.forward:
        # past signal and point-in-time return
        signal = signal.shift(self.period)

    # A cell is excluded when the stored mask says so or the signal is missing.
    excluded = np.logical_or(self.mask, signal.isnull())
    usable_signal = signal.where(~excluded)

    # ----------------------------------------------------------------------
    # calculate quantile
    if self.n_quantiles == 1:
        quantile_frame = usable_signal.copy()
        quantile_frame.loc[:, :] = 1.0
    else:
        quantile_frame = jutil.to_quantile(usable_signal,
                                           n_quantiles=self.n_quantiles)

    # ----------------------------------------------------------------------
    # stack
    def _to_long(frame):
        """Stack a date x symbol frame into a one-column long frame (NaN kept)."""
        long_frame = pd.DataFrame(frame.stack(dropna=False))  # do not dropna
        long_frame.index.names = ['trade_date', 'symbol']
        long_frame.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
        return long_frame

    # ----------------------------------------------------------------------
    # concat signal value
    res = _to_long(signal)  # signal
    res.columns = ['signal']
    for ret_type, ret_frame in self.signal_ret.items():
        if ret_frame is None:
            continue
        res[ret_type] = _to_long(ret_frame).fillna(0)  # returns
    if self.group is not None:
        res["group"] = _to_long(self.group)
    res['quantile'] = _to_long(quantile_frame)

    excluded_long = _to_long(excluded)
    res = res.loc[~(excluded_long.iloc[:, 0]), :]

    if len(res) > 0:
        nan_count = res.isnull().sum(axis=0).sum()
        effective_pct = len(res) * 100. / signal.size
        print("Nan Data Count (should be zero) : {:d}; " \
              "Percentage of effective data: {:.0f}%".format(nan_count, effective_pct))
    else:
        print("No signal available.")

    res = res.astype({'signal': float, 'return': float, 'quantile': int})
    return res
X3[signal_name] = factor_dict[signal_name].shift(2).stack() X1_ = X1.join(X2,rsuffix='_2') X1_ = X1_.join(X3,rsuffix='_3') ''' X1_ = X1 # In[ ]: train_indexer = dv.get_ts('close_adj').loc[:20160101].stack().index.values test_indexer = dv.get_ts('close_adj').loc[20160101:].stack().index.values X = X1_ Y = dv.get_ts('close_adj').pct_change(period).shift(-period).stack().reindex( index=X.index) import jaqs.util as jutil Y_q = jutil.to_quantile( dv.get_ts('close_adj').pct_change(period).shift(-period), n_quantiles=7) Y_q_clip = Y_q.stack().reindex(index=X.index) Y_q_clip = Y_q_clip[np.logical_or(Y_q_clip == 1.0, Y_q_clip == 7.0)] Y_clip = Y.reindex(index=Y_q_clip.index) Y_clip_class = pd.Series(np.where(Y_q_clip == 7.0, 1, 0), index=Y_q_clip.index) X_clip = X.reindex(index=Y_q_clip.index) from sklearn.linear_model import LogisticRegression def split(X, max_train_size=5, period=1): n = len(X) lis = [] for i in range(1, n): pred_index = [n - i] if (n - i - max_train_size - period) >= 0: train_index = [
def process_signal_before_analysis(self, signal, price=None, ret=None, benchmark_price=None,
                                   period=5, n_quantiles=5, mask=None, forward=False):
    """
    Prepare for signal analysis.

    The result is stored in ``self.signal_data``; nothing is returned.

    Parameters
    ----------
    signal : pd.DataFrame
        Index is date, columns are stocks.
    price : pd.DataFrame
        Index is date, columns are stocks.
    ret : pd.DataFrame
        Index is date, columns are stocks.
    benchmark_price : pd.DataFrame or pd.Series or None
        Price of benchmark.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    n_quantiles : int
    period : int
        periods to compute forward returns on.
    forward : bool
        True: point-in-time signal against forward return.
        False: past signal against point-in-time return.

    Raises
    ------
    ValueError
        If price/ret/benchmark_price are combined inconsistently or
        n_quantiles is not a positive int.
    AssertionError
        If signal, price/ret and mask do not share index and columns.

    Notes
    -----
    ``self.signal_data`` is a pd.DataFrame whose index is the MultiIndex
    ['trade_date', 'symbol'] and whose columns are
    ['signal', 'return', 'quantile'].

    Deal with suspensions:
        If the period of calculating return is d (from T to T+d), then
        we do not use signal values of those suspended on T,
        we do not calculate return for those suspended on T+d.
    """
    # ----------------------------------------------------------------------
    # parameter validation
    if price is None and ret is None:
        raise ValueError("One of price / ret must be provided.")
    if price is not None and ret is not None:
        raise ValueError("Only one of price / ret should be provided.")
    if ret is not None and benchmark_price is not None:
        raise ValueError("You choose 'return' mode but benchmark_price is given.")
    if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
        raise ValueError("n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))

    # ensure inputs are aligned
    data = price if price is not None else ret
    assert np.all(signal.index == data.index)
    assert np.all(signal.columns == data.columns)
    if mask is not None:
        assert np.all(signal.index == mask.index)
        assert np.all(signal.columns == mask.columns)
        mask = jutil.fillinf(mask)
        # dtype of mask could be float. So we need to convert.
        # NaN must be filled BEFORE the int cast, otherwise the cast fails.
        mask = mask.fillna(0).astype(int).astype(bool)
    else:
        mask = pd.DataFrame(index=signal.index, columns=signal.columns, data=False)
    signal = jutil.fillinf(signal)
    data = jutil.fillinf(data)

    # ----------------------------------------------------------------------
    # save data
    self.n_quantiles = n_quantiles
    self.period = period

    # ----------------------------------------------------------------------
    # Get dependent variables
    # Use the inf-cleaned ``data`` so infinities cannot leak into the returns.
    if price is not None:
        df_ret = pfm.price2ret(data, period=self.period, axis=0)
        if benchmark_price is not None:
            benchmark_price = benchmark_price.loc[signal.index]
            bench_ret = pfm.price2ret(benchmark_price, self.period, axis=0)
            self.benchmark_ret = bench_ret
            residual_ret = df_ret.sub(bench_ret.values.flatten(), axis=0)
        else:
            residual_ret = df_ret
    else:
        residual_ret = data

    # Get independent variable
    signal = signal.shift(1)  # avoid forward-looking bias
    # forward or not
    if forward:
        # point-in-time signal and forward return
        residual_ret = residual_ret.shift(-self.period)
    else:
        # past signal and point-in-time return
        signal = signal.shift(self.period)
        # Move the caller's mask together with the signal; rows shifted in
        # from outside the sample are treated as masked.
        mask = mask.shift(self.period).fillna(True).astype(bool)

    # ----------------------------------------------------------------------
    # get masks
    mask_price_return = residual_ret.isnull()
    mask_signal = signal.isnull()
    # Honour the caller-supplied mask in addition to missing signal/return.
    mask = np.logical_or(mask, np.logical_or(mask_signal, mask_price_return))

    # ----------------------------------------------------------------------
    # calculate quantile
    signal_masked = signal.copy()
    signal_masked = signal_masked[~mask]
    if n_quantiles == 1:
        df_quantile = signal_masked.copy()
        df_quantile.loc[:, :] = 1.0
    else:
        df_quantile = jutil.to_quantile(signal_masked, n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # stack
    def stack_td_symbol(df):
        df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
        df.index.names = ['trade_date', 'symbol']
        df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
        return df

    mask = stack_td_symbol(mask)
    df_quantile = stack_td_symbol(df_quantile)
    residual_ret = stack_td_symbol(residual_ret)

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(signal)
    res.columns = ['signal']
    res['return'] = residual_ret
    res['quantile'] = df_quantile
    res = res.loc[~(mask.iloc[:, 0]), :]

    print("Nan Data Count (should be zero) : {:d}; " \
          "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                          len(res) * 100. / signal.size))
    res = res.astype({'signal': float, 'return': float, 'quantile': int})
    self.signal_data = res
def create_single_signal_report(self, signal, price, periods, n_quantiles, mask=None, buy_condition=None):
    """
    Analyse one instrument's signal and render the figure 'single_inst.pdf'.

    Parameters
    ----------
    signal : pd.Series
        index is integer date, values are signals
    price : pd.Series
        index is integer date, values are prices
    mask : pd.Series or None, optional
        index is integer date, values are bool.
        NOTE: accepted for interface compatibility; not used by this method.
    periods : list of int
    n_quantiles : int
    buy_condition : dict , optional
        {'cond_name1': {'column': str, 'hold': int, 'filter': func},
         'cond_name2': {'column': str, 'hold': int, 'filter': func},
        }
        The key 'col_name' is accepted as an alias of 'column'.

    Returns
    -------
    None
        The figure is handed to ``self.show_fig``.

    """
    if isinstance(signal, pd.DataFrame):
        signal = signal.iloc[:, 0]
    if isinstance(price, pd.DataFrame):
        price = price.iloc[:, 0]

    # calc return, one column per holding period
    ret_l = {period: pfm.price2ret(price, period=period, axis=0) for period in periods}
    df_ret = pd.concat(ret_l, axis=1)

    # ----------------------------------------------------------------------
    # calculate quantile
    if n_quantiles == 1:
        df_quantile = signal.copy()
        df_quantile.loc[:] = 1.0
    else:
        df_quantile = jutil.to_quantile(signal, n_quantiles=n_quantiles, axis=0)

    # ----------------------------------------------------------------------
    # concat signal value
    res = pd.DataFrame(signal.shift(1))  # lag by one row to avoid forward-looking bias
    res.columns = ['signal']
    # Lag the quantile exactly like the signal so both columns of a row
    # describe the same observation.
    res['quantile'] = df_quantile.shift(1)
    res = pd.concat([res, df_ret], axis=1)
    res = res.dropna()

    print("Nan Data Count (should be zero) : {:d}; " \
          "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                          len(res) * 100. / signal.size))

    # calc quantile stats
    gp = res.groupby(by='quantile')
    dic_stats = OrderedDict()
    for q, df in gp:
        df_stat = pd.DataFrame(index=['mean', 'std'], columns=df_ret.columns, data=np.nan)
        df_stat.loc['mean', :] = df.loc[:, df_ret.columns].mean(axis=0)
        df_stat.loc['std', :] = df.loc[:, df_ret.columns].std(axis=0)
        dic_stats[q] = df_stat

    # calculate IC
    ics = calc_various_ic(res, ret_cols=df_ret.columns)

    # backtest
    if buy_condition is not None:
        def sim_backtest(df, dic_of_cond):
            """Cumulate the 'hold'-period return on rows where the filter fires."""
            dic_cum_ret = dict()
            for key, dic in dic_of_cond.items():
                # 'column' is what this code has always read; 'col_name' is
                # the spelling the docstring used to advertise.
                col_name = dic['column'] if 'column' in dic else dic['col_name']
                func = dic['filter']
                n_hold = dic['hold']
                hit = df[col_name].apply(func).astype(int)
                dic_cum_ret[key] = (df[n_hold] * hit).cumsum()
            df_cumret = pd.concat(dic_cum_ret, axis=1)
            return df_cumret

        df_backtest = sim_backtest(res, buy_condition)

    # plot
    gf = plotting.GridFigure(rows=3, cols=1, height_ratio=1.2)
    gf.fig.suptitle("Event Return Analysis (annualized)")

    plotting.plot_ic_decay(ics, ax=gf.next_row())

    plotting.plot_quantile_return_mean_std(dic_stats, ax=gf.next_row())

    if buy_condition is not None:
        plotting.plot_batch_backtest(df_backtest, ax=gf.next_row())

    self.show_fig(gf.fig, 'single_inst.pdf')
def process_signal_before_analysis(self, signal, price=None, daily_ret=None,
                                   benchmark_price=None, daily_benchmark_ret=None,
                                   high=None, low=None, group=None,
                                   period=5, n_quantiles=5, mask=None,
                                   can_enter=None, can_exit=None,
                                   forward=True, commission=0.0008):
    """
    Prepare for signal analysis.

    The result is stored in ``self.signal_data``; the per-type return frames
    are stored in ``self.ret``. Nothing is returned.

    Parameters
    ----------
    signal : pd.DataFrame
        Index is date, columns are stocks.
    price : pd.DataFrame
        Index is date, columns are stocks.
    high : pd.DataFrame
        Index is date, columns are stocks.
    low : pd.DataFrame
        Index is date, columns are stocks.
    daily_ret : pd.DataFrame
        Index is date, columns are stocks.
    daily_benchmark_ret : pd.DataFrame or pd.Series or None
        Daily ret of benchmark.
    group : pd.DataFrame
        Index is date, columns are stocks.
    benchmark_price : pd.DataFrame or pd.Series or None
        Price of benchmark.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    can_enter: pd.DataFrame
        Date the security can be traded and BUY.
    can_exit:pd.DataFrame
        Date the security can be traded and SELL.
    n_quantiles : int
    period : int
        periods to compute forward returns on.
    forward :bool
        Return cal method. True by default.
    commission: float
        commission ratio per trade.

    Raises
    ------
    ValueError
        If price/daily_ret or benchmark_price/daily_benchmark_ret are
        combined inconsistently, or n_quantiles is not a positive int.

    Notes
    -----
    ``self.signal_data`` is a pd.DataFrame whose index is the MultiIndex
    ['trade_date', 'symbol'] and whose columns are
    ['signal', 'return', 'upside_ret', 'downside_ret', 'quantile']
    (plus 'group' when a group frame is given).

    Deal with suspensions:
        If the period of calculating return is d (from T to T+d), then
        we do not use signal values of those suspended on T,
        we do not calculate return for those suspended on T+d.
    """

    def _aligned(frame, label):
        """Return ``frame`` reindexed like ``signal``, warning if labels differ."""
        try:
            same = bool(np.all(signal.index == frame.index)
                        and np.all(signal.columns == frame.columns))
        except (ValueError, TypeError):
            # e.g. different lengths cannot be compared element-wise
            same = False
        if not same:
            warnings.warn(
                "Warning: signal与{}的index/columns不一致,请检查输入参数!".format(label))
            frame = frame.reindex_like(signal)
        return frame

    def _as_bool(frame):
        """Clean a flag frame (possibly float with NaN/inf) into booleans."""
        # NaN must be filled before the int cast, otherwise the cast fails.
        return jutil.fillinf(frame).fillna(0).astype(int).astype(bool)

    # ----------------------------------------------------------------------
    # parameter validation
    if price is None and daily_ret is None:
        raise ValueError("One of price / daily_ret must be provided.")
    if price is not None and daily_ret is not None:
        raise ValueError("Only one of price / daily_ret should be provided.")
    if benchmark_price is not None and daily_benchmark_ret is not None:
        raise ValueError(
            "Only one of benchmark_price / daily_benchmark_ret should be provided.")
    if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
        raise ValueError(
            "n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))

    if daily_ret is not None:
        warnings.warn(
            "Warning: 检查到使用daily_ret模式。未避免未来函数,请注意确保daily_ret格式为对应日期能实现的日收益.")

    # ensure inputs are aligned
    if mask is not None:
        mask = _as_bool(_aligned(mask, "mask"))
    else:
        mask = pd.DataFrame(index=signal.index, columns=signal.columns, data=False)

    if can_enter is not None:
        can_enter = _as_bool(_aligned(can_enter, "can_enter"))
    else:
        can_enter = pd.DataFrame(index=signal.index, columns=signal.columns, data=True)

    if can_exit is not None:
        can_exit = _as_bool(_aligned(can_exit, "can_exit"))
    else:
        can_exit = pd.DataFrame(index=signal.index, columns=signal.columns, data=True)

    if group is not None:
        group = _aligned(group, "group")
        group = group.astype(str)

    # ----------------------------------------------------------------------
    # save data
    self.n_quantiles = n_quantiles
    self.period = period

    # ----------------------------------------------------------------------
    # Get dependent variables

    # benchmark return
    self.benchmark_ret = None
    if benchmark_price is not None:
        benchmark_price = benchmark_price.reindex(index=signal.index)
        self.benchmark_ret = pfm.price2ret(benchmark_price, self.period, axis=0, compound=True)
    elif daily_benchmark_ret is not None:
        daily_benchmark_ret = daily_benchmark_ret.reindex(index=signal.index)
        self.benchmark_ret = pfm.daily_ret_to_ret(daily_benchmark_ret, self.period)

    # holding-period return
    isRealPrice = False
    if daily_ret is not None:
        daily_ret = _aligned(daily_ret, "daily_ret")
        daily_ret = jutil.fillinf(daily_ret).fillna(0)
        price = pfm.daily_ret_to_cum(daily_ret)
    else:
        # a real price frame was supplied
        isRealPrice = True
        price = _aligned(price, "price")
        price = jutil.fillinf(price)

    # A position cannot be opened where there is no price.
    # (``price != NaN`` is always True, so an explicit null test is required.)
    can_enter = np.logical_and(price.notnull(), can_enter)

    df_ret = pfm.price2ret(price, period=self.period, axis=0, compound=True)
    # Where exit is not allowed, use the next available exit price instead.
    price_can_exit = price.copy()
    price_can_exit[~can_exit] = np.nan
    price_can_exit = price_can_exit.bfill()
    ret_can_exit = pfm.price2ret(price_can_exit, period=self.period, axis=0, compound=True)
    df_ret[~can_exit] = ret_can_exit[~can_exit]

    if self.benchmark_ret is not None:
        # holding-period return relative to the benchmark
        residual_ret = df_ret.sub(self.benchmark_ret.values.flatten(), axis=0)
    else:
        residual_ret = df_ret
    residual_ret = jutil.fillinf(residual_ret)
    residual_ret -= commission

    # potential upside and downside within the holding period
    if high is not None and isRealPrice:
        high = _aligned(high, "high")
        high = jutil.fillinf(high)
    else:
        high = price
    upside_ret = compute_upside_returns(price, high, can_exit, self.period, compound=True)
    upside_ret = jutil.fillinf(upside_ret)
    upside_ret -= commission

    if low is not None and isRealPrice:
        low = _aligned(low, "low")
        low = jutil.fillinf(low)
    else:
        low = price
    downside_ret = compute_downside_returns(price, low, can_exit, self.period, compound=True)
    downside_ret = jutil.fillinf(downside_ret)
    downside_ret -= commission

    # ----------------------------------------------------------------------
    # Get independent variable
    signal = jutil.fillinf(signal)
    signal = signal.shift(1)  # avoid forward-looking bias

    # forward or not
    if forward:
        # point-in-time signal and forward return
        residual_ret = residual_ret.shift(-self.period)
        upside_ret = upside_ret.shift(-self.period)
        downside_ret = downside_ret.shift(-self.period)
    else:
        # past signal and point-in-time return
        signal = signal.shift(self.period)
        can_enter = can_enter.shift(self.period)
        mask = mask.shift(self.period)

    self.ret = dict()
    self.ret["return"] = residual_ret
    self.ret["upside_ret"] = upside_ret
    self.ret["downside_ret"] = downside_ret

    # ----------------------------------------------------------------------
    # get masks
    mask_signal = signal.isnull()
    # Shifting a boolean frame introduces NaN and an object dtype; fill and
    # cast back to bool so that ``~`` and logical_or act on real booleans.
    mask_user = mask.fillna(True).astype(bool)
    cannot_enter = ~(can_enter.fillna(False).astype(bool))
    mask = np.logical_or(mask_user, np.logical_or(mask_signal, cannot_enter))
    mask = np.logical_or(mask, self.ret["return"].isnull())

    # ----------------------------------------------------------------------
    # calculate quantile
    signal_masked = signal.copy()
    signal_masked = signal_masked[~mask]
    if n_quantiles == 1:
        df_quantile = signal_masked.copy()
        df_quantile.loc[:, :] = 1.0
    else:
        if group is None:
            df_quantile = jutil.to_quantile(signal_masked, n_quantiles=n_quantiles)
        else:
            from jaqs_fxdayu.data.py_expression_eval import Parser
            ps = Parser()
            ps.index_member = None
            df_quantile = ps.group_quantile(df=signal_masked, group=group,
                                            n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # stack
    def stack_td_symbol(df):
        df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
        df.index.names = ['trade_date', 'symbol']
        df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
        return df

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(signal)
    res.columns = ['signal']
    for ret_type in self.ret:
        res[ret_type] = stack_td_symbol(self.ret[ret_type]).fillna(0)
    res['quantile'] = stack_td_symbol(df_quantile)
    if group is not None:
        res["group"] = stack_td_symbol(group)
    mask = stack_td_symbol(mask)
    res = res.loc[~(mask.iloc[:, 0]), :]

    if len(res) > 0:
        print("Nan Data Count (should be zero) : {:d}; " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                              len(res) * 100. / signal.size))
    else:
        print("No signal available.")
    res = res.astype({'signal': float, 'return': float, 'quantile': int})
    self.signal_data = res
def process_signal(self, enter_signal, exit_signal=None, sig_type="long",
                   price=None, daily_ret=None, max_holding_period=None,
                   stoploss=None, stopprofit=None, mask=None,
                   can_enter=None, can_exit=None, group=None,
                   n_quantiles=1, commission=0.0008):
    """
    Prepare for signal analysis.

    Results are stored on the instance: ``self.price``,
    ``self.final_exit_pos[sig_type]``, ``self.ret[sig_type]`` and, when at
    least one usable signal remains, ``self.signal_data[sig_type]``.
    Nothing is returned. The caller's ``exit_signal`` list is not modified.

    Parameters
    ----------
    enter_signal : pd.DataFrame
        Index is date, columns are stocks.value can only be -2/0/2
    exit_signal : pd.DataFrame/list of pd.DataFrame
        Index is date, columns are stocks.value can only be -1/0/1
    sig_type: str
        "long"/"short", which type of signal to process
    price : pd.DataFrame
        Index is date, columns are stocks.
    daily_ret : pd.DataFrame
        Index is date, columns are stocks.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    can_enter: pd.DataFrame
        Date the security can open.
    can_exit:pd.DataFrame
        Date the security can close.
    max_holding_period : int
        Limit the max holding period
    stoploss:float
        stoploss ratio per trade
    stopprofit:float
        stopprofit ratio per trade
    n_quantiles: int
    group : pd.DataFrame
        Index is date, columns are stocks.
    commission: float
        commission ratio per trade.

    Raises
    ------
    ValueError
        On an unknown sig_type, inconsistent price/daily_ret, a non-positive
        or non-int n_quantiles, signal values outside the allowed sets, or
        a missing exit rule.
    """
    # ensure inputs are aligned
    # parameter validation
    if sig_type not in ["long", "short"]:
        raise ValueError("信号类型(sig_type)只能为long/short.")
    if price is None and daily_ret is None:
        raise ValueError("One of price / daily_ret must be provided.")
    if price is not None and daily_ret is not None:
        raise ValueError("Only one of price / daily_ret should be provided.")
    if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
        raise ValueError(
            "n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))

    enter_signal = jutil.fillinf(enter_signal)
    if n_quantiles == 1:  # event-type entry signal
        # enter_signal may only contain -2 (open short), 0 (no action), 2 (open long)
        enter_signal = enter_signal.fillna(0)
        if not enter_signal.isin([-2, 0, 2]).all().all():
            raise ValueError("检测到n_quantiles为1,该模式下测试的enter_signal为事件类因子."
                             "请确保enter_signal里的信号只能为-2(开空),0(不做操作),2(开多))."
                             "如需测试普通因子,请指定n_quantiles为大于1的整数.")
        # at least one exit rule is required
        if (exit_signal is None) and (max_holding_period is None) and \
                (stoploss is None) and (stopprofit is None):
            raise ValueError(
                "确保至少有一种出场信号(exit_signal/max_holding_period/stoploss/stopprofit)")
    else:  # ordinary (cross-sectional factor) entry signal
        if max_holding_period is None:
            raise ValueError("检测到n_quantiles不为1,该模式下测试的enter_signal为普通因子."
                             "该模式下,max_holding_period参数不能为空.")

    # NOTE(review): assumed to be set for both branches above — confirm.
    self.period = max_holding_period

    # Normalise exit_signal into a NEW list so the caller's list (which may
    # be reused for the "long" and the "short" pass) is never mutated.
    if exit_signal is None:
        raw_exits = []
    elif isinstance(exit_signal, list):
        raw_exits = exit_signal
    else:
        raw_exits = [exit_signal]
    cleaned_exits = []
    for exit_frame in raw_exits:
        exit_frame = exit_frame.reindex_like(enter_signal)
        exit_frame = jutil.fillinf(exit_frame).fillna(0)
        # exit signals may only contain -1 (close short), 0 (no action), 1 (close long)
        if not exit_frame.isin([-1, 0, 1]).all().all():
            raise ValueError(
                "请确保所有exit_signal里的信号只能为-1(平空),0(不做操作),1(平多)")
        cleaned_exits.append(exit_frame)

    if group is not None:
        group = group.reindex_like(enter_signal)

    raw_filters = {
        "mask": mask,
        "can_enter": can_enter,
        "can_exit": can_exit,
    }
    sig_filter = {}
    for name, frame in raw_filters.items():
        if frame is not None:
            frame = frame.reindex_like(enter_signal)
            # Fill NaN before the int cast, otherwise the cast fails.
            sig_filter[name] = jutil.fillinf(frame).fillna(0).astype(int)
        else:
            sig_filter[name] = pd.DataFrame(
                index=enter_signal.index,
                columns=enter_signal.columns,
                data=0 if name == "mask" else 1)

    # process
    # =============================================================
    # Signals are observed at the close of a day and executed on the next
    # trading day (at the open with price=open, at the close with
    # price=close, or at another price such as price=vwap).
    # Lag by one row to avoid forward-looking bias.
    enter_signal = enter_signal.shift(1)
    exit_signal = [exit_frame.shift(1) for exit_frame in cleaned_exits]

    # price data
    if daily_ret is not None:
        daily_ret = daily_ret.reindex_like(enter_signal)
        daily_ret = jutil.fillinf(daily_ret).fillna(0)
        price = pfm.daily_ret_to_cum(daily_ret)  # cumulative net value
    else:
        # a real price frame was supplied
        price = price.reindex_like(enter_signal)
        price = jutil.fillinf(price)  # price

    self.price = price

    # =====================
    # collect candidate exit positions
    pos = []
    # timed exit
    if max_holding_period is not None:
        pos.append(get_period_exit_pos(enter_signal, period=max_holding_period))
    # stop-loss exit
    if stoploss is not None:
        pos.append(get_stop_pos(price, stoploss, sig_type=sig_type, stop_type="stop_loss"))
    # stop-profit exit
    if stopprofit is not None:
        pos.append(get_stop_pos(price, stopprofit, sig_type=sig_type, stop_type="stop_profit"))
    # user-defined exit signals
    for es in exit_signal:
        pos.append(get_exit_pos(es, exit_type="close_%s" % (sig_type,)))

    # combine all exit rules: the first one triggered wins
    exit_pos = reduce(get_first_pos, pos).replace(LONGINT, np.nan)
    # nearest position at which exiting is permitted, for every day
    exit_permited_pos = get_exit_pos(sig_filter["can_exit"], value=[1])
    self.final_exit_pos[sig_type] = get_exit_value(exit_permited_pos, exit_pos)

    # =====================
    # signal return
    price_exit = get_exit_value(price, self.final_exit_pos[sig_type])
    ret_exit = jutil.fillinf((price_exit - price) / price)
    if sig_type == "short":
        ret_exit = -1 * ret_exit
    self.ret[sig_type] = ret_exit - commission

    # =====================
    # build signal_data
    # ----------------------------------------------------------------------
    # mask signal
    if n_quantiles == 1:  # event factor
        value = 2 if sig_type == "long" else -2
        mask_signal = enter_signal != value
    else:  # ordinary factor
        mask_signal = enter_signal.isnull()

    mask_signal = np.logical_or(
        mask_signal,
        np.logical_or(sig_filter["mask"], sig_filter["can_enter"] != 1))
    mask_signal = np.logical_or(mask_signal, self.ret[sig_type].isnull())

    # drop signals whose exit falls on the entry day itself
    sig_pos = get_sig_pos(self.final_exit_pos[sig_type])
    mask_signal = np.logical_or(mask_signal, sig_pos == self.final_exit_pos[sig_type])

    # calculate quantile
    if n_quantiles == 1:
        df_quantile = pd.DataFrame(1, index=enter_signal.index,
                                   columns=enter_signal.columns)
    else:
        signal_masked = enter_signal.copy()
        signal_masked = signal_masked[~mask_signal]
        if group is None:
            df_quantile = jutil.to_quantile(signal_masked, n_quantiles=n_quantiles)
        else:
            from jaqs_fxdayu.data.py_expression_eval import Parser
            ps = Parser()
            ps.index_member = None
            df_quantile = ps.group_quantile(df=signal_masked, group=group,
                                            n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(enter_signal)
    res.columns = ['signal']
    res["return"] = stack_td_symbol(self.ret[sig_type])
    res["exit_time"] = stack_td_symbol(self.final_exit_pos[sig_type])
    res['quantile'] = stack_td_symbol(df_quantile)
    if group is not None:
        res["group"] = stack_td_symbol(group)
    res["sig_type"] = sig_type
    mask_signal = stack_td_symbol(mask_signal)
    res = res.loc[~(mask_signal.iloc[:, 0]), :]

    if len(res) > 0:
        print("Nan Data Count (should be zero) : {:d}; " \
              "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                              len(res) * 100. / enter_signal.size))
        res = res.astype({'signal': float, 'return': float, 'quantile': int})
        self.signal_data[sig_type] = res
    else:
        print("sig_type %s:No signal available." % (sig_type,))
def create_single_signal_report(self, signal, price, periods, n_quantiles, mask=None, buy_condition=None):
    """
    Analyse one instrument's signal and render the figure 'single_inst.pdf'.

    Parameters
    ----------
    signal : pd.Series
        index is integer date, values are signals
    price : pd.Series
        index is integer date, values are prices
    mask : pd.Series or None, optional
        index is integer date, values are bool.
        NOTE: accepted for interface compatibility; not used by this method.
    periods : list of int
    n_quantiles : int
    buy_condition : dict , optional
        {'cond_name1': {'column': str, 'hold': int, 'filter': func},
         'cond_name2': {'column': str, 'hold': int, 'filter': func},
        }
        The key 'col_name' is accepted as an alias of 'column'.

    Returns
    -------
    None
        The figure is handed to ``self.show_fig``.

    """
    if isinstance(signal, pd.DataFrame):
        signal = signal.iloc[:, 0]
    if isinstance(price, pd.DataFrame):
        price = price.iloc[:, 0]

    # calc return, one column per holding period
    ret_l = {period: pfm.price2ret(price, period=period, axis=0) for period in periods}
    df_ret = pd.concat(ret_l, axis=1)

    # ----------------------------------------------------------------------
    # calculate quantile
    if n_quantiles == 1:
        df_quantile = signal.copy()
        df_quantile.loc[:] = 1.0
    else:
        df_quantile = jutil.to_quantile(signal, n_quantiles=n_quantiles, axis=0)

    # ----------------------------------------------------------------------
    # concat signal value
    res = pd.DataFrame(signal.shift(1))  # lag by one row to avoid forward-looking bias
    res.columns = ['signal']
    # Lag the quantile exactly like the signal so both columns of a row
    # describe the same observation.
    res['quantile'] = df_quantile.shift(1)
    res = pd.concat([res, df_ret], axis=1)
    res = res.dropna()

    print("Nan Data Count (should be zero) : {:d}; " \
          "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                          len(res) * 100. / signal.size))

    # calc quantile stats
    gp = res.groupby(by='quantile')
    dic_stats = OrderedDict()
    for q, df in gp:
        df_stat = pd.DataFrame(index=['mean', 'std'], columns=df_ret.columns, data=np.nan)
        df_stat.loc['mean', :] = df.loc[:, df_ret.columns].mean(axis=0)
        df_stat.loc['std', :] = df.loc[:, df_ret.columns].std(axis=0)
        dic_stats[q] = df_stat

    # calculate IC
    ics = calc_various_ic(res, ret_cols=df_ret.columns)

    # backtest
    if buy_condition is not None:
        def sim_backtest(df, dic_of_cond):
            """Cumulate the 'hold'-period return on rows where the filter fires."""
            dic_cum_ret = dict()
            for key, dic in dic_of_cond.items():
                # 'column' is what this code has always read; 'col_name' is
                # the spelling the docstring used to advertise.
                col_name = dic['column'] if 'column' in dic else dic['col_name']
                func = dic['filter']
                n_hold = dic['hold']
                hit = df[col_name].apply(func).astype(int)
                dic_cum_ret[key] = (df[n_hold] * hit).cumsum()
            df_cumret = pd.concat(dic_cum_ret, axis=1)
            return df_cumret

        df_backtest = sim_backtest(res, buy_condition)

    # plot
    gf = plotting.GridFigure(rows=3, cols=1, height_ratio=1.2)
    gf.fig.suptitle("Event Return Analysis (annualized)")

    plotting.plot_ic_decay(ics, ax=gf.next_row())

    plotting.plot_quantile_return_mean_std(dic_stats, ax=gf.next_row())

    if buy_condition is not None:
        plotting.plot_batch_backtest(df_backtest, ax=gf.next_row())

    self.show_fig(gf.fig, 'single_inst.pdf')
def process_signal_before_analysis(self, signal, price=None, ret=None, benchmark_price=None,
                                   period=5, n_quantiles=5, mask=None, forward=False):
    """
    Prepare for signal analysis.

    The result is stored in ``self.signal_data``; nothing is returned.

    Parameters
    ----------
    signal : pd.DataFrame
        Index is date, columns are stocks.
    price : pd.DataFrame
        Index is date, columns are stocks.
    ret : pd.DataFrame
        Index is date, columns are stocks.
    benchmark_price : pd.DataFrame or pd.Series or None
        Price of benchmark.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    n_quantiles : int
    period : int
        periods to compute forward returns on.
    forward : bool
        True: point-in-time signal against forward return.
        False: past signal against point-in-time return.

    Raises
    ------
    ValueError
        If price/ret/benchmark_price are combined inconsistently or
        n_quantiles is not a positive int.
    AssertionError
        If signal, price/ret and mask do not share index and columns.

    Notes
    -----
    ``self.signal_data`` is a pd.DataFrame whose index is the MultiIndex
    ['trade_date', 'symbol'] and whose columns are
    ['signal', 'return', 'quantile'].

    Deal with suspensions:
        If the period of calculating return is d (from T to T+d), then
        we do not use signal values of those suspended on T,
        we do not calculate return for those suspended on T+d.
    """
    # ----------------------------------------------------------------------
    # parameter validation
    if price is None and ret is None:
        raise ValueError("One of price / ret must be provided.")
    if price is not None and ret is not None:
        raise ValueError("Only one of price / ret should be provided.")
    if ret is not None and benchmark_price is not None:
        raise ValueError("You choose 'return' mode but benchmark_price is given.")
    if not (n_quantiles > 0 and isinstance(n_quantiles, int)):
        raise ValueError("n_quantiles must be a positive integer. Input is: {}".format(n_quantiles))

    # ensure inputs are aligned
    data = price if price is not None else ret
    assert np.all(signal.index == data.index)
    assert np.all(signal.columns == data.columns)
    if mask is not None:
        assert np.all(signal.index == mask.index)
        assert np.all(signal.columns == mask.columns)
        mask = jutil.fillinf(mask)
        # dtype of mask could be float. So we need to convert.
        # NaN must be filled BEFORE the int cast, otherwise the cast fails.
        mask = mask.fillna(0).astype(int).astype(bool)
    else:
        mask = pd.DataFrame(index=signal.index, columns=signal.columns, data=False)
    signal = jutil.fillinf(signal)
    data = jutil.fillinf(data)

    # ----------------------------------------------------------------------
    # save data
    self.n_quantiles = n_quantiles
    self.period = period

    # ----------------------------------------------------------------------
    # Get dependent variables
    # Use the inf-cleaned ``data`` so infinities cannot leak into the returns.
    if price is not None:
        df_ret = pfm.price2ret(data, period=self.period, axis=0)
        if benchmark_price is not None:
            benchmark_price = benchmark_price.loc[signal.index]
            bench_ret = pfm.price2ret(benchmark_price, self.period, axis=0)
            self.benchmark_ret = bench_ret
            residual_ret = df_ret.sub(bench_ret.values.flatten(), axis=0)
        else:
            residual_ret = df_ret
    else:
        residual_ret = data

    # Get independent variable
    signal = signal.shift(1)  # avoid forward-looking bias
    # forward or not
    if forward:
        # point-in-time signal and forward return
        residual_ret = residual_ret.shift(-self.period)
    else:
        # past signal and point-in-time return
        signal = signal.shift(self.period)
        # Move the caller's mask together with the signal; rows shifted in
        # from outside the sample are treated as masked.
        mask = mask.shift(self.period).fillna(True).astype(bool)

    # ----------------------------------------------------------------------
    # get masks
    mask_price_return = residual_ret.isnull()
    mask_signal = signal.isnull()
    # Honour the caller-supplied mask in addition to missing signal/return.
    mask = np.logical_or(mask, np.logical_or(mask_signal, mask_price_return))

    # ----------------------------------------------------------------------
    # calculate quantile
    signal_masked = signal.copy()
    signal_masked = signal_masked[~mask]
    if n_quantiles == 1:
        df_quantile = signal_masked.copy()
        df_quantile.loc[:, :] = 1.0
    else:
        df_quantile = jutil.to_quantile(signal_masked, n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # stack
    def stack_td_symbol(df):
        df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
        df.index.names = ['trade_date', 'symbol']
        df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
        return df

    mask = stack_td_symbol(mask)
    df_quantile = stack_td_symbol(df_quantile)
    residual_ret = stack_td_symbol(residual_ret)

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(signal)
    res.columns = ['signal']
    res['return'] = residual_ret
    res['quantile'] = df_quantile
    res = res.loc[~(mask.iloc[:, 0]), :]

    print("Nan Data Count (should be zero) : {:d}; " \
          "Percentage of effective data: {:.0f}%".format(res.isnull().sum(axis=0).sum(),
                                                          len(res) * 100. / signal.size))
    res = res.astype({'signal': float, 'return': float, 'quantile': int})
    self.signal_data = res