def process_signal_before_analysis(self,
                                   signal,
                                   price=None,
                                   daily_ret=None,
                                   benchmark_price=None,
                                   daily_benchmark_ret=None,
                                   high=None,
                                   low=None,
                                   group=None,
                                   period=5,
                                   n_quantiles=5,
                                   mask=None,
                                   can_enter=None,
                                   can_exit=None,
                                   forward=True,
                                   commission=0.0008):
    """
    Prepare signal, return and quantile data for signal analysis.

    The method does not return anything. It stores the assembled table on
    ``self.signal_data`` and also sets ``self.n_quantiles``, ``self.period``,
    ``self.benchmark_ret`` and ``self.ret``.

    Parameters
    ----------
    signal : pd.DataFrame
        Index is date, columns are stocks.
    price : pd.DataFrame
        Index is date, columns are stocks.
        Exactly one of ``price`` / ``daily_ret`` must be provided.
    daily_ret : pd.DataFrame
        Index is date, columns are stocks.
    benchmark_price : pd.DataFrame or pd.Series or None
        Price of benchmark. At most one of ``benchmark_price`` /
        ``daily_benchmark_ret`` may be provided.
    daily_benchmark_ret : pd.DataFrame or pd.Series or None
        Daily ret of benchmark.
    high : pd.DataFrame
        Index is date, columns are stocks. Only used together with ``price``;
        otherwise the price itself is used in its place.
    low : pd.DataFrame
        Index is date, columns are stocks. Only used together with ``price``;
        otherwise the price itself is used in its place.
    group : pd.DataFrame
        Index is date, columns are stocks. Values are converted to ``str``.
    period : int
        periods to compute forward returns on.
    n_quantiles : int
        Positive number of quantiles. With 1, every usable cell gets
        quantile 1.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    can_enter : pd.DataFrame
        Date the security can be traded and BUY.
    can_exit : pd.DataFrame
        Date the security can be traded and SELL.
    forward : bool
        Return cal method. True by default. If True the returns are shifted
        back by ``period`` rows so each signal row carries its forward
        return; if False the signal, ``can_enter`` and ``mask`` are shifted
        forward by ``period`` rows instead.
    commission : float
        commission ratio per trade; subtracted from every computed return.

    Raises
    ------
    ValueError
        If neither or both of ``price`` / ``daily_ret`` are given, if both
        benchmark inputs are given, or if ``n_quantiles`` is not a positive
        ``int``.

    Notes
    -----
    ``self.signal_data`` is a pd.DataFrame indexed by the pd.MultiIndex
    ['trade_date', 'symbol'] with columns 'signal', 'return', 'upside_ret',
    'downside_ret', 'quantile' and, when ``group`` is given, 'group'.

    Deal with suspensions: if the period of calculating return is d
    (from T to T+d), then we do not use signal values of those suspended
    on T, and we do not calculate return for those suspended on T+d.
    """
    # ----------------------------------------------------------------------
    # parameter validation
    if price is None and daily_ret is None:
        raise ValueError("One of price / daily_ret must be provided.")
    if price is not None and daily_ret is not None:
        raise ValueError(
            "Only one of price / daily_ret should be provided.")
    if benchmark_price is not None and daily_benchmark_ret is not None:
        raise ValueError(
            "Only one of benchmark_price / daily_benchmark_ret should be provided."
        )
    # Type check first so that a non-numeric value yields the documented
    # ValueError instead of a TypeError from the comparison.
    if not (isinstance(n_quantiles, int) and n_quantiles > 0):
        raise ValueError(
            "n_quantiles must be a positive integer. Input is: {}".format(
                n_quantiles))

    if daily_ret is not None:
        warnings.warn(
            "Warning: 检查到使用daily_ret模式。未避免未来函数,请注意确保daily_ret格式为对应日期能实现的日收益."
        )

    # ----------------------------------------------------------------------
    # ensure inputs are aligned
    # ``signal`` is rebound further down, so keep the original frame as the
    # alignment template for the helpers below.
    template = signal

    def _align(frame, name):
        """Return *frame* reindexed like the signal, warning on mismatch."""
        # Explicit check instead of ``assert`` inside a bare ``except``:
        # asserts are stripped under ``python -O``, which silently skipped
        # the reindexing.
        if not (template.index.equals(frame.index)
                and template.columns.equals(frame.columns)):
            warnings.warn(
                "Warning: signal与{}的index/columns不一致,请检查输入参数!".format(name))
            frame = frame.reindex_like(template)
        return frame

    def _as_bool_frame(frame, name, default):
        """Return *frame* as an aligned boolean frame (or a constant one)."""
        if frame is None:
            return pd.DataFrame(index=template.index,
                                columns=template.columns,
                                data=default)
        frame = jutil.fillinf(_align(frame, name))
        # dtype could be float, so convert. Fill missing cells BEFORE the
        # integer cast: casting NaN to int raises.
        return frame.fillna(0).astype(int).astype(bool)

    mask = _as_bool_frame(mask, "mask", False)
    can_enter = _as_bool_frame(can_enter, "can_enter", True)
    can_exit = _as_bool_frame(can_exit, "can_exit", True)

    if group is not None:
        group = _align(group, "group").astype(str)

    # ----------------------------------------------------------------------
    # save data
    self.n_quantiles = n_quantiles
    self.period = period

    # ----------------------------------------------------------------------
    # Get dependent variables

    # benchmark return over the holding period
    self.benchmark_ret = None
    if benchmark_price is not None:
        benchmark_price = benchmark_price.reindex(index=signal.index)
        self.benchmark_ret = pfm.price2ret(benchmark_price,
                                           self.period,
                                           axis=0,
                                           compound=True)
    elif daily_benchmark_ret is not None:
        daily_benchmark_ret = daily_benchmark_ret.reindex(
            index=signal.index)
        self.benchmark_ret = pfm.daily_ret_to_ret(daily_benchmark_ret,
                                                  self.period)

    # holding-period return of each security
    is_real_price = daily_ret is None
    if is_real_price:
        price = jutil.fillinf(_align(price, "price"))
    else:
        daily_ret = _align(daily_ret, "daily_ret")
        daily_ret = jutil.fillinf(daily_ret).fillna(0)
        # cumulative series built from daily returns stands in for a price
        price = pfm.daily_ret_to_cum(daily_ret)

    # A security without a price cannot be entered. The previous
    # ``price != np.NaN`` was always True (NaN compares unequal to
    # everything), so this guard never had any effect.
    can_enter = np.logical_and(price.notnull(), can_enter)

    df_ret = pfm.price2ret(price,
                           period=self.period,
                           axis=0,
                           compound=True)
    # Where exiting is not allowed, use the next price at which it is
    # allowed (back-fill) to compute the return.
    price_can_exit = price.copy()
    price_can_exit[~can_exit] = np.nan
    price_can_exit = price_can_exit.bfill()
    ret_can_exit = pfm.price2ret(price_can_exit,
                                 period=self.period,
                                 axis=0,
                                 compound=True)
    df_ret[~can_exit] = ret_can_exit[~can_exit]

    if self.benchmark_ret is not None:
        # holding-period return relative to the benchmark
        residual_ret = df_ret.sub(self.benchmark_ret.values.flatten(),
                                  axis=0)
    else:
        residual_ret = df_ret
    residual_ret = jutil.fillinf(residual_ret)
    residual_ret -= commission

    # potential upside and downside over the holding period
    if high is not None and is_real_price:
        high = jutil.fillinf(_align(high, "high"))
    else:
        high = price
    upside_ret = compute_upside_returns(price,
                                        high,
                                        can_exit,
                                        self.period,
                                        compound=True)
    upside_ret = jutil.fillinf(upside_ret)
    upside_ret -= commission

    if low is not None and is_real_price:
        low = jutil.fillinf(_align(low, "low"))
    else:
        low = price
    downside_ret = compute_downside_returns(price,
                                            low,
                                            can_exit,
                                            self.period,
                                            compound=True)
    downside_ret = jutil.fillinf(downside_ret)
    downside_ret -= commission

    # ----------------------------------------------------------------------
    # Get independent variable
    signal = jutil.fillinf(signal)
    signal = signal.shift(1)  # avoid forward-looking bias

    if forward:
        # point-in-time signal and forward return
        residual_ret = residual_ret.shift(-self.period)
        upside_ret = upside_ret.shift(-self.period)
        downside_ret = downside_ret.shift(-self.period)
    else:
        # past signal and point-in-time return
        signal = signal.shift(self.period)
        can_enter = can_enter.shift(self.period)
        mask = mask.shift(self.period)

    self.ret = dict()
    self.ret["return"] = residual_ret
    self.ret["upside_ret"] = upside_ret
    self.ret["downside_ret"] = downside_ret

    # ----------------------------------------------------------------------
    # get masks
    # A cell is unusable when it is explicitly masked, has no signal, cannot
    # be entered, or has no return. Shifting a boolean frame introduces
    # missing values (and object dtype), so fill and cast back to bool
    # before applying ``~`` / logical operators.
    mask_signal = signal.isnull()
    cannot_enter = ~(can_enter.fillna(False).astype(bool))
    mask = np.logical_or(mask.fillna(True).astype(bool),
                         np.logical_or(mask_signal, cannot_enter))
    mask = np.logical_or(mask, self.ret["return"].isnull())

    # ----------------------------------------------------------------------
    # calculate quantile
    signal_masked = signal.copy()
    signal_masked = signal_masked[~mask]
    if n_quantiles == 1:
        df_quantile = signal_masked.copy()
        df_quantile.loc[:, :] = 1.0
    elif group is None:
        df_quantile = jutil.to_quantile(signal_masked,
                                        n_quantiles=n_quantiles)
    else:
        from jaqs_fxdayu.data.py_expression_eval import Parser
        ps = Parser()
        ps.index_member = None
        df_quantile = ps.group_quantile(df=signal_masked,
                                        group=group,
                                        n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # stack
    def stack_td_symbol(df):
        """Stack a date x symbol frame into a one-column long frame."""
        df = pd.DataFrame(df.stack(dropna=False))  # do not dropna
        df.index.names = ['trade_date', 'symbol']
        df.sort_index(axis=0, level=['trade_date', 'symbol'], inplace=True)
        return df

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(signal)
    res.columns = ['signal']
    for ret_type in self.ret:
        res[ret_type] = stack_td_symbol(self.ret[ret_type]).fillna(0)
    res['quantile'] = stack_td_symbol(df_quantile)
    if group is not None:
        res["group"] = stack_td_symbol(group)

    # keep only the usable cells
    mask = stack_td_symbol(mask)
    res = res.loc[~(mask.iloc[:, 0]), :]

    if len(res) > 0:
        print("Nan Data Count (should be zero) : {:d}; "
              "Percentage of effective data: {:.0f}%".format(
                  res.isnull().sum(axis=0).sum(),
                  len(res) * 100. / signal.size))
    else:
        print("No signal available.")
    res = res.astype({'signal': float, 'return': float, 'quantile': int})
    self.signal_data = res
def process_signal(self,
                   enter_signal,
                   exit_signal=None,
                   sig_type="long",
                   price=None,
                   daily_ret=None,
                   max_holding_period=None,
                   stoploss=None,
                   stopprofit=None,
                   mask=None,
                   can_enter=None,
                   can_exit=None,
                   group=None,
                   n_quantiles=1,
                   commission=0.0008):
    """
    Prepare entry/exit signal data for signal analysis.

    The method does not return anything. For the given ``sig_type`` it
    stores the exit positions on ``self.final_exit_pos[sig_type]``, the
    per-trade returns on ``self.ret[sig_type]`` and, when at least one
    usable signal remains, the assembled table on
    ``self.signal_data[sig_type]``. It also sets ``self.price`` and, for
    ``n_quantiles != 1``, ``self.period``.

    Parameters
    ----------
    enter_signal : pd.DataFrame
        Index is date, columns are stocks. When ``n_quantiles`` is 1 the
        values can only be -2 (open short) / 0 (no action) / 2 (open long).
    exit_signal : pd.DataFrame or list of pd.DataFrame
        Index is date, columns are stocks. Values can only be
        -1 (close short) / 0 (no action) / 1 (close long).
        The caller's list is not modified.
    sig_type : str
        "long"/"short", which type of signal to process.
    price : pd.DataFrame
        Index is date, columns are stocks.
        Exactly one of ``price`` / ``daily_ret`` must be provided.
    daily_ret : pd.DataFrame
        Index is date, columns are stocks.
    max_holding_period : int
        Limit the max holding period. Required when ``n_quantiles`` is
        not 1.
    stoploss : float
        stoploss ratio per trade.
    stopprofit : float
        stopprofit ratio per trade.
    mask : pd.DataFrame
        Data cells that should NOT be used.
    can_enter : pd.DataFrame
        Date the security can open.
    can_exit : pd.DataFrame
        Date the security can close.
    group : pd.DataFrame
        Index is date, columns are stocks.
    n_quantiles : int
        1 for event-type signals, larger than 1 for ordinary factors.
    commission : float
        commission ratio per trade; subtracted from every trade return.

    Raises
    ------
    ValueError
        If ``sig_type`` is invalid, if neither or both of ``price`` /
        ``daily_ret`` are given, if ``n_quantiles`` is not a positive
        ``int``, if signal values are outside the allowed sets, if an
        event-type signal has no exit condition at all, or if an ordinary
        factor has no ``max_holding_period``.
    """
    # ----------------------------------------------------------------------
    # parameter validation
    if sig_type not in ["long", "short"]:
        raise ValueError("信号类型(sig_type)只能为long/short.")
    if price is None and daily_ret is None:
        raise ValueError("One of price / daily_ret must be provided.")
    if price is not None and daily_ret is not None:
        raise ValueError(
            "Only one of price / daily_ret should be provided.")
    # Type check first so that a non-numeric value yields the documented
    # ValueError instead of a TypeError from the comparison.
    if not (isinstance(n_quantiles, int) and n_quantiles > 0):
        raise ValueError(
            "n_quantiles must be a positive integer. Input is: {}".format(
                n_quantiles))

    enter_signal = jutil.fillinf(enter_signal)
    if n_quantiles == 1:  # event-type entry signal
        # entry values must be -2 (open short), 0 (no action) or 2 (open long)
        enter_signal = enter_signal.fillna(0)
        if not enter_signal.isin([-2, 0, 2]).all().all():
            raise ValueError("检测到n_quantiles为1,该模式下测试的enter_signal为事件类因子."
                             "请确保enter_signal里的信号只能为-2(开空),0(不做操作),2(开多))."
                             "如需测试普通因子,请指定n_quantiles为大于1的整数.")
        # at least one kind of exit condition is required
        if (exit_signal is None) and (max_holding_period is None) and \
                (stoploss is None) and (stopprofit is None):
            raise ValueError(
                "确保至少有一种出场信号(exit_signal/max_holding_period/stoploss/stopprofit)"
            )
    else:  # ordinary factor used as entry signal
        if max_holding_period is None:
            raise ValueError("检测到n_quantiles不为1,该模式下测试的enter_signal为普通因子."
                             "该模式下,max_holding_period参数不能为空.")
        # NOTE(review): assumed to belong to this branch only — the
        # nesting level is ambiguous in the collapsed source; confirm.
        self.period = max_holding_period

    # Normalise the custom exit signals into a NEW list so that the list
    # object passed in by the caller is never modified.
    exit_signals = []
    if exit_signal is not None:
        if not isinstance(exit_signal, list):
            exit_signal = [exit_signal]
        for raw_exit in exit_signal:
            aligned_exit = raw_exit.reindex_like(enter_signal)
            aligned_exit = jutil.fillinf(aligned_exit).fillna(0)
            # exit values must be -1 (close short), 0 (no action), 1 (close long)
            if not aligned_exit.isin([-1, 0, 1]).all().all():
                raise ValueError(
                    "请确保所有exit_signal里的信号只能为-1(平空),0(不做操作),1(平多)")
            exit_signals.append(aligned_exit)

    if group is not None:
        group = group.reindex_like(enter_signal)

    # ----------------------------------------------------------------------
    # align the 0/1 filters with the entry signal
    sig_filter = {}
    for filter_name, frame in (("mask", mask),
                               ("can_enter", can_enter),
                               ("can_exit", can_exit)):
        if frame is not None:
            frame = jutil.fillinf(frame.reindex_like(enter_signal))
            # Fill missing cells BEFORE the integer cast: casting NaN
            # (introduced by reindexing / fillinf) to int raises.
            sig_filter[filter_name] = frame.fillna(0).astype(int)
        else:
            # default: nothing masked, entering and exiting always allowed
            sig_filter[filter_name] = pd.DataFrame(
                index=enter_signal.index,
                columns=enter_signal.columns,
                data=0 if filter_name == "mask" else 1)

    # ----------------------------------------------------------------------
    # process
    # Signals are observed at the close of a day and acted on during the
    # next trading day: at the open with price=open, at the close with
    # price=close, or at another price such as price=vwap. Lag everything
    # by one row to avoid look-ahead bias.
    enter_signal = enter_signal.shift(1)
    exit_signals = [aligned_exit.shift(1) for aligned_exit in exit_signals]

    # price data
    if daily_ret is not None:
        daily_ret = daily_ret.reindex_like(enter_signal)
        daily_ret = jutil.fillinf(daily_ret).fillna(0)
        price = pfm.daily_ret_to_cum(daily_ret)  # cumulative net value
    else:
        price = price.reindex_like(enter_signal)
        price = jutil.fillinf(price)  # actual price

    self.price = price

    # ----------------------------------------------------------------------
    # collect candidate exit positions
    pos = []
    # time-based exit
    if max_holding_period is not None:
        pos.append(
            get_period_exit_pos(enter_signal, period=max_holding_period))
    # stop-loss exit
    if stoploss is not None:
        pos.append(
            get_stop_pos(price,
                         stoploss,
                         sig_type=sig_type,
                         stop_type="stop_loss"))
    # stop-profit exit
    if stopprofit is not None:
        pos.append(
            get_stop_pos(price,
                         stopprofit,
                         sig_type=sig_type,
                         stop_type="stop_profit"))
    # custom exit signals
    for aligned_exit in exit_signals:
        pos.append(
            get_exit_pos(aligned_exit, exit_type="close_%s" % (sig_type, )))

    # combine all exit conditions and take whichever triggers first
    # (presumably LONGINT marks "never triggered" — confirm against helpers)
    exit_pos = reduce(get_first_pos, pos).replace(LONGINT, np.nan)
    # nearest position at which exiting is permitted, for every day
    exit_permited_pos = get_exit_pos(sig_filter["can_exit"], value=[1])
    self.final_exit_pos[sig_type] = get_exit_value(exit_permited_pos,
                                                   exit_pos)

    # ----------------------------------------------------------------------
    # return of each signal
    price_exit = get_exit_value(price, self.final_exit_pos[sig_type])
    ret_exit = jutil.fillinf((price_exit - price) / price)
    if sig_type == "short":
        ret_exit = -1 * ret_exit
    self.ret[sig_type] = ret_exit - commission

    # ----------------------------------------------------------------------
    # mask signal
    if n_quantiles == 1:
        # event factor: only cells carrying the matching open signal count
        value = 2 if sig_type == "long" else -2
        mask_signal = enter_signal != value
    else:
        # ordinary factor: every non-missing value counts
        mask_signal = enter_signal.isnull()

    mask_signal = np.logical_or(
        mask_signal,
        np.logical_or(sig_filter["mask"], sig_filter["can_enter"] != 1))
    mask_signal = np.logical_or(mask_signal, self.ret[sig_type].isnull())

    # drop signals whose exit falls on the entry day itself
    sig_pos = get_sig_pos(self.final_exit_pos[sig_type])
    mask_signal = np.logical_or(mask_signal,
                                sig_pos == self.final_exit_pos[sig_type])

    # ----------------------------------------------------------------------
    # calculate quantile
    if n_quantiles == 1:
        df_quantile = pd.DataFrame(1,
                                   index=enter_signal.index,
                                   columns=enter_signal.columns)
    else:
        signal_masked = enter_signal.copy()
        signal_masked = signal_masked[~mask_signal]
        if group is None:
            df_quantile = jutil.to_quantile(signal_masked,
                                            n_quantiles=n_quantiles)
        else:
            from jaqs_fxdayu.data.py_expression_eval import Parser
            ps = Parser()
            ps.index_member = None
            df_quantile = ps.group_quantile(df=signal_masked,
                                            group=group,
                                            n_quantiles=n_quantiles)

    # ----------------------------------------------------------------------
    # concat signal value
    res = stack_td_symbol(enter_signal)
    res.columns = ['signal']
    res["return"] = stack_td_symbol(self.ret[sig_type])
    res["exit_time"] = stack_td_symbol(self.final_exit_pos[sig_type])
    res['quantile'] = stack_td_symbol(df_quantile)
    if group is not None:
        res["group"] = stack_td_symbol(group)
    res["sig_type"] = sig_type

    # keep only the usable cells
    mask_signal = stack_td_symbol(mask_signal)
    res = res.loc[~(mask_signal.iloc[:, 0]), :]

    if len(res) > 0:
        print("Nan Data Count (should be zero) : {:d}; "
              "Percentage of effective data: {:.0f}%".format(
                  res.isnull().sum(axis=0).sum(),
                  len(res) * 100. / enter_signal.size))
        res = res.astype({
            'signal': float,
            'return': float,
            'quantile': int
        })
        self.signal_data[sig_type] = res
    else:
        print("sig_type %s:No signal available." % (sig_type, ))