class Strategy(object):
    """Alpha-model driven back-testing strategy.

    Loads factor / industry / benchmark / risk-model / forward-return data
    over a trading schedule, trains one model per re-balance date (optionally
    in parallel via dask) and simulates the resulting portfolio.
    """

    def __init__(self,
                 alpha_model,
                 data_meta,
                 universe,
                 start_date,
                 end_date,
                 freq,
                 benchmark=905,
                 industry_cat='sw_adj',
                 industry_level=1,
                 dask_client=None):
        """
        :param alpha_model: model exposing ``formulas``, ``features`` and ``predict``
        :param data_meta: data configuration (data source, risk model, factor processing)
        :param universe: stock universe to back-test on
        :param start_date: schedule start date
        :param end_date: schedule end date
        :param freq: re-balance frequency string understood by ``map_freq``
        :param benchmark: benchmark index code (default: 905)
        :param industry_cat: industry classification category
        :param industry_level: industry classification level
        :param dask_client: optional dask client used to train models in parallel
        """
        self.alpha_model = alpha_model
        self.data_meta = data_meta
        self.universe = universe
        self.benchmark = benchmark
        # Re-balance dates on the Shanghai Stock Exchange calendar,
        # normalized to 'YYYY-MM-DD' strings for the SQL layer.
        self.dates = makeSchedule(start_date, end_date, freq, 'china.sse')
        self.dates = [d.strftime('%Y-%m-%d') for d in self.dates]
        self.industry_cat = industry_cat
        self.industry_level = industry_level
        self.freq = freq
        self.horizon = map_freq(freq)
        self.engine = SqlEngine(self.data_meta.data_source)
        self.dask_client = dask_client

    def prepare_backtest_data(self):
        """Fetch and merge all raw data needed by :meth:`run`.

        Populates ``self.total_data`` (one row per trade_date/code),
        ``self.total_risk_cov`` and ``self.index_return``.
        """
        total_factors = self.engine.fetch_factor_range(
            self.universe, self.alpha_model.formulas, dates=self.dates)
        alpha_logger.info("alpha factor data loading finished ...")

        total_industry = self.engine.fetch_industry_matrix_range(
            self.universe,
            dates=self.dates,
            category=self.industry_cat,
            level=self.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(
            dates=self.dates, benchmark=self.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            self.universe,
            dates=self.dates,
            risk_model=self.data_meta.risk_model)
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(
            self.universe, dates=self.dates, horizon=self.horizon, offset=1)
        alpha_logger.info("returns data loading finished ...")

        total_data = pd.merge(total_factors, total_industry,
                              on=['trade_date', 'code'])
        # Left join keeps stocks outside the benchmark; their missing
        # benchmark weight becomes 0.
        total_data = pd.merge(total_data, total_benchmark,
                              on=['trade_date', 'code'], how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data, total_returns,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data, total_risk_exposure,
                              on=['trade_date', 'code'])

        # Column vectors used later as constraint exposures.
        is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
        total_data.loc[:, 'benchmark'] = is_in_benchmark
        total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
        total_data.sort_values(['trade_date', 'code'], inplace=True)

        self.index_return = self.engine.fetch_dx_return_index_range(
            self.benchmark,
            dates=self.dates,
            horizon=self.horizon,
            offset=1).set_index('trade_date')
        self.total_data = total_data
        self.total_risk_cov = total_risk_cov

    def _create_lu_bounds(self, running_setting, codes, benchmark_w):
        """Build per-asset lower/upper weight bound arrays.

        Precedence (lowest to highest): a long-only default of ``[0, 1]``,
        a symmetric band of ``weights_bandwidth`` around the benchmark
        weight, then explicit ``lbound`` / ``ubound`` settings. Each explicit
        bound may be a scalar or a dict keyed by code with an optional
        ``'other'`` fallback entry.

        :param running_setting: object with ``weights_bandwidth``, ``lbound``
            and ``ubound`` attributes
        :param codes: security codes aligned with ``benchmark_w``
        :param benchmark_w: benchmark weight array
        :return: tuple ``(lbound, ubound)`` of numpy arrays
        """
        codes = np.array(codes)

        # Fix: the original left lbound/ubound unbound (UnboundLocalError)
        # when neither a bandwidth nor explicit bounds were supplied.
        lbound = np.zeros_like(benchmark_w)
        ubound = np.ones_like(benchmark_w)

        # `is not None` so an explicit zero bandwidth is still honoured.
        if running_setting.weights_bandwidth is not None:
            lbound = np.maximum(
                0., benchmark_w - running_setting.weights_bandwidth)
            ubound = running_setting.weights_bandwidth + benchmark_w

        lb = running_setting.lbound
        ub = running_setting.ubound

        # Fix: override each side independently. The original guarded both
        # with `if lb or ub:`, so supplying only one of them multiplied
        # `np.ones_like(...) * None` for the other (TypeError), and a scalar
        # bound of 0 was silently ignored.
        if lb is not None:
            if not isinstance(lb, dict):
                lbound = np.ones_like(benchmark_w) * lb
            else:
                lbound = np.zeros_like(benchmark_w)
                for c in lb:
                    lbound[codes == c] = lb[c]
                if 'other' in lb:
                    for i, c in enumerate(codes):
                        if c not in lb:
                            lbound[i] = lb['other']

        if ub is not None:
            if not isinstance(ub, dict):
                ubound = np.ones_like(benchmark_w) * ub
            else:
                ubound = np.ones_like(benchmark_w)
                for c in ub:
                    ubound[codes == c] = ub[c]
                if 'other' in ub:
                    for i, c in enumerate(codes):
                        if c not in ub:
                            ubound[i] = ub['other']

        return lbound, ubound

    def _train_models(self, total_data_groups):
        """Train one model per re-balance date.

        Uses the dask client when available; otherwise trains sequentially.

        :param total_data_groups: ``DataFrameGroupBy`` keyed by trade_date
        :return: dict mapping trade_date -> trained model
        """
        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(
                    ref_date.strftime('%Y-%m-%d'),
                    self.alpha_model,
                    self.data_meta)
            return models

        def worker(parameters):
            new_model = train_model(
                parameters[0].strftime('%Y-%m-%d'),
                parameters[1],
                parameters[2])
            return parameters[0], new_model

        futures = self.dask_client.map(
            worker,
            [(d[0], self.alpha_model, self.data_meta)
             for d in total_data_groups])
        return dict(self.dask_client.gather(futures))

    def run(self, running_setting):
        """Run the back-test.

        :param running_setting: execution configuration (bounds, executor,
            re-balance method, extra optimizer options)
        :return: tuple ``(ret_df, positions)`` — per-period strategy returns
            and the executed positions per date
        """
        alpha_logger.info("starting backting ...")

        total_data_groups = self.total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverags = []
        previous_pos = pd.DataFrame()
        # Deep copy so successive runs do not share executor state.
        executor = copy.deepcopy(running_setting.executor)
        positions = pd.DataFrame()

        models = self._train_models(total_data_groups)

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]
            codes = this_data.code.values.tolist()

            # Carry yesterday's executed weights forward, aligned to today's
            # universe (new entrants start at 0).
            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.loc[codes]
                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            # A security covariance matrix is only needed for the
            # target-volatility re-balance method.
            if running_setting.rebalance_method == 'tv':
                risk_cov = self.total_risk_cov[
                    self.total_risk_cov.trade_date == ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(running_setting.bounds,
                                            this_data,
                                            benchmark_w)
            lbound, ubound = self._create_lu_bounds(running_setting, codes,
                                                    benchmark_w)

            # Neutralize / standardize each factor separately, dropping rows
            # where that factor is missing, then re-align to `codes` and fall
            # back to the cross-sectional median for remaining gaps.
            features = new_model.features
            dfs = []
            for name in features:
                data_cleaned = this_data.dropna(subset=[name])
                raw_factors = data_cleaned[[name]].values
                new_factors = factor_processing(
                    raw_factors,
                    pre_process=self.data_meta.pre_process,
                    risk_factors=data_cleaned[
                        self.data_meta.neutralized_risk].values.astype(float)
                    if self.data_meta.neutralized_risk else None,
                    post_process=self.data_meta.post_process)
                dfs.append(pd.DataFrame(new_factors,
                                        columns=[name],
                                        index=data_cleaned.code))

            new_factors = pd.concat(dfs, axis=1)
            new_factors = new_factors.loc[codes].fillna(new_factors.median())
            er = new_model.predict(new_factors).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(running_setting,
                                             er,
                                             this_data,
                                             constraints,
                                             benchmark_w,
                                             lbound,
                                             ubound,
                                             sec_cov=sec_cov,
                                             current_position=current_position)
            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            # dx is a log return; convert to simple return before weighting,
            # then store the portfolio period return back in log space.
            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverags.append(leverage)
            positions = positions.append(executed_pos)
            previous_pos = executed_pos

        positions['benchmark_weight'] = self.total_data['weight'].values
        positions['dx'] = self.total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame({'returns': rets,
                               'turn_over': turn_overs,
                               'leverage': leverags},
                              index=trade_dates)
        ret_df['benchmark_returns'] = self.index_return['dx']
        # Book the return realized over [t, t+1] at t+1: append one more
        # calendar slot, shift everything down and zero the first row.
        ret_df.loc[advanceDateByCalendar('china.sse',
                                         ret_df.index[-1],
                                         self.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = \
            ret_df['returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions

    @staticmethod
    def _generate_sec_cov(current_data, risk_cov):
        """Assemble the security covariance matrix from the factor model.

        ``sec_cov = B @ F @ B.T + diag(srisk**2)``, scaled from (pct)^2 to
        decimal variance by the /10000 factors.
        """
        risk_exposure = current_data[all_styles].values
        risk_cov = risk_cov[all_styles].values
        special_risk = current_data['srisk'].values
        sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 \
            + np.diag(special_risk ** 2) / 10000
        return sec_cov

    def _calculate_pos(self, running_setting, er, data, constraints,
                       benchmark_w, lbound, ubound, sec_cov,
                       current_position):
        """Solve for the target portfolio from expected returns.

        Thin wrapper around ``er_portfolio_analysis`` that forwards the
        configured re-balance method and optional ``target_vol`` /
        ``turn_over_target`` settings.
        """
        more_opts = running_setting.more_opts
        target_pos, _ = er_portfolio_analysis(
            er,
            industry=data.industry_name.values,
            dx_return=None,
            constraints=constraints,
            detail_analysis=False,
            benchmark=benchmark_w,
            method=running_setting.rebalance_method,
            lbound=lbound,
            ubound=ubound,
            current_position=current_position,
            target_vol=more_opts.get('target_vol'),
            cov=sec_cov,
            turn_over_target=more_opts.get('turn_over_target'))
        return target_pos
class Strategy(object):
    """Alpha-model driven back-testing strategy (single-setting variant).

    Unlike the range-constructor variant, all schedule / universe / bound
    configuration lives on one ``running_setting`` object and the raw data
    is fetched inside :meth:`run`.
    """

    def __init__(self, alpha_model, data_meta, running_setting,
                 dask_client=None):
        """
        :param alpha_model: model exposing ``formulas``, ``features`` and ``predict``
        :param data_meta: data configuration (data source, risk model, factor processing)
        :param running_setting: back-test configuration (universe, dates,
            benchmark, horizon, bounds, executor, re-balance method, ...)
        :param dask_client: optional dask client used to train models in parallel
        """
        self.alpha_model = alpha_model
        self.data_meta = data_meta
        self.running_setting = running_setting
        self.engine = SqlEngine(self.data_meta.data_source)
        self.dask_client = dask_client

    def run(self):
        """Load data, train per-date models and run the back-test.

        :return: tuple ``(ret_df, positions)`` — per-period strategy returns
            and the executed positions per date
        """
        alpha_logger.info("starting backting ...")

        total_factors = self.engine.fetch_factor_range(
            self.running_setting.universe,
            self.alpha_model.formulas,
            dates=self.running_setting.dates)
        alpha_logger.info("alpha factor data loading finished ...")

        total_industry = self.engine.fetch_industry_matrix_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            category=self.running_setting.industry_cat,
            level=self.running_setting.industry_level)
        alpha_logger.info("industry data loading finished ...")

        total_benchmark = self.engine.fetch_benchmark_range(
            dates=self.running_setting.dates,
            benchmark=self.running_setting.benchmark)
        alpha_logger.info("benchmark data loading finished ...")

        total_risk_cov, total_risk_exposure = self.engine.fetch_risk_model_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            risk_model=self.data_meta.risk_model)
        alpha_logger.info("risk_model data loading finished ...")

        total_returns = self.engine.fetch_dx_return_range(
            self.running_setting.universe,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1)
        alpha_logger.info("returns data loading finished ...")

        total_data = pd.merge(total_factors, total_industry,
                              on=['trade_date', 'code'])
        # Left join keeps stocks outside the benchmark; their missing
        # benchmark weight becomes 0.
        total_data = pd.merge(total_data, total_benchmark,
                              on=['trade_date', 'code'], how='left')
        total_data.fillna({'weight': 0.}, inplace=True)
        total_data = pd.merge(total_data, total_returns,
                              on=['trade_date', 'code'])
        total_data = pd.merge(total_data, total_risk_exposure,
                              on=['trade_date', 'code'])

        # Fix: a pandas Series has no .reshape; go through .values first
        # (matches the range-constructor variant of this class).
        is_in_benchmark = (total_data.weight > 0.).astype(float).values.reshape((-1, 1))
        total_data.loc[:, 'benchmark'] = is_in_benchmark
        total_data.loc[:, 'total'] = np.ones_like(is_in_benchmark)
        total_data.sort_values(['trade_date', 'code'], inplace=True)

        total_data_groups = total_data.groupby('trade_date')

        rets = []
        turn_overs = []
        leverags = []
        previous_pos = pd.DataFrame()
        # Deep copy so successive runs do not share executor state.
        executor = copy.deepcopy(self.running_setting.executor)
        positions = pd.DataFrame()

        # Train one model per re-balance date, via dask when available.
        if self.dask_client is None:
            models = {}
            for ref_date, _ in total_data_groups:
                models[ref_date] = train_model(
                    ref_date.strftime('%Y-%m-%d'),
                    self.alpha_model,
                    self.data_meta)
        else:
            def worker(parameters):
                new_model = train_model(
                    parameters[0].strftime('%Y-%m-%d'),
                    parameters[1],
                    parameters[2])
                return parameters[0], new_model

            futures = self.dask_client.map(
                worker,
                [(d[0], self.alpha_model, self.data_meta)
                 for d in total_data_groups])
            models = dict(self.dask_client.gather(futures))

        for ref_date, this_data in total_data_groups:
            new_model = models[ref_date]
            # Fill missing feature values with the cross-sectional median.
            this_data = this_data.fillna(
                this_data[new_model.features].median())
            codes = this_data.code.values.tolist()

            # A security covariance matrix is only needed for the
            # target-volatility re-balance method.
            if self.running_setting.rebalance_method == 'tv':
                risk_cov = total_risk_cov[
                    total_risk_cov.trade_date == ref_date]
                sec_cov = self._generate_sec_cov(this_data, risk_cov)
            else:
                sec_cov = None

            benchmark_w = this_data.weight.values
            constraints = LinearConstraints(self.running_setting.bounds,
                                            this_data,
                                            benchmark_w)

            # Symmetric weight band around the benchmark, floored at 0.
            lbound = np.maximum(
                0., benchmark_w - self.running_setting.weights_bandwidth)
            ubound = self.running_setting.weights_bandwidth + benchmark_w

            # Carry yesterday's executed weights forward, aligned to today's
            # universe (new entrants start at 0).
            if previous_pos.empty:
                current_position = None
            else:
                previous_pos.set_index('code', inplace=True)
                remained_pos = previous_pos.loc[codes]
                remained_pos.fillna(0., inplace=True)
                current_position = remained_pos.weight.values

            features = new_model.features
            raw_factors = this_data[features].values
            new_factors = factor_processing(
                raw_factors,
                pre_process=self.data_meta.pre_process,
                risk_factors=this_data[
                    self.data_meta.neutralized_risk].values.astype(float)
                if self.data_meta.neutralized_risk else None,
                post_process=self.data_meta.post_process)

            er = new_model.predict(
                pd.DataFrame(new_factors, columns=features)).astype(float)

            alpha_logger.info('{0} re-balance: {1} codes'.format(
                ref_date, len(er)))
            target_pos = self._calculate_pos(
                er,
                this_data,
                constraints,
                benchmark_w,
                lbound,
                ubound,
                sec_cov=sec_cov,
                current_position=current_position,
                **self.running_setting.more_opts)
            target_pos['code'] = codes
            target_pos['trade_date'] = ref_date

            turn_over, executed_pos = executor.execute(target_pos=target_pos)
            leverage = executed_pos.weight.abs().sum()

            # dx is a log return; convert to simple return before weighting,
            # then store the portfolio period return back in log space.
            ret = executed_pos.weight.values @ (np.exp(this_data.dx.values) - 1.)
            rets.append(np.log(1. + ret))
            executor.set_current(executed_pos)
            turn_overs.append(turn_over)
            leverags.append(leverage)
            positions = positions.append(executed_pos)
            previous_pos = executed_pos

        positions['benchmark_weight'] = total_data['weight'].values
        positions['dx'] = total_data.dx.values

        trade_dates = positions.trade_date.unique()
        ret_df = pd.DataFrame({'returns': rets,
                               'turn_over': turn_overs,
                               'leverage': leverags},
                              index=trade_dates)

        index_return = self.engine.fetch_dx_return_index_range(
            self.running_setting.benchmark,
            dates=self.running_setting.dates,
            horizon=self.running_setting.horizon,
            offset=1).set_index('trade_date')
        ret_df['benchmark_returns'] = index_return['dx']
        # Book the return realized over [t, t+1] at t+1: append one more
        # calendar slot, shift everything down and zero the first row.
        ret_df.loc[advanceDateByCalendar('china.sse',
                                         ret_df.index[-1],
                                         self.running_setting.freq)] = 0.
        ret_df = ret_df.shift(1)
        ret_df.iloc[0] = 0.
        ret_df['excess_return'] = \
            ret_df['returns'] - ret_df['benchmark_returns'] * ret_df['leverage']

        return ret_df, positions

    @staticmethod
    def _generate_sec_cov(current_data, risk_cov):
        """Assemble the security covariance matrix from the factor model.

        ``sec_cov = B @ F @ B.T + diag(srisk**2)``, scaled from (pct)^2 to
        decimal variance by the /10000 factors.
        """
        risk_exposure = current_data[all_styles].values
        risk_cov = risk_cov[all_styles].values
        special_risk = current_data['srisk'].values
        sec_cov = risk_exposure @ risk_cov @ risk_exposure.T / 10000 \
            + np.diag(special_risk ** 2) / 10000
        return sec_cov

    def _calculate_pos(self, er, data, constraints, benchmark_w, lbound,
                       ubound, **kwargs):
        """Solve for the target portfolio from expected returns.

        Thin wrapper around ``er_portfolio_analysis``; optional settings
        (``current_position``, ``target_vol``, ``sec_cov``,
        ``turn_over_target``) arrive via ``kwargs``.
        """
        target_pos, _ = er_portfolio_analysis(
            er,
            industry=data.industry_name.values,
            dx_return=None,
            constraints=constraints,
            detail_analysis=False,
            benchmark=benchmark_w,
            method=self.running_setting.rebalance_method,
            lbound=lbound,
            ubound=ubound,
            current_position=kwargs.get('current_position'),
            target_vol=kwargs.get('target_vol'),
            cov=kwargs.get('sec_cov'),
            turn_over_target=kwargs.get('turn_over_target'))
        return target_pos