def FactorTest_deal(codes, self_obj):
    """Compute the indicator frame and forward returns for a set of codes.

    Args:
        codes: Passed straight through to ``smpl.get_data`` as the first
            positional argument (presumably stock codes; confirm with caller).
        self_obj: A dill-serialized object. After ``dill.loads`` it must
            expose ``start``, ``end``, ``gap``, ``freq``, ``main_field``,
            ``neutralize`` (a mapping) and ``ind_Model_Class``.

    Returns:
        A 2-tuple ``(indicator, forward_return)``. When
        ``neutralize['enable']`` is truthy, ``indicator`` is the result of
        ``smpl.add_marketvalue_industry`` applied to the indicator joined
        with ``data.close``; otherwise it is the NaN-free indicator frame.
    """
    # Function-scope imports, kept in their original order. ``af`` and
    # ``pretreat`` are not referenced in this function; they are retained so
    # that any import-time effects stay unchanged.
    import Analysis_Funs as af
    import tools.Sample_Tools as smpl
    import tools.Pretreat_Tools as pretreat
    import dill
    import pandas as pd

    # NOTE(review): dill.loads can execute arbitrary code embedded in its
    # payload; self_obj must only ever come from a trusted producer.
    tester = dill.loads(self_obj)

    sample = smpl.get_data(
        codes, start=tester.start, end=tester.end, gap=tester.gap)
    resampled = smpl.resample_stockdata_low(sample.data, freq=tester.freq)
    forward_return = smpl.get_forward_return(resampled, 'close')

    # To keep the information used by ind_Model_Class complete, the indicator
    # is generated from the data *before* resampling; resampling is done
    # separately later on.
    model = tester.ind_Model_Class(sample.data)
    model.fit()
    indicator = pd.DataFrame(model.ind_df[tester.main_field])
    indicator.dropna(axis=0, inplace=True)

    if not tester.neutralize.get('enable', False):
        return (indicator, forward_return)

    # The close price is joined in for price-adjustment purposes.
    with_close = pd.concat([indicator, sample.close], axis=1)
    with_close.dropna(axis=0, inplace=True)
    use_static_mv = tester.neutralize.get('static_mv', False)
    enriched = smpl.add_marketvalue_industry(
        with_close, static_mv=use_static_mv)
    return (enriched, forward_return)
def process(self):
    """Run the factor test and store the results on the instance.

    Reads ``sample``, ``start``, ``end``, ``gap``, ``only_main``, ``freq``,
    ``ind_Model_Class``, ``main_field`` and ``neutralize`` from ``self``.

    Sets the following attributes:
        rank_ic: Result of ``af.get_rank_ic`` for the factor against the
            forward returns.
        res: One-row DataFrame with columns ``rankIC``, ``rankIC_std``,
            ``rankIC_T``, ``rankIC_P`` plus ``ICIR`` and ``winning``
            (both rounded to 6 decimals).
        ind_ret_df: Factor joined column-wise with the forward returns,
            rows containing NaN removed.
        ind_binned: ``ind_ret_df`` grouped on index level 0 with
            ``pretreat.binning`` applied per group (10 boxes).
    """
    sample = smpl.get_sample_by_zs(
        name=self.sample,
        start=self.start,
        end=self.end,
        gap=self.gap,
        only_main=self.only_main,
    )
    resampled = smpl.resample_stockdata_low(sample.data, freq=self.freq)

    # Later resampling is driven by ret_forward; otherwise resample can
    # produce inconsistent dates across different frequencies.
    ret_forward = smpl.get_forward_return(resampled, 'close')

    # To keep the information used by ind_Model_Class complete, the indicator
    # is generated from the data *before* resampling; resampling is done
    # separately later on.
    model = self.ind_Model_Class(sample.data)
    model.fit()
    factor = pd.DataFrame(model.ind_df[self.main_field])
    factor.dropna(axis=0, inplace=True)

    if self.neutralize.get('enable', False):
        # The close price is joined in for price-adjustment purposes.
        with_close = pd.concat([factor, sample.close], axis=1)
        with_close.dropna(axis=0, inplace=True)
        use_static_mv = self.neutralize.get('static_mv', False)
        enriched = smpl.add_marketvalue_industry(
            with_close, static_mv=use_static_mv)

        exposures = enriched[['totalCapital', 'industry']].sort_index()
        raw_factor = enriched.iloc[:, 0].sort_index()
        factor = pretreat.neutralize(
            raw_factor,
            exposures,
            categorical=['industry'],
            logarithmetics=['totalCapital'],
        )
        # Factor standardization was removed on purpose: the rank IC of the
        # standardized factor often disagreed with what the binning test
        # showed.
        self.rank_ic = af.get_rank_ic(factor, ret_forward)
    else:
        # Per the original author's notes: neutralize yields a Series, while
        # here the factor is still a DataFrame, hence the column selection.
        # get_rank_ic intersects the indexes internally, so no resampling is
        # needed before calling it.
        self.rank_ic = af.get_rank_ic(factor[self.main_field], ret_forward)

    summary = pd.DataFrame(
        [af.get_ic_desc(self.rank_ic)],
        columns=['rankIC', 'rankIC_std', 'rankIC_T', 'rankIC_P'],
    )
    self.res = summary
    self.res['ICIR'] = round(af.get_ic_ir(self.rank_ic), 6)
    self.res['winning'] = round(af.get_winning_rate(self.rank_ic), 6)

    # Keep only the level-0 index keys (presumably dates; confirm) that are
    # present in both the factor and the forward returns.
    factor_level0 = factor.index.get_level_values(0).unique()
    return_level0 = ret_forward.index.get_level_values(0).unique()
    shared_level0 = factor_level0.intersection(return_level0)
    factor_aligned = factor.loc[shared_level0]

    self.ind_ret_df = pd.concat([factor_aligned, ret_forward], axis=1)
    self.ind_ret_df.dropna(axis=0, inplace=True)

    def _bin_group(group):
        # Bin one level-0 group on the main factor column.
        return pretreat.binning(
            group, deal_column=self.main_field, box_count=10, inplace=True)

    # Binning, performed independently within each level-0 group.
    grouped = self.ind_ret_df.groupby(level=0, group_keys=False)
    self.ind_binned = grouped.apply(_bin_group)