def add_industry_share_value(self):
    """
    Add ``industry`` and ``share_value`` columns to ``self.position_data_df``.

    For every row, the row's index label (used as the date) and its
    ``instrument_exchange`` value (used as the stock code) are looked up in
    the index-class table and in the ``'float_a_share_value'`` share table.
    An industry entry that is not a string (for example NaN) is recorded
    as ``'other'``.

    A frame with no rows simply receives two empty columns.

    :return: None; ``self.position_data_df`` is modified in place.
    :raises KeyError: if a (date, stock code) pair is missing from either
        lookup table.
    """
    index_class_obj = GetIndexClass()
    index_class = index_class_obj.get_index_class()
    index_class_obj.get_zero_index_class()
    share_data = GetShare().get_share('float_a_share_value')

    positions = self.position_data_df
    industries = []
    share_values = []
    # Walk (date, code) pairs directly instead of a row-wise apply: this
    # avoids building a Series per row and keeps working when there are no
    # rows, where unpacking ``zip(*apply(...))`` has nothing to unpack.
    for date_time, stock_code in zip(positions.index,
                                     positions['instrument_exchange']):
        industry = index_class.loc[date_time, stock_code]
        share_value = share_data.loc[date_time, stock_code]
        industries.append(industry if isinstance(industry, str) else 'other')
        share_values.append(share_value)

    positions['industry'] = industries
    positions['share_value'] = share_values
def neutralize_method(self, method):
    """
    Neutralize ``self.raw_data`` row by row via OLS residuals.

    Each row of ``self.raw_data`` is regressed (with an added constant) on
    the exposures selected by ``method`` and replaced by the residuals of
    that regression.

    :param method: container tested with ``in`` for
        ``NeutralizeMethod.INDUSTRY.value`` and
        ``NeutralizeMethod.MARKET_VALUE.value``.
    :return: the neutralized data, which is also stored back on
        ``self.raw_data``.
    """
    classifier = GetIndexClass()
    classifier.get_index_class()
    classifier.get_zero_index_class()

    use_market_value = NeutralizeMethod.MARKET_VALUE.value in method
    if use_market_value:
        float_values = GetShare().get_share('float_a_share_value')
    else:
        float_values = pd.DataFrame({})

    def residuals_for_row(row):
        """Return the OLS residuals of one row against its exposures."""
        print('data.name', row.name)
        row_label = row.name
        # Stocks whose factor value is NaN take no part in the regression.
        observed = row.dropna()

        if NeutralizeMethod.INDUSTRY.value in method:
            industry_part = classifier.get_index_class_in_date(row_label)
        else:
            industry_part = pd.DataFrame({})

        if use_market_value:
            value_part = pd.DataFrame(
                {'float_a_share_value': float_values.loc[row_label].dropna()})
        else:
            value_part = pd.DataFrame({})

        # Outer join followed by dropna keeps only stocks that have every
        # selected exposure.
        exposures = industry_part.join(value_part, how='outer').dropna()

        # Stocks present in both the factor row and the exposure table.
        codes = list(set(observed.index) & set(exposures.index))

        # Sort both sides so the regression inputs line up row by row.
        endog = observed[codes].sort_index()
        exog = sm.add_constant(exposures.reindex(codes).sort_index())

        # The residuals are the neutralized factor values.
        return sm.OLS(endog, exog).fit().resid

    self.raw_data = self.raw_data.apply(residuals_for_row, axis=1)
    return self.raw_data
def cal_factor_return(self, method='float_value_inverse'):
    """
    Estimate the daily factor return and its t-value by cross-sectional WLS.

    For each row position of ``self.factor`` the same-position row of
    ``self.stock_return`` is regressed on a constant, the index-class
    columns for that date, the factor exposure and the float A-share value.
    The coefficient and t-value of the factor column are recorded under the
    row's index label.

    Only stocks that have a return, a factor value and a float A-share
    value on the date enter the regression.  A date with no such stock, or
    an unrecognised ``method``, is recorded as NaN.

    Results are stored on ``self.factor_t_value`` and
    ``self.factor_return_daily`` and in the ``'cumsum'``, ``'cumprod'`` and
    ``'daily'`` entries of ``self.factor_return``.

    :param method: weighting scheme, either ``'float_value_inverse'``
        (weights ``1 / float value``, infinite weights set to 0) or
        ``'float_value_square_root'`` (weights ``float value ** 0.5``).
    :return: None
    """
    index_class_obj = GetIndexClass()
    index_class_obj.get_index_class()
    index_class_obj.get_zero_index_class()
    share_data = GetShare().get_share('float_a_share_value')

    factor_return_daily = {}
    factor_t_value_dict = {}
    # NOTE(review): rows of self.stock_return are matched to self.factor by
    # position -- confirm both frames share the same row order.
    for position, date in enumerate(self.factor.index):
        stock_return = self.stock_return.iloc[position].dropna()
        factor_data = self.factor.iloc[position].dropna()
        print(date)

        # NaN (not None) marks "no estimate" so the result Series stay
        # numeric; overwritten below when the regression runs.
        factor_return_daily[date] = np.nan
        factor_t_value_dict[date] = np.nan

        common = list(set(stock_return.index).intersection(factor_data.index))
        # Stocks without a float share value are dropped from the universe;
        # indexing the cleaned Series by the unfiltered list would raise
        # KeyError for them.
        float_value = (
            share_data.loc[date].reindex(common).dropna().sort_index())
        stock_list = list(float_value.index)
        if not stock_list:
            continue

        if method == 'float_value_inverse':
            weights = 1. / float_value
            # A zero float value gives an infinite weight; ignore that stock.
            weights[np.isinf(weights)] = 0
        elif method == 'float_value_square_root':
            # Kept as a Series; the former ndarray version crashed when a
            # ``name`` attribute was assigned to it.
            weights = float_value ** 0.5
        else:
            continue

        # Every piece is ordered by the sorted ``stock_list`` so the rows of
        # the regressors, returns and weights line up.
        x = sm.add_constant(
            pd.concat(
                [
                    index_class_obj.get_index_class_in_date(date).reindex(
                        stock_list),
                    pd.DataFrame({self.factor_name: factor_data[stock_list]}),
                    pd.DataFrame({'float_a_share_value': float_value}),
                ],
                axis=1))
        results = sm.WLS(stock_return[stock_list], x, weights=weights).fit()
        factor_return_daily[date] = results.params[self.factor_name]
        factor_t_value_dict[date] = results.tvalues[self.factor_name]

    self.factor_t_value = pd.Series(factor_t_value_dict, dtype=float)
    self.factor_return_daily = pd.Series(factor_return_daily, dtype=float)
    self.factor_return['cumsum'] = self.factor_return_daily.cumsum() + 1
    self.factor_return['cumprod'] = self.factor_return_daily.add(1).cumprod()
    self.factor_return['daily'] = self.factor_return_daily