def add_industry_share_value(self):
        """
        Add two columns to the position data (``self.position_data_df``).

        industry    -- value looked up in the index-class table for the row's
                       date and instrument; any non-string value is replaced
                       by ``'other'``.
        share_value -- value looked up in the ``'float_a_share_value'`` table
                       for the same date and instrument.

        Each row's label (``row.name``) is used as the date key and its
        ``'instrument_exchange'`` field as the instrument key.

        :return: None; ``self.position_data_df`` is modified in place.
        """
        industry_source = GetIndexClass()
        industry_table = industry_source.get_index_class()
        # Return value is ignored here; presumably called for a side effect
        # on the loader object -- confirm against GetIndexClass.
        industry_source.get_zero_index_class()

        float_value_table = GetShare().get_share('float_a_share_value')

        def lookup_row(row):
            # Both tables are addressed by (date label, instrument code).
            when = row.name
            code = row['instrument_exchange']
            raw_industry = industry_table.loc[when, code]
            float_value = float_value_table.loc[when, code]
            # Anything that is not a string is treated as "no industry".
            sector = raw_industry if isinstance(raw_industry, str) else 'other'
            return sector, float_value

        per_row_pairs = self.position_data_df.apply(lookup_row, axis=1)
        # Transpose the per-row (industry, share_value) pairs into two columns.
        industries, share_values = zip(*per_row_pairs)
        self.position_data_df['industry'] = industries
        self.position_data_df['share_value'] = share_values
    def neutralize_method(self, method):
        """
        Neutralize ``self.raw_data`` row by row with an OLS regression.

        For every row, the non-NaN values are regressed on a constant plus
        the regressors selected by ``method``; the row is replaced by the
        regression residuals.

        :param method: container tested with ``in``; if it holds
            ``NeutralizeMethod.INDUSTRY.value`` the per-date index-class data
            is used as regressors, if it holds
            ``NeutralizeMethod.MARKET_VALUE.value`` the
            ``'float_a_share_value'`` data is used.
        :return: the residual data, which is also stored back into
            ``self.raw_data``.
        """
        industry_source = GetIndexClass()
        industry_source.get_index_class()
        industry_source.get_zero_index_class()

        wants_market_value = NeutralizeMethod.MARKET_VALUE.value in method
        if wants_market_value:
            float_value_table = GetShare().get_share('float_a_share_value')
        else:
            float_value_table = pd.DataFrame({})

        def residual_for_row(row):
            print('data.name', row.name)
            # Stocks whose factor value is NaN take no part in the regression.
            observed = row.dropna()
            row_label = observed.name

            if NeutralizeMethod.INDUSTRY.value in method:
                industry_part = industry_source.get_index_class_in_date(
                    row_label)
            else:
                industry_part = pd.DataFrame({})

            if wants_market_value:
                float_values = float_value_table.loc[row_label].dropna()
                value_part = pd.DataFrame(
                    {'float_a_share_value': float_values})
            else:
                value_part = pd.DataFrame({})

            # Outer join followed by dropna keeps only stocks that have every
            # selected regressor available.
            regressors = industry_part.join(value_part, how='outer').dropna()

            # Stocks present in both the factor row and the regressors.
            shared_codes = list(set(observed.index) & set(regressors.index))

            # Sort both sides so response and design rows line up.
            response = observed[shared_codes].sort_index()
            design = regressors.reindex(shared_codes).sort_index()
            design = sm.add_constant(design)

            # The OLS residuals are the neutralized values.
            return sm.OLS(response, design).fit().resid

        self.raw_data = self.raw_data.apply(residual_for_row, axis=1)
        return self.raw_data
示例#3
0
    def cal_factor_return(self, method='float_value_inverse'):
        """
        Estimate the daily factor return with cross-sectional weighted least
        squares.

        For every row position of ``self.factor`` the row of
        ``self.stock_return`` at the same position is regressed on a
        constant, the index-class data for that date, the factor values
        (column ``self.factor_name``) and the ``'float_a_share_value'`` data.
        Only stocks that have a return, a factor value and a float value on
        that date enter the regression.  The coefficient and t-value of the
        factor column are recorded per date.

        Results are stored on the instance:
        ``self.factor_t_value``, ``self.factor_return_daily`` and the
        ``'daily'``, ``'cumsum'`` (cumulative sum + 1) and ``'cumprod'``
        (cumulative product of 1 + daily) entries of ``self.factor_return``.

        :param method: weighting scheme.
            ``'float_value_inverse'``     -- weights are 1 / float value
            (infinite weights are set to 0);
            ``'float_value_square_root'`` -- weights are float value ** 0.5.
            Any other value runs no regression and records every date as
            missing (NaN).
        :return: None
        """
        index_class_obj = GetIndexClass()
        index_class_obj.get_index_class()
        index_class_obj.get_zero_index_class()

        share_data = GetShare().get_share('float_a_share_value')

        factor_return_daily = {}
        factor_t_value_dict = {}

        for position, date_time in enumerate(self.factor.index):
            stock_return = self.stock_return.iloc[position].dropna()
            factor_data = self.factor.iloc[position].dropna()
            print(date_time)

            candidates = list(
                set(stock_return.index).intersection(set(factor_data.index)))

            # Stocks without a float value are dropped here, and the stock
            # list is rebuilt from what is left.  Selecting with the original
            # list afterwards would ask for labels that no longer exist.
            float_value = share_data.loc[date_time].reindex(
                candidates).dropna().sort_index()
            stock_list = float_value.index

            if len(stock_list) == 0:
                # Nothing to regress on this date.
                factor_return_daily[date_time] = np.nan
                factor_t_value_dict[date_time] = np.nan
                continue

            if method == 'float_value_inverse':
                weights = 1. / float_value
                # A zero float value gives an infinite weight; exclude it.
                weights[np.isinf(weights)] = 0
            elif method == 'float_value_square_root':
                weights = float_value ** 0.5
            else:
                # Unknown weighting scheme: record the date as missing.
                factor_return_daily[date_time] = np.nan
                factor_t_value_dict[date_time] = np.nan
                continue

            # Everything below is aligned on the same sorted stock list.
            stock_return = stock_return.loc[stock_list]
            index_class_in_date = index_class_obj.get_index_class_in_date(
                date_time).reindex(stock_list)
            factor_frame = pd.DataFrame(
                {self.factor_name: factor_data.loc[stock_list]})
            share_frame = pd.DataFrame({'float_a_share_value': float_value})

            x = sm.add_constant(
                pd.concat([index_class_in_date, factor_frame, share_frame],
                          axis=1))

            results = sm.WLS(stock_return, x, weights=weights).fit()
            factor_return_daily[date_time] = results.params[self.factor_name]
            factor_t_value_dict[date_time] = results.tvalues[self.factor_name]

        # A float dtype keeps the cumulative statistics computable even when
        # every date is missing or there are no dates at all.
        self.factor_t_value = pd.Series(factor_t_value_dict, dtype=float)
        self.factor_return_daily = pd.Series(factor_return_daily, dtype=float)
        self.factor_return['cumsum'] = self.factor_return_daily.cumsum() + 1
        self.factor_return['cumprod'] = (
            self.factor_return_daily.add(1)).cumprod()
        self.factor_return['daily'] = self.factor_return_daily