示例#1
0
def FactorTest_deal(codes, self_obj):
    """Build the factor values and forward returns for one batch of codes.

    Args:
        codes: Forwarded unchanged to ``smpl.get_data``.
        self_obj: dill-serialized object. After ``dill.loads`` it must expose
            ``start``, ``end``, ``gap``, ``freq``, ``ind_Model_Class``,
            ``main_field`` and a ``neutralize`` mapping.

    Returns:
        A 2-tuple ``(factor, forward_return)``. ``forward_return`` is the
        result of ``smpl.get_forward_return`` on the resampled data.
        ``factor`` is the NaN-free ``main_field`` column as a DataFrame, or,
        when ``neutralize['enable']`` is truthy, the output of
        ``smpl.add_marketvalue_industry`` applied to that column joined with
        the close prices.
    """
    # NOTE(review): af and pretreat are not referenced below; they are kept
    # in case importing them matters to the caller's setup -- confirm.
    import Analysis_Funs as af
    import tools.Sample_Tools as smpl
    import tools.Pretreat_Tools as pretreat
    import dill
    import pandas as pd

    # NOTE(review): dill.loads can execute arbitrary code; presumably the
    # payload always comes from a trusted parent process -- confirm.
    cfg = dill.loads(self_obj)
    neutralize_cfg = cfg.neutralize

    sample = smpl.get_data(codes, start=cfg.start, end=cfg.end, gap=cfg.gap)

    resampled = smpl.resample_stockdata_low(sample.data, freq=cfg.freq)
    forward_return = smpl.get_forward_return(resampled, 'close')

    # The indicator model is fitted on the data *before* resampling so that
    # it sees the complete information; resampling is done separately later.
    model = cfg.ind_Model_Class(sample.data)
    model.fit()
    factor = pd.DataFrame(model.ind_df[cfg.main_field]).dropna(axis=0)

    if not neutralize_cfg.get('enable', False):
        return (factor, forward_return)

    # Close prices are carried along because they are needed for price
    # adjustment downstream.
    factor_with_close = pd.concat([factor, sample.close], axis=1)
    factor_with_close = factor_with_close.dropna(axis=0)
    use_static_mv = neutralize_cfg.get('static_mv', False)
    enriched = smpl.add_marketvalue_industry(factor_with_close,
                                             static_mv=use_static_mv)
    return (enriched, forward_return)
示例#2
0
    def process(self):
        """Compute rank-IC statistics and a binned factor/return table.

        Reads ``self.sample``, ``self.start``, ``self.end``, ``self.gap``,
        ``self.only_main``, ``self.freq``, ``self.ind_Model_Class``,
        ``self.main_field`` and the ``self.neutralize`` mapping.

        Attributes written:
            rank_ic: result of ``af.get_rank_ic`` on the factor and the
                forward returns.
            res: one-row DataFrame with columns ``rankIC``, ``rankIC_std``,
                ``rankIC_T``, ``rankIC_P`` (from ``af.get_ic_desc``) plus
                ``ICIR`` and ``winning``, both rounded to 6 decimals.
            ind_ret_df: factor and forward returns concatenated column-wise,
                with rows containing NaN dropped.
            ind_binned: ``ind_ret_df`` grouped by index level 0 with
                ``pretreat.binning`` (``box_count=10``) applied per group.
        """
        data = smpl.get_sample_by_zs(name=self.sample,
                                     start=self.start,
                                     end=self.end,
                                     gap=self.gap,
                                     only_main=self.only_main)

        df = smpl.resample_stockdata_low(data.data, freq=self.freq)
        # Later resampling is aligned to ret_forward; otherwise resampling at
        # different periods would produce inconsistent dates.
        ret_forward = smpl.get_forward_return(df, 'close')

        # To keep ind_Model_Class's information complete, the indicator is
        # generated from the data before resampling; it is resampled
        # separately further down.
        ind_obj = self.ind_Model_Class(data.data)
        ind_obj.fit()
        ind = pd.DataFrame(ind_obj.ind_df[self.main_field])
        ind.dropna(axis=0, inplace=True)

        if self.neutralize.get('enable', False):
            ind_close = pd.concat([ind, data.close], axis=1)  # close is needed for price adjustment
            ind_close.dropna(axis=0, inplace=True)
            ind_added = smpl.add_marketvalue_industry(
                ind_close, static_mv=self.neutralize.get('static_mv', False))
            #             self.indx1 = ind_added
            # Regressors: market capitalisation and industry.
            x = ind_added[['totalCapital', 'industry']].sort_index()
            # x = ind_added[['liquidity_totalCapital','industry']]
            # Dependent variable: the first column, i.e. the factor itself.
            y = ind_added.iloc[:, 0].sort_index()
            # From here on `ind` is the neutralized factor, not the raw one.
            ind = pretreat.neutralize(y,
                                      x,
                                      categorical=['industry'],
                                      logarithmetics=['totalCapital'])

            # Factor standardization is disabled: the rank_ic obtained after
            # standardizing often disagrees with what the binning test shows.
            #             factor_standardized = pretreat.standardize(ind, multi_code=True)
            self.rank_ic = af.get_rank_ic(ind, ret_forward)
        else:
            # neutralize ends up returning ind as a Series, whereas here ind
            # is still a DataFrame, hence the column selection.
            # get_rank_ic intersects the indexes internally, so there is no
            # need to resample out here.
            #             factor_standardized = pretreat.standardize(ind, multi_code=True)[self.main_field]
            self.rank_ic = af.get_rank_ic(ind[self.main_field], ret_forward)

#         self.a = ind
#         self.b = ret_forward

#         self.rank_ic = af.get_rank_ic(factor_standardized, pretreat.standardize(ret_forward, multi_code=True))

        # Summary statistics of the rank-IC series.
        self.res = pd.DataFrame(
            [af.get_ic_desc(self.rank_ic)],
            columns=['rankIC', 'rankIC_std', 'rankIC_T', 'rankIC_P'])
        self.res['ICIR'] = round(af.get_ic_ir(self.rank_ic), 6)
        self.res['winning'] = round(af.get_winning_rate(self.rank_ic), 6)

        # Keep only the factor rows whose level-0 index value also appears in
        # the forward returns, then join the two side by side.
        common_index = ind.index.get_level_values(0).unique().intersection(
            ret_forward.index.get_level_values(0).unique())
        ind_resample = ind.loc[common_index]
        self.ind_ret_df = pd.concat([ind_resample, ret_forward], axis=1)
        self.ind_ret_df.dropna(axis=0, inplace=True)
        # Binning
        # NOTE(review): on the neutralized path `ind` is a Series; binning
        # looks up deal_column=self.main_field, so this presumably relies on
        # pretreat.neutralize preserving the Series name -- confirm.
        self.ind_binned = self.ind_ret_df.groupby(
            level=0, group_keys=False).apply(lambda x: pretreat.binning(
                x, deal_column=self.main_field, box_count=10, inplace=True))