Python Sample_Tools示例，tools.Sample_Tools Python示例

示例#1

0

显示文件

def FactorTest_deal(codes, self_obj):
    """Build factor values and forward returns for one chunk of codes.

    Intended to run as a worker task: the calling test object arrives
    serialized and is restored here before any data is fetched.

    Args:
        codes: Stock codes passed straight to ``smpl.get_data``.
        self_obj: ``dill``-serialized object exposing ``start``, ``end``,
            ``gap``, ``freq``, ``ind_Model_Class``, ``main_field`` and
            ``neutralize`` (a dict-like with optional ``'enable'`` and
            ``'static_mv'`` keys).

    Returns:
        A ``(factor, forward_return)`` tuple. When neutralization is enabled
        the first element is whatever ``smpl.add_marketvalue_industry``
        returns for the factor joined with the close price; otherwise it is
        the NaN-free single-field factor frame.
    """
    # Imports stay local so the function is self-contained inside a worker.
    # af / pretreat are not referenced below; they are retained so that the
    # import-time behaviour of this function is left untouched.
    import Analysis_Funs as af
    import tools.Sample_Tools as smpl
    import tools.Pretreat_Tools as pretreat
    import dill
    import pandas as pd

    # NOTE(review): dill.loads can execute arbitrary code embedded in the
    # payload, so self_obj must only ever come from a trusted producer.
    owner = dill.loads(self_obj)

    raw = smpl.get_data(codes, start=owner.start, end=owner.end, gap=owner.gap)

    resampled = smpl.resample_stockdata_low(raw.data, freq=owner.freq)
    ret_forward_re = smpl.get_forward_return(resampled, 'close')

    # The indicator model is fitted on the data *before* resampling so that it
    # sees complete information; alignment with the resampled returns is done
    # later by the caller.
    model = owner.ind_Model_Class(raw.data)
    model.fit()
    factor = pd.DataFrame(model.ind_df[owner.main_field]).dropna(axis=0)

    settings = owner.neutralize
    if not settings.get('enable', False):
        return (factor, ret_forward_re)

    # The close price is carried along for price adjustment downstream.
    factor_close = pd.concat([factor, raw.close], axis=1).dropna(axis=0)
    enriched = smpl.add_marketvalue_industry(
        factor_close, static_mv=settings.get('static_mv', False))
    return (enriched, ret_forward_re)

示例#2

0

显示文件

    def process(self):
        """Run the factor test in-process and store the results on ``self``.

        Reads ``self.sample``, ``self.start``, ``self.end``, ``self.gap``,
        ``self.only_main``, ``self.freq``, ``self.ind_Model_Class``,
        ``self.main_field`` and ``self.neutralize``.

        Writes ``self.rank_ic``, ``self.res`` (one-row summary frame),
        ``self.ind_ret_df`` (factor joined with forward return) and
        ``self.ind_binned`` (the joined frame after per-date binning).
        """
        sample = smpl.get_sample_by_zs(name=self.sample,
                                       start=self.start,
                                       end=self.end,
                                       gap=self.gap,
                                       only_main=self.only_main)

        # Later alignment keys off ret_forward; without it, resampling at
        # different periods produces mismatching dates.
        resampled = smpl.resample_stockdata_low(sample.data, freq=self.freq)
        ret_forward = smpl.get_forward_return(resampled, 'close')

        # Fit the indicator model on the pre-resample data so it sees complete
        # information; the factor is aligned to the return dates further down.
        model = self.ind_Model_Class(sample.data)
        model.fit()
        ind = pd.DataFrame(model.ind_df[self.main_field]).dropna(axis=0)

        if not self.neutralize.get('enable', False):
            # Original author's note: get_rank_ic intersects the indexes
            # internally, so no resampling is needed before calling it.
            self.rank_ic = af.get_rank_ic(ind[self.main_field], ret_forward)
        else:
            # The close price is carried along for price adjustment.
            ind_close = pd.concat([ind, sample.close], axis=1).dropna(axis=0)
            ind_added = smpl.add_marketvalue_industry(
                ind_close, static_mv=self.neutralize.get('static_mv', False))
            exposures = ind_added[['totalCapital', 'industry']].sort_index()
            raw_factor = ind_added.iloc[:, 0].sort_index()
            # Original author's note: neutralize hands back a Series, whereas
            # ind was a DataFrame up to this point.
            ind = pretreat.neutralize(raw_factor,
                                      exposures,
                                      categorical=['industry'],
                                      logarithmetics=['totalCapital'])

            # Factor standardization is deliberately skipped: the rank IC
            # computed after standardizing often disagrees with what the
            # binning test shows.
            self.rank_ic = af.get_rank_ic(ind, ret_forward)

        ic_summary = af.get_ic_desc(self.rank_ic)
        self.res = pd.DataFrame(
            [ic_summary],
            columns=['rankIC', 'rankIC_std', 'rankIC_T', 'rankIC_P'])
        self.res['ICIR'] = round(af.get_ic_ir(self.rank_ic), 6)
        self.res['winning'] = round(af.get_winning_rate(self.rank_ic), 6)

        # Keep only the level-0 index values present in both factor and return.
        ind_dates = ind.index.get_level_values(0).unique()
        ret_dates = ret_forward.index.get_level_values(0).unique()
        common_index = ind_dates.intersection(ret_dates)
        aligned = pd.concat([ind.loc[common_index], ret_forward], axis=1)
        self.ind_ret_df = aligned.dropna(axis=0)

        # Binning: every level-0 group is split into 10 boxes on the factor.
        def _bin_one_group(group):
            return pretreat.binning(group,
                                    deal_column=self.main_field,
                                    box_count=10,
                                    inplace=True)

        self.ind_binned = self.ind_ret_df.groupby(
            level=0, group_keys=False).apply(_bin_one_group)

示例#3

0

显示文件

文件： Fetcher_EM.py 项目： kaikai03/IndicatorLab

def fetch_stock_codelist_by_blockname(block_name='民航机场'):
    """Return the codes of a block as a Series labelled by stock name.

    Args:
        block_name: Block to look up in ``DATABASE.stock_block_em``.

    Returns:
        A ``pandas.Series`` whose values are the codes returned by
        ``smpl.get_codes_from_blockname``. The index starts out as the codes
        themselves and is then renamed through the code -> name mapping
        obtained from ``QA.QA_fetch_stock_name``.
    """
    codes = pd.Series(
        smpl.get_codes_from_blockname(block_name,
                                      collections=DATABASE.stock_block_em))
    code_list = codes.to_list()
    # Label every row by its own code so rename() can map code -> name.
    codes.index = code_list
    name_frame = QA.QA_fetch_stock_name(code_list)
    code_to_name = name_frame['name'].to_dict()
    return codes.rename(code_to_name)

示例#4

0

显示文件

    def process_multi(self, worker=4):
        """Run the factor test with the per-code work fanned out to workers.

        The sample's codes are split into chunks with ``task_chunk_split``;
        each chunk is handled by ``FactorTest_deal`` via ``parallal_task``,
        which receives this object serialized with ``dill``. The per-chunk
        results are concatenated, then the rank IC, the summary table and the
        binned frame are computed.

        Args:
            worker: Passed to both ``task_chunk_split`` and ``parallal_task``.

        Writes ``self.rank_ic``, ``self.res``, ``self.ind_ret_df`` and
        ``self.ind_binned``.
        """
        codes = smpl.get_codes_by_zs(name=self.sample,
                                     only_main=self.only_main)
        task = task_chunk_split(codes, worker)

        results = parallal_task(worker,
                                FactorTest_deal,
                                task,
                                self_obj=dill.dumps(self))

        # Every worker returns an (indicator, forward_return) pair. Split the
        # pairs by position in plain Python: routing them through a NumPy
        # object array only works while the nested frames have inconsistent
        # shapes, because otherwise NumPy descends into the DataFrames and
        # returns raw values instead of the frames themselves.
        pairs = list(results)
        ind = pd.concat([pair[0] for pair in pairs])
        ret_forward_re = pd.concat([pair[1] for pair in pairs])

        if self.neutralize.get('enable', False):
            x = ind[['totalCapital', 'industry']].sort_index()
            y = ind.iloc[:, 0].sort_index()
            ind = pretreat.neutralize(y,
                                      x,
                                      categorical=['industry'],
                                      logarithmetics=['totalCapital'])

            # No factor standardization is applied before the rank IC.
            self.rank_ic = af.get_rank_ic(ind, ret_forward_re)
        else:
            self.rank_ic = af.get_rank_ic(ind[self.main_field], ret_forward_re)

        self.res = pd.DataFrame(
            [af.get_ic_desc(self.rank_ic)],
            columns=['rankIC', 'rankIC_std', 'rankIC_T', 'rankIC_P'])
        self.res['ICIR'] = round(af.get_ic_ir(self.rank_ic), 6)
        self.res['winning'] = round(af.get_winning_rate(self.rank_ic), 6)

        # Keep only the level-0 index values present in both factor and return.
        common_index = ind.index.get_level_values(0).unique().intersection(
            ret_forward_re.index.get_level_values(0).unique())
        ind_resample = ind.loc[common_index]
        self.ind_ret_df = pd.concat([ind_resample, ret_forward_re], axis=1)
        self.ind_ret_df.dropna(axis=0, inplace=True)
        # Binning: every level-0 group is split into 10 boxes on the factor.
        self.ind_binned = self.ind_ret_df.groupby(
            level=0, group_keys=False).apply(lambda x: pretreat.binning(
                x, deal_column=self.main_field, box_count=10, inplace=True))

示例#5

0

显示文件

    def binned_plot(self, only_binned=False):
        """Draw the binning charts built from ``self.ind_binned``.

        Always shows a bar chart of the summed ``ret_forward`` per
        ``group_label``. Unless *only_binned* is true, two more figures follow:
        the per-date summed return of every bin, and its cumulative sum, each
        with the benchmark drawn on a twin y-axis.

        Args:
            only_binned: When true, return right after the bar chart.
        """
        fig_size = (1420 / 72, 320 / 72)

        # Drop the raw factor column and the code column; neither is plotted.
        flat = self.ind_binned.reset_index().drop(
            ['code', self.main_field], axis=1)
        # Group by date, then sum the returns per bin inside each date
        # (original author's note: the result comes out in reverse order).
        by_date_and_bin = flat.set_index(['date', 'group_label'])
        ind_binned_ret_date = by_date_and_bin.groupby(level=0).apply(
            lambda day: day.groupby(level=1).agg(sum))

        plt.figure(figsize=fig_size)
        by_bin = flat.drop(['date'], axis=1).dropna().set_index('group_label')
        ind_binned_ret_all = by_bin.groupby(level=0).apply(
            lambda grp: grp['ret_forward'].sum())
        plt.bar(ind_binned_ret_all.index, ind_binned_ret_all)
        plt.title('分箱平均收益', **PLOT_TITLE)
        plt.show()

        if only_binned:
            return

        benchmark = smpl.get_benchmark(name=self.sample,
                                       start=self.start,
                                       end=self.end,
                                       gap=self.gap)
        benchmark_resampled = smpl.resample_stockdata_low(benchmark.data,
                                                          freq=self.freq)
        benchmark_ret = smpl.get_forward_return(benchmark_resampled, 'close')
        benchmark_ret.reset_index('code', drop=True, inplace=True)
        benchmark_cum = benchmark_ret.cumsum()

        def _draw_bins_with_benchmark(bin_frame, benchmark_line, title):
            # One line per bin on the main axis, benchmark on a twin axis.
            plt.figure(figsize=fig_size)
            plotted = bin_frame.groupby(level=1).apply(
                lambda grp: plt.plot(
                    grp.index.get_level_values(0).unique().tolist(),
                    grp.values.tolist(),
                    label=grp.index.get_level_values(1)[0]))
            twin_axis = plt.gca().twinx()
            # Collect every line handle so both axes share a single legend.
            handles = [lines[0] for lines in plotted.values]
            handles += twin_axis.plot(benchmark_line,
                                      linestyle=":",
                                      linewidth=2,
                                      color="black",
                                      label='bm')
            labels = [handle.get_label() for handle in handles]
            legend = plt.legend(handles,
                                labels,
                                loc='upper left',
                                fontsize='x-small',
                                title='反序\n注意\n10最小')
            legend.get_title().set_fontsize(fontsize=12)
            plt.grid(linestyle="dotted", color="lightgray")
            plt.title(title, **PLOT_TITLE)
            plt.show()

        _draw_bins_with_benchmark(ind_binned_ret_date, benchmark_ret,
                                  '分箱收益变化')

        ind_binned_ret_cum = ind_binned_ret_date.groupby(
            level=1).apply(lambda grp: grp.cumsum())
        _draw_bins_with_benchmark(ind_binned_ret_cum, benchmark_cum,
                                  '累计收益率')

示例#6

0

显示文件

文件： Fetcher_EM.py 项目： kaikai03/IndicatorLab

def fetch_blocks_view(hy_source='industry'):
    """Return ``smpl.get_blocks_view`` for *hy_source*.

    The lookup is made against the ``DATABASE.stock_block_em`` collection.
    """
    block_collection = DATABASE.stock_block_em
    return smpl.get_blocks_view(hy_source=hy_source,
                                collections=block_collection)

示例#7

0

显示文件

文件： Fetcher_EM.py 项目： kaikai03/IndicatorLab

def fetch_all_blocks(hy_source='concept'):
    """Return ``smpl.get_all_blocks`` for *hy_source*.

    The lookup is made against the ``DATABASE.stock_block_em`` collection.
    """
    block_collection = DATABASE.stock_block_em
    return smpl.get_all_blocks(hy_source=hy_source,
                               collections=block_collection)