Example #1
def get_25assets(v1, v2):
    sampleControl = False
    q = 5

    ss = []
    for v in [v1, v2]:
        if v in Database(sample_control=sampleControl).all_indicators:
            s = Database(sample_control=sampleControl).by_indicators([v])
        else:
            s = pd.read_pickle(os.path.join(dirFI, v + '.pkl')).stack()
            s.name = v
        ss.append(s)

    # data lagged
    weight = Database(sample_control=sampleControl).by_indicators(['weight'])
    datalagged = pd.concat(ss+[weight], axis=1)
    datalagged = datalagged.groupby('sid').shift(1)
    # data t
    datat = Database(sample_control=sampleControl).by_indicators(['stockEretM'])

    comb = pd.concat([datalagged, datat], axis=1)
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], q))
    comb['g2'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v2], q))

    assets = comb.groupby(['t', 'g1', 'g2']).apply(
        lambda df: my_average(df, 'stockEretM', wname='weight'))\
        .unstack(level=['g1','g2'])
    return assets
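
The weighted portfolio returns above (and in most of the examples below) go through my_average, whose source is not shown on this page. A minimal stand-in consistent with the call sites, guessed from usage rather than taken from the library, might look like this:

import numpy as np

def my_average(df, vname, wname=None):
    # Guessed re-implementation based on the call sites; not the library's actual code.
    # Returns the wname-weighted mean of column vname, or the plain mean when wname is None.
    if wname is None:
        return df[vname].mean()
    sub = df[[vname, wname]].dropna()
    return np.average(sub[vname], weights=sub[wname])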
Example #2
def get_bivariate_panel(v1, v2='size__size'):
    sampleControl = False
    q = 5

    ss = []
    for v in [v1, v2]:
        if v in Database(sample_control=sampleControl).all_indicators:
            s = Database(sample_control=sampleControl).by_indicators([v])
        else:
            s = _read(v).stack()
            s.name = v
        ss.append(s)

    # data lagged
    weight = Database(sample_control=sampleControl).by_indicators(['weight'])
    datalagged = pd.concat(ss + [weight], axis=1)
    datalagged = datalagged.groupby('sid').shift(1)

    # data t
    datat = Database(sample_control=sampleControl).by_indicators(
        ['stockEretM'])
    comb = pd.concat([datalagged, datat], axis=1)
    comb = comb.dropna()

    comb['g1'] = comb.groupby(
        't', group_keys=False).apply(lambda df: assign_port_id(df[v1], q))
    comb['g2'] = comb.groupby(
        't', group_keys=False).apply(lambda df: assign_port_id(df[v2], q))

    panel = comb.groupby(['t', 'g1', 'g2']).apply(
        lambda df: my_average(df, 'stockEretM', wname='weight'))\
        .unstack(level=['g1','g2'])
    print(v1)
    return panel
Example #3
def construct_playingField(vars, model):
    '''
    :param vars: list of indicator names
    :param model: one of {'5x5', '2x4x4'}
    :return: DataFrame of portfolio returns
    '''
    if model == '5x5':
        v1, v2 = tuple(vars)
        comb = data_for_bivariate(v1, v2, 5, 5, independent=True)
        assets = comb.groupby(['t', 'g1', 'g2']).apply(
            lambda df: my_average(df, 'stockEretM', wname='weight'))\
            .unstack(level=['g1', 'g2'])
    elif model == '2x4x4':
        # v1 must belong to the size category
        v1, v2, v3 = tuple(vars)
        comb = combine_with_datalagged([v1, v2, v3])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[v1], 2, range(1, 3)))

        comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[v2], 4, range(1, 5)))

        comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[v3], 4, range(1, 5)))

        assets = comb.groupby(['t', 'g1', 'g2', 'g3']).apply(
            lambda df: my_average(df, 'stockEretM', wname='weight')) \
            .unstack(level=['g1', 'g2', 'g3'])
    else:
        raise MyError('Model "{}" is not supported currently'.format(model))

    return assets
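
A hypothetical usage sketch of construct_playingField; 'bm__bm' is a placeholder indicator name, while 'size__size', 'inv__inv' and 'roe__roe' appear elsewhere on this page:

# 5x5 independent double sort on two indicators
assets_5x5 = construct_playingField(['size__size', 'bm__bm'], '5x5')

# 2x4x4 sort: a 2-way size split, then 4-way sorts on the other two indicators within each size group
assets_2x4x4 = construct_playingField(['size__size', 'inv__inv', 'roe__roe'], '2x4x4')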
Example #4
    def _get_independent_data(self):
        # TODO: add the method of ratios such as [0.3,0.7]
        comb = combine_with_datalagged([self.indicator1, self.indicator2])
        comb = comb.dropna()
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(
                df[self.indicator1], self.q,
                [self.indicator1 + str(i) for i in range(1, self.q + 1)]))

        comb['g2'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(
                df[self.indicator2], self.q,
                [self.indicator2 + str(i) for i in range(1, self.q + 1)]))
        return comb
Example #5
def indicator2factor(indicator):
    sampleControl = False
    q = 5

    # data lagged
    df = _read(indicator)
    s = df.stack()
    s.name = indicator
    weight = Database(sample_control=sampleControl).by_indicators(['weight'])
    datalagged = pd.concat([s, weight], axis=1)
    datalagged = datalagged.groupby('sid').shift(1)

    # data t
    datat = Database(sample_control=sampleControl).by_indicators(
        ['stockEretM'])
    comb = pd.concat([datalagged, datat], axis=1)
    comb = comb.dropna()

    comb['g'] = comb.groupby(
        't',
        group_keys=False).apply(lambda df: assign_port_id(df[indicator], q))

    panel = comb.groupby(['t', 'g']).apply(
        lambda df: my_average(df, 'stockEretM', wname='weight')) \
        .unstack(level=['g'])

    factor = panel[q] - panel[1]
    factor.name = indicator
    factor.to_pickle(os.path.join(factorPath, '{}.pkl'.format(indicator)))
Example #6
def single_sorting_factor(indicator, q, weight=False):
    # method 1: independent way
    '''
    Construct a new factor from a given indicator.
    Each month, stocks are grouped into "q" portfolios based on the rank of
    "indicator". In the following month, we compute the monthly value-weighted
    (if weight is True) or equal-weighted portfolio return. The factor return
    is the spread between the returns of the top and bottom portfolios.

    :param indicator: name of the sorting indicator
    :param q: number of portfolios (int) or a list/tuple of breakpoint ratios
    :param weight: if True, value-weight the portfolio returns
    :return: Series
    '''
    if isinstance(q, int):
        labels = ['g{}'.format(i) for i in range(1, q + 1)]
    elif isinstance(q, (list, tuple)):
        labels = ['g{}'.format(i) for i in range(1, len(q))]
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[indicator], q, labels))

    if weight:
        panel = comb.groupby(['t', 'g']).apply(
            lambda df: my_average(df, 'stockEretM', wname='weight'))\
            .unstack(level=['g'])
    else:
        panel = comb.groupby(['t', 'g'])['stockEretM'].mean().unstack(level=['g'])

    factor = panel[labels[-1]] - panel[labels[0]]
    return factor
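
None of the snippets on this page shows assign_port_id itself; the commented-out alternative in Example #12 below suggests it is essentially a labelled pd.qcut applied within each month. A minimal stand-in consistent with the call sites (integer q or a list of breakpoint ratios, optional labels) could be the following; it is an assumption, not the library's actual implementation:

import pandas as pd

def assign_port_id(series, q, labels=None):
    # Guessed re-implementation based on the call sites; not the library's actual code.
    # q may be an int (number of equal-count groups) or a list of breakpoint
    # ratios such as [0, 0.3, 0.7, 1.0]; labels default to 1..n.
    if labels is None:
        n = q if isinstance(q, int) else len(q) - 1
        labels = range(1, n + 1)
    return pd.qcut(series, q, labels=labels)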
Example #7
def get_hxz4():
    '''
    Calculate the HXZ q-factors; refer to din.py for details about the indicators.

    References:
        Hou, K., Mo, H., Xue, C., and Zhang, L. (2018). Motivating Factors
        (Rochester, NY: Social Science Research Network).

    Returns:
        None; the combined factor panel is saved as 'hxz4M'.
    '''
    v1 = 'size__size'
    v2 = 'inv__inv'  # I/A
    v3 = 'roe__roe'  # ROE

    comb = combine_with_datalagged([v1, v2, v3], sample_control=True)
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], 2, range(1, 3)))

    comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v2], [0, 0.3, 0.7, 1.0], range(1, 4)))

    comb['g3'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], [0, 0.3, 0.7, 1.0], range(1, 4)))

    assets = comb.groupby(
        ['t', 'g1', 'g2',
         'g3']).apply(lambda df: my_average(df, 'stockEretM', wname='weight'))

    df1 = assets.groupby(['t', 'g1']).mean().unstack(level='g1')
    smb = df1[1] - df1[2]

    df2 = assets.groupby(['t', 'g2']).mean().unstack(level='g2')
    ria = df2[3] - df2[1]

    df3 = assets.groupby(['t', 'g3']).mean().unstack(level='g3')
    roe = df3[3] - df3[1]

    rp = load_data('rpM')
    hxz4 = pd.concat([rp, smb, ria, roe],
                     axis=1,
                     keys=['rp', 'smb', 'ria', 'roe'])
    hxz4.columns.name = 'type'
    hxz4 = hxz4.dropna()
    save(hxz4, 'hxz4M')
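
For reference, the [0, 0.3, 0.7, 1.0] breakpoints used for g2 and g3 above split each cross-section into 30%/40%/30% groups, assuming assign_port_id forwards them to pd.qcut as quantiles:

import numpy as np
import pandas as pd

x = pd.Series(np.arange(1, 11))  # ten hypothetical indicator values
groups = pd.qcut(x, [0, 0.3, 0.7, 1.0], labels=range(1, 4))
print(groups.value_counts().sort_index())  # 3, 4 and 3 stocks in groups 1, 2 and 3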
Example #8
    def portfolio_analysis(self):
        '''
        table 8.4

        :return:
        '''
        #TODO: add a parameter to declare what risk models will be used. [ff3,capm,ff5]

        all_indicators = list(set(self.indicators + ['capM', 'eretM']))
        comb = DATA.by_indicators(all_indicators)

        result_eavg = []
        result_wavg = []
        for indicator in self.indicators:
            gcol = 'g_%s' % indicator
            # comb[gcol]=comb.groupby('t').apply(
            #     lambda df:grouping(df[indicator].reset_index(level='t'),self.q,labels=self.groupnames))
            comb[gcol] = comb.groupby('t', group_keys=False).apply(
                lambda df: assign_port_id(df[indicator], self.q, self.groupnames))
            # TODO: Add an alternative sorting method, that is, updating yearly as on page 9 of Chen et al., “On the Predictability of Chinese Stock Returns.”

            panel_stk_eavg, panel_stk_wavg = self._get_panel_stk_avg(comb, indicator, gcol)
            for panel_stk in [panel_stk_eavg, panel_stk_wavg]:
                panel = panel_stk.unstack(level=[gcol])
                panel.columns = panel.columns.astype(str)
                panel['_'.join([self.groupnames[-1], self.groupnames[0]])] = \
                    panel[self.groupnames[-1]] - panel[self.groupnames[0]]
                panel['avg'] = panel.mean(axis=1)
                # TODO: use the risk models declared above

                a_data = comb.groupby(['t', gcol])[indicator].mean()
                a_data = a_data.unstack()
                a_data.columns = a_data.columns.astype(str)
                a_data.index = a_data.index.astype(str)
                a_data['_'.join([self.groupnames[-1], self.groupnames[0]])] = \
                    a_data[self.groupnames[-1]] - a_data[self.groupnames[0]]
                a_data['avg'] = a_data.mean(axis=1)
                a = a_data.mean()
                a.name = 'avg'
                a = a.to_frame().T

                riskAdjusted = risk_adjust(panel)
                # TODO: something must be wrong with size or portfolio_analysis.
                if panel_stk is panel_stk_eavg:
                    result_eavg.append(pd.concat([a, riskAdjusted], axis=0))
                else:
                    result_wavg.append(pd.concat([a, riskAdjusted], axis=0))
        table_e = pd.concat(result_eavg, axis=0, keys=self.indicators)
        table_w = pd.concat(result_wavg, axis=0, keys=self.indicators)
        # reorder the columns
        initialOrder = table_e.columns.tolist()
        newOrder = self.groupnames + [col for col in initialOrder if col not in self.groupnames]
        table_e = table_e.reindex(columns=newOrder)
        table_w = table_w.reindex(columns=newOrder)

        table_e.to_csv(os.path.join(self.path, 'univariate portfolio analysis-equal weighted.csv'))
        table_w.to_csv(os.path.join(self.path, 'univariate portfolio analysis-value weighted.csv'))
Example #9
    def _get_dependent_data(self,indicators):
        '''
        :param indicators: list with two elements; the first is the controlling variable
        :return:
        '''

        # sometimes the indicators and ['capM', 'eretM'] may share some elements
        comb = DATA.by_indicators(indicators + ['capM', 'eretM'])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[indicators[0]], self.q,
                                      [indicators[0] + str(i) for i in range(1, self.q + 1)]))

        comb['g2'] = comb.groupby(['t', 'g1'], group_keys=False).apply(
            lambda df: assign_port_id(df[indicators[1]], self.q,
                                      [indicators[1] + str(i) for i in range(1, self.q + 1)]))

        return comb
Example #10
    def _get_dependent_data(self, control, target):
        '''
        :param control: the controlling variable
        :param target: the target variable
        :return:
        '''
        comb = combine_with_datalagged([control, target])
        comb = comb.dropna()
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(
                df[control], self.q,
                [control + str(i) for i in range(1, self.q + 1)]))

        comb['g2'] = comb.groupby(
            ['t', 'g1'], group_keys=False).apply(lambda df: assign_port_id(
                df[target], self.q,
                [target + str(i) for i in range(1, self.q + 1)]))

        return comb
Example #11
def three_sorting_factor(v1, v2, v3, q1, q2, q3, weight=True):
    '''
    v1 and v2 are sorted independently; v3 is conditional on v1 and v2.

    Reference:
        Page 18 of Pan, L., Tang, Y., and Xu, J. (2016).
        Speculative Trading and Stock Returns. Review of Finance 20, 1835–1865.
    '''

    comb = combine_with_datalagged([v1, v2, v3])
    comb = comb.dropna()

    comb['g1'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v1], q1, range(1, q1 + 1)))

    comb['g2'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[v2], q2, range(1, q2 + 1)))

    # g3 is conditional on g1 and g2
    comb['g3'] = comb.groupby(['t', 'g1', 'g2'], group_keys=False).apply(
        lambda df: assign_port_id(df[v3], q3, range(1, q3 + 1)))

    if weight:
        s = comb.groupby([
            't', 'g1', 'g2', 'g3'
        ]).apply(lambda df: my_average(df, 'stockEretM', wname='weight'))
    else:
        s = comb.groupby(['t', 'g1', 'g2', 'g3'])['stockEretM'].mean()

    panel1 = s.groupby(['t', 'g1']).mean().unstack(level='g1')
    factor1 = panel1[q1] - panel1[1]

    panel2 = s.groupby(['t', 'g2']).mean().unstack(level='g2')
    factor2 = panel2[q2] - panel2[1]

    panel3 = s.groupby(['t', 'g3']).mean().unstack(level='g3')
    factor3 = panel3[q3] - panel3[1]
    return factor1, factor2, factor3
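
A hypothetical call; every indicator name except 'size__size' is a placeholder. Each returned Series is the high-minus-low spread for the corresponding sort dimension:

size_factor, bm_factor, spec_factor = three_sorting_factor(
    'size__size', 'bm__bm', 'spec__spec', 2, 3, 5, weight=True)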
Example #12
    def _get_independent_data(self):
        # TODO: add the method of ratios such as [0.3,0.7]
        # sometimes the self.indicators and ['capM','eretM'] may share some elements
        comb = DATA.by_indicators([self.indicator1, self.indicator2, 'capM', 'eretM'])
        comb = comb.dropna()
        comb['g1'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[self.indicator1], self.q,
                                      [self.indicator1 + str(i) for i in range(1, self.q + 1)]))

        comb['g2'] = comb.groupby('t', group_keys=False).apply(
            lambda df: assign_port_id(df[self.indicator2], self.q,
                                      [self.indicator2 + str(i) for i in range(1, self.q + 1)]))

        # comb['g1']=comb.groupby('t',group_keys=False).apply(
        #     lambda df:pd.qcut(df[self.indicator1],self.q,
        #                       labels=[self.indicator1+str(i) for i in range(1,self.q+1)])
        # )
        #
        # comb['g2']=comb.groupby('t',group_keys=False).apply(
        #     lambda df:pd.qcut(df[self.indicator2],self.q,
        #                       labels=[self.indicator2+str(i) for i in range(1,self.q+1)])
        # )

        return comb
Example #13
def data_for_bivariate(v1, v2, q1, q2, independent=True, **kwargs):
    comb = combine_with_datalagged([v1, v2], **kwargs)
    comb = comb.dropna()

    if independent:
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v1], q1))

        comb['g2'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v2], q2))
    else:  #dependent
        '''
        v2 is conditional on v1: we first group stocks into q1 portfolios
        based on the rank of v1, and then group each of those q1 portfolios
        into q2 portfolios based on v2.
        '''
        comb['g1'] = comb.groupby(
            't', group_keys=False).apply(lambda df: assign_port_id(df[v1], q1))

        comb['g2'] = comb.groupby(
            ['t', 'g1'],
            group_keys=False).apply(lambda df: assign_port_id(df[v2], q2))

    return comb
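
A quick sketch contrasting the two modes; 'bm__bm' is a placeholder indicator name:

# independent 5x5 sort: breakpoints for both variables come from the full cross-section
indep = data_for_bivariate('size__size', 'bm__bm', 5, 5, independent=True)

# dependent sort: breakpoints for the second variable are computed within each g1 portfolio
dep = data_for_bivariate('size__size', 'bm__bm', 5, 5, independent=False)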
Example #14
def lagged_n(n=0):
    predicted = pd.read_pickle(
        os.path.join(directory, 'predicted_{}.pkl'.format(history)))
    eret = load_data('stockEretM').shift(-n).stack()
    comb = pd.concat([eret, predicted.stack().replace(0, np.nan)],
                     axis=1,
                     keys=['eret', 'predicted'])
    comb.index.names = ['t', 'sid']
    comb = comb.dropna()
    comb['g'] = comb.groupby(
        't', group_keys=False).apply(lambda df: assign_port_id(
            df['predicted'], 10, ['g{}'.format(i) for i in range(1, 11)]))

    ts = comb.groupby(['t', 'g'])['eret'].mean().unstack('g')
    ts.columns = ts.columns.astype(str)
    ts['spread'] = ts['g10'] - ts['g1']
    print(n)
    return ts['spread']
Example #15
def get_single_sorting_assets(indicator, q, weight=True):
    if isinstance(q, int):
        labels = ['g{}'.format(i) for i in range(1, q + 1)]
    elif isinstance(q, (list, tuple)):
        labels = ['g{}'.format(i) for i in range(1, len(q))]
    else:
        raise MyError('q:"{}"  is wrong!'.format(repr(q)))

    comb = combine_with_datalagged([indicator])
    comb['g'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[indicator], q, labels))

    if weight:
        assets = comb.groupby(['t', 'g']).apply(
            lambda df: my_average(df, 'stockEretM', wname='weight'))\
            .unstack(level=['g'])
    else:
        assets = comb.groupby(['t', 'g'])['stockEretM'].mean().unstack(level=['g'])
    return assets
Example #16
def indicatorDf_to_10_assets(indicatorDf, indicatorName):
    sampleControl = False
    q = 10

    # data lagged
    s = indicatorDf.stack()
    s.name = indicatorName
    weight = Database(sample_control=sampleControl).by_indicators(['weight'])
    datalagged = pd.concat([s, weight], axis=1)
    datalagged = datalagged.groupby('sid').shift(1)

    # data t
    datat = Database(sample_control=sampleControl).by_indicators(['stockEretM'])
    comb = pd.concat([datalagged, datat], axis=1)
    comb = comb.dropna()

    comb['g'] = comb.groupby('t', group_keys=False).apply(
        lambda df: assign_port_id(df[indicatorName], q))

    assets = comb.groupby(['t', 'g']).apply(
        lambda df: my_average(df, 'stockEretM', wname='weight')) \
        .unstack(level=['g'])
    return assets
Example #17
    def portfolio_analysis(self):
        '''
        table 8.4

        :return:
        '''
        comb = combine_with_datalagged(self.indicators)
        # all_indicators = list(set(self.indicators + ['weight', 'stockEretM']))
        # comb = DATA.by_indicators(all_indicators)

        result_eavg = []
        result_wavg = []
        for indicator in self.indicators:
            gcol = 'g_%s' % indicator
            # comb[gcol]=comb.groupby('t').apply(
            #     lambda df:grouping(df[indicator].reset_index(level='t'),self.q,labels=self.groupnames))
            comb[gcol] = comb.groupby(
                't', group_keys=False).apply(lambda df: assign_port_id(
                    df[indicator], self.q, self.groupnames))
            # TODO:Add an alternative sorting method,that is,updating yearly as page 9 of Chen et al., “On the Predictability of Chinese Stock Returns.”

            panel_stk_eavg, panel_stk_wavg = self._get_panel_stk_avg(
                comb, indicator, gcol)
            for panel_stk in [panel_stk_eavg, panel_stk_wavg]:
                panel = panel_stk.unstack(level=[gcol])
                panel.columns = panel.columns.astype(str)
                panel['_'.join([
                    self.groupnames[-1], self.groupnames[0]
                ])] = panel[self.groupnames[-1]] - panel[self.groupnames[0]]
                panel['avg'] = panel.mean(axis=1)
                # TODO: use the risk models declared above

                # part A
                a_data = comb.groupby(['t', gcol])[indicator].mean()
                a_data = a_data.unstack()
                a_data.columns = a_data.columns.astype(str)
                a_data.index = a_data.index.astype(str)
                a_data['_'.join([
                    self.groupnames[-1], self.groupnames[0]
                ])] = a_data[self.groupnames[-1]] - a_data[self.groupnames[0]]
                a_data['avg'] = a_data.mean(axis=1)
                a = a_data.mean()
                a.name = 'avg'
                a = a.to_frame().T

                riskAdjusted = risk_adjust(panel)
                # TODO: something must be wrong with size or portfolio_analysis.
                if panel_stk is panel_stk_eavg:
                    result_eavg.append(pd.concat([a, riskAdjusted], axis=0))
                else:
                    result_wavg.append(pd.concat([a, riskAdjusted], axis=0))
        table_e = pd.concat(result_eavg, axis=0, keys=self.indicators)
        table_w = pd.concat(result_wavg, axis=0, keys=self.indicators)
        # reorder the columns
        initialOrder = table_e.columns.tolist()
        h = self.groupnames + ['avg']
        newOrder = h + [col for col in initialOrder if col not in h]
        # newOrder = self.groupnames + [col for col in initialOrder if col not in self.groupnames]
        table_e = table_e.reindex(columns=newOrder)
        table_w = table_w.reindex(columns=newOrder)

        # mark the t-values to facilitate the following analysis
        table_e['significant_positive'] = table_e.iloc[:, -1].map(
            lambda v: 1 if v > 2 else np.nan)
        table_e['significant_negative'] = table_e.iloc[:, -2].map(
            lambda v: -1 if v < -2 else np.nan)
        table_w['significant_positive'] = table_w.iloc[:, -1].map(
            lambda v: 1 if v > 2 else np.nan)
        table_w['significant_negative'] = table_w.iloc[:, -2].map(
            lambda v: -1 if v < -2 else np.nan)

        # table_e.to_csv(os.path.join(self.path, 'univariate portfolio analysis-equal weighted.csv'))
        # table_w.to_csv(os.path.join(self.path, 'univariate portfolio analysis-value weighted.csv'))

        self.results['uni_port_analysis_eq'] = table_e
        self.results['uni_port_analysis_vw'] = table_w