Пример #1
0
def get_amihud_illiq():
    '''
    Build rolling Amihud illiquidity measures, add their logs and save them.

    Reads the daily table ``TRD_Dalyr``, applies ``_amihud`` through
    ``groupby_rolling`` for several horizon labels, appends the natural log
    of every measure under an ``ln_``-prefixed label, moves the time stamps
    to month ends and stores the reshaped frame under the name ``'illiq'``.

    :return: None
    '''
    daily = read_gta('TRD_Dalyr')
    daily = daily[['Stkcd', 'Trddt', 'Dretwd', 'Dnvaltrd']]
    daily.columns = ['sid', 't', 'ret', 'volume']
    daily['t'] = freq_end(daily['t'], 'D')
    daily = daily.set_index(['t', 'sid'])
    if not daily.index.is_monotonic_increasing:
        # TODO: gta's data is not monotonic increasing; add these two rows
        # to the other scripts as well.
        daily = daily.sort_index(level='t')

    # Built from a list of pairs so the label order is guaranteed on every
    # Python version (a dict literal only keeps insertion order from 3.7 on).
    # The numbers are presumably the daily observation counts tied to each
    # horizon -- confirm against groupby_rolling.
    windows = OrderedDict([('1M', 15), ('3M', 50), ('6M', 100), ('12M', 200)])

    result = groupby_rolling(daily, 'illiq', windows, _amihud)
    result.index.names = ['type', 't']

    # Same measures in logs, stacked below the raw ones with 'ln_' labels.
    ln_result = np.log(result).reset_index()
    ln_result['type'] = 'ln_' + ln_result['type'].astype(str)
    ln_result = ln_result.set_index(['type', 't'])
    illiq = pd.concat([result, ln_result], axis=0)
    # TODO: use valid observation for the whole project as page 276

    # Adjust the format of the DataFrame.  The columns name must be passed
    # by keyword: the second positional argument of pd.Index is ``dtype``.
    illiq.columns = pd.Index(illiq.columns.astype(str),
                             name=illiq.columns.name)
    illiq = illiq.reset_index()
    illiq['t'] = freq_end(illiq['t'], 'M')
    illiq = illiq.set_index(['type', 't'])
    # Move the original columns into the index and spread 'type' across
    # the columns instead.
    illiq = illiq.stack().unstack(level='type')

    # TODO: the data is really noisy, refer to the outliers figures for details
    save(illiq, 'illiq')
Пример #2
0
def get_stockCloseY():
    '''
    Pivot the yearly closing price into a wide table and write it to CSV.

    The table ``TRD_Year`` is pivoted so that ``Trdynt`` forms the index and
    every ``Stkcd`` becomes a column holding ``Yclsprc``; the index is then
    passed through ``freq_end`` with ``'Y'`` and the frame is written to
    ``stockCloseY.csv`` under ``DATA_PATH``.

    :return: None
    '''
    source_table = 'TRD_Year'
    value_field = 'Yclsprc'
    year_field = 'Trdynt'
    stock_field = 'Stkcd'
    out_name = 'stockCloseY'

    target = os.path.join(DATA_PATH, out_name + '.csv')
    raw = _readFromSrc(source_table)
    wide = pd.pivot_table(raw,
                          values=value_field,
                          index=year_field,
                          columns=stock_field)
    wide.index = freq_end(wide.index, 'Y')
    wide.to_csv(target)
Пример #3
0
def get_pu():
    '''
    Download the China policy uncertainty series and save it as ``'pu'``.

    The workbook is read directly from the URL below with its last row
    skipped; the year and month columns are joined into a ``year-month``
    label that ``freq_end`` converts into the time index ``t``.

    :return: None
    '''
    url = r'http://www.policyuncertainty.com/media/China_Policy_Uncertainty_Data.xlsx'
    # NOTE(review): later pandas releases spell this keyword ``skipfooter``;
    # confirm against the pandas version this project pins.
    raw = pd.read_excel(url, skip_footer=1)
    raw.columns = ['year', 'month', 'pu']

    year_text = raw['year'].map(str)
    month_text = raw['month'].map(str)
    raw['t'] = year_text + '-' + month_text
    raw['t'] = freq_end(raw['t'], 'M')

    series = raw.set_index('t')['pu']
    save(series, 'pu')
Пример #4
0
def get_ff3D():
    '''
    Load the daily three-factor table, save it as ``'ff3D'`` and return it.

    :return: DataFrame indexed by day with the columns rp, smb and hml
    '''
    raw = read_gta('STK_MKT_ThrfacDay')
    # NOTE(review): the original comment described market type P9709 (all
    # A-share markets: Shanghai and Shenzhen A shares plus ChiNext), yet the
    # filter below selects 'P9707' -- confirm which code is intended.
    in_market = raw['MarkettypeID'] == 'P9707'
    # Factors weighted by circulating market value.
    wanted = ['TradingDate', 'RiskPremium1', 'SMB1', 'HML1']
    factors = raw[in_market][wanted]

    factors.columns = ['t', 'rp', 'smb', 'hml']
    factors.columns.name = 'type'
    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'D')

    save(factors, 'ff3D')
    return factors
Пример #5
0
def get_ffcM():
    '''
    Save the monthly Carhart four factors as ``'ffcM'``.

    :return: None
    '''
    raw = read_gta('STK_MKT_CarhartFourFactors')
    # trick: P9709 covers all A-share markets (Shanghai and Shenzhen
    # A shares plus ChiNext).
    in_market = raw['MarkettypeID'] == 'P9709'
    # trick: factors weighted by circulating market value.
    wanted = ['TradingMonth', 'RiskPremium1', 'SMB1', 'HML1', 'UMD2']
    factors = raw[in_market][wanted]

    factors.columns = ['t', 'rp', 'smb', 'hml', 'mom']
    factors.columns.name = 'type'
    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'M')

    save(factors, 'ffcM')
Пример #6
0
def get_ff3M():
    '''
    Load the monthly three-factor table, save it as ``'ff3M'`` and return it.

    :return: DataFrame indexed by month with the columns rp, smb and hml
    '''
    # FIXME: there are some abnormal values
    raw = read_gta('STK_MKT_ThrfacMonth')
    # trick: P9709 covers all A-share markets (Shanghai and Shenzhen
    # A shares plus ChiNext).
    in_market = raw['MarkettypeID'] == 'P9709'
    # trick: factors weighted by circulating market value.
    wanted = ['TradingMonth', 'RiskPremium1', 'SMB1', 'HML1']
    factors = raw[in_market][wanted]

    factors.columns = ['t', 'rp', 'smb', 'hml']
    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'M')
    factors.columns.name = 'type'

    save(factors, 'ff3M')
    return factors
Пример #7
0
def get_ff5M():
    '''
    Save the monthly five factors as ``'ff5M'``.

    :return: None
    '''
    raw = read_gta('STK_MKT_FivefacMonth')
    # trick: P9709 covers all A-share markets (Shanghai and Shenzhen
    # A shares plus ChiNext).
    in_market = raw['MarkettypeID'] == 'P9709'
    # trick: 2*3 portfolios.
    two_by_three = raw['Portfolios'] == 1
    # trick: factors weighted by circulating market value.
    wanted = ['TradingMonth', 'RiskPremium1', 'SMB1', 'HML1', 'RMW1', 'CMA1']
    factors = raw[in_market & two_by_three][wanted]

    factors.columns = ['t', 'rp', 'smb', 'hml', 'rmw', 'cma']
    factors.columns.name = 'type'
    factors = factors.set_index('t')
    factors.index = freq_end(factors.index, 'M')

    save(factors, 'ff5M')
Пример #8
0
def get_mktRetM():
    '''
    Save the monthly market return series as ``'mktRetM'``.

    :return: None
    '''
    table_name = 'TRD_Cnmont'
    month_col = 'Trdmnt'
    # trick: composite market return with cash dividends reinvested
    # (weighted average by circulating market value).  The original note
    # called it a daily return although the table is read by month.
    return_col = 'Cmretwdos'

    market = read_gta(table_name)
    # 21 = composite A shares plus ChiNext
    market = market[market['Markettype'] == 21]

    market = market.set_index(month_col)
    market.index = freq_end(market.index, 'M')
    market.index.name = 't'

    series = market[return_col]
    series.name = 'mktRetM'

    save(series, 'mktRetM')
Пример #9
0
def get_capM():
    '''
    Save the monthly circulating market capitalization of each stock.

    The values come from ``Msmvosd`` in ``TRD_Mnth`` and are stored under
    the name ``'capM'`` with months on the index and stock ids (as strings)
    on the columns.

    :return: None
    '''
    # trick: Msmvosd is the monthly circulating market value of a stock,
    # in thousands of yuan.
    # TODO: convert the unit to millions as in Cakici, Chan, and Topyan,
    # "Cross-Sectional Stock Return Predictability in China."
    cap = read_df_from_gta('TRD_Mnth', 'Msmvosd', 'Trdmnt', 'Stkcd')

    cap.index.name = 't'
    cap.index = freq_end(cap.index, 'M')

    cap.columns = cap.columns.astype(str)
    cap.columns.name = 'sid'

    save(cap, 'capM')
Пример #10
0
def get_mktRetM():
    '''
    Write the monthly market return series to ``mktRetM.csv``.

    Rows of ``TRD_Cnmont`` with ``Markettype == 21`` are kept, sorted by
    month, reduced to the single return column (renamed ``mktRetM``) and
    written to ``DATA_PATH``.

    :return: None
    '''
    newName = 'mktRetM'
    path = os.path.join(DATA_PATH, newName + '.csv')
    tbname = 'TRD_Cnmont'
    indVar = 'Trdmnt'
    # Composite market return with cash dividends reinvested (weighted
    # average by circulating market value).  The original note called it a
    # daily return although the table is read by month.
    targetVar = 'Cmretwdos'

    df = _readFromSrc(tbname)
    df = df[df['Markettype'] == 21]  # 21 = composite A shares plus ChiNext

    df = df.set_index(indVar)
    df = df.sort_index()
    df = df[[targetVar]]
    # Clear the index name by assignment: ``del df.index.name`` raises
    # AttributeError on pandas versions where ``name`` is a property,
    # while assigning None works on every version.
    df.index.name = None
    df.columns = [newName]
    df.index = freq_end(df.index, 'M')
    df.to_csv(path)
Пример #11
0
def get_ff3M_resset():
    '''
    Save the monthly three factors from the RESSET data as ``'ff3M_resset'``.

    :return: None
    '''
    raw = read_resset('THRFACDAT_MONTHLY')
    # Exchflg == 0: all exchanges
    all_exchanges = raw['Exchflg'] == 0
    # Mktflg == 'A': A shares only
    a_shares = raw['Mktflg'] == 'A'
    factors = raw[all_exchanges & a_shares]

    factors = factors.set_index('Date')
    factors.index = freq_end(factors.index, 'M')
    factors.index.name = 't'

    # Columns weighted with tradable capitalization.
    wanted = ['Rmrf_tmv', 'Smb_tmv', 'Hml_tmv']
    factors = factors[wanted]
    factors.columns = ['rp', 'smb', 'hml']
    factors.columns.name = 'type'

    save(factors, 'ff3M_resset')
Пример #12
0
def get_stockRetM():
    '''
    Save and return the monthly stock returns including dividends.

    The values come from ``Mretwd`` in ``TRD_Mnth`` and are stored under
    the name ``'stockRetM'``.

    :return: DataFrame with months on the index and stock ids (as strings)
        on the columns
    '''
    # Mretwd: return with cash dividends reinvested.
    returns = read_df_from_gta('TRD_Mnth', 'Mretwd', 'Trdmnt', 'Stkcd')

    # TODO: identify the axis and convert the axis automatically
    returns.index.name = 't'
    returns.columns.name = 'sid'
    returns.index = freq_end(returns.index, 'M')
    returns.columns = returns.columns.astype(str)

    save(returns, 'stockRetM')
    return returns
Пример #13
0
def get_amihud_illiq():
    '''
    Build rolling Amihud illiquidity measures, add their logs and write a CSV.

    Reads the daily table ``TRD_Dalyr``, applies ``_amihud`` through
    ``groupby_rolling`` for several horizon labels, appends the natural log
    of every measure under an ``ln_``-prefixed label and writes the stacked
    frame to ``illiq.csv`` under ``DATA_PATH``.

    :return: None
    '''
    daily = read_gta('TRD_Dalyr')
    daily = daily[['Stkcd', 'Trddt', 'Dretwd', 'Dnvaltrd']]
    daily.columns = ['sid', 't', 'ret', 'volume']
    daily['t'] = freq_end(daily['t'], 'D')
    daily = daily.set_index(['t', 'sid'])
    if not daily.index.is_monotonic_increasing:
        # TODO: gta's data is not monotonic increasing; add these two rows
        # to the other scripts as well.
        daily = daily.sort_index(level='t')

    # Built from a list of pairs so the label order is guaranteed on every
    # Python version (a dict literal only keeps insertion order from 3.7 on).
    # The numbers are presumably the daily observation counts tied to each
    # horizon -- confirm against groupby_rolling.
    windows = OrderedDict([('1M', 15), ('3M', 50), ('6M', 100), ('12M', 200)])

    result = groupby_rolling(daily, 'illiq', windows, _amihud)
    result.index.names = ['type', 't']

    # Same measures in logs, stacked below the raw ones with 'ln_' labels.
    ln_result = np.log(result).reset_index()
    ln_result['type'] = 'ln_' + ln_result['type'].astype(str)
    ln_result = ln_result.set_index(['type', 't'])
    illiq = pd.concat([result, ln_result], axis=0)
    # TODO: use valid observation for the whole project as page 276
    illiq.to_csv(os.path.join(DATA_PATH, 'illiq.csv'))
Пример #14
0
def get_liquidity_ps():
    '''
    Estimate stock-level liquidity betas and write them to ``liqBeta.csv``.

    Steps:

    1. Read ``AggPS_os`` from ``Liq_PSM_M`` (renamed ``rm``) and derive the
       series ``lm`` from rolling regressions of its change on the lagged
       change and the lagged level.
    2. Join ``lm`` with the factors read from ``'ff3_gta'`` and with the
       excess stock returns (``stockRetM`` minus ``rf`` from ``rfM``).
    3. For every stock, regress the excess return on rp, smb, hml and lm
       over a rolling window and keep the coefficient on ``lm``.

    :return: None
    '''
    df = read_gta('Liq_PSM_M')
    # MarketType == 21: composite A shares plus ChiNext.
    # Weighted by circulating market value, but on page 310 Bali uses
    # total market capitalization.
    condition1 = (df['MarketType'] == 21)
    # Original note: "delete the ST stocks".
    # NOTE(review): confirm that ST == 1 marks the rows to keep.
    condition2 = (df['ST'] == 1)

    df = df[condition1 & condition2][['Trdmnt', 'AggPS_os']]
    df.columns = ['t', 'rm']
    df = df.set_index('t')

    df.index = freq_end(df.index, 'M')
    df = df.sort_index()
    df['rm_ahead'] = df['rm'].shift(1)
    df['delta_rm'] = df['rm'] - df['rm'].shift(1)
    df['delta_rm_ahead'] = df['rm_ahead'] - df['rm_ahead'].shift(1)

    # TODO: we don't know the length of the window to regress; here we use
    # the five-year history.
    def regr(sample):
        # Needs more than 30 complete rows, otherwise the month gets NaN.
        if sample.shape[0] > 30:
            # NOTE(review): ``resid[0]`` picks the first residual of the
            # window and relies on positional lookup -- confirm that the
            # first (not the last) observation is the one wanted.
            return sm.ols(formula='delta_rm ~ delta_rm_ahead + rm_ahead',
                          data=sample).fit().resid[0]
        return np.nan

    window = 60  # not exactly 5 years
    lm = pd.Series(
        [regr(df.loc[:month][-window:].dropna()) for month in df.index],
        index=df.index)
    lm.name = 'lm'

    ret = read_df('stockRetM', freq='M')
    rf = read_df('rfM', freq='M')
    eret = ret.sub(rf['rf'], axis=0)
    eret = eret.stack()
    eret.index.names = ['t', 'sid']
    eret.name = 'eret'

    ff3 = read_df('ff3_gta', 'M')
    factors = pd.concat([ff3, lm], axis=1)

    comb = eret.to_frame().join(factors)

    def _for_one_month(sample):
        # Coefficient on lm, or NaN when fewer than 30 rows are available.
        if sample.shape[0] >= 30:
            return sm.ols(formula='eret ~ rp + smb + hml + lm',
                          data=sample).fit().params['lm']
        return np.nan

    def _get_result(group):
        # Rolling betas for one stock; stocks with 30 rows or fewer yield
        # None, as in the original implementation.
        thresh = 30  # 30 months
        if group.shape[0] > thresh:
            values = []
            sid = group.index[0][1]
            group = group.reset_index(level='sid', drop=True)
            months = group.index.tolist()[thresh:]
            for month in months:
                # Same trailing window as used for lm above.
                subdf = group.loc[:month][-window:]
                subdf = subdf.dropna()
                values.append(_for_one_month(subdf))
            print(sid)  # progress output, one line per finished stock
            return pd.Series(values, index=months)

    result = comb.groupby('sid').apply(_get_result)
    result.unstack('sid').to_csv(os.path.join(DATA_PATH, 'liqBeta.csv'))
Пример #15
0
def get_liquidity():
    '''
    Collect several monthly liquidity measures and save them as ``'liquidity'``.

    Turnover rates, the Amihud measure and the Pastor-Stambaugh measures are
    each read from their own table, indexed by ``(t, sid)``, stripped of
    duplicated index entries and concatenated column-wise.

    :return: None
    '''

    def _load_measure(table, source_cols, new_cols):
        # Read one table, keep normally traded rows and index by (t, sid).
        frame = read_gta(table, index_col=0)
        frame = frame[frame['Status'] == 'A']  # A = normal trading
        frame = frame[source_cols]
        frame.columns = new_cols
        frame['t'] = freq_end(frame['t'], 'M')
        frame['sid'] = frame['sid'].astype(str)
        frame = frame.set_index(['t', 'sid'])
        return frame.astype(float)

    # Turnover rate
    turnover = _load_measure(
        'Liq_Tover_M',
        ['Stkcd', 'Trdmnt', 'ToverOsM', 'ToverTlM', 'ToverOsMAvg',
         'ToverTlMAvg'],
        ['sid', 't', 'turnover1', 'turnover2', 'turnover3', 'turnover4'],
    )

    # Amihud
    amihud = _load_measure(
        'Liq_Amihud_M',
        ['Stkcd', 'Trdmnt', 'ILLIQ_M'],
        ['sid', 't', 'amihud'],
    )

    # The roll measures (Liq_Roll_M: Roll_M, Roll_Impact_M) and the zeros
    # measures (Liq_Zeros_M: Zeros_M, Zeros_Impact_M) are deliberately left
    # out: they are not proper for portfolio analysis, since the many zeros
    # in the sample cause errors in the program.

    # Pastor Stambaugh
    pastor_stambaugh = _load_measure(
        'Liq_PS_M',
        ['Stkcd', 'Trdmnt', 'PSos', 'PStl'],
        ['sid', 't', 'ps1', 'ps2'],
    )

    # Combine them, keeping only the first row of any duplicated index entry.
    pieces = [
        part[~part.index.duplicated()]
        for part in [turnover, amihud, pastor_stambaugh]
    ]
    combined = pd.concat(pieces, axis=1)
    combined.columns.name = 'type'

    save(combined, 'liquidity')