示例#1
0
def get_portRet_ts():
    """Build month-by-portfolio return tables from PMONRET_FF.csv and save them.

    The CSV is loaded, passed through ``filterDf`` with the ``Exchflg`` and
    ``Mktflg`` queries, and reshaped into two tables indexed by month with one
    column per (``Sizeflg``, ``BMflg``) pair for flags 1..5. Columns are
    labelled ``Sizeflg * 10 + BMflg`` (11, 12, ..., 55). One table holds
    ``Pmonret_tmv`` and the other ``Pmonret_mc``; a cell with no matching row
    is NaN, otherwise the first matching row is used. Both tables are handed
    to ``save_df`` under the names 'portRet_rs_tmv' and 'portRet_rs_mc'.

    Returns:
        None. The results are only persisted through ``save_df``.
    """
    port = pd.read_csv(r'D:\quantDb\resset\PMONRET_FF.csv')
    q1 = 'Exchflg == 0'
    q2 = 'Mktflg == A'
    port = filterDf(port, [q1, q2])
    # Drop the last three characters of every date string
    # (presumably the '-DD' part, leaving 'YYYY-MM' -- confirm against the CSV).
    port['Date'] = [d[:-3] for d in port['Date']]

    months = sorted(port['Date'].unique().tolist())

    portRet_rs_tmv = pd.DataFrame()
    portRet_rs_mc = pd.DataFrame()
    for month in months:
        # Filter by month once instead of re-scanning the whole frame for
        # every one of the 25 cells (and twice per cell).
        month_rows = port[port['Date'] == month]
        for i in range(1, 6):
            for j in range(1, 6):
                key = i * 10 + j
                cell = month_rows[(month_rows['Sizeflg'] == i)
                                  & (month_rows['BMflg'] == j)]
                if cell.empty:
                    # np.nan instead of np.NaN: the alias was removed in
                    # NumPy 2.0.
                    portRet_rs_tmv.loc[month, key] = np.nan
                    portRet_rs_mc.loc[month, key] = np.nan
                else:
                    portRet_rs_tmv.loc[month, key] = \
                        cell['Pmonret_tmv'].values[0]
                    portRet_rs_mc.loc[month, key] = \
                        cell['Pmonret_mc'].values[0]
        # Progress output; the call form works on Python 2 and Python 3.
        print(month)

    save_df(portRet_rs_tmv, 'portRet_rs_tmv')
    save_df(portRet_rs_mc, 'portRet_rs_mc')
示例#2
0
def _validate_ff5_factorRet():
    """Plot reference five-factor series against locally computed ones.

    Loads STK_MKT_FivefacMonth.csv from ``sp``, filters it through
    ``filterDf`` and indexes it by ``TradingMonth``. For each portfolio type
    in ``typeDict`` the reference columns (SMB1, HML1, RMW1, CMA1,
    RiskPremium1) are paired with the matching series read from
    ``factorRetPath`` (or ``bdp`` for 'rp'). Rows with any NaN are dropped,
    the pair is cumulatively summed and the plot is saved as a PNG under
    ``validatePath/<type>``.

    Returns:
        None. The only outputs are the PNG files.
    """
    tbname = 'STK_MKT_FivefacMonth'
    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'MarkettypeID == P9709'
    df = filterDf(df, q)
    df = df.set_index('TradingMonth')

    typeDict = {1: '2x3', 2: '2x2', 3: '2x2x2x2'}

    # (reference column, comparison column, file-name template,
    #  column inside that file, output image name)
    specs = [
        ('SMB1', 'mysmb', '%s_smb.csv', 'smb', 'smb.png'),
        ('HML1', 'myhml', '%s_hml.csv', 'hml', 'hml.png'),
        ('RMW1', 'myrmw', '%s_rmw.csv', 'rmw', 'rmw.png'),
        ('CMA1', 'mycma', '%s_cma.csv', 'cma', 'cma.png'),
    ]

    # The risk-premium series does not depend on the portfolio type, so it
    # is read once instead of once per loop iteration.
    my_rp = pd.read_csv(os.path.join(bdp, 'rp.csv'), index_col=0)['rp']

    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for k, v in typeDict.items():
        subset = df[df['Portfolios'] == k]

        comparisons = []
        for refcol, mycol, template, filecol, image in specs:
            frame = subset[refcol].to_frame()
            mine = pd.read_csv(os.path.join(factorRetPath, template % v),
                               index_col=0)
            # Column assignment aligns the local series on the frame's index.
            frame[mycol] = mine[filecol]
            comparisons.append((image, frame))

        rp = subset['RiskPremium1'].to_frame()
        rp['myrp'] = my_rp
        comparisons.append(('rp.png', rp))

        direc = os.path.join(validatePath, '%s' % v)
        if not os.path.exists(direc):
            os.makedirs(direc)

        for image, frame in comparisons:
            fig = frame.dropna(axis=0).cumsum().plot().get_figure()
            fig.savefig(os.path.join(direc, image))
示例#3
0
def validate_portRet():
    """Plot locally computed portfolio returns against PMONRET_FF.csv.

    The reference CSV is filtered through ``filterDf`` and reshaped into a
    month-by-portfolio table of ``Pmonret_tmv``. Columns are labelled
    ``Sizeflg * 10 + BMflg`` for flags 1..5, and months with any missing
    cell are dropped. For every portfolio the reference column and the
    matching column of ``get_df('portRet')`` are cumulatively summed,
    plotted together and saved as ``<portfolio>.png``.

    Returns:
        None. The only outputs are the PNG files.
    """
    df = pd.read_csv(r'D:\quantDb\resset\PMONRET_FF.csv', index_col=0)
    q1 = 'Exchflg == 0'
    q2 = 'Mktflg == A'
    df = filterDf(df, [q1, q2])

    mypr = get_df('portRet')
    # Column labels such as '11.0' or '11' are normalised to the int 11.
    mypr.columns = [int(float(col)) for col in mypr.columns]
    pr = pd.DataFrame()
    for date in sorted(df['Date'].unique().tolist()):
        # Row label is the date minus its last three characters
        # (presumably 'YYYY-MM' -- confirm against the CSV).
        month = date[:-3]
        # Filter by date once rather than once per cell.
        date_rows = df[df['Date'] == date]
        for i in range(1, 6):
            for j in range(1, 6):
                cell = date_rows[(date_rows['Sizeflg'] == i)
                                 & (date_rows['BMflg'] == j)]
                if cell.empty:
                    # np.nan instead of np.NaN: the alias was removed in
                    # NumPy 2.0.
                    pr.loc[month, i * 10 + j] = np.nan
                else:
                    pr.loc[month, i * 10 + j] = cell['Pmonret_tmv'].values[0]

        # Progress output; the call form works on Python 2 and Python 3.
        print(month)

    pr = pr.dropna(axis=0, how='any')

    for i in range(1, 6):
        for j in range(1, 6):
            port = i * 10 + j
            tmp = pd.DataFrame()
            tmp['mypr'] = mypr[port]
            # Aligned on the index established by 'mypr' above.
            tmp['pr'] = pr[port]
            tmp.cumsum().plot().get_figure().savefig(
                r'D:\quantDb\researchTopics\crossSection\data\observe\portRet\%s.png'
                % port)
示例#4
0
def get_bv():
    """Write a period-by-stock table of field F091001A to ``bv.csv``.

    Reads FI_T9.csv from ``sp``, filters it through ``filterDf`` and builds
    one single-column frame per ``Stkcd`` indexed by ``Accper``. The frames
    are joined side by side, the last three characters of every index label
    are dropped, the rows are sorted and the result is written to
    ``tmpp/bv.csv``.
    """
    name = 'bv'
    tbname = 'FI_T9'
    fldname = 'F091001A'  # net assets per share
    timefld = 'Accper'

    raw = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need annual report
    raw = filterDf(raw, ['Typrep == A', 'Accper endswith 12-31'])
    raw = raw[['Stkcd', timefld, fldname]]

    # One frame per stock: index = reporting period, single column = stock id.
    per_stock = [
        grp.set_index(timefld)[fldname].to_frame(code)
        for code, grp in raw.groupby('Stkcd')
    ]

    wide = pd.concat(per_stock, axis=1)
    wide.index = [label[:-3] for label in wide.index]
    wide = wide.sort_index(ascending=True)
    wide.to_csv(os.path.join(tmpp, name + '.csv'))
示例#5
0
def get_bv():
    """Save a period-by-stock table of field F091001A under the name 'bv'.

    FI_T9.csv is read from ``sp`` and filtered through ``filterDf``. Each
    ``Stkcd`` group becomes one column indexed by ``Accper``. After joining
    the columns, every index label loses its last three characters, the
    rows are sorted ascending and the table is passed to ``save_df``.
    """
    name = 'bv'
    tbname = 'FI_T9'
    fldname = 'F091001A'  # net assets per share
    timefld = 'Accper'

    csv_path = os.path.join(sp, tbname + '.csv')
    # TODO: only need annual report
    filters = ['Typrep == A', 'Accper endswith 12-31']
    records = filterDf(pd.read_csv(csv_path), filters)
    records = records[['Stkcd', timefld, fldname]]

    pieces = []
    for code, rows in records.groupby('Stkcd'):
        piece = rows[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]  # label the value column with the stock id
        pieces.append(piece)

    merged = pd.concat(pieces, axis=1)
    merged.index = [stamp[:-3] for stamp in merged.index]
    save_df(merged.sort_index(ascending=True), name)
示例#6
0
def get_rf():
    """Compute the monthly mean of ``Nrrmtdt`` (divided by 100) and save it as 'rf'.

    Steps:
      1. Read TRD_Nrrate.csv from ``sp`` and filter it through ``filterDf``.
      2. Index ``Nrrmtdt`` by ``Clsdt`` and expand it to a daily calendar
         spanning the first to the last ``Clsdt``, forward-filling gaps.
      3. Group the daily values by their 'YYYY-MM' prefix, take the mean and
         divide by 100.
      4. Hand the one-column frame ('rf') to ``save_df``.

    Returns:
        None. The result is only persisted through ``save_df``.
    """
    df = pd.read_csv(os.path.join(sp, 'TRD_Nrrate.csv'))
    q = 'Nrr1 == NRI01'  # TODO: TBC = coupon rate of treasury bonds
    df = filterDf(df, q)
    colnames = ['Clsdt', 'Nrrmtdt']
    df = df.sort_values('Clsdt')

    df = df[colnames]
    df = df.set_index('Clsdt')

    dates = pd.date_range(df.index[0], df.index[-1], freq='D')
    # Back to strings so the labels line up with the string Clsdt index.
    dates = [d.strftime('%Y-%m-%d') for d in dates]

    newdf = pd.DataFrame(index=dates)
    newdf['Nrrmtdt'] = df['Nrrmtdt']
    # .ffill() replaces the deprecated fillna(method='ffill').
    newdf = newdf.ffill()
    newdf = newdf.reset_index()
    # 'YYYY-MM-DD' -> 'YYYY-MM'
    newdf['month'] = newdf['index'].apply(
        lambda x: '-'.join(x.split('-')[:-1]))

    # Select the numeric column explicitly: on pandas >= 2.0 a blanket
    # groupby().mean() raises on the string 'index' column instead of
    # silently dropping it.
    avg = newdf.groupby('month')['Nrrmtdt'].mean().to_frame('rf')
    avg = avg / 100
    # `del avg.index.name` raises AttributeError on pandas >= 1.0;
    # assigning None clears the name on every version.
    avg.index.name = None
    save_df(avg, 'rf')
示例#7
0
def _validate_ff5_factorRet():
    """Save cumulative-sum plots comparing reference and local factor series.

    STK_MKT_FivefacMonth.csv (from ``sp``) is filtered through ``filterDf``
    and indexed by ``TradingMonth``. For each entry of ``typeDict`` the rows
    with the matching ``Portfolios`` value supply the reference columns
    SMB1, HML1, RMW1, CMA1 and RiskPremium1. Each is paired with a series
    read from ``factorRetPath`` (``bdp`` for 'rp'). Rows containing NaN are
    dropped, the pair is cumulatively summed and plotted, and the figure is
    written to ``validatePath/<type>/<factor>.png``.

    Returns:
        None. The only outputs are the PNG files.
    """
    tbname = 'STK_MKT_FivefacMonth'
    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'MarkettypeID == P9709'
    df = filterDf(df, q)
    df = df.set_index('TradingMonth')

    typeDict = {1: '2x3', 2: '2x2', 3: '2x2x2x2'}

    # rp.csv does not depend on the portfolio type; read it once up front.
    my_rp = pd.read_csv(os.path.join(bdp, 'rp.csv'), index_col=0)['rp']

    # .items() exists on both Python 2 and 3; .iteritems() is Python 2 only.
    for k, v in typeDict.items():
        # Rows for this portfolio construction, filtered once and reused.
        subset = df[df['Portfolios'] == k]

        smb = subset['SMB1'].to_frame()
        smb['mysmb'] = pd.read_csv(os.path.join(factorRetPath,
                                                '%s_smb.csv' % v),
                                   index_col=0)['smb']

        hml = subset['HML1'].to_frame()
        hml['myhml'] = pd.read_csv(os.path.join(factorRetPath,
                                                '%s_hml.csv' % v),
                                   index_col=0)['hml']

        rmw = subset['RMW1'].to_frame()
        rmw['myrmw'] = pd.read_csv(os.path.join(factorRetPath,
                                                '%s_rmw.csv' % v),
                                   index_col=0)['rmw']

        cma = subset['CMA1'].to_frame()
        cma['mycma'] = pd.read_csv(os.path.join(factorRetPath,
                                                '%s_cma.csv' % v),
                                   index_col=0)['cma']

        rp = subset['RiskPremium1'].to_frame()
        rp['myrp'] = my_rp

        direc = os.path.join(validatePath, '%s' % v)
        if not os.path.exists(direc):
            os.makedirs(direc)

        for frame, image in ((smb, 'smb.png'), (hml, 'hml.png'),
                             (rmw, 'rmw.png'), (cma, 'cma.png'),
                             (rp, 'rp.png')):
            frame.dropna(axis=0).cumsum().plot().get_figure().savefig(
                os.path.join(direc, image))
示例#8
0
def _get_indictor2(name, tbname, fldname, timefld='Trdmnt'):
    """Reshape one field of a CSV table into a time-by-stock frame and save it.

    Args:
        name: Name passed to ``save_df`` for the resulting table.
        tbname: Base name (without '.csv') of the CSV file located in ``sp``.
        fldname: Column holding the values to reshape.
        timefld: Column used as the row index of the result.
    """
    source = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    source = filterDf(source, 'Markettype in [1,4,16]')
    source = source[['Stkcd', timefld, fldname]]

    # One single-column frame per stock, labelled with the stock id.
    per_stock = [
        grp.set_index(timefld)[fldname].to_frame(code)
        for code, grp in source.groupby('Stkcd')
    ]

    wide = pd.concat(per_stock, axis=1).sort_index(ascending=True)
    save_df(wide, name)
示例#9
0
def _get_indicator2(name, tbname, fldname, timefld='Trdmnt'):
    """Reshape one field of a CSV table into a time-by-stock CSV file.

    Args:
        name: Base name of the output file written to ``tmpp`` as
            ``<name>.csv``.
        tbname: Base name (without '.csv') of the input CSV located in ``sp``.
        fldname: Column holding the values to reshape.
        timefld: Column used as the row index of the result.
    """
    in_path = os.path.join(sp, tbname + '.csv')
    out_path = os.path.join(tmpp, name + '.csv')

    rows = filterDf(pd.read_csv(in_path), 'Markettype in [1,4,16]')
    rows = rows[['Stkcd', timefld, fldname]]

    pieces = []
    for code, chunk in rows.groupby('Stkcd'):
        piece = chunk[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]  # label the value column with the stock id
        pieces.append(piece)

    merged = pd.concat(pieces, axis=1)
    merged.sort_index(ascending=True).to_csv(out_path)
示例#10
0
def _get_indictor1(name, tbname, fldname, timefld='Accper'):
    """Reshape one field of a report table into a period-by-stock frame and save it.

    Only rows whose ``Typrep`` equals 'A' and that pass the
    'Accper endswith 12-31' query of ``filterDf`` are used. Index labels
    lose their last three characters before the per-stock columns are
    joined.

    Args:
        name: Name passed to ``save_df`` for the resulting table.
        tbname: Base name (without '.csv') of the CSV file located in ``sp``.
        fldname: Column holding the values to reshape.
        timefld: Column used as the row index of the result.
    """
    report = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    report = report[report['Typrep'] == 'A']
    report = filterDf(report, 'Accper endswith 12-31')
    report = report[['Stkcd', timefld, fldname]]

    per_stock = []
    for code, grp in report.groupby('Stkcd'):
        column = grp.set_index(timefld)[fldname].to_frame(code)
        # Truncate each period label by three characters.
        column.index = [label[:-3] for label in column.index]
        per_stock.append(column)

    wide = pd.concat(per_stock, axis=1).sort_index(ascending=True)
    save_df(wide, name)
示例#11
0
def _get_indicator1(name, tbname, fldname, timefld='Accper'):
    """Reshape one field of a report table into a period-by-stock CSV file.

    Rows are restricted to ``Typrep == 'A'`` and to whatever the
    'Accper endswith 12-31' query of ``filterDf`` keeps. Each stock becomes
    one column, and its index labels lose their last three characters.

    Args:
        name: Base name of the output file written to ``tmpp`` as
            ``<name>.csv``.
        tbname: Base name (without '.csv') of the input CSV located in ``sp``.
        fldname: Column holding the values to reshape.
        timefld: Column used as the row index of the result.
    """
    in_path = os.path.join(sp, tbname + '.csv')
    out_path = os.path.join(tmpp, name + '.csv')

    rows = pd.read_csv(in_path)
    annual = filterDf(rows[rows['Typrep'] == 'A'], 'Accper endswith 12-31')
    annual = annual[['Stkcd', timefld, fldname]]

    pieces = []
    for code, chunk in annual.groupby('Stkcd'):
        piece = chunk[[timefld, fldname]].set_index(timefld)
        piece.index = [stamp[:-3] for stamp in piece.index]
        piece.columns = [code]  # label the value column with the stock id
        pieces.append(piece)

    merged = pd.concat(pieces, axis=1)
    merged.sort_index(ascending=True).to_csv(out_path)
示例#12
0
def get_rm():
    """Extract the ``Cmretwdos`` series from TRD_Cnmont.csv and save it as 'rm'.

    The table is read from ``sp``, filtered through ``filterDf`` with
    'Markettype == 5', reduced to the ``Trdmnt`` and ``Cmretwdos`` columns,
    indexed and sorted by ``Trdmnt``, renamed to a single 'rm' column and
    passed to ``save_df``.

    Returns:
        None. The result is only persisted through ``save_df``.
    """
    name = 'rm'
    tbname = 'TRD_Cnmont'
    fldname = 'Cmretwdos'
    timefld = 'Trdmnt'

    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'Markettype == 5'  # composite A-share market
    df = filterDf(df, q)

    df = df[[timefld, fldname]]
    df = df.set_index(timefld)
    df = df.sort_index()
    # `del df.index.name` raises AttributeError on pandas >= 1.0;
    # assigning None clears the name on every version.
    df.index.name = None
    df.columns = [name]
    save_df(df, name)
示例#13
0
def get_rm():
    """Extract the ``Cmretwdos`` series from TRD_Cnmont.csv into ``rm.csv``.

    The table is read from ``sp``, filtered through ``filterDf`` with
    'Markettype == 5', reduced to the ``Trdmnt`` and ``Cmretwdos`` columns,
    indexed and sorted by ``Trdmnt``, renamed to a single 'rm' column and
    written to ``tmpp/rm.csv``.

    Returns:
        None. The only output is the CSV file.
    """
    name = 'rm'
    tbname = 'TRD_Cnmont'
    fldname = 'Cmretwdos'
    timefld = 'Trdmnt'

    df = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    q = 'Markettype == 5'  # composite A-share market
    df = filterDf(df, q)

    df = df[[timefld, fldname]]
    df = df.set_index(timefld)
    df = df.sort_index()
    # `del df.index.name` raises AttributeError on pandas >= 1.0;
    # assigning None clears the name on every version.
    df.index.name = None
    df.columns = [name]
    df.to_csv(os.path.join(tmpp, name + '.csv'))
示例#14
0
def get_mv():
    """Save a month-by-stock table of field ``Mclsprc`` under the name 'mv'.

    TRD_Mnth.csv is read from ``sp`` and filtered through ``filterDf``. Each
    ``Stkcd`` group becomes one column indexed by ``Trdmnt``; the columns
    are joined, sorted by index and passed to ``save_df``.

    NOTE(review): the table is saved as 'mv' but is built from the monthly
    closing price field -- confirm this is the intended column.
    """
    name = 'mv'
    tbname = 'TRD_Mnth'
    fldname = 'Mclsprc'  # monthly closing price
    timefld = 'Trdmnt'

    monthly = pd.read_csv(os.path.join(sp, tbname + '.csv'))
    # TODO: only need the data in December
    monthly = filterDf(monthly,
                       ['Markettype in [1,4,16]', 'Trdmnt endswith 12'])
    monthly = monthly[['Stkcd', timefld, fldname]]

    # One single-column frame per stock, labelled with the stock id.
    per_stock = [
        grp.set_index(timefld)[fldname].to_frame(code)
        for code, grp in monthly.groupby('Stkcd')
    ]

    wide = pd.concat(per_stock, axis=1).sort_index(ascending=True)
    save_df(wide, name)
示例#15
0
def get_mv():
    """Write a month-by-stock table of field ``Mclsprc`` to ``mv.csv``.

    TRD_Mnth.csv is read from ``sp`` and filtered through ``filterDf``. Each
    ``Stkcd`` group becomes one column indexed by ``Trdmnt``; the columns
    are joined, sorted by index and written to ``tmpp/mv.csv``.

    NOTE(review): the file is named 'mv' but is built from the monthly
    closing price field -- confirm this is the intended column.
    """
    name = 'mv'
    tbname = 'TRD_Mnth'
    fldname = 'Mclsprc'  # monthly closing price
    timefld = 'Trdmnt'

    in_path = os.path.join(sp, tbname + '.csv')
    out_path = os.path.join(tmpp, name + '.csv')

    # TODO: only need the data in December
    filters = ['Markettype in [1,4,16]', 'Trdmnt endswith 12']
    rows = filterDf(pd.read_csv(in_path), filters)
    rows = rows[['Stkcd', timefld, fldname]]

    pieces = []
    for code, chunk in rows.groupby('Stkcd'):
        piece = chunk[[timefld, fldname]].set_index(timefld)
        piece.columns = [code]  # label the value column with the stock id
        pieces.append(piece)

    merged = pd.concat(pieces, axis=1)
    merged.sort_index(ascending=True).to_csv(out_path)
示例#16
0
def get_rf():
    """Compute the monthly mean of ``Nrrmtdt`` (divided by 100) into ``rf.csv``.

    Steps:
      1. Read TRD_Nrrate.csv from ``sp`` and filter it through ``filterDf``.
      2. Index ``Nrrmtdt`` by ``Clsdt`` and expand it to a daily calendar
         spanning the first to the last ``Clsdt``, forward-filling gaps.
      3. Group the daily values by their 'YYYY-MM' prefix, take the mean and
         divide by 100.
      4. Write the one-column frame ('rf') to ``tmpp/rf.csv``.

    Returns:
        None. The only output is the CSV file.
    """
    df = pd.read_csv(os.path.join(sp, 'TRD_Nrrate.csv'))
    q = 'Nrr1 == NRI01'  # TODO: TBC = coupon rate of treasury bonds
    df = filterDf(df, q)
    colnames = ['Clsdt', 'Nrrmtdt']
    df = df.sort_values('Clsdt')

    df = df[colnames]
    df = df.set_index('Clsdt')

    dates = pd.date_range(df.index[0], df.index[-1], freq='D')
    # Back to strings so the labels line up with the string Clsdt index.
    dates = [d.strftime('%Y-%m-%d') for d in dates]

    newdf = pd.DataFrame(index=dates)
    newdf['Nrrmtdt'] = df['Nrrmtdt']
    # .ffill() replaces the deprecated fillna(method='ffill').
    newdf = newdf.ffill()
    newdf = newdf.reset_index()
    # 'YYYY-MM-DD' -> 'YYYY-MM'
    newdf['month'] = newdf['index'].apply(
        lambda x: '-'.join(x.split('-')[:-1]))

    # Select the numeric column explicitly: on pandas >= 2.0 a blanket
    # groupby().mean() raises on the string 'index' column instead of
    # silently dropping it.
    avg = newdf.groupby('month')['Nrrmtdt'].mean().to_frame('rf')
    avg = avg / 100
    # `del avg.index.name` raises AttributeError on pandas >= 1.0;
    # assigning None clears the name on every version.
    avg.index.name = None
    avg.to_csv(os.path.join(tmpp, 'rf.csv'))