Example #1
0
    def plot_vertical_momentum_flux(self,data,xdata,terrain):
        """Plot 60-sample rolling vertical momentum fluxes (u'w', v'w') with a
        smoothed terrain profile underneath.

        data    -- frame with 'wdir', 'wspd', 'wvert', 'lats', 'lons' columns
        xdata   -- x-axis values (distance from flight start [km], per xlabel)
        terrain -- terrain source forwarded to Terrain.get_topo2
        """

        met=data[['wdir','wspd','wvert','lats','lons']]
        # topo=np.asarray(Terrain.get_topo(lats=met['lats'], lons=met['lons']))
        # Terrain elevation sampled along the flight track.
        topo2=np.asarray(Terrain.get_topo2(lats=met['lats'], lons=met['lons'],terrain=terrain))

        # Split horizontal wind into u/v components; vertical wind is direct.
        u_comp,v_comp = get_wind_components(met.wspd,met.wdir)
        w_comp=met.wvert

        # Rolling covariances u'w' and v'w' over a centered 60-sample window
        # approximate the two vertical momentum flux components.
        u_moflux = pd.rolling_cov(u_comp, w_comp, 60, center=True)
        v_moflux = pd.rolling_cov(v_comp, w_comp, 60, center=True)

        fig, ax = plt.subplots(2,1, sharex=True)
        l1=ax[0].plot(xdata,u_moflux,label='U-moment')
        l2=ax[0].plot(xdata,v_moflux,label='V-moment')
        ax[0].set_ylim([-1.0,1.0])
        ax[0].set_ylabel('Vertical momentum flux [ m2 s-2]',color='b',fontsize=15)
        # plt.legend(handles=[l1,l2])
        ax[0].legend()

        # Quintic spline over every 5th terrain sample; clamp the spline's
        # negative overshoot to sea level before plotting.
        spl=UnivariateSpline(xdata[::5],topo2[::5],k=5)
        xsmooth=np.linspace(0.,xdata[-1],int(len(xdata)))
        ysmooth=spl(xsmooth)
        ysmooth[ysmooth<0]=0
        ax[1].plot(xsmooth, ysmooth,color='black')
        ax[1].set_xlabel('Distance from flight start [km]',fontsize=15)

        plt.draw()
Example #2
0
def find_capm_gap(df_prices, i_lookback, switch):
#   df_spread = pd.merge(df_prices, df_prices, left_index=True, right_index=True, how='outer') 
    frames = [df_prices, df_prices]
    df_spread = pd.concat(frames, keys=ls_symbols)
    print "in"
    print "df_spread:::", df_spread
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]
    tsu.returnize0(ts_index)
    for s_symbol in ls_symbols[:len(ls_symbols)-1]:
    	ts_price = df_prices[s_symbol]
	tsu.returnize0(ts_price)
#       print "returns", ts_price
#       print "index", ts_index
	ts_x_ret = pd.rolling_sum(ts_index, i_lookback)   
    	ts_y_ret = pd.rolling_sum(ts_price, i_lookback)
        
    	beta = (1/pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback)
    	alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback)
    	df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret) 
#       print "ind", ts_x_ret, "y", ts_y_ret, "a" , alpha, "b", beta, df_capm_gap[s_symbol]
    ldt_timestamps = df_capm_gap.index
    print df_capm_gap
    for i in range(1, len(ldt_timestamps)):
	df_capm_gap.ix[ldt_timestamps[i]]=scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
        print df_spread.ix[[('AMZN',df_prices.index[i])]] 
    return df_capm_gap 
Example #3
0
def featBeta(dData, lLookback=14, sMarket='$SPX', b_human=False):
    '''
    @summary: Calculate beta relative to a given stock/index.
    @param dData: Dictionary of data to use
    @param sStock: Stock to calculate beta relative to
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    # Convert close prices to returns (in place on the copy's ndarray).
    prices = dData['close']
    rets = prices.copy()
    tsu.returnize1(rets.values)

    market_rets = rets[sMarket]

    # Rolling covariance with the market, normalized by the market's own
    # column so each column becomes cov(col, mkt) / var(mkt) = beta.
    betas = pand.rolling_cov(market_rets, rets, lLookback)
    betas /= betas[sMarket]

    if not b_human:
        return betas

    # Human-plottable variant: rescale each close series to start at 1000.
    for sym in dData['close']:
        scale = 1000 / dData['close'][sym][0]
        dData['close'][sym] = dData['close'][sym] * scale
    return dData['close']
Example #4
0
def featBeta(dData, lLookback=14, sMarket='$SPX', b_human=False):
    '''
    @summary: Calculate beta relative to a given stock/index.
    @param dData: Dictionary of data to use
    @param sStock: Stock to calculate beta relative to
    @param b_human: if true return dataframe to plot
    @return: DataFrame array containing feature values
    '''
    # Returns derived from close prices; returnize1 mutates the copy only.
    df_returns = dData['close'].copy()
    tsu.returnize1(df_returns.values)

    # beta_col = cov(col, market) / var(market), both over the same window.
    cov_with_market = pand.rolling_cov(df_returns[sMarket], df_returns, lLookback)
    beta = cov_with_market / cov_with_market[sMarket]

    if b_human:
        # Plot-friendly output: normalize every series to a 1000 start.
        close = dData['close']
        for sym in close:
            close[sym] = close[sym] * (1000 / close[sym][0])
        return close
    return beta
Example #5
0
def CalCov(context, x, rolling_window):
    """Rolling covariance of the last 100 rows / first 5 columns of *x*,
    reshaped to long format with the pair column renamed to 'o2'.
    (context is unused; kept for the caller's signature.)"""
    recent = x.asMatrix().iloc[-100:, :5]
    recent.sort_index(inplace=True)
    cov_panel = pd.rolling_cov(recent, window=rolling_window)
    # One long-format frame per date, stacked and tidied.
    per_date = [calcfactorRetCov(cov_panel, d) for d in recent.index]
    long_cov = pd.concat(per_date, axis=0).rename(columns={'variable': 'o2'}).reset_index()
    long_cov.drop('index', axis=1, inplace=True)
    return long_cov
Example #6
0
def analysis():
    """Compare two sensors over the last `hours` hours (JSON API endpoint).

    Example: /api/stats/compare?a=sensoraname&b=sensorbname&hours=24
    Returns per-sensor hourly means plus 5-point rolling
    mean/stdev/skew/kurtosis, and the rolling correlation/covariance of the
    two series.  NOTE(review): when the session token or any query
    parameter is missing the function falls through and returns None
    (HTTP 500 in Flask) -- confirm that is intended.
    """

    if 'wotkit_token' in session:

        a = request.args.get('a')
        b = request.args.get('b')
        hours = int(request.args.get('hours'))

        if (a and b and hours):

            msph = 3600000  #milliseconds per hour
            result = defaultdict(dict)

            # Fetch raw samples for the requested span and wrap as Series.
            sensoraDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(b, msph * hours))

            # Labels object
            # Python 2 backtick-repr: labels "1h" .. "<hours-1>h".
            result['labels'] = [ ` i ` + "h" for i in range(1, hours)]

            # Sensor A object
            sensoraDailyMeans = sensoraDataSeries.resample('H', how='mean')
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensoraDailyMeans, 5))
            result['a']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensoraDailyMeans, 5))
            result['a']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensoraDailyMeans, 5))
            result['a']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensoraDailyMeans, 5))

            #Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H', how='mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensorbDailyMeans, 5))
            result['b']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensorbDailyMeans, 5))
            result['b']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensorbDailyMeans, 5))
            result['b']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensorbDailyMeans, 5))

            #Comparison object
            result['comparison']['correlation'] = SeriesToList(
                pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5))
            result['comparison']['covariance'] = SeriesToList(
                pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5))

            json_response = json.dumps(result)

            return Response(json_response, content_type='application/json')
Example #7
0
    def evaluate(self, table):
        """Rolling covariance of the two child expressions over self.window.

        Either side may be absent; a missing child contributes None, which
        is handed straight to pd.rolling_cov just as the caller supplied it.
        """
        lval = self.lhs.eval(table) if self.lhs is not None else None
        rval = self.rhs.eval(table) if self.rhs is not None else None
        return pd.rolling_cov(lval, rval, self.window)
def beta(data, market_data):
    """Append rolling 36-period CAPM betas (and alphas for the first four)
    to *data* against market_data['MARKET'].

    data        -- mutated in place: gains 'MARKET', '<col>_BETA' and
                   '<col>_ALPHA' columns; columns at positions
                   [0, 2, 3, 5, 6, 7] are used
    market_data -- frame providing the 'MARKET' series
    Returns (data.dropna(), list of the first four *_BETA column names).

    Fix: pd.rolling_cov / pd.rolling_var were removed in pandas 0.23; the
    equivalent .rolling() API (pandas >= 0.18) is used instead, and the
    loop-invariant market variance is computed once.
    """
    data['MARKET'] = market_data['MARKET']
    column_beta = [data.columns[i] for i in [0, 2, 3, 5, 6, 7]]
    market = data['MARKET']
    # var(market) does not depend on the loop variable -- hoist it.
    market_var = market.rolling(window=36).var()
    beta_names = []
    for col in column_beta:
        beta_names.append(col + '_BETA')
        # beta = cov(col, market) / var(market) over the 36-period window.
        data[col + '_BETA'] = data[col].rolling(window=36).cov(market) / market_var
    for col in column_beta[:4]:
        data[col + '_ALPHA'] = data[col] - data[col + '_BETA'] * data['MARKET']
    return data.dropna(), beta_names[:4]
Example #9
0
    def plot_vertical_heat_flux(self,data,xdata):
        """Plot the 60-sample rolling vertical heat flux w'theta' along the
        flight track, with terrain elevation on a secondary y-axis.

        data  -- frame with 'theta', 'wvert', 'lats', 'lons' columns
        xdata -- distance from flight start [km] (per xlabel)
        """

        met=data[['theta','wvert','lats','lons']]
        # Terrain elevation sampled along the flight track.
        topo=np.asarray(Terrain.get_topo(lats=met['lats'], lons=met['lons']))
        
        # Rolling covariance of vertical wind and potential temperature over
        # a centered 60-sample window approximates the vertical heat flux.
        v_heatflux = pd.rolling_cov(met.wvert, met.theta, 60, center=True)

        plt.figure()
        plt.plot(xdata,v_heatflux)
        ax=plt.gca()
        ax.set_ylim([-0.5,0.5])
        ax.set_ylabel('Vertical heat flux [K m s-1]',color='b',fontsize=15)
        ax.set_xlabel('Distance from flight start [km]',fontsize=15)
        # Overlay topography on a second y-axis (helper defined elsewhere).
        add_second_y_in(ax,topo,xaxis=xdata, color='r',label='Topography [m]')
        plt.draw()
Example #10
0
def read_data(name, df=None):
    """Load one instrument's tab-separated OHLCV file, compute daily
    returns, and -- when an index frame *df* is supplied -- rolling
    beta/alpha against the index's return column.

    Relies on module-level FOLDER_PATH, FILE_EXTENSION, DATE_INDEX,
    INDEX_HISTORY_FILE_NAME and COV_ROLLING_WINDOW.

    Fix: the per-row `.ix` date-conversion loop was replaced with one
    vectorized pd.to_datetime call; after the volume filter left gaps in
    the integer index, `.ix[rn, ...]` matched by LABEL first and could
    assign dates to the wrong rows (or create new ones).
    """
    close_index = name+".Close"
    return_index = name+".Return"
    beta_index = name+".Beta"
    temp_df = pd.read_csv(FOLDER_PATH+name+FILE_EXTENSION,
                          delimiter="\t",
                          parse_dates=True,
                          index_col=False
##                          date_parser=functools.partial(datetime.strptime, format = "%Y/%m/%d")
                          )
    temp_df.columns=[DATE_INDEX,
                     name+".Open",
                     name+".High",
                     name+".Low",
                     name+".Close",
                     name+".Volume",
                     name+".Vol",
                     name+".MA1",
                     name+".MA2",
                     name+".MA3",
                     name+".MA4",
                     name+".MA5",
                     name+".MA6"]
    # Keep only date, close and volume.
    temp_df = temp_df.drop([
                     name+".Open",
                     name+".High",
                     name+".Low",
                     name+".MA1",
                     name+".Vol",
                     name+".MA2",
                     name+".MA3",
                     name+".MA4",
                     name+".MA5",
                     name+".MA6"],1)
    # Rule out invalid data (zero-volume rows are non-trading days).
    temp_df = temp_df[(temp_df[name+".Volume"]>0)]
    # Vectorized date parsing (see fix note in the docstring).
    temp_df[DATE_INDEX] = pd.to_datetime(temp_df[DATE_INDEX])
    # Calculate daily return over each adjacent pair of closes.
    get_return = lambda x: x[1]/x[0]-1
    temp_df[return_index] = pd.rolling_apply(temp_df[close_index], 2, get_return, min_periods=2)
    # Calculate beta against the index when an index frame is provided.
    if df is not None:
        temp_df = pd.merge(df, temp_df, on=DATE_INDEX, how='outer')
        # beta = cov(stock ret, index ret) / var(index ret), rolling window.
        temp_df[beta_index] = pd.rolling_cov(temp_df[return_index], temp_df[INDEX_HISTORY_FILE_NAME+".Return"], COV_ROLLING_WINDOW, min_periods=COV_ROLLING_WINDOW)/\
                              pd.rolling_var(temp_df[INDEX_HISTORY_FILE_NAME+".Return"], COV_ROLLING_WINDOW, min_periods=COV_ROLLING_WINDOW)
        # Calculate alpha
        temp_df[name+".Alpha"] = temp_df[return_index] - temp_df[INDEX_HISTORY_FILE_NAME+".Return"]*temp_df[beta_index]
    return temp_df
Example #11
0
def read_data(name):
    """Load one instrument's csv, compute daily returns, rolling 200-period
    beta against the index return, and alpha.

    Fixes: the body filtered and indexed an undefined global `df` instead
    of the freshly loaded `temp_df`; and the beta denominator used the
    variance of the stock's OWN return rather than the index return
    (beta = cov(stock, index) / var(index) -- matching the fuller
    read_data variant elsewhere in this module).
    """
    close_index = name+".Close"
    return_index = name+".Return"
    beta_index = name+".Beta"
    temp_df = pd.read_csv(FOLDER_PATH+name+FILE_EXTENSION)
    # Rule out invalid data (zero-volume rows).
    temp_df = temp_df[(temp_df[name+".Volume"]>0)]
    # Calculate daily return over each adjacent pair of closes.
    get_return = lambda x: x[1]/x[0]-1
    temp_df[return_index] = pd.rolling_apply(temp_df[close_index], 2, get_return, min_periods=2)
    # Calculate beta: cov(stock, index) / var(index), 200-period window.
    temp_df[beta_index] = pd.rolling_cov(temp_df[return_index], temp_df[INDEX_HISTORY_FILE_NAME+".Return"], 200, min_periods=200)/\
                            pd.rolling_var(temp_df[INDEX_HISTORY_FILE_NAME+".Return"], 200, min_periods=200)
    # Calculate alpha
    temp_df[name+".Alpha"] = temp_df[return_index] - temp_df[INDEX_HISTORY_FILE_NAME+".Return"]*temp_df[beta_index]
    return temp_df
Example #12
0
def Var_Cov_Weight(Tickers, lookback):
    """Column sums of the rolling variance-covariance matrix of daily
    returns for the given tickers.

    Tickers  -- iterable of Yahoo symbols (Adj Close from 2000-01-01)
    lookback -- rolling window length for the covariance estimate
    Returns a DataFrame indexed by date, one column per ticker.

    Fix: `lookback` was passed positionally into pd.rolling_cov's second
    (other-series) argument slot; it is the window length and must be
    passed as window= (as every other rolling_cov call in this module
    does).
    """
    df = pd.DataFrame()

    for Ticker in Tickers:
        df[Ticker] = web.DataReader(Ticker, 'yahoo', '2000-01-01')['Adj Close']

    Return_df = df.dropna().pct_change()

    # Pairwise rolling covariance -> Panel keyed by date.
    Panel = pd.rolling_cov(Return_df, window=lookback)

    Var_Cov_df = pd.DataFrame(index=Panel.items, columns=Tickers)

    for Date in Panel.items:
        # Panel[Date] is that day's covariance matrix; sum() = column sums.
        Var_Cov_df.ix[Date] = Panel[Date].sum().tolist()

    return Var_Cov_df
def analysis():
    """Compare two sensors over the last `hours` hours (JSON API endpoint).

    Example: /api/stats/compare?a=sensoraname&b=sensorbname&hours=24
    Returns per-sensor hourly means plus 5-point rolling
    mean/stdev/skew/kurtosis, and the rolling correlation/covariance of the
    two series.  NOTE(review): when the session token or any query
    parameter is missing the function falls through and returns None
    (HTTP 500 in Flask) -- confirm that is intended.
    """

    if 'wotkit_token' in session:

        a = request.args.get('a')
        b = request.args.get('b')
        hours = int(request.args.get('hours'))
        
        if (a and b and hours):
            
            msph = 3600000 #milliseconds per hour
            result = defaultdict(dict)
            
            # Fetch raw samples for the requested span and wrap as Series.
            sensoraDataSeries = WotKitDataToSeries(WoTKitgetSensorData(a, msph*hours))
            sensorbDataSeries = WotKitDataToSeries(WoTKitgetSensorData(b, msph*hours))
           
            # Labels object
            # Python 2 backtick-repr: labels "1h" .. "<hours-1>h".
            result['labels'] = [`i`+"h" for i in range(1,hours)]

            # Sensor A object             
            sensoraDailyMeans = sensoraDataSeries.resample('H', how = 'mean')
            result['a']['mean'] = SeriesToList( sensoraDailyMeans )
            result['a']['rolling_mean'] = SeriesToList( pd.rolling_mean(sensoraDailyMeans, 5) )
            result['a']['rolling_stdev'] = SeriesToList( pd.rolling_std(sensoraDailyMeans, 5) )
            result['a']['rolling_skewness'] = SeriesToList( pd.rolling_skew(sensoraDailyMeans, 5) )
            result['a']['rolling_kurtosis'] = SeriesToList( pd.rolling_kurt(sensoraDailyMeans, 5) )

            #Sensor B object         
            sensorbDailyMeans = sensorbDataSeries.resample('H', how = 'mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList( pd.rolling_mean(sensorbDailyMeans, 5) )
            result['b']['rolling_stdev'] = SeriesToList( pd.rolling_std(sensorbDailyMeans, 5) )
            result['b']['rolling_skewness'] = SeriesToList( pd.rolling_skew(sensorbDailyMeans, 5) )
            result['b']['rolling_kurtosis'] = SeriesToList( pd.rolling_kurt(sensorbDailyMeans, 5) )
            
            #Comparison object
            result['comparison']['correlation'] = SeriesToList( pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5) )
            result['comparison']['covariance'] = SeriesToList( pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5) )         
          
            json_response = json.dumps(result)

            return Response(json_response, content_type='application/json')
Example #14
0
    def RV(df, **kwargs):
        """Build relative-value series from the columns of *df*.

        kwargs:
          RVspace -- "classic" (all pairwise column combinations, default)
                     or the name of a single base column to project against
          mode    -- 'linear' | 'priceRatio' | 'priceRatio_zScore' | 'beta'
                     (default 'linear'); only used in "classic" space
          n       -- lookback for the z-score mode (default 25)
        Returns the combined frame, forward-filled then zero-filled.

        Fix: the non-"classic" branch built each projected series but never
        collected it, then called pd.concat([]) which raises ValueError;
        the series are now accumulated and concatenated.
        """
        if "RVspace" in kwargs:
            RVspace = kwargs["RVspace"]
        else:
            RVspace = "classic"
        if 'mode' in kwargs:
            mode = kwargs['mode']
        else:
            mode = 'linear'
        if 'n' in kwargs:
            n = kwargs['n']
        else:
            n = 25

        if RVspace == "classic":
            cc = list(combinations(df.columns, 2))
            if mode == 'linear':
                df0 = pd.concat([df[c[0]].sub(df[c[1]]) for c in cc], axis=1, keys=cc)
            elif mode == 'priceRatio':
                df0 = pd.concat([df[c[0]]/df[c[1]] for c in cc], axis=1, keys=cc)
            elif mode == 'priceRatio_zScore':
                lDF = []
                for c in cc:
                    PrRatio = df[c[0]] / df[c[1]]
                    emaPrRatio = pyerb.ema(PrRatio, nperiods=n)
                    volPrRatio = pyerb.expander(PrRatio, np.std, n)
                    PrZScore = (PrRatio-emaPrRatio) / volPrRatio
                    lDF.append(PrZScore)
                df0 = pd.concat(lDF, axis=1, keys=cc)
            elif mode == 'beta':
                # NOTE: the beta window is hard-coded to 25 here (it does
                # not follow kwargs n) -- preserved as-is.
                df0 = pd.concat([df[c[0]].sub((pd.rolling_cov(df[c[0]], df[c[1]], window=25) / pd.rolling_var(df[c[1]], window=25)) * df[c[1]]) for c in cc], axis=1, keys=cc)

            df0.columns = df0.columns.map('_'.join)

        else:
            print("Projection based on RVSpace asset ... ")
            rvList = []
            for c in df.columns:
                rvDF = df[c].sub(df[RVspace])
                rvDF.name = c + "_" + RVspace
                rvList.append(rvDF)
            df0 = pd.concat(rvList, axis=1)

        return df0.fillna(method='ffill').fillna(0)
Example #15
0
def spread_gap(df_prices, i_lookbak, switch):
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]
    tsu.returnize0(ts_index)
    for s_symbol in ls_symbols[:len(ls_symbols)-1]:
        ts_price = df_prices[s_symbol]
        tsu.returnize0(ts_price)
        print "returns", ts_price
        print "index", ts_index
        ts_x_ret = pd.rolling_sum(ts_index, i_lookback)
        ts_y_ret = pd.rolling_sum(ts_price, i_lookback)

        beta = (1/pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback)
        alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback)
        df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret)
        print "ind", ts_x_ret, "y", ts_y_ret, "a" , alpha, "b", beta, df_capm_gap[s_symbol]
    ldt_timestamps = df_capm_gap.index
    for i in range(1, len(ldt_timestamps)):
        df_capm_gap.ix[ldt_timestamps[i]]=scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
    return df_capm_gap
Example #16
0
 def test_ts_cov(self):
     # ts_cov(window, x, y): rolling-covariance operator whose window
     # argument must be one of the whitelisted 'arg1' values.
     self.env.add_operator('ts_cov', {
         'operator': OperatorTSCov,
         'arg1': {'value': [3, 5]},
         })
     # Window 2 is not whitelisted -> the gene fails validation.
     string1 = 'ts_cov(2, open1, open2)'
     gene1 = self.env.parse_string(string1)
     self.assertFalse(gene1.validate())
     # Window 5 is allowed; the result carries both operands' dimensions.
     string2 = 'ts_cov(5, open1, open2)'
     gene2 = self.env.parse_string(string2)
     self.assertTrue(gene2.validate())
     self.assertEqual(gene2.dimension, 'CNY USD')
     # Not enough history before date1 for a 5-day window -> IndexError.
     self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
     date1 = self.env.shift_date(self.date1, 4)
     # Expected frame: pandas rolling covariance, warm-up rows dropped.
     df = pd.rolling_cov(self.env.get_data_value('open1'), self.env.get_data_value('open2'), 5).iloc[4:]
     self.assertTrue(
             frame_equal(
                 gene2.eval(self.env, date1, self.date2),
                 df)
             )
Example #17
0
 def test_ts_cov(self):
     # ts_cov(window, x, y): rolling-covariance operator whose window
     # argument must be one of the whitelisted 'arg1' values.
     self.env.add_operator('ts_cov', {
         'operator': OperatorTSCov,
         'arg1': {
             'value': [3, 5]
         },
     })
     # Window 2 is not whitelisted -> the gene fails validation.
     string1 = 'ts_cov(2, open1, open2)'
     gene1 = self.env.parse_string(string1)
     self.assertFalse(gene1.validate())
     # Window 5 is allowed; the result carries both operands' dimensions.
     string2 = 'ts_cov(5, open1, open2)'
     gene2 = self.env.parse_string(string2)
     self.assertTrue(gene2.validate())
     self.assertEqual(gene2.dimension, 'CNY USD')
     # Not enough history before date1 for a 5-day window -> IndexError.
     self.assertRaises(IndexError, gene2.eval, self.env, self.date1,
                       self.date2)
     date1 = self.env.shift_date(self.date1, 4)
     # Expected frame: pandas rolling covariance, warm-up rows dropped.
     df = pd.rolling_cov(self.env.get_data_value('open1'),
                         self.env.get_data_value('open2'), 5).iloc[4:]
     self.assertTrue(
         frame_equal(gene2.eval(self.env, date1, self.date2), df))
Example #18
0
def find_capm_gap(df_prices, sharpe_lookback, switch):
    """Rank stocks each day by their gap versus the market index.

    df_prices       -- price frame; module-level ls_symbols names its
                       columns, with the market index as the LAST symbol
    sharpe_lookback -- rolling window length (also offsets the ranking
                       start so warm-up NaN rows are skipped)
    switch          -- 1.0 -> plain summed-return spread; 0.0 -> CAPM
                       residual gap (values between blend the two)

    Fix: the body referenced an undefined `i_lookback`; the lookback
    parameter is `sharpe_lookback` and is now used throughout.
    """
#    frames = [df_prices, df_prices]
#    df_spread = pd.concat(frames, keys=ls_symbols)
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    # Market return series (last symbol); returnize0 converts in place.
    ts_index = df_prices[ls_symbols[-1]]
    tsu.returnize0(ts_index)
    for s_symbol in ls_symbols[:len(ls_symbols)-1]:
        ts_price = df_prices[s_symbol]
        tsu.returnize0(ts_price)

        # Summed returns over the lookback for index (x) and stock (y).
        ts_x_ret = pd.rolling_sum(ts_index, sharpe_lookback)
        ts_y_ret = pd.rolling_sum(ts_price, sharpe_lookback)

        # Rolling CAPM fit: beta = cov(index, stock) / var(index).
        beta = (1/pd.rolling_var(ts_index, sharpe_lookback)) * pd.rolling_cov(ts_index, ts_price, sharpe_lookback)
        alpha = pd.rolling_mean(ts_price, sharpe_lookback) - beta * pd.rolling_mean(ts_index, sharpe_lookback)
        df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret)

    ldt_timestamps = df_capm_gap.index

    for i in range(1 + sharpe_lookback, len(ldt_timestamps)):
        # Cross-sectional rank of the gap on each day past the warm-up.
        df_capm_gap.ix[ldt_timestamps[i]]=scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
    return df_capm_gap
Example #19
0
 def ts_operation(df1, df2, n):
     """Rolling covariance of df1 against df2 over an n-period window."""
     # pd.rolling_cov was removed in pandas 0.23; .rolling().cov() is the
     # equivalent modern spelling (available since pandas 0.18).
     return df1.rolling(n).cov(df2)
Example #20
0
 def cov(self, x, y, n):
     """Rolling covariance of the aligned series x and y over window n."""
     (x, y) = self._align_bivariate(x, y)
     # pd.rolling_cov was removed in pandas 0.23; .rolling().cov() is the
     # equivalent modern spelling (available since pandas 0.18).
     return x.rolling(n).cov(y)
Example #21
0
 def ts_operation(df1, df2, n):
     """Rolling covariance of df1 against df2 over an n-period window."""
     # Modern replacement for the removed pd.rolling_cov (pandas >= 0.18).
     return df1.rolling(n).cov(df2)
Example #22
0
def rolling_cov_pairwise(df, *args, **kwargs):
    """Pairwise rolling covariance of every column of *df* against all
    columns, returned as a Panel transposed to (window-end, col, col)."""
    per_column = {col: pd.rolling_cov(df[col], df, *args, **kwargs)
                  for col in df.columns}
    return pd.Panel(per_column).transpose(1, 0, 2)
Example #23
0
def RiskModelStyleOnly(df_ret, dict_risk_expo, period):
    '''Style-factor-only risk model.

    Regresses stock returns cross-sectionally on factor exposures day by
    day, then estimates a rolling factor-return covariance matrix and
    per-stock specific (residual) variances.

    df_ret         -- stock returns (gftIO matrix-convertible; e.g. x0)
    dict_risk_expo -- dict of factor exposure matrices plus an 'osets'
                      index of factor-set ids (e.g. x1)
    period         -- dict with 'CovWindow': rolling window length
    Returns a dict of per-factor return frames ('<id>.ret'), the input
    exposures, 'ret_cov' (long-format factor covariances) and
    'specificRisk' (long-format residual variances).
    '''
    period = int(period['CovWindow'])

    # Resolve factor-set ids, then flatten to the full list of factor ids.
    ls_fexponame = list(
        map(gftIO.gidInt2Str,
            list(dict_risk_expo['osets'].asColumnTab()['O0'])))
    allfactor = []
    for i in ls_fexponame:
        allfactor.extend(
            list(
                map(gftIO.gidInt2Str,
                    list(dict_risk_expo[i].asColumnTab()['O0']))))

    ##stock return preprocess
    df_w_ret = df_ret.asMatrix().T.dropna(how='all', axis=1)

    ##factor exposure preprocess
    dict_risk_expo_new = {
        factorname: dict_risk_expo[factorname].asMatrix()
        for factorname in allfactor
    }
    # Dates / symbols common to every factor's exposure matrix.
    ls_ls_fexpodate = list([
        dict_risk_expo_new[factorname].index.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_alldates_fexpo = reduce(np.intersect1d, ls_ls_fexpodate)

    ls_ls_fexposymbol = list([
        dict_risk_expo_new[factorname].columns.tolist()
        for factorname in dict_risk_expo_new.keys()
    ])
    ls_allsymbols_fexpo = reduce(np.intersect1d, ls_ls_fexposymbol)

    ##get fexpo date,find the nearest business day

    fexpodate = pd.DataFrame(ls_alldates_fexpo, columns=['date_fexpo'])
    retdate = pd.DataFrame(df_w_ret.columns, columns=['date_ret'])

    retdate.sort_values("date_ret", ascending=True, inplace=True)
    fexpodate.sort_values("date_fexpo", ascending=True, inplace=True)

    # Map each exposure date to the NEXT return date (strictly after it,
    # since allow_exact_matches=False).
    df_date_map = pd.merge_asof(retdate,
                                fexpodate,
                                left_on="date_ret",
                                right_on="date_fexpo",
                                allow_exact_matches=False)

    df_date_map.dropna(how='any', inplace=True)
    df_date_map = df_date_map.drop_duplicates(
        subset='date_fexpo').reset_index()
    dict_date_map = {
        df_date_map.date_fexpo[i]: df_date_map.date_ret[i]
        for i in range(len(df_date_map))
    }

    ##get the date intersection of stock return and factor exposure
    ls_alldates = set(df_w_ret.columns).intersection(
        set(dict_date_map.values()))
    ls_alldates_ondaybefore = sorted(list(dict_date_map.keys()))
    # Per exposure date: symbols with both a return and full exposures.
    ls_allsymbols = {
        date: list(
            set(df_w_ret[[dict_date_map[date]]].dropna().index).intersection(
                set(ls_allsymbols_fexpo)))
        for date in ls_alldates_ondaybefore
    }

    #align the stock return and factor exposure
    dict_df_ret = {
        dict_date_map[date]:
        df_w_ret[[dict_date_map[date]]].reindex(index=ls_allsymbols[date])
        for date in ls_alldates_ondaybefore
    }
    dict_df_fexpo = {
        date: fexpomerge(dict_risk_expo_new, date, allfactor, ls_allsymbols)
        for date in ls_alldates_ondaybefore
    }

    #for i in dict_risk_expo_new.keys():
    #if dict_risk_expo_new[i].index.min() > df_l_ret.index.min() or dict_risk_expo_new[i].index.max() < df_l_ret.index.max():
    #raise Exception

    ########################step3:calculate factor return########################

    # One cross-sectional regression per date; results keyed by return date.
    ls_df_fitresult = {
        dict_date_map[date]: Regression(date, dict_df_ret, dict_df_fexpo,
                                        dict_date_map)
        for date in ls_alldates_ondaybefore
    }

    ls_df_facreturn = list(
        ls_df_fitresult[date]['params'].rename(columns={'params': date})
        for date in ls_alldates)
    df_model_params = reduce(
        lambda df_para1, df_para2: pd.concat([df_para1, df_para2], axis=1),
        ls_df_facreturn)

    ########################step4:calculate factor return covariance########################

    df_allfactorret = df_model_params.T
    df_allfactorret = df_allfactorret.sort_index()

    # Rolling pairwise covariance of factor returns over the CovWindow.
    panel_factorretcov = pd.rolling_cov(df_allfactorret, window=period)

    ls_factorretcov = list(
        calcfactorRetCov(panel_factorretcov, date, allfactor)
        for date in list(df_allfactorret.index))
    df_l_factorretcov = pd.concat(
        ls_factorretcov, axis=0).rename(columns={'variable': 'factorid2'})

    ########################step5:calculate the residual(specific) variances of regression########################

    ##part1:merge factorreturn,factor exposure and stock return
    ls_specificrisk = list(
        ls_df_fitresult[date]['resid'].rename(columns={'resid': date})
        for date in ls_alldates)
    df_w_specificrisk = pd.concat(ls_specificrisk, axis=1).T
    df_w_specificrisk = df_w_specificrisk.sort_index()
    df_specificrisk_var = pd.rolling_var(df_w_specificrisk, window=period)
    # Melt wide (date x symbol) residual variance to long format.
    df_specificrisk_var['idname'] = df_specificrisk_var.index
    df_specificrisk_var = pd.melt(df_specificrisk_var, id_vars=['idname'])
    df_specificrisk_var = df_specificrisk_var.rename(columns={
        'idname': 'date',
        'variable': 'symbol',
        'value': 'specificrisk'
    })

    ########################step6:generate final return value########################

    # Per-factor return frames, column renamed to the gid's numpy form.
    dict_factorret = {
        key + '.ret': df_allfactorret[[key]].rename(
            columns={
                key:
                list(
                    gftIO.strSet2Np(
                        np.array(list(df_allfactorret[[key]].columns))))[0]
            })
        for key in df_allfactorret.columns
    }

    dictMerged = dict(
        dict_factorret, **dict_risk_expo, **{
            'ret_cov': df_l_factorretcov,
            'specificRisk': df_specificrisk_var
        })
    #gftIO.zdump(dictMerged,'riskmodel.pkl')

    return dictMerged
Example #24
0
def ts_covFn(df, col1, col2, min_periods, max_periods):
    """Rolling covariance between two columns of *df*.

    A falsy max_periods (None/0) means "use the full column length", i.e.
    an expanding window floored at min_periods observations.

    Fix: pd.rolling_cov was removed in pandas 0.23; .rolling().cov() is
    the equivalent modern spelling (available since pandas 0.18).
    """
    if not max_periods:
        max_periods = len(df[col1])
    return df[col1].rolling(max_periods,
                            min_periods=min_periods).cov(df[col2])
Example #25
0
 def cov(self, x, y, n):
     """Rolling covariance of the aligned series x and y over window n."""
     (x, y) = self._align_bivariate(x, y)
     # Modern replacement for the removed pd.rolling_cov (pandas >= 0.18).
     return x.rolling(n).cov(y)
Example #26
0
def rolling_cov_pairwise(df, *args, **kwargs):
    """Rolling covariance of each column of *df* against the whole frame,
    assembled into a Panel and transposed to (window-end, col, col)."""
    panel_data = {}
    for column in df.columns:
        panel_data[column] = pd.rolling_cov(df[column], df, *args, **kwargs)
    return pd.Panel(panel_data).transpose(1, 0, 2)