def test_params_only(basic_data, method):
    """Check that ``params_only=True`` gives the same coefficients as a full fit."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    full_fit = model.fit(method=method, params_only=False)
    slim_fit = model.fit(method=method, params_only=True)
    # Compare with a tolerance rather than exactly, to absorb the numerical
    # differences seen on x86 platforms.
    assert_allclose(slim_fit.params, full_fit.params)
def test_methods(basic_data):
    """Check that the 'inv', 'lstsq' and 'pinv' solvers agree on the parameters."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)
    # Fit once per solver, in the order inv -> lstsq -> pinv.
    fitted = {name: model.fit(method=name) for name in ('inv', 'lstsq', 'pinv')}
    reference = fitted['inv'].params
    assert_allclose(reference, fitted['lstsq'].params)
    assert_allclose(reference, fitted['pinv'].params)
def test_methods(basic_data, params_only):
    """Check that all solvers agree, for the given ``params_only`` setting."""
    endog, exog, _ = basic_data
    model = RollingOLS(endog, exog, 150)

    def _params(solver):
        # One fit per solver; only the estimated coefficients are compared.
        return model.fit(method=solver, params_only=params_only).params

    inv_params = _params("inv")
    lstsq_params = _params("lstsq")
    pinv_params = _params("pinv")
    assert_allclose(inv_params, lstsq_params)
    assert_allclose(inv_params, pinv_params)
def test_min_nobs(basic_data):
    """Check that ``min_nobs`` is respected by every window that is estimated.

    The test only does work when the regressors contain NaNs; otherwise it
    returns immediately.
    """
    endog, exog, _ = basic_data
    has_missing = np.isnan(np.asarray(exog)).any()
    if not has_missing:
        return

    baseline_nobs = RollingOLS(endog, exog, 150).fit().nobs
    # One more than the smallest non-zero count, which ensures that the
    # constraint binds.
    min_nobs = baseline_nobs[baseline_nobs != 0].min() + 1

    constrained_nobs = RollingOLS(endog, exog, 150, min_nobs=min_nobs).fit().nobs
    assert np.all(constrained_nobs[constrained_nobs != 0] >= min_nobs)
def playing_with_rolling(self, pair, fromDate="2015-01-01", toDate="2018-01-01"):
    """Plot the z-scored rolling-OLS spread and the rolling slope for a pair.

    The first symbol's series is regressed on the second symbol's series
    (plus a constant) over a 180-observation rolling window. The spread is the
    first series minus the rolling slope times the second series; the
    intercept is not subtracted. The spread is then standardised with its own
    180-observation rolling mean and standard deviation.

    :param pair: two-element sequence of symbols looked up in ``self.portfolio``
    :param fromDate: start label of the slice applied to both series
    :param toDate: end label of the slice applied to both series
    :return: None; the figure is displayed with ``plt.show()``
    """
    symbol1, symbol2 = pair[0], pair[1]
    data1 = self.portfolio[symbol1][self.analysisOn][fromDate:toDate]
    data2 = self.portfolio[symbol2][self.analysisOn][fromDate:toDate]

    window = 180
    rolling_fit = RollingOLS(data1, sm.add_constant(data2), window=window).fit()

    # The constant is prepended by add_constant, so the slope is the last
    # column. Selecting it by position avoids hard-coding the column name,
    # which depends on self.analysisOn.
    rolling_beta = rolling_fit.params.iloc[:, -1]

    spread_rolling = data1 - rolling_beta * data2
    spread_mean = spread_rolling.rolling(window=window).mean()
    spread_std = spread_rolling.rolling(window=window).std()

    fig, axs = plt.subplots(2)
    # Upper panel: rolling z-score of the spread.
    axs[0].plot((spread_rolling - spread_mean) / spread_std)
    axs[0].xaxis.set_major_locator(plt.MaxNLocator(15))
    # Lower panel: rolling slope from the fixed start date used originally.
    axs[1].plot(rolling_beta['2013-03-15':])
    axs[1].xaxis.set_major_locator(plt.MaxNLocator(15))
    plt.show()
def compute_rolling_regression(
    window_size: int, endog: pd.DataFrame, exog: pd.DataFrame
):
    """
    Fit a rolling OLS of ``endog`` on ``exog`` plus a constant using statsmodels.

    Following Amaya, Rochen et al (2015), "Distilling Liquidity Costs from
    Limit Order Books", the slope is taken to be the liquidity cost and the
    constant is the intercept.

    Ref: https://www.statsmodels.org/dev/examples/notebooks/generated/rolling_ls.html
    Paper source: https://www.sciencedirect.com/science/article/abs/pii/S0378426618301353

    :window_size: Size of the rolling window
    :endog: Dependent variable - y
    :exog: Independent variable - x
    :return: rols_results (instance of statsmodels results object),
        rols_params (pd.DataFrame with columns "liquidity_cost", "intercept")
    """
    # The constant is appended as the last column, which is why "intercept" is
    # the second name assigned below.
    design = sm.add_constant(exog, prepend=False)
    rols_results = RollingOLS(endog, design, window=window_size).fit()

    rols_params = rols_results.params
    rols_params.columns = ["liquidity_cost", "intercept"]
    return rols_results, rols_params
def test_expanding(basic_data):
    """Check NaN and finite regions of the parameters for an expanding fit."""
    endog, exog, _ = basic_data
    exog_values = np.asarray(exog)

    fitted = RollingOLS(endog, exog, 150, min_nobs=50, expanding=True).fit()
    estimates = np.asarray(fitted.params)

    # Nothing can be estimated before min_nobs observations are available.
    assert np.all(np.isnan(estimates[:49]))

    # First row at which 50 complete (all-finite) regressor rows have been seen.
    complete_rows = np.all(np.isfinite(exog_values), axis=1)
    enough_rows = np.cumsum(complete_rows) >= 50
    first = np.where(enough_rows)[0][0]
    assert np.all(np.isfinite(estimates[first:]))
def rolling_ols_model():
    """Fit a Rolling Ordinary Least Squares model on the "longley" dataset.

    Returns a ``ModelWithResults`` holding the fitted results, the unfitted
    ``RollingOLS`` instance and the design matrix used for inference.
    """
    from statsmodels.regression.rolling import RollingOLS

    longley = get_dataset("longley")
    # The constant is appended as the last column of the design matrix.
    design = sm.add_constant(longley.exog, prepend=False)
    estimator = RollingOLS(longley.endog, design)
    fitted = estimator.fit(reset=50)
    return ModelWithResults(model=fitted, alg=estimator, inference_dataframe=design)
def get_rolling_beta(df: pd.DataFrame, hist: pd.DataFrame, mark: pd.DataFrame, n: int) -> pd.DataFrame:
    """Turns a holdings portfolio into a rolling beta dataframe

    Each holding is converted to a portfolio weight (row-normalised), each
    column of ``hist["Close"]`` is regressed on the ``"Close"`` column of the
    merged frame over a 252-row rolling window, and the weights are multiplied
    by the resulting betas.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe of daily holdings; its ``"Holding"`` block is used
    hist : pd.DataFrame
        A dataframe of historical returns; its ``"Close"`` block is used
    mark : pd.DataFrame
        The dataframe of market performance; its ``"Market"`` block is used
        (presumably it supplies the ``"Close"`` regressor column - confirm
        against the caller)
    n : int
        The period to get returns for, in days

    Returns
    ----------
    final : pd.DataFrame
        Dataframe with the weighted beta per holding (``prod_*``), their row
        sum (``total``) and the individual rolling betas (``beta_*``),
        restricted to the last ``n + 1`` days
    """
    df = df["Holding"]
    uniques = df.columns.tolist()

    # Portfolio weights; rows that sum to zero give NaN and are set to 0.
    res = df.div(df.sum(axis=1), axis=0)
    res = res.fillna(0)

    comb = pd.merge(hist["Close"], mark["Market"], how="outer", left_index=True, right_index=True)
    comb = comb.ffill()

    # The regressor is the same for every asset, so build it once.
    exog = sm.add_constant(comb["Close"])
    for col in hist["Close"].columns:
        rres = RollingOLS(comb[col], exog, window=252).fit()
        res[f"beta_{col}"] = rres.params["Close"]

    final = res.ffill()
    for uni in uniques:
        final[f"prod_{uni}"] = final[uni] * final[f"beta_{uni}"]

    beta_cols = [f"beta_{x}" for x in uniques]
    dropped = final[beta_cols].copy()
    final = final.drop(columns=beta_cols + uniques)
    final["total"] = final.sum(axis=1)
    final = final[final.index >= datetime.now() - timedelta(days=n + 1)]

    comb = pd.merge(final, dropped, how="left", left_index=True, right_index=True)
    return comb
def calc_aggregates(data, days):
    """Summarise the BTC/ETH spread implied by a rolling regression.

    BTC-GBP closes are regressed on ETH-GBP closes (no constant is added) over
    a rolling window of ``days`` rows. The spread is the BTC close minus the
    rolling beta times the ETH close.

    Returns a dict with the spread's mean ("mean"), its standard deviation
    ("std") and the most recent rolling beta ("beta").
    """
    btc_close = data["BTC-GBP"].Close
    eth_close = data["ETH-GBP"].Close

    fitted = RollingOLS(btc_close, eth_close, window=days).fit()
    rolling_beta = fitted.params.Close
    rolling_beta.name = "beta"

    spread = btc_close - rolling_beta * eth_close
    summary = {
        "mean": spread.mean(),
        "std": spread.std(),
        "beta": rolling_beta.iloc[-1],
    }
    return summary
def calibrate(self, windowOLS, **kwargs):
    """Estimate the rolling hedge ratio of ``self.y`` on ``self.x``.

    Fits a rolling OLS of y on x plus a constant column and stores the slope
    on x, one value per observation, in ``self.beta``. The window actually
    used is stored in ``self.windowOLS``.

    :param windowOLS: requested rolling window length; it is capped at
        ``len(self.y) - 1``
    :param kwargs: accepted for interface compatibility and ignored here
    """
    # The original wrote len(self.y - 1), which subtracts 1 from every element
    # and still returns len(self.y); the parenthesis was misplaced.
    self.windowOLS = min(windowOLS, len(self.y) - 1)

    df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})
    model = RollingOLS(endog=df['y'], exog=df[['x', 'c']], window=self.windowOLS)
    rres = model.fit()
    self.beta = rres.params['x'].values.reshape(-1, )
def capm(self, close, market, window_length_return, window_length_beta):
    """Compute a rolling CAPM beta for every column of ``close``.

    Market log returns (restricted to the first..last index label of
    ``close``) plus a constant form the regressors. Each asset's log returns
    are regressed on them over a rolling window of ``window_length_beta``
    rows, and the slope is stored under the asset's column name.

    :param close: frame of asset prices, one column per ticker
    :param market: market price data passed to ``self.log_Returns``
    :param window_length_return: window passed to ``self.log_Returns``
    :param window_length_beta: rolling regression window
    :return: DataFrame of betas with the columns of ``close``
    """
    market_returns = self.log_Returns(market, window_length_return)
    span = slice(close.index[0], close.index[-1])
    r_market = market_returns.loc[span]
    exog = sm.add_constant(r_market)

    cap_beta = pd.DataFrame(columns=close.columns)
    for tick in close.columns:
        r_assets = self.log_Returns(close[[tick]], window_length_return)
        fitted = RollingOLS(r_assets, exog, window=window_length_beta).fit()
        # The constant comes first, then the market slope.
        coefs = fitted.params.dropna()
        coefs.columns = ['intercept', 'beta']
        cap_beta.loc[:, tick] = coefs['beta']
    return cap_beta
def computeForDay(self, strategy, timeSeriesTick, timeSeriesTrade):
    """Compute rolling linear-trend coefficients of the resampled tick prices.

    The tick series is resampled to ``self.resamplePeriod`` seconds (first
    value per bin, gaps forward-filled). The ``"price"`` column is then
    regressed on elapsed seconds plus a constant over a rolling window of
    ``self.period`` rows.

    :param strategy: unused here
    :param timeSeriesTick: time-indexed tick series; assumed to be named
        "price" since that column is read after ``to_frame()`` - confirm
    :param timeSeriesTrade: unused here
    :return: dict with key "betaSeries" holding the rolling parameters, which
        are also stored in ``self.betaSeries``
    """
    # Lowercase "s" is the non-deprecated alias for seconds.
    rule = str(int(self.resamplePeriod)) + "s"
    timeSeriesReg = timeSeriesTick.resample(rule).first()
    timeSeriesReg = timeSeriesReg.ffill()

    timeTable = timeSeriesReg.to_frame()
    # Index as integer nanoseconds, then seconds elapsed since the first bin.
    timeTable["second"] = timeSeriesReg.index.astype(np.int64)
    timeTable["second"] = (timeTable["second"] - timeTable["second"].iloc[0]) / 1e9

    mod = RollingOLS(
        timeTable["price"],
        add_constant(timeTable["second"], prepend=False),
        window=self.period,
    )
    self.betaSeries = mod.fit().params
    return {"betaSeries": self.betaSeries}
def calc_beta_ret(df, market_port_ret, window=52):
    """Find country betas through rolling regressions.

    For every column of ``df``, ``market_port_ret`` is regressed on that
    column plus a constant over a rolling window. The slope series, with the
    initial NaN rows dropped, become the columns of the result.

    :param df: frame with one return series per country
    :param market_port_ret: dependent variable of every regression
    :param window: rolling window length
    :return: DataFrame with one beta column per column of ``df``; an empty
        DataFrame when ``df`` has no columns
    """
    y = market_port_ret
    beta_columns = []
    for c in df.columns:
        X = sm.add_constant(df[c])
        rolling_res = RollingOLS(y, X, window).fit(params_only=True)
        # Keep only the slope on the country column, not the constant.
        beta_columns.append(rolling_res.params.dropna()[c])

    if not beta_columns:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; a single column-wise concat
    # builds the same frame.
    return pd.concat(beta_columns, axis=1)
def rolling_OLS_Kt(curves, window=14) -> pd.DataFrame:
    """
    Rolling-window Ordinary Least Squares estimate of the derivative of the
    logarithm of the number of daily cases.

    {args}
    """
    if isinstance(window, Sequence):
        a, b = window
    else:
        a = b = window
    daily = diff(cases(curves), smooth=a)

    # A plain OLS on the first b points gives a slope used to extrapolate the
    # series into the past.
    log_daily = np.log(daily).values
    steps = np.arange(len(log_daily))
    head_fit = sm.OLS(log_daily[:b], sm.add_constant(steps[:b]), missing="drop").fit()
    slope = head_fit.params[1]

    # Prepend b extrapolated observations so that the rolling fit has a full
    # window by the time it reaches the first real observation.
    origin = steps[0]
    past_steps = np.arange(origin - b, origin)
    past_values = slope * (past_steps - origin) + log_daily[0]
    steps = np.concatenate([past_steps, steps])
    log_daily = np.concatenate([past_values, log_daily])

    # The rolling OLS slope is the estimate of the growth ratio.
    rolling_fit = RollingOLS(log_daily, sm.add_constant(steps), window=b, missing="drop").fit()
    Kt = rolling_fit.params[b:, 1]
    low, high = rolling_fit.conf_int()[b:, :, 1].T

    columns = {
        "Kt": Kt,
        "Kt_low": low,
        "Kt_high": high
    }
    return pd.DataFrame(columns, index=curves.index)
def regress_factor_loadings(self, portfolio, benchmark_returns: pd.Series = None, date: datetime = None,
                            regression_window: int = 36, rolling=False, show=True):
    '''
    Regress the excess returns of a single-asset portfolio on the model's factors.

    :param portfolio: str, pd.Series, TimeDataFrame, Portfolio... Anything that is not a
        TimeDataFrame or Portfolio is wrapped in a TimeDataFrame. More than one return
        column is rejected (equal weighting is not implemented yet).
    :param benchmark_returns: optional benchmark; when given it replaces the model's 'MKT-RF'
        column (after subtracting 'RF'). When None, the model's own market factor is used.
    :param date: upper bound of the sample; only applied when the portfolio frequency
        differs from the factor frequency
    :param regression_window: rolling window length when ``rolling`` is True, otherwise the
        number of most recent rows used for the single regression
    :param rolling: if True, fit a RollingOLS; otherwise a single OLS with HAC errors
    :param show: if True, plot the results
    :return: the rolling results object when ``rolling`` is True, else the fitted OLS results
    :raises TypeError: if the portfolio has more than one return column
    '''
    if not isinstance(portfolio, (TimeDataFrame, Portfolio)):
        portfolio = TimeDataFrame(portfolio)

    if len(portfolio.df_returns.columns) > 1:
        # TODO actually, do an equal weighting
        raise TypeError('Inappropriate argument type for portfolio')

    if portfolio.frequency != self.factors_timedf.frequency:
        portfolio_copy = portfolio.set_frequency(self.factors_timedf.frequency, inplace=False) \
            .slice_dataframe(to_date=date, inplace=False)
    else:
        # NOTE(review): `date` is not applied on this path - confirm this is intended.
        portfolio_copy = portfolio

    if benchmark_returns is None:
        # if no benchmark specified, just use the one in the model
        timedf_merged = portfolio_copy.merge([self.factors_timedf], inplace=False)
    else:
        timedf_merged = portfolio_copy.merge(
            [self.factors_timedf, benchmark_returns], inplace=False)
        timedf_merged.df_returns.drop(['MKT-RF'], axis=1, inplace=True)
        # A Series is unhashable and cannot be a dict key; rename by its name. A plain
        # column label (no `.name` attribute) is used as-is.
        benchmark_name = getattr(benchmark_returns, 'name', benchmark_returns)
        timedf_merged.df_returns.rename(
            columns={benchmark_name: 'MKT-RF'}, inplace=True)
        timedf_merged.df_returns['MKT-RF'] = timedf_merged.df_returns[
            'MKT-RF'] - timedf_merged.df_returns['RF']

    returns = timedf_merged.df_returns
    # First column is the portfolio; everything after it is a factor.
    portfolio_returns = (returns.iloc[:, 0] - returns['RF']).rename('XsRet')
    factors_df = returns.iloc[:, 1:].drop(columns=['RF'])  # RF is not a regressor

    if rolling:
        # endogenous is the portfolio returns (y, dependent), exogenous is the factors
        # (x, explanatory, independent)
        rols = RollingOLS(endog=portfolio_returns, exog=factors_df, window=regression_window)
        rres = rols.fit()
        params = rres.params.dropna()
        print(params.tail())
        if show:
            rres.plot_recursive_coefficient(variables=factors_df.columns,
                                            figsize=(10, 6))
            plt.show()
        return rres

    # need to merge again to run regression on dataframe (with y being XsRet)
    df_stock_factor = pd.merge(portfolio_returns, factors_df, left_index=True, right_index=True)
    # rename because '-' is an operator in the regression formula
    df_stock_factor = df_stock_factor.iloc[-regression_window:, :].rename(columns={'MKT-RF': 'MKT'})
    factor_names = list(df_stock_factor.columns[1:])
    reg = sm.ols(formula='XsRet ~ {}'.format(' + '.join(factor_names)),
                 data=df_stock_factor).fit(cov_type='HAC', cov_kwds={'maxlags': 1})
    print(reg.summary())

    if show:
        nrows, ncols = ceil(len(factor_names) / 3), min(len(factor_names), 3)
        # squeeze=False always yields a 2-D array of axes, whatever the grid shape.
        fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(12, 5), squeeze=False)
        plt.tight_layout()
        for i, factor in enumerate(factor_names):
            row, col = divmod(i, 3)
            ax = axs[row, col]
            X = np.linspace(df_stock_factor[factor].min(), df_stock_factor[factor].max())
            # beta * x + alpha; params is label-indexed, so index by position explicitly.
            Y = reg.params.iloc[i + 1] * X + reg.params.iloc[0]
            ax.plot(X, Y)
            ax.scatter(df_stock_factor[factor], df_stock_factor.iloc[:, 0], alpha=0.3)
            ax.grid(True)
            ax.axis('tight')
            ax.set_xlabel(factor if factor != 'MKT' else 'MKT-RF')
            ax.set_ylabel('Portfolio Excess Returns')
        plt.show()
    return reg
def get_rolling_linear_regression(self, df, window_size, target_name, hedge_name, autocorr_periods=0):
    """
    Add rolling regression and correlation statistics between a target and a hedge column.

    When autocorr_periods is greater than 2, the same statistics are also computed for
    lags 1 .. autocorr_periods - 1, taking the shifted (``shift(-lag)``) series of one
    leg against the current series of the other.

    :param df: input frame containing the target and hedge columns
    :param window_size: rolling window used for every regression and correlation
    :param target_name: name of the dependent column
    :param hedge_name: name of the explanatory column
    :param autocorr_periods: lag statistics are added only when this exceeds 2
    :return: the augmented frame without the helper rank/const columns and with all
        rows containing NaN dropped
    """
    from statsmodels.regression.rolling import RollingOLS

    df_lr = sm.add_constant(df)
    target_rank_col = target_name + 'Rank'
    hedge_rank_col = hedge_name + 'Rank'
    df_lr[target_rank_col] = df_lr[target_name].rank()
    df_lr[hedge_rank_col] = df_lr[hedge_name].rank()

    if autocorr_periods > 2:
        for lag_p in range(1, autocorr_periods):
            suffix = str(lag_p)
            target_rank = df_lr[target_name].rank()
            hedge_rank = df_lr[hedge_name].rank()

            # Rank (Spearman) and raw (Pearson) rolling correlations against the
            # shifted counterpart.
            df_lr['SpearmanCorr_hedge_lag' + suffix] = target_rank.rolling(
                window=window_size).corr(hedge_rank.shift(-lag_p))
            df_lr['SpearmanCorr_tgt_lag' + suffix] = hedge_rank.rolling(
                window=window_size).corr(target_rank.shift(-lag_p))
            df_lr['PearsonCorr_hedge_lag' + suffix] = df_lr[target_name].rolling(
                window=window_size).corr(other=df_lr[hedge_name].shift(-lag_p))
            df_lr['PearsonCorr_tgt_lag' + suffix] = df_lr[hedge_name].rolling(
                window=window_size).corr(other=df_lr[target_name].shift(-lag_p))

            # Rolling regressions in both directions on the shifted regressors.
            model_hedge_lagp = RollingOLS(endog=df_lr[target_name].values,
                                          exog=df_lr[['const', hedge_name]].shift(-lag_p),
                                          window=window_size)
            model_tgt_lagp = RollingOLS(endog=df_lr[hedge_name].values,
                                        exog=df_lr[['const', target_name]].shift(-lag_p),
                                        window=window_size)
            hedge_fit = model_hedge_lagp.fit()
            tgt_fit = model_tgt_lagp.fit()

            hedge_intercept = hedge_fit.params['const']
            hedge_slope = hedge_fit.params[hedge_name]
            hedge_r_squared = hedge_fit.rsquared

            df_lr['intercept_hedge_lag' + suffix] = hedge_intercept
            # NOTE(review): this column name is misspelled; kept verbatim because
            # downstream code may read it.
            df_lr['interecept_tgt_lap' + suffix] = tgt_fit.params['const']
            df_lr['slope_hedge_lag' + suffix] = hedge_slope
            df_lr['slope_tgt_lag' + suffix] = tgt_fit.params[target_name]
            df_lr['r_squared_hedge_lag' + suffix] = hedge_r_squared
            df_lr['r_squared_tgt_lag' + suffix] = tgt_fit.rsquared

    # Contemporaneous regression of the target on the hedge.
    rres = RollingOLS(endog=df_lr[target_name].values,
                      exog=df_lr[['const', hedge_name]],
                      window=window_size).fit()
    intercept = rres.params['const']
    slope = rres.params[hedge_name]
    r_squared = rres.rsquared

    df_lr['SpearmanCorr'] = df_lr[target_rank_col].rolling(
        window=window_size).corr(df_lr[hedge_rank_col])
    df_lr['PearsonCorr'] = df_lr[target_name].rolling(
        window=window_size).corr(other=df_lr[hedge_name])
    df_lr['r_squared'] = r_squared
    df_lr['intercept'] = intercept
    df_lr['slope'] = slope
    df_lr['linreg_f_stat_p_val'] = rres.f_pvalue

    # The p-values are split into two column blocks: first the constant, then
    # the slope.
    p_val_colnames = ['intercept_p_val', 'slope_p_val']
    p_val_blocks = np.split(rres.pvalues, 2, axis=1)
    for position, col_name in enumerate(p_val_colnames):
        flat = np.array(p_val_blocks[position]).flatten()
        p_series = pd.Series(flat, index=df_lr.index).dropna()
        df_lr[col_name] = p_series

    helper_cols = [target_rank_col, hedge_rank_col, 'const']
    df_lr = df_lr.drop(columns=helper_cols).dropna()
    return df_lr
start="1-1-1926")[0] industries.head() # The first model estimated is a rolling version of the CAPM that # regresses # the excess return of Technology sector firms on the excess return of the # market. # # The window is 60 months, and so results are available after the first 60 # (`window`) # months. The first 59 (`window - 1`) estimates are all `nan` filled. endog = industries.HiTec - factors.RF.values exog = sm.add_constant(factors["Mkt-RF"]) rols = RollingOLS(endog, exog, window=60) rres = rols.fit() params = rres.params.copy() params.index = np.arange(1, params.shape[0] + 1) params.head() params.iloc[57:62] params.tail() # We next plot the market loading along with a 95% point-wise confidence # interval. # The `alpha=False` omits the constant column, if present. fig = rres.plot_recursive_coefficient(variables=["Mkt-RF"], figsize=(14, 6)) # Next, the model is expanded to include all three factors, the excess
def calibrate(self, windowOLS, copula_lookback, recalibrate_n, **kwargs):
    """Estimate the rolling hedge ratio and the copula mispricing indices.

    Steps:
      1. Rolling OLS of y on x; the slope is stored in ``self.beta``.
      2. Log returns of x and y are mapped to uniforms with their empirical
         CDFs, and the copula family (clayton, frank or gumbel) with the
         smallest AIC is selected once on the full sample.
      3. Every ``recalibrate_n`` rows the copula parameter is re-estimated on
         the previous ``copula_lookback`` rows, and the mispricing indices for
         the next ``recalibrate_n`` rows are computed.

    Results are stored in ``self.beta``, ``self.startIdx``, ``self.MI_u_v``
    and ``self.MI_v_u``. Rows that are never predicted keep the neutral
    value 0.5.

    :param windowOLS: rolling OLS window (cast to int)
    :param copula_lookback: rows used to fit the copula parameter (cast to int)
    :param recalibrate_n: rows between re-calibrations (cast to int)
    :param kwargs: accepted for interface compatibility and ignored here
    """
    self.windowOLS = int(windowOLS)
    self.copula_lookback = int(copula_lookback)
    self.recalibrate_n = int(recalibrate_n)
    # Use the int-cast values for all indexing so that float-valued arguments
    # (e.g. from an optimiser) cannot break slicing.
    lookback = self.copula_lookback
    step = self.recalibrate_n

    # NOTE(review): the constant column 'c' is created but not passed to the
    # regression, so the fit has no intercept - confirm this is intended.
    df = pd.DataFrame({'y': self.y, 'x': self.x, 'c': 1})
    model = RollingOLS(endog=df['y'], exog=df['x'], window=self.windowOLS)
    rres = model.fit()
    self.beta = rres.params['x'].values.reshape(-1, )

    # Copula decision:
    df['x_log_ret'] = np.log(df.x) - np.log(df.x.shift(1))
    df['y_log_ret'] = np.log(df.y) - np.log(df.y.shift(1))

    # Convert the two returns series to two uniform values u and v using the
    # empirical distribution functions
    ecdf_x, ecdf_y = ECDF(df.x_log_ret), ECDF(df.y_log_ret)
    u = [ecdf_x(a) for a in df.x_log_ret]
    v = [ecdf_y(a) for a in df.y_log_ret]

    # Compute the Akaike Information Criterion (AIC) for different copulas and
    # choose the copula with minimum AIC.
    # NOTE(review): the first return of each series is NaN (shift(1)); check
    # how kendalltau and the helpers handle it for this full-sample selection.
    tau = stats.kendalltau(df.x_log_ret, df.y_log_ret)[0]  # Kendall's rank correlation
    AIC = {}  # key: copula family, value: [theta, AIC]
    for family in ['clayton', 'frank', 'gumbel']:
        param = self._parameter(family, tau)
        lpdf = [self._lpdf_copula(family, param, x, y) for (x, y) in zip(u, v)]
        # Replace nan with zero and inf with finite numbers in lpdf list
        lpdf = np.nan_to_num(lpdf)
        loglikelihood = sum(lpdf)
        AIC[family] = [param, -2 * loglikelihood + 2]
    # Choose the copula with the minimum AIC
    copula = min(AIC.items(), key=lambda item: item[1][1])[0]

    self.startIdx = lookback + 1  # Because first is NAN
    df['MI_u_v'] = 0.5
    df['MI_v_u'] = 0.5
    # Write through df.iloc[rows, col]. The chained form df.MI_u_v.iloc[...] = a
    # assigns to a temporary Series and does not update df under pandas
    # Copy-on-Write.
    col_u_v = df.columns.get_loc('MI_u_v')
    col_v_u = df.columns.get_loc('MI_v_u')

    for i in range(self.startIdx, len(df) - step, step):
        window = slice(i - lookback, i)
        predWindow = slice(i, i + step)

        x_hist = df.x_log_ret.iloc[window]
        y_hist = df.y_log_ret.iloc[window]
        x_forw = df.x_log_ret.iloc[predWindow]
        y_forw = df.y_log_ret.iloc[predWindow]

        # Estimate Kendall's rank correlation
        tau = stats.kendalltau(x_hist, y_hist)[0]
        # Estimate the copula parameter: theta
        theta = self._parameter(copula, tau)
        # Empirical distribution functions of the look-back returns
        ecdf_x, ecdf_y = ECDF(x_hist), ECDF(y_hist)

        # Now get future values
        a, b = self._misprice_index(copula, theta, ecdf_x(x_forw), ecdf_y(y_forw))
        df.iloc[predWindow, col_u_v] = a
        df.iloc[predWindow, col_v_u] = b

    self.MI_u_v = df.MI_u_v
    self.MI_v_u = df.MI_v_u