def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
    """Check a moving OLS fit against static fits on each window.

    A moving regression is fitted on dense and on sparse inputs and the two
    are compared.  Then, for every valid index of the moving fit, a static
    regression is fitted on the data truncated to that window and compared
    with the matching moving result via ``self.compare``.
    """
    window = np.linalg.matrix_rank(x.values) * 2

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        moving = ols(
            y=y, x=x, weights=weights, window_type=window_type,
            window=window, **kwds
        )

    # The sparse inputs must give the same results as the dense ones.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        sparse_moving = ols(
            y=y.to_sparse(), x=x.to_sparse(), weights=weights,
            window_type=window_type, window=window, **kwds
        )
    _compare_ols_results(moving, sparse_moving)

    index = moving._index
    is_rolling = window_type == "rolling"
    for n, i in enumerate(moving._valid_indices):
        # A rolling window starts ``window - 1`` positions back once ``i``
        # has reached ``window``; every other case starts at position 0.
        first_pos = (i - window + 1) if (is_rolling and i >= window) else 0
        prior_date = index[first_pos]
        date = index[i]

        x_iter = dict(
            (k, v.truncate(before=prior_date, after=date))
            for k, v in compat.iteritems(x)
        )
        y_iter = y.truncate(before=prior_date, after=date)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def test_wls_panel(self):
    """Weighted panel OLS must agree with the same fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

    # Blank out two observations in each column of y.
    for rows, col in (([1, 7], "A"), ([6, 15], "B"),
                      ([3, 20], "C"), ([5, 11], "D")):
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in x.iteritems()))

    weights = x.std("items")
    stack_weights = weights.stack()

    # Replace each stacked index with its tuple-index form.
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index.get_tuple_index()

    result = ols(y=y, x=x, weights=1 / weights)
    expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ["resid", "y_fitted"]:
        assert_almost_equal(getattr(result, attr).stack().values,
                            getattr(expected, attr).values)
def test_wls_panel(self):
    """Weighted panel OLS must agree with the same fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

    # Blank out two observations in each column of y.
    for rows, col in (([1, 7], "A"), ([6, 15], "B"),
                      ([3, 20], "C"), ([5, 11], "D")):
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in compat.iteritems(x)))

    weights = x.std("items")
    stack_weights = weights.stack()

    # Replace each stacked index with its tuple-index form.
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ["resid", "y_fitted"]:
        assert_almost_equal(getattr(result, attr).stack().values,
                            getattr(expected, attr).values)
def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
    """Check a moving OLS fit against static fits on each window.

    For every valid index of the moving fit, a static regression is fitted
    on the data truncated to that window and compared with the matching
    moving result via ``self.compare``.
    """
    window = 25  # must be larger than rank of x

    moving = ols(y=y, x=x, window_type=window_type, window=window, **kwds)

    index = moving._index
    is_rolling = window_type == 'rolling'
    for n, i in enumerate(moving._valid_indices):
        # A rolling window starts ``window - 1`` positions back once ``i``
        # has reached ``window``; every other case starts at position 0.
        first_pos = (i - window + 1) if (is_rolling and i >= window) else 0
        prior_date = index[first_pos]
        date = index[i]

        x_iter = dict(
            (k, v.truncate(before=prior_date, after=date))
            for k, v in x.iteritems()
        )
        y_iter = y.truncate(before=prior_date, after=date)

        static = ols(y=y_iter, x=x_iter, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def checkMovingOLS(self, x, y, window_type="rolling", **kwds):
    """Check a moving OLS fit against static fits on each window.

    For every valid index of the moving fit, a static regression is fitted
    on the data truncated to that window and compared with the matching
    moving result via ``self.compare``.
    """
    window = 25  # must be larger than rank of x

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        moving = ols(y=y, x=x, window_type=window_type, window=window, **kwds)

    index = moving._index
    is_rolling = window_type == "rolling"
    for n, i in enumerate(moving._valid_indices):
        # A rolling window starts ``window - 1`` positions back once ``i``
        # has reached ``window``; every other case starts at position 0.
        first_pos = (i - window + 1) if (is_rolling and i >= window) else 0
        prior_date = index[first_pos]
        date = index[i]

        x_iter = dict(
            (k, v.truncate(before=prior_date, after=date))
            for k, v in compat.iteritems(x)
        )
        y_iter = y.truncate(before=prior_date, after=date)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            static = ols(y=y_iter, x=x_iter, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
    """Check a moving OLS fit against static fits on each window.

    A moving regression is fitted on dense and on sparse inputs and the two
    are compared.  Then, for every valid index of the moving fit, a static
    regression is fitted on the data truncated to that window and compared
    with the matching moving result via ``self.compare``.
    """
    from scikits.statsmodels.tools.tools import rank

    window = rank(x.values) * 2

    moving = ols(y=y, x=x, weights=weights, window_type=window_type,
                 window=window, **kwds)

    # The sparse inputs must give the same results as the dense ones.
    sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(), weights=weights,
                        window_type=window_type, window=window, **kwds)
    _compare_ols_results(moving, sparse_moving)

    index = moving._index
    is_rolling = window_type == 'rolling'
    for n, i in enumerate(moving._valid_indices):
        # A rolling window starts ``window - 1`` positions back once ``i``
        # has reached ``window``; every other case starts at position 0.
        first_pos = (i - window + 1) if (is_rolling and i >= window) else 0
        prior_date = index[first_pos]
        date = index[i]

        x_iter = dict(
            (k, v.truncate(before=prior_date, after=date))
            for k, v in x.iteritems()
        )
        y_iter = y.truncate(before=prior_date, after=date)

        static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def test_wls_panel(self):
    """Weighted panel OLS must agree with the same fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({'x1': tm.makeTimeDataFrame(),
               'x2': tm.makeTimeDataFrame()})

    # Blank out two observations in each column of y.
    for rows, col in (([1, 7], 'A'), ([6, 15], 'B'),
                      ([3, 20], 'C'), ([5, 11], 'D')):
        y.ix[rows, col] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in x.iteritems()))

    weights = x.std('items')
    stack_weights = weights.stack()

    # Replace each stacked index with its tuple-index form.
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index.get_tuple_index()

    result = ols(y=y, x=x, weights=1/weights)
    expected = ols(y=stack_y, x=stack_x, weights=1/stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ['resid', 'y_fitted']:
        assert_almost_equal(getattr(result, attr).stack().values,
                            getattr(expected, attr).values)
def checkMovingOLS(self, window_type, x, y, **kwds):
    """Check a moving OLS fit against static fits on each window.

    For every valid index of the moving fit, a static regression is fitted
    on the data truncated to that window and compared with the matching
    moving result via ``self.compare``.
    """
    # ``rank`` is imported from whichever of the two module paths exists.
    try:
        from scikits.statsmodels.tools.tools import rank
    except ImportError:
        from scikits.statsmodels.tools import rank

    window = rank(x.values) * 2

    moving = ols(y=y, x=x, window_type=window_type, window=window, **kwds)

    # The date index comes from the dependent variable: the plain index for
    # a Series, the major axis for a LongPanel.
    if isinstance(moving.y, Series):
        index = moving.y.index
    elif isinstance(moving.y, LongPanel):
        index = moving.y.major_axis

    is_rolling = window_type == 'rolling'
    for n, i in enumerate(moving._valid_indices):
        # A rolling window starts ``window - 1`` positions back once ``i``
        # has reached ``window``; every other case starts at position 0.
        first_pos = (i - window + 1) if (is_rolling and i >= window) else 0
        prior_date = index[first_pos]
        date = index[i]

        x_iter = dict(
            (k, v.truncate(before=prior_date, after=date))
            for k, v in x.iteritems()
        )
        y_iter = y.truncate(before=prior_date, after=date)

        static = ols(y=y_iter, x=x_iter, **kwds)

        self.compare(static, moving, event_index=i, result_index=n)

    _check_non_raw_results(moving)
def test_wls_panel(self):
    """Weighted panel OLS must agree with the same fit on stacked data."""
    y = tm.makeTimeDataFrame()
    x = Panel({'x1': tm.makeTimeDataFrame(),
               'x2': tm.makeTimeDataFrame()})

    # Blank out two observations in each column of y.
    for rows, col in (([1, 7], 'A'), ([6, 15], 'B'),
                      ([3, 20], 'C'), ([5, 11], 'D')):
        y.iloc[rows, y.columns.get_loc(col)] = np.nan

    stack_y = y.stack()
    stack_x = DataFrame(dict((k, v.stack()) for k, v in x.iteritems()))

    weights = x.std('items')
    stack_weights = weights.stack()

    # Replace each stacked index with its tuple-index form.
    for stacked in (stack_y, stack_x, stack_weights):
        stacked.index = stacked.index._tuple_index

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, weights=1 / weights)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=stack_y, x=stack_x, weights=1 / stack_weights)

    assert_almost_equal(result.beta, expected.beta)

    for attr in ['resid', 'y_fitted']:
        assert_almost_equal(getattr(result, attr).stack().values,
                            getattr(expected, attr).values)
def test_plm_attrs(self):
    """Smoke test: ``resid`` is accessible on plain and windowed panel fits."""
    y = tm.makeTimeDataFrame()
    x = {
        "a": tm.makeTimeDataFrame(),
        "b": tm.makeTimeDataFrame(),
    }

    rmodel = ols(y=y, x=x, window=10)
    model = ols(y=y, x=x)

    # Bare attribute access: only checks that nothing raises.
    model.resid
    rmodel.resid
def test_auto_rolling_window_type(self):
    """Omitting ``window_type`` must give the same betas as "rolling"."""
    data = tm.makeTimeDataFrame()
    y = data.pop("A")

    window_model = ols(y=y, x=data, window=20, min_periods=10)
    rolling_model = ols(
        y=y, x=data, window=20, min_periods=10, window_type="rolling"
    )

    assert_frame_equal(window_model.beta, rolling_model.beta)
def checkForSeries(self, x, y, series_x, series_y, **kwds):
    """Compare the fit on ``x``/``y`` with the fit on the series inputs.

    Both regressions receive the same extra keyword arguments; the fit on
    ``series_x``/``series_y`` is the reference passed to ``self.compare``.
    """
    # Consistency check with simple OLS.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x, **kwds)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        reference = ols(y=series_y, x=series_x, **kwds)

    self.compare(reference, result)
def test_series_rhs(self):
    """A bare Series as ``x`` must behave like the dict ``{"x": series}``."""
    y = tm.makeTimeSeries()
    x = tm.makeTimeSeries()

    model = ols(y=y, x=x)
    expected = ols(y=y, x={"x": x})
    assert_series_equal(model.beta, expected.beta)

    # GH 5233/5250
    assert_series_equal(model.y_predict, model.predict(x=x))
def test_plm_ctor(self):
    """Smoke test: panel OLS accepts a dict of frames and a Panel as ``x``."""
    y = tm.makeTimeDataFrame()
    x = {
        "a": tm.makeTimeDataFrame(),
        "b": tm.makeTimeDataFrame(),
    }

    # Bare ``summary`` access: only checks that nothing raises.
    model = ols(y=y, x=x, intercept=False)
    model.summary

    model = ols(y=y, x=Panel(x))
    model.summary
def test_plm_attrs(self):
    """Smoke test: ``resid`` is accessible on plain and windowed panel fits."""
    y = tm.makeTimeDataFrame()
    x = {
        "a": tm.makeTimeDataFrame(),
        "b": tm.makeTimeDataFrame(),
    }

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        rmodel = ols(y=y, x=x, window=10)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x)

    # Bare attribute access: only checks that nothing raises.
    model.resid
    rmodel.resid
def test_auto_rolling_window_type(self):
    """Omitting ``window_type`` must give the same betas as "rolling"."""
    data = tm.makeTimeDataFrame()
    y = data.pop("A")

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        window_model = ols(y=y, x=data, window=20, min_periods=10)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        rolling_model = ols(
            y=y, x=data, window=20, min_periods=10, window_type="rolling"
        )

    assert_frame_equal(window_model.beta, rolling_model.beta)
def test_plm_ctor(self):
    """Smoke test: panel OLS accepts a dict of frames and a Panel as ``x``."""
    y = tm.makeTimeDataFrame()
    x = {
        "a": tm.makeTimeDataFrame(),
        "b": tm.makeTimeDataFrame(),
    }

    # Bare ``summary`` access: only checks that nothing raises.
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x, intercept=False)
    model.summary

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=Panel(x))
    model.summary
def test_series_rhs(self):
    """A bare Series as ``x`` must behave like the dict ``{'x': series}``."""
    y = tm.makeTimeSeries()
    x = tm.makeTimeSeries()

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        expected = ols(y=y, x={'x': x})

    assert_series_equal(model.beta, expected.beta)

    # GH 5233/5250
    assert_series_equal(model.y_predict, model.predict(x=x))
def testFiltering(self):
    """Check the index labels and values of ``_x``, ``_x_filtered``, ``_y``."""

    def sorted_level(frame, level):
        # Unique values of one index level, sorted, as an Index.
        return Index(sorted(set(frame.index.get_level_values(level))))

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2)

    x = result._x

    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assertTrue(exp_index.equals(index))

    index = sorted_level(x, 1)
    exp_index = Index(["A", "B"])
    self.assertTrue(exp_index.equals(index))

    x = result._x_filtered
    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1),
                       datetime(2000, 1, 3),
                       datetime(2000, 1, 4)])
    self.assertTrue(exp_index.equals(index))

    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    exp_x = [[6, 14, 1],
             [9, 17, 1],
             [30, 48, 1]]
    assert_almost_equal(exp_x, result._x.values)

    exp_x_filtered = [[6, 14, 1],
                      [9, 17, 1],
                      [30, 48, 1],
                      [11, 20, 1],
                      [12, 21, 1]]
    assert_almost_equal(exp_x_filtered, result._x_filtered.values)

    self.assertTrue(
        result._x_filtered.index.levels[0].equals(result.y_fitted.index))
def test_plm_lagged_y_predict(self):
    """Smoke test: ``lagged_y_predict`` runs on a windowed panel fit."""
    y = tm.makeTimeDataFrame()
    x = {
        'a': tm.makeTimeDataFrame(),
        'b': tm.makeTimeDataFrame(),
    }

    model = ols(y=y, x=x, window=10)

    # The return value is not inspected; the call must simply not raise.
    result = model.lagged_y_predict(2)
def test_plm_lagged_y_predict(self):
    """Smoke test: ``lagged_y_predict`` runs on a windowed panel fit."""
    y = tm.makeTimeDataFrame()
    x = {
        "a": tm.makeTimeDataFrame(),
        "b": tm.makeTimeDataFrame(),
    }

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x, window=10)

    # The return value is not inspected; the call must simply not raise.
    result = model.lagged_y_predict(2)
def test_y_predict(self):
    """``y_predict`` must equal ``y_fitted``, in labelled and raw form."""
    y = tm.makeTimeSeries()
    x = tm.makeTimeDataFrame()

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model1 = ols(y=y, x=x)

    assert_series_equal(model1.y_predict, model1.y_fitted)
    assert_almost_equal(model1._y_predict_raw, model1._y_fitted_raw)
def ols_results(self):
    """
    Returns the results of the regressions:
    x_1 ~ L(X)
    x_2 ~ L(X)
    ...
    x_k ~ L(X)

    where X = [x_1, x_2, ..., x_k]
    and L(X) represents the columns of X lagged 1, 2, ..., n lags
    (n is the user-provided number of lags).

    Returns
    -------
    dict
        One fitted ``ols`` result per column of ``self._data``, keyed by
        column name.
    """
    from pandas.stats.api import ols

    # Collect every lagged column under its generated parameter name.
    lagged_regressors = {}
    for lag in xrange(1, 1 + self._p):
        for col, series in self._lagged_data[lag].iteritems():
            lagged_regressors[_make_param_name(lag, col)] = series

    # Regress each column on the full set of lagged regressors.
    return dict(
        (col, ols(y=y, x=lagged_regressors, intercept=self._intercept))
        for col, y in self._data.iteritems()
    )
def test_predict(self):
    """Exercise ``predict`` with default, explicit and altered inputs."""
    y = tm.makeTimeSeries()
    x = tm.makeTimeDataFrame()

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model1 = ols(y=y, x=x)

    # No arguments, the original x, or the fitted beta all give y_predict.
    assert_series_equal(model1.predict(), model1.y_predict)
    assert_series_equal(model1.predict(x=x), model1.y_predict)
    assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict)

    # y_predict equals the dot product of [x, 1] with beta.
    exog = x.copy()
    exog['intercept'] = 1.
    rs = Series(np.dot(exog.values, model1.beta.values), x.index)
    assert_series_equal(model1.y_predict, rs)

    # Reversing the column order of x must not change the prediction.
    x2 = x.reindex(columns=x.columns[::-1])
    assert_series_equal(model1.predict(x=x2), model1.y_predict)

    # Shifted regressors: compare against the manual dot product.
    x3 = x2 + 10
    pred3 = model1.predict(x=x3)
    x3['intercept'] = 1.
    x3 = x3.reindex(columns=model1.beta.index)
    expected = Series(np.dot(x3.values, model1.beta.values), x3.index)
    assert_series_equal(expected, pred3)

    # An all-zero beta predicts zero everywhere.
    beta = Series(0., model1.beta.index)
    pred4 = model1.predict(beta=beta)
    assert_series_equal(Series(0., pred4.index), pred4)
def testWithTimeEffects(self):
    """Check the transformed y and x values when ``time_effects=True``."""
    result = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True)

    assert_almost_equal(result._y_trans.values.flat, [0, -0.5, 0.5])

    exp_x = [
        [0, 0],
        [-10.5, -15.5],
        [10.5, 15.5],
    ]
    assert_almost_equal(result._x_trans.values, exp_x)
def checkNonPooled(self, x, y, **kwds):
    """Smoke-test a non-pooled fit and the repr of each listed attribute."""
    # For now, just check that it doesn't crash
    result = ols(y=y, x=x, pool=False, **kwds)

    _check_repr(result)
    for name in NonPooledPanelOLS.ATTRIBUTES:
        _check_repr(getattr(result, name))
def test_predict(self):
    """Exercise ``predict`` with default, explicit and altered inputs."""
    y = tm.makeTimeSeries()
    x = tm.makeTimeDataFrame()
    model1 = ols(y=y, x=x)

    # No arguments, the original x, or the fitted beta all give y_predict.
    assert_series_equal(model1.predict(), model1.y_predict)
    assert_series_equal(model1.predict(x=x), model1.y_predict)
    assert_series_equal(model1.predict(beta=model1.beta), model1.y_predict)

    # y_predict equals the dot product of [x, 1] with beta.
    exog = x.copy()
    exog["intercept"] = 1.0
    rs = Series(np.dot(exog.values, model1.beta.values), x.index)
    assert_series_equal(model1.y_predict, rs)

    # Reversing the column order of x must not change the prediction.
    x2 = x.reindex(columns=x.columns[::-1])
    assert_series_equal(model1.predict(x=x2), model1.y_predict)

    # Shifted regressors: compare against the manual dot product.
    x3 = x2 + 10
    pred3 = model1.predict(x=x3)
    x3["intercept"] = 1.0
    x3 = x3.reindex(columns=model1.beta.index)
    expected = Series(np.dot(x3.values, model1.beta.values), x3.index)
    assert_series_equal(expected, pred3)

    # An all-zero beta predicts zero everywhere.
    beta = Series(0.0, model1.beta.index)
    pred4 = model1.predict(beta=beta)
    assert_series_equal(Series(0.0, pred4.index), pred4)
def trend_analysis_df(self, trend_dataframe):
    """Summarise each column of ``trend_dataframe`` into a trend table.

    For every column the result holds the mean over the last 7 days, the
    mean over the 7 days before that, the mean over the whole frame
    (labelled 'Daily Avg (Last 30 days)') and the slope of an OLS fit of
    the date-sorted values against their position (0, 1, 2, ...).

    Returns a DataFrame with one row per column of the input and an index
    named "Events".
    """
    # Window boundaries, all relative to today.
    today = datetime.date.today()
    week_ago = today - datetime.timedelta(7)
    eight_days_ago = today - datetime.timedelta(8)
    two_weeks_ago = today - datetime.timedelta(14)

    # Slice the two weekly windows by date string.
    last_week = trend_dataframe.ix[str(week_ago):str(today)]
    prior_week = trend_dataframe.ix[str(two_weeks_ago):str(eight_days_ago)]

    # Per-column means for each window and for the whole frame.
    last_week_mean = pd.Series(last_week.mean(),
                               name='Daily Avg (Last week)')
    prior_week_mean = pd.Series(prior_week.mean(),
                                name='Daily Avg (Prior week)')
    overall_mean = pd.Series(trend_dataframe.mean(),
                             name='Daily Avg (Last 30 days)')

    # Regression slope per column.
    slopes = pd.Series(name='Regress_coeff (30d)')
    for event in trend_dataframe:
        ordered = pd.Series(trend_dataframe[event],
                            index=trend_dataframe.index).sort_index()
        # Drop the dates: the regressor is the 0..n-1 position.
        positional = pd.Series(ordered.values).reset_index()
        slope = ols(x=positional["index"], y=positional[0]).beta['x']
        slopes = slopes.set_value(event, slope)

    # Assemble the summary table, one column per statistic.
    summary = pd.concat(
        [last_week_mean, prior_week_mean, overall_mean, slopes],
        axis=1)
    summary.index.name = "Events"
    return summary
def cointegrate(ticker1, df1, ts1, ticker2, df2, ts2):
    """Regress one series on another and test the residuals.

    Column ``ts2`` of ``df2`` is regressed on column ``ts1`` of ``df1``
    (both cast to float, aligned on ``df1``'s index).  The fit and its
    intercept are printed, the residuals are plotted with
    ``plot_residuals`` and then passed to ``Test_Stationarity`` for the
    Dickey-Fuller and Hurst-exponent checks.
    """
    column1 = '{}_{}'.format(ticker1, ts1)
    column2 = '{}_{}'.format(ticker2, ts2)

    df = pd.DataFrame(index=df1.index)
    df[column1] = df1[ts1].astype('float')
    df[column2] = df2[ts2].astype('float')

    # Hedge ratio ("beta") and intercept from the OLS fit.
    res = ols(y=df[column2], x=df[column1])
    print(res)
    beta_hr = res.beta.x
    intercept = res.beta.intercept
    print(intercept)

    # Residuals of the fitted linear combination.
    df['model'] = intercept + beta_hr * df[column1]
    df["res"] = df[column2] - df['model']

    plot_residuals(df)

    # Stationarity checks on the residual column.
    test = Test_Stationarity(df, 'res')
    test.dickey_fuller_test()
    test.test_hurst_exponent()
def testWithWeights(self):
    """Check transformed and filtered values of a weighted panel fit."""
    index = [datetime(2000, 1, day) for day in range(1, 6)]
    weights = DataFrame(np.arange(10).reshape((5, 2)),
                        index=index, columns=['A', 'B'])

    result = ols(y=self.panel_y2, x=self.panel_x2, weights=weights)

    assert_almost_equal(result._y_trans.values.flat, [0, 16, 25])

    exp_x = [
        [0, 0, 0],
        [36, 68, 4],
        [150, 240, 5],
    ]
    assert_almost_equal(result._x_trans.values, exp_x)

    # The filtered regressors are expected as listed here; the original
    # also carried a commented-out alternative:
    # [[0, 0, 0], [36, 68, 4], [150, 240, 5], [66, 120, 6], [84, 147, 7]]
    exp_x_filtered = [
        [6, 14, 1],
        [9, 17, 1],
        [30, 48, 1],
        [11, 20, 1],
        [12, 21, 1],
    ]
    assert_almost_equal(result._x_filtered.values, exp_x_filtered)
def checkOLS(self, exog, endog, x, y):
    """Compare pandas ``ols`` raw results with a statsmodels reference fit.

    The reference is ``sm.OLS`` on ``endog``/``exog`` with a constant
    appended; the pandas fit on ``x``/``y`` is also checked against its
    sparse-input counterpart.
    """
    reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=y, x=x)

    # check that sparse version is the same
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
    _compare_ols_results(result, sparse_result)

    # (statsmodels attribute, pandas raw attribute) pairs, compared lazily
    # in the listed order.
    for ref_name, raw_name in [('params', '_beta_raw'),
                               ('df_model', '_df_model_raw'),
                               ('df_resid', '_df_resid_raw')]:
        assert_almost_equal(getattr(reference, ref_name),
                            getattr(result, raw_name))

    assert_almost_equal(reference.fvalue, result._f_stat_raw[0])

    for ref_name, raw_name in [('pvalues', '_p_value_raw'),
                               ('rsquared', '_r2_raw'),
                               ('rsquared_adj', '_r2_adj_raw'),
                               ('resid', '_resid_raw'),
                               ('bse', '_std_err_raw'),
                               ('tvalues', '_t_stat_raw')]:
        assert_almost_equal(getattr(reference, ref_name),
                            getattr(result, raw_name))

    assert_almost_equal(reference.cov_params(), result._var_beta_raw)
    assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

    _check_non_raw_results(result)
def test_f_test(self):
    """Exercise ``f_test`` with a string, a list, and an unknown column."""
    x = tm.makeTimeDataFrame()
    y = x.pop('A')

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x)

    # Single combined hypothesis: only checks that it runs.
    hyp = '1*B+1*C+1*D=0'
    result = model.f_test(hyp)

    # One restriction per regressor must reproduce the model's F statistic.
    hyp = ['1*B=0', '1*C=0', '1*D=0']
    result = model.f_test(hyp)
    assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

    # 'A' was popped from x, so a hypothesis on it must raise.
    self.assertRaises(Exception, model.f_test, '1*A=0')
def test_r2_no_intercept(self):
    """Same betas but different r2 with an explicit vs. implicit intercept.

    Covers both the full-sample fit and the rolling (``window=20``) fit.
    """
    y = tm.makeTimeSeries()
    x = tm.makeTimeDataFrame()

    # Same regressors plus a hand-made constant column.
    x_with = x.copy()
    x_with['intercept'] = 1.

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model1 = ols(y=y, x=x)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model2 = ols(y=y, x=x_with, intercept=False)

    assert_series_equal(model1.beta, model2.beta)

    # TODO: can we infer whether the intercept is there...
    self.assertNotEqual(model1.r2, model2.r2)

    # rolling
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model1 = ols(y=y, x=x, window=20)
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model2 = ols(y=y, x=x_with, window=20, intercept=False)

    assert_frame_equal(model1.beta, model2.beta)
    self.assertTrue((model1.r2 != model2.r2).all())
def test_plm_exclude_dummy_corner(self):
    """Dropping entity dummy 'D' works; dropping 'E' must raise."""
    y = tm.makeTimeDataFrame()
    x = {
        'a': tm.makeTimeDataFrame(),
        'b': tm.makeTimeDataFrame(),
    }

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x, entity_effects=True,
                    dropped_dummies={'entity': 'D'})
    # Bare ``summary`` access: only checks that nothing raises.
    model.summary

    def fit_dropping_e():
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            ols(y=y, x=x, entity_effects=True,
                dropped_dummies={'entity': 'E'})

    self.assertRaises(Exception, fit_dropping_e)
def test_plm_f_test(self):
    """Exercise ``f_test`` on a panel fit with string and list hypotheses."""
    y = tm.makeTimeDataFrame()
    x = {
        'a': tm.makeTimeDataFrame(),
        'b': tm.makeTimeDataFrame(),
    }

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x)

    # Single combined hypothesis: only checks that it runs.
    hyp = '1*a+1*b=0'
    result = model.f_test(hyp)

    # One restriction per regressor must reproduce the model's F statistic.
    hyp = ['1*a=0', '1*b=0']
    result = model.f_test(hyp)
    assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])
def testWithXEffectsAndDroppedDummies(self):
    """Check the design matrix with x-effects on 'x1', dropping dummy 30."""
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2,
                     x_effects=['x1'],
                     dropped_dummies={'x1': 30})

    res = result._x
    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    exp_x = DataFrame(
        [[1., 0., 14., 1.],
         [0, 1, 17, 1],
         [0, 0, 48, 1]],
        columns=['x1_6', 'x1_9', 'x2', 'intercept'],
        index=res.index,
        dtype=float)

    # Align the expected columns with the actual column order.
    assert_frame_equal(res, exp_x.reindex(columns=res.columns))
def testWithEntityEffectsAndDroppedDummies(self):
    """Check the design matrix with entity effects, dropping dummy 'B'."""
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2,
                     entity_effects=True,
                     dropped_dummies={'entity': 'B'})

    # .flat is flatiter instance
    assert_almost_equal(result._y.values.flat, [1, 4, 5],
                        check_dtype=False)

    exp_x = DataFrame(
        [[1., 6., 14., 1.],
         [1, 9, 17, 1],
         [0, 30, 48, 1]],
        index=result._x.index,
        columns=['FE_A', 'x1', 'x2', 'intercept'],
        dtype=float)

    # Align the expected columns with the actual column order.
    tm.assert_frame_equal(result._x, exp_x.loc[:, result._x.columns])
def testWithXEffects(self):
    """Check the design matrix when 'x1' is expanded into x-effects."""
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])

    # .flat is flatiter instance
    assert_almost_equal(result._y.values.flat, [1, 4, 5],
                        check_dtype=False)

    res = result._x
    exp_x = DataFrame(
        [[0., 0., 14., 1.],
         [0, 1, 17, 1],
         [1, 0, 48, 1]],
        columns=['x1_30', 'x1_9', 'x2', 'intercept'],
        index=res.index,
        dtype=float)

    # The dummy columns are expected as uint8 rather than float.
    dummy_cols = ['x1_30', 'x1_9']
    exp_x[dummy_cols] = exp_x[dummy_cols].astype(np.uint8)

    assert_frame_equal(res, exp_x.reindex(columns=res.columns))
def test_various_attributes(self):
    """Smoke test: several attributes of a rolling fit are Series.

    Only checks that the attributes can be computed and have the expected
    type; correctness is tested elsewhere.
    """
    # just make sure everything "works". test correctness elsewhere
    x = DataFrame(np.random.randn(100, 5))
    y = np.random.randn(100)
    model = ols(y=y, x=x, window=20)

    series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol']

    for attr in series_attrs:
        value = getattr(model, attr)
        # ``assertTrue`` replaces the deprecated ``assert_`` alias, which
        # was removed from unittest in Python 3.12.
        self.assertTrue(isinstance(value, Series))

    # works
    model._results
def test_plm_exclude_dummy_corner(self):
    """Dropping entity dummy 'D' works; dropping 'E' must raise."""
    y = tm.makeTimeDataFrame()
    x = {
        'a': tm.makeTimeDataFrame(),
        'b': tm.makeTimeDataFrame(),
    }

    model = ols(y=y, x=x, entity_effects=True,
                dropped_dummies={'entity': 'D'})
    # Bare ``summary`` access: only checks that nothing raises.
    model.summary

    self.assertRaises(Exception, ols, y=y, x=x, entity_effects=True,
                      dropped_dummies={'entity': 'E'})
def test_f_test(self):
    """Exercise ``f_test`` with a string, a list, and an unknown column."""
    x = tm.makeTimeDataFrame()
    y = x.pop('A')

    model = ols(y=y, x=x)

    # Single combined hypothesis: only checks that it runs.
    hyp = '1*B+1*C+1*D=0'
    result = model.f_test(hyp)

    # One restriction per regressor must reproduce the model's F statistic.
    hyp = ['1*B=0', '1*C=0', '1*D=0']
    result = model.f_test(hyp)
    assert_almost_equal(result['f-stat'], model.f_stat['f-stat'])

    # 'A' was popped from x, so a hypothesis on it must raise.
    self.assertRaises(Exception, model.f_test, '1*A=0')
def test_various_attributes(self):
    """Smoke test: several attributes of a rolling fit are Series.

    Only checks that the attributes can be computed and have the expected
    type; correctness is tested elsewhere.
    """
    # just make sure everything "works". test correctness elsewhere
    x = DataFrame(np.random.randn(100, 5))
    y = np.random.randn(100)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=y, x=x, window=20)

    for attr in ['rank', 'df', 'forecast_mean', 'forecast_vol']:
        tm.assertIsInstance(getattr(model, attr), Series)

    # works
    model._results
def testWithXEffectsAndConversionAndDroppedDummies(self):
    """Check x-effects on 'x1' and 'x2' with the 'x2' dummy 'foo' dropped."""
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y3, x=self.panel_x3,
                     x_effects=['x1', 'x2'],
                     dropped_dummies={'x2': 'foo'})

    # .flat is flatiter instance
    assert_almost_equal(result._y.values.flat, [1, 2, 3, 4],
                        check_dtype=False)

    exp_x = np.array(
        [[0, 0, 0, 0, 1],
         [1, 0, 1, 0, 1],
         [0, 1, 0, 1, 1],
         [0, 0, 0, 0, 1]],
        dtype=np.float64)
    assert_almost_equal(result._x.values, exp_x)

    exp_index = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz', 'intercept'])
    self.assert_index_equal(exp_index, result._x.columns)
def runTest(self, stock1, stock2):
    """Run the pair statistics for two tickers and return a ``Pair``.

    Adjusted closes for both tickers are downloaded from the "yahoo"
    source between ``self.start`` and ``self.end`` and transformed with
    ``self.formula``.  The second series is regressed on the first; the
    residuals (computed without the intercept) feed the ADF test,
    ``self.hurst`` and ``self.half_life``.

    Parameters
    ----------
    stock1, stock2 : ticker symbols passed to ``web.DataReader``; also
        used as column names.

    Returns
    -------
    Pair
        Built from both tickers, the regression beta and r2, the ADF test
        statistic and p-value, the Hurst exponent and the half-life.
    """
    start = self.start
    end = self.end
    first = web.DataReader(stock1, "yahoo", start, end)
    second = web.DataReader(stock2, "yahoo", start, end)

    # Build real lists: on Python 3 ``map`` returns a lazy one-shot
    # iterator, which is not a safe value for a DataFrame column.
    first["Value"] = [self.formula(price)
                      for price in first["Adj Close"].tolist()]
    second["Value"] = [self.formula(price)
                       for price in second["Adj Close"].tolist()]

    df = pd.DataFrame(index=first.index)
    df[stock1] = first["Value"]
    df[stock2] = second["Value"]

    res = ols(y=df[stock2], x=df[stock1])
    beta = res.beta.x
    R2 = res.r2

    df["res"] = df[stock2] - beta * df[stock1]

    # Runs CADF and get results
    cadf = ts.adfuller(df["res"])
    testStat = cadf[0]
    pValue = cadf[1]

    # Calculates Hurst Exponent
    hurst = self.hurst(df["res"])

    # First differences of the residuals, paired with the residuals minus
    # their last element so both lists have the same length.
    results = df["res"].tolist()
    delta = [curr - prev for prev, curr in zip(results, results[1:])]
    results.pop()

    halfLife = self.half_life(delta, results)

    pair = Pair(stock1, stock2, beta, R2, testStat, pValue, hurst, halfLife)
    return pair
def checkOLS(self, exog, endog, x, y):
    """Compare pandas ``ols`` raw results with a statsmodels reference fit.

    The reference is ``sm.OLS`` on ``endog`` and ``exog`` with a constant
    added; the pandas fit uses ``x`` and ``y``.
    """
    reference = sm.OLS(endog, sm.add_constant(exog)).fit()
    result = ols(y=y, x=x)

    # (statsmodels attribute, pandas raw attribute) pairs, compared lazily
    # in the listed order.
    for ref_name, raw_name in [('params', '_beta_raw'),
                               ('df_model', '_df_model_raw'),
                               ('df_resid', '_df_resid_raw')]:
        assert_almost_equal(getattr(reference, ref_name),
                            getattr(result, raw_name))

    assert_almost_equal(reference.fvalue, result._f_stat_raw[0])

    for ref_name, raw_name in [('pvalues', '_p_value_raw'),
                               ('rsquared', '_r2_raw'),
                               ('rsquared_adj', '_r2_adj_raw'),
                               ('resid', '_resid_raw'),
                               ('bse', '_std_err_raw')]:
        assert_almost_equal(getattr(reference, ref_name),
                            getattr(result, raw_name))

    assert_almost_equal(reference.t(), result._t_stat_raw)
    assert_almost_equal(reference.cov_params(), result._var_beta_raw)
    assert_almost_equal(reference.fittedvalues, result._y_fitted_raw)

    _check_non_raw_results(result)
def _check_wls(self, x, y, weights):
    """Compare weighted pandas ``ols`` with ``sm.WLS`` on the same data.

    Rows with any missing value in x, y or the weights are dropped before
    the statsmodels fit.  Afterwards the rolling and expanding moving-OLS
    checks are run with the same inputs.
    """
    result = ols(y=y, x=x, weights=1/weights)

    # Put x, y and the weights in one frame so dropna() removes a row
    # from all three together.
    combined = x.copy()
    combined['__y__'] = y
    combined['__weights__'] = weights
    combined = combined.dropna()

    endog = combined.pop('__y__').values
    aweights = combined.pop('__weights__').values
    exog = sm.add_constant(combined.values, prepend=False)

    sm_result = sm.WLS(endog, exog, weights=1/aweights).fit()

    assert_almost_equal(sm_result.params, result._beta_raw)
    assert_almost_equal(sm_result.resid, result._resid_raw)

    for window_type in ('rolling', 'expanding'):
        self.checkMovingOLS(window_type, x, y, weights=weights)
def test_predict_longer_exog(self):
    """``y_predict`` must be indexed like ``x`` when ``x`` is longer than ``y``."""
    # Twenty years of regressor values ...
    exogenous = {
        "1998": "4760", "1999": "5904", "2000": "4504", "2001": "9808",
        "2002": "4241", "2003": "4086", "2004": "4687", "2005": "7686",
        "2006": "3740", "2007": "3075", "2008": "3753", "2009": "4679",
        "2010": "5468", "2011": "7154", "2012": "4292", "2013": "4283",
        "2014": "4595", "2015": "9194", "2016": "4221", "2017": "4520"
    }
    # ... but only the first ten years of the dependent variable.
    endogenous = {
        "1998": "691", "1999": "1580", "2000": "80", "2001": "1450",
        "2002": "555", "2003": "956", "2004": "877", "2005": "614",
        "2006": "468", "2007": "191"
    }

    endog = Series(endogenous)
    exog = Series(exogenous)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        model = ols(y=endog, x=exog)

    pred = model.y_predict
    self.assertTrue(pred.index.equals(exog.index))
def testFiltering(self):
    """Check the index labels and values of ``_x``, ``_x_filtered``, ``_y``."""

    def sorted_level(frame, level):
        # Unique values of one index level, sorted, as an Index.
        return Index(sorted(set(frame.index.get_level_values(level))))

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2)

    x = result._x

    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assertTrue(exp_index.equals(index))

    index = sorted_level(x, 1)
    exp_index = Index(['A', 'B'])
    self.assertTrue(exp_index.equals(index))

    x = result._x_filtered
    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1),
                       datetime(2000, 1, 3),
                       datetime(2000, 1, 4)])
    self.assertTrue(exp_index.equals(index))

    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    exp_x = [[6, 14, 1],
             [9, 17, 1],
             [30, 48, 1]]
    assert_almost_equal(exp_x, result._x.values)

    exp_x_filtered = [[6, 14, 1],
                      [9, 17, 1],
                      [30, 48, 1],
                      [11, 20, 1],
                      [12, 21, 1]]
    assert_almost_equal(exp_x_filtered, result._x_filtered.values)

    self.assertTrue(
        result._x_filtered.index.levels[0].equals(result.y_fitted.index))
def testFiltering(self):
    """Check the index labels and values of ``_x``, ``_x_filtered``, ``_y``."""

    def sorted_level(frame, level):
        # Unique values of one index level, sorted, as an Index.
        return Index(sorted(set(frame.index.get_level_values(level))))

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = ols(y=self.panel_y2, x=self.panel_x2)

    x = result._x

    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assert_index_equal(exp_index, index)

    index = sorted_level(x, 1)
    exp_index = Index(['A', 'B'])
    self.assert_index_equal(exp_index, index)

    x = result._x_filtered
    index = sorted_level(x, 0)
    exp_index = Index([datetime(2000, 1, 1),
                       datetime(2000, 1, 3),
                       datetime(2000, 1, 4)])
    self.assert_index_equal(exp_index, index)

    # .flat is flatiter instance
    assert_almost_equal(result._y.values.flat, [1, 4, 5],
                        check_dtype=False)

    exp_x = np.array([[6, 14, 1],
                      [9, 17, 1],
                      [30, 48, 1]], dtype=np.float64)
    assert_almost_equal(exp_x, result._x.values)

    exp_x_filtered = np.array([[6, 14, 1],
                               [9, 17, 1],
                               [30, 48, 1],
                               [11, 20, 1],
                               [12, 21, 1]], dtype=np.float64)
    assert_almost_equal(exp_x_filtered, result._x_filtered.values)

    self.assert_index_equal(result._x_filtered.index.levels[0],
                            result.y_fitted.index)
def cadf_test(tickdict1, tickdict2, begdate, enddate):
    """Run an ADF test on the residuals of regressing one close on another.

    Daily closes for both instruments are downloaded with
    ``tushare.get_k_data``, the second is regressed on the first, and
    ``adfuller`` is run on ``second - beta * first`` (the intercept is not
    subtracted).  The result is pretty-printed and returned.

    Parameters
    ----------
    tickdict1, tickdict2 : dict
        Each must provide the keys 'code' (passed to tushare) and 'symbo'
        (used as the column name).
        NOTE(review): the key is spelled 'symbo' - confirm against callers.
    begdate, enddate :
        Passed through as ``start``/``end`` to ``get_k_data``.

    Returns
    -------
    The value returned by ``statsmodels.tsa.stattools.adfuller``.
    """
    # Function-scope imports as in the original, minus the ones the body
    # never used (datetime, numpy, matplotlib.pyplot, matplotlib.dates).
    import pprint

    import pandas as pd
    import statsmodels.tsa.stattools as sts
    import tushare as ts
    from pandas.stats.api import ols

    print(begdate, enddate)
    ticker1 = tickdict1['code']
    ticker2 = tickdict2['code']
    symbol1 = tickdict1['symbo']
    symbol2 = tickdict2['symbo']
    print(ticker1, ticker2)

    df1 = ts.get_k_data(ticker1, start=begdate, end=enddate)
    df2 = ts.get_k_data(ticker2, start=begdate, end=enddate)

    # Index both frames by date so the closes align on assignment below.
    df1.index = df1['date']
    df2.index = df2['date']

    df = pd.DataFrame(index=df1['date'])
    df[symbol1] = df1["close"]
    df[symbol2] = df2["close"]

    # Calculate optimal hedge ratio "beta"
    res = ols(y=df[symbol2], x=df[symbol1])
    beta_hr = res.beta.x

    # Calculate the residuals of the linear combination
    df["res"] = df[symbol2] - beta_hr * df[symbol1]

    # Calculate and output the CADF test on the residuals
    cadf = sts.adfuller(df["res"])
    pprint.pprint(cadf)
    return cadf
def testFiltering(self):
    """Check which observations ``ols`` keeps for the ``panel_y2``/``panel_x2`` fixtures.

    Verifies the major/minor axis labels actually referenced by ``_x`` and
    ``_x_filtered``, the values of ``_y``, ``_x`` and ``_x_filtered``, and
    that the major axis of ``_x_filtered`` equals the index of ``y_fitted``.
    """
    result = ols(y=self.panel_y2, x=self.panel_x2)

    def labels_in_use(axis, codes):
        # Sorted, de-duplicated axis entries selected by the integer codes.
        return Index(sorted(set([axis[i] for i in codes])))

    regressors = result._x
    expected_major = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
    self.assertTrue(expected_major.equals(
        labels_in_use(regressors.major_axis, regressors.major_labels)))

    expected_minor = Index(['A', 'B'])
    self.assertTrue(expected_minor.equals(
        labels_in_use(regressors.minor_axis, regressors.minor_labels)))

    filtered = result._x_filtered
    expected_filtered_major = Index([datetime(2000, 1, 1),
                                     datetime(2000, 1, 3),
                                     datetime(2000, 1, 4)])
    self.assertTrue(expected_filtered_major.equals(
        labels_in_use(filtered.major_axis, filtered.major_labels)))

    assert_almost_equal(result._y.values.flat, [1, 4, 5])

    expected_x = [[6, 14, 1],
                  [9, 17, 1],
                  [30, 48, 1]]
    assert_almost_equal(expected_x, result._x.values)

    expected_x_filtered = [[6, 14, 1],
                           [9, 17, 1],
                           [30, 48, 1],
                           [11, 20, 1],
                           [12, 21, 1]]
    assert_almost_equal(expected_x_filtered, result._x_filtered.values)

    self.assertTrue(result._x_filtered.major_axis.equals(
        result.y_fitted.index))
def fill_regressed_data(S):
    """Fill missing returns by linear combinations of assets without missing returns.

    Works on a copy of ``S``. Log returns are computed for every column;
    columns whose returns have no missing values serve as regressors. Each
    remaining column is regressed on them with ``ols`` (with intercept),
    the fitted model predicts the returns for the rows where that column is
    missing, and the predictions are turned back into prices that are used
    to fill the gaps.

    Parameters
    ----------
    S : DataFrame
        Prices, one column per asset; the logarithm is taken, so values are
        expected to be positive.

    Returns
    -------
    DataFrame
        Copy of ``S`` with missing values filled where a prediction exists.
    """
    S = S.copy()

    # Log returns. The first row has no predecessor, so it is set to 0 to
    # keep fully observed columns free of NaN.
    R = np.log(S).diff()
    R.iloc[0] = 0
    X = R.dropna(axis=1)

    for col in set(S.columns) - set(X.columns):
        # Restore the NaN in the first row of the incomplete column.
        # A single .iloc assignment is used instead of the chained
        # ``R[col].iloc[0] = ...``, which may write to a temporary copy.
        R.iloc[0, R.columns.get_loc(col)] = np.nan
        y = R[col]

        # fit regression
        res = ols(y=y, x=X, intercept=True)
        pred = res.predict(x=X[y.isnull()])

        # get absolute prices: accumulate the predicted log returns, then
        # shift so the last accumulated value equals the log of the first
        # available price of this column
        pred = pred.cumsum()
        pred += np.log(S[col].dropna().iloc[0]) - pred.iloc[-1]

        # fill missing data
        S[col] = S[col].fillna(np.exp(pred))

    return S
def calc_positive_negative_dates(data, pos_x_min=0.005, pos_x_max=0.01,
                                 neg_y_max=-0.005, neg_y_min=-0.01):
    """Regress open-to-close return on the overnight gap for selected rows.

    Rows of ``data`` are kept when the gap ``(OPEN - PREV_CLOSE) / PREV_CLOSE``
    lies strictly inside ``(pos_x_min, pos_x_max)`` or strictly inside
    ``(neg_y_min, neg_y_max)``. For those rows a scatter plot of the gap
    against the open-to-close return is saved as a ``.jpg`` named after the
    thresholds, and the ``ols`` fit of open-to-close return on the gap is
    printed.

    Parameters
    ----------
    data : DataFrame
        Must expose ``OPEN``, ``PREV_CLOSE`` and ``CLOSE`` columns.
    pos_x_min, pos_x_max : float
        Exclusive bounds of the positive gap band.
    neg_y_max, neg_y_min : float
        Exclusive upper/lower bounds of the negative gap band.

    Returns
    -------
    None
    """
    # The gap is computed once and reused for both bands.
    gap = (data.OPEN - data.PREV_CLOSE) / data.PREV_CLOSE
    in_positive_band = (gap > pos_x_min) & (gap < pos_x_max)
    in_negative_band = (gap < neg_y_max) & (gap > neg_y_min)
    pdata = data[in_positive_band | in_negative_band]

    # calculate prev close to open return and open to close return and regress
    prev_close_open = (pdata.OPEN - pdata.PREV_CLOSE) / pdata.PREV_CLOSE
    open_close = (pdata.CLOSE - pdata.OPEN) / pdata.OPEN

    # Title and file name share the same threshold label.
    label = 'Posb(%03f,%03f)andNeg(%03f,%03f)' % (
        pos_x_min, pos_x_max, neg_y_max, neg_y_min)

    fig = plt.figure()
    plt.scatter(x=prev_close_open, y=open_close)
    fig.suptitle(label, fontsize=20)
    plt.xlabel('prev_close_open', fontsize=10)
    plt.ylabel('open_close', fontsize=10)
    plt.savefig(label + '.jpg')

    res = ols(y=open_close, x=prev_close_open)
    # Function-call form prints the same text on Python 2 and also parses
    # on Python 3, unlike the former ``print res`` statement.
    print(res)
def regression_without_ccy_nation(Currency, typ):
    """Fit six OLS regressions of issuance amount on market/domicile factors.

    Loads ``<ROOT_DIR>cleaned data/regression data/<typ>/<Currency>_<typ>.xlsx``,
    keeps the rows whose ``Currency`` column equals the
    ``NATION_CURRENCY_DICT`` key mapped to ``Currency``, adds the principal
    amount normalised by its trailing 12-month and 24-month mean and
    standard deviation, drops rows where ``Currency`` equals ``Nation``, and
    regresses the raw and normalised amounts on the factor columns.

    Parameters
    ----------
    Currency : str
        Used in the file name and looked up among the values of
        ``NATION_CURRENCY_DICT``.
    typ : str
        Sub-folder and file-name suffix of the workbook (e.g. ``'Corp'``).

    Returns
    -------
    tuple
        ``(res1, res2, res3, res4, res5, res6, reg_df, correl_matrix)``.
        ``res1``-``res3`` use all eight factors (raw, 1y-normalised,
        2y-normalised amount); ``res4``-``res6`` repeat them without the two
        credit factors. ``reg_df`` is the frame used for the regressions and
        ``correl_matrix`` is the correlation matrix of the eight factors.
    """
    amount_col = 'PrincipalAmount($mil)'
    rate_factors = ['r_market', 'Butterfly_market', 'Curve_market',
                    'r_domicile', 'Butterfly_domicile', 'Curve_domicile']
    all_factors = rate_factors + ['credit_market', 'credit_domicile']

    reg_df = pd.read_excel(ROOT_DIR + 'cleaned data/regression data/' +
                           typ + '/' + Currency + '_' + typ + '.xlsx',)

    # Reverse lookup: the dict key whose value is ``Currency``.
    key_ccy = list(NATION_CURRENCY_DICT.keys())[
        list(NATION_CURRENCY_DICT.values()).index(Currency)]
    reg_df = reg_df[reg_df.Currency == key_ccy]

    n = len(reg_df.index)
    mu1 = np.zeros(n)
    sigma1 = np.zeros(n)
    mu2 = np.zeros(n)
    sigma2 = np.zeros(n)

    dates = reg_df['Date']
    amounts = reg_df[amount_col]
    # Iterate by position: after the filter above the index labels are no
    # longer guaranteed to be 0..n-1, so the former label lookup
    # ``reg_df['Date'][i]`` could raise KeyError or read the wrong row.
    for i, date_obs in enumerate(dates):
        trailing_1y = ((dates > date_obs + relativedelta(months=-12)) &
                       (dates <= date_obs))
        trailing_2y = ((dates > date_obs + relativedelta(months=-24)) &
                       (dates <= date_obs))
        mu1[i] = np.mean(amounts[trailing_1y])
        mu2[i] = np.mean(amounts[trailing_2y])
        sigma1[i] = np.std(amounts[trailing_1y])
        sigma2[i] = np.std(amounts[trailing_2y])

    # NOTE(review): a window holding a single observation has zero standard
    # deviation, so the division below can yield inf/NaN -- confirm intended.
    reg_df['normal_amount_1y'] = (reg_df[amount_col] - mu1) / sigma1
    reg_df['normal_amount_2y'] = (reg_df[amount_col] - mu2) / sigma2

    # Move the last column to the front.
    cols = reg_df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    reg_df = reg_df[cols]

    reg_df = reg_df[reg_df['Currency'] != reg_df['Nation']]

    res1 = ols(y=reg_df[amount_col], x=reg_df[all_factors])
    res2 = ols(y=reg_df['normal_amount_1y'], x=reg_df[all_factors])
    res3 = ols(y=reg_df['normal_amount_2y'], x=reg_df[all_factors])
    res4 = ols(y=reg_df[amount_col], x=reg_df[rate_factors])
    res5 = ols(y=reg_df['normal_amount_1y'], x=reg_df[rate_factors])
    res6 = ols(y=reg_df['normal_amount_2y'], x=reg_df[rate_factors])

    correl_matrix = reg_df[all_factors].corr()
    return res1, res2, res3, res4, res5, res6, reg_df, correl_matrix
def filter_obs(hob_df, filter_water_table, filter_stresses):
    '''Applies depth and measurement variability criteria
    to the head observations.

    Observations are grouped by ``site_no``. For each site the mean well
    depth, mean depth to water and standard deviation of depth to water are
    computed and tested against module-level thresholds. Returns a dict
    mapping each excluded site name to a text description of the criterion
    that excluded it.
    '''
    exclusion_dict = {}

    for site_name, site_df in hob_df.groupby('site_no'):
        mean_well_depth = site_df['well_depth_va'].mean()
        mean_dtw = site_df['lev_va'].mean()
        std_dtw = site_df['lev_va'].std()

        if filter_water_table == True:
            # Reduce the dataframe to only those sites that are likely to
            # measure the water table.

            # Keep all wells shallower than minimum depth
            if mean_dtw <= min_dtw:
                continue

            # Criteria: Exclude depths to water that are deeper than the
            # likely water table elevation
            if mean_dtw > max_dtw:
                exclusion_dict[site_name] = 'dtw = %i > max dtw' % (mean_dtw)
                continue

            # Criteria: Exclude wells that are likely measuring a confined
            # aquifer
            saturated_thickness = mean_well_depth - mean_dtw
            if (saturated_thickness > (sat_thick_mult * mean_well_depth)
                    and (mean_well_depth > land_surface_buffer)):
                exclusion_dict[site_name] = 'sat thick %i > %0.2f * dow' % (
                    saturated_thickness, sat_thick_mult)
                continue

        if not (filter_stresses == True):
            continue

        # Keep all wells shallower than minimum depth
        if mean_dtw <= min_dtw:
            continue

        # Criteria: Exclude wells with a potential trend over time.
        # Perform ordinary least squares and test both the slope
        # and R^2 of the best fit
        elapsed = site_df['lev_dt'] - site_df['lev_dt'].min()
        site_df['date_delta'] = elapsed / np.timedelta64(1, 'D')
        fit = ols(y=site_df['lev_va'], x=site_df['date_delta'])
        trend_per_year = fit.beta['x'] * 365.  # slope converted to length/year
        r_squared = fit.r2                     # R^2 value for the ols fit

        if (trend_per_year > max_trend) and (r_squared > max_r2):
            exclusion_dict[site_name] = (
                'Apparent temporal trend = %0.2f m/year > %0.2f & R^2 = %0.2f > %0.2f'
                % (trend_per_year, max_trend, r_squared, max_r2))
            continue

        # Criteria: Exclude wells whose measurements vary excessively
        # relative to the mean depth to water
        if std_dtw > (std_mult * mean_dtw):
            exclusion_dict[site_name] = 'Measurement std = %4.2f > %0.2f * dtw' % (
                std_dtw, std_mult)
            continue

    return exclusion_dict
def checkForSeries(self, x, y, series_x, series_y, **kwds):
    """Fit ``ols`` on ``(x, y)`` and on ``(series_x, series_y)`` and compare.

    Both fits receive the same extra keyword arguments; the fit on the
    ``series_*`` inputs is passed to ``self.compare`` as the reference.
    """
    # Consistency check with simple OLS.
    fitted = ols(y=y, x=x, **kwds)
    baseline = ols(y=series_y, x=series_x, **kwds)
    self.compare(baseline, fitted)
if __name__ == "__main__":
    # Script entry point: download adjusted closes for AREX and WLL, plot
    # them, estimate the hedge ratio by OLS and run an ADF test on the
    # resulting residual series.
    start, end = datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1)

    arex = web.DataReader("AREX", "yahoo", start, end)
    wll = web.DataReader("WLL", "yahoo", start, end)

    # One column of adjusted close prices per ticker, indexed like AREX.
    df = pd.DataFrame(index=arex.index)
    for ticker, quotes in (("AREX", arex), ("WLL", wll)):
        df[ticker] = quotes["Adj Close"]

    # Plot the two time series
    plot_price_series(df, "AREX", "WLL")

    # Display a scatter plot of the two time series
    plot_scatter_series(df, "AREX", "WLL")

    # Calculate optimal hedge ratio "beta"
    res = ols(y=df["WLL"], x=df["AREX"])
    beta_hr = res.beta.x

    # Calculate the residuals of the linear combination
    df["res"] = df["WLL"] - beta_hr * df["AREX"]

    # Plot the residuals
    plot_residuals(df)

    # Calculate and output the CADF test on the residuals
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
def test_summary_many_terms(self):
    """Smoke test: ``summary`` can be evaluated for a model with 20 regressors."""
    regressors = DataFrame(np.random.randn(100, 20))
    response = np.random.randn(100)
    fitted = ols(y=response, x=regressors)
    # Accessing the attribute is the whole test; the value is not inspected.
    fitted.summary
def test_y_predict(self):
    """``y_predict`` must equal ``y_fitted``, for both the labelled and raw forms."""
    response = tm.makeTimeSeries()
    regressors = tm.makeTimeDataFrame()
    fitted = ols(y=response, x=regressors)

    assert_series_equal(fitted.y_predict, fitted.y_fitted)
    assert_almost_equal(fitted._y_predict_raw, fitted._y_fitted_raw)
def test_series_rhs(self):
    """A Series passed as ``x`` must give the same beta as ``{'x': series}``."""
    response = tm.makeTimeSeries()
    regressor = tm.makeTimeSeries()

    from_series = ols(y=response, x=regressor)
    from_dict = ols(y=response, x={'x': regressor})

    assert_series_equal(from_series.beta, from_dict.beta)