Пример #1
0
    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        """Compare a moving OLS fit with static fits over each window.

        The moving model is first compared with the same model fitted on the
        sparse versions of ``x`` and ``y``.  Then, for every valid index of
        the moving model, a static model is fitted on the data truncated to
        the matching date range and compared with the moving result.
        """
        # The window is twice the rank of the regressor matrix.
        window = 2 * np.linalg.matrix_rank(x.values)

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            moving = ols(y=y, x=x, weights=weights,
                         window_type=window_type, window=window, **kwds)

        # The sparse representation of the inputs must give the same results.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(),
                                weights=weights, window_type=window_type,
                                window=window, **kwds)
        _compare_ols_results(moving, sparse_moving)

        dates = moving._index

        for result_pos, event_pos in enumerate(moving._valid_indices):
            # Rolling windows start ``window - 1`` observations back once the
            # position has reached ``window``; otherwise start at the first
            # date.
            if window_type == "rolling" and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = {name: values.truncate(before=start, after=stop)
                        for name, values in compat.iteritems(x)}
            y_window = y.truncate(before=start, after=stop)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static = ols(y=y_window, x=x_window, weights=weights, **kwds)

            self.compare(static, moving,
                         event_index=event_pos, result_index=result_pos)

        _check_non_raw_results(moving)
Пример #2
0
    def test_wls_panel(self):
        """Weighted OLS on a panel must match the fit on the stacked data."""
        y = tm.makeTimeDataFrame()
        x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

        # Blank out two observations in each column of y.
        holes = (("A", [1, 7]), ("B", [6, 15]), ("C", [3, 20]), ("D", [5, 11]))
        for column, rows in holes:
            y.ix[rows, column] = np.nan

        stacked_y = y.stack()
        stacked_x = DataFrame(
            dict((name, frame.stack()) for name, frame in x.iteritems())
        )

        weights = x.std("items")
        stacked_weights = weights.stack()

        # Replace each stacked index with its tuple-index form.
        for stacked in (stacked_y, stacked_x, stacked_weights):
            stacked.index = stacked.index.get_tuple_index()

        result = ols(y=y, x=x, weights=1 / weights)
        expected = ols(y=stacked_y, x=stacked_x, weights=1 / stacked_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ("resid", "y_fitted"):
            panel_values = getattr(result, attr).stack().values
            stacked_values = getattr(expected, attr).values
            assert_almost_equal(panel_values, stacked_values)
Пример #3
0
    def test_wls_panel(self):
        """Check weighted panel OLS against the equivalent stacked regression."""
        y = tm.makeTimeDataFrame()
        x = Panel({"x1": tm.makeTimeDataFrame(), "x2": tm.makeTimeDataFrame()})

        # Introduce two missing observations per column of y.
        for rows, column in (([1, 7], "A"), ([6, 15], "B"),
                             ([3, 20], "C"), ([5, 11], "D")):
            y.ix[rows, column] = np.nan

        flat_y = y.stack()
        flat_x = DataFrame(
            dict((item, frame.stack()) for item, frame in compat.iteritems(x))
        )

        weights = x.std("items")
        flat_weights = weights.stack()

        # Swap each stacked index for its ``_tuple_index`` counterpart.
        for flat in (flat_y, flat_x, flat_weights):
            flat.index = flat.index._tuple_index

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, weights=1 / weights)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            expected = ols(y=flat_y, x=flat_x, weights=1 / flat_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ("resid", "y_fitted"):
            from_panel = getattr(result, attr).stack().values
            from_flat = getattr(expected, attr).values
            assert_almost_equal(from_panel, from_flat)
Пример #4
0
    def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
        """Compare a moving OLS fit with static fits on each window's data.

        For every valid index of the moving model, a static model is fitted
        on ``x`` and ``y`` truncated to the matching date range and compared
        with the moving result at that position.
        """
        window = 25  # must be larger than rank of x

        moving = ols(y=y, x=x, window_type=window_type, window=window,
                     **kwds)

        dates = moving._index
        rolling = window_type == 'rolling'

        for result_pos, event_pos in enumerate(moving._valid_indices):
            # Rolling windows start ``window - 1`` observations back once the
            # position has reached ``window``; otherwise use the first date.
            first_pos = event_pos - window + 1 if rolling and event_pos >= window else 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = dict((name, values.truncate(before=start, after=stop))
                            for name, values in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, **kwds)

            self.compare(static, moving, event_index=event_pos,
                         result_index=result_pos)

        _check_non_raw_results(moving)
Пример #5
0
    def checkMovingOLS(self, x, y, window_type="rolling", **kwds):
        """Validate a moving OLS model against per-window static models.

        Each valid index of the moving fit is re-estimated with a static
        regression on the truncated inputs, and the two are compared via
        ``self.compare``.
        """
        window = 25  # must be larger than rank of x

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            moving = ols(y=y, x=x, window_type=window_type, window=window, **kwds)

        timeline = moving._index

        for n, i in enumerate(moving._valid_indices):
            # Only a rolling window whose position has reached ``window``
            # starts later than the first date.
            if window_type == "rolling" and i >= window:
                lower = i - window + 1
            else:
                lower = 0
            first_date = timeline[lower]
            last_date = timeline[i]

            x_slice = {
                key: column.truncate(before=first_date, after=last_date)
                for key, column in compat.iteritems(x)
            }
            y_slice = y.truncate(before=first_date, after=last_date)

            with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
                static = ols(y=y_slice, x=x_slice, **kwds)

            self.compare(static, moving, event_index=i, result_index=n)

        _check_non_raw_results(moving)
Пример #6
0
    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        """Compare a moving OLS fit with sparse and per-window static fits.

        The moving model is compared with the same model fitted on sparse
        inputs, and then, for every valid index, with a static model fitted
        on the inputs truncated to the matching date range.
        """
        from scikits.statsmodels.tools.tools import rank

        # The window is twice the rank of the regressor matrix.
        window = 2 * rank(x.values)

        moving = ols(y=y, x=x, weights=weights,
                     window_type=window_type, window=window, **kwds)

        # The sparse representation of the inputs must give the same results.
        sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(), weights=weights,
                            window_type=window_type, window=window, **kwds)
        _compare_ols_results(moving, sparse_moving)

        dates = moving._index

        for result_pos, event_pos in enumerate(moving._valid_indices):
            if window_type == 'rolling' and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = dates[first_pos], dates[event_pos]

            x_window = dict((name, values.truncate(before=start, after=stop))
                            for name, values in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, weights=weights, **kwds)

            self.compare(static, moving, event_index=event_pos,
                         result_index=result_pos)

        _check_non_raw_results(moving)
Пример #7
0
    def test_wls_panel(self):
        """Panel WLS results must equal those of the stacked regression."""
        y = tm.makeTimeDataFrame()
        x = Panel({'x1': tm.makeTimeDataFrame(),
                   'x2': tm.makeTimeDataFrame()})

        # Two missing observations per column of y.
        nan_rows = [('A', [1, 7]), ('B', [6, 15]),
                    ('C', [3, 20]), ('D', [5, 11])]
        for col, rows in nan_rows:
            y.ix[rows, col] = np.nan

        long_y = y.stack()
        long_x = DataFrame(dict((key, frame.stack())
                                for key, frame in x.iteritems()))

        weights = x.std('items')
        long_weights = weights.stack()

        # Replace each stacked index with its tuple-index form.
        long_y.index = long_y.index.get_tuple_index()
        long_x.index = long_x.index.get_tuple_index()
        long_weights.index = long_weights.index.get_tuple_index()

        result = ols(y=y, x=x, weights=1 / weights)
        expected = ols(y=long_y, x=long_x, weights=1 / long_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ('resid', 'y_fitted'):
            assert_almost_equal(getattr(result, attr).stack().values,
                                getattr(expected, attr).values)
Пример #8
0
    def checkMovingOLS(self, window_type, x, y, **kwds):
        """Compare a moving OLS fit with static fits on each window's data.

        The date index is taken from the moving model's ``y`` (a ``Series``
        index or a ``LongPanel`` major axis).  For every valid index, a
        static model is fitted on the truncated inputs and compared with the
        moving result.
        """
        # The helper lives in a different module depending on the version.
        try:
            from scikits.statsmodels.tools.tools import rank
        except ImportError:
            from scikits.statsmodels.tools import rank

        # The window is twice the rank of the regressor matrix.
        window = 2 * rank(x.values)

        moving = ols(y=y, x=x, window_type=window_type, window=window,
                     **kwds)

        response = moving.y
        if isinstance(response, Series):
            index = response.index
        elif isinstance(response, LongPanel):
            index = response.major_axis

        for result_pos, event_pos in enumerate(moving._valid_indices):
            if window_type == 'rolling' and event_pos >= window:
                first_pos = event_pos - window + 1
            else:
                first_pos = 0
            start, stop = index[first_pos], index[event_pos]

            x_window = dict((name, values.truncate(before=start, after=stop))
                            for name, values in x.iteritems())
            y_window = y.truncate(before=start, after=stop)

            static = ols(y=y_window, x=x_window, **kwds)

            self.compare(static, moving, event_index=event_pos,
                         result_index=result_pos)

        _check_non_raw_results(moving)
Пример #9
0
    def test_wls_panel(self):
        """Weighted panel OLS must agree with the stacked-data regression."""
        y = tm.makeTimeDataFrame()
        x = Panel({'x1': tm.makeTimeDataFrame(),
                   'x2': tm.makeTimeDataFrame()})

        # Set two positions to NaN in each column of y.
        for column, rows in (('A', [1, 7]), ('B', [6, 15]),
                             ('C', [3, 20]), ('D', [5, 11])):
            y.iloc[rows, y.columns.get_loc(column)] = np.nan

        stacked_y = y.stack()
        stacked_x = DataFrame(dict((item, frame.stack())
                                   for item, frame in x.iteritems()))

        weights = x.std('items')
        stacked_weights = weights.stack()

        # Swap each stacked index for its ``_tuple_index`` counterpart.
        for stacked in (stacked_y, stacked_x, stacked_weights):
            stacked.index = stacked.index._tuple_index

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x, weights=1 / weights)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            expected = ols(y=stacked_y, x=stacked_x,
                           weights=1 / stacked_weights)

        assert_almost_equal(result.beta, expected.beta)

        for attr in ('resid', 'y_fitted'):
            panel_values = getattr(result, attr).stack().values
            stacked_values = getattr(expected, attr).values
            assert_almost_equal(panel_values, stacked_values)
Пример #10
0
    def test_plm_attrs(self):
        """Smoke test: ``resid`` is accessible on static and windowed fits."""
        response = tm.makeTimeDataFrame()
        regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        windowed = ols(y=response, x=regressors, window=10)
        static = ols(y=response, x=regressors)

        # Only the attribute access matters; the values are not checked.
        static.resid
        windowed.resid
Пример #11
0
    def test_auto_rolling_window_type(self):
        """A fit given only ``window`` must match an explicit rolling fit."""
        frame = tm.makeTimeDataFrame()
        response = frame.pop("A")

        implicit = ols(y=response, x=frame, window=20, min_periods=10)
        explicit = ols(y=response, x=frame, window=20, min_periods=10,
                       window_type="rolling")

        assert_frame_equal(implicit.beta, explicit.beta)
Пример #12
0
    def checkForSeries(self, x, y, series_x, series_y, **kwds):
        """Fit ``(x, y)`` and ``(series_x, series_y)`` and compare the results.

        The fit on the series inputs serves as the reference (simple OLS).
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            candidate = ols(y=y, x=x, **kwds)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            baseline = ols(y=series_y, x=series_x, **kwds)

        self.compare(baseline, candidate)
Пример #13
0
    def test_series_rhs(self):
        """A bare Series regressor must behave like ``{"x": series}``."""
        response = tm.makeTimeSeries()
        regressor = tm.makeTimeSeries()

        bare_fit = ols(y=response, x=regressor)
        dict_fit = ols(y=response, x={"x": regressor})
        assert_series_equal(bare_fit.beta, dict_fit.beta)

        # GH 5233/5250
        assert_series_equal(bare_fit.y_predict, bare_fit.predict(x=regressor))
Пример #14
0
    def test_plm_ctor(self):
        """Smoke test: build panel models from a dict and from a Panel."""
        response = tm.makeTimeDataFrame()
        regressors = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        # Only construction and ``summary`` access are exercised.
        from_dict = ols(y=response, x=regressors, intercept=False)
        from_dict.summary

        from_panel = ols(y=response, x=Panel(regressors))
        from_panel.summary
Пример #15
0
    def test_plm_attrs(self):
        """Smoke test: ``resid`` is accessible on static and windowed fits."""
        lhs = tm.makeTimeDataFrame()
        rhs = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            windowed_model = ols(y=lhs, x=rhs, window=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            static_model = ols(y=lhs, x=rhs)

        # Only the attribute access matters; the values are not checked.
        static_model.resid
        windowed_model.resid
Пример #16
0
    def test_auto_rolling_window_type(self):
        """Passing only ``window`` must give the same betas as ``rolling``."""
        regressors = tm.makeTimeDataFrame()
        target = regressors.pop("A")

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            default_fit = ols(y=target, x=regressors, window=20, min_periods=10)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            rolling_fit = ols(y=target, x=regressors, window=20,
                              min_periods=10, window_type="rolling")

        assert_frame_equal(default_fit.beta, rolling_fit.beta)
Пример #17
0
    def test_plm_ctor(self):
        """Smoke test: build panel models from a dict and from a Panel."""
        lhs = tm.makeTimeDataFrame()
        rhs = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            dict_model = ols(y=lhs, x=rhs, intercept=False)
        # Only the attribute access is exercised.
        dict_model.summary

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            panel_model = ols(y=lhs, x=Panel(rhs))
        panel_model.summary
Пример #18
0
    def test_series_rhs(self):
        """A bare Series regressor must behave like ``{'x': series}``."""
        target = tm.makeTimeSeries()
        feature = tm.makeTimeSeries()

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            series_model = ols(y=target, x=feature)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            mapping_model = ols(y=target, x={'x': feature})
        assert_series_equal(series_model.beta, mapping_model.beta)

        # GH 5233/5250
        assert_series_equal(series_model.y_predict,
                            series_model.predict(x=feature))
Пример #19
0
    def testFiltering(self):
        """Check the indexes and values of ``_x``, ``_x_filtered`` and ``_y``.

        Fits ``self.panel_y2`` on ``self.panel_x2`` and verifies the sorted
        unique values of the index levels of the design matrices, their raw
        values, and that the first level of ``_x_filtered`` matches the index
        of ``y_fitted``.
        """
        def sorted_level_values(frame, level):
            # Sorted unique labels found at the given index level.
            return Index(sorted(set(frame.index.get_level_values(level))))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        x = result._x
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # The assertion must be a single call expression: with the argument
        # list on its own line, ``assertTrue`` is only referenced, not called.
        self.assertTrue(exp_index.equals(sorted_level_values(x, 0)))

        exp_index = Index(["A", "B"])
        self.assertTrue(exp_index.equals(sorted_level_values(x, 1)))

        x = result._x_filtered
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3),
                           datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(sorted_level_values(x, 0)))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        exp_x = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1], [9, 17, 1], [30, 48, 1],
                          [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        self.assertTrue(
            result._x_filtered.index.levels[0].equals(result.y_fitted.index))
Пример #20
0
    def test_plm_lagged_y_predict(self):
        """Smoke test: ``lagged_y_predict`` runs on a windowed panel model."""
        response = tm.makeTimeDataFrame()
        regressors = {'a': tm.makeTimeDataFrame(),
                      'b': tm.makeTimeDataFrame()}

        windowed = ols(y=response, x=regressors, window=10)
        # The return value is not inspected; the call only has to succeed.
        windowed.lagged_y_predict(2)
Пример #21
0
    def test_plm_lagged_y_predict(self):
        """Smoke test: ``lagged_y_predict`` runs on a windowed panel model."""
        lhs = tm.makeTimeDataFrame()
        rhs = {"a": tm.makeTimeDataFrame(), "b": tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            windowed_model = ols(y=lhs, x=rhs, window=10)
        # The return value is not inspected; the call only has to succeed.
        windowed_model.lagged_y_predict(2)
Пример #22
0
 def test_y_predict(self):
     """Predicted values must equal fitted values, both labelled and raw."""
     response = tm.makeTimeSeries()
     regressors = tm.makeTimeDataFrame()
     with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
         fitted = ols(y=response, x=regressors)
     assert_series_equal(fitted.y_predict, fitted.y_fitted)
     assert_almost_equal(fitted._y_predict_raw, fitted._y_fitted_raw)
Пример #23
0
    def ols_results(self):
        """
        Returns the results of the regressions:
        x_1 ~ L(X)
        x_2 ~ L(X)
        ...
        x_k ~ L(X)

        where X = [x_1, x_2, ..., x_k]
        and L(X) represents the columns of X lagged 1, 2, ..., n lags
        (n is the user-provided number of lags).

        Returns
        -------
        dict
            Maps each column of the data to the OLS result of regressing
            that column on all lagged series.
        """
        from pandas.stats.api import ols

        # Gather every lagged series under a name built from (lag, column).
        # ``range`` is used instead of ``xrange`` so this also runs on
        # Python 3.
        lagged = {}
        for lag in range(1, 1 + self._p):
            for col, series in self._lagged_data[lag].iteritems():
                lagged[_make_param_name(lag, col)] = series

        # Every column is regressed on the same set of lagged regressors.
        return dict((col, ols(y=y, x=lagged, intercept=self._intercept))
                    for col, y in self._data.iteritems())
Пример #24
0
    def test_predict(self):
        """Exercise ``predict`` with default, custom ``x`` and custom ``beta``."""
        response = tm.makeTimeSeries()
        regressors = tm.makeTimeDataFrame()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fit = ols(y=response, x=regressors)

        # Default arguments and the original inputs all reproduce y_predict.
        assert_series_equal(fit.predict(), fit.y_predict)
        assert_series_equal(fit.predict(x=regressors), fit.y_predict)
        assert_series_equal(fit.predict(beta=fit.beta), fit.y_predict)

        # Manual prediction: design matrix (with intercept) times beta.
        design = regressors.copy()
        design['intercept'] = 1.
        manual = Series(np.dot(design.values, fit.beta.values),
                        regressors.index)
        assert_series_equal(fit.y_predict, manual)

        # Column order of the supplied x must not matter.
        reordered = regressors.reindex(columns=regressors.columns[::-1])
        assert_series_equal(fit.predict(x=reordered), fit.y_predict)

        # Shifted regressors: compare with the manually computed product.
        shifted = reordered + 10
        shifted_prediction = fit.predict(x=shifted)
        shifted['intercept'] = 1.
        shifted = shifted.reindex(columns=fit.beta.index)
        expected = Series(np.dot(shifted.values, fit.beta.values),
                          shifted.index)
        assert_series_equal(expected, shifted_prediction)

        # An all-zero beta gives an all-zero prediction.
        zero_beta = Series(0., fit.beta.index)
        zero_prediction = fit.predict(beta=zero_beta)
        assert_series_equal(Series(0., zero_prediction.index),
                            zero_prediction)
Пример #25
0
    def testWithTimeEffects(self):
        """Check the transformed y and x values when ``time_effects=True``."""
        fitted = ols(y=self.panel_y2, x=self.panel_x2, time_effects=True)

        expected_y = [0, -0.5, 0.5]
        assert_almost_equal(fitted._y_trans.values.flat, expected_y)

        expected_x = [[0, 0], [-10.5, -15.5], [10.5, 15.5]]
        assert_almost_equal(fitted._x_trans.values, expected_x)
Пример #26
0
    def checkNonPooled(self, x, y, **kwds):
        """Smoke test a non-pooled fit: the repr checks must not crash."""
        model = ols(y=y, x=x, pool=False, **kwds)

        _check_repr(model)
        # Every attribute listed on the class gets the same repr check.
        for name in NonPooledPanelOLS.ATTRIBUTES:
            value = getattr(model, name)
            _check_repr(value)
Пример #27
0
    def test_predict(self):
        """Exercise ``predict`` with default, custom ``x`` and custom ``beta``."""
        target = tm.makeTimeSeries()
        features = tm.makeTimeDataFrame()
        model = ols(y=target, x=features)

        # Default arguments and the original inputs all reproduce y_predict.
        assert_series_equal(model.predict(), model.y_predict)
        assert_series_equal(model.predict(x=features), model.y_predict)
        assert_series_equal(model.predict(beta=model.beta), model.y_predict)

        # Manual prediction: design matrix (with intercept) times beta.
        with_const = features.copy()
        with_const["intercept"] = 1.0
        by_hand = Series(np.dot(with_const.values, model.beta.values),
                         features.index)
        assert_series_equal(model.y_predict, by_hand)

        # Column order of the supplied x must not matter.
        flipped = features.reindex(columns=features.columns[::-1])
        assert_series_equal(model.predict(x=flipped), model.y_predict)

        # Shifted regressors: compare with the manually computed product.
        offset = flipped + 10
        offset_pred = model.predict(x=offset)
        offset["intercept"] = 1.0
        offset = offset.reindex(columns=model.beta.index)
        expected = Series(np.dot(offset.values, model.beta.values),
                          offset.index)
        assert_series_equal(expected, offset_pred)

        # An all-zero beta gives an all-zero prediction.
        zeros = Series(0.0, model.beta.index)
        zero_pred = model.predict(beta=zeros)
        assert_series_equal(Series(0.0, zero_pred.index), zero_pred)
 def trend_analysis_df(self, trend_dataframe):
   """Summarise each column of ``trend_dataframe`` as one row of statistics.

   For every column the result holds the mean over the last week, the mean
   over the week before that, the mean over the whole frame and the OLS
   slope of the column's values against their position (0, 1, 2, ...) after
   sorting by index.

   NOTE(review): the frame is sliced with date strings, so its index is
   presumably date-like; the "Last 30 days" label is applied to the mean of
   the entire frame, whatever span the caller passes in.
   """
   # Reference dates, all relative to today.
   today = datetime.date.today()
   week_ago = today - datetime.timedelta(7)
   eight_days_ago = today - datetime.timedelta(8)
   two_weeks_ago = today - datetime.timedelta(14)

   # Views on the most recent week and the week before it.
   last_week = trend_dataframe.ix[str(week_ago):str(today)]
   prior_week = trend_dataframe.ix[str(two_weeks_ago):str(eight_days_ago)]

   # Column means over each period.
   mean_last_week = pd.Series(last_week.mean(), name='Daily Avg (Last week)')
   mean_prior_week = pd.Series(prior_week.mean(), name='Daily Avg (Prior week)')
   mean_overall = pd.Series(trend_dataframe.mean(), name='Daily Avg (Last 30 days)')

   # One regression slope per column.
   slopes = pd.Series(name='Regress_coeff (30d)')
   for event in trend_dataframe:
     ordered = pd.Series(trend_dataframe[event], index=trend_dataframe.index).sort_index()
     # Dropping the original index and resetting gives an "index" column
     # (positions) and a 0 column (values) to regress on.
     positional = pd.Series(ordered.values).reset_index()
     fit = ols(x=positional["index"], y=positional[0])
     slopes = slopes.set_value(event, fit.beta['x'])

   # Assemble the summary, one column per statistic.
   summary = pd.concat(
     [mean_last_week, mean_prior_week, mean_overall, slopes],
     axis=1)
   summary.index.name = "Events"
   return summary
Пример #29
0
def cointegrate(ticker1, df1, ts1, ticker2, df2, ts2):
	"""Regress one series on another and test the residuals for stationarity.

	Column ``ts1`` of ``df1`` and column ``ts2`` of ``df2`` are copied as
	floats into a frame indexed like ``df1``.  The second series is regressed
	on the first with ``ols``; the fitted model and its intercept are printed,
	the residuals are plotted, and the Dickey-Fuller and Hurst exponent tests
	are run on them.
	"""
	first_col = '{}_{}'.format(ticker1, ts1)
	second_col = '{}_{}'.format(ticker2, ts2)

	df = pd.DataFrame(index=df1.index)
	df[first_col] = df1[ts1].astype('float')
	df[second_col] = df2[ts2].astype('float')

	# Hedge ratio: slope of the second series regressed on the first.
	res = ols(y=df[second_col], x=df[first_col])
	print(res)
	beta_hr = res.beta.x
	print(res.beta.intercept)

	# Residuals of the fitted linear combination.
	df['model'] = res.beta.intercept + beta_hr * df[first_col]
	df["res"] = df[second_col] - df['model']

	plot_residuals(df)

	# Stationarity checks on the residual column.
	test = Test_Stationarity(df, 'res')
	test.dickey_fuller_test()
	test.test_hurst_exponent()
Пример #30
0
    def testWithWeights(self):
        """Check transformed and filtered values of a weighted panel fit."""
        # Weights 0..9 laid out as five dates by two columns.
        dates = [datetime(2000, 1, day) for day in range(1, 6)]
        weights = DataFrame(np.arange(10).reshape((5, 2)),
                            index=dates, columns=['A', 'B'])

        result = ols(y=self.panel_y2, x=self.panel_x2, weights=weights)

        assert_almost_equal(result._y_trans.values.flat, [0, 16, 25])

        expected_x_trans = [[0, 0, 0],
                            [36, 68, 4],
                            [150, 240, 5]]
        assert_almost_equal(result._x_trans.values, expected_x_trans)

        # The filtered values expected here are the unscaled ones.
        expected_x_filtered = [[6, 14, 1],
                               [9, 17, 1],
                               [30, 48, 1],
                               [11, 20, 1],
                               [12, 21, 1]]
        assert_almost_equal(result._x_filtered.values, expected_x_filtered)
Пример #31
0
    def checkOLS(self, exog, endog, x, y):
        """Compare the ``ols`` fit with a statsmodels reference fit.

        The reference is ``sm.OLS`` on ``endog``/``exog`` with a constant
        appended; the raw attributes of the ``ols`` result on ``x``/``y`` are
        compared with it one by one.  The fit on sparse inputs must also
        match the dense fit.
        """
        reference = sm.OLS(endog, sm.add_constant(exog, prepend=False)).fit()
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=y, x=x)

        # The sparse representation of the inputs must give the same results.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            sparse_result = ols(y=y.to_sparse(), x=x.to_sparse())
        _compare_ols_results(result, sparse_result)

        def same(reference_attr, raw_attr):
            # Compare one reference attribute with its raw counterpart.
            assert_almost_equal(getattr(reference, reference_attr),
                                getattr(result, raw_attr))

        same('params', '_beta_raw')
        same('df_model', '_df_model_raw')
        same('df_resid', '_df_resid_raw')
        # Only the first element of the raw F-stat is compared.
        assert_almost_equal(reference.fvalue, result._f_stat_raw[0])
        for pair in (('pvalues', '_p_value_raw'),
                     ('rsquared', '_r2_raw'),
                     ('rsquared_adj', '_r2_adj_raw'),
                     ('resid', '_resid_raw'),
                     ('bse', '_std_err_raw'),
                     ('tvalues', '_t_stat_raw')):
            same(*pair)
        assert_almost_equal(reference.cov_params(), result._var_beta_raw)
        same('fittedvalues', '_y_fitted_raw')

        _check_non_raw_results(result)
Пример #32
0
    def test_f_test(self):
        """Run ``f_test`` with string and list hypotheses and a bad name."""
        regressors = tm.makeTimeDataFrame()
        response = regressors.pop('A')

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=response, x=regressors)

        # A single combined hypothesis only has to run.
        model.f_test('1*B+1*C+1*D=0')

        # Testing every regressor against zero reproduces the model F-stat.
        joint = model.f_test(['1*B=0', '1*C=0', '1*D=0'])
        assert_almost_equal(joint['f-stat'], model.f_stat['f-stat'])

        # 'A' was popped from the regressors, so it cannot be tested.
        self.assertRaises(Exception, model.f_test, '1*A=0')
Пример #33
0
    def test_r2_no_intercept(self):
        """Same betas but different r2 with an explicit intercept column.

        One model lets ``ols`` add the intercept; the other passes a constant
        column and ``intercept=False``.  Checked for static and windowed fits.
        """
        response = tm.makeTimeSeries()
        regressors = tm.makeTimeDataFrame()

        with_const = regressors.copy()
        with_const['intercept'] = 1.

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            auto_fit = ols(y=response, x=regressors)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            manual_fit = ols(y=response, x=with_const, intercept=False)
        assert_series_equal(auto_fit.beta, manual_fit.beta)

        # TODO: can we infer whether the intercept is there...
        self.assertNotEqual(auto_fit.r2, manual_fit.r2)

        # Same comparison for windowed fits.
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            auto_fit = ols(y=response, x=regressors, window=20)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            manual_fit = ols(y=response, x=with_const, window=20,
                             intercept=False)
        assert_frame_equal(auto_fit.beta, manual_fit.beta)
        self.assertTrue((auto_fit.r2 != manual_fit.r2).all())
Пример #34
0
    def test_plm_exclude_dummy_corner(self):
        """Dropping entity dummy 'D' works; dropping 'E' must raise."""
        response = tm.makeTimeDataFrame()
        regressors = {'a': tm.makeTimeDataFrame(),
                      'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=response, x=regressors, entity_effects=True,
                        dropped_dummies={'entity': 'D'})
        # Only the attribute access is exercised.
        model.summary

        def fit_dropping_e():
            with tm.assert_produces_warning(FutureWarning,
                                            check_stacklevel=False):
                ols(y=response, x=regressors, entity_effects=True,
                    dropped_dummies={'entity': 'E'})

        self.assertRaises(Exception, fit_dropping_e)
Пример #35
0
    def test_plm_f_test(self):
        """Run ``f_test`` on a panel model with string and list hypotheses."""
        response = tm.makeTimeDataFrame()
        regressors = {'a': tm.makeTimeDataFrame(),
                      'b': tm.makeTimeDataFrame()}

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=response, x=regressors)

        # A single combined hypothesis only has to run.
        model.f_test('1*a+1*b=0')

        # Testing every regressor against zero reproduces the model F-stat.
        joint = model.f_test(['1*a=0', '1*b=0'])
        assert_almost_equal(joint['f-stat'], model.f_stat['f-stat'])
Пример #36
0
    def testWithXEffectsAndDroppedDummies(self):
        """Check ``_x`` and ``_y`` with x-effects on 'x1', dropping dummy 30."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fitted = ols(y=self.panel_y2, x=self.panel_x2,
                         x_effects=['x1'], dropped_dummies={'x1': 30})

        design = fitted._x
        assert_almost_equal(fitted._y.values.flat, [1, 4, 5])

        expected_rows = [[1., 0., 14., 1.], [0, 1, 17, 1], [0, 0, 48, 1]]
        expected = DataFrame(expected_rows,
                             columns=['x1_6', 'x1_9', 'x2', 'intercept'],
                             index=design.index,
                             dtype=float)

        # Align the expected columns with the order of the actual result.
        assert_frame_equal(design, expected.reindex(columns=design.columns))
Пример #37
0
    def testWithEntityEffectsAndDroppedDummies(self):
        """Check ``_x`` and ``_y`` with entity effects, dropping entity 'B'."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fitted = ols(y=self.panel_y2, x=self.panel_x2,
                         entity_effects=True,
                         dropped_dummies={'entity': 'B'})

        # .flat is flatiter instance
        assert_almost_equal(fitted._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        expected_rows = [[1., 6., 14., 1.], [1, 9, 17, 1], [0, 30, 48, 1]]
        expected = DataFrame(expected_rows,
                             index=fitted._x.index,
                             columns=['FE_A', 'x1', 'x2', 'intercept'],
                             dtype=float)
        # Select the expected columns in the order of the actual result.
        tm.assert_frame_equal(fitted._x, expected.loc[:, fitted._x.columns])
Пример #38
0
    def testWithXEffects(self):
        """Check ``_x`` and ``_y`` when 'x1' is expanded into x-effects."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fitted = ols(y=self.panel_y2, x=self.panel_x2, x_effects=['x1'])

        # .flat is flatiter instance
        assert_almost_equal(fitted._y.values.flat, [1, 4, 5],
                            check_dtype=False)

        design = fitted._x
        dummy_columns = ['x1_30', 'x1_9']
        expected = DataFrame([[0., 0., 14., 1.], [0, 1, 17, 1], [1, 0, 48, 1]],
                             columns=dummy_columns + ['x2', 'intercept'],
                             index=design.index,
                             dtype=float)
        # The dummy columns are expected as uint8 rather than float.
        expected[dummy_columns] = expected[dummy_columns].astype(np.uint8)
        assert_frame_equal(design, expected.reindex(columns=design.columns))
Пример #39
0
    def test_various_attributes(self):
        """Smoke test: several attributes of a windowed model are Series.

        Correctness of the values is tested elsewhere; this only makes sure
        the attributes can be computed and have the expected type.
        """
        x = DataFrame(np.random.randn(100, 5))
        y = np.random.randn(100)
        model = ols(y=y, x=x, window=20)

        series_attrs = ['rank', 'df', 'forecast_mean', 'forecast_vol']

        for attr in series_attrs:
            value = getattr(model, attr)
            # ``assert_`` is a deprecated unittest alias (removed in
            # Python 3.12); ``assertTrue`` is the supported spelling.
            self.assertTrue(isinstance(value, Series))

        # Accessing ``_results`` only has to succeed.
        model._results
Пример #40
0
    def test_plm_exclude_dummy_corner(self):
        """Dropping entity dummy 'D' works; dropping 'E' must raise."""
        response = tm.makeTimeDataFrame()
        regressors = {'a': tm.makeTimeDataFrame(), 'b': tm.makeTimeDataFrame()}

        good_fit = ols(y=response, x=regressors, entity_effects=True,
                       dropped_dummies={'entity': 'D'})
        # Only the attribute access is exercised.
        good_fit.summary

        self.assertRaises(Exception, ols,
                          y=response, x=regressors, entity_effects=True,
                          dropped_dummies={'entity': 'E'})
Пример #41
0
    def test_f_test(self):
        """Run ``f_test`` with string and list hypotheses and a bad name."""
        frame = tm.makeTimeDataFrame()
        target = frame.pop('A')

        fit = ols(y=target, x=frame)

        # A single combined hypothesis only has to run.
        fit.f_test('1*B+1*C+1*D=0')

        # Testing every regressor against zero reproduces the model F-stat.
        per_column = ['1*B=0', '1*C=0', '1*D=0']
        joint = fit.f_test(per_column)
        assert_almost_equal(joint['f-stat'], fit.f_stat['f-stat'])

        # 'A' was popped from the regressors, so it cannot be tested.
        self.assertRaises(Exception, fit.f_test, '1*A=0')
Пример #42
0
    def test_various_attributes(self):
        """Smoke test: several attributes of a windowed model are Series.

        Correctness of the values is tested elsewhere.
        """
        regressors = DataFrame(np.random.randn(100, 5))
        response = np.random.randn(100)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=response, x=regressors, window=20)

        for name in ('rank', 'df', 'forecast_mean', 'forecast_vol'):
            tm.assertIsInstance(getattr(model, name), Series)

        # Accessing ``_results`` only has to succeed.
        model._results
Пример #43
0
    def testWithXEffectsAndConversionAndDroppedDummies(self):
        """Check ``_x``/``_y`` with x-effects on 'x1' and 'x2', dropping 'foo'."""
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fitted = ols(y=self.panel_y3, x=self.panel_x3,
                         x_effects=['x1', 'x2'],
                         dropped_dummies={'x2': 'foo'})

        # .flat is flatiter instance
        assert_almost_equal(fitted._y.values.flat, [1, 2, 3, 4],
                            check_dtype=False)

        expected_values = np.array([[0, 0, 0, 0, 1],
                                    [1, 0, 1, 0, 1],
                                    [0, 1, 0, 1, 1],
                                    [0, 0, 0, 0, 1]],
                                   dtype=np.float64)
        assert_almost_equal(fitted._x.values, expected_values)

        expected_columns = Index(['x1_B', 'x1_C', 'x2_bar', 'x2_baz',
                                  'intercept'])
        self.assert_index_equal(expected_columns, fitted._x.columns)
Пример #44
0
    def runTest(self, stock1, stock2):
        """Run the pair diagnostics for two ticker symbols.

        Downloads both price histories from Yahoo for ``self.start`` ..
        ``self.end``, transforms the adjusted closes with ``self.formula``,
        regresses ``stock2`` on ``stock1`` and analyses the residual spread.

        Parameters
        ----------
        stock1, stock2
            Ticker symbols handed to ``web.DataReader``.

        Returns
        -------
        Pair
            Built from the two symbols, the regression beta and R^2, the first
            two entries of the ``ts.adfuller`` result, the Hurst value from
            ``self.hurst`` and the half-life from ``self.half_life``.
        """
        start = self.start
        end = self.end

        first = web.DataReader(stock1, "yahoo", start, end)
        second = web.DataReader(stock2, "yahoo", start, end)

        # Materialise real lists: on Python 3 ``map`` returns a lazy iterator,
        # which has no length and cannot be assigned as a DataFrame column.
        first["Value"] = [self.formula(price)
                          for price in first["Adj Close"].tolist()]
        second["Value"] = [self.formula(price)
                           for price in second["Adj Close"].tolist()]

        df = pd.DataFrame(index=first.index)
        df[stock1] = first["Value"]
        df[stock2] = second["Value"]

        res = ols(y=df[stock2], x=df[stock1])

        beta = res.beta.x
        R2 = res.r2

        # Residual spread of the fitted linear combination.
        df["res"] = df[stock2] - beta * df[stock1]

        # Run the ADF test on the spread (CADF) and pick out its results.
        cadf = ts.adfuller(df["res"])
        testStat = cadf[0]
        pValue = cadf[1]

        # Hurst exponent of the spread.
        hurst = self.hurst(df["res"])

        # Successive differences of the spread, paired with the lagged levels:
        # the last level has no following difference, so it is dropped.
        results = df["res"].tolist()
        delta = [curr - prev for prev, curr in zip(results, results[1:])]
        results.pop()

        halfLife = self.half_life(delta, results)

        return Pair(stock1, stock2, beta, R2, testStat, pValue, hurst,
                    halfLife)
Пример #45
0
    def checkOLS(self, exog, endog, x, y):
        """Compare an ``ols`` fit with the matching statsmodels OLS fit.

        ``exog`` / ``endog`` feed ``sm.OLS`` (with a constant added), while
        ``x`` / ``y`` feed ``ols``; every raw statistic of the latter must
        agree with its statsmodels counterpart.
        """
        design = sm.add_constant(exog)
        sm_fit = sm.OLS(endog, design).fit()
        pd_fit = ols(y=y, x=x)

        # Coefficients and degrees of freedom.
        assert_almost_equal(sm_fit.params, pd_fit._beta_raw)
        assert_almost_equal(sm_fit.df_model, pd_fit._df_model_raw)
        assert_almost_equal(sm_fit.df_resid, pd_fit._df_resid_raw)

        # Goodness of fit; only the first entry of the raw F stat is compared.
        assert_almost_equal(sm_fit.fvalue, pd_fit._f_stat_raw[0])
        assert_almost_equal(sm_fit.pvalues, pd_fit._p_value_raw)
        assert_almost_equal(sm_fit.rsquared, pd_fit._r2_raw)
        assert_almost_equal(sm_fit.rsquared_adj, pd_fit._r2_adj_raw)

        # Residuals, standard errors, t statistics, covariance, fitted values.
        assert_almost_equal(sm_fit.resid, pd_fit._resid_raw)
        assert_almost_equal(sm_fit.bse, pd_fit._std_err_raw)
        assert_almost_equal(sm_fit.t(), pd_fit._t_stat_raw)
        assert_almost_equal(sm_fit.cov_params(), pd_fit._var_beta_raw)
        assert_almost_equal(sm_fit.fittedvalues, pd_fit._y_fitted_raw)

        _check_non_raw_results(pd_fit)
Пример #46
0
    def _check_wls(self, x, y, weights):
        """Compare a weighted ``ols`` fit with statsmodels WLS.

        Both fits receive the reciprocal of ``weights``. Afterwards the
        rolling and expanding moving-window checks are run with ``weights``
        passed through unchanged.
        """
        fit = ols(y=y, x=x, weights=1/weights)

        # Put regressors, response and weights in one frame so that a single
        # dropna removes incomplete rows from all three together.
        frame = x.copy()
        frame['__y__'] = y
        frame['__weights__'] = weights
        complete = frame.dropna()

        endog = complete.pop('__y__').values
        row_weights = complete.pop('__weights__').values
        # What is left in ``complete`` are the regressors; constant goes last.
        exog = sm.add_constant(complete.values, prepend=False)

        reference = sm.WLS(endog, exog, weights=1/row_weights).fit()

        assert_almost_equal(reference.params, fit._beta_raw)
        assert_almost_equal(reference.resid, fit._resid_raw)

        for window_type in ('rolling', 'expanding'):
            self.checkMovingOLS(window_type, x, y, weights=weights)
Пример #47
0
    def test_predict_longer_exog(self):
        """Prediction must span the regressor index when it outlasts the response.

        The regressor covers 1998-2017 while the response stops at 2007; the
        index of ``y_predict`` has to equal the regressor's index.
        """
        x_by_year = {
            "1998": "4760", "1999": "5904", "2000": "4504", "2001": "9808",
            "2002": "4241", "2003": "4086", "2004": "4687", "2005": "7686",
            "2006": "3740", "2007": "3075", "2008": "3753", "2009": "4679",
            "2010": "5468", "2011": "7154", "2012": "4292", "2013": "4283",
            "2014": "4595", "2015": "9194", "2016": "4221", "2017": "4520",
        }
        y_by_year = {
            "1998": "691", "1999": "1580", "2000": "80", "2001": "1450",
            "2002": "555", "2003": "956", "2004": "877", "2005": "614",
            "2006": "468", "2007": "191",
        }

        response = Series(y_by_year)
        regressor = Series(x_by_year)
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            model = ols(y=response, x=regressor)

        predicted = model.y_predict
        self.assertTrue(predicted.index.equals(regressor.index))
Пример #48
0
    def testFiltering(self):
        """Check the estimation sample versus the filtered sample of a panel fit.

        Fits ``ols`` on the ``panel_y2`` / ``panel_x2`` fixtures and verifies
        the index levels and values of ``_x``, the dates and values of
        ``_x_filtered``, the response values, and that the first index level
        of ``_x_filtered`` matches the index of ``y_fitted``.
        """
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = ols(y=self.panel_y2, x=self.panel_x2)

        # Distinct dates (index level 0) of the estimation sample.
        x = result._x
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        # This used to be split over two lines ("self.assertTrue" followed by
        # a parenthesised expression), so the assertion was never called.
        self.assertTrue(exp_index.equals(index))

        # Distinct entries of index level 1 of the estimation sample.
        index = x.index.get_level_values(1)
        index = Index(sorted(set(index)))
        exp_index = Index(['A', 'B'])
        self.assertTrue(exp_index.equals(index))

        # Distinct dates of the filtered sample.
        x = result._x_filtered
        index = x.index.get_level_values(0)
        index = Index(sorted(set(index)))
        exp_index = Index([datetime(2000, 1, 1),
                           datetime(2000, 1, 3),
                           datetime(2000, 1, 4)])
        self.assertTrue(exp_index.equals(index))

        assert_almost_equal(result._y.values.flat, [1, 4, 5])

        exp_x = [[6, 14, 1],
                 [9, 17, 1],
                 [30, 48, 1]]
        assert_almost_equal(exp_x, result._x.values)

        exp_x_filtered = [[6, 14, 1],
                          [9, 17, 1],
                          [30, 48, 1],
                          [11, 20, 1],
                          [12, 21, 1]]
        assert_almost_equal(exp_x_filtered, result._x_filtered.values)

        self.assertTrue(result._x_filtered.index.levels[0].equals(
            result.y_fitted.index))
Пример #49
0
    def testFiltering(self):
        """Check the estimation sample versus the filtered sample of a panel fit.

        Fits ``ols`` on the ``panel_y2`` / ``panel_x2`` fixtures and compares
        index levels and values of ``_x`` and ``_x_filtered``, the response
        values, and the index of ``y_fitted``.
        """
        def distinct_level(frame, level):
            # Sorted unique entries of one level of the frame's index.
            return Index(sorted(set(frame.index.get_level_values(level))))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            fit = ols(y=self.panel_y2, x=self.panel_x2)

        sample = fit._x
        self.assert_index_equal(
            Index([datetime(2000, 1, 1), datetime(2000, 1, 3)]),
            distinct_level(sample, 0))
        self.assert_index_equal(Index(['A', 'B']), distinct_level(sample, 1))

        filtered_dates = Index([datetime(2000, 1, 1),
                                datetime(2000, 1, 3),
                                datetime(2000, 1, 4)])
        self.assert_index_equal(filtered_dates,
                                distinct_level(fit._x_filtered, 0))

        # ``.flat`` is a flatiter instance, hence the relaxed dtype check.
        assert_almost_equal(fit._y.values.flat, [1, 4, 5], check_dtype=False)

        sample_rows = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(np.array(sample_rows, dtype=np.float64),
                            fit._x.values)

        filtered_rows = [[6, 14, 1], [9, 17, 1], [30, 48, 1],
                         [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(np.array(filtered_rows, dtype=np.float64),
                            fit._x_filtered.values)

        self.assert_index_equal(fit._x_filtered.index.levels[0],
                                fit.y_fitted.index)
Пример #50
0
def cadf_test(tickdict1, tickdict2, begdate, enddate):
    """Run an ADF test on the regression residuals of two close-price series.

    Parameters
    ----------
    tickdict1, tickdict2 : mapping
        Each must provide a ``'code'`` entry (passed to
        ``tushare.get_k_data``) and a ``'symbo'`` entry (used as the column
        label for that instrument).
    begdate, enddate
        Forwarded to ``tushare.get_k_data`` as ``start`` and ``end``.

    Returns
    -------
    The object returned by ``statsmodels.tsa.stattools.adfuller`` for the
    residual series; it is also pretty-printed.
    """
    # Only the modules actually used below are imported.
    import pprint

    import pandas as pd
    import statsmodels.tsa.stattools as sts
    import tushare as ts
    from pandas.stats.api import ols

    print(begdate, enddate)
    ticker1 = tickdict1['code']
    ticker2 = tickdict2['code']
    symbol1 = tickdict1['symbo']
    symbol2 = tickdict2['symbo']
    print(ticker1, ticker2)

    df1 = ts.get_k_data(ticker1, start=begdate, end=enddate)
    df2 = ts.get_k_data(ticker2, start=begdate, end=enddate)
    # Index both frames by date so the close columns align on assignment.
    df1.index = df1['date']
    df2.index = df2['date']
    df = pd.DataFrame(index=df1['date'])
    df[symbol1] = df1["close"]
    df[symbol2] = df2["close"]

    # Calculate optimal hedge ratio "beta"
    res = ols(y=df[symbol2], x=df[symbol1])
    beta_hr = res.beta.x

    # Calculate the residuals of the linear combination
    df["res"] = df[symbol2] - beta_hr * df[symbol1]

    # Calculate and output the CADF test on the residuals
    cadf = sts.adfuller(df["res"])
    pprint.pprint(cadf)
    return cadf
Пример #51
0
    def testFiltering(self):
        """Check the estimation sample versus the filtered sample of a panel fit.

        Fits ``ols`` on the ``panel_y2`` / ``panel_x2`` fixtures and compares
        the axis entries and values of ``_x`` and ``_x_filtered``, the
        response values, and the index of ``y_fitted``.
        """
        def distinct(axis, labels):
            # Sorted unique axis entries that the integer labels refer to.
            return Index(sorted(set([axis[i] for i in labels])))

        fit = ols(y=self.panel_y2, x=self.panel_x2)

        sample = fit._x
        sample_dates = Index([datetime(2000, 1, 1), datetime(2000, 1, 3)])
        self.assertTrue(sample_dates.equals(
            distinct(sample.major_axis, sample.major_labels)))
        self.assertTrue(Index(['A', 'B']).equals(
            distinct(sample.minor_axis, sample.minor_labels)))

        filtered = fit._x_filtered
        filtered_dates = Index([datetime(2000, 1, 1),
                                datetime(2000, 1, 3),
                                datetime(2000, 1, 4)])
        self.assertTrue(filtered_dates.equals(
            distinct(filtered.major_axis, filtered.major_labels)))

        assert_almost_equal(fit._y.values.flat, [1, 4, 5])

        sample_rows = [[6, 14, 1], [9, 17, 1], [30, 48, 1]]
        assert_almost_equal(sample_rows, fit._x.values)

        filtered_rows = [[6, 14, 1], [9, 17, 1], [30, 48, 1],
                         [11, 20, 1], [12, 21, 1]]
        assert_almost_equal(filtered_rows, fit._x_filtered.values)

        self.assertTrue(
            fit._x_filtered.major_axis.equals(fit.y_fitted.index))
Пример #52
0
def fill_regressed_data(S):
    """Fill missing prices by regressing on the assets without missing returns.

    Log returns are computed for every column. Each column that has missing
    returns is regressed (with intercept) on the columns that have none, the
    missing returns are predicted, and the predictions are turned back into
    prices anchored to the column's first available price.

    Parameters
    ----------
    S : DataFrame of prices, one column per asset.

    Returns
    -------
    DataFrame
        A copy of ``S`` with the missing prices filled; ``S`` itself is not
        modified.
    """
    S = S.copy()
    R = np.log(S).diff()
    # The first difference is always undefined; zero it so that it does not
    # mark every column as incomplete.
    R.iloc[0] = 0

    # Columns without any missing return serve as regressors. The axis is
    # passed by keyword because pandas no longer accepts it positionally.
    X = R.dropna(axis=1)

    for col in set(S.columns) - set(X.columns):
        # Restore the undefined first return for the column being fitted.
        # A single positional assignment is used instead of the chained
        # ``R[col].iloc[0] = ...``, which may write to a temporary copy.
        R.iloc[0, R.columns.get_loc(col)] = np.nan
        y = R[col]

        # fit regression
        res = ols(y=y, x=X, intercept=True)
        pred = res.predict(x=X[y.isnull()])

        # get absolute prices: cumulate the predicted log returns and shift
        # them so the last predicted value equals the first known log price
        pred = pred.cumsum()
        pred += np.log(S[col].dropna().iloc[0]) - pred.iloc[-1]

        # fill missing data
        S[col] = S[col].fillna(np.exp(pred))

    return S
Пример #53
0
def calc_positive_negative_dates(data,
                                 pos_x_min=0.005,
                                 pos_x_max=0.01,
                                 neg_y_max=-0.005,
                                 neg_y_min=-0.01):
    """Regress open-to-close return on the overnight gap for selected rows.

    Rows are kept when the previous-close-to-open return lies strictly inside
    ``(pos_x_min, pos_x_max)`` or strictly inside ``(neg_y_min, neg_y_max)``.
    A scatter plot of the two returns is saved as a ``.jpg`` named after the
    thresholds, and the ``ols`` fit is printed.

    Parameters
    ----------
    data : object exposing ``OPEN``, ``CLOSE`` and ``PREV_CLOSE`` columns.
    pos_x_min, pos_x_max : bounds of the positive gap band.
    neg_y_max, neg_y_min : bounds of the negative gap band.
    """
    # Overnight gap, computed once instead of once per comparison.
    gap = (data.OPEN - data.PREV_CLOSE) / data.PREV_CLOSE
    in_pos_band = (gap > pos_x_min) & (gap < pos_x_max)
    in_neg_band = (gap < neg_y_max) & (gap > neg_y_min)
    pdata = data[in_pos_band | in_neg_band]

    # calculate prev-close-to-open return and open-to-close return, then regress
    prev_close_open = (pdata.OPEN - pdata.PREV_CLOSE) / pdata.PREV_CLOSE
    open_close = (pdata.CLOSE - pdata.OPEN) / pdata.OPEN

    thresholds = (pos_x_min, pos_x_max, neg_y_max, neg_y_min)
    fig = plt.figure()
    plt.scatter(x=prev_close_open, y=open_close)
    fig.suptitle('Posb(%03f,%03f)andNeg(%03f,%03f)' % thresholds,
                 fontsize=20)
    plt.xlabel('prev_close_open', fontsize=10)
    plt.ylabel('open_close', fontsize=10)
    plt.savefig('Posb(%03f,%03f)andNeg(%03f,%03f).jpg' % thresholds)

    res = ols(y=open_close, x=prev_close_open)
    # Function-call form: valid on Python 3 and prints the same on Python 2.
    print(res)
Пример #54
0
def regression_without_ccy_nation(Currency, typ):
    """Regress issuance amounts on rate/credit factors, excluding home issuers.

    Loads ``<Currency>_<typ>.xlsx`` from the regression-data folder under
    ``ROOT_DIR``, keeps the rows whose ``Currency`` column equals the
    ``NATION_CURRENCY_DICT`` key mapped to ``Currency``, adds the principal
    amount normalised by its trailing 12- and 24-month mean/std, drops rows
    where ``Currency`` equals ``Nation``, and runs six ``ols`` regressions.

    Parameters
    ----------
    Currency : value looked up among ``NATION_CURRENCY_DICT`` values; also
        part of the file name.
    typ : sub-folder and file-name suffix (e.g. ``'Corp'``).

    Returns
    -------
    tuple
        ``(res1, res2, res3, res4, res5, res6, reg_df, correl_matrix)``:
        res1-res3 use all eight factors for the raw, 1y-normalised and
        2y-normalised amount; res4-res6 repeat this without the two credit
        factors; ``correl_matrix`` is the correlation of the eight factors.
    """
    reg_df = pd.read_excel(ROOT_DIR + 'cleaned data/regression data/' + typ
                           + '/' + Currency + '_' + typ + '.xlsx',)
    # Reverse lookup: the dictionary key whose value is ``Currency``.
    key_ccy = list(NATION_CURRENCY_DICT.keys())[
        list(NATION_CURRENCY_DICT.values()).index(Currency)]
    reg_df = reg_df[reg_df.Currency == key_ccy]

    n = len(reg_df.index)
    mu1 = np.zeros(n)
    sigma1 = np.zeros(n)
    mu2 = np.zeros(n)
    sigma2 = np.zeros(n)

    dates = reg_df['Date']
    amounts = reg_df['PrincipalAmount($mil)']
    # Iterate positionally. After the row filter above the index labels are
    # no longer guaranteed to be 0..n-1, so a label lookup ``reg_df['Date'][i]``
    # could raise KeyError or pair row i with the wrong date, while mu/sigma
    # are applied positionally below.
    for i, date_obs in enumerate(dates):
        up_to_obs = dates <= date_obs
        last_1y = (dates > date_obs + relativedelta(months=-12)) & up_to_obs
        last_2y = (dates > date_obs + relativedelta(months=-24)) & up_to_obs
        mu1[i] = np.mean(amounts[last_1y])
        mu2[i] = np.mean(amounts[last_2y])
        sigma1[i] = np.std(amounts[last_1y])
        sigma2[i] = np.std(amounts[last_2y])

    # NOTE(review): a trailing window holding a single observation has zero
    # std, which makes the normalised amount NaN -- confirm this is acceptable.
    reg_df['normal_amount_1y'] = (reg_df['PrincipalAmount($mil)'] - mu1) / sigma1
    reg_df['normal_amount_2y'] = (reg_df['PrincipalAmount($mil)'] - mu2) / sigma2

    # Move the last column to the front.
    cols = reg_df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    reg_df = reg_df[cols]
    reg_df = reg_df[reg_df['Currency'] != reg_df['Nation']]

    rate_factors = ['r_market', 'Butterfly_market', 'Curve_market',
                    'r_domicile', 'Butterfly_domicile', 'Curve_domicile']
    all_factors = rate_factors + ['credit_market', 'credit_domicile']

    res1 = ols(y=reg_df['PrincipalAmount($mil)'], x=reg_df[all_factors])
    res2 = ols(y=reg_df['normal_amount_1y'], x=reg_df[all_factors])
    res3 = ols(y=reg_df['normal_amount_2y'], x=reg_df[all_factors])
    res4 = ols(y=reg_df['PrincipalAmount($mil)'], x=reg_df[rate_factors])
    res5 = ols(y=reg_df['normal_amount_1y'], x=reg_df[rate_factors])
    res6 = ols(y=reg_df['normal_amount_2y'], x=reg_df[rate_factors])

    correl_matrix = reg_df[all_factors].corr()

    return res1, res2, res3, res4, res5, res6, reg_df, correl_matrix
Пример #55
0
def filter_obs(hob_df, filter_water_table, filter_stresses):
    '''Apply depth and measurement-variability criteria to head observations.

    Observations are grouped by ``site_no``. For each site the mean well
    depth (``well_depth_va``), mean depth to water (``lev_va``) and the
    standard deviation of ``lev_va`` are tested against thresholds that are
    module-level names defined outside this function.

    Returns a dict mapping each excluded site to a text describing the
    criterion that excluded it.
    '''
    excluded = {}

    for site, obs in hob_df.groupby('site_no'):
        well_depth = obs['well_depth_va'].mean()
        depth_to_water = obs['lev_va'].mean()
        level_std = obs['lev_va'].std()

        # Keep only sites that are likely to measure the water table.
        if filter_water_table == True:
            # Wells shallower than the minimum depth are always kept.
            if depth_to_water <= min_dtw:
                continue

            # Water deeper than the likely water-table elevation.
            if depth_to_water > max_dtw:
                excluded[site] = 'dtw = %i > max dtw' % (depth_to_water)
                continue

            # Wells that are likely measuring a confined aquifer.
            sat_thick = well_depth - depth_to_water
            if (sat_thick > (sat_thick_mult * well_depth)
                    and well_depth > land_surface_buffer):
                excluded[site] = 'sat thick %i > %0.2f * dow' % (
                    sat_thick, sat_thick_mult)
                continue

        if filter_stresses == True:
            # Wells shallower than the minimum depth are always kept.
            if depth_to_water <= min_dtw:
                continue

            # Look for a trend over time: ordinary least squares of the level
            # on elapsed days, testing both the slope and the R^2 of the fit.
            elapsed = obs['lev_dt'] - obs['lev_dt'].min()
            obs['date_delta'] = elapsed / np.timedelta64(1, 'D')
            fit = ols(y=obs['lev_va'], x=obs['date_delta'])
            trend = fit.beta['x'] * 365.  # slope converted to length/year
            r2 = fit.r2

            if (trend > max_trend) and (r2 > max_r2):
                excluded[site] = (
                    'Apparent temporal trend = %0.2f m/year > %0.2f & R^2 = %0.2f > %0.2f'
                    % (trend, max_trend, r2, max_r2))
            elif level_std > (std_mult * depth_to_water):
                # Excessive measurement variability.
                excluded[site] = 'Measurement std = %4.2f > %0.2f * dtw' % (
                    level_std, std_mult)

    return excluded
Пример #56
0
    def checkForSeries(self, x, y, series_x, series_y, **kwds):
        """Consistency check: fit on ``x``/``y`` versus fit on the Series inputs.

        Both fits receive the same extra keyword arguments and are handed to
        ``self.compare`` with the Series-based fit as the reference.
        """
        fitted = ols(y=y, x=x, **kwds)
        baseline = ols(y=series_y, x=series_x, **kwds)
        self.compare(baseline, fitted)
Пример #57
0
if __name__ == "__main__":
    # Script entry point: CADF analysis of the AREX / WLL pair.
    start = datetime.datetime(2012, 1, 1)
    end = datetime.datetime(2013, 1, 1)

    base_sym, hedged_sym = "AREX", "WLL"
    base_prices = web.DataReader(base_sym, "yahoo", start, end)
    hedged_prices = web.DataReader(hedged_sym, "yahoo", start, end)

    # Adjusted closes of both instruments on the first instrument's dates.
    df = pd.DataFrame(index=base_prices.index)
    df[base_sym] = base_prices["Adj Close"]
    df[hedged_sym] = hedged_prices["Adj Close"]

    # Time-series plot, then scatter plot, of the two price series.
    plot_price_series(df, base_sym, hedged_sym)
    plot_scatter_series(df, base_sym, hedged_sym)

    # Optimal hedge ratio "beta" from regressing WLL on AREX.
    res = ols(y=df[hedged_sym], x=df[base_sym])
    beta_hr = res.beta.x

    # Residuals of the linear combination, plotted for inspection.
    df["res"] = df[hedged_sym] - beta_hr * df[base_sym]
    plot_residuals(df)

    # CADF test on the residuals.
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)
Пример #58
0
 def test_summary_many_terms(self):
     """Building the summary of a fit with 20 regressors must not raise."""
     regressors = DataFrame(np.random.randn(100, 20))
     response = np.random.randn(100)
     fit = ols(y=response, x=regressors)
     fit.summary
Пример #59
0
 def test_y_predict(self):
     """Without new data, ``y_predict`` must equal ``y_fitted``.

     Checked for both the labelled Series and the raw values.
     """
     response = tm.makeTimeSeries()
     regressors = tm.makeTimeDataFrame()
     fit = ols(y=response, x=regressors)
     assert_series_equal(fit.y_predict, fit.y_fitted)
     assert_almost_equal(fit._y_predict_raw, fit._y_fitted_raw)
Пример #60
0
 def test_series_rhs(self):
     y = tm.makeTimeSeries()
     x = tm.makeTimeSeries()
     model = ols(y=y, x=x)
     expected = ols(y=y, x={'x' : x})
     assert_series_equal(model.beta, expected.beta)