def test_missing(self):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     data.endog[[3, 7, 14]] = np.nan
     mod = OLS(data.endog, data.exog, missing='drop')
     assert_equal(mod.endog.shape[0], 13)
     assert_equal(mod.exog.shape[0], 13)
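A minimal self-contained sketch of the NaN handling the test above exercises (synthetic data, not the Longley case): with missing='drop', rows containing NaN in endog or exog are removed before the model stores its design.

import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

rng = np.random.RandomState(0)
x = rng.uniform(size=20)
y = 2.0 + 3.0 * x + rng.normal(scale=0.1, size=20)
y[[3, 7, 14]] = np.nan                  # inject missing responses
exog = add_constant(x, prepend=False)
mod = OLS(y, exog, missing='drop')      # the three NaN rows are dropped
assert mod.endog.shape[0] == 17
assert mod.exog.shape[0] == 17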
Example #2
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     ols_res = OLS(data.endog, data.exog).fit()
     gls_res = GLS(data.endog, data.exog).fit()
     cls.res1 = gls_res
     cls.res2 = ols_res
Example #3
 def setupClass(cls):
     R = np.zeros(7)
     R[4:6] = [1,-1]
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     res1 = OLS(data.endog, data.exog).fit()
     cls.Ttest1 = res1.t_test(R)
Example #4
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     res1 = OLS(data.endog, data.exog).fit()
     R2 = [[0,1,-1,0,0,0,0],[0, 0, 0, 0, 1, -1, 0]]
     cls.Ftest1 = res1.f_test(R2)
     hyp = 'x2 = x3, x5 = x6'
     cls.NewFtest1 = res1.f_test(hyp)
Example #5
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = OLS(data.endog, data.exog).fit()
     R = np.identity(7)
     cls.Ttest = cls.res1.t_test(R)
     hyp = 'x1 = 0, x2 = 0, x3 = 0, x4 = 0, x5 = 0, x6 = 0, const = 0'
     cls.NewTTest = cls.res1.t_test(hyp)
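As the setups above show, t_test accepts either a restriction matrix (one row per hypothesis) or an R-style string naming the coefficients. A small sketch of the two equivalent call styles; with a pandas exog the names come from the dataset columns instead of the x1, ..., const defaults:

import numpy as np
from statsmodels.datasets import longley
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

data = longley.load_pandas()
exog = add_constant(data.exog, prepend=False)
res = OLS(data.endog, exog).fit()
tt_matrix = res.t_test(np.identity(7))           # every coefficient = 0
tt_string = res.t_test('GNPDEFL = 0, GNP = 0')   # a named subset
print(tt_matrix)
print(tt_string)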
Example #6
 def __init__(self):
     from statsmodels.datasets.cpunish import load
     self.data = load()
     self.endog = self.data.endog
     self.exog = self.data.exog
     np.random.seed(1234)
     self.weight = np.random.randint(5, 100, len(self.endog))
     self.endog_big = np.repeat(self.endog, self.weight)
     self.exog_big = np.repeat(self.exog, self.weight, axis=0)
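The __init__ above prepares an expanded ("big") dataset by repeating each observation according to its weight. A hedged sketch of the equivalence this sets up, on synthetic data: a GLM fit with freq_weights should reproduce the fit on the row-expanded data.

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(1234)
exog = sm.add_constant(rng.uniform(size=30))
endog = rng.poisson(np.exp(0.5 + 0.5 * exog[:, 1]))
weight = rng.randint(1, 5, size=30)            # integer frequency weights

res_w = sm.GLM(endog, exog, family=sm.families.Poisson(),
               freq_weights=weight).fit()
res_big = sm.GLM(np.repeat(endog, weight), np.repeat(exog, weight, axis=0),
                 family=sm.families.Poisson()).fit()
np.testing.assert_allclose(res_w.params, res_big.params, rtol=1e-6)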
Example #7
    def setupClass(cls):
        from .results.results_regression import LongleyRTO
        data = longley.load()
        res1 = OLS(data.endog, data.exog).fit()
        res2 = LongleyRTO()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")
        cls.res_qr = res_qr
Example #8
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     res1 = OLS(data.endog, data.exog).fit()
     R = np.array([[0, 1, 1, 0, 0, 0, 0],
                   [0, 1, 0, 1, 0, 0, 0],
                   [0, 1, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1, 0, 0],
                   [0, 0, 0, 0, 0, 1, 0]])
     q = np.array([0,0,0,1,0])
     cls.Ftest1 = res1.f_test((R,q))
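f_test also accepts the pair (R, q), testing the joint hypothesis R @ params = q, as in the setup above. A self-contained sketch on synthetic data:

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
X = sm.add_constant(rng.uniform(size=(50, 2)))
y = X @ np.array([1.0, 2.0, 2.0]) + rng.normal(size=50)
res = sm.OLS(y, X).fit()
R = np.array([[0.0, 1.0, -1.0]])   # tests beta_1 - beta_2 ...
q = np.array([0.0])                # ... equal to zero
print(res.f_test((R, q)))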
Example #9
 def setup_class(cls):
     data = longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     ols_res = OLS(data.endog, data.exog).fit()
     gls_res = GLS(data.endog, data.exog).fit()
     gls_res_scalar = GLS(data.endog, data.exog, sigma=1)
     cls.endog = data.endog
     cls.exog = data.exog
     cls.res1 = gls_res
     cls.res2 = ols_res
     cls.res3 = gls_res_scalar
Example #10
 def setupClass(cls):
     from .results.results_glm import Cpunish
     from statsmodels.datasets.cpunish import load
     data = load()
     data.exog[:,3] = np.log(data.exog[:,3])
     data.exog = add_constant(data.exog)
     exposure = [100] * len(data.endog)
     cls.res1 = GLM(data.endog, data.exog, family=sm.families.Poisson(),
                 exposure=exposure).fit()
     cls.res1.params[-1] += np.log(100) # add exposure back in to param
                                         # to make the results the same
     cls.res2 = Cpunish()
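The params[-1] += np.log(100) workaround above relies on an identity of the Poisson GLM: exposure enters the linear predictor as offset = log(exposure), so a constant exposure of 100 merely shifts the intercept by log(100). A sketch of that identity on synthetic data:

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
exog = sm.add_constant(rng.uniform(size=50))
endog = rng.poisson(np.exp(0.2 + 0.8 * exog[:, 1]))
exposure = np.full(50, 100.0)

res_expo = sm.GLM(endog, exog, family=sm.families.Poisson(),
                  exposure=exposure).fit()
res_plain = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()
# slopes agree; intercepts differ by exactly log(100)
np.testing.assert_allclose(res_expo.params[1], res_plain.params[1], rtol=1e-6)
np.testing.assert_allclose(res_expo.params[0] + np.log(100),
                           res_plain.params[0], rtol=1e-6)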
Example #11
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     y = data.endog
     X = data.exog
     n = y.shape[0]
     w = np.ones(n)
     cls.results = []
     cls.results.append(OLS(y, X).fit())
     cls.results.append(WLS(y, X, w).fit())
     cls.results.append(GLS(y, X, 100*w).fit())
     cls.results.append(GLS(y, X, np.diag(0.1*w)).fit())
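The point of this setup: with unit weights, OLS, WLS, and GLS coincide, and rescaling the weights or sigma by a constant leaves the coefficients unchanged. A sketch of the comparison these fixtures enable (the original test body is not shown):

import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
X = sm.add_constant(rng.uniform(size=(30, 2)))
y = X @ np.array([1.0, 2.0, 3.0]) + rng.normal(size=30)
w = np.ones(30)
fits = [sm.OLS(y, X).fit(),
        sm.WLS(y, X, w).fit(),
        sm.GLS(y, X, 100 * w).fit(),
        sm.GLS(y, X, np.diag(0.1 * w)).fit()]
for res in fits[1:]:
    np.testing.assert_allclose(res.params, fits[0].params, rtol=1e-8)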
Example #12
    def setupClass(cls):
        from .results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")
        cls.res_qr = res_qr
Example #13
    def setupClass(cls):
        from .results.results_regression import LongleyGls

        data = longley.load()
        exog = add_constant(np.column_stack(
            (data.exog[:, 1], data.exog[:, 4])))
        tmp_results = OLS(data.endog, exog).fit()
        rho = np.corrcoef(tmp_results.resid[1:],
                tmp_results.resid[:-1])[0][1] # by assumption
        order = toeplitz(np.arange(16))
        sigma = rho**order
        GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
        cls.res1 = GLS_results
        cls.res2 = LongleyGls()
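A note on the sigma constructed above: toeplitz(np.arange(n)) is the matrix of lag distances |i - j|, so rho**order is the AR(1) correlation matrix with entries rho**|i - j|. A tiny check:

import numpy as np
from scipy.linalg import toeplitz

rho = 0.5
order = toeplitz(np.arange(4))     # entry (i, j) equals |i - j|
sigma = rho ** order
assert np.allclose(sigma, [[1.0, 0.5, 0.25, 0.125],
                           [0.5, 1.0, 0.5, 0.25],
                           [0.25, 0.5, 1.0, 0.5],
                           [0.125, 0.25, 0.5, 1.0]])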
Example #14
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     y = data.endog
     X = data.exog
     n = y.shape[0]
     np.random.seed(5)
     w = np.random.uniform(0.5, 1, n)
     w_inv = 1. / w
     cls.results = []
     cls.results.append(WLS(y, X, w).fit())
     cls.results.append(WLS(y, X, 0.01 * w).fit())
     cls.results.append(GLS(y, X, 100 * w_inv).fit())
     cls.results.append(GLS(y, X, np.diag(0.1 * w_inv)).fit())
Example #15
    def setupClass(cls):
#        if skipR:
#            raise SkipTest, "Rpy not installed"
#        try:
#            r.library('car')
#        except RPyRException:
#            raise SkipTest, "car library not installed for R"
        R = np.zeros(7)
        R[4:6] = [1,-1]
#        self.R = R
        data = longley.load()
        data.exog = add_constant(data.exog)
        res1 = OLS(data.endog, data.exog).fit()
        cls.Ttest1 = res1.t_test(R)
Example #16
    def __init__(self):
        '''
        Tests Poisson family with canonical log link.

        Test results were obtained by R.
        '''
        from .results.results_glm import Cpunish
        from statsmodels.datasets.cpunish import load
        self.data = load()
        self.data.exog[:,3] = np.log(self.data.exog[:,3])
        self.data.exog = add_constant(self.data.exog)
        self.res1 = GLM(self.data.endog, self.data.exog,
                    family=sm.families.Poisson()).fit()
        self.res2 = Cpunish()
Example #17
    def __init__(self):
        '''
        Tests Poisson family with canonical log link.

        Test results were obtained by R.
        '''
        from .results.results_glm import Cpunish
        from statsmodels.datasets.cpunish import load
        self.data = load()
        self.data.exog[:,3] = np.log(self.data.exog[:,3])
        self.data.exog = add_constant(self.data.exog, prepend=False)
        self.res1 = GLM(self.data.endog, self.data.exog,
                    family=sm.families.Poisson()).fit()
        self.res2 = Cpunish()
        # compare with discrete, start close to save time
        modd = discrete.Poisson(self.data.endog, self.data.exog)
        self.resd = modd.fit(start_params=self.res1.params * 0.9, disp=False)
Example #18
    def __init__(self):
        '''
        Test Gaussian family with canonical identity link
        '''
        # Test Precisions
        self.decimal_resids = DECIMAL_3
        self.decimal_params = DECIMAL_2
        self.decimal_bic = DECIMAL_0
        self.decimal_bse = DECIMAL_3

        from statsmodels.datasets.longley import load
        self.data = load()
        self.data.exog = add_constant(self.data.exog)
        self.res1 = GLM(self.data.endog, self.data.exog,
                        family=sm.families.Gaussian()).fit()
        from .results.results_glm import Longley
        self.res2 = Longley()
Example #19
    def setup_class(cls):
        from .results.results_regression import LongleyGls

        data = longley.load(as_pandas=False)
        exog = add_constant(np.column_stack(
            (data.exog[:, 1], data.exog[:, 4])), prepend=False)
        tmp_results = OLS(data.endog, exog).fit()
        rho = np.corrcoef(tmp_results.resid[1:],
                          tmp_results.resid[:-1])[0][1]  # by assumption
        order = toeplitz(np.arange(16))
        sigma = rho**order
        GLS_results = GLS(data.endog, exog, sigma=sigma).fit()
        cls.res1 = GLS_results
        cls.res2 = LongleyGls()
        # attach for test_missing
        cls.sigma = sigma
        cls.exog = exog
        cls.endog = data.endog
Example #20
    def __init__(self):
        '''
        Test Binomial family with canonical logit link using star98 dataset.
        '''
        self.decimal_resids = DECIMAL_1
        self.decimal_bic = DECIMAL_2

        from statsmodels.datasets.star98 import load
        from .results.results_glm import Star98
        data = load()
        data.exog = add_constant(data.exog)
        self.res1 = GLM(data.endog, data.exog,
                        family=sm.families.Binomial()).fit()
        #NOTE: if you want to replicate with RModel
        #res2 = RModel(data.endog[:,0]/trials, data.exog, r.glm,
        #        family=r.binomial, weights=trials)

        self.res2 = Star98()
Example #21
def test_wtd_patsy_missing():
    from statsmodels.datasets.cpunish import load
    import pandas as pd
    data = load()
    data.exog[0, 0] = np.nan
    data.endog[[2, 4, 6, 8]] = np.nan
    data.pandas = pd.DataFrame(data.exog, columns=data.exog_name)
    data.pandas['EXECUTIONS'] = data.endog
    weights = np.arange(1, len(data.endog)+1)
    formula = """EXECUTIONS ~ INCOME + PERPOVERTY + PERBLACK + VC100k96 +
                 SOUTH + DEGREE"""
    mod_missing = GLM.from_formula(formula, data=data.pandas,
                                   freq_weights=weights)
    assert_equal(mod_missing.freq_weights.shape[0],
                 mod_missing.endog.shape[0])
    assert_equal(mod_missing.freq_weights.shape[0],
                 mod_missing.exog.shape[0])
    assert_equal(mod_missing.freq_weights.shape[0], 12)
    keep_weights = np.array([2, 4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17])
    assert_equal(mod_missing.freq_weights, keep_weights)
Example #22
    def __init__(self):
        '''
        Tests Gamma family with canonical inverse link (power -1)
        '''
        # Test Precisions
        self.decimal_aic_R = -1 #TODO: off by about 1, we are right with Stata
        self.decimal_resids = DECIMAL_2

        from statsmodels.datasets.scotland import load
        from .results.results_glm import Scotvote
        data = load()
        data.exog = add_constant(data.exog)
        res1 = GLM(data.endog, data.exog,
                   family=sm.families.Gamma()).fit()
        self.res1 = res1
#        res2 = RModel(data.endog, data.exog, r.glm, family=r.Gamma)
        res2 = Scotvote()
        res2.aic_R += 2 # R doesn't count degree of freedom for scale with gamma
        self.res2 = res2
Example #23
    def setupClass(cls):
        from .results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")

        model_qr = OLS(data.endog, data.exog)
        Q, R = np.linalg.qr(data.exog)
        model_qr.exog_Q, model_qr.exog_R  = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = np_matrix_rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2
Example #24
    def __init__(self):
        '''
        Test Negative Binomial family with canonical log link
        '''
        # Test Precision
        self.decimal_resid = DECIMAL_1
        self.decimal_params = DECIMAL_3
        self.decimal_resids = -1 # 1 % mismatch at 0
        self.decimal_fittedvalues = DECIMAL_1

        from statsmodels.datasets.committee import load
        self.data = load()
        self.data.exog[:,2] = np.log(self.data.exog[:,2])
        interaction = self.data.exog[:,2]*self.data.exog[:,1]
        self.data.exog = np.column_stack((self.data.exog,interaction))
        self.data.exog = add_constant(self.data.exog)
        self.res1 = GLM(self.data.endog, self.data.exog,
                family=sm.families.NegativeBinomial()).fit()
        from .results.results_glm import Committee
        res2 = Committee()
        res2.aic_R += 2 # They don't count a degree of freedom for the scale
        self.res2 = res2
Example #25
 def setupClass(cls):
     data = load().data
     cls.model = ols(longley_formula, data)
     super(TestFormulaRecArray, cls).setupClass()
Example #26
 def setupClass(cls):
     cls.data = load()
Example #27
"""Example: statsmodels.OLS
"""

from statsmodels.datasets.longley import load
import statsmodels.api as sm
from statsmodels.iolib.table import SimpleTable, default_txt_fmt
import numpy as np

data = load()

data_orig = (data.endog.copy(), data.exog.copy())

# Note: In this example using zscored/standardized variables has no effect on
#   regression estimates. Are there no numerical problems?

rescale = 0
# 0: no rescaling, 1:demean, 2:standardize, 3:standardize and transform back
rescale_ratio = data.endog.std() / data.exog.std(0)
if rescale > 0:
    # rescaling
    data.endog -= data.endog.mean()
    data.exog -= data.exog.mean(0)
if rescale > 1:
    data.endog /= data.endog.std()
    data.exog /= data.exog.std(0)

# skip because mean has been removed, but dimension is hardcoded in table
data.exog = sm.tools.add_constant(data.exog, prepend=False)


ols_model = sm.OLS(data.endog, data.exog)
Example #28
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     cls.endog = data.endog
     cls.exog = data.exog
     cls.ols_model = OLS(data.endog, data.exog)
Example #29
"""Ordinary Least Squares
"""

from statsmodels.datasets.longley import load
import statsmodels.api as sm
import numpy as np

data = load()
data.exog = sm.tools.add_constant(data.exog)

ols_model = sm.OLS(data.endog, data.exog)
ols_results = ols_model.fit()

# the Longley dataset is well known to have high multicollinearity
# one way to find the condition number is as follows

# normalize the independent variables to have unit length, Greene 4.9
norm_x = np.ones_like(data.exog)
for i in range(int(ols_model.df_model)):
    norm_x[:,i] = data.exog[:,i]/np.linalg.norm(data.exog[:,i])
norm_xtx = np.dot(norm_x.T,norm_x)
eigs = np.linalg.eigvals(norm_xtx)
collin = np.sqrt(eigs.max()/eigs.min())
print(collin)
# clearly there is a big problem with multicollinearity
# the rule of thumb is that any number over 20 requires attention

# for instance, consider the longley dataset with the last observation dropped
ols_results2 = sm.OLS(data.endog[:-1], data.exog[:-1,:]).fit()

# all of our coefficients change considerably in percentages
Example #30
 def setup_class(cls):
     cls.data = load(as_pandas=False)
Example #31
y_true = np.dot(X, beta)
y = y_true + np.random.normal(size=nsample)
res3 = sm.OLS(y, X).fit()
print(res3.f_test(R))

#Multicollinearity
#-----------------

#Data
#^^^^
# The Longley dataset is well known to have high multicollinearity, that is,
# the exogenous predictors are highly correlated. This is problematic because
# it can affect the stability of our coefficient estimates as we make minor
# changes to model specification.
from statsmodels.datasets.longley import load
y = load().endog
X = load().exog
X = sm.tools.add_constant(X, prepend=False)

#Fit and summary
#^^^^^^^^^^^^^^^
ols_model = sm.OLS(y, X)
ols_results = ols_model.fit()
print(ols_results.summary())

#Condition number
#^^^^^^^^^^^^^^^^
# One way to assess multicollinearity is to compute the condition number.
# Values over 20 are worrisome (see Greene 4.9). The first step is to normalize
# the independent variables to have unit length:
norm_x = np.ones_like(X)
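# The snippet breaks off here; a reconstruction of the remaining steps,
# mirroring the earlier OLS example: normalize each column, then take the
# square root of the eigenvalue ratio of X'X.
for i in range(int(ols_model.df_model)):
    norm_x[:, i] = X[:, i] / np.linalg.norm(X[:, i])
norm_xtx = np.dot(norm_x.T, norm_x)
eigs = np.linalg.eigvals(norm_xtx)
collin = np.sqrt(eigs.max() / eigs.min())
print(collin)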
Example #32
 def setup_class(cls):
     data = longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = OLS(data.endog, data.exog).fit()
     R = np.identity(7)[:-1, :]
     cls.Ftest = cls.res1.f_test(R)
Example #33
 def setupClass(cls):
     cls.data = load()
Example #34
 def setup_class(cls):
     data = longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = GLS(data.endog, data.exog).fit()
     cls.res2 = OLS(data.endog, data.exog).fit()
Example #35
 def setupClass(cls):
     data = load().data
     cls.model = ols(longley_formula, data)
     super(TestFormulaRecArray, cls).setupClass()
Example #36
 def setup_class(cls):
     data = longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     cls.endog = data.endog
     cls.exog = data.exog
     cls.ols_model = OLS(data.endog, data.exog)
Example #37
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = OLS(data.endog, data.exog).fit()
     cls.res2 = WLS(data.endog, data.exog).fit()
Example #38
 def setupClass(cls):
     data = longley.load()
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = OLS(data.endog, data.exog).fit()
     R = np.identity(7)[:-1,:]
     cls.Ftest = cls.res1.f_test(R)
Example #39
beta = np.array((3.5, 5.7, 150))
Y = np.dot(X, beta) + np.random.standard_normal(40)
mod2 = sm.OLS(Y, X)
res2 = mod2.fit()
f2 = lambda params: -1 * mod2.loglike(params)
resfmin = optimize.fmin(f2, np.ones(3), ftol=1e-10)
print('OLS')
print(res2.params)
print('MLE')
print(resfmin)

print('\nExample 2: Longley Data, high multicollinearity')
print('-----------------------------------------------\n')

from statsmodels.datasets.longley import load
data = load(as_pandas=False)
data.exog = sm.add_constant(data.exog, prepend=False)
mod = sm.OLS(data.endog, data.exog)
f = lambda params: -1 * mod.loglike(params)
score = lambda params: -1 * mod.score(params)

# now you're set up to try to minimize or root find, but this one is hard to
# get to work; note that if you want the results, you can also just fit mod:

res = mod.fit()
#print mod.results.params
print('OLS')
print(res.params)
print('MLE')
#resfmin2 = optimize.fmin(f, mod.results.params*0.9, maxfun=5000, maxiter=5000, xtol=1e-10, ftol= 1e-10)
resfmin2 = optimize.fmin(f, res.params * 0.9, maxfun=5000, maxiter=5000,
                         xtol=1e-10, ftol=1e-10)
Example #40
 def setup_class(cls):
     data = longley.load(as_pandas=False)
     data.exog = add_constant(data.exog, prepend=False)
     cls.res1 = GLS(data.endog, data.exog).fit()
     cls.res2 = OLS(data.endog, data.exog).fit()
Example #41
print(results.summary())
results_pivot = model.fit(method="qr-pivot")
print(results_pivot.summary())

assert all(abs(results.params - results_pivot.params) < 1e-8), \
    'Some of the params are not identical'
assert all(abs(results.pvalues - results_pivot.pvalues) < 1e-8), \
    'Some of the pvalues are not identical'

predict = results.predict(X)
predict_pivot = results_pivot.predict(X)

###################################
data = longley.load(as_pandas=False)

data.exog = sm.add_constant(data.exog, prepend=False)
ols_m = sm.OLS(data.endog, data.exog)
res = ols_m.fit()
res_qr = ols_m.fit(method='qr')
res_pivot = ols_m.fit(method='qr-pivot')

print(res.summary())
print(res_qr.summary())
print(res_pivot.summary())
print('end')

# Test multiple regression with multicollinearity X3 is highly correlated with

X3 = sm.add_constant(