Example No. 1
def test_HC_use():
    np.random.seed(0)
    nsample = 100
    x = np.linspace(0, 10, 100)
    X = sm.add_constant(np.column_stack((x, x**2)), prepend=False)
    beta = np.array([1, 0.1, 10])
    y = np.dot(X, beta) + np.random.normal(size=nsample)

    results = sm.OLS(y, X).fit()

    #test cov_params
    idx = np.array([1, 2])
    #need to call HC0_se to have cov_HC0 available
    results.HC0_se
    cov12 = results.cov_params(column=[1, 2], cov_p=results.cov_HC0)
    assert_almost_equal(cov12, results.cov_HC0[idx[:, None], idx], decimal=15)

    #test t_test
    tvals = results.params / results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=results.cov_HC0)
    assert_almost_equal(ttest.tvalue, tvals, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    #test f_test
    ftest = results.f_test(np.eye(3)[:-1], cov_p=results.cov_HC0)
    slopes = results.params[:-1]
    idx = np.array([0, 1])
    cov_slopes = results.cov_HC0[idx[:, None], idx]
    fval = np.dot(slopes, np.dot(np.linalg.inv(cov_slopes), slopes)) / len(idx)
    assert_almost_equal(ftest.fvalue, fval, decimal=12)
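The reference values computed by hand above follow the usual Wald form: t_i = b_i / se_i for each coefficient, and F = b' V^{-1} b / q for q restrictions, where V is the robust covariance of the tested coefficients. A minimal standalone sketch of that computation (numpy only, illustrative numbers):

import numpy as np

b = np.array([0.5, -0.2])                     # tested coefficients
V = np.array([[0.04, 0.01], [0.01, 0.09]])    # their (robust) covariance
tvals = b / np.sqrt(np.diag(V))               # t_i = b_i / se_i
fval = b.dot(np.linalg.solve(V, b)) / len(b)  # F = b' V^{-1} b / q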
Example No. 2
def linmod(y,
           x,
           weights=None,
           sigma=None,
           add_const=True,
           filter_missing=True,
           **kwds):
    '''get a linear model with extra options for input handling

    Dispatches to the regular model class and does not wrap the output.

    If several mutually exclusive options are given, for example sigma and
    weights, then the chosen class depends on the order of the checks below:
    sigma takes precedence over weights.
    '''

    if filter_missing:
        y, x = remove_nanrows(y, x)
        #do the same for masked arrays

    if add_const:
        x = sm.add_constant(x, prepend=True)

    if sigma is not None:
        return GLS(y, x, sigma=sigma, **kwds)
    elif weights is not None:
        return WLS(y, x, weights=weights, **kwds)
    else:
        return OLS(y, x, **kwds)
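A short usage sketch for linmod (illustrative data; filter_missing=False sidesteps the remove_nanrows helper defined elsewhere in this module):

import numpy as np

y = np.random.randn(50)
x = np.random.randn(50, 2)

mod_ols = linmod(y, x, filter_missing=False)                       # -> OLS
mod_wls = linmod(y, x, weights=np.ones(50), filter_missing=False)  # -> WLS
mod_gls = linmod(y, x, sigma=1.0, filter_missing=False)            # -> GLS, sigma wins over weights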
Example No. 3
class TestRlm(CheckRlmResults):
    from gwstatsmodels.datasets.stackloss import load
    data = load()  # class attributes for subclasses
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        # Test precisions
        self.decimal_standarderrors = DECIMAL_1
        self.decimal_scale = DECIMAL_3

        results = RLM(self.data.endog, self.data.exog,
                      M=sm.robust.norms.HuberT()).fit()   # default M
        h2 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H2").bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=sm.robust.norms.HuberT()).fit(cov="H3").bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        #        r.library('MASS')
        #        self.res2 = RModel(self.data.endog, self.data.exog,
        #                        r.rlm, psi="psi.huber")
        from results.results_rlm import Huber
        self.res2 = Huber()

    def test_summary(self):
        # smoke test that summary at least returns something
        self.res1.summary()
Example No. 4
 def __init__(self):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     #mod = sm.Probit(data.endog, data.exog)
     self.mod = sm.Logit(data.endog, data.exog)
     #res = mod.fit(method="newton")
     self.params = [np.array([1, 0.25, 1.4, -7])]
Example No. 5
 def setupClass(cls):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     cls.res1 = Logit(data.endog, data.exog).fit(method="newton", disp=0)
     res2 = Spector()
     res2.logit()
     cls.res2 = res2
Example No. 6
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog)
        xbeta = 1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        #self.mod = PoissonGMLE(data_endog, data_exog)
        #res = self.mod.fit()
        offset = self.res_discrete.params[0] * data_exog[:, 0]  #1d ???
        #self.res = PoissonOffsetGMLE(data_endog, data_exog[:,1:], offset=offset).fit(start_params = np.ones(6)/2., method='nm')
        modo = PoissonOffsetGMLE(data_endog, data_exog[:, 1:], offset=offset)
        self.res = modo.fit(start_params=0.9 * self.res_discrete.params[1:],
                            method='nm',
                            disp=0)
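The offset trick above pins the constant at its benchmark estimate so that only the slope parameters are re-estimated. A minimal check of the same idea using GLM (assuming GLM accepts an offset argument, as it does in current statsmodels):

import numpy as np
import gwstatsmodels.api as sm  # statsmodels.api in current releases

np.random.seed(12345)
x = sm.add_constant(np.random.randn(200, 2), prepend=True)
y = np.random.poisson(np.exp(np.dot(x, [1.0, 0.1, 0.1])))
const = sm.GLM(y, x, family=sm.families.Poisson()).fit().params[0]
# hold the constant fixed through the offset and re-estimate the slopes
res = sm.GLM(y, x[:, 1:], family=sm.families.Poisson(),
             offset=const * x[:, 0]).fit()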
Example No. 7
 def setupClass(cls):
     from results.results_discrete import RandHIE
     data = sm.datasets.randhie.load()
     exog = sm.add_constant(data.exog)
     cls.res1 = Poisson(data.endog, exog).fit(method='newton', disp=0)
     res2 = RandHIE()
     res2.poisson()
     cls.res2 = res2
Example No. 8
    def setup_class(self):

        nobs = 10000
        np.random.seed(987689)
        x = np.random.randn(nobs, 3)
        x = sm.add_constant(x, prepend=True)
        self.exog = x
        self.xf = 0.25 * np.ones((2, 4))
Example No. 9
 def setupClass(cls):
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     res2 = Spector()
     res2.probit()
     cls.res2 = res2
     cls.res1 = Probit(data.endog, data.exog).fit(method="ncg",
                                                  disp=0, avextol=1e-8)
Example No. 10
def test_qqplot():
    #just test that it runs
    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    mod_fit = sm.OLS(data.endog, data.exog).fit()
    res = mod_fit.resid
    fig = sm.qqplot(res)

    plt.close(fig)
Example No. 11
def test_poisson_newton():
    #GH: 24, Newton doesn't work well sometimes
    nobs = 10000
    np.random.seed(987689)
    x = np.random.randn(nobs, 3)
    x = sm.add_constant(x, prepend=True)
    y_count = np.random.poisson(np.exp(x.sum(1)))
    mod = sm.Poisson(y_count, x)
    res = mod.fit(start_params=-np.ones(4), method='newton', disp=0)
    assert_(not res.mle_retvals['converged'])
Example No. 12
 def setupClass(cls):
     if iswindows:   # does this work with classmethod?
         raise SkipTest("fmin_cg sometimes fails to converge on windows")
     data = sm.datasets.spector.load()
     data.exog = sm.add_constant(data.exog)
     res2 = Spector()
     res2.probit()
     cls.res2 = res2
     cls.res1 = Probit(data.endog, data.exog).fit(method="cg",
                                                  disp=0, maxiter=500)
Example No. 13
 def setupClass(cls):
     from results.results_discrete import Anes
     data = sm.datasets.anes96.load()
     exog = data.exog
     exog[:, 0] = np.log(exog[:, 0] + .1)
     exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
     exog = sm.add_constant(exog)
     cls.res1 = MNLogit(data.endog, exog).fit(method="newton", disp=0)
     res2 = Anes()
     res2.mnlogit_basezero()
     cls.res2 = res2
Example No. 14
    def setup(self):
        nsample = 100
        sig = 0.5
        x1 = np.linspace(0, 20, nsample)
        x2 = 5 + 3 * np.random.randn(nsample)
        X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2, np.ones(nsample)]
        beta = [0.5, 0.5, 1, -0.04, 5.0]
        y_true = np.dot(X, beta)
        y = y_true + sig * np.random.normal(size=nsample)
        exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)
        res = sm.OLS(y, exog0).fit()

        self.res = res
Example No. 15
    def __init__(self):
        #from results.results_discrete import Anes
        data = sm.datasets.anes96.load()
        exog = data.exog
        exog[:, 0] = np.log(exog[:, 0] + .1)
        exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
        exog = sm.add_constant(exog)
        self.mod = sm.MNLogit(data.endog, exog)

        def loglikeflat(self, params):
            #reshapes flattened params
            return self.loglike(params.reshape(6, 6))

        self.mod.loglike = loglikeflat  #need instance method
        self.params = [np.ones((6, 6))]
Example No. 16
def test_poisson_predict():
    #GH: 175, make sure poisson predict works without offset and exposure
    data = sm.datasets.randhie.load()
    exog = sm.add_constant(data.exog)
    res = sm.Poisson(data.endog, exog).fit(method='newton', disp=0)
    pred1 = res.predict()
    pred2 = res.predict(exog)
    assert_almost_equal(pred1, pred2)
    #extra options
    pred3 = res.predict(exog, offset=0, exposure=1)
    assert_almost_equal(pred1, pred3)
    pred3 = res.predict(exog, offset=0, exposure=2)
    assert_almost_equal(2*pred1, pred3)
    pred3 = res.predict(exog, offset=np.log(2), exposure=1)
    assert_almost_equal(2*pred1, pred3)
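Both doubled predictions rest on the same identity for the Poisson mean, mu = exp(x'beta + offset) * exposure, so exposure=2 and offset=log(2) are interchangeable. A one-line check (numpy only):

import numpy as np

xb = np.array([0.1, 0.5])
assert np.allclose(np.exp(xb) * 2, np.exp(xb + np.log(2)))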
Example No. 17
    def setupClass(cls):
        #import scipy
        #major, minor, micro = scipy.__version__.split('.')[:3]
        #if int(minor) < 9:
        #    raise SkipTest
        #Skip this unconditionally for release 0.3.0
        #since there are still problems with scipy 0.9.0 on some machines
        #Ralf on mailing list 2011-03-26
        raise SkipTest

        data = sm.datasets.spector.load()
        data.exog = sm.add_constant(data.exog)
        res2 = Spector()
        res2.logit()
        cls.res2 = res2
        cls.res1 = Logit(data.endog, data.exog).fit(method="bfgs", disp=0)
Example No. 18
def test_perfect_prediction():
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    iris_dir = os.path.join(cur_dir, '..', '..', 'genmod', 'tests', 'results')
    iris_dir = os.path.abspath(iris_dir)
    iris = np.genfromtxt(os.path.join(iris_dir, 'iris.csv'),
                         delimiter=",",
                         skip_header=1)
    y = iris[:, -1]
    X = iris[:, :-1]
    X = X[y != 2]
    y = y[y != 2]
    X = sm.add_constant(X, prepend=True)
    mod = Logit(y, X)
    assert_raises(PerfectSeparationError, mod.fit)
    #turn off raise PerfectSeparationError
    mod.raise_on_perfect_prediction = False
    mod.fit()  #should not raise
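Perfect separation makes the Logit log-likelihood unbounded (the coefficients can grow without limit), so the MLE does not exist. A toy reproduction with illustrative values, the fit call left commented out because it raises by default:

import numpy as np

x_sep = sm.add_constant(np.arange(6.), prepend=True)
y_sep = (np.arange(6.) > 2).astype(float)  # y perfectly predicted by x > 2
#Logit(y_sep, x_sep).fit()  # would raise PerfectSeparationError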
Example No. 19
    def __init__(self):

        # generate artificial data
        np.random.seed(98765678)
        nobs = 200
        rvs = np.random.randn(nobs, 6)
        data_exog = rvs
        data_exog = sm.add_constant(data_exog)
        xbeta = 0.1 + 0.1 * rvs.sum(1)
        data_endog = np.random.poisson(np.exp(xbeta))

        #estimate discretemod.Poisson as benchmark
        self.res_discrete = Poisson(data_endog, data_exog).fit(disp=0)

        mod_glm = sm.GLM(data_endog, data_exog, family=sm.families.Poisson())
        self.res_glm = mod_glm.fit()

        #estimate generic MLE
        self.mod = PoissonGMLE(data_endog, data_exog)
        self.res = self.mod.fit(start_params=0.9 * self.res_discrete.params,
                                method='nm', disp=0)
Example No. 20
    def calc_factors(self, x=None, keepdim=0, addconst=True):
        '''get a factor decomposition of the exogenous variables

        This uses principal component analysis to obtain the factors. The
        number of factors kept is the maximum number that will be considered
        in the regression.
        '''
        if x is None:
            x = self.exog
        else:
            x = np.asarray(x)
        xred, fact, evals, evecs = pca(x, keepdim=keepdim, normalize=1)
        self.exog_reduced = xred
        #self.factors = fact
        if addconst:
            self.factors = sm.add_constant(fact, prepend=True)
            self.hasconst = 1  #needs to be int
        else:
            self.factors = fact
            self.hasconst = 0  #needs to be int

        self.evals = evals
        self.evecs = evecs
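A rough standalone illustration of what calc_factors produces, using numpy's SVD as a stand-in for the sandbox pca helper (not the same function, just the same idea):

import numpy as np

x = np.random.randn(100, 5)
xc = x - x.mean(0)                    # center before extracting components
u, s, vt = np.linalg.svd(xc, full_matrices=False)
fact = u[:, :2] * s[:2]               # first two principal-component factors
factors = np.column_stack((np.ones(100), fact))  # prepend constant, hasconst = 1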
Example No. 21
class TestRlmHuber(CheckRlmResults):
    from gwstatsmodels.datasets.stackloss import load
    data = load()
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        huber = sm.robust.norms.HuberT()
        hub_scale = sm.robust.scale.HuberScale()
        results = RLM(self.data.endog, self.data.exog,
                      M=huber).fit(scale_est=hub_scale)
        h2 = RLM(self.data.endog, self.data.exog,
                 M=huber).fit(cov="H2", scale_est=hub_scale).bcov_scaled
        h3 = RLM(self.data.endog, self.data.exog,
                 M=huber).fit(cov="H3", scale_est=hub_scale).bcov_scaled
        self.res1 = results
        self.res1.h2 = h2
        self.res1.h3 = h3

    def setup(self):
        from results.results_rlm import HuberHuber
        self.res2 = HuberHuber()
Example No. 22
    import gwstatsmodels.api as sm
    import numpy.lib.recfunctions as nprf

    data = sm.datasets.grunfeld.load()
    # Baltagi doesn't include American Steel
    endog = data.endog[:-20]
    fullexog = data.exog[:-20]
#    fullexog.sort(order=['firm','year'])
    panel_arr = nprf.append_fields(fullexog, 'investment', endog, float,
                                   usemask=False)
    panel_panda = LongPanel.fromRecords(panel_arr, major_field='year',
                                        minor_field='firm')

    # the most cumbersome way of doing it as far as preprocessing by hand
    exog = fullexog[['value', 'capital']].view(float).reshape(-1, 2)
    exog = sm.add_constant(exog)
    panel = group(fullexog['firm'])
    year = fullexog['year']
    panel_mod = PanelModel(endog, exog, panel, year, xtnames=['firm', 'year'],
                           equation='invest value capital')
    # note that equation doesn't actually do anything but name the variables
    panel_ols = panel_mod.fit(model='pooled')

    panel_be = panel_mod.fit(model='between', effects='oneway')
    panel_fe = panel_mod.fit(model='fixed', effects='oneway')

    panel_bet = panel_mod.fit(model='between', effects='time')
    panel_fet = panel_mod.fit(model='fixed', effects='time')

    panel_fe2 = panel_mod.fit(model='fixed', effects='twoways')
Example No. 23
    np.random.seed(9876789)

    # OLS non-linear curve but linear in parameters
    # ---------------------------------------------

    nsample = 100
    sig = 0.5
    x1 = np.linspace(0, 20, nsample)
    x2 = 5 + 3 * np.random.randn(nsample)
    X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2, np.ones(nsample)]
    beta = [0.5, 0.5, 1, -0.04, 5.0]
    y_true = np.dot(X, beta)
    y = y_true + sig * np.random.normal(size=nsample)

    # estimate only linear function, misspecified because of non-linear terms
    exog0 = sm.add_constant(np.c_[x1, x2], prepend=False)

    #    plt.figure()
    #    plt.plot(x1, y, 'o', x1, y_true, 'b-')

    res = sm.OLS(y, exog0).fit()
    # print(res.params)
    # print(res.bse)

    plot_old = 0  # True
    if plot_old:

        # current bug predict requires call to model.results
        # print(res.model.predict)
        prstd, iv_l, iv_u = wls_prediction_std(res)
        plt.plot(x1, res.fittedvalues, "r-o")
Example No. 24
    print(approx_hess_cs((1, 2, 3), fun, (x,), h=1.0e-20))  #this is correctly zero

    print(approx_hess_cs((1, 2, 3), fun2, (y, x), h=1.0e-20) - 2*np.dot(x.T, x))
    print(numdiff.approx_hess(xk, fun2, 1e-3, (y, x))[0] - 2*np.dot(x.T, x))

    gt = (-x*2*(y - np.dot(x, [1, 2, 3]))[:, None])
    g = approx_fprime_cs((1, 2, 3), fun1, (y, x), h=1.0e-20)  #.T   #this shouldn't be transposed
    gd = numdiff.approx_fprime1((1, 2, 3), fun1, epsilon, (y, x))
    print(maxabs(g, gt))
    print(maxabs(gd, gt))


    import gwstatsmodels.api as sm

    data = sm.datasets.spector.load()
    data.exog = sm.add_constant(data.exog)
    #mod = sm.Probit(data.endog, data.exog)
    mod = sm.Logit(data.endog, data.exog)
    #res = mod.fit(method="newton")
    test_params = [1, 0.25, 1.4, -7]
    loglike = mod.loglike
    score = mod.score
    hess = mod.hessian

    #cs doesn't work for Probit because special.ndtr doesn't support complex
    #maybe calculating ndtr for real and imag parts separately, if we need it
    #and if it still works in this case
    print('sm', score(test_params))
    print('fd', numdiff.approx_fprime1(test_params, loglike, epsilon))
    print('cs', numdiff.approx_fprime_cs(test_params, loglike))
    print('sm', hess(test_params))
Example No. 25
import gwstatsmodels.sandbox.panel.sandwich_covariance as sw
import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg

#http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta
import gwstatsmodels.iolib.foreign as dta

srs = dta.genfromdta("srs.dta")
y = srs['api00']
#x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1)
#force sequence
x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']])
group = srs['dnum']

#xx = sm.add_constant(x, prepend=True)
xx = sm.add_constant(x, prepend=False)  #for Stata compatibility

#remove nan observation
mask = (xx != -999.0).all(1)  #nan code in dta file
mask.shape
y = y[mask]
xx = xx[mask]
group = group[mask]

res_srs = sm.OLS(y, xx).fit()
print(res_srs.params)
print(res_srs.bse)

bse_cr = sw.cov_cluster(res_srs, group.astype(int))[1]
print(bse_cr)
Example No. 26
def anova_ols(y, x):
    X = sm.add_constant(data2dummy(x))
    res = sm.OLS(y, X).fit()
    return res.fvalue, res.f_pvalue, res.rsquared, np.sqrt(res.mse_resid)
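anova_ols relies on a data2dummy helper from the surrounding module; a self-contained sketch with a hypothetical stand-in that drops the last group level to avoid collinearity with the constant:

import numpy as np

def data2dummy(x):
    # hypothetical stand-in: one dummy column per group level, last level dropped
    levels = np.unique(x)
    return (x[:, None] == levels[None, :-1]).astype(float)

groups = np.repeat([0, 1, 2], 20)
y = np.random.randn(60) + groups
fval, fpval, rsq, rootmse = anova_ols(y, groups)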
Example No. 27
    from pandas import DataFrame
    data = sm.datasets.longley.load()
    df = DataFrame(data.exog, columns=data.exog_name)
    y = data.endog
    # data.exog = sm.add_constant(data.exog)
    df['intercept'] = 1.
    olsresult = sm.OLS(y, df).fit()
    rlmresult = sm.RLM(y, df).fit()

    # olswrap = RegressionResultsWrapper(olsresult)
    # rlmwrap = RLMResultsWrapper(rlmresult)

    data = sm.datasets.wfs.load()
    # get offset
    offset = np.log(data.exog[:, -1])
    exog = data.exog[:, :-1]

    # convert dur to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # drop reference category
    # convert res to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # convert edu to dummy
    exog = sm.tools.categorical(exog, col=0, drop=True)
    # drop reference categories and add intercept
    exog = sm.add_constant(exog[:, [1, 2, 3, 4, 5, 7, 8, 10, 11, 12]])

    endog = np.round(data.endog)
    mod = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()
    # glmwrap = GLMResultsWrapper(mod)
Example No. 28
            pred = np.dot(self.wexog, self.coeffs)
            eps = np.diag((self.wendog - pred) ** 2)
            sigmaSq = np.sum(eps)
            pinvX = np.dot(self.rnorm_cov_params, self.wexog.T)
            self._wncp = np.dot(np.dot(pinvX, eps), pinvX.T) * df / sigmaSq
        return self._wncp

    _coeffs = None
    @property
    def coeffs(self):
        """Estimated parameters"""
        if self._coeffs is None:
            betaLambda = np.dot(self.inv_rwexog, self.rwendog)
            self._coeffs = betaLambda[:self.ncoeffs]
        return self._coeffs

    def fit(self):
        rncp = self.wrnorm_cov_params
        lfit = RegressionResults(self, self.coeffs, normalized_cov_params=rncp)
        return lfit

if __name__=="__main__":
    import gwstatsmodels.api as sm
    dta = np.genfromtxt('./rlsdata.txt', names=True)
    design = np.column_stack((dta['Y'],dta['Y']**2,dta[['NE','NC','W','S']].view(float).reshape(dta.shape[0],-1)))
    design = sm.add_constant(design, prepend=True)
    rls_mod = RLS(dta['G'],design, constr=[0,0,0,1,1,1,1])
    rls_fit = rls_mod.fit()
    print rls_fit.params

Example No. 29
mv2m = mvn3.marginal(np.array([0, 1]))
print(mv2m.mean)
print(mv2m.cov)

mv2c = mvn3.conditional(np.array([0, 1]), [0])
print(mv2c.mean)
print(mv2c.cov)

mv2c = mvn3.conditional(np.array([0]), [0, 0])
print(mv2c.mean)
print(mv2c.cov)

import gwstatsmodels.api as sm

mod = sm.OLS(x[:, 0], sm.add_constant(x[:, 1:], prepend=True))
res = mod.fit()
print(res.model.predict(np.array([1, 0, 0])))
mv2c = mvn3.conditional(np.array([0]), [0, 0])
print(mv2c.mean)
mv2c = mvn3.conditional(np.array([0]), [1, 1])
print(res.model.predict(np.array([1, 1, 1])))
print(mv2c.mean)

#the following wrong input doesn't raise an exception but produces wrong numbers
#mv2c = mvn3.conditional(np.array([0]), [[1, 1],[2,2]])

#************** multivariate t distribution ***************

mvt3 = mvd.MVT(mu, cov3, 4)
xt = mvt3.rvs(size=100000)
Example No. 30
##    r = np.zeros(n_groups)
##    R = np.c_[np.zeros((n_groups-1, k_vars)),
##              np.eye(n_groups-1)-1./n_groups * np.ones((n_groups-1, n_groups-1))]

if __name__ == '__main__':

    import numpy as np
    import gwstatsmodels.api as sm

    examples = [2]

    np.random.seed(765367)
    np.random.seed(97653679)
    nsample = 100
    x = np.linspace(0, 10, nsample)
    X = sm.add_constant(np.column_stack((x, x**2, (x / 5.)**3)), prepend=True)
    beta = np.array([10, 1, 0.1, 0.5])
    y = np.dot(X, beta) + np.random.normal(size=nsample)

    res_ols = sm.OLS(y, X).fit()

    R = [[0, 0, 0, 1]]
    r = [0]  #, 0, 0 , 0]
    lambd = 1  #1e-4
    mod = TheilGLS(y, X, r_matrix=R, q_matrix=r, sigma_prior=lambd)
    res = mod.fit()
    print(res_ols.params)
    print(res.params)

    #example 2
    #I need more flexible penalization in example, the penalization should
Example No. 31
    64 57  8
    71 59 10
    53 49  6
    67 62 11
    55 51  8
    58 50  7
    77 55 10
    57 48  9
    56 42 10
    51 42  6
    76 61 12
    68 57  9'''.split(), float).reshape(-1, 3)
    varnames = 'weight height age'.split()

    endog = data[:, 0]
    exog = sm.add_constant(data[:, 2], prepend=True)

    res_ols = sm.OLS(endog, exog).fit()

    hh = (res_ols.model.exog * res_ols.model.pinv_wexog.T).sum(1)
    x = res_ols.model.exog
    hh_check = np.diag(
        np.dot(x, np.dot(res_ols.model.normalized_cov_params, x.T)))

    from numpy.testing import assert_almost_equal
    assert_almost_equal(hh, hh_check, decimal=13)

    res = res_ols  #alias

    #http://en.wikipedia.org/wiki/PRESS_statistic
    #predicted residuals, leave one out predicted residuals
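The leverage values hh checked above are the diagonal of the hat matrix H = X (X'X)^{-1} X', which is also the ingredient the PRESS statistic needs (leave-one-out predicted residuals are e_i / (1 - h_ii)). A compact standalone version (numpy only, toy design matrix):

import numpy as np

X = np.column_stack((np.ones(5), np.arange(5.)))
hh = np.diag(X.dot(np.linalg.pinv(X)))  # diag of X (X'X)^{-1} X'
#press_resid = resid / (1 - hh)         # leave-one-out predicted residuals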
Example No. 32
# ndts=np.column_stack(dts[col] for col in dts.dtype.names)
# ntda=ntds.swapaxis(1,0)
# ntda is ntds returns false?

# or now we just have detailed information about the different strings
# would this approach ever be inappropriate for a string typed variable
# other than dates?
#    descstats(ndts, [1])
#    raw_input("Enter to try second part")
#    descstats(ndts, [1,20,3])

if __name__ == '__main__':
    import gwstatsmodels.api as sm
    import os
    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    sum1 = descstats(data.exog)
    sum1a = descstats(data.exog[:, :1])

#    loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
#    dta=np.recfromcsv(loc)
#    summary2 = descstats(dta,['stpop'])
#    summary3 =  descstats(dta,['stpop','avginc','vio'])
#TODO: needs a by argument
#    summary4 = descstats(dta) this fails
# this is a bug
# p = dta[['stpop']]
# p.view(dtype = np.float, type = np.ndarray)
# this works
# p.view(dtype = np.int, type = np.ndarray)
Example No. 33
    if normed:
        mi_normed = np.sqrt(1. - np.exp(-2 * mi))
        return mi_normed, (pyx, py, px, binsy, binsx), mi_obs
    else:
        return mi


if __name__ == '__main__':
    import gwstatsmodels.api as sm

    funtype = ['linear', 'quadratic'][1]
    nobs = 200
    sig = 2  # 5.
    #x = np.linspace(-3, 3, nobs) + np.random.randn(nobs)
    x = np.sort(3*np.random.randn(nobs))
    exog = sm.add_constant(x, prepend=True)
    #y = 0 + np.log(1+x**2) + sig * np.random.randn(nobs)
    if funtype == 'quadratic':
        y = 0 + x**2 + sig * np.random.randn(nobs)
    if funtype == 'linear':
        y = 0 + x + sig * np.random.randn(nobs)

    print('correlation')
    print(np.corrcoef(y, x)[0, 1])
    print('pearsonr', stats.pearsonr(y, x))
    print('spearmanr', stats.spearmanr(y, x))
    print('kendalltau', stats.kendalltau(y, x))

    pxy, binsx, binsy = np.histogram2d(x, y, bins=5)
    px, binsx_ = np.histogram(x, bins=binsx)
    py, binsy_ = np.histogram(y, bins=binsy)
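These histograms feed a plug-in estimate of mutual information; a minimal discrete version with the same normalization as mi_normed above (illustrative joint probabilities):

import numpy as np

pxy = np.array([[0.2, 0.1], [0.1, 0.6]])  # joint pmf, illustrative
px, py = pxy.sum(1), pxy.sum(0)
mask = pxy > 0
mi = np.sum(pxy[mask] * np.log(pxy[mask] / np.outer(px, py)[mask]))
mi_normed = np.sqrt(1. - np.exp(-2 * mi))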
Example No. 34
    def as_csv(self):
        '''return tables as string

        Returns
        -------
        csv : string
            concatenated summary tables in comma delimited format

        '''
        return summary_return(self.tables, return_fmt='csv')

    def as_html(self):
        '''return tables as string

        Returns
        -------
        html : string
            concatenated summary tables in HTML format

        '''
        return summary_return(self.tables, return_fmt='html')


if __name__ == "__main__":
    import gwstatsmodels.api as sm
    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    res = sm.OLS(data.endog, data.exog).fit()
    #summary(
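    #a usage sketch (assumption: res.summary() returns this Summary class)
    summ = res.summary()
    #csv_text = summ.as_csv()
    #html_text = summ.as_html()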

Example No. 35
"""
import numpy as np
from numpy.testing import assert_almost_equal

import gwstatsmodels.api as sm

import gwstatsmodels.sandbox.panel.sandwich_covariance as sw
import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg

#requires Petersen's test_data
#http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt
pet = np.genfromtxt("test_data.txt")
endog = pet[:, -1]
group = pet[:, 0].astype(int)
time = pet[:, 1].astype(int)
exog = sm.add_constant(pet[:, 2], prepend=True)
res = sm.OLS(endog, exog).fit()

cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time)

#Reference number from Petersen
#http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm

bse_petw = [0.0284, 0.0284]
bse_pet0 = [0.0670, 0.0506]
bse_pet1 = [0.0234, 0.0334]  #year
bse_pet01 = [0.0651, 0.0536]  #firm and year
bse_0 = sw.se_cov(covg)
bse_1 = sw.se_cov(covt)
bse_01 = sw.se_cov(cov01)
print(res.HC0_se, bse_petw - res.HC0_se)
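The standard errors above are simply the square roots of the covariance diagonals, and the two-way (firm and year) covariance combines the one-way pieces by inclusion-exclusion, V_both = V_firm + V_year - V_intersection, in the Cameron-Gelbach-Miller formula. A minimal sketch of the se_cov step (numpy only):

import numpy as np

def se_cov_sketch(cov):
    #standard errors from a covariance matrix: sqrt of the diagonal
    return np.sqrt(np.diag(cov))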