def test_HC_use():
    # Check that cov_params, t_test and f_test use an explicitly supplied
    # covariance matrix (cov_p=results.cov_HC0) instead of the default one.
    np.random.seed(0)
    nsample = 100
    x = np.linspace(0, 10, 100)
    exog = sm.add_constant(np.column_stack((x, x**2)), prepend=False)
    beta = np.array([1, 0.1, 10])
    noise = np.random.normal(size=nsample)
    endog = np.dot(exog, beta) + noise
    results = sm.OLS(endog, exog).fit()

    # need to access HC0_se first to have cov_HC0 available
    results.HC0_se
    cov_hc0 = results.cov_HC0

    # test cov_params: sub-block for columns 1 and 2
    cols = [1, 2]
    sub_cov = results.cov_params(column=cols, cov_p=cov_hc0)
    assert_almost_equal(sub_cov, cov_hc0[np.ix_(cols, cols)], decimal=15)

    # test t_test: identity restriction gives one t-value per parameter
    expected_t = results.params / results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=cov_hc0)
    assert_almost_equal(ttest.tvalue, expected_t, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    # test f_test: joint restriction on the first two parameters, compared
    # with the quadratic form computed by hand
    restriction = np.eye(3)[:-1]
    ftest = results.f_test(restriction, cov_p=cov_hc0)
    slopes = results.params[:-1]
    slope_cols = [0, 1]
    cov_slopes = cov_hc0[np.ix_(slope_cols, slope_cols)]
    quad_form = np.dot(slopes, np.dot(np.linalg.inv(cov_slopes), slopes))
    assert_almost_equal(ftest.fvalue, quad_form / len(slope_cols), decimal=12)
def linmod(y, x, weights=None, sigma=None, add_const=True,
           filter_missing=True, **kwds):
    '''get linear model with extra options for entry

    Dispatches to a regular model class and returns the model instance
    directly, without wrapping it.

    Parameters
    ----------
    y, x : endogenous and exogenous data
    weights : if not None (and sigma is None), a WLS model is returned
    sigma : if not None, a GLS model is returned
    add_const : if true, ``sm.add_constant(x, prepend=True)`` is applied
    filter_missing : if true, ``remove_nanrows(y, x)`` is applied first
    **kwds : passed on unchanged to the chosen model class

    Notes
    -----
    The options are checked in a fixed sequence: ``sigma`` takes precedence
    over ``weights``; if neither is given an OLS model is returned.
    '''
    endog, exog = y, x
    if filter_missing:
        # TODO from original: do the same for masked arrays
        endog, exog = remove_nanrows(endog, exog)
    if add_const:
        exog = sm.add_constant(exog, prepend=True)

    if sigma is not None:
        return GLS(endog, exog, sigma=sigma, **kwds)
    if weights is not None:
        return WLS(endog, exog, weights=weights, **kwds)
    return OLS(endog, exog, **kwds)
class TestRlm(CheckRlmResults):
    from gwstatsmodels.datasets.stackloss import load
    # stackloss data with a constant added; kept as class attributes so that
    # subclasses can use them
    data = load()
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        # test precisions
        self.decimal_standarderrors = DECIMAL_1
        self.decimal_scale = DECIMAL_3

        def huber_fit(**fit_options):
            # every fit uses a newly created model and HuberT norm
            model = RLM(self.data.endog, self.data.exog,
                        M=sm.robust.norms.HuberT())
            return model.fit(**fit_options)

        # default covariance first, then the H2 and H3 scaled covariances,
        # which are attached to the main results instance
        self.res1 = huber_fit()
        self.res1.h2 = huber_fit(cov="H2").bcov_scaled
        self.res1.h3 = huber_fit(cov="H3").bcov_scaled

    def setup(self):
        # stored reference results; the former direct R call was
        #   r.library('MASS')
        #   self.res2 = RModel(self.data.endog, self.data.exog,
        #                      r.rlm, psi="psi.huber")
        from results.results_rlm import Huber
        self.res2 = Huber()

    def test_summary(self):
        # smoke test that summary at least returns something
        self.res1.summary()
def __init__(self):
    # Logit model on the spector data (constant added) together with a
    # single fixed parameter vector, stored in a list.
    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)
    # alternative kept from the original: sm.Probit(data.endog, data.exog)
    logit_model = sm.Logit(spector.endog, spector.exog)
    self.mod = logit_model
    self.params = [np.array([1, 0.25, 1.4, -7])]
def __init__(self):
    # Logit model on the spector data (constant added) together with a
    # single fixed parameter vector, stored in a list.
    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)
    # alternative kept from the original: sm.Probit(data.endog, data.exog)
    logit_model = sm.Logit(spector.endog, spector.exog)
    self.mod = logit_model
    self.params = [np.array([1, 0.25, 1.4, -7])]
def setupClass(cls):
    # res1: Logit fitted with the newton optimizer on the spector data
    # res2: stored Spector reference results for logit
    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)
    logit_model = Logit(spector.endog, spector.exog)
    cls.res1 = logit_model.fit(method="newton", disp=0)

    expected = Spector()
    expected.logit()
    cls.res2 = expected
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    regressors = np.random.randn(nobs, 6)
    exog = sm.add_constant(regressors)
    linpred = 1 + 0.1 * regressors.sum(1)
    endog = np.random.poisson(np.exp(linpred))

    # estimate discretemod.Poisson and GLM-Poisson as benchmarks
    self.res_discrete = Poisson(endog, exog).fit(disp=0)
    glm_model = sm.GLM(endog, exog, family=sm.families.Poisson())
    self.res_glm = glm_model.fit()

    # estimate generic MLE with offset: the first column of exog, scaled by
    # its benchmark estimate, enters as a fixed offset and only the
    # remaining columns are estimated (offset is 1d, questioned in original)
    bench_params = self.res_discrete.params
    offset = bench_params[0] * exog[:, 0]
    offset_model = PoissonOffsetGMLE(endog, exog[:, 1:], offset=offset)
    # start close to the benchmark estimate, Nelder-Mead optimizer
    self.res = offset_model.fit(start_params=0.9 * bench_params[1:],
                                method='nm', disp=0)
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    regressors = np.random.randn(nobs, 6)
    exog = sm.add_constant(regressors)
    linpred = 1 + 0.1 * regressors.sum(1)
    endog = np.random.poisson(np.exp(linpred))

    # estimate discretemod.Poisson and GLM-Poisson as benchmarks
    self.res_discrete = Poisson(endog, exog).fit(disp=0)
    glm_model = sm.GLM(endog, exog, family=sm.families.Poisson())
    self.res_glm = glm_model.fit()

    # estimate generic MLE with offset: the first column of exog, scaled by
    # its benchmark estimate, enters as a fixed offset and only the
    # remaining columns are estimated (offset is 1d, questioned in original)
    bench_params = self.res_discrete.params
    offset = bench_params[0] * exog[:, 0]
    offset_model = PoissonOffsetGMLE(endog, exog[:, 1:], offset=offset)
    # start close to the benchmark estimate, Nelder-Mead optimizer
    self.res = offset_model.fit(start_params=0.9 * bench_params[1:],
                                method='nm', disp=0)
def test_HC_use():
    # Check that cov_params, t_test and f_test use an explicitly supplied
    # covariance matrix (cov_p=results.cov_HC0) instead of the default one.
    np.random.seed(0)
    nsample = 100
    x = np.linspace(0, 10, 100)
    exog = sm.add_constant(np.column_stack((x, x**2)), prepend=False)
    beta = np.array([1, 0.1, 10])
    noise = np.random.normal(size=nsample)
    endog = np.dot(exog, beta) + noise
    results = sm.OLS(endog, exog).fit()

    # need to access HC0_se first to have cov_HC0 available
    results.HC0_se
    cov_hc0 = results.cov_HC0

    # test cov_params: sub-block for columns 1 and 2
    cols = [1, 2]
    sub_cov = results.cov_params(column=cols, cov_p=cov_hc0)
    assert_almost_equal(sub_cov, cov_hc0[np.ix_(cols, cols)], decimal=15)

    # test t_test: identity restriction gives one t-value per parameter
    expected_t = results.params / results.HC0_se
    ttest = results.t_test(np.eye(3), cov_p=cov_hc0)
    assert_almost_equal(ttest.tvalue, expected_t, decimal=14)
    assert_almost_equal(ttest.sd, results.HC0_se, decimal=14)

    # test f_test: joint restriction on the first two parameters, compared
    # with the quadratic form computed by hand
    restriction = np.eye(3)[:-1]
    ftest = results.f_test(restriction, cov_p=cov_hc0)
    slopes = results.params[:-1]
    slope_cols = [0, 1]
    cov_slopes = cov_hc0[np.ix_(slope_cols, slope_cols)]
    quad_form = np.dot(slopes, np.dot(np.linalg.inv(cov_slopes), slopes))
    assert_almost_equal(ftest.fvalue, quad_form / len(slope_cols), decimal=12)
def setupClass(cls):
    # res1: Poisson fitted with the newton optimizer on the randhie data
    # res2: stored RandHIE reference results for poisson
    from results.results_discrete import RandHIE
    randhie = sm.datasets.randhie.load()
    design = sm.add_constant(randhie.exog)
    poisson_model = Poisson(randhie.endog, design)
    cls.res1 = poisson_model.fit(method='newton', disp=0)

    expected = RandHIE()
    expected.poisson()
    cls.res2 = expected
def setup_class(self):
    # seeded random regressors with a prepended constant (self.exog) and a
    # small 2 x 4 array filled with 0.25 (self.xf)
    np.random.seed(987689)
    nobs = 10000
    regressors = np.random.randn(nobs, 3)
    self.exog = sm.add_constant(regressors, prepend=True)
    self.xf = 0.25 * np.ones((2, 4))
def setup_class(self):
    # seeded random regressors with a prepended constant (self.exog) and a
    # small 2 x 4 array filled with 0.25 (self.xf)
    np.random.seed(987689)
    nobs = 10000
    regressors = np.random.randn(nobs, 3)
    self.exog = sm.add_constant(regressors, prepend=True)
    self.xf = 0.25 * np.ones((2, 4))
def setupClass(cls):
    # res2: stored Spector reference results for probit
    # res1: Probit fitted with the ncg optimizer on the spector data
    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)

    expected = Spector()
    expected.probit()
    cls.res2 = expected

    probit_model = Probit(spector.endog, spector.exog)
    cls.res1 = probit_model.fit(method="ncg", disp=0, avextol=1e-8)
def test_qqplot():
    # smoke test: only checks that qqplot runs on OLS residuals
    longley = sm.datasets.longley.load()
    longley.exog = sm.add_constant(longley.exog)
    residuals = sm.OLS(longley.endog, longley.exog).fit().resid
    figure = sm.qqplot(residuals)
    # close the figure again so it does not stay open after the test
    plt.close(figure)
def test_poisson_newton():
    # GH: 24, Newton doesn't work well sometimes
    # The fit from this starting point is expected to report non-convergence.
    np.random.seed(987689)
    nobs = 10000
    exog = sm.add_constant(np.random.randn(nobs, 3), prepend=True)
    counts = np.random.poisson(np.exp(exog.sum(1)))
    start = -np.ones(4)
    res = sm.Poisson(counts, exog).fit(start_params=start, method='newton',
                                       disp=0)
    converged = res.mle_retvals['converged']
    assert_(not converged)
def setupClass(cls):
    # res2: stored Spector reference results for probit
    # res1: Probit fitted with the cg optimizer on the spector data
    if iswindows:
        # original author's open question: does this work with classmethod?
        raise SkipTest("fmin_cg sometimes fails to converge on windows")

    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)

    expected = Spector()
    expected.probit()
    cls.res2 = expected

    probit_model = Probit(spector.endog, spector.exog)
    cls.res1 = probit_model.fit(method="cg", disp=0, maxiter=500)
def setupClass(cls):
    # res1: MNLogit fitted with the newton optimizer on selected anes96
    #       regressors; res2: stored Anes reference results (basezero)
    from results.results_discrete import Anes
    anes = sm.datasets.anes96.load()
    raw = anes.exog
    # log-transform column 0; this writes into the loaded array in place
    raw[:, 0] = np.log(raw[:, 0] + .1)
    # keep columns 0, 2 and 5..7, then add the constant
    selected = np.column_stack((raw[:, 0], raw[:, 2], raw[:, 5:8]))
    design = sm.add_constant(selected)
    cls.res1 = MNLogit(anes.endog, design).fit(method="newton", disp=0)

    expected = Anes()
    expected.mnlogit_basezero()
    cls.res2 = expected
def setupClass(cls):
    # res1: MNLogit fitted with the newton optimizer on selected anes96
    #       regressors; res2: stored Anes reference results (basezero)
    from results.results_discrete import Anes
    anes = sm.datasets.anes96.load()
    raw = anes.exog
    # log-transform column 0; this writes into the loaded array in place
    raw[:, 0] = np.log(raw[:, 0] + .1)
    # keep columns 0, 2 and 5..7, then add the constant
    selected = np.column_stack((raw[:, 0], raw[:, 2], raw[:, 5:8]))
    design = sm.add_constant(selected)
    cls.res1 = MNLogit(anes.endog, design).fit(method="newton", disp=0)

    expected = Anes()
    expected.mnlogit_basezero()
    cls.res2 = expected
def setup(self):
    # Simulate y from a design with non-linear terms, then fit OLS on the
    # linear regressors (x1, x2, constant) only and store the results.
    # Note: no random seed is set here.
    nsample = 100
    sig = 0.5
    x1 = np.linspace(0, 20, nsample)
    x2 = 5 + 3 * np.random.randn(nsample)

    true_design = np.column_stack((x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2,
                                   np.ones(nsample)))
    true_beta = [0.5, 0.5, 1, -0.04, 5.0]
    y_true = np.dot(true_design, true_beta)
    y = y_true + sig * np.random.normal(size=nsample)

    linear_exog = sm.add_constant(np.column_stack((x1, x2)), prepend=False)
    self.res = sm.OLS(y, linear_exog).fit()
def setup(self):
    # Simulate y from a design with non-linear terms, then fit OLS on the
    # linear regressors (x1, x2, constant) only and store the results.
    # Note: no random seed is set here.
    nsample = 100
    sig = 0.5
    x1 = np.linspace(0, 20, nsample)
    x2 = 5 + 3 * np.random.randn(nsample)

    true_design = np.column_stack((x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2,
                                   np.ones(nsample)))
    true_beta = [0.5, 0.5, 1, -0.04, 5.0]
    y_true = np.dot(true_design, true_beta)
    y = y_true + sig * np.random.normal(size=nsample)

    linear_exog = sm.add_constant(np.column_stack((x1, x2)), prepend=False)
    self.res = sm.OLS(y, linear_exog).fit()
def __init__(self):
    # MNLogit model on selected anes96 regressors whose loglike is replaced
    # by a wrapper that accepts flattened parameters, plus the (6, 6)
    # parameter array stored in a list.
    #from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    # log-transform column 0; this writes into the loaded array in place
    exog[:, 0] = np.log(exog[:, 0] + .1)
    exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
    exog = sm.add_constant(exog)
    self.mod = sm.MNLogit(data.endog, exog)

    # Keep a handle on the model's own bound loglike and close over it.
    # A plain function stored on an instance is not turned into a bound
    # method, so a wrapper with a ``self`` parameter could not be called as
    # ``mod.loglike(params)``; and looking up ``self.loglike`` after the
    # assignment below would find the wrapper instead of the original.
    loglike_2d = self.mod.loglike

    def loglikeflat(params):
        #reshapes flattened params
        return loglike_2d(params.reshape(6, 6))

    self.mod.loglike = loglikeflat
    self.params = [np.ones((6, 6))]
def test_poisson_predict():
    # GH: 175, make sure poisson predict works without offset and exposure
    randhie = sm.datasets.randhie.load()
    exog = sm.add_constant(randhie.exog)
    res = sm.Poisson(randhie.endog, exog).fit(method='newton', disp=0)

    baseline = res.predict()
    explicit_exog = res.predict(exog)
    assert_almost_equal(baseline, explicit_exog)

    # extra options
    # neutral offset and exposure leave the prediction unchanged
    neutral = res.predict(exog, offset=0, exposure=1)
    assert_almost_equal(baseline, neutral)
    # exposure=2 and offset=log(2) are each expected to double it
    doubled_exposure = res.predict(exog, offset=0, exposure=2)
    assert_almost_equal(2 * baseline, doubled_exposure)
    doubled_offset = res.predict(exog, offset=np.log(2), exposure=1)
    assert_almost_equal(2 * baseline, doubled_offset)
def test_poisson_predict():
    # GH: 175, make sure poisson predict works without offset and exposure
    randhie = sm.datasets.randhie.load()
    exog = sm.add_constant(randhie.exog)
    res = sm.Poisson(randhie.endog, exog).fit(method='newton', disp=0)

    baseline = res.predict()
    explicit_exog = res.predict(exog)
    assert_almost_equal(baseline, explicit_exog)

    # extra options
    # neutral offset and exposure leave the prediction unchanged
    neutral = res.predict(exog, offset=0, exposure=1)
    assert_almost_equal(baseline, neutral)
    # exposure=2 and offset=log(2) are each expected to double it
    doubled_exposure = res.predict(exog, offset=0, exposure=2)
    assert_almost_equal(2 * baseline, doubled_exposure)
    doubled_offset = res.predict(exog, offset=np.log(2), exposure=1)
    assert_almost_equal(2 * baseline, doubled_offset)
def __init__(self):
    # MNLogit model on selected anes96 regressors whose loglike is replaced
    # by a wrapper that accepts flattened parameters, plus the (6, 6)
    # parameter array stored in a list.
    #from results.results_discrete import Anes
    data = sm.datasets.anes96.load()
    exog = data.exog
    # log-transform column 0; this writes into the loaded array in place
    exog[:, 0] = np.log(exog[:, 0] + .1)
    exog = np.column_stack((exog[:, 0], exog[:, 2], exog[:, 5:8]))
    exog = sm.add_constant(exog)
    self.mod = sm.MNLogit(data.endog, exog)

    # Keep a handle on the model's own bound loglike and close over it.
    # A plain function stored on an instance is not turned into a bound
    # method, so a wrapper with a ``self`` parameter could not be called as
    # ``mod.loglike(params)``; and looking up ``self.loglike`` after the
    # assignment below would find the wrapper instead of the original.
    loglike_2d = self.mod.loglike

    def loglikeflat(params):
        #reshapes flattened params
        return loglike_2d(params.reshape(6, 6))

    self.mod.loglike = loglikeflat
    self.params = [np.ones((6, 6))]
def setupClass(cls):
    # Former version-dependent skip, kept for reference:
    #     import scipy
    #     major, minor, micro = scipy.__version__.split('.')[:3]
    #     if int(minor) < 9:
    #         raise SkipTest
    #
    # Skip this unconditionally for release 0.3.0
    # since there are still problems with scipy 0.9.0 on some machines
    # Ralf on mailing list 2011-03-26
    raise SkipTest

    # NOTE: everything below is unreachable while the skip above is in place
    spector = sm.datasets.spector.load()
    spector.exog = sm.add_constant(spector.exog)

    expected = Spector()
    expected.logit()
    cls.res2 = expected

    logit_model = Logit(spector.endog, spector.exog)
    cls.res1 = logit_model.fit(method="bfgs", disp=0)
def test_perfect_prediction():
    # Logit on iris rows with label != 2 must raise PerfectSeparationError
    # by default, and must fit without raising once the check is disabled.
    here = os.path.dirname(os.path.abspath(__file__))
    results_dir = os.path.abspath(
        os.path.join(here, '..', '..', 'genmod', 'tests', 'results'))
    iris_path = os.path.join(results_dir, 'iris.csv')
    iris = np.genfromtxt(iris_path, delimiter=",", skip_header=1)

    # last column is the response, the others are regressors
    labels = iris[:, -1]
    features = iris[:, :-1]
    keep = labels != 2
    features = features[keep]
    labels = labels[keep]
    features = sm.add_constant(features, prepend=True)

    mod = Logit(labels, features)
    assert_raises(PerfectSeparationError, mod.fit)
    # turn off raise PerfectSeparationError
    mod.raise_on_perfect_prediction = False
    mod.fit()  # should not raise
def test_perfect_prediction():
    # Logit on iris rows with label != 2 must raise PerfectSeparationError
    # by default, and must fit without raising once the check is disabled.
    here = os.path.dirname(os.path.abspath(__file__))
    results_dir = os.path.abspath(
        os.path.join(here, '..', '..', 'genmod', 'tests', 'results'))
    iris_path = os.path.join(results_dir, 'iris.csv')
    iris = np.genfromtxt(iris_path, delimiter=",", skip_header=1)

    # last column is the response, the others are regressors
    labels = iris[:, -1]
    features = iris[:, :-1]
    keep = labels != 2
    features = features[keep]
    labels = labels[keep]
    features = sm.add_constant(features, prepend=True)

    mod = Logit(labels, features)
    assert_raises(PerfectSeparationError, mod.fit)
    # turn off raise PerfectSeparationError
    mod.raise_on_perfect_prediction = False
    mod.fit()  # should not raise
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    regressors = np.random.randn(nobs, 6)
    exog = sm.add_constant(regressors)
    linpred = 0.1 + 0.1 * regressors.sum(1)
    endog = np.random.poisson(np.exp(linpred))

    # estimate discretemod.Poisson and GLM-Poisson as benchmarks
    self.res_discrete = Poisson(endog, exog).fit(disp=0)
    self.res_glm = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

    # estimate generic MLE, started close to the benchmark estimate and
    # optimized with Nelder-Mead
    self.mod = PoissonGMLE(endog, exog)
    start = 0.9 * self.res_discrete.params
    self.res = self.mod.fit(start_params=start, method='nm', disp=0)
def calc_factors(self, x=None, keepdim=0, addconst=True):
    '''get factor decomposition of exogenous variables

    The factors are obtained by principal component analysis (``pca`` with
    ``normalize=1``). The number of factors kept is the maximum that will
    be considered in the regression.

    Parameters
    ----------
    x : array_like or None
        data to decompose; if None, ``self.exog`` is used
    keepdim : int
        passed on to ``pca``
    addconst : bool
        if true, a constant is prepended to the factors

    Notes
    -----
    Nothing is returned. The results are stored in the attributes
    ``exog_reduced``, ``factors``, ``hasconst`` (0 or 1), ``evals`` and
    ``evecs``.
    '''
    data = self.exog if x is None else np.asarray(x)
    reduced, factors, eigvals, eigvecs = pca(data, keepdim=keepdim,
                                             normalize=1)
    self.exog_reduced = reduced

    if addconst:
        factors = sm.add_constant(factors, prepend=True)
        has_constant = 1
    else:
        has_constant = 0
    self.factors = factors
    self.hasconst = has_constant  # needs to be int

    self.evals = eigvals
    self.evecs = eigvecs
def __init__(self):
    # generate artificial data
    np.random.seed(98765678)
    nobs = 200
    regressors = np.random.randn(nobs, 6)
    exog = sm.add_constant(regressors)
    linpred = 0.1 + 0.1 * regressors.sum(1)
    endog = np.random.poisson(np.exp(linpred))

    # estimate discretemod.Poisson and GLM-Poisson as benchmarks
    self.res_discrete = Poisson(endog, exog).fit(disp=0)
    self.res_glm = sm.GLM(endog, exog, family=sm.families.Poisson()).fit()

    # estimate generic MLE, started close to the benchmark estimate and
    # optimized with Nelder-Mead
    self.mod = PoissonGMLE(endog, exog)
    start = 0.9 * self.res_discrete.params
    self.res = self.mod.fit(start_params=start, method='nm', disp=0)
class TestRlmHuber(CheckRlmResults):
    from gwstatsmodels.datasets.stackloss import load
    # stackloss data with a constant added, kept as class attributes
    data = load()
    data.exog = sm.add_constant(data.exog)

    def __init__(self):
        def huber_fit(**fit_options):
            # every fit uses a newly created model, HuberT norm and
            # HuberScale scale estimator
            model = RLM(self.data.endog, self.data.exog,
                        M=sm.robust.norms.HuberT())
            return model.fit(scale_est=sm.robust.scale.HuberScale(),
                             **fit_options)

        # default covariance first, then the H2 and H3 scaled covariances,
        # which are attached to the main results instance
        self.res1 = huber_fit()
        self.res1.h2 = huber_fit(cov="H2").bcov_scaled
        self.res1.h3 = huber_fit(cov="H3").bcov_scaled

    def setup(self):
        # stored reference results
        from results.results_rlm import HuberHuber
        self.res2 = HuberHuber()
import gwstatsmodels.api as sm import numpy.lib.recfunctions as nprf data = sm.datasets.grunfeld.load() # Baltagi doesn't include American Steel endog = data.endog[:-20] fullexog = data.exog[:-20] # fullexog.sort(order=['firm','year']) panel_arr = nprf.append_fields(fullexog, 'investment', endog, float, usemask=False) panel_panda = LongPanel.fromRecords(panel_arr, major_field='year', minor_field='firm') # the most cumbersome way of doing it as far as preprocessing by hand exog = fullexog[['value','capital']].view(float).reshape(-1,2) exog = sm.add_constant(exog) panel = group(fullexog['firm']) year = fullexog['year'] panel_mod = PanelModel(endog, exog, panel, year, xtnames=['firm','year'], equation='invest value capital') # note that equation doesn't actually do anything but name the variables panel_ols = panel_mod.fit(model='pooled') panel_be = panel_mod.fit(model='between', effects='oneway') panel_fe = panel_mod.fit(model='fixed', effects='oneway') panel_bet = panel_mod.fit(model='between', effects='time') panel_fet = panel_mod.fit(model='fixed', effects='time') panel_fe2 = panel_mod.fit(model='fixed', effects='twoways')
np.random.seed(9876789) # OLS non-linear curve but linear in parameters # --------------------------------------------- nsample = 100 sig = 0.5 x1 = np.linspace(0, 20, nsample) x2 = 5 + 3* np.random.randn(nsample) X = np.c_[x1, x2, np.sin(0.5*x1), (x2-5)**2, np.ones(nsample)] beta = [0.5, 0.5, 1, -0.04, 5.] y_true = np.dot(X, beta) y = y_true + sig * np.random.normal(size=nsample) #estimate only linear function, misspecified because of non-linear terms exog0 = sm.add_constant(np.c_[x1, x2], prepend=False) # plt.figure() # plt.plot(x1, y, 'o', x1, y_true, 'b-') res = sm.OLS(y, exog0).fit() #print res.params #print res.bse plot_old = 0 #True if plot_old: #current bug predict requires call to model.results #print res.model.predict prstd, iv_l, iv_u = wls_prediction_std(res)
print approx_hess_cs((1,2,3), fun, (x,), h=1.0e-20) #this is correctly zero print approx_hess_cs((1,2,3), fun2, (y,x), h=1.0e-20)-2*np.dot(x.T, x) print numdiff.approx_hess(xk,fun2,1e-3, (y,x))[0] - 2*np.dot(x.T, x) gt = (-x*2*(y-np.dot(x, [1,2,3]))[:,None]) g = approx_fprime_cs((1,2,3), fun1, (y,x), h=1.0e-20)#.T #this shouldn't be transposed gd = numdiff.approx_fprime1((1,2,3),fun1,epsilon,(y,x)) print maxabs(g, gt) print maxabs(gd, gt) import gwstatsmodels.api as sm data = sm.datasets.spector.load() data.exog = sm.add_constant(data.exog) #mod = sm.Probit(data.endog, data.exog) mod = sm.Logit(data.endog, data.exog) #res = mod.fit(method="newton") test_params = [1,0.25,1.4,-7] loglike = mod.loglike score = mod.score hess = mod.hessian #cs doesn't work for Probit because special.ndtr doesn't support complex #maybe calculating ndtr for real and imag parts separately, if we need it #and if it still works in this case print 'sm', score(test_params) print 'fd', numdiff.approx_fprime1(test_params,loglike,epsilon) print 'cs', numdiff.approx_fprime_cs(test_params,loglike) print 'sm', hess(test_params)
import gwstatsmodels.sandbox.panel.sandwich_covariance as sw import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg #http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta import gwstatsmodels.iolib.foreign as dta srs = dta.genfromdta("srs.dta") y = srs['api00'] #x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1) #force sequence x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']]) group = srs['dnum'] #xx = sm.add_constant(x, prepend=True) xx = sm.add_constant(x, prepend=False) #for Stata compatibility #remove nan observation mask = (xx != -999.0).all(1) #nan code in dta file mask.shape y = y[mask] xx = xx[mask] group = group[mask] res_srs = sm.OLS(y, xx).fit() print res_srs.params print res_srs.bse bse_cr = sw.cov_cluster(res_srs, group.astype(int))[1] print bse_cr
def anova_ols(y, x):
    '''OLS regression of y on dummy variables built from x plus a constant

    The dummies are created with ``data2dummy(x)``.

    Returns
    -------
    fvalue, f_pvalue, rsquared, resid_std : tuple
        F-statistic of the regression, its p-value, R-squared, and the
        square root of the residual mean squared error (``mse_resid``)
    '''
    design = sm.add_constant(data2dummy(x))
    fit = sm.OLS(y, design).fit()
    resid_std = np.sqrt(fit.mse_resid)
    return fit.fvalue, fit.f_pvalue, fit.rsquared, resid_std
from pandas import DataFrame data = sm.datasets.longley.load() df = DataFrame(data.exog, columns=data.exog_name) y = data.endog # data.exog = sm.add_constant(data.exog) df['intercept'] = 1. olsresult = sm.OLS(y, df).fit() rlmresult = sm.RLM(y, df).fit() # olswrap = RegressionResultsWrapper(olsresult) # rlmwrap = RLMResultsWrapper(rlmresult) data = sm.datasets.wfs.load() # get offset offset = np.log(data.exog[:,-1]) exog = data.exog[:,:-1] # convert dur to dummy exog = sm.tools.categorical(exog, col=0, drop=True) # drop reference category # convert res to dummy exog = sm.tools.categorical(exog, col=0, drop=True) # convert edu to dummy exog = sm.tools.categorical(exog, col=0, drop=True) # drop reference categories and add intercept exog = sm.add_constant(exog[:,[1,2,3,4,5,7,8,10,11,12]]) endog = np.round(data.endog) mod = sm.GLM(endog, exog, family=sm.families.Poisson()).fit() # glmwrap = GLMResultsWrapper(mod)
pred = np.dot(self.wexog, self.coeffs) eps = np.diag((self.wendog - pred) ** 2) sigmaSq = np.sum(eps) pinvX = np.dot(self.rnorm_cov_params, self.wexog.T) self._wncp = np.dot(np.dot(pinvX, eps), pinvX.T) * df / sigmaSq return self._wncp _coeffs = None @property def coeffs(self): """Estimated parameters""" if self._coeffs is None: betaLambda = np.dot(self.inv_rwexog, self.rwendog) self._coeffs = betaLambda[:self.ncoeffs] return self._coeffs def fit(self): rncp = self.wrnorm_cov_params lfit = RegressionResults(self, self.coeffs, normalized_cov_params=rncp) return lfit if __name__=="__main__": import gwstatsmodels.api as sm dta = np.genfromtxt('./rlsdata.txt', names=True) design = np.column_stack((dta['Y'],dta['Y']**2,dta[['NE','NC','W','S']].view(float).reshape(dta.shape[0],-1))) design = sm.add_constant(design, prepend=True) rls_mod = RLS(dta['G'],design, constr=[0,0,0,1,1,1,1]) rls_fit = rls_mod.fit() print rls_fit.params
mv2m = mvn3.marginal(np.array([0, 1])) print mv2m.mean print mv2m.cov mv2c = mvn3.conditional(np.array([0, 1]), [0]) print mv2c.mean print mv2c.cov mv2c = mvn3.conditional(np.array([0]), [0, 0]) print mv2c.mean print mv2c.cov import gwstatsmodels.api as sm mod = sm.OLS(x[:, 0], sm.add_constant(x[:, 1:], prepend=True)) res = mod.fit() print res.model.predict(np.array([1, 0, 0])) mv2c = mvn3.conditional(np.array([0]), [0, 0]) print mv2c.mean mv2c = mvn3.conditional(np.array([0]), [1, 1]) print res.model.predict(np.array([1, 1, 1])) print mv2c.mean #the following wrong input doesn't raise an exception but produces wrong numbers #mv2c = mvn3.conditional(np.array([0]), [[1, 1],[2,2]]) #************** multivariate t distribution *************** mvt3 = mvd.MVT(mu, cov3, 4) xt = mvt3.rvs(size=100000)
## r = np.zeros(n_groups) ## R = np.c_[np.zeros((n_groups-1, k_vars)), ## np.eye(n_groups-1)-1./n_groups * np.ones((n_groups-1, n_groups-1))] if __name__ == '__main__': import numpy as np import gwstatsmodels.api as sm examples = [2] np.random.seed(765367) np.random.seed(97653679) nsample = 100 x = np.linspace(0, 10, nsample) X = sm.add_constant(np.column_stack((x, x**2, (x / 5.)**3)), prepend=True) beta = np.array([10, 1, 0.1, 0.5]) y = np.dot(X, beta) + np.random.normal(size=nsample) res_ols = sm.OLS(y, X).fit() R = [[0, 0, 0, 1]] r = [0] #, 0, 0 , 0] lambd = 1 #1e-4 mod = TheilGLS(y, X, r_matrix=R, q_matrix=r, sigma_prior=lambd) res = mod.fit() print res_ols.params print res.params #example 2 #I need more flexible penalization in example, the penalization should
64 57 8 71 59 10 53 49 6 67 62 11 55 51 8 58 50 7 77 55 10 57 48 9 56 42 10 51 42 6 76 61 12 68 57 9'''.split(), float).reshape(-1, 3) varnames = 'weight height age'.split() endog = data[:, 0] exog = sm.add_constant(data[:, 2], prepend=True) res_ols = sm.OLS(endog, exog).fit() hh = (res_ols.model.exog * res_ols.model.pinv_wexog.T).sum(1) x = res_ols.model.exog hh_check = np.diag( np.dot(x, np.dot(res_ols.model.normalized_cov_params, x.T))) from numpy.testing import assert_almost_equal assert_almost_equal(hh, hh_check, decimal=13) res = res_ols #alias #http://en.wikipedia.org/wiki/PRESS_statistic #predicted residuals, leave one out predicted residuals
# ndts=np.column_stack(dts[col] for col in dts.dtype.names) # ntda=ntds.swapaxis(1,0) # ntda is ntds returns false? # or now we just have detailed information about the different strings # would this approach ever be inappropriate for a string typed variable # other than dates? # descstats(ndts, [1]) # raw_input("Enter to try second part") # descstats(ndts, [1,20,3]) if __name__ == '__main__': import gwstatsmodels.api as sm import os data = sm.datasets.longley.load() data.exog = sm.add_constant(data.exog) sum1 = descstats(data.exog) sum1a = descstats(data.exog[:,:1]) # loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv' # dta=np.recfromcsv(loc) # summary2 = descstats(dta,['stpop']) # summary3 = descstats(dta,['stpop','avginc','vio']) #TODO: needs a by argument # summary4 = descstats(dta) this fails # this is a bug # p = dta[['stpop']] # p.view(dtype = np.float, type = np.ndarray) # this works # p.view(dtype = np.int, type = np.ndarray)
64 57 8 71 59 10 53 49 6 67 62 11 55 51 8 58 50 7 77 55 10 57 48 9 56 42 10 51 42 6 76 61 12 68 57 9'''.split(), float).reshape(-1,3) varnames = 'weight height age'.split() endog = data[:,0] exog = sm.add_constant(data[:,2], prepend=True) res_ols = sm.OLS(endog, exog).fit() hh = (res_ols.model.exog * res_ols.model.pinv_wexog.T).sum(1) x = res_ols.model.exog hh_check = np.diag(np.dot(x, np.dot(res_ols.model.normalized_cov_params, x.T))) from numpy.testing import assert_almost_equal assert_almost_equal(hh, hh_check, decimal=13) res = res_ols #alias #http://en.wikipedia.org/wiki/PRESS_statistic #predicted residuals, leave one out predicted residuals
if normed: mi_normed = np.sqrt(1. - np.exp(-2 * mi)) return mi_normed, (pyx, py, px, binsy, binsx), mi_obs else: return mi if __name__ == '__main__': import gwstatsmodels.api as sm funtype = ['linear', 'quadratic'][1] nobs = 200 sig = 2#5. #x = np.linspace(-3, 3, nobs) + np.random.randn(nobs) x = np.sort(3*np.random.randn(nobs)) exog = sm.add_constant(x, prepend=True) #y = 0 + np.log(1+x**2) + sig * np.random.randn(nobs) if funtype == 'quadratic': y = 0 + x**2 + sig * np.random.randn(nobs) if funtype == 'linear': y = 0 + x + sig * np.random.randn(nobs) print 'correlation' print np.corrcoef(y,x)[0, 1] print 'pearsonr', stats.pearsonr(y,x) print 'spearmanr', stats.spearmanr(y,x) print 'kendalltau', stats.kendalltau(y,x) pxy, binsx, binsy = np.histogram2d(x,y, bins=5) px, binsx_ = np.histogram(x, bins=binsx) py, binsy_ = np.histogram(y, bins=binsy)
def as_csv(self): '''return tables as string Returns ------- csv : string concatenated summary tables in comma delimited format ''' return summary_return(self.tables, return_fmt='csv') def as_html(self): '''return tables as string Returns ------- html : string concatenated summary tables in HTML format ''' return summary_return(self.tables, return_fmt='html') if __name__ == "__main__": import gwstatsmodels.api as sm data = sm.datasets.longley.load() data.exog = sm.add_constant(data.exog) res = sm.OLS(data.endog, data.exog).fit() #summary(
""" import numpy as np from numpy.testing import assert_almost_equal import gwstatsmodels.api as sm import gwstatsmodels.sandbox.panel.sandwich_covariance as sw import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg #requires Petersen's test_data #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt pet = np.genfromtxt("test_data.txt") endog = pet[:, -1] group = pet[:, 0].astype(int) time = pet[:, 1].astype(int) exog = sm.add_constant(pet[:, 2], prepend=True) res = sm.OLS(endog, exog).fit() cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time) #Reference number from Petersen #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm bse_petw = [0.0284, 0.0284] bse_pet0 = [0.0670, 0.0506] bse_pet1 = [0.0234, 0.0334] #year bse_pet01 = [0.0651, 0.0536] #firm and year bse_0 = sw.se_cov(covg) bse_1 = sw.se_cov(covt) bse_01 = sw.se_cov(cov01) print res.HC0_se, bse_petw - res.HC0_se
mv2m = mvn3.marginal(np.array([0,1])) print mv2m.mean print mv2m.cov mv2c = mvn3.conditional(np.array([0,1]), [0]) print mv2c.mean print mv2c.cov mv2c = mvn3.conditional(np.array([0]), [0, 0]) print mv2c.mean print mv2c.cov import gwstatsmodels.api as sm mod = sm.OLS(x[:,0], sm.add_constant(x[:,1:], prepend=True)) res = mod.fit() print res.model.predict(np.array([1,0,0])) mv2c = mvn3.conditional(np.array([0]), [0, 0]) print mv2c.mean mv2c = mvn3.conditional(np.array([0]), [1, 1]) print res.model.predict(np.array([1,1,1])) print mv2c.mean #the following wrong input doesn't raise an exception but produces wrong numbers #mv2c = mvn3.conditional(np.array([0]), [[1, 1],[2,2]]) #************** multivariate t distribution *************** mvt3 = mvd.MVT(mu, cov3, 4) xt = mvt3.rvs(size=100000)
import gwstatsmodels.sandbox.panel.sandwich_covariance as sw import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg #http://www.ats.ucla.edu/stat/stata/seminars/svy_stata_intro/srs.dta import gwstatsmodels.iolib.foreign as dta srs = dta.genfromdta("srs.dta") y = srs['api00'] #x = srs[['growth', 'emer', 'yr_rnd']].view(float).reshape(len(y), -1) #force sequence x = np.column_stack([srs[ii] for ii in ['growth', 'emer', 'yr_rnd']]) group = srs['dnum'] #xx = sm.add_constant(x, prepend=True) xx = sm.add_constant(x, prepend=False) #for Stata compatibility #remove nan observation mask = (xx!=-999.0).all(1) #nan code in dta file mask.shape y = y[mask] xx = xx[mask] group = group[mask] res_srs = sm.OLS(y, xx).fit() print res_srs.params print res_srs.bse bse_cr = sw.cov_cluster(res_srs, group.astype(int))[1] print bse_cr
np.random.seed(9876789) # OLS non-linear curve but linear in parameters # --------------------------------------------- nsample = 100 sig = 0.5 x1 = np.linspace(0, 20, nsample) x2 = 5 + 3 * np.random.randn(nsample) X = np.c_[x1, x2, np.sin(0.5 * x1), (x2 - 5) ** 2, np.ones(nsample)] beta = [0.5, 0.5, 1, -0.04, 5.0] y_true = np.dot(X, beta) y = y_true + sig * np.random.normal(size=nsample) # estimate only linear function, misspecified because of non-linear terms exog0 = sm.add_constant(np.c_[x1, x2], prepend=False) # plt.figure() # plt.plot(x1, y, 'o', x1, y_true, 'b-') res = sm.OLS(y, exog0).fit() # print res.params # print res.bse plot_old = 0 # True if plot_old: # current bug predict requires call to model.results # print res.model.predict prstd, iv_l, iv_u = wls_prediction_std(res) plt.plot(x1, res.fittedvalues, "r-o")
""" import numpy as np from numpy.testing import assert_almost_equal import gwstatsmodels.api as sm import gwstatsmodels.sandbox.panel.sandwich_covariance as sw import gwstatsmodels.sandbox.panel.sandwich_covariance_generic as swg #requires Petersen's test_data #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.txt pet = np.genfromtxt("test_data.txt") endog = pet[:,-1] group = pet[:,0].astype(int) time = pet[:,1].astype(int) exog = sm.add_constant(pet[:,2], prepend=True) res = sm.OLS(endog, exog).fit() cov01, covg, covt = sw.cov_cluster_2groups(res, group, group2=time) #Reference number from Petersen #http://www.kellogg.northwestern.edu/faculty/petersen/htm/papers/se/test_data.htm bse_petw = [0.0284, 0.0284] bse_pet0 = [0.0670, 0.0506] bse_pet1 = [0.0234, 0.0334] #year bse_pet01 = [0.0651, 0.0536] #firm and year bse_0 = sw.se_cov(covg) bse_1 = sw.se_cov(covt) bse_01 = sw.se_cov(cov01) print res.HC0_se, bse_petw - res.HC0_se
## np.eye(n_groups-1)-1./n_groups * np.ones((n_groups-1, n_groups-1))] if __name__ == '__main__': import numpy as np import gwstatsmodels.api as sm examples = [2] np.random.seed(765367) np.random.seed(97653679) nsample = 100 x = np.linspace(0,10, nsample) X = sm.add_constant(np.column_stack((x, x**2, (x/5.)**3)), prepend=True) beta = np.array([10, 1, 0.1, 0.5]) y = np.dot(X, beta) + np.random.normal(size=nsample) res_ols = sm.OLS(y, X).fit() R = [[0, 0, 0 , 1]] r = [0] #, 0, 0 , 0] lambd = 1 #1e-4 mod = TheilGLS(y, X, r_matrix=R, q_matrix=r, sigma_prior=lambd) res = mod.fit() print res_ols.params print res.params #example 2 #I need more flexible penalization in example, the penalization should