def __init__(self, endog, exog, smoothers=None, family=None):
    """Initialise the model through both parent initialisers.

    Parameters
    ----------
    endog
        Passed on to ``GLM.__init__``.
    exog
        Passed on to both ``AdditiveModel.__init__`` and ``GLM.__init__``.
    smoothers : optional
        Passed on to ``AdditiveModel.__init__``.
    family : optional
        Family instance handed to both parent initialisers.  When omitted
        (``None``) a fresh ``families.Gaussian()`` is created for this
        instance.
    """
    # A default written as ``family=families.Gaussian()`` in the signature is
    # evaluated once at definition time, so every model built without an
    # explicit family would share the same family object.  Build it per call.
    if family is None:
        family = families.Gaussian()

    # self.family = family
    # TODO: inconsistent super __init__
    AdditiveModel.__init__(self, exog, smoothers=smoothers, family=family)
    GLM.__init__(self, endog, exog, family=family)
    assert self.family is family  # make sure we got the right family
def __init__(self):
    """Fit a GLM with an inverse Gaussian family and a log link.

    The model is fit on the ``endog``/``exog`` carried by the
    ``InvGaussLog`` results object, which is also kept as the benchmark
    ``res2``.
    """
    from results.results_glm import InvGaussLog

    # Test Precisions
    self.decimal_aic_R = -10  # Big difference vs R.
    self.decimal_resids = DECIMAL_3

    benchmark = InvGaussLog()
    log_link = sm.families.links.log
    family = sm.families.InverseGaussian(link=log_link)
    self.res1 = GLM(benchmark.endog, benchmark.exog, family=family).fit()
    self.res2 = benchmark
def __init__(self):
    """Fit a Gaussian GLM with a log link on simulated data.

    The response is ``exp`` of a negated quadratic in ``x`` plus seeded
    normal noise; the fit is stored as ``res1`` and the ``GaussianLog``
    benchmark as ``res2``.
    """
    # Test Precision
    self.decimal_aic_R = DECIMAL_0
    self.decimal_aic_Stata = DECIMAL_2
    self.decimal_loglike = DECIMAL_0
    self.decimal_null_deviance = DECIMAL_1

    n_obs = 100
    grid = np.arange(n_obs)
    np.random.seed(54321)
    # y = 1.0 - .02*x - .001*x**2 + 0.001 * np.random.randn(nobs)
    self.X = np.c_[np.ones((n_obs, 1)), grid, grid**2]

    mean = np.exp(-(-1.0 + 0.02 * grid + 0.0001 * grid**2))
    noise = 0.001 * np.random.randn(n_obs)
    self.lny = mean + noise

    family = sm.families.Gaussian(sm.families.links.log)
    model = GLM(self.lny, self.X, family=family)
    self.res1 = model.fit()

    from results.results_glm import GaussianLog
    self.res2 = GaussianLog()
def __init__(self):
    """Set up the Gaussian/log-link GLM comparison on simulated data.

    A seeded noisy response (``exp`` of a negated quadratic in ``x``) is
    regressed on a constant, ``x`` and ``x**2``.  ``res1`` holds the fit and
    ``res2`` the ``GaussianLog`` benchmark.
    """
    # Test Precision
    self.decimal_aic_R = DECIMAL_0
    self.decimal_aic_Stata = DECIMAL_2
    self.decimal_loglike = DECIMAL_0
    self.decimal_null_deviance = DECIMAL_1

    sample_size = 100
    xs = np.arange(sample_size)
    np.random.seed(54321)
    # y = 1.0 - .02*x - .001*x**2 + 0.001 * np.random.randn(nobs)
    intercept = np.ones((sample_size, 1))
    self.X = np.c_[intercept, xs, xs**2]

    expected = np.exp(-(-1.0 + 0.02 * xs + 0.0001 * xs**2))
    self.lny = expected + 0.001 * np.random.randn(sample_size)

    log_link_family = sm.families.Gaussian(sm.families.links.log)
    self.res1 = GLM(self.lny, self.X, family=log_link_family).fit()

    from results.results_glm import GaussianLog
    self.res2 = GaussianLog()
def __init__(self):
    """Fit a Gaussian GLM with an inverse-power link on simulated data.

    ``res1`` holds the fit of the seeded noisy inverse-quadratic response
    on a constant, ``x`` and ``x**2``; ``res2`` holds the
    ``GaussianInverse`` benchmark.
    """
    # Test Precisions
    self.decimal_bic = DECIMAL_1
    self.decimal_aic_R = DECIMAL_1
    self.decimal_aic_Stata = DECIMAL_3
    self.decimal_loglike = DECIMAL_1
    self.decimal_resids = DECIMAL_3

    n_obs = 100
    grid = np.arange(n_obs)
    np.random.seed(54321)
    # This value is never read again, but the draw advances the random
    # stream, so the noise added to ``y_inv`` below depends on it.
    unused_y = 1.0 + 2.0 * grid + grid**2 + 0.1 * np.random.randn(n_obs)
    self.X = np.c_[np.ones((n_obs, 1)), grid, grid**2]

    mean = (1. + .02 * grid + .001 * grid**2)**-1
    noise = .001 * np.random.randn(n_obs)
    self.y_inv = mean + noise

    family = sm.families.Gaussian(sm.families.links.inverse_power)
    model = GLM(self.y_inv, self.X, family=family)
    self.res1 = model.fit()

    from results.results_glm import GaussianInverse
    self.res2 = GaussianInverse()
def test_prefect_pred():
    """Expect ``PerfectSeparationError`` from a binomial GLM fit.

    Loads ``results/iris.csv`` next to this file, uses the last column as
    the response, drops the rows whose response equals 2, prepends a
    constant and checks that ``fit`` raises.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, 'results', 'iris.csv')
    iris = np.genfromtxt(csv_path, delimiter=",", skip_header=1)

    labels = iris[:, -1]
    features = iris[:, :-1]
    keep = labels != 2
    design = add_constant(features[keep], prepend=True)

    model = GLM(labels[keep], design, family=sm.families.Binomial())
    assert_raises(PerfectSeparationError, model.fit)
def __init__(self):
    """Fit a GLM with an inverse Gaussian family and identity link.

    The model is fit on the ``Medpar1`` ``endog``/``exog``;
    ``InvGaussIdentity`` supplies the benchmark stored as ``res2``.
    """
    from results.results_glm import Medpar1
    from results.results_glm import InvGaussIdentity

    # Test Precisions
    self.decimal_aic_R = -10  # TODO: Big difference vs R
    self.decimal_fittedvalues = DECIMAL_3
    self.decimal_params = DECIMAL_3

    medpar = Medpar1()
    identity_link = sm.families.links.identity
    family = sm.families.InverseGaussian(link=identity_link)
    self.res1 = GLM(medpar.endog, medpar.exog, family=family).fit()
    self.res2 = InvGaussIdentity()
def __init__(self):
    """Fit a GLM with a Gamma family and log link.

    The model is fit on the ``endog``/``exog`` of the ``CancerLog``
    results object, which doubles as the benchmark ``res2``.
    """
    from results.results_glm import CancerLog

    # Test Precisions
    self.decimal_resids = DECIMAL_3
    self.decimal_aic_R = DECIMAL_0
    self.decimal_fittedvalues = DECIMAL_3

    benchmark = CancerLog()
    family = sm.families.Gamma(link=sm.families.links.log)
    model = GLM(benchmark.endog, benchmark.exog, family=family)
    self.res1 = model.fit()
    self.res2 = benchmark
def __init__(self):
    """Set up the Gaussian/inverse-power-link GLM comparison.

    A seeded noisy inverse-quadratic response is regressed on a constant,
    ``x`` and ``x**2``.  The fit is stored as ``res1`` and the
    ``GaussianInverse`` benchmark as ``res2``.
    """
    # Test Precisions
    self.decimal_bic = DECIMAL_1
    self.decimal_aic_R = DECIMAL_1
    self.decimal_aic_Stata = DECIMAL_3
    self.decimal_loglike = DECIMAL_1
    self.decimal_resids = DECIMAL_3

    sample_size = 100
    xs = np.arange(sample_size)
    np.random.seed(54321)
    # Not used afterwards; kept because this draw consumes random numbers
    # ahead of the noise that goes into ``y_inv``.
    discarded = 1.0 + 2.0 * xs + xs**2 + 0.1 * np.random.randn(sample_size)
    intercept = np.ones((sample_size, 1))
    self.X = np.c_[intercept, xs, xs**2]

    expected = (1. + .02 * xs + .001 * xs**2)**-1
    self.y_inv = expected + .001 * np.random.randn(sample_size)

    inverse_family = sm.families.Gaussian(sm.families.links.inverse_power)
    fitted = GLM(self.y_inv, self.X, family=inverse_family).fit()
    self.res1 = fitted

    from results.results_glm import GaussianInverse
    self.res2 = GaussianInverse()
def __init__(self):
    """Fit a GLM with a Gamma family and identity link.

    The model is fit on the ``endog``/``exog`` of the ``CancerIdentity``
    results object, which is also stored as the benchmark ``res2``.
    """
    from results.results_glm import CancerIdentity

    # Test Precisions
    self.decimal_resids = -100  # TODO Very off from Stata?
    self.decimal_params = DECIMAL_2
    self.decimal_aic_R = DECIMAL_0
    self.decimal_loglike = DECIMAL_1

    benchmark = CancerIdentity()
    family = sm.families.Gamma(link=sm.families.links.identity)
    model = GLM(benchmark.endog, benchmark.exog, family=family)
    self.res1 = model.fit()
    self.res2 = benchmark
def setupClass(cls):
    """Fit a Poisson GLM on the cpunish data with a constant exposure.

    Every observation gets an exposure of 100; afterwards ``log(100)`` is
    added to the last fitted parameter so the fit can be compared with the
    ``Cpunish`` benchmark stored as ``res2``.
    """
    from results.results_glm import Cpunish
    from gwstatsmodels.datasets.cpunish import load

    dataset = load()
    dataset.exog[:, 3] = np.log(dataset.exog[:, 3])
    dataset.exog = add_constant(dataset.exog)

    exposure = [100] * len(dataset.endog)
    model = GLM(dataset.endog, dataset.exog,
                family=sm.families.Poisson(), exposure=exposure)
    cls.res1 = model.fit()
    # add exposure back in to param to make the results the same
    cls.res1.params[-1] += np.log(100)
    cls.res2 = Cpunish()
def __init__(self):
    """Set up the Poisson family test with the canonical log link.

    Benchmark results were obtained with R.  Column 3 of the cpunish
    ``exog`` is log-transformed and a constant is added before fitting.
    """
    from results.results_glm import Cpunish
    from gwstatsmodels.datasets.cpunish import load

    dataset = load()
    self.data = dataset
    dataset.exog[:, 3] = np.log(dataset.exog[:, 3])
    dataset.exog = add_constant(dataset.exog)

    model = GLM(dataset.endog, dataset.exog, family=sm.families.Poisson())
    self.res1 = model.fit()
    self.res2 = Cpunish()
def __init__(self):
    """Set up the Gaussian family test with the canonical identity link.

    The model is fit on the longley data with a constant added; the
    ``Longley`` results object is the benchmark ``res2``.
    """
    # Test Precisions
    self.decimal_resids = DECIMAL_3
    self.decimal_params = DECIMAL_2
    self.decimal_bic = DECIMAL_0
    self.decimal_bse = DECIMAL_3

    from gwstatsmodels.datasets.longley import load
    dataset = load()
    self.data = dataset
    dataset.exog = add_constant(dataset.exog)

    model = GLM(dataset.endog, dataset.exog, family=sm.families.Gaussian())
    self.res1 = model.fit()

    from results.results_glm import Longley
    self.res2 = Longley()
def __init__(self):
    """Set up the Binomial family test with the canonical logit link.

    Uses the star98 dataset with a constant added; ``Star98`` provides the
    benchmark ``res2``.
    """
    from gwstatsmodels.datasets.star98 import load
    from results.results_glm import Star98

    self.decimal_resids = DECIMAL_1
    self.decimal_bic = DECIMAL_2

    dataset = load()
    dataset.exog = add_constant(dataset.exog)
    model = GLM(dataset.endog, dataset.exog, family=sm.families.Binomial())
    self.res1 = model.fit()
    # NOTE: if you want to replicate with RModel
    # res2 = RModel(data.endog[:,0]/trials, data.exog, r.glm,
    #               family=r.binomial, weights=trials)
    self.res2 = Star98()
def init(self):
    """Simulate a response, fit a GAM and a GLM, and store both for comparison.

    Reads ``nobs``, ``y_true``, ``x``, ``exog``, ``family`` and (optionally)
    ``scale`` from the instance, and draws observations through ``self.rvs``
    with a fixed seed.  The GAM side is collected on a ``Dummy`` object as
    ``res1``; the GLM fit is reused as ``res2`` with extra attributes
    attached.
    """
    n_obs = self.nobs
    y_true, x, exog = self.y_true, self.x, self.exog
    scale = self.scale if hasattr(self, 'scale') else 1
    family = self.family

    mu_true = family.link.inverse(y_true)
    self.mu_true = mu_true

    np.random.seed(8765993)
    # y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    y_obs = self.rvs(mu_true, scale=scale, size=n_obs)  # this should work

    gam_model = GAM(y_obs, x, family=family)
    # TODO: y_obs is passed twice, to __init__ and to fit
    gam_model.fit(y_obs, maxiter=100)
    gam_res = gam_model.results
    self.res_gam = gam_res  # attached for debugging
    self.mod_gam = gam_model  # attached for debugging

    glm_res = GLM(y_obs, exog, family=family).fit()

    # Note: there still are some naming inconsistencies
    gam_side = Dummy()  # for gam model
    self.res1 = gam_side
    # reuse existing glm results as the benchmark, will add additional
    # attributes to it
    glm_side = glm_res
    self.res2 = glm_side

    # eta in GLM terminology
    glm_side.y_pred = glm_res.model.predict(glm_res.params, exog,
                                            linear=True)
    gam_side.y_pred = gam_res.predict(x)
    gam_side.y_predshort = gam_res.predict(x[:10])  # , linear=True)

    # mu
    glm_side.mu_pred = glm_res.model.predict(glm_res.params, exog,
                                             linear=False)
    gam_side.mu_pred = gam_res.mu

    # parameters
    slopes = []
    for smoother in gam_model.smoothers:
        slopes.extend(smoother.params[1:])
    const = gam_res.alpha + sum(
        [smoother.params[1] for smoother in gam_model.smoothers])
    gam_side.params = np.array([const] + slopes)
def __init__(self):
    """Set up the Gamma family test with the canonical inverse link (power -1).

    The model is fit on the scotland data with a constant added.  The
    ``Scotvote`` benchmark's ``aic_R`` is raised by 2 before it is stored.
    """
    from gwstatsmodels.datasets.scotland import load
    from results.results_glm import Scotvote

    # Test Precisions
    self.decimal_aic_R = -1  # TODO: off by about 1, we are right with Stata
    self.decimal_resids = DECIMAL_2

    dataset = load()
    dataset.exog = add_constant(dataset.exog)
    model = GLM(dataset.endog, dataset.exog, family=sm.families.Gamma())
    self.res1 = model.fit()

    # res2 = RModel(data.endog, data.exog, r.glm, family=r.Gamma)
    benchmark = Scotvote()
    # R doesn't count degree of freedom for scale with gamma
    benchmark.aic_R += 2
    self.res2 = benchmark
def __init__(self):
    """Set up the test of the Inverse Gaussian family in GLM.

    Notes
    -----
    The data were generated with the rndivgx.ado file provided by Hardin
    and Hilbe.  Results are read from model_results, which were obtained
    by running R_ig.s
    """
    from results.results_glm import InvGauss

    # Test Precisions
    self.decimal_aic_R = DECIMAL_0
    self.decimal_loglike = DECIMAL_0

    benchmark = InvGauss()
    model = GLM(benchmark.endog, benchmark.exog,
                family=sm.families.InverseGaussian())
    self.res1 = model.fit()
    self.res2 = benchmark
def __init__(self):
    """Set up the Negative Binomial family test with the canonical log link.

    Column 2 of the committee ``exog`` is log-transformed, its product with
    column 1 is appended as an interaction column, and a constant is added
    before fitting.  The ``Committee`` benchmark's ``aic_R`` is raised by 2.
    """
    # Test Precision
    self.decimal_resid = DECIMAL_1
    self.decimal_params = DECIMAL_3
    self.decimal_resids = -1  # 1 % mismatch at 0
    self.decimal_fittedvalues = DECIMAL_1

    from gwstatsmodels.datasets.committee import load
    dataset = load()
    self.data = dataset
    dataset.exog[:, 2] = np.log(dataset.exog[:, 2])
    interaction = dataset.exog[:, 2] * dataset.exog[:, 1]
    dataset.exog = np.column_stack((dataset.exog, interaction))
    dataset.exog = add_constant(dataset.exog)

    family = sm.families.NegativeBinomial()
    self.res1 = GLM(dataset.endog, dataset.exog, family=family).fit()

    from results.results_glm import Committee
    benchmark = Committee()
    # They don't count a degree of freedom for the scale
    benchmark.aic_R += 2
    self.res2 = benchmark
# NOTE(review): this is a fragment of a longer plotting script; ``z``, ``x``,
# ``p``, ``yp``, ``y_pred``, ``exog``, ``exog_reduced``, ``order``, ``f`` and
# ``m`` are defined outside the visible code.
plt.legend(loc='upper left')
plt.title('gam.GAM Poisson')

# One subplot per regressor, filling positions 2..4 of a 2x2 grid: ``p``
# (black), ``yp`` (labelled 'true') and ``y_pred`` (labelled 'GAM') are
# plotted against the regressor in ascending order.
counter = 2
for ii, xx in zip(['z', 'x1', 'x2'], [z, x[:, 0], x[:, 1]]):
    sortidx = np.argsort(xx)
    #plt.figure()
    plt.subplot(2, 2, counter)
    plt.plot(xx[sortidx], p[sortidx], 'k.', alpha=0.5)
    plt.plot(xx[sortidx], yp[sortidx], 'b.', label='true')
    plt.plot(xx[sortidx], y_pred[sortidx], 'r.', label='GAM')
    plt.legend(loc='upper left')
    plt.title('gam.GAM Poisson ' + ii)
    counter += 1

# GLM fit on the reduced design; ``res`` is not used in the visible part of
# the script.
res = GLM(p, exog_reduced, family=f).fit()

#plot component, compared to true component
x1 = x[:, 0]
x2 = x[:, 1]
# Row sums over the first ``order + 1`` columns of ``exog`` and over the
# remaining columns, each shifted down by 1.
f1 = exog[:, :order + 1].sum(1) - 1 #take out constant
f2 = exog[:, order + 1:].sum(1) - 1

plt.figure()
#Note: need to correct for the constant, which is indeterminately distributed
#plt.plot(x1, m.smoothers[0](x1)-m.smoothers[0].params[0]+1, 'r')
#better would be subtract f(0) m.smoothers[0](np.array([0]))
plt.plot(x1, f1, linewidth=2)
# First smoother evaluated at x1, minus its first parameter, drawn in red.
plt.plot(x1, m.smoothers[0](x1) - m.smoothers[0].params[0], 'r')

plt.figure()
plt.plot(x2, f2, linewidth=2)
def __init__(self):
    """Fit a binomial GLM on the ``Lbw`` results data.

    The ``Lbw`` object supplies ``endog``/``exog`` for the fit stored as
    ``res1`` and is itself kept as the benchmark ``res2``.
    """
    from results.results_glm import Lbw

    benchmark = Lbw()
    self.res2 = benchmark
    model = GLM(benchmark.endog, benchmark.exog,
                family=sm.families.Binomial())
    self.res1 = model.fit()