def test_glmpoisson_screening():
    """Variable screening on Poisson data should match the oracle fit.

    An "oracle" penalized Poisson GLM is fit on only the columns listed in
    the true nonzero index returned by ``_get_poisson_data``. Screening
    then starts from a model with a single column of ones and selects
    from the remaining columns. The test checks that the selected index
    set equals the true one, that the final fit converged, and that the
    final parameters agree with the oracle parameters.
    """
    endog, exog, true_idx, true_beta = _get_poisson_data()
    n_obs = len(endog)

    # Labels for the truly nonzero columns; the first label is replaced
    # by 'const' (presumably column 0 is the intercept -- verify against
    # _get_poisson_data).
    true_names = ['var%4d' % ii for ii in true_idx]
    true_names[0] = 'const'

    # Oracle fit restricted to the truly nonzero columns.
    oracle_exog = pd.DataFrame(exog[:, true_idx], columns=true_names)
    oracle_model = GLMPenalized(endog, oracle_exog, family=family.Poisson())
    oracle_fit = oracle_model.fit()

    parameters = pd.DataFrame(true_beta[true_idx], index=true_names,
                              columns=['true'])
    parameters['oracle'] = oracle_fit.params

    # Screening starts from a ones-only design; the first column of exog
    # is left out of the candidate set.
    start_model = GLMPenalized(endog, np.ones(n_obs),
                               family=family.Poisson())
    screener = VariableScreening(start_model)
    candidates = exog[:, 1:]
    screened = screener.screen_exog(candidates, maxiter=10)

    assert_equal(np.sort(screened.idx_nonzero), true_idx)

    selected_names = ['var%4d' % ii for ii in screened.idx_nonzero]
    selected_names[0] = 'const'

    # smoke test
    screened.results_final.summary(xname=selected_names)
    screened.results_pen.summary()
    assert_equal(screened.results_final.mle_retvals['converged'], True)

    final_params = pd.Series(screened.results_final.params,
                             index=selected_names, name='final')
    parameters = parameters.join(final_params, how='outer')

    assert_allclose(parameters['oracle'], parameters['final'], atol=5e-6)
def _initialize(cls):
    """Fit a plain Poisson GLM and a zero-penalty GLMPenalized.

    Stores the plain GLM result as ``cls.res2`` and the penalized result
    (with ``pen_weight`` set to 0) as ``cls.res1``, plus the comparison
    tolerance ``cls.atol``.
    """
    endog, exog = cls.y, cls.x

    # Reference: ordinary Poisson GLM with default fit options.
    cls.res2 = GLM(endog, exog, family=family.Poisson()).fit()

    # Penalized model with the penalty weight zeroed out.
    penalized = GLMPenalized(endog, exog, family=family.Poisson(),
                             penal=cls.penalty)
    penalized.pen_weight = 0
    cls.res1 = penalized.fit(method='bfgs', maxiter=100, disp=0)

    cls.atol = 5e-6
def _initialize(cls):
    """Fit an oracle Poisson GLM and a penalized GLM on the full design.

    ``cls.res2`` is a plain GLM using only the first ``cls.k_nonzero``
    columns of ``cls.x``; ``cls.res1`` is a ``GLMPenalized`` fit on all
    columns. ``cls.exog_index`` selects the leading ``cls.k_nonzero``
    entries and ``cls.atol`` is the comparison tolerance.
    """
    endog, exog = cls.y, cls.x
    n_keep = cls.k_nonzero

    # Oracle reference: only the leading n_keep columns.
    oracle = GLM(endog, exog[:, :n_keep], family=family.Poisson())
    cls.res2 = oracle.fit()

    penalized = GLMPenalized(endog, exog, family=family.Poisson(),
                             penal=cls.penalty)
    penalized.pen_weight *= 1.5  # same as discrete Poisson
    penalized.penal.tau = 0.05
    cls.res1 = penalized.fit(method='bfgs', maxiter=100)

    cls.exog_index = slice(n_keep)
    cls.atol = 5e-3
def setup_class(cls):
    """Fit a Poisson GLMGam with B-spline smoothers on the autos data.

    Builds a two-variable B-spline basis for ``weight`` and ``hp``, fits
    ``city_mpg ~ fuel + drive`` twice (default optimizer with
    ``use_t=False``, and Newton with ``use_t=True``), and stores the
    fits together with the reference results and tolerances on ``cls``.
    """
    # s_scale is same as before
    s_scale = np.array([2.443955e-06, 0.007945455])
    cls.s_scale = s_scale
    sp = np.array([40491.3940640059, 232455.530262537])
    penalty_weights = 1 / s_scale * sp / 2

    cls.exog = patsy.dmatrix('fuel + drive', data=df_autos)

    smoother = BSplines(
        df_autos[['weight', 'hp']].values,
        df=[12, 10],
        degree=[3, 3],
        variable_names=['weight', 'hp'],
        constraints='center',
        include_intercept=True,
    )
    gam = GLMGam.from_formula(
        'city_mpg ~ fuel + drive',
        df_autos,
        smoother=smoother,
        family=family.Poisson(),
        alpha=penalty_weights,
    )

    cls.res1a = gam.fit(use_t=False)
    cls.res1b = gam.fit(method='newton', use_t=True)
    cls.res1 = cls.res1a._results
    cls.res2 = results_mpg_bs_poisson.mpg_bs_poisson

    cls.rtol_fitted = 1e-8
    cls.covp_corrfact = 1  # not needed
def setup_class(cls):
    """Run the parent setup, select the Poisson family, then ``init``.

    Sets ``cls.family`` to a Poisson family instance and ``cls.rvs`` to
    the scipy Poisson sampler before calling ``cls.init()``.
    """
    # initialize DGP
    super(TestGAMPoisson, cls).setup_class()

    cls.rvs = stats.poisson.rvs
    cls.family = family.Poisson()

    cls.init()
def __init__(self):
    """Run the parent initializer, select the Poisson family, then init.

    Sets ``self.family`` to a Poisson family instance and ``self.rvs``
    to the scipy Poisson sampler before calling ``self.init()``.
    """
    # Zero-argument super() resolves the parent relative to the class in
    # which this method is defined. super(self.__class__, self) instead
    # resolves relative to the runtime class, so an instance of any
    # subclass would call this same __init__ again and recurse forever.
    super().__init__()  # initialize DGP
    self.family = family.Poisson()
    self.rvs = stats.poisson.rvs
    self.init()
def _initialize(cls):
    """Fit PoissonPenalized and GLMPenalized with identical settings.

    Both models use ``cls.penalty``, a penalty weight scaled by 1.5,
    ``tau = 0.05`` and HC0 covariance. The discrete-model fit is stored
    as ``cls.res2`` and the GLM fit as ``cls.res1``; ``cls.exog_index``
    selects every parameter and ``cls.atol`` is the comparison tolerance.
    """
    endog, exog = cls.y, cls.x
    fit_kwds = dict(cov_type='HC0', method='bfgs', maxiter=100, disp=0)

    discrete_model = PoissonPenalized(endog, exog, penal=cls.penalty)
    discrete_model.pen_weight *= 1.5  # same as discrete Poisson 1.5
    discrete_model.penal.tau = 0.05
    cls.res2 = discrete_model.fit(**fit_kwds)

    glm_model = GLMPenalized(endog, exog, family=family.Poisson(),
                             penal=cls.penalty)
    glm_model.pen_weight *= 1.5  # same as discrete Poisson 1.5
    glm_model.penal.tau = 0.05
    cls.res1 = glm_model.fit(**fit_kwds)

    cls.exog_index = slice(None)
    cls.atol = 1e-4
def __init__(self, family_name='normal', link_name='identity',
             fam_params=None):
    """Constructor.

    Parameters
    ----------
    family_name : str
        Case-insensitive family name: 'normal', 'binomial' or 'poisson'.
    link_name : str or None
        Case-insensitive link name: 'logit', 'log', 'identity', 'sqrt'
        or 'probit'. An empty value (``''`` or ``None``) passes no link
        to the family, so the family is built with its own default; in
        that case ``self.link`` is ``None``.
    fam_params : optional
        Passed as the second positional argument of ``np.random.normal``
        by the 'normal' sampler. Not used by the other families.

    Raises
    ------
    ValueError
        If ``link_name`` is non-empty but not a supported link, or if
        ``family_name`` is not a supported family.
    """
    # Store link
    self.link_name = link_name
    self.link = None
    family_kwargs = {}
    if link_name:
        known_links = ('logit', 'log', 'identity', 'sqrt', 'probit')
        link_key = link_name.lower()
        if link_key not in known_links:
            # Fail here with a clear message instead of an
            # AttributeError on the never-assigned self.link.
            raise ValueError(
                "Unknown link_name %r; expected one of %s"
                % (link_name, ', '.join(known_links)))
        # Every supported name is also the attribute name on L, so only
        # the requested link is looked up.
        self.link = getattr(L, link_key)
        family_kwargs['link'] = self.link

    # Store family
    self.family_name = family_name
    family_key = family_name.lower()
    if family_key == 'normal':
        self.family = F.Gaussian(**family_kwargs)

        def rand(x):
            return np.random.normal(x, fam_params)
    elif family_key == 'binomial':
        self.family = F.Binomial(**family_kwargs)

        def rand(x):
            # Single-trial binomial draws with success probability x.
            return np.random.binomial(1, x)
    elif family_key == 'poisson':
        self.family = F.Poisson(**family_kwargs)

        def rand(x):
            return np.random.poisson(x)
    else:
        # Without this branch `rand` would be unbound below.
        raise ValueError(
            "Unknown family_name %r; expected one of "
            "normal, binomial, poisson" % (family_name,))

    self.rand = rand
    self.in_columns = None
    self.out_columns = None
plt.title('gam.AdditiveModel') if example == 2: print("binomial") f = family.Binomial() b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)]) b.shape = y.shape m = GAM(b, d, family=f) toc = time.time() m.fit(b) tic = time.time() print(tic - toc) if example == 3: print("Poisson") f = family.Poisson() y = y / y.max() * 3 yp = f.link.inverse(y) p = np.asarray([scipy.stats.poisson.rvs(p) for p in f.link.inverse(y)], float) p.shape = y.shape m = GAM(p, d, family=f) toc = time.time() m.fit(p) tic = time.time() print(tic - toc) plt.figure() plt.plot(x1, standardize(m.smoothers[0](x1)), 'r') plt.plot(x1, standardize(f1(x1)), linewidth=2) plt.figure()