def init(cls):
    nobs = cls.nobs
    y_true, x, exog = cls.y_true, cls.x, cls.exog
    if not hasattr(cls, 'scale'):
        scale = 1
    else:
        scale = cls.scale

    f = cls.family

    cls.mu_true = mu_true = f.link.inverse(y_true)

    np.random.seed(8765993)
    #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    if issubclass(get_class(cls.rvs), stats.rv_discrete):
        # Discrete distributions don't take `scale`.
        y_obs = cls.rvs(mu_true, size=nobs)
    else:
        y_obs = cls.rvs(mu_true, scale=scale, size=nobs)

    m = GAM(y_obs, x, family=f)  #TODO: y_obs is passed twice, to __init__ and to fit
    m.fit(y_obs, maxiter=100)
    res_gam = m.results
    cls.res_gam = res_gam  # attached for debugging
    cls.mod_gam = m  # attached for debugging

    res_glm = GLM(y_obs, exog, family=f).fit()

    # Note: there are still some naming inconsistencies
    cls.res1 = res1 = Dummy()  # for the gam model
    #res2 = Dummy() #for benchmark
    cls.res2 = res2 = res_glm  # reuse existing glm results, will add additional attributes

    # eta in GLM terminology (linear predictor)
    res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
    res1.y_pred = res_gam.predict(x)
    res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

    # mu (mean response)
    res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
    res1.mu_pred = res_gam.mu

    # parameters
    slopes = [i for ss in m.smoothers for i in ss.params[1:]]
    const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
    res1.params = np.array([const] + slopes)
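# `get_class` is not defined in this snippet. A minimal sketch of what it
# would need to do for the discrete/continuous check above, assuming
# `cls.rvs` is a bound method of a scipy.stats distribution (e.g.
# stats.poisson.rvs); this helper's name and body are an assumption, not
# necessarily the original implementation:
def get_class(rvs_func):
    # bound method -> owning distribution instance -> its class, e.g.
    # stats.poisson.rvs -> poisson_gen, which is a subclass of rv_discrete
    return rvs_func.__self__.__class__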
def init(self):
    nobs = self.nobs
    y_true, x, exog = self.y_true, self.x, self.exog
    if not hasattr(self, 'scale'):
        scale = 1
    else:
        scale = self.scale

    f = self.family

    self.mu_true = mu_true = f.link.inverse(y_true)

    np.random.seed(8765993)
    #y_obs = np.asarray([stats.poisson.rvs(p) for p in mu], float)
    y_obs = self.rvs(mu_true, scale=scale, size=nobs)  # this should work

    m = GAM(y_obs, x, family=f)  #TODO: y_obs is passed twice, to __init__ and to fit
    m.fit(y_obs, maxiter=100)
    res_gam = m.results
    self.res_gam = res_gam  # attached for debugging
    self.mod_gam = m  # attached for debugging

    res_glm = GLM(y_obs, exog, family=f).fit()

    # Note: there are still some naming inconsistencies
    self.res1 = res1 = Dummy()  # for the gam model
    #res2 = Dummy() #for benchmark
    self.res2 = res2 = res_glm  # reuse existing glm results, will add additional attributes

    # eta in GLM terminology (linear predictor)
    res2.y_pred = res_glm.model.predict(res_glm.params, exog, linear=True)
    res1.y_pred = res_gam.predict(x)
    res1.y_predshort = res_gam.predict(x[:10])  #, linear=True)

    # mu (mean response)
    res2.mu_pred = res_glm.model.predict(res_glm.params, exog, linear=False)
    res1.mu_pred = res_gam.mu

    # parameters
    slopes = [i for ss in m.smoothers for i in ss.params[1:]]
    const = res_gam.alpha + sum([ss.params[1] for ss in m.smoothers])
    res1.params = np.array([const] + slopes)
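# The two `init` variants above rely on names defined elsewhere in the test
# module. Plausible imports (the sandbox import paths are assumptions based
# on the statsmodels layout) and a minimal `Dummy` results container:
import numpy as np
from scipy import stats
from statsmodels.genmod.generalized_linear_model import GLM
from statsmodels.sandbox.gam import Model as GAM

class Dummy:
    # bare attribute container for collecting results to compare
    pass

# Sketch of how a concrete test case might provide the class attributes that
# `init` expects (class names and values here are illustrative, not the
# actual test suite):
# class TestGAMPoisson(BaseGAM):
#     @classmethod
#     def setup_class(cls):
#         super().setup_class()          # sets cls.nobs, cls.y_true, cls.x, cls.exog
#         cls.family = family.Poisson()
#         cls.rvs = stats.poisson.rvs    # discrete: no `scale` argument
#         cls.init()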
    print(m)

    y_pred = m.results.predict(d)
    plt.figure()
    plt.plot(y, '.')
    plt.plot(z, 'b-', label='true')
    plt.plot(y_pred, 'r-', label='AdditiveModel')
    plt.legend()
    plt.title('gam.AdditiveModel')

if example == 2:
    print("binomial")
    f = family.Binomial()
    b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
    b.shape = y.shape
    m = GAM(b, d, family=f)
    toc = time.time()
    m.fit(b)
    tic = time.time()
    print(tic - toc)

if example == 3:
    print("Poisson")
    f = family.Poisson()
    y = y / y.max() * 3
    yp = f.link.inverse(y)
    p = np.asarray([scipy.stats.poisson.rvs(val) for val in yp], float)
    p.shape = y.shape
    m = GAM(p, d, family=f)
    toc = time.time()
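# For reference: Binomial's default link is the logit, so f.link.inverse is
# the logistic sigmoid, and Poisson's default link is the log, so its inverse
# is exp. A quick, hedged equivalence check with scipy:
from scipy.special import expit
eta = np.linspace(-3, 3, 5)
assert np.allclose(family.Binomial().link.inverse(eta), expit(eta))
assert np.allclose(family.Poisson().link.inverse(eta), np.exp(eta))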
x2 = R.standard_normal(nobs)
x2.sort()
y = R.standard_normal((nobs,))

d = np.array([x1, x2]).T

import scipy.stats
import time

print("binomial")
f = family.Binomial()

# drawing Bernoulli responses, replaced here by a deterministic threshold on x1
# b = np.asarray([scipy.stats.bernoulli.rvs(p) for p in f.link.inverse(y)])
# b.shape = y.shape
b = np.zeros(len(x1))
b[x1 > 0.5] = 1

m = GAM(b, d, family=f)
toc = time.time()
m.fit(b)
tic = time.time()
print(tic - toc)

plt.figure()
plt.plot(x1, standardize(m.smoothers[0](x1)), 'r')
#plt.plot(x1, standardize(f1(x1)), linewidth=2)
#plt.figure()
plt.plot(x2, standardize(m.smoothers[1](x2)), 'r')  # smoothers[1] is the x2 smoother
#plt.plot(x2, standardize(f2(x2)), linewidth=2)
plt.show()
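# `standardize` is defined elsewhere in the example script; a plausible
# definition (an assumption) consistent with its use above:
def standardize(x):
    # center and scale to unit variance so the fitted smooths are comparable
    return (x - x.mean()) / x.std()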