def _summarize_ols_fit(fit, names, parent):
    """Collect the per-term and whole-model statistics of one fitted model.

    Every ``(p-value, coefficient, term name)`` triple is filed under
    ``parent[name]`` and each group is sorted, so within a group the entry
    with the smallest p-value comes first.
    """
    grouped = dd(list)
    for pvalue, coef, name in zip(fit.pvalues, fit.params, names):
        grouped[parent[name]].append((pvalue, coef, name))
    return {
        'params': {key: sorted(terms) for key, terms in grouped.items()},
        'rs': fit.rsquared,
        'ars': fit.rsquared_adj,
        'bic': fit.bic,
    }


def regress_ols2(y, X, req=()):
    """Fit ``y`` on ``X.array`` by OLS and return a dict summarising the fit.

    Args:
        y: response values passed straight to ``sm.OLS``.
        X: design object; this function reads ``array``, ``names`` and
            ``parent``, plus ``p_names``/``p_array`` and
            ``c_names``/``c_array`` when the matching sub-model is requested.
        req: container of optional extras. Recognised entries are
            ``'resids'``, ``'pwr'``, ``'predictors-only'`` and
            ``'covariates-only'``. Only membership tests are performed on it.

    Returns:
        dict with keys ``'params'``, ``'rs'``, ``'ars'``, ``'bic'``,
        ``'pwr-05'`` and ``'pwr-001'``, plus ``'resids'``, ``'predictors'``
        and ``'covariates'`` when requested (the two sub-models additionally
        require more than one column name).
    """
    model = sm.OLS(y, X.array).fit()
    x_out = _summarize_ols_fit(model, X.names, X.parent)
    # Placeholder power values; replaced only when 'pwr' is requested and the
    # fit has a positive R^2.
    x_out['pwr-05'], x_out['pwr-001'] = 0.5, 0.01

    if 'resids' in req:
        x_out['resids'] = model.resid

    if 'pwr' in req and model.rsquared > 0:
        df_de = len(X.names) - 1
        df_num = len(y) - len(X.names)
        effect = np.sqrt(model.rsquared / (1 - model.rsquared))
        # NOTE(review): df_num receives the residual d.o.f. and df_denom the
        # regressor count; the meaning of these two argument names is believed
        # to differ between statsmodels releases -- confirm against the
        # installed version.
        x_out['pwr-05'], x_out['pwr-001'] = (
            smp.FTestPower().solve_power(
                effect_size=effect, df_num=df_num, df_denom=df_de, alpha=0.05),
            smp.FTestPower().solve_power(
                effect_size=effect, df_num=df_num, df_denom=df_de, alpha=0.001),
        )

    # NOTE(review): both sub-model summaries below carry the FULL model's
    # residuals, exactly as the original code did -- confirm whether the
    # sub-model's own residuals were intended.
    if 'predictors-only' in req and len(X.p_names) > 1:
        p_model = sm.OLS(y, X.p_array).fit()
        P_out = _summarize_ols_fit(p_model, X.p_names, X.parent)
        P_out['resids'] = model.resid
        x_out['predictors'] = P_out

    if 'covariates-only' in req and len(X.c_names) > 1:
        c_model = sm.OLS(y, X.c_array).fit()
        C_out = _summarize_ols_fit(c_model, X.c_names, X.parent)
        C_out['resids'] = model.resid
        x_out['covariates'] = C_out

    return x_out
def test(self, y):
    """Run the model on ``y`` and populate ``self.output`` and ``self.pwr``.

    Stores ``y`` and its length, calls ``self.execute()``, then branches on
    ``self.valid``:

    * valid: ``self.output`` holds one ``(p-value, coefficient, name,
      is_predictor)`` tuple per entry of ``self.X.names`` and ``self.pwr``
      maps every alpha in ``self.alphas`` to the solved F-test power, falling
      back to 0.5 for all alphas if the power computation raises.
    * invalid: ``self.output`` holds placeholder rows (p-value 0.99,
      coefficient 0) and every power is 0.5.

    Returns:
        ``self``, so calls can be chained.
    """
    self.y, self.yLen = y, len(y)
    self.execute()
    # self.resids = RegResiduals(self.X,self.y,self.dist).extract(self.model,self.zero_prob)
    # self.process()

    if self.valid:
        self.output = [
            (p, b, x, i in self.X.predictor_idx)
            for i, (p, b, x) in enumerate(
                zip(self.model.pvalues, self.model.params, self.X.names))
        ]
        try:
            effect = np.sqrt(self.rsq / (1 - self.rsq))
            self.pwr = {
                a: smp.FTestPower().solve_power(effect_size=effect,
                                                df_num=self.dfn,
                                                df_denom=self.dfd,
                                                alpha=a)
                for a in self.alphas
            }
        except Exception:
            # Best-effort: any numerical/solver failure yields the neutral
            # placeholder, but interpreter-level signals (KeyboardInterrupt,
            # SystemExit) are no longer swallowed.
            self.pwr = {a: 0.5 for a in self.alphas}
    else:
        self.output = [(0.99, 0, x, i in self.X.predictor_idx)
                       for i, x in enumerate(self.X.names)]
        self.pwr = {a: 0.5 for a in self.alphas}
    return self
def regress_zip(self, Y, X, interest=None):
    """Fit a zero-inflated Poisson model of ``log(Y + 1)`` on ``X``.

    An intercept-only model is fitted alongside the full one so that a
    likelihood-ratio pseudo R^2 (``1 - llf / llf_null``) can be reported.

    Args:
        Y: iterable of response values; each is transformed with
            ``log(y + 1.0)`` before fitting.
        X: design rows; ``len(X[0])`` is used as the number of model terms.
        interest: unused; kept for interface compatibility.

    Returns:
        dict with keys ``'rs'``, ``'ars'``, ``'bic'``, ``'pwr'``,
        ``'resids'`` (the transformed response) and ``'params'``. ``'params'``
        maps each variable name (the text before ``'='`` in a predictor
        label) to a list of ``(level, (p-value, coefficient))`` pairs sorted
        by p-value.
    """
    r_out, p_out, alp = {}, dd(dict), 0.05
    Y = np.array([np.array(log(y + 1.0)) for y in Y])

    null = msc.PoissonZiGMLE(Y, np.array([1 for _ in X])).fit(disp=0)
    model = msc.PoissonZiGMLE(Y, np.array(X)).fit(disp=0)

    params = model.params
    try:
        pvals = model.pvalues
    except ValueError:
        # p-values could not be produced; report a non-significant placeholder.
        pvals = [0.99 for _ in params]

    # Inferred predictors get a neutral (p=1, coefficient=0) entry first; a
    # fitted predictor with the same variable/level overwrites it below.
    for label in self.D.inferred_predictors:
        parts = label.split('=')
        p_out[parts[0]][parts[1]] = (1, 0)
    for pv, bw, label in zip(pvals, params, self.D.predictors):
        parts = label.split('=')
        p_out[parts[0]][parts[-1]] = (pv, bw)

    for variable, levels in p_out.items():
        r_out[variable] = sorted(levels.items(), key=lambda loc: loc[1][0])

    n_terms = len(X[0])
    x_out = {
        'rs': 1 - (model.llf / null.llf),
        'ars': 1 - ((model.llf - n_terms) / null.llf),
        'bic': model.bic
    }

    f_2 = x_out['rs'] / (1 - x_out['rs'])
    df_de, df_num = n_terms - 1, len(Y) - n_terms
    pwr = smp.FTestPower().solve_power(effect_size=np.sqrt(f_2),
                                       df_num=df_num,
                                       df_denom=df_de,
                                       alpha=alp)
    # The power was previously computed and then dropped; expose it under the
    # same key that regress_glmnb uses.
    x_out['pwr'] = pwr
    x_out['resids'] = Y
    x_out['params'] = r_out
    return x_out
def regress_glmnb(self, Y, X, interest=None):
    """Fit a negative-binomial GLM of ``Y`` on ``X`` and summarise it.

    An intercept-only GLM is fitted as well so that a likelihood-ratio
    pseudo R^2 (``1 - llf / llf_null``) can be reported.

    Args:
        Y: response values handed to ``sm.GLM``.
        X: design rows; ``len(X[0])`` is used as the number of model terms.
        interest: unused.

    Returns:
        dict with keys ``'rs'``, ``'ars'``, ``'bic'``, ``'pwr'``,
        ``'resids'`` (``log(r + 1.0)`` of each Pearson residual) and
        ``'params'``, which maps each variable name to a list of
        ``(level, (p-value, coefficient))`` pairs sorted by p-value.
    """
    alp = 0.05
    by_variable = dd(dict)

    intercept_only = [np.array(1) for _ in X]
    null = sm.GLM(Y, intercept_only,
                  family=sm.families.NegativeBinomial()).fit()
    model = sm.GLM(Y, X, family=sm.families.NegativeBinomial()).fit()

    # Neutral entries for inferred predictors; fitted ones overwrite them.
    for label in self.D.inferred_predictors:
        pieces = label.split('=')
        by_variable[pieces[0]][pieces[1]] = (1, 0)
    for pvalue, coef, label in zip(model.pvalues, model.params,
                                   self.D.predictors):
        pieces = label.split('=')
        by_variable[pieces[0]][pieces[-1]] = (pvalue, coef)

    r_out = {}
    for variable, levels in by_variable.items():
        r_out[variable] = sorted(levels.items(),
                                 key=lambda item: item[1][0])

    n_terms = len(X[0])
    x_out = {}
    x_out['rs'] = 1 - (model.llf / null.llf)
    x_out['ars'] = 1 - ((model.llf - n_terms) / null.llf)
    x_out['bic'] = model.bic

    effect_sq = x_out['rs'] / (1 - x_out['rs'])
    df_de = n_terms - 1
    df_num = len(Y) - n_terms
    x_out['pwr'] = smp.FTestPower().solve_power(
        effect_size=np.sqrt(effect_sq),
        df_num=df_num,
        df_denom=df_de,
        alpha=alp)
    x_out['resids'] = [log(r + 1.0) for r in model.resid_pearson]
    x_out['params'] = r_out
    return x_out
def test(self, y):
    """Fit ``y`` on ``self.X.array`` by OLS and store the results on ``self``.

    Sets:
        rsq, rsa, bic: rounded R^2 (5 places), adjusted R^2 and BIC (3 places).
        output: one ``(p-value, coefficient, name, is_predictor)`` tuple per
            entry of ``self.X.names``.
        pwr: mapping alpha -> solved F-test power for every alpha in
            ``self.alphas``; 0.5 for every alpha if the computation raises.
        resids: the model residuals.
        c_resids: for each row of ``self.X.array``, ``y[i]`` plus the sum of
            ``x[j] * coefficient[j]`` over ``self.X.covariate_idx``.

    Returns:
        ``self``, so calls can be chained.
    """
    model = sm.OLS(y, self.X.array).fit()
    self.rsq = round(model.rsquared, 5)
    self.rsa = round(model.rsquared_adj, 3)
    self.bic = round(model.bic, 3)

    self.output = [
        (p, b, x, i in self.X.predictor_idx)
        for i, (p, b, x) in enumerate(
            zip(model.pvalues, model.params, self.X.names))
    ]

    try:
        effect = np.sqrt(self.rsq / (1 - self.rsq))
        self.pwr = {
            a: smp.FTestPower().solve_power(effect_size=effect,
                                            df_num=self.dfn,
                                            df_denom=self.dfd,
                                            alpha=a)
            for a in self.alphas
        }
    except Exception:
        # Best-effort fallback for numerical/solver failures; interpreter
        # signals such as KeyboardInterrupt now propagate.
        self.pwr = {a: 0.5 for a in self.alphas}

    self.resids = model.resid
    self.c_resids = [
        sum(x[j] * model.params[j] for j in self.X.covariate_idx) + y[i]
        for i, x in enumerate(self.X.array)
    ]
    return self
def test_pwr(self, alphas=(0.05, 0.001)):
    """Store the solved F-test power for each alpha in ``self.pwr``.

    Nothing is computed unless ``self.model.rsquared`` is positive.
    ``self.pwr`` must already exist and support item assignment.

    Args:
        alphas: iterable of significance levels; each becomes a key of
            ``self.pwr``.

    Returns:
        ``self``, so calls can be chained.
    """
    rsquared = self.model.rsquared
    if rsquared > 0:
        n_terms = len(self.X.names)
        df_de = n_terms - 1
        df_num = len(self.y) - n_terms
        effect = np.sqrt(rsquared / (1 - rsquared))
        for a in alphas:
            self.pwr[a] = smp.FTestPower().solve_power(effect_size=effect,
                                                       df_num=df_num,
                                                       df_denom=df_de,
                                                       alpha=a)
    return self
def test(self, y):
    """Run the model on ``y`` and record explained variance and power.

    Resets the result attributes, stores ``y`` (also as a NumPy array in
    ``self.yA``), calls ``self.execute()`` and then branches on ``self.valid``:

    * valid: ``self.v_explained`` is ``1 - var(residuals) / var(y)`` and
      ``self.pwr`` maps each alpha in ``self.alphas`` to the solved F-test
      power. If the computation raises or any power is NaN, every alpha maps
      to 0.5 instead.
    * invalid: ``self.v_explained`` is 0, every power is 0.0,
      ``self.output`` holds ``(0.5, coefficient, t-value, name,
      is_predictor)`` rows, and bic/aic/rsq/rsa are zeroed.

    NOTE(review): this block reached review with its indentation flattened;
    placing everything from ``self.output`` through ``self.tvalues`` inside
    the invalid branch is an inference -- confirm against the canonical file.

    Returns:
        ``self``, so calls can be chained.
    """
    # print self.LOG,'huh'
    # if self.LOG: y = [math.log(yi+1.0,2) for yi in y]
    self.output, self.zero_infl = [], 0.0
    self.rsq, self.rsa, self.bic, self.aic = 'NA', 'NA', 'NA', 'NA'
    self.valid, self.y, self.yA, self.yLen, self.history = (
        True, y, np.array(y), len(y), '')
    self.execute()

    if self.valid:
        self.v_explained = 1 - (np.var(self.res.resid) / np.var(self.yA))
        try:
            effect = np.sqrt(self.v_explained / (1 - self.v_explained))
            self.pwr = {
                a: smp.FTestPower().solve_power(effect_size=effect,
                                                df_num=self.dfn,
                                                df_denom=self.dfd,
                                                alpha=a)
                for a in self.alphas
            }
        except Exception:
            # Best-effort fallback for numerical/solver failures; interpreter
            # signals such as KeyboardInterrupt now propagate.
            self.pwr = {a: 0.5 for a in self.alphas}
        if any(np.isnan(pw) for pw in self.pwr.values()):
            self.pwr = {a: 0.5 for a in self.alphas}
    else:
        self.v_explained = 0
        self.pwr = {a: 0.0 for a in self.alphas}
        # The fitted p-value is unpacked but replaced by the 0.5 placeholder.
        self.output = [
            (0.5, b, t, x, i in self.X.predictor_idx)
            for i, (p, t, b, x) in enumerate(
                zip(self.res.pvalues, self.res.tvalues, self.res.params,
                    self.X.names))
        ]
        self.bic, self.aic, self.rsq, self.rsa = 0, 0, 0, 0
        self.tvalues = self.res.tvalues
    #self.bic, self.aic, self.rsq, self.rsa = self.res.bic, self.res.aic, self.res.prsquared, 1- (((1-self.res.prsquared)*(self.yLen-1)) / self.dfn) #(self.yLen-self.X.len-1))
    #self.output = [(p,b,x,i in self.X.predictor_idx) for i,(p,b,x) in enumerate(zip(self.res.pvalues, self.res.params, self.X.names))]
    return self