def compare(Y, first_model, test_effect, sub=None):
    """
    OBSOLETE

    Test for a significant influence of ``test_effect`` by checking whether
    ``first_model + test_effect`` explains significantly more variance than
    ``first_model`` alone.

    Both models are fitted with ``lm``; their ANOVA tables and a one-line
    summary of the incremental F-test are printed. Nothing is returned.

    Parameters
    ----------
    Y
        Dependent variable, passed through to ``lm``.
    first_model
        Baseline model; must support ``first_model + test_effect``.
    test_effect
        Effect added to the baseline model. Its ``df`` attribute is used as
        the numerator degrees of freedom of the F-test.
    sub
        Optional subset specification, passed through to ``lm``.
    """
    a1 = lm(Y, first_model, sub=sub)
    a2 = lm(Y, first_model + test_effect, sub=sub)

    # Single-argument print(...) calls produce the same output under
    # Python 2 (parenthesised expression) and Python 3 (function call).
    print("")
    print(a1.anova(title="MODEL 1:"))
    print("\n")
    print(a2.anova(title="MODEL 2:"))

    # compare: reduction in residual SS relative to the larger model's MS_res
    SS_diff = a1.SS_res - a2.SS_res
    df_diff = test_effect.df
    MS_diff = SS_diff / df_diff
    F = MS_diff / a2.MS_res
    # sf() is the upper tail 1 - cdf(), computed without the cancellation
    # that makes ``1 - cdf`` collapse to 0.0 for very small p-values.
    p = sp.stats.distributions.f.sf(F, df_diff, a2.df_res)
    stars = test.star(p).replace(" ", "")
    difftxt = (
        "Residual SS reduction: {SS}, df difference: {df}, "
        "F = {F}{s}, p = {p}"
    )
    print("\n" + difftxt.format(SS=SS_diff, df=df_diff, F=F, s=stars, p=p))
def anova(self):
    """
    Return an ANOVA table.

    The table has one row per ``(name, F_test)`` pair from ``self.names`` and
    ``self.F_tests``, a "Residuals" row when ``self.X.df_error > 0``, and a
    final "Total" row holding the sum of squared deviations of ``self.Y.x``
    from ``self.Y.mean()`` with ``len(self.Y) - 1`` degrees of freedom.
    """
    # one label column followed by five right-aligned value columns
    table = fmtxt.Table("l" + 5 * "r")
    if self.title:
        table.title(self.title)

    # header row: empty corner cell, then bold centered column titles
    table.cell()
    for column_title in ("SS", "df", "MS", "F", "p"):
        table.cell(column_title, r"\textbf", just="c")
    table.midrule()

    # one row per effect
    for effect_name, f_test in zip(self.names, self.F_tests):
        table.cell(effect_name)
        table.cell(fmtxt.stat(f_test.SS))
        table.cell(fmtxt.stat(f_test.df, fmt="%i"))
        table.cell(fmtxt.stat(f_test.MS))
        if not f_test.F:
            # no F value for this effect: leave the F and p cells blank
            table.cell()
            table.cell()
            continue
        significance = test.star(f_test.p)
        table.cell(fmtxt.stat(f_test.F, stars=significance))
        table.cell(fmtxt.p(f_test.p))

    # residuals row (only when the model leaves error degrees of freedom)
    if self.X.df_error > 0:
        table.empty_row()
        table.cell("Residuals")
        res_SS, res_df, res_MS = self.residuals
        table.cell(res_SS)
        table.cell(res_df, fmt="%i")
        table.cell(res_MS)
        # this row has no F / p cells, so it is closed explicitly
        table.endline()

    # total row
    table.midrule()
    table.cell("Total")
    deviations = self.Y.x - self.Y.mean()
    total_SS = np.sum(deviations ** 2)
    table.cell(fmtxt.stat(total_SS))
    table.cell(fmtxt.stat(len(self.Y) - 1, fmt="%i"))
    return table
def comparelm(lm1, lm2):
    """
    Compare two linear models with an incremental F-test (Fox, p. 109).

    The model with more residual degrees of freedom is treated as the
    restricted model, regardless of argument order. The reduction in
    residual sum of squares is tested against the residual mean square of
    the other (larger) model.

    Parameters
    ----------
    lm1, lm2
        Fitted models exposing ``df_res``, ``SS_res`` and ``MS_res``.

    Returns
    -------
    str
        One-line summary with the residual SS reduction, the df difference,
        F (with significance stars) and p.

    Raises
    ------
    ValueError
        If both models have the same residual degrees of freedom, in which
        case the df difference is zero and no F-test can be formed.
    """
    # Explicit check instead of ``assert``: assertions are removed under
    # ``python -O``, which would let a zero df difference reach the division.
    if lm1.df_res == lm2.df_res:
        raise ValueError(
            "comparelm requires models with different residual degrees of "
            "freedom (both have df_res = %r)" % (lm1.df_res,)
        )

    # make lm1 the restricted model (the one with more residual df)
    if lm2.df_res > lm1.df_res:
        lm1, lm2 = lm2, lm1

    SS_diff = lm1.SS_res - lm2.SS_res
    df_diff = lm1.df_res - lm2.df_res
    MS_diff = SS_diff / df_diff
    F = MS_diff / lm2.MS_res
    # sf() is the upper tail 1 - cdf(), without the precision loss that
    # ``1 - cdf`` suffers for very small p-values.
    p = sp.stats.distributions.f.sf(F, df_diff, lm2.df_res)
    stars = test.star(p).replace(" ", "")
    difftxt = (
        "Residual SS reduction: {SS}, df difference: {df}, "
        "F = {F:.3f}{s}, p = {p:.4f}"
    )
    return difftxt.format(SS=SS_diff, df=df_diff, F=F, s=stars, p=p)
def anova(self):
    """
    Return ANOVA table.

    The table has one row per entry of ``self._results_table`` (each entry
    unpacks to ``name, SS, df, MS, F, p``) followed by a "Total" row with
    ``self.Y.SS`` and ``self.Y.N - 1`` degrees of freedom. For entries whose
    ``F`` is falsy, the F and p cells are left empty.
    """
    # This method writes no E(MS) header or cell, so the column spec is
    # fixed at one label column plus five value columns. Tying the column
    # count to the show_ems setting would declare a column that no row fills.
    table = textab.Table("l" + "r" * 5)
    if self.title:
        table.title(self.title)

    # table head: empty corner cell, then bold centered column titles
    table.cell()
    for hd in ("SS", "df", "MS", "F", "p"):
        table.cell(hd, r"\textbf", just="c")
    table.midrule()

    # table body
    for name, SS, df, MS, F, p in self._results_table:
        table.cell(name)
        table.cell(textab.stat(SS))
        table.cell(textab.stat(df, fmt="%i"))
        table.cell(textab.stat(MS))
        if F:
            stars = test.star(p)
            table.cell(textab.stat(F, stars=stars))
            table.cell(textab.p(p))
        else:
            # no F-test for this row: keep the F and p columns blank
            table.cell()
            table.cell()

    # total
    table.midrule()
    table.cell("Total")
    table.cell(textab.stat(self.Y.SS))
    table.cell(textab.stat(self.Y.N - 1, fmt="%i"))
    return table
def anova(self, title=None, empty=True, ems=None):
    """
    Return an ANOVA table for the linear model.

    Parameters
    ----------
    title
        Table title; when falsy, ``self.title`` is used if it is set.
    empty
        When true, effects for which no E(MS) term is available still get a
        row (with empty F and p cells). When false, such effects are left
        out of the table; they are still recorded in ``self.results``.
    ems
        Whether to include the "E(MS)" column. ``None`` uses
        ``defaults["show_ems"]``.

    Returns
    -------
    The ``textab.Table`` that was built.

    Raises
    ------
    NotImplementedError
        If ``isbalanced(X)`` is false.

    Notes
    -----
    Sets ``self.MS`` (list of mean squares, in ``X.iter_effects()`` order)
    and ``self.results`` (dict keyed by effect name with the entries "SS",
    "df", "MS", "E(MS)", "F" and "p").
    """
    if ems is None:
        ems = defaults["show_ems"]
    Y = self.Y
    X = self.X
    values = self.values
    e_ms = _hopkins_ems(X)

    # table head
    table = textab.Table("l" + "r" * (5 + ems))
    if title:
        table.title(title)
    elif self.title:
        table.title(self.title)
    table.cell()
    headers = ["SS", "df", "MS"]
    if ems:
        headers += ["E(MS)"]
    headers += ["F", "p"]
    for hd in headers:
        table.cell(hd, r"\textbf", just="c")
    table.midrule()

    if not isbalanced(X):
        raise NotImplementedError()

    # MS for factors (needed for models involving random effects)
    self.MS = []
    for i, name, index, df in X.iter_effects():
        SS = np.sum(values[:, index].sum(1) ** 2)
        self.MS.append(SS / df)

    # Incremental F-test for each effect against a model that omits the
    # effect and every effect containing all of its factors.
    # NOTE(review): ``tests`` is filled but never read below - confirm
    # whether the table was meant to use these results.
    tests = {}
    for e in X.effects:
        m0effects = [
            e0 for e0 in X.effects
            if e0 is not e and not all(f in e0.factors for f in e.factors)
        ]
        model0 = model(m0effects)
        model1 = model0 + e
        SS, df, MS, F, p = incremental_F_test(Y, model1, model0)
        tests[e.name] = dict(SS=SS, df=df, MS=MS, F=F, p=p)

    # table body
    self.results = {}
    for i, name, index, df in X.iter_effects():
        SS = np.sum(values[:, index].sum(1) ** 2)
        MS = SS / df

        # pick the denominator (error term) for this effect
        if e_ms[i] is not None:
            e_ms_i = e_ms[i]
            MS_d = self.MS[e_ms_i]
            df_d = X.effects[e_ms_i].df
            e_ms_name = X.effects[e_ms_i].name
        elif self.df_res > 0:
            df_d = self.df_res
            MS_d = self.MS_res
            e_ms_name = "Res"
        else:
            MS_d = False
            e_ms_name = None

        # F-test
        # ``!= False`` is deliberate: it is also false for a denominator of
        # 0, so the division below is skipped in that case as well.
        if MS_d != False:  # noqa: E712
            F = MS / MS_d
            # sf() is the upper tail 1 - cdf(), without the precision loss
            # of ``1 - cdf`` for very small p-values.
            p = sp.stats.distributions.f.sf(F, df, df_d)
            stars = test.star(p)
            tex_stars = textab.Stars(stars)
            F_tex = [F, tex_stars]
        else:
            # Reset F as well, so the stored result does not carry over the
            # F value of a previously processed effect.
            F = None
            F_tex = None
            p = None

        # add to table
        if e_ms_name or empty:
            table.cell(name)
            table.cell(SS)
            table.cell(df, fmt="%i")
            table.cell(MS)
            if ems:
                table.cell(e_ms_name)
            table.cell(F_tex, mat=True)
            table.cell(p, fmt=defaults["p_fmt"], drop0=True)

        # store results
        self.results[name] = {
            "SS": SS,
            "df": df,
            "MS": MS,
            "E(MS)": e_ms_name,
            "F": F,
            "p": p,
        }

    # table end
    if self.df_res > 0:
        table.cell("Residuals")
        table.cell(self.SS_res)
        table.cell(self.df_res, fmt="%i")
        table.cell(self.MS_res)
    return table
def anova(self, title="ANOVA", empty=True, ems=False):
    """
    Return an ANOVA table for the linear model.

    Parameters
    ----------
    title
        Table title; no title is set when falsy.
    empty
        When true, effects whose E(MS) description is empty still get a row
        (with an empty F cell). When false, such effects are left out.
    ems
        Whether to include the "E(MS)" column.

    Returns
    -------
    The ``fmtxt.Table`` that was built. If ``isbalanced(X)`` is false, the
    table carries a warning caption.

    Raises
    ------
    NotImplementedError
        If the model has error degrees of freedom (``X.df_error != 0``) and
        ``hasrandom(X)`` is true.
    """
    X = self.X
    values = self.beta * self.X.full

    # expected mean squares: only derived for models with X.df_error == 0
    if X.df_error == 0:
        e_ms = hopkins_ems(X)
    elif hasrandom(X):
        err = "Models containing random effects need to be fully specified."
        raise NotImplementedError(err)
    else:
        e_ms = False

    # table head
    table = fmtxt.Table("l" + "r" * (5 + ems))
    if title:
        table.title(title)
    if not isbalanced(X):
        table.caption("Warning: model is unbalanced, use anova class")
    table.cell()
    headers = ["SS", "df", "MS"]
    if ems:
        headers += ["E(MS)"]
    headers += ["F", "p"]
    for hd in headers:
        table.cell(hd, r"\textbf", just="c")
    table.midrule()

    # SS and MS for all effects up front: an effect's denominator may be
    # built from the MS of other effects (models with random effects)
    MSs = {}
    SSs = {}
    for e in X.effects:
        idx = X.full_index[e]
        SSs[e] = SS = np.sum(values[:, idx].sum(1) ** 2)
        MSs[e] = SS / e.df

    # table body
    for e in X.effects:
        MS = MSs[e]

        # denominator: sum over the effect's E(MS) components, or residuals
        if e_ms:
            e_EMS = e_ms[e]
            df_d = sum(c.df for c in e_EMS)
            MS_d = sum(MSs[c] for c in e_EMS)
            e_ms_name = " + ".join(repr(c) for c in e_EMS)
        else:
            df_d = self.df_res
            MS_d = self.MS_res
            e_ms_name = "Res"

        # F-test
        # ``!= False`` is deliberate: it is false for a denominator of 0
        # (e.g. the sum over an empty set of E(MS) components), so no test
        # is computed in that case.
        if MS_d != False:  # noqa: E712
            F = MS / MS_d
            # sf() is the upper tail 1 - cdf(), without the precision loss
            # of ``1 - cdf`` for very small p-values.
            p = scipy.stats.distributions.f.sf(F, e.df, df_d)
            stars = test.star(p)
            tex_stars = fmtxt.Stars(stars)
            F_tex = [F, tex_stars]
        else:
            F_tex = None
            p = None

        # add to table
        if e_ms_name or empty:
            table.cell(e.name)
            table.cell(SSs[e])
            table.cell(e.df, fmt="%i")
            table.cell(MS)
            if ems:
                table.cell(e_ms_name)
            table.cell(F_tex, mat=True)
            table.cell(fmtxt.p(p))

    # Residuals
    if self.df_res > 0:
        table.cell("Residuals")
        table.cell(self.SS_res)
        table.cell(self.df_res, fmt="%i")
        table.cell(self.MS_res)
    return table