Exemplo n.º 1
0
def compare(Y, first_model, test_effect, sub=None):
    """
    OBSOLETE

    Test for a significant influence of ``test_effect`` by comparing whether
    (first_model + test_effect) explains significantly more variance than
    first_model alone.

    Both models are fitted with ``lm``; the ANOVA table of each model and a
    one-line summary of the incremental F-test are printed.

    Parameters
    ----------
    Y :
        Dependent variable, passed on to ``lm``.
    first_model :
        Baseline model; must support ``first_model + test_effect``.
    test_effect :
        Effect added to the baseline model. Its ``df`` attribute is used as
        the numerator degrees of freedom of the F-test.
    sub :
        Passed on to ``lm`` unchanged.

    Returns
    -------
    None
        All results are printed.
    """
    a1 = lm(Y, first_model, sub=sub)
    a2 = lm(Y, first_model + test_effect, sub=sub)

    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("")
    print(a1.anova(title="MODEL 1:"))
    print("\n")
    print(a2.anova(title="MODEL 2:"))

    # incremental F-test: reduction in residual SS relative to the residual
    # mean square of the larger model
    SS_diff = a1.SS_res - a2.SS_res
    df_diff = test_effect.df
    MS_diff = SS_diff / df_diff
    F = MS_diff / a2.MS_res
    # survival function instead of 1 - cdf: no cancellation for small p
    p = sp.stats.distributions.f.sf(F, df_diff, a2.df_res)
    stars = test.star(p).replace(" ", "")
    difftxt = "Residual SS reduction: {SS}, df difference: {df}, F = {F}{s}, p = {p}"
    print("\n" + difftxt.format(SS=SS_diff, df=df_diff, F=F, s=stars, p=p))
Exemplo n.º 2
0
    def anova(self):
        """Build and return the ANOVA table for this model.

        The table has one row per entry in ``self.names`` / ``self.F_tests``
        (SS, df, MS and, where an F statistic is available, F and p), an
        optional "Residuals" row when ``self.X.df_error`` is positive, and a
        closing "Total" row.
        """
        # header: optional title, empty corner cell, then the column labels
        table = fmtxt.Table("l" + "r" * 5)
        if self.title:
            table.title(self.title)
        table.cell()
        for label in ("SS", "df", "MS", "F", "p"):
            table.cell(label, r"\textbf", just="c")
        table.midrule()

        # one row per effect
        for effect_name, ftest in zip(self.names, self.F_tests):
            table.cell(effect_name)
            table.cell(fmtxt.stat(ftest.SS))
            table.cell(fmtxt.stat(ftest.df, fmt="%i"))
            table.cell(fmtxt.stat(ftest.MS))
            if not ftest.F:
                # no F statistic for this effect: leave F and p blank
                table.cell()
                table.cell()
                continue
            significance = test.star(ftest.p)
            table.cell(fmtxt.stat(ftest.F, stars=significance))
            table.cell(fmtxt.p(ftest.p))

        # residuals row (shorter than a full row, hence the explicit endline)
        if self.X.df_error > 0:
            table.empty_row()
            table.cell("Residuals")
            res_SS, res_df, res_MS = self.residuals
            table.cell(res_SS)
            table.cell(res_df, fmt="%i")
            table.cell(res_MS)
            table.endline()

        # total: sum of squared deviations of Y from its mean, with n - 1 df
        table.midrule()
        table.cell("Total")
        deviations = self.Y.x - self.Y.mean()
        total_SS = np.sum(deviations ** 2)
        table.cell(fmtxt.stat(total_SS))
        table.cell(fmtxt.stat(len(self.Y) - 1, fmt="%i"))
        return table
Exemplo n.º 3
0
def comparelm(lm1, lm2):
    """
    Incremental F-test between two fitted linear models (Fox, p. 109).

    The model with more residual degrees of freedom is treated as the reduced
    model, regardless of argument order. The two models must differ in their
    residual degrees of freedom.

    Returns a one-line string reporting the reduction in residual SS, the
    difference in df, the F ratio (with significance stars) and the p value.
    """
    # make sure ``lm1`` is the model with more residual df
    if lm2.df_res > lm1.df_res:
        lm1, lm2 = lm2, lm1
    else:
        assert lm1.df_res != lm2.df_res

    ss_drop = lm1.SS_res - lm2.SS_res
    df_drop = lm1.df_res - lm2.df_res
    f_ratio = (ss_drop / df_drop) / lm2.MS_res
    p_value = 1 - sp.stats.distributions.f.cdf(f_ratio, df_drop, lm2.df_res)
    significance = test.star(p_value).replace(" ", "")

    template = (
        "Residual SS reduction: {SS}, df difference: {df}, "
        "F = {F:.3f}{s}, p = {p:.4f}"
    )
    return template.format(SS=ss_drop, df=df_drop, F=f_ratio, s=significance, p=p_value)
Exemplo n.º 4
0
    def anova(self):
        """Return the ANOVA table.

        One row is written per entry of ``self._results_table`` (name, SS, df,
        MS, F, p), followed by a "Total" row based on ``self.Y.SS`` and
        ``self.Y.N - 1``. F and p cells are left blank for rows without an F
        value.

        No E(MS) column is produced, so the ``show_ems`` setting has no
        influence on the layout of this table.
        """
        # Each row below is filled with exactly six cells and is not ended
        # explicitly, so the column spec has to provide exactly six columns.
        # (It used to be widened by ``show_ems`` although no E(MS) header or
        # cell is ever written, leaving one column too many.)
        table = textab.Table("l" + "r" * 5)
        if self.title:
            table.title(self.title)

        # table head: empty corner cell, then the column labels
        table.cell()
        for hd in ("SS", "df", "MS", "F", "p"):
            table.cell(hd, r"\textbf", just="c")
        table.midrule()

        # table body
        for name, SS, df, MS, F, p in self._results_table:
            table.cell(name)
            table.cell(textab.stat(SS))
            table.cell(textab.stat(df, fmt="%i"))
            table.cell(textab.stat(MS))
            if F:
                stars = test.star(p)
                table.cell(textab.stat(F, stars=stars))
                table.cell(textab.p(p))
            else:
                # no F-test for this row
                table.cell()
                table.cell()

        # total
        table.midrule()
        table.cell("Total")
        table.cell(textab.stat(self.Y.SS))
        table.cell(textab.stat(self.Y.N - 1, fmt="%i"))
        return table
Exemplo n.º 5
0
    def anova(self, title=None, empty=True, ems=None):
        """
        Return an ANOVA table for the linear model.

        Parameters
        ----------
        title :
            Table title; when empty, ``self.title`` is used if it is set.
        empty : bool
            Also list effects for which no error term is available. When
            false, such effects are omitted from the table (they are still
            stored in ``self.results``).
        ems : None | bool
            Add an "E(MS)" column naming the error term used for each effect.
            ``None`` falls back to ``defaults["show_ems"]``.

        Returns
        -------
        The table object.

        Raises
        ------
        NotImplementedError
            If the model is not balanced.

        Notes
        -----
        Sets ``self.MS`` (list with the mean square of each effect) and
        ``self.results`` (dict mapping effect name to a dict with the keys
        "SS", "df", "MS", "E(MS)", "F" and "p"; "F" and "p" are None when no
        F-test could be computed).
        """
        if ems is None:
            ems = defaults["show_ems"]
        X = self.X
        values = self.values
        e_ms = _hopkins_ems(X)

        # table head
        table = textab.Table("l" + "r" * (5 + ems))
        if title:
            table.title(title)
        elif self.title:
            table.title(self.title)
        table.cell()
        headers = ["SS", "df", "MS"]
        if ems:
            headers.append("E(MS)")
        headers += ["F", "p"]
        for hd in headers:
            table.cell(hd, r"\textbf", just="c")
        table.midrule()

        if not isbalanced(X):
            raise NotImplementedError("ANOVA table for unbalanced models")

        # MS for all effects up front: an effect's error term can be the MS
        # of another effect (needed for models involving random effects)
        self.MS = []
        for i, name, index, df in X.iter_effects():
            SS = np.sum(values[:, index].sum(1) ** 2)
            self.MS.append(SS / df)

        # table body
        self.results = {}
        for i, name, index, df in X.iter_effects():
            SS = np.sum(values[:, index].sum(1) ** 2)
            MS = SS / df

            # find the error term (denominator) for this effect
            if e_ms[i] is not None:
                e_ms_i = e_ms[i]
                MS_d = self.MS[e_ms_i]
                df_d = X.effects[e_ms_i].df
                e_ms_name = X.effects[e_ms_i].name
            elif self.df_res > 0:
                df_d = self.df_res
                MS_d = self.MS_res
                e_ms_name = "Res"
            else:
                MS_d = None
                e_ms_name = None

            # F-test; skipped without an error term or with a zero one
            if MS_d:
                F = MS / MS_d
                # survival function instead of 1 - cdf: no cancellation for
                # small p
                p = sp.stats.distributions.f.sf(F, df, df_d)
                F_tex = [F, textab.Stars(test.star(p))]
            else:
                # reset F as well, so that neither an unbound name nor the
                # previous effect's value ends up in self.results
                F = None
                F_tex = None
                p = None

            # add to table
            if e_ms_name or empty:
                table.cell(name)
                table.cell(SS)
                table.cell(df, fmt="%i")
                table.cell(MS)
                if ems:
                    table.cell(e_ms_name)
                table.cell(F_tex, mat=True)
                table.cell(p, fmt=defaults["p_fmt"], drop0=True)

            # store results
            self.results[name] = {"SS": SS, "df": df, "MS": MS, "E(MS)": e_ms_name, "F": F, "p": p}

        # table end
        if self.df_res > 0:
            table.cell("Residuals")
            table.cell(self.SS_res)
            table.cell(self.df_res, fmt="%i")
            table.cell(self.MS_res)
        return table
Exemplo n.º 6
0
    def anova(self, title="ANOVA", empty=True, ems=False):
        """
        Return an ANOVA table for the linear model.

        Parameters
        ----------
        title :
            Table title; no title is set when empty.
        empty : bool
            Also list effects for which no error term is available.
        ems : bool
            Add an "E(MS)" column naming the error term used for each effect.

        Returns
        -------
        The table object. For an unbalanced model a warning caption is added
        to the table.

        Raises
        ------
        NotImplementedError
            If the model has residual degrees of freedom (``X.df_error`` is
            not 0) and contains random effects.
        """
        X = self.X
        values = self.beta * self.X.full

        # error terms: from the expected mean squares for fully specified
        # models, otherwise the residuals
        if X.df_error == 0:
            e_ms = hopkins_ems(X)
        elif hasrandom(X):
            err = "Models containing random effects need to be fully specified."
            raise NotImplementedError(err)
        else:
            e_ms = False

        # table head
        table = fmtxt.Table("l" + "r" * (5 + ems))
        if title:
            table.title(title)

        if not isbalanced(X):
            table.caption("Warning: model is unbalanced, use anova class")

        table.cell()
        headers = ["SS", "df", "MS"]
        if ems:
            headers.append("E(MS)")
        headers += ["F", "p"]
        for hd in headers:
            table.cell(hd, r"\textbf", just="c")
        table.midrule()

        # SS and MS for all effects up front: an effect's error term can be
        # built from the MS of other effects (models with random effects)
        SSs = {}
        MSs = {}
        for e in X.effects:
            idx = X.full_index[e]
            SSs[e] = np.sum(values[:, idx].sum(1) ** 2)
            MSs[e] = SSs[e] / e.df

        # table body
        for e in X.effects:
            MS = MSs[e]
            if e_ms:
                e_EMS = e_ms[e]
                df_d = sum(c.df for c in e_EMS)
                MS_d = sum(MSs[c] for c in e_EMS)
                e_ms_name = " + ".join(repr(c) for c in e_EMS)
            else:
                df_d = self.df_res
                MS_d = self.MS_res
                e_ms_name = "Res"

            # F-test; skipped when the error term is zero, e.g. because the
            # effect has an empty E(MS) entry
            if MS_d:
                F = MS / MS_d
                # survival function instead of 1 - cdf: no cancellation for
                # small p
                p = scipy.stats.distributions.f.sf(F, e.df, df_d)
                F_tex = [F, fmtxt.Stars(test.star(p))]
            else:
                F_tex = None
                p = None

            # add to table
            if e_ms_name or empty:
                table.cell(e.name)
                table.cell(SSs[e])
                table.cell(e.df, fmt="%i")
                table.cell(MS)
                if ems:
                    table.cell(e_ms_name)
                table.cell(F_tex, mat=True)
                # NOTE(review): p is None when no F-test was run; confirm
                # that fmtxt.p() accepts None
                table.cell(fmtxt.p(p))

        # Residuals
        if self.df_res > 0:
            table.cell("Residuals")
            table.cell(self.SS_res)
            table.cell(self.df_res, fmt="%i")
            table.cell(self.MS_res)

        return table