def anova_single(model, **kwargs): """ ANOVA table for one fitted linear model. Parameters ---------- model : fitted linear model results instance A fitted linear model typ : int or str {1,2,3} or {"I","II","III"} Type of sum of squares to use. **kwargs** scale : float Estimate of variance, If None, will be estimated from the largest model. Default is None. test : str {"F", "Chisq", "Cp"} or None Test statistics to provide. Default is "F". Notes ----- Use of this function is discouraged. Use anova_lm instead. """ test = kwargs.get("test", "F") scale = kwargs.get("scale", None) typ = kwargs.get("typ", 1) robust = kwargs.get("robust", None) if robust: robust = robust.lower() endog = model.model.endog exog = model.model.exog nobs = exog.shape[0] response_name = model.model.endog_names design_info = model.model._data._orig_exog.design_info exog_names = model.model.exog_names # +1 for resids n_rows = (len(design_info.terms) - _has_intercept(design_info) + 1) pr_test = "PR(>%s)" % test names = ['df', 'sum_sq', 'mean_sq', test, pr_test] table = DataFrame(np.zeros((n_rows, 5)), columns = names) if typ in [1,"I"]: return anova1_lm_single(model, endog, exog, nobs, design_info, table, n_rows, test, pr_test, robust) elif typ in [2, "II"]: return anova2_lm_single(model, design_info, n_rows, test, pr_test, robust) elif typ in [3, "III"]: return anova3_lm_single(model, design_info, n_rows, test, pr_test, robust) elif typ in [4, "IV"]: raise NotImplemented("Type IV not yet implemented") else: # pragma: no cover raise ValueError("Type %s not understood" % str(typ))
def anova_single(model, **kwargs): """ Anova table for one fitted linear model. Parameters ---------- model : fitted linear model results instance A fitted linear model typ : int or str {1,2,3} or {"I","II","III"} Type of sum of squares to use. **kwargs** scale : float Estimate of variance, If None, will be estimated from the largest model. Default is None. test : str {"F", "Chisq", "Cp"} or None Test statistics to provide. Default is "F". Notes ----- Use of this function is discouraged. Use anova_lm instead. """ test = kwargs.get("test", "F") scale = kwargs.get("scale", None) typ = kwargs.get("typ", 1) robust = kwargs.get("robust", None) if robust: robust = robust.lower() endog = model.model.endog exog = model.model.exog nobs = exog.shape[0] response_name = model.model.endog_names design_info = model.model.data.design_info exog_names = model.model.exog_names # +1 for resids n_rows = (len(design_info.terms) - _has_intercept(design_info) + 1) pr_test = "PR(>%s)" % test names = ['df', 'sum_sq', 'mean_sq', test, pr_test] table = DataFrame(np.zeros((n_rows, 5)), columns=names) if typ in [1, "I"]: return anova1_lm_single(model, endog, exog, nobs, design_info, table, n_rows, test, pr_test, robust) elif typ in [2, "II"]: return anova2_lm_single(model, design_info, n_rows, test, pr_test, robust) elif typ in [3, "III"]: return anova3_lm_single(model, design_info, n_rows, test, pr_test, robust) elif typ in [4, "IV"]: raise NotImplementedError("Type IV not yet implemented") else: # pragma: no cover raise ValueError("Type %s not understood" % str(typ))
def anova3_lm_single(model, design_info, n_rows, test, pr_test, robust): n_rows += _has_intercept(design_info) terms_info = design_info.terms names = ['sum_sq', 'df', test, pr_test] table = DataFrame(np.zeros((n_rows, 4)), columns = names) cov = _get_covariance(model, robust) col_order = [] index = [] for i, term in enumerate(terms_info): # grab term, hypothesis is that term == 0 cols = design_info.slice(term) L1 = np.eye(model.model.exog.shape[1])[cols] L12 = L1 r = L1.shape[0] if test == 'F': f = model.f_test(L12, cov_p=cov) table.ix[i][test] = test_value = f.fvalue table.ix[i][pr_test] = f.pvalue # need to back out SSR from f_test table.ix[i]['df'] = r #col_order.append(cols.start) index.append(term.name()) table.index = Index(index + ['Residual']) #NOTE: Don't need to sort because terms are an ordered dict now #table = table.ix[np.argsort(col_order + [model.model.exog.shape[1]+1])] # back out sum of squares from f_test ssr = table[test] * table['df'] * model.ssr/model.df_resid table['sum_sq'] = ssr # fill in residual table.ix['Residual'][['sum_sq','df', test, pr_test]] = (model.ssr, model.df_resid, np.nan, np.nan) return table
def anova3_lm_single(model, design_info, n_rows, test, pr_test, robust): n_rows += _has_intercept(design_info) terms_info = design_info.terms names = ['sum_sq', 'df', test, pr_test] table = DataFrame(np.zeros((n_rows, 4)), columns = names) cov = _get_covariance(model, robust) col_order = [] index = [] for i, term in enumerate(terms_info): # grab term, hypothesis is that term == 0 cols = design_info.slice(term) L1 = np.eye(model.model.exog.shape[1])[cols] L12 = L1 r = L1.shape[0] if test == 'F': f = model.f_test(L12, cov_p=cov) table.loc[table.index[i], test] = test_value = f.fvalue table.loc[table.index[i], pr_test] = f.pvalue # need to back out SSR from f_test table.loc[table.index[i], 'df'] = r #col_order.append(cols.start) index.append(term.name()) table.index = Index(index + ['Residual']) #NOTE: Do not need to sort because terms are an ordered dict now #table = table.iloc[np.argsort(col_order + [model.model.exog.shape[1]+1])] # back out sum of squares from f_test ssr = table[test] * table['df'] * model.ssr/model.df_resid table['sum_sq'] = ssr # fill in residual table.loc['Residual', ['sum_sq','df', test, pr_test]] = (model.ssr, model.df_resid, np.nan, np.nan) return table