def typeIII(response, ancova, recarray):
    """
    Build an ANCOVA table using type III sums of squares.

    The full design of `ancova` is fit once with OLS and every contrast
    listed in ``ancova.contrast_names`` is F-tested against that single
    fit. A closing 'Residuals' row carries the residual sum of squares
    and residual degrees of freedom of the fit, with NaN in the F and
    p-value columns.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response

    Outputs
    -------

    The value returned by ML.rec_append_fields: the 'contrast' name
    column extended with the fields 'SS', 'df', 'MS', 'F' and 'p_value',
    one row per contrast followed by the 'Residuals' row.
    """
    design = ancova.formula.design(recarray, return_float=True)
    full_fit = OLS(recarray[response], design).fit()

    resid_ss = np.sum(full_fit.resid**2)
    resid_df = full_fit.df_resid

    labels = []
    f_stats = []
    dof = []
    sum_sq = []
    p_values = []

    for label in ancova.contrast_names:
        test = full_fit.f_test(ancova.contrast_matrices[label])
        labels.append(label)
        f_stats.append(test.fvalue)
        dof.append(test.df_num)
        p_values.append(test.pvalue)
        # F = (SS / df_num) / scale, so SS is recovered as F * scale * df_num.
        sum_sq.append(test.fvalue * full_fit.scale * test.df_num)

    # Residual row: no F statistic or p-value applies to it.
    labels.append('Residuals')
    sum_sq.append(resid_ss)
    dof.append(resid_df)
    f_stats.append(np.nan)
    p_values.append(np.nan)

    # The name column is sized to hold the longest contrast name.
    width = max(len(label) for label in labels)
    table = np.array(labels, np.dtype([('contrast', 'S%d' % width)]))
    mean_sq = np.array(sum_sq) / np.array(dof)
    return ML.rec_append_fields(
        table,
        ['SS', 'df', 'MS', 'F', 'p_value'],
        [sum_sq, dof, mean_sq, f_stats, p_values])
def typeII(response, ancova, recarray):
    """
    Build an ANCOVA table using type II sums of squares.

    For each contrast, a sub-model is obtained from
    ``ancova.all_but_above(expr, factors)`` and fit with OLS; the sum of
    squares for the contrast comes from the F test of that contrast
    within the sub-model. The reported F statistic and p-value use the
    residual mean square and residual degrees of freedom of the full
    model. A closing 'Residuals' row carries the full model's residual
    sum of squares and degrees of freedom, with NaN for F and p-value.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response

    Outputs
    -------

    The value returned by ML.rec_append_fields: the 'contrast' name
    column extended with the fields 'SS', 'df', 'MS', 'F' and 'p_value',
    one row per contrast followed by the 'Residuals' row.
    """
    Y = recarray[response]

    # Full model: provides the error mean square used in every F ratio.
    full_design = ancova.formula.design(recarray, return_float=True)
    full_fit = OLS(Y, full_design).fit()
    SSE_F = np.sum(full_fit.resid**2)
    df_F = full_fit.df_resid

    names = []
    sss = []
    fs = []
    dfs = []
    pvals = []

    terms = zip(ancova.contrast_names, ancova.sequence())
    for name, (expr, factors) in terms:
        sub_model = ancova.all_but_above(expr, factors)
        contrast = ancova.contrasts[name]
        sub_design, sub_contrasts = sub_model.formula.design(
            recarray, contrasts={'C': contrast})
        sub_fit = OLS(Y, sub_design).fit()

        sse_sub = np.sum(sub_fit.resid**2)
        df_sub = sub_fit.df_resid
        ftest = sub_fit.f_test(sub_contrasts['C'])

        # Residual SS / df of the sub-model with the contrast removed,
        # reconstructed from the F test rather than by refitting.
        sse_reduced = sse_sub + ftest.fvalue * ftest.df_num * (sse_sub / df_sub)
        df_reduced = df_sub + ftest.df_num

        ss_term = sse_reduced - sse_sub
        df_term = df_reduced - df_sub
        f_stat = (ss_term / df_term) / (SSE_F / df_F)

        sss.append(ss_term)
        dfs.append(ftest.df_num)
        fs.append(f_stat)
        pvals.append(f_dbn.sf(f_stat, df_term, df_F))
        names.append(name)

    # Residual row: no F statistic or p-value applies to it.
    names.append('Residuals')
    sss.append(SSE_F)
    dfs.append(df_F)
    fs.append(np.nan)
    pvals.append(np.nan)

    # The name column is sized to hold the longest contrast name.
    width = max(len(n) for n in names)
    table = np.array(names, np.dtype([('contrast', 'S%d' % width)]))
    mean_sq = np.array(sss) / np.array(dfs)
    return ML.rec_append_fields(
        table,
        ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, mean_sq, fs, pvals])
def typeI(response, ancova, recarray):
    """
    Produce an ANCOVA table from a given ANCOVA formula
    with type I (sequential) sums of squares, where the order is based
    on the order of terms in the contrast_names of ancova.

    The first row uses ``sum(Y**2)`` minus the residual sum of squares
    of the model built from ``ancova.formulae[0]``. Each later row is
    the drop in residual sum of squares when the next entry of
    ``ancova.formulae`` is added to the model. Every F statistic divides
    by the residual mean square of the full model, so every p-value is
    taken from an F distribution whose denominator degrees of freedom
    are the full model's residual degrees of freedom.

    Inputs
    ------

    response: str
              field name of response in recarray

    ancova: ANCOVA
            specifies the model to be fit

    recarray: np.ndarray
              should contain all field names in the terms of ancova
              as well as response

    Outputs
    -------

    The value returned by ML.rec_append_fields: the 'contrast' name
    column extended with the fields 'SS', 'df', 'MS', 'F' and 'p_value',
    one row per term followed by a 'Residuals' row taken from the last
    sequential model that was fit.
    """
    Y = recarray[response]

    # Full model: provides the error mean square used in every F ratio.
    X = ancova.formula.design(recarray, return_float=True)
    results = OLS(Y, X).fit()
    SSE_F = np.sum(results.resid**2)
    df_F = results.df_resid
    mse_full = SSE_F / df_F

    # First term: compared against the empty model, whose residual sum
    # of squares is sum(Y**2) on Y.shape[0] degrees of freedom.
    first_design = ancova.formulae[0].design(recarray, return_float=True)
    results = OLS(Y, first_design).fit()
    SSE_old = np.sum(results.resid**2)
    df_old = results.df_resid

    ss_first = np.sum(Y**2) - SSE_old
    df_first = Y.shape[0] - df_old

    names = [ancova.contrast_names[0]]
    sss = [ss_first]
    dfs = [df_first]
    fs = [(ss_first / df_first) / mse_full]
    pvals = [f_dbn.sf(fs[-1], df_first, df_F)]

    for d in range(1, len(ancova.formulae)):
        terms = [t for f in ancova.formulae[:d + 1] for t in f.terms]

        # JT: this is not numerically efficient
        # could be done by updating some factorization of the full X

        X = Formula(terms).design(recarray, return_float=True)
        results = OLS(Y, X).fit()
        SSE_new = np.sum(results.resid**2)
        df_new = results.df_resid

        ss_term = SSE_old - SSE_new
        df_term = df_old - df_new
        f_stat = (ss_term / df_term) / mse_full

        sss.append(ss_term)
        dfs.append(df_term)
        fs.append(f_stat)
        # The F ratio's denominator is the full-model mean square, so the
        # reference distribution uses df_F (not this step's df_new).
        pvals.append(f_dbn.sf(f_stat, df_term, df_F))
        names.append(ancova.contrast_names[d])

        SSE_old = SSE_new
        df_old = df_new

    # Add in the "residual row". SSE_old / df_old now hold the values of
    # the last sequential model fit, which keeps this well defined even
    # when ancova.formulae has a single entry and the loop never ran.
    sss.append(SSE_old)
    dfs.append(df_old)
    pvals.append(np.nan)
    fs.append(np.nan)
    names.append('Residuals')

    # The name column is sized to hold the longest contrast name.
    width = max(len(n) for n in names)
    result = np.array(names, np.dtype([('contrast', 'S%d' % width)]))
    result = ML.rec_append_fields(
        result,
        ['SS', 'df', 'MS', 'F', 'p_value'],
        [sss, dfs, np.array(sss) / np.array(dfs), fs, pvals])
    return result