def _constraints_factor(encoding_matrix, comparison='pairwise', k_params=None,
                        idx_start=None):
    """Create a constraint matrix for a factor from its encoding matrix.

    Parameters
    ----------
    encoding_matrix : ndarray
        Contrast matrix for the encoding of a factor as defined by patsy.
        The number of rows should be equal to the number of levels or
        categories of the factor, the number of columns should be equal to
        the number of parameters for this factor.
    comparison : str
        Currently only 'pairwise' (aliases 'pw' and 'pairs') is implemented.
        The restriction matrix can be used for testing the hypothesis that
        all pairwise differences are zero.
    k_params : int, optional
        Number of parameters. If given, the factor restrictions are embedded
        in a full restriction matrix with `k_params` columns.
    idx_start : int, optional
        Index of the first parameter of this factor. The restrictions on the
        factor are inserted as a block in the full restriction matrix
        starting at column with index `idx_start`. Required whenever
        `k_params` is given.

    Returns
    -------
    contrast : ndarray
        Contrast or restriction matrix that can be used in hypothesis test
        of model results. If `k_params` is given, the number of columns is
        `k_params`; otherwise it is the number of columns of
        `encoding_matrix`.

    Raises
    ------
    NotImplementedError
        If `comparison` is not one of the supported pairwise aliases.
    ValueError
        If `k_params` is given but `idx_start` is None.
    """
    cm = encoding_matrix
    # Only the number of levels (rows) is needed; the two-element unpacking
    # is kept so that a non 2-dim encoding matrix is still rejected here.
    k_level, _ = cm.shape

    # Validate the arguments before importing or computing anything, so that
    # invalid calls fail fast and independently of the contrast computation.
    if comparison not in ('pairwise', 'pw', 'pairs'):
        raise NotImplementedError(
            'currently only pairwise comparison is implemented')
    if k_params is not None and idx_start is None:
        raise ValueError("if k_params is not None, then idx_start is "
                         "required")

    # Function-local import, as in the original implementation.
    import statsmodels.sandbox.stats.multicomp as mc

    # Negated all-pairs contrast over the factor levels, mapped to the
    # parameter space of this factor through the encoding matrix.
    c_all = -mc.contrast_allpairs(k_level)
    contrasts = c_all.dot(cm)

    if k_params is not None:
        # Place the factor block inside a matrix with k_params columns.
        contrasts = _embed_constraints(contrasts, k_params, idx_start)
    return contrasts
def t_test_pairwise(result, term_name, method='hs', alpha=0.05,
                    factor_labels=None, ignore=False):
    """
    Perform pairwise t_test with multiple testing corrected p-values.

    This uses the formula design_info encoding contrast matrix and should
    work for all encodings of a main effect.

    Parameters
    ----------
    result : result instance
        The results of an estimated model with a categorical main effect.
    term_name : str
        Name of the term for which pairwise comparisons are computed.
        Term names for categorical effects are created by patsy and
        correspond to the main part of the exog names.
    method : {str, list[str]}
        Multiple testing p-value correction, default is 'hs',
        see stats.multipletesting.
    alpha : float
        Significance level for multiple testing reject decision.
    factor_labels : {list[str], None}
        Labels for the factor levels used for pairwise labels. If not
        provided, then the labels from the formula design_info are used.
    ignore : bool
        Turn off some of the exceptions raised by input checks.

    Returns
    -------
    MultiCompResult
        The results are stored as attributes, the main attributes are the
        following two. Other attributes are added for debugging purposes
        or as background information.

        - result_frame : pandas DataFrame with t_test results and multiple
          testing corrected p-values.
        - contrasts : matrix of constraints of the null hypothesis in the
          t_test.

    Raises
    ------
    ValueError
        If `term_name` is not one of the term names of the design_info, if
        the term has more than one factor (interaction) and `ignore` is
        False, or if `factor_labels` does not have one label per level.

    Notes
    -----
    Status: experimental. Currently only checked for treatment coding with
    and without specified reference level.

    Currently there are no multiple testing corrected confidence intervals
    available.
    """
    desinfo = result.model.data.design_info

    # Report an unknown term explicitly instead of the generic
    # "... is not in list" message raised by ``list.index``.
    if term_name not in desinfo.term_names:
        raise ValueError(
            "term_name %r is not a term of the model, available terms are %r"
            % (term_name, list(desinfo.term_names)))
    term_idx = desinfo.term_names.index(term_name)
    term = desinfo.terms[term_idx]
    # column index of the first parameter that belongs to this term
    idx_start = desinfo.term_slices[term].start

    if not ignore and len(term.factors) > 1:
        raise ValueError('interaction effects not yet supported')
    # With ``ignore=True`` only the first factor of the term is used.
    factor = term.factors[0]

    cat = desinfo.factor_infos[factor].categories
    if factor_labels is not None:
        if len(factor_labels) == len(cat):
            cat = factor_labels
        else:
            raise ValueError(
                "factor_labels has the wrong length, should be %d" % len(cat))

    k_level = len(cat)
    # encoding (contrast) matrix of the factor as stored in the design_info
    cm = desinfo.term_codings[term][0].contrast_matrices[factor].matrix

    k_params = len(result.params)
    labels = _get_pairs_labels(k_level, cat)

    # Function-local import, as in the original implementation.
    import statsmodels.sandbox.stats.multicomp as mc

    # Negated all-pairs contrast over the levels, mapped through the
    # encoding matrix and then embedded as a block of the full
    # (n_pairs, k_params) restriction matrix starting at ``idx_start``.
    c_all_pairs = -mc.contrast_allpairs(k_level)
    contrasts_sub = c_all_pairs.dot(cm)
    contrasts = _embed_constraints(contrasts_sub, k_params, idx_start)

    res_df = t_test_multi(result, contrasts, method=method, ci_method=None,
                          alpha=alpha, contrast_names=labels)
    res = MultiCompResult(result_frame=res_df,
                          contrasts=contrasts,
                          term=term,
                          contrast_labels=labels,
                          term_encoding_matrix=cm)
    return res