def plot_contrast_matrix(contrast_def, design_matrix, colorbar=False, ax=None):
    """Creates plot for contrast definition.

    Parameters
    ----------
    contrast_def : str or array of shape (n_col) or (n_contrasts, n_col)
        where ``n_col`` is the number of columns of the design matrix.
        A string is translated into a coefficient matrix with the linear
        constraint of the Patsy library, using the column names of
        ``design_matrix``. Basically one can use the name of the conditions
        as they appear in the design matrix combined with operators /*+-
        and numbers. Please check the patsy documentation for formula
        examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
        Any other value is plotted as-is as the numeric contrast.

    design_matrix : pandas DataFrame
        Only its column names are used: to interpret a string
        ``contrast_def`` and to label the x ticks.

    colorbar : Boolean, optional (default False)
        Include a colorbar in the contrast matrix plot.

    ax : matplotlib Axes object, optional (default None)
        Axes to draw the contrast into. If None, a new 8 x 4 inch figure
        is created and its current axes are used.

    Returns
    -------
    ax : matplotlib Axes object
        The axes the contrast was drawn into.
    """
    design_column_names = design_matrix.columns.tolist()
    if isinstance(contrast_def, str):
        design_info = DesignInfo(design_column_names)
        contrast_def = design_info.linear_constraint(contrast_def).coefs

    if ax is None:
        plt.figure(figsize=(8, 4))
        ax = plt.gca()

    # Symmetric colour limits so that a zero weight maps to mid-gray.
    maxval = np.max(np.abs(contrast_def))

    # Promote a single contrast vector to one row; a plain 2-d ndarray
    # replaces the discouraged np.matrix type.
    con_mx = np.array(contrast_def, ndmin=2)

    mat = ax.matshow(con_mx, aspect='equal',
                     extent=[0, con_mx.shape[1], 0, con_mx.shape[0]],
                     cmap='gray', vmin=-maxval, vmax=maxval)

    # NOTE(review): set_label sets the Axes artist label, not the x-axis
    # label -- confirm whether set_xlabel was intended.
    ax.set_label('conditions')
    ax.set_ylabel('')
    ax.set_yticklabels(['' for x in ax.get_yticklabels()])

    # With the extent above, cell k spans [k, k + 1]; ticks are placed at
    # 1, 2, ..., n_col, i.e. on the right edge of each cell, matching the
    # right-aligned rotated labels set below.
    ax.xaxis.set(ticks=np.arange(1.0, len(design_column_names) + 1.0),
                 ticklabels=design_column_names)
    ax.set_xticklabels(design_column_names, rotation=90, ha='right')

    if colorbar:
        # Attach the colorbar to the axes that was drawn into, so that a
        # caller-supplied ``ax`` which is not pyplot's current axes/figure
        # still gets the colorbar next to the contrast image.
        ax.figure.colorbar(mat, ax=ax, fraction=0.025, pad=0.04)

    plt.tight_layout()
    return ax
def _get_contrast(second_level_contrast, design_matrix):
    """Validate a single-column contrast and return the column it selects.

    Parameters
    ----------
    second_level_contrast : str, array, None or Patsy constraint
        A string must be the exact name of a design matrix column.
        ``None`` is only accepted when the design matrix has one column.
        Anything else must have exactly one non-zero entry.

    design_matrix : pandas DataFrame
        Design matrix whose column names are looked up.

    Returns
    -------
    contrast : str
        Name of the selected design matrix column.

    Raises
    ------
    ValueError
        If the string is not a column name, if no contrast is given for a
        multi-column design, or if the number of non-zero entries is not
        exactly one.
    """
    # Strings are taken literally as a column name.
    if isinstance(second_level_contrast, str):
        if second_level_contrast not in design_matrix.columns.tolist():
            raise ValueError(
                '"' + second_level_contrast + '" is not a valid contrast name')
        return second_level_contrast

    if second_level_contrast is None:
        if design_matrix.shape[1] != 1:
            raise ValueError('No second-level contrast is specified.')
        # Single column: the only possible contrast.
        second_level_contrast = np.ones([1])
    elif np.nonzero(second_level_contrast)[0].size != 1:
        raise ValueError('second_level_contrast must be a list of 0s and 1s')

    # Build a boolean selector over the design matrix columns.
    if isinstance(second_level_contrast, np.ndarray):
        selector = np.asarray(second_level_contrast, dtype=bool)
    else:
        column_info = DesignInfo(design_matrix.columns.tolist())
        parsed = column_info.linear_constraint(second_level_contrast)
        selector = np.asarray(parsed.coefs, dtype=bool).ravel()

    column_names = np.asarray(design_matrix.columns.tolist())
    return column_names[selector][0]
def compute_rfx_contrast(imgs, design_matrix, contrast_def, mask=None,
                         noise_model='ols', stat_type='t',
                         output_type='z_score'):
    """Fit a GLM on a set of images and compute a contrast on the fit.

    Parameters
    ----------
    imgs : list of images or numpy.ndarray
        If a list, each element must provide ``get_data()``, ``shape`` and
        ``affine``; the data are stacked and flattened to one row per
        image. If an array, it is used directly as the data matrix.

    design_matrix : pandas DataFrame
        Its ``values`` form the design passed to ``run_glm`` and its column
        names are used to interpret formula contrasts.

    contrast_def : str, numpy.ndarray, or list/tuple of (str or ndarray)
        Contrast(s) to compute. Entries that are not arrays are translated
        with ``DesignInfo.linear_constraint``.

    mask : optional
        Accepted but not used by this function.

    noise_model : str, optional
        Forwarded to ``run_glm``.

    stat_type : str, optional
        Forwarded to ``compute_contrast``.

    output_type : str, optional
        Name of the method called on the contrast object to obtain the
        returned values.

    Returns
    -------
    values : nib.Nifti1Image or numpy.ndarray
        A Nifti1Image shaped like ``imgs[0]`` when ``imgs`` is a list,
        otherwise whatever the selected contrast method returns.

    Raises
    ------
    ValueError
        If ``imgs`` or ``contrast_def`` has an unsupported type.
    """
    design_info = DesignInfo(design_matrix.columns.tolist())

    if isinstance(imgs, list):
        # NOTE(review): get_data() is deprecated in recent nibabel
        # releases -- confirm before switching to get_fdata(), which
        # changes the returned dtype.
        Y = np.stack([i.get_data() for i in imgs]).reshape(len(imgs), -1)
    elif isinstance(imgs, np.ndarray):
        Y = imgs
    else:
        raise ValueError(f"Unknown format for Y ({type(imgs)}).")

    X = design_matrix.values
    labels, results = run_glm(Y, X, noise_model=noise_model)

    if isinstance(contrast_def, (np.ndarray, str)):
        con_vals = [contrast_def]
    elif isinstance(contrast_def, (list, tuple)):
        # Work on a copy: tuples do not support item assignment and the
        # caller's list must not be rewritten by the translation below.
        con_vals = list(contrast_def)
    else:
        raise ValueError('contrast_def must be an array or str or list of'
                         ' (array or str)')

    # Translate formulas to coefficient arrays with patsy.
    for cidx, con in enumerate(con_vals):
        if not isinstance(con, np.ndarray):
            con_vals[cidx] = design_info.linear_constraint(con).coefs

    contrast = compute_contrast(labels, results, con_vals, stat_type)
    values = getattr(contrast, output_type)()

    if isinstance(imgs, list):
        # Put the flat result back on the grid of the first input image.
        values = nib.Nifti1Image(values.reshape(imgs[0].shape),
                                 affine=imgs[0].affine)
    return values
def fit_constrained(self, constraints, start_params=None, **fit_kwds):
    """Fit the model subject to linear equality constraints.

    The constraints have the form ``R params = q`` where ``R`` is the
    constraint matrix and ``q`` the vector of constraint values. The
    constrained estimate is computed by
    ``statsmodels.base._constraints.fit_constrained`` and then attached to
    a results instance obtained from ``self.fit``.

    Parameters
    ----------
    constraints : formula expression or tuple
        If it is a tuple, then the constraint needs to be given by two
        arrays (constraint_matrix, constraint_value), i.e. (R, q).
        Otherwise, the constraints can be given as strings or list of
        strings. See t_test for details.
    start_params : None or array_like
        Starting values for the optimization. `start_params` needs to be
        given in the original parameter space and are internally
        transformed.
    **fit_kwds : keyword arguments
        Passed as ``fit_kwds`` to the constrained fit helper.

    Returns
    -------
    results : Results instance
        The wrapper returned by ``self.fit``, with the constrained
        estimates written onto its ``_results``.
    """
    from patsy import DesignInfo
    from statsmodels.base._constraints import fit_constrained

    # same pattern as in base.LikelihoodModel.t_test
    constraint = DesignInfo(self.exog_names).linear_constraint(constraints)
    constraint_matrix = constraint.coefs
    constraint_values = constraint.constants

    # TODO: add start_params option, need access to transformation
    # fit_constrained needs to do the transformation
    params, cov, res_constr = fit_constrained(
        self, constraint_matrix, constraint_values,
        start_params=start_params, fit_kwds=fit_kwds)

    # Create a dummy results instance (maxiter=0) to carry the constrained
    # estimates. TODO: wire up properly
    res = self.fit(start_params=params, maxiter=0)

    # ``res`` is a wrapper; the estimates live on the wrapped results.
    inner = res._results
    inner.params = params
    inner.normalized_cov_params = cov

    # Shift the degrees of freedom by the number of constraints.
    n_constraints = len(constraint_values)
    inner.df_resid += n_constraints
    inner.df_model -= n_constraints

    inner.constraints = constraint
    inner.k_constr = n_constraints
    inner.results_constrained = res_constr

    # TODO: the next is not the best. history should be in results
    inner.model.history = res_constr.model.history
    inner.mu = res_constr.mu
    return res
def _get_con_val(second_level_contrast, design_matrix):
    """Check a second-level contrast and return its coefficient values.

    Parameters
    ----------
    second_level_contrast : numpy.ndarray, None or Patsy constraint
        ``None`` is only accepted when the design matrix has a single
        column. An array is returned unchanged after a null check.
        Anything else is translated with ``DesignInfo.linear_constraint``
        using the design matrix column names.

    design_matrix : pandas DataFrame
        Design matrix the contrast refers to.

    Returns
    -------
    con_val : numpy.ndarray
        The contrast coefficients.

    Raises
    ------
    ValueError
        If no contrast is given for a multi-column design matrix, or if an
        array contrast contains only zeros.
    """
    if second_level_contrast is None:
        if design_matrix.shape[1] != 1:
            raise ValueError('No second-level contrast is specified.')
        # Single column: the only possible contrast.
        second_level_contrast = np.ones([1])

    # Non-array definitions are delegated to patsy.
    if not isinstance(second_level_contrast, np.ndarray):
        column_info = DesignInfo(design_matrix.columns.tolist())
        return column_info.linear_constraint(second_level_contrast).coefs

    if np.all(second_level_contrast == 0):
        raise ValueError('Contrast is null')
    return second_level_contrast
def fit_constrained_wrap(model, constraints, start_params=None, **fit_kwds):
    """Constrained fit that returns a results instance.

    This is a development version for fit_constrained methods or
    fit_constrained as standalone function.

    It will not work correctly for all models because creating a new
    results instance is not standardized for use outside the `fit` methods,
    and might need adjustments for this.

    This is the prototype for the fit_constrained method that has been
    added to Poisson and GLM.

    Parameters
    ----------
    model : Model
        Model to fit; must provide ``exog_names`` and ``fit``.
    constraints : tuple (constraint_matrix, constraint_values)
        Passed to ``DesignInfo.linear_constraint``.
    start_params : array-like or None
        Forwarded to the constrained fit.
    **fit_kwds
        Forwarded as ``fit_kwds`` to the constrained fit. ``cov_type`` is
        also read here to decide how ``normalized_cov_params`` is set.

    Returns
    -------
    res : results wrapper
        The result of ``model.fit`` with the constrained estimates written
        onto its ``_results``.
    """
    # TODO: decide whether to move the imports
    from patsy import DesignInfo

    # same pattern as in base.LikelihoodModel.t_test
    constraint = DesignInfo(model.exog_names).linear_constraint(constraints)
    constraint_matrix = constraint.coefs
    constraint_values = constraint.constants

    # TODO: add start_params option, need access to transformation
    # fit_constrained needs to do the transformation
    params, cov, res_constr = fit_constrained(
        model, constraint_matrix, constraint_values,
        start_params=start_params, fit_kwds=fit_kwds)

    # Create a dummy results instance (maxiter=0) that will carry the
    # constrained estimates. TODO: wire up properly
    res = model.fit(start_params=params, maxiter=0, warn_convergence=False)

    # ``res`` is a wrapper; the estimates live on the wrapped results.
    inner = res._results
    inner.params = params
    inner.cov_params_default = cov

    # Only the nonrobust covariance can be expressed as normalized * scale.
    is_nonrobust = fit_kwds.get('cov_type', 'nonrobust') == 'nonrobust'
    inner.normalized_cov_params = (
        cov / res_constr.scale if is_nonrobust else None)

    # Shift the degrees of freedom by the number of constraints.
    # FIXME: don't alter these in-place
    n_constraints = len(constraint_values)
    inner.df_resid += n_constraints
    inner.df_model -= n_constraints

    inner.constraints = constraint
    inner.k_constr = n_constraints
    inner.results_constrained = res_constr
    return res
def fit_constrained_wrap(model, constraints, start_params=None, **fit_kwds):
    """Constrained fit that returns a results instance.

    This is a development version for fit_constrained methods or
    fit_constrained as standalone function.

    It will not work correctly for all models because creating a new
    results instance is not standardized for use outside the `fit` methods,
    and might need adjustments for this.

    This is the prototype for the fit_constrained method that has been
    added to Poisson and GLM.

    Parameters
    ----------
    model : Model
        Model to fit; must provide ``exog_names`` and ``fit``.
    constraints : formula expression or tuple
        Passed to ``DesignInfo.linear_constraint``.
    start_params : array-like or None
        Forwarded to the constrained fit.
    **fit_kwds
        Forwarded as ``fit_kwds`` to the constrained fit. ``cov_type`` is
        also read here to decide how ``normalized_cov_params`` is set.

    Returns
    -------
    res : results wrapper
        The result of ``model.fit`` with the constrained estimates written
        onto its ``_results``.
    """
    # TODO: decide whether to move the imports
    # NOTE: ``fit_constrained`` is resolved from the enclosing module; it
    # must be imported there if this function is copied elsewhere.
    from patsy import DesignInfo

    # same pattern as in base.LikelihoodModel.t_test
    lin_con = DesignInfo(model.exog_names).linear_constraint(constraints)

    # TODO: add start_params option, need access to transformation
    # fit_constrained needs to do the transformation
    params, cov, res_constr = fit_constrained(
        model, lin_con.coefs, lin_con.constants,
        start_params=start_params, fit_kwds=fit_kwds)

    # Create a dummy results instance. TODO: wire up properly
    res = model.fit(start_params=params, maxiter=0, warn_convergence=False)
    wrapped = res._results  # we get a wrapper back

    if fit_kwds.get('cov_type', 'nonrobust') == 'nonrobust':
        normalized_cov = cov / res_constr.scale
    else:
        normalized_cov = None

    k_constr = len(lin_con.constants)

    # Attributes are written in this fixed order.
    overrides = (
        ('params', params),
        ('cov_params_default', cov),
        ('normalized_cov_params', normalized_cov),
        ('df_resid', wrapped.df_resid + k_constr),
        ('df_model', wrapped.df_model - k_constr),
        ('constraints', lin_con),
        ('k_constr', k_constr),
        ('results_constrained', res_constr),
    )
    for attr_name, attr_value in overrides:
        setattr(wrapped, attr_name, attr_value)
    return res
def compute_fxe_contrast(self, contrast_def, stat_type='t', run=None,
                         output_type='z_score'):
    """Compute a contrast on the fitted GLM, across all runs or for one run.

    Parameters
    ----------
    contrast_def : str, numpy.ndarray, or list/tuple of (str or ndarray)
        Contrast(s) to compute. Entries that are not arrays are translated
        with ``DesignInfo.linear_constraint`` using the design matrix
        column names.

    stat_type : str, optional
        Forwarded to the contrast computation.

    run : optional
        If None, a fixed effect over everything stored in ``self.glm`` is
        computed (column names are taken from the first design matrix).
        Otherwise ``run`` indexes the stored results, labels and design
        matrices and a single-run contrast is computed.

    output_type : str, optional
        Name of the method called on the contrast object to obtain the
        returned values.

    Returns
    -------
    values
        The selected contrast output, passed through ``masking.unmask``
        when ``self.mask`` is set.

    Raises
    ------
    ValueError
        If the GLM has not been run yet or ``contrast_def`` has an
        unsupported type.
    """
    self.logger.info(f"Computing contrast: {contrast_def} for task {self.task} ...")

    if self.glm is None:
        raise ValueError("GLM has not been run yet!")

    if run is None:
        results = self.glm['results']
        labels = self.glm['labels']
        dms = self.glm['dms']
        design_info = DesignInfo(dms[0].columns.tolist())
    else:
        results = self.glm['results'][run]
        labels = self.glm['labels'][run]
        dms = self.glm['dms'][run]
        design_info = DesignInfo(dms.columns.tolist())

    if isinstance(contrast_def, (np.ndarray, str)):
        con_vals = [contrast_def]
    elif isinstance(contrast_def, (list, tuple)):
        # Work on a copy: tuples do not support item assignment and the
        # caller's list must not be rewritten by the translation below.
        con_vals = list(contrast_def)
    else:
        raise ValueError('contrast_def must be an array or str or list of'
                         ' (array or str)')

    # Translate formulas to coefficient arrays with patsy.
    for cidx, con in enumerate(con_vals):
        if not isinstance(con, np.ndarray):
            con_vals[cidx] = design_info.linear_constraint(con).coefs

    if run is None:
        contrast = _fixed_effect_contrast(labels, results, con_vals,
                                          stat_type)
    else:
        contrast = compute_contrast(labels, results, con_vals, stat_type)

    values = getattr(contrast, output_type)()
    if self.mask is not None:
        return masking.unmask(values, self.mask)
    return values
def compute_symbolic_transform(self, expression, exclude=()):
    """Turn a symbolic expression like "-A1/2" into a transform matrix.

    This is a bit of a hack: the patsy parser that is re-used here
    actually converts arbitrary *combinations* of linear *constraints*
    into matrices, and is designed to interpret strings like
    "A1=2, rhz*2=lhz". Here the output is interpreted differently: only
    one expression is allowed, and it specifies some value that is
    computed from the data and then added to each channel not mentioned
    in ``exclude``.

    Parameters
    ----------
    expression : str
        Expression over the names in ``self.channel_names``.
    exclude : collection of str, optional
        Channel names whose row of the transform is left untouched.
        Only membership tests are performed on it.

    Returns
    -------
    transform : numpy.ndarray
        Identity matrix of size ``self.num_channels`` with the expression
        coefficients added to the row of every non-excluded channel.

    Raises
    ------
    ValueError
        If more than one expression is given, or if the expression has a
        non-zero constant term.
    """
    transform = np.eye(self.num_channels)
    lc = DesignInfo(self.channel_names).linear_constraint(expression)

    # Check for the weird things that make sense for linear constraints,
    # but not for our hack here:
    if lc.coefs.shape[0] != 1:
        raise ValueError("only one expression allowed!")
    if np.any(lc.constants != 0):
        raise ValueError("transformations must be linear, not affine!")

    for i, channel_name in enumerate(self.channel_names):
        if channel_name not in exclude:
            transform[i, :] += lc.coefs[0, :]
    return transform
def _multivariate_test(hypotheses, exog_names, endog_names, fn):
    """Run a multivariate linear hypothesis test for each hypothesis.

    Parameters
    ----------
    hypotheses : iterable of tuples
        Each entry is ``(name, L)``, ``(name, L, M)`` or
        ``(name, L, M, C)``. ``L`` is the contrast matrix, ``M`` the
        transform matrix and ``C`` the constant matrix. ``L`` and ``M``
        may be 2-d arrays or contain strings, in which case they are
        translated with ``DesignInfo.linear_constraint``. A missing ``M``
        defaults to the identity and a missing ``C`` to zeros.
    exog_names : list of str
        Names of the independent variables; its length is the required
        number of columns of ``L``.
    endog_names : list of str
        Names of the dependent variables; its length is the required
        number of rows of ``M``.
    fn : callable
        Called as ``fn(L, M, C)``; must return ``E, H, q, df_resid``.

    Returns
    -------
    results : dict
        Maps each hypothesis name to a dict with keys ``'stat'``,
        ``'contrast_L'``, ``'transform_M'`` and ``'constant_C'``.

    Raises
    ------
    ValueError
        If a hypothesis tuple has the wrong length or one of the matrices
        fails its type/shape validation.
    """
    k_xvar = len(exog_names)
    k_yvar = len(endog_names)
    results = {}
    for hypo in hypotheses:
        if len(hypo) == 2:
            name, L = hypo
            M = None
            C = None
        elif len(hypo) == 3:
            name, L, M = hypo
            C = None
        elif len(hypo) == 4:
            name, L, M, C = hypo
        else:
            raise ValueError('hypotheses must be a tuple of length 2, 3 or 4.'
                             ' len(hypotheses)=%d' % len(hypo))

        # Contrast matrix: formulas go through patsy, arrays are validated.
        if any(isinstance(j, string_types) for j in L):
            L = DesignInfo(exog_names).linear_constraint(L).coefs
        else:
            if not isinstance(L, np.ndarray) or len(L.shape) != 2:
                raise ValueError('Contrast matrix L must be a 2-d array!')
            if L.shape[1] != k_xvar:
                raise ValueError('Contrast matrix L should have the same '
                                 'number of columns as exog! %d != %d' %
                                 (L.shape[1], k_xvar))

        # Transform matrix: identity by default.
        if M is None:
            M = np.eye(k_yvar)
        elif any(isinstance(j, string_types) for j in M):
            M = DesignInfo(endog_names).linear_constraint(M).coefs.T
        else:
            if not isinstance(M, np.ndarray) or len(M.shape) != 2:
                raise ValueError('Transform matrix M must be a 2-d array!')
            if M.shape[0] != k_yvar:
                raise ValueError('Transform matrix M should have the same '
                                 'number of rows as the number of columns '
                                 'of endog! %d != %d' %
                                 (M.shape[0], k_yvar))

        # Constant matrix: zeros by default. The dimensionality is checked
        # here so that a 1-d array fails with the documented ValueError
        # instead of an IndexError in the shape comparisons below.
        if C is None:
            C = np.zeros([L.shape[0], M.shape[1]])
        elif not isinstance(C, np.ndarray) or C.ndim != 2:
            raise ValueError('Constant matrix C must be a 2-d array!')
        if C.shape[0] != L.shape[0]:
            raise ValueError('contrast L and constant C must have the same '
                             'number of rows! %d!=%d' %
                             (L.shape[0], C.shape[0]))
        if C.shape[1] != M.shape[1]:
            raise ValueError('transform M and constant C must have the same '
                             'number of columns! %d!=%d' %
                             (M.shape[1], C.shape[1]))

        E, H, q, df_resid = fn(L, M, C)
        EH = np.add(E, H)
        p = matrix_rank(EH)

        # eigenvalues of inv(E + H)H
        eigv2 = np.sort(eigvals(solve(EH, H)))
        stat_table = multivariate_stats(eigv2, p, q, df_resid)

        results[name] = {
            'stat': stat_table,
            'contrast_L': L,
            'transform_M': M,
            'constant_C': C
        }
    return results
def compute_contrast(self, contrast_def, stat_type=None,
                     output_type='z_score'):
    r"""Generate different outputs corresponding to
    the contrasts provided e.g. z_map, t_map, effects and variance.
    In multi-session case, outputs the fixed effects map.

    Parameters
    ----------
    contrast_def : str or array of shape (n_col) or list of (string or
                   array of shape (n_col))
        where ``n_col`` is the number of columns of the design matrix,
        (one array per run). If only one array is provided when there
        are several runs, it will be assumed that the same contrast is
        desired for all runs. The string can be a formula compatible with
        the linear constraint of the Patsy library. Basically one can use
        the name of the conditions as they appear in the design matrix of
        the fitted model combined with operators /\*+- and numbers.
        Please check the patsy documentation for formula examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
        Formulas are interpreted against the columns of the first design
        matrix. The argument itself is never modified.

    stat_type : {'t', 'F'}, optional
        type of the contrast

    output_type : str, optional
        Type of the output map. Can be 'z_score', 'stat', 'p_value',
        'effect_size', 'effect_variance' or 'all'

    Returns
    -------
    output : Nifti1Image or dict
        The desired output image(s). If ``output_type == 'all'``, then
        the output is a dictionary of images, keyed by the type of image.

    Raises
    ------
    ValueError
        If the model has not been fit, ``contrast_def`` has an unsupported
        type, several contrasts are given but their number differs from
        the number of runs, or ``output_type`` is not valid.
    """
    if self.labels_ is None or self.results_ is None:
        raise ValueError('The model has not been fit yet')

    if isinstance(contrast_def, (np.ndarray, str)):
        con_vals = [contrast_def]
    elif isinstance(contrast_def, (list, tuple)):
        # Work on a copy: tuples do not support item assignment and the
        # caller's list must not be rewritten by the translation below.
        con_vals = list(contrast_def)
    else:
        raise ValueError('contrast_def must be an array or str or list of'
                         ' (array or str)')

    # Translate formulas to vectors with patsy. The DesignInfo is only
    # built when at least one entry actually needs translating.
    design_info = None
    for cidx, con in enumerate(con_vals):
        if isinstance(con, np.ndarray):
            continue
        if design_info is None:
            design_info = DesignInfo(
                self.design_matrices_[0].columns.tolist())
        con_vals[cidx] = design_info.linear_constraint(con).coefs

    n_runs = len(self.labels_)
    n_contrasts = len(con_vals)
    if n_contrasts != n_runs:
        # Only a single contrast can meaningfully be reused for all runs;
        # any other mismatch would pair contrasts with the wrong runs.
        if n_contrasts != 1:
            raise ValueError('%d contrasts given, while there are %d runs'
                             % (n_contrasts, n_runs))
        warn('One contrast given, assuming it for all %d runs' % n_runs)
        con_vals = con_vals * n_runs

    # 'all' is assumed to be the final entry;
    # if adding more, place before 'all'
    valid_types = [
        'z_score', 'stat', 'p_value', 'effect_size', 'effect_variance',
        'all'
    ]
    if output_type not in valid_types:
        raise ValueError(
            'output_type must be one of {}'.format(valid_types))

    contrast = _fixed_effect_contrast(self.labels_, self.results_,
                                      con_vals, stat_type)

    output_types = (valid_types[:-1]
                    if output_type == 'all' else [output_type])

    outputs = {}
    for output_type_ in output_types:
        estimate_ = getattr(contrast, output_type_)()
        # Prepare the returned images
        output = self.masker_.inverse_transform(estimate_)
        contrast_name = str(con_vals)
        output.header['descrip'] = ('%s of contrast %s' %
                                    (output_type_, contrast_name))
        outputs[output_type_] = output

    return outputs if output_type == 'all' else output
def compute_contrast(self, contrast_def, stat_type=None,
                     output_type='z_score'):
    """Generate different outputs corresponding to
    the contrasts provided e.g. z_map, t_map, effects and variance.

    Parameters
    ----------
    contrast_def : str or array of shape (n_col)
        where ``n_col`` is the number of columns of the design matrix,
        The string can be a formula compatible with the linear constraint
        of the Patsy library. Basically one can use the name of the
        conditions as they appear in the design matrix of
        the fitted model combined with operators /*+- and numbers.
        Please check the patsy documentation for formula examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint

    stat_type : {'t', 'F'}, optional
        type of the contrast

    output_type : str, optional
        Type of the output map. Can be 'z_score', 'stat', 'p_value',
        'effect_size' or 'effect_variance'

    Returns
    -------
    output_image : Nifti1Image
        The desired output image

    Raises
    ------
    ValueError
        If the model has not been fit, an array contrast contains only
        zeros, or ``output_type`` is not one of the accepted strings.
    """
    # check model was fit
    if self.labels_ is None or self.results_ is None:
        raise ValueError('The model has not been fit yet')

    # check contrast definition
    if isinstance(contrast_def, np.ndarray):
        con_val = contrast_def
        if np.all(con_val == 0):
            raise ValueError('Contrast is null')
    else:
        design_info = DesignInfo(self.design_matrix_.columns.tolist())
        con_val = design_info.linear_constraint(contrast_def).coefs

    # check output type: non-strings and unknown strings share one message
    valid_types = ['z_score', 'stat', 'p_value', 'effect_size',
                   'effect_variance']
    if (not isinstance(output_type, _basestring)
            or output_type not in valid_types):
        raise ValueError(
            'output_type must be one of "z_score", "stat", "p_value", '
            '"effect_size" or "effect_variance"')

    # Optionally cache the contrast computation; labels and results are
    # excluded from the cache key.
    if self.memory is not None:
        arg_ignore = ['labels', 'results']
        mem_contrast = self.memory.cache(compute_contrast,
                                         ignore=arg_ignore)
    else:
        mem_contrast = compute_contrast
    contrast = mem_contrast(self.labels_, self.results_, con_val,
                            stat_type)

    estimate_ = getattr(contrast, output_type)()

    # Prepare the returned images
    output = self.masker_.inverse_transform(estimate_)
    contrast_name = str(con_val)
    # Use the ``header`` attribute: ``get_header()`` was deprecated and
    # later removed from nibabel images.
    output.header['descrip'] = ('%s of contrast %s' %
                                (output_type, contrast_name))
    return output
def compute_contrast(
        self, second_level_contrast=None, first_level_contrast=None,
        second_level_stat_type=None, output_type='z_score'):
    r"""Generate different outputs corresponding to
    the contrasts provided e.g. z_map, t_map, effects and variance.

    Parameters
    ----------
    second_level_contrast: str or array of shape (n_col), optional
        Where ``n_col`` is the number of columns of the design matrix,
        The string can be a formula compatible with the linear constraint
        of the Patsy library. Basically one can use the name of the
        conditions as they appear in the design matrix of
        the fitted model combined with operators /\*+- and numbers.
        Please check the patsy documentation for formula examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
        The default (None) is accepted if the design matrix has a single
        column, in which case the only possible contrast array([1]) is
        applied; when the design matrix has multiple columns, an error is
        raised.

    first_level_contrast: str or array of shape (n_col) with respect to
                          FirstLevelModel, optional
        In case a list of FirstLevelModel was provided as
        second_level_input, we have to provide a contrast to apply to
        the first level models to get the corresponding list of images
        desired, that would be tested at the second level. In case a
        pandas DataFrame was provided as second_level_input this is the
        map name to extract from the pandas dataframe map_name column.
        It has to be a 't' contrast.

    second_level_stat_type: {'t', 'F'}, optional
        Type of the second level contrast

    output_type: str, optional
        Type of the output map. Can be 'z_score', 'stat', 'p_value',
        'effect_size' or 'effect_variance'

    Returns
    -------
    output_image: Nifti1Image
        The desired output image

    Raises
    ------
    ValueError
        If the model has not been fit, a required contrast is missing or
        null, ``output_type`` is invalid, or the number of effect maps
        does not match the number of design matrix rows.
    """
    if self.second_level_input_ is None:
        raise ValueError('The model has not been fit yet')

    # first_level_contrast check. Only index into the input when it is a
    # list/tuple: a pandas DataFrame input cannot be indexed with [0].
    second_level_input = self.second_level_input_
    if (isinstance(second_level_input, (list, tuple))
            and isinstance(second_level_input[0], FirstLevelModel)):
        if first_level_contrast is None:
            raise ValueError('If second_level_input was a list of '
                             'FirstLevelModel, then first_level_contrast '
                             'is mandatory. It corresponds to the '
                             'second_level_contrast argument of the '
                             'compute_contrast method of FirstLevelModel')

    # check contrast definition
    if second_level_contrast is None:
        if self.design_matrix_.shape[1] == 1:
            second_level_contrast = np.ones([1])
        else:
            raise ValueError('No second-level contrast is specified.')
    if isinstance(second_level_contrast, np.ndarray):
        con_val = second_level_contrast
        if np.all(con_val == 0):
            raise ValueError('Contrast is null')
    else:
        design_info = DesignInfo(self.design_matrix_.columns.tolist())
        constraint = design_info.linear_constraint(second_level_contrast)
        con_val = constraint.coefs

    # check output type: non-strings and unknown strings share one message
    valid_types = ['z_score', 'stat', 'p_value', 'effect_size',
                   'effect_variance']
    if (not isinstance(output_type, _basestring)
            or output_type not in valid_types):
        raise ValueError(
            'output_type must be one of "z_score", "stat", "p_value", '
            '"effect_size" or "effect_variance"')

    # Get effect_maps appropriate for chosen contrast
    effect_maps = _infer_effect_maps(self.second_level_input_,
                                     first_level_contrast)

    # Check design matrix X and effect maps Y agree on number of rows
    if len(effect_maps) != self.design_matrix_.shape[0]:
        raise ValueError(
            'design_matrix does not match the number of maps considered. '
            '%i rows in design matrix do not match with %i maps' %
            (self.design_matrix_.shape[0], len(effect_maps)))

    # Fit an Ordinary Least Squares regression for parametric statistics
    Y = self.masker_.transform(effect_maps)
    if self.memory:
        mem_glm = self.memory.cache(run_glm, ignore=['n_jobs'])
    else:
        mem_glm = run_glm
    labels, results = mem_glm(Y, self.design_matrix_.values,
                              n_jobs=self.n_jobs, noise_model='ols')

    # We save memory if inspecting model details is not necessary
    if self.minimize_memory:
        for key in results:
            results[key] = SimpleRegressionResults(results[key])
    self.labels_ = labels
    self.results_ = results

    # We compute contrast object
    if self.memory:
        mem_contrast = self.memory.cache(compute_contrast)
    else:
        mem_contrast = compute_contrast
    contrast = mem_contrast(self.labels_, self.results_, con_val,
                            second_level_stat_type)

    # We get desired output from contrast object
    estimate_ = getattr(contrast, output_type)()

    # Prepare the returned images
    output = self.masker_.inverse_transform(estimate_)
    contrast_name = str(con_val)
    output.header['descrip'] = (
        '%s of contrast %s' % (output_type, contrast_name))
    return output
def wald_test(self, r_matrix, cov_p=None, scale=1.0, invcov=None,
              use_f=None):
    """
    Compute a Wald-test for a joint linear hypothesis.

    Parameters
    ----------
    r_matrix : array-like, str, or tuple
        - array : An r x k array where r is the number of restrictions to
          test and k is the number of regressors. It is assumed
          that the linear combination is equal to zero.
        - str : The full hypotheses to test can be given as a string,
          in the syntax of patsy.DesignInfo.linear_constraint.
        - tuple : A tuple of arrays in the form (R, q), ``q`` can be
          either a scalar or a length p row vector.
    cov_p : array-like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, self.normalized_cov_params is used.
    scale : float, optional
        Default is 1.0 for no scaling. Not used in the body of this
        method.
    invcov : array-like, optional
        A q x q array to specify an inverse covariance matrix based on a
        restrictions matrix.
    use_f : bool
        If True, then the F-distribution is used. If False, then the
        asymptotic distribution, chisquare is used. If use_f is None, then
        the F distribution is used if the model specifies that use_t is
        True. The test statistic is proportionally adjusted for the
        distribution by the number of constraints in the hypothesis.

    Returns
    -------
    res : ContrastResults instance
        The results for the test are attributes of this results instance.

    Raises
    ------
    ValueError
        If no parameter covariance is available, if the restriction and
        constant arrays disagree on the number of rows, or if the
        restricted covariance contains NaN.

    See also
    --------
    statsmodels.stats.contrast.ContrastResults
    f_test
    t_test
    patsy.DesignInfo.linear_constraint

    Notes
    -----
    The matrix `r_matrix` is assumed to be non-singular. More precisely,

    r_matrix (pX pX.T) r_matrix.T

    is assumed invertible. Here, pX is the generalized inverse of the
    design matrix of the model. There can be problems in non-OLS models
    where the rank of the covariance of the noise is not full.
    """
    if use_f is None:
        # switch to use_t false if undefined
        use_f = (hasattr(self, 'use_t') and self.use_t)

    from patsy import DesignInfo
    names = self.model.data.param_names
    LC = DesignInfo(names).linear_constraint(r_matrix)
    r_matrix, q_matrix = LC.coefs, LC.constants

    if (self.normalized_cov_params is None and cov_p is None and
            invcov is None and not hasattr(self, 'cov_params_default')):
        raise ValueError('need covariance of parameters for computing '
                         'F statistics')

    cparams = np.dot(r_matrix, self.params[:, None])
    n_restrictions = r_matrix.shape[0]
    J = float(n_restrictions)  # number of restrictions
    if q_matrix is None:
        # np.zeros needs an integer length; the float J would raise
        # TypeError here.
        q_matrix = np.zeros(n_restrictions)
    else:
        q_matrix = np.asarray(q_matrix)
    if q_matrix.ndim == 1:
        # Make the constants a column so they subtract row-wise.
        q_matrix = q_matrix[:, None]
        if q_matrix.shape[0] != J:
            raise ValueError("r_matrix and q_matrix must have the same "
                             "number of rows")
    Rbq = cparams - q_matrix

    if invcov is None:
        cov_p = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
        if np.isnan(cov_p).max():
            raise ValueError("r_matrix performs f_test for using "
                             "dimensions that are asymptotically "
                             "non-normal")
        invcov = np.linalg.inv(cov_p)

    # l1-regularized fits go through nan_dot instead of np.dot.
    if (hasattr(self, 'mle_settings') and
            self.mle_settings['optimizer'] in ['l1', 'l1_cvxopt_cp']):
        F = nan_dot(nan_dot(Rbq.T, invcov), Rbq)
    else:
        F = np.dot(np.dot(Rbq.T, invcov), Rbq)

    df_resid = getattr(self, 'df_resid_inference', self.df_resid)
    if use_f:
        F /= J
        return ContrastResults(F=F, df_denom=df_resid,
                               df_num=invcov.shape[0])
    else:
        return ContrastResults(chi2=F, df_denom=J, statistic=F,
                               distribution='chi2', distargs=(J, ))
def t_test(self, r_matrix, cov_p=None, scale=None, use_t=None):
    """
    Run a t-test for each linear hypothesis of the form Rb = q.

    Parameters
    ----------
    r_matrix : array-like, str, tuple
        - array : a p x k 2d array or length k 1d array specifying the
          linear restrictions. The linear combination is assumed to be
          equal to zero.
        - str : the full hypotheses to test, given as a string. See the
          examples.
        - tuple : a tuple of arrays in the form (R, q). If q is given,
          it can be either a scalar or a length p row vector.
    cov_p : array-like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, self.normalized_cov_params is used.
    scale : float, optional
        An optional `scale` to use. Default is the scale specified
        by the model fit.
    use_t : bool, optional
        If use_t is None, then the default of the model is used.
        If use_t is True, then the p-values are based on the t
        distribution.
        If use_t is False, then the p-values are based on the normal
        distribution.

    Returns
    -------
    res : ContrastResults instance
        The results for the test are attributes of this results instance.
        The available results have the same elements as the parameter
        table in `summary()`.

    Examples
    --------
    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.longley.load()
    >>> data.exog = sm.add_constant(data.exog)
    >>> results = sm.OLS(data.endog, data.exog).fit()
    >>> r = np.zeros_like(results.params)
    >>> r[5:] = [1,-1]
    >>> print(r)
    [ 0.  0.  0.  0.  0.  1. -1.]

    r tests that the coefficients on the 5th and 6th independent
    variable are the same.

    >>> T_test = results.t_test(r)
    >>> print(T_test)
    <T contrast: effect=-1829.2025687192481, sd=455.39079425193762,
    t=-4.0167754636411717, p=0.0015163772380899498, df_denom=9>
    >>> T_test.effect
    -1829.2025687192481
    >>> T_test.sd
    455.39079425193762
    >>> T_test.tvalue
    -4.0167754636411717
    >>> T_test.pvalue
    0.0015163772380899498

    Alternatively, you can specify the hypothesis tests using a string

    >>> from statsmodels.formula.api import ols
    >>> dta = sm.datasets.longley.load_pandas().data
    >>> formula = 'TOTEMP ~ GNPDEFL + GNP + UNEMP + ARMED + POP + YEAR'
    >>> results = ols(formula, dta).fit()
    >>> hypotheses = 'GNPDEFL = GNP, UNEMP = 2, YEAR/1829 = 1'
    >>> t_test = results.t_test(hypotheses)
    >>> print(t_test)

    See Also
    ---------
    tvalues : individual t statistics
    f_test : for F tests
    patsy.DesignInfo.linear_constraint
    """
    from patsy import DesignInfo

    # Let patsy turn any accepted hypothesis form into (R, q).
    param_names = self.model.data.param_names
    constraint = DesignInfo(param_names).linear_constraint(r_matrix)
    r_matrix = constraint.coefs
    q_matrix = constraint.constants
    num_ttests = r_matrix.shape[0]
    num_params = r_matrix.shape[1]

    # Some covariance source must be available before going further.
    covariance_available = (
        cov_p is not None
        or self.normalized_cov_params is not None
        or hasattr(self, 'cov_params_default'))
    if not covariance_available:
        raise ValueError('Need covariance of parameters for computing '
                         'T statistics')
    if num_params != self.params.shape[0]:
        raise ValueError('r_matrix and params are not aligned')

    if q_matrix is None:
        q_matrix = np.zeros(num_ttests)
    else:
        q_matrix = np.asarray(q_matrix).squeeze()
    if q_matrix.size > 1 and q_matrix.shape[0] != num_ttests:
        raise ValueError("r_matrix and q_matrix must have the same "
                         "number of rows")

    if use_t is None:
        # switch to use_t false if undefined
        use_t = (hasattr(self, 'use_t') and self.use_t)

    effect = np.dot(r_matrix, self.params)

    # Standard errors: diagonal of the restricted covariance when several
    # hypotheses are tested, otherwise the restricted covariance itself.
    restricted_cov = self.cov_params(r_matrix=r_matrix, cov_p=cov_p)
    if num_ttests > 1:
        std_err = np.sqrt(np.diag(restricted_cov))
    else:
        std_err = np.sqrt(restricted_cov)
    statistic = (effect - q_matrix) * recipr(std_err)

    df_resid = getattr(self, 'df_resid_inference', self.df_resid)

    if not use_t:
        return ContrastResults(effect=effect, statistic=statistic,
                               sd=std_err, df_denom=df_resid,
                               distribution='norm')
    return ContrastResults(effect=effect, t=statistic, sd=std_err,
                           df_denom=df_resid)
def _multivariate_test(hypotheses, exog_names, endog_names, fn):
    """
    Multivariate linear model hypotheses testing

    For y = x * params, where y are the dependent variables and x are the
    independent variables, testing L * params * M = C where L is the contrast
    matrix for hypotheses testing, M is the transformation matrix for
    transforming the dependent variables in y, and C is the constant matrix
    (all zeros when it is not given).

    Algorithm:
        T = L*inv(X'X)*L'
        H = M'B'L'*inv(T)*LBM
        E =  M'(Y'Y - B'X'XB)M
    And then finding the eigenvalues of inv(H + E)*H

    .. [*] https://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_introreg_sect012.htm

    Parameters
    ----------
    %(hypotheses_doc)s
    exog_names : list of str
        Names of the independent variables. Its length is the number of
        columns every contrast matrix L must have; the names are also used
        to parse L when it is given as strings.
    endog_names : list of str
        Names of the dependent variables. Its length is the number of rows
        every transform matrix M must have; the names are also used to
        parse M when it is given as strings.
    fn : function
        a function fn(contrast_L, transform_M, constant_C) that returns
        E, H, q, df_resid where q is the rank of T matrix

    Returns
    -------
    results : dict
        Maps each hypothesis name to a dict with the keys ``'stat'`` (the
        table returned by ``multivariate_stats``), ``'contrast_L'``,
        ``'transform_M'`` and ``'constant_C'``.

    Raises
    ------
    ValueError
        If a hypothesis does not have 2, 3 or 4 entries, or if L, M or C is
        not a 2-d array of a compatible shape.
    """
    k_xvar = len(exog_names)
    k_yvar = len(endog_names)
    results = {}
    for hypo in hypotheses:
        # Each hypothesis is (name, L), (name, L, M) or (name, L, M, C).
        if len(hypo) == 2:
            name, L = hypo
            M = None
            C = None
        elif len(hypo) == 3:
            name, L, M = hypo
            C = None
        elif len(hypo) == 4:
            name, L, M, C = hypo
        else:
            raise ValueError('hypotheses must be a tuple of length 2, 3 or 4.'
                             ' len(hypotheses)=%d' % len(hypo))

        # Contrast matrix: string constraints are parsed against exog_names.
        if any(isinstance(j, str) for j in L):
            L = DesignInfo(exog_names).linear_constraint(L).coefs
        else:
            if not isinstance(L, np.ndarray) or len(L.shape) != 2:
                raise ValueError('Contrast matrix L must be a 2-d array!')
            if L.shape[1] != k_xvar:
                raise ValueError('Contrast matrix L should have the same '
                                 'number of columns as exog! %d != %d' %
                                 (L.shape[1], k_xvar))

        # Transform matrix: identity by default, i.e. no transformation.
        if M is None:
            M = np.eye(k_yvar)
        elif any(isinstance(j, str) for j in M):
            M = DesignInfo(endog_names).linear_constraint(M).coefs.T
        else:
            if not isinstance(M, np.ndarray) or len(M.shape) != 2:
                raise ValueError('Transform matrix M must be a 2-d array!')
            if M.shape[0] != k_yvar:
                raise ValueError('Transform matrix M should have the same '
                                 'number of rows as the number of columns '
                                 'of endog! %d != %d' %
                                 (M.shape[0], k_yvar))

        # Constant matrix: zeros by default. The rank check is required so a
        # 1-d array gets the documented ValueError rather than an IndexError
        # from ``C.shape[1]`` below.
        if C is None:
            C = np.zeros([L.shape[0], M.shape[1]])
        elif not isinstance(C, np.ndarray) or C.ndim != 2:
            raise ValueError('Constant matrix C must be a 2-d array!')
        if C.shape[0] != L.shape[0]:
            raise ValueError('contrast L and constant C must have the same '
                             'number of rows! %d!=%d'
                             % (L.shape[0], C.shape[0]))
        if C.shape[1] != M.shape[1]:
            raise ValueError('transform M and constant C must have the same '
                             'number of columns! %d!=%d'
                             % (M.shape[1], C.shape[1]))

        E, H, q, df_resid = fn(L, M, C)
        EH = np.add(E, H)
        p = matrix_rank(EH)

        # eigenvalues of inv(E + H)H
        eigv2 = np.sort(eigvals(solve(EH, H)))
        stat_table = multivariate_stats(eigv2, p, q, df_resid)
        results[name] = {'stat': stat_table, 'contrast_L': L,
                         'transform_M': M, 'constant_C': C}
    return results
def wald_test(self, r_matrix, xname=None, cov_p=None, scale=1.0, invcov=None, use_f=None):
    """
    Compute a Wald-test for a joint linear hypothesis.

    Parameters
    ----------
    r_matrix : {array_like, str, tuple}
        One of:

        - array : An r x k array where r is the number of restrictions to
          test and k is the number of regressors. It is assumed that the
          linear combination is equal to zero.
        - str : The full hypotheses to test can be given as a string.
        - tuple : A tuple of arrays in the form (R, q), ``q`` can be
          either a scalar or a length p row vector.
    xname : list of str
        Parameter names handed to ``patsy.DesignInfo`` to interpret
        ``r_matrix``. NOTE(review): the default ``None`` is passed to
        ``DesignInfo`` unchanged, so callers presumably always have to
        supply the names -- confirm against patsy.
    cov_p : array_like, optional
        An alternative estimate for the parameter covariance matrix.
        If None is given, ``self.Hinv(self.params)`` is used.
    scale : float, optional
        Not used by this implementation; kept for interface compatibility.
    invcov : array_like, optional
        A q x q array to specify an inverse covariance matrix based on a
        restrictions matrix. When given, no covariance is computed and the
        rank check is skipped.
    use_f : bool, optional
        Not used by this implementation: the statistic is always returned
        with a chi-square distribution.

    Returns
    -------
    ContrastResults
        The results for the test are attributes of this results instance.

    Raises
    ------
    ValueError
        If the constants do not have one row per restriction, or if the
        covariance of the constraints contains NaN.
    """
    import warnings

    from patsy import DesignInfo

    params = self.params.ravel()
    LC = DesignInfo(xname).linear_constraint(r_matrix)
    r_matrix, q_matrix = LC.coefs, LC.constants

    cparams = np.dot(r_matrix, params[:, None])
    n_restrictions = r_matrix.shape[0]
    # Reported as a float, as before; the integer count is kept separately
    # because numpy shape arguments must be integers.
    J = float(n_restrictions)
    if q_matrix is None:
        q_matrix = np.zeros(n_restrictions)
    else:
        q_matrix = np.asarray(q_matrix)
    if q_matrix.ndim == 1:
        q_matrix = q_matrix[:, None]
        if q_matrix.shape[0] != J:
            raise ValueError("r_matrix and q_matrix must have the same "
                             "number of rows")
    Rbq = cparams - q_matrix

    if invcov is None:
        # Honour a caller-supplied covariance; fall back to the model's own.
        base_cov = self.Hinv(self.params) if cov_p is None else cov_p
        cov_p = self.cov_params(r_matrix=r_matrix, cov_p=base_cov)
        if np.isnan(cov_p).max():
            raise ValueError("r_matrix performs f_test for using "
                             "dimensions that are asymptotically "
                             "non-normal")
        invcov = np.linalg.pinv(cov_p)
        J_ = np.linalg.matrix_rank(cov_p)
        if J_ < J:
            warnings.warn(
                'covariance of constraints does not have full '
                'rank. The number of constraints is %d, but '
                'rank is %d' % (J, J_), ValueWarning)
            # Use the effective number of constraints as degrees of freedom.
            J = J_

    F = np.dot(np.dot(Rbq.T, invcov), Rbq)
    return ContrastResults(chi2=F, df_denom=J, statistic=F,
                           distribution='chi2', distargs=(J, ))
def compute_contrast(self, contrast_def, stat_type=None, output_type='z_score'):
    r"""Generate different outputs corresponding to the contrasts provided
    e.g. z_map, t_map, effects and variance. In multi-session case,
    outputs the fixed effects map.

    Parameters
    ----------
    contrast_def : str or array of shape (n_col) or list of (string or
                   array of shape (n_col))
        where ``n_col`` is the number of columns of the design matrix,
        (one array per run). If only one array is provided when there
        are several runs, it will be assumed that the same contrast is
        desired for all runs. The string can be a formula compatible with
        the linear constraint of the Patsy library. Basically one can use
        the name of the conditions as they appear in the design matrix of
        the fitted model combined with operators /\*+- and numbers.
        Please check the patsy documentation for formula examples:
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
        Formulas are translated using the column names of the first
        design matrix. The sequence passed in is never modified.

    stat_type : {'t', 'F'}, optional
        type of the contrast

    output_type : str, optional
        Type of the output map. Can be 'z_score', 'stat', 'p_value',
        'effect_size', 'effect_variance' or 'all'

    Returns
    -------
    output : Nifti1Image or dict
        The desired output image(s). If ``output_type == 'all'``, then
        the output is a dictionary of images, keyed by the type of image.

    Raises
    ------
    ValueError
        If the model has not been fit, if ``contrast_def`` has an
        unsupported type, if several contrasts are given but their number
        differs from the number of runs, or if ``output_type`` is unknown.
    """
    if self.labels_ is None or self.results_ is None:
        raise ValueError('The model has not been fit yet')

    if isinstance(contrast_def, (np.ndarray, str)):
        con_vals = [contrast_def]
    elif isinstance(contrast_def, (list, tuple)):
        # Work on a copy: tuples do not support item assignment and the
        # caller's list must not be rewritten in place.
        con_vals = list(contrast_def)
    else:
        raise ValueError('contrast_def must be an array or str or list of'
                         ' (array or str)')

    # Translate formulas to vectors with patsy (only when there are any)
    if any(not isinstance(con, np.ndarray) for con in con_vals):
        design_info = DesignInfo(self.design_matrices_[0].columns.tolist())
        con_vals = [
            con if isinstance(con, np.ndarray)
            else design_info.linear_constraint(con).coefs
            for con in con_vals
        ]

    n_runs = len(self.labels_)
    n_contrasts = len(con_vals)
    if n_contrasts != n_runs:
        # Only a single contrast can be shared across runs; any other
        # mismatch is an error rather than something to replicate.
        if n_contrasts != 1:
            raise ValueError('%d contrasts given, while there are %d runs'
                             % (n_contrasts, n_runs))
        warn('One contrast given, assuming it for all %d runs' % n_runs)
        con_vals = con_vals * n_runs

    # 'all' is assumed to be the final entry; if adding more, place before 'all'
    valid_types = ['z_score', 'stat', 'p_value', 'effect_size',
                   'effect_variance', 'all']
    if output_type not in valid_types:
        raise ValueError('output_type must be one of {}'.format(valid_types))

    contrast = _fixed_effect_contrast(self.labels_, self.results_,
                                      con_vals, stat_type)

    output_types = (valid_types[:-1] if output_type == 'all'
                    else [output_type])
    contrast_name = str(con_vals)
    outputs = {}
    for output_type_ in output_types:
        estimate_ = getattr(contrast, output_type_)()
        # Prepare the returned images
        output = self.masker_.inverse_transform(estimate_)
        output.header['descrip'] = (
            '%s of contrast %s' % (output_type_, contrast_name))
        outputs[output_type_] = output

    return outputs if output_type == 'all' else output
def compute_contrast(self, second_level_contrast=None, first_level_contrast=None, second_level_stat_type=None, output_type='z_score'):
    r"""Fit the second-level GLM and return one statistical map.

    Parameters
    ----------
    second_level_contrast: str or array of shape (n_col), optional
        Where ``n_col`` is the number of columns of the design matrix.
        A string is interpreted as a formula compatible with the linear
        constraint of the Patsy library: names of the design matrix
        columns combined with operators /\*+- and numbers. See
        http://patsy.readthedocs.io/en/latest/API-reference.html#patsy.DesignInfo.linear_constraint
        The default (None) is accepted only when the design matrix has a
        single column, in which case array([1]) is applied; otherwise an
        error is raised.

    first_level_contrast: str or array of shape (n_col) with respect to
                          FirstLevelModel, optional
        Mandatory when the model was fit on a list of FirstLevelModel.
        It is forwarded, together with the second-level input, to the
        helper that infers the effect maps.

    second_level_stat_type: {'t', 'F'}, optional
        Type of the second level contrast

    output_type: str, optional
        Type of the output map. Can be 'z_score', 'stat', 'p_value',
        'effect_size' or 'effect_variance'

    Returns
    -------
    output_image: Nifti1Image
        The desired output image

    Notes
    -----
    As a side effect ``self.labels_`` and ``self.results_`` are replaced
    by the outcome of the regression run here.
    """
    if self.second_level_input_ is None:
        raise ValueError('The model has not been fit yet')

    # A list of first-level models cannot be used without a contrast.
    from_first_level = isinstance(self.second_level_input_[0],
                                  FirstLevelModel)
    if from_first_level and first_level_contrast is None:
        raise ValueError('If second_level_input was a list of '
                         'FirstLevelModel, then first_level_contrast '
                         'is mandatory. It corresponds to the '
                         'second_level_contrast argument of the '
                         'compute_contrast method of FirstLevelModel')

    # Resolve the contrast into a vector of weights.
    if second_level_contrast is None:
        if self.design_matrix_.shape[1] != 1:
            raise ValueError('No second-level contrast is specified.')
        second_level_contrast = np.ones([1])
    if not isinstance(second_level_contrast, np.ndarray):
        column_names = self.design_matrix_.columns.tolist()
        parsed = DesignInfo(column_names).linear_constraint(
            second_level_contrast)
        con_val = parsed.coefs
    else:
        con_val = second_level_contrast
        if np.all(con_val == 0):
            raise ValueError('Contrast is null')

    # Validate the requested output.
    allowed_outputs = ('z_score', 'stat', 'p_value', 'effect_size',
                       'effect_variance')
    if (not isinstance(output_type, _basestring)
            or output_type not in allowed_outputs):
        raise ValueError('output_type must be one of "z_score", "stat", '
                         '"p_value", "effect_size" or "effect_variance"')

    # Effect maps matching the chosen first-level contrast.
    effect_maps = _infer_effect_maps(self.second_level_input_,
                                     first_level_contrast)

    # The design matrix needs exactly one row per effect map.
    n_rows = self.design_matrix_.shape[0]
    n_maps = len(effect_maps)
    if n_maps != n_rows:
        raise ValueError(
            'design_matrix does not match the number of maps considered. '
            '%i rows in design matrix do not match with %i maps' %
            (n_rows, n_maps))

    # Ordinary Least Squares regression, optionally through the cache.
    Y = self.masker_.transform(effect_maps)
    mem_glm = (self.memory.cache(run_glm, ignore=['n_jobs'])
               if self.memory else run_glm)
    labels, results = mem_glm(Y, self.design_matrix_.values,
                              n_jobs=self.n_jobs, noise_model='ols')

    # Keep only the lightweight results when details are not needed.
    if self.minimize_memory:
        for key, full_result in list(results.items()):
            results[key] = SimpleRegressionResults(full_result)
    self.labels_ = labels
    self.results_ = results

    # Build the contrast object, optionally through the cache.
    mem_contrast = (self.memory.cache(compute_contrast)
                    if self.memory else compute_contrast)
    contrast = mem_contrast(self.labels_, self.results_, con_val,
                            second_level_stat_type)

    # Extract the requested statistic and map it back to image space.
    estimate_ = getattr(contrast, output_type)()
    output = self.masker_.inverse_transform(estimate_)
    output.header['descrip'] = '%s of contrast %s' % (output_type,
                                                      str(con_val))
    return output
# L does not have full row rank, calculating constant fails with Singular Matrix # transform data xr = T x np.random.seed(1) x = np.random.randn(10, 5) xr = tr1.reduce(x) # roundtrip x2 = tr1.expand(xr) # this does not hold ? do not use constant? do not need it anyway ? #assert_allclose(x2, x, rtol=1e-14) from patsy import DesignInfo names = 'a b c d'.split() LC = DesignInfo(names).linear_constraint('a + b = 0') LC = DesignInfo(names).linear_constraint(['a + b = 0', 'a + 2*c = 1', 'b-a', 'c-a', 'd-a']) #LC = DesignInfo(self.model.exog_names).linear_constraint(r_matrix) r_matrix, q_matrix = LC.coefs, LC.constants np.random.seed(123) nobs = 20 x = 1 + np.random.randn(nobs, 4) exog = np.column_stack((np.ones(nobs), x)) endog = exog.sum(1) + np.random.randn(nobs) from statsmodels.regression.linear_model import OLS res2 = OLS(endog, exog).fit() #transf = TransformRestriction(np.eye(exog.shape[1])[:2], res2.params[:2] / 2) transf = TransformRestriction([[0, 0, 0,1,1]], res2.params[-2:].sum()) exog_st = transf.reduce(exog)
def __init__(self, endog, exog, constraints=None, **kwargs):
    """Set up the recursive least squares state space model.

    Parameters
    ----------
    endog : array_like
        The observed dependent variable. Pandas objects are kept as they
        are; anything else is converted with ``np.asanyarray``.
    exog : array_like
        The regressors. One-dimensional input is turned into a single
        column.
    constraints : optional
        Linear restrictions on the coefficients, in any form accepted by
        ``patsy.DesignInfo.linear_constraint``. Each restriction is
        imposed by appending one extra endog column filled with the
        restriction's constant.
    **kwargs
        Passed to ``handle_data`` (only when constraints are given) and to
        the parent class initializer. ``initialization`` defaults to
        ``'diffuse'``.
    """
    # Standardize data
    endog_using_pandas = _is_using_pandas(endog, None)
    if not endog_using_pandas:
        endog = np.asanyarray(endog)

    exog_is_using_pandas = _is_using_pandas(exog, None)
    if not exog_is_using_pandas:
        exog = np.asarray(exog)

    # Make sure we have 2-dimensional array
    if exog.ndim == 1:
        if not exog_is_using_pandas:
            exog = exog[:, None]
        else:
            exog = pd.DataFrame(exog)

    self.k_exog = exog.shape[1]

    # Handle constraints
    self.k_constraints = 0
    self._r_matrix = self._q_matrix = None
    if constraints is not None:
        from patsy import DesignInfo
        from statsmodels.base.data import handle_data
        data = handle_data(endog, exog, **kwargs)
        names = data.param_names
        LC = DesignInfo(names).linear_constraint(constraints)
        self._r_matrix, self._q_matrix = LC.coefs, LC.constants
        self.k_constraints = self._r_matrix.shape[0]

        # One extra endog column per constraint, holding that constraint's
        # constant at every observation. The columns are filled before they
        # are appended, so nothing depends on writing through ``.values``
        # (which may be a copy or read-only) and plain arrays actually get
        # the extra columns.
        constraint_endog = np.zeros((len(endog), len(self._r_matrix)))
        constraint_endog[:] = self._q_matrix[:, 0]
        if endog_using_pandas:
            constraint_endog = pd.DataFrame(constraint_endog,
                                            index=endog.index)
            endog = concat([endog, constraint_endog], axis=1)
        else:
            endog = np.column_stack((endog, constraint_endog))

    # Handle coefficient initialization
    kwargs.setdefault('initialization', 'diffuse')

    # Initialize the state space representation
    super(RecursiveLS, self).__init__(
        endog, k_states=self.k_exog, exog=exog, **kwargs)

    # Use univariate filtering by default
    self.ssm.filter_univariate = True

    # Concentrate the scale out of the likelihood function
    self.ssm.filter_concentrated = True

    # Setup the state space representation
    self['design'] = np.zeros((self.k_endog, self.k_states, self.nobs))
    self['design', 0] = self.exog[:, :, None].T
    if self._r_matrix is not None:
        self['design', 1:, :] = self._r_matrix[:, :, None]
    self['transition'] = np.eye(self.k_states)

    # Notice that the filter output does not depend on the measurement
    # variance, so we set it here to 1
    self['obs_cov', 0, 0] = 1.

    # Linear constraints are technically imposed by adding "fake" endog
    # variables that are used during filtering, but for all model- and
    # results-based purposes we want k_endog = 1.
    if self._r_matrix is not None:
        self.k_endog = 1
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Patsy demo: wrap a plain matrix with column names, then extend a formula
# with additional predictors programmatically.

from patsy import dmatrix
from patsy import DesignMatrix, DesignInfo
from patsy import LookupFactor, ModelDesc, Term

# A raw list of rows is accepted by dmatrix directly.
X = [[1, 10], [1, 20], [1, -2]]
print(dmatrix(X))

# The same data again, this time carrying explicit column names.
design_info = DesignInfo(["Intercept!", "Not intercept!"])
X_dm = DesignMatrix(X, design_info)
print(dmatrix(X_dm))


def add_predictors(base_formula, extra_predictors):
    """Return the model description of ``base_formula`` extended with one
    right-hand-side term per name in ``extra_predictors``."""
    model_desc = ModelDesc.from_formula(base_formula)
    # Each name is wrapped in a LookupFactor so that it is looked up
    # verbatim instead of being parsed as code; this keeps names such as
    # "weight.in.kg", "sys.exit()" or "LittleBobbyTables()" working.
    model_desc.rhs_termlist.extend(
        Term([LookupFactor(predictor)]) for predictor in extra_predictors
    )
    return model_desc


extra_predictors = ["x" + str(i) for i in range(10)]
desc = add_predictors("np.log(y) ~ a*b + c:d", extra_predictors)
print(desc.describe())