def __init__(self, data, Y, algorithm=None, cv_dict=None, mask=None,
             output_dir='.', **kwargs):
    """ Initialize Predict.

    Args:
        data: nibabel data instance, or a list of images that is
            concatenated with nib.concat_images
        Y: vector of training labels (a list is converted to a numpy array)
        algorithm: Algorithm to use for prediction.  Must be one of 'svm',
            'svr', 'linear', 'logistic', 'lasso', 'ridge',
            'ridgeClassifier','randomforest', or 'randomforestClassifier'
        cv_dict: Type of cross_validation to use. A dictionary of
            {'type': 'kfolds', 'n_folds': n},
            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
            {'type': 'loso', 'subject_id': holdout},
            where n = number of folds, and subject = vector of subject ids
            that corresponds to self.Y
        mask: binary nibabel mask; when omitted the
            'MNI152_T1_2mm_brain_mask.nii.gz' file under
            get_resource_path() is loaded instead
        output_dir: Directory to use for writing all outputs
        **kwargs: Additional keyword arguments to pass to the prediction
            algorithm

    Raises:
        ValueError: if mask or data is not a nibabel image, or if the
            number of masked samples differs from len(Y)
    """
    self.output_dir = output_dir

    if mask is None:
        mask = nib.load(os.path.join(get_resource_path(),
                                     'MNI152_T1_2mm_brain_mask.nii.gz'))
    elif not isinstance(mask, nib.nifti1.Nifti1Image):
        raise ValueError("mask is not a nibabel instance")
    self.mask = mask

    if isinstance(data, list):
        data = nib.concat_images(data)
    if not isinstance(data, (nib.nifti1.Nifti1Image,
                             nib.nifti1.Nifti1Pair)):
        raise ValueError("data is not a nibabel instance")

    # Use the resolved mask (self.mask), not the raw argument: passing the
    # raw argument handed None to the masker whenever the default mask was
    # selected, so the default mask was never actually applied.
    self.nifti_masker = NiftiMasker(mask_img=self.mask)
    self.data = self.nifti_masker.fit_transform(data)

    if isinstance(Y, list):
        Y = np.array(Y)
    if self.data.shape[0] != len(Y):
        raise ValueError("Y does not match the correct size of data")
    self.Y = Y

    # Algorithm and cross-validation are optional here; predict() can also
    # receive them.
    if algorithm is not None:
        self.set_algorithm(algorithm, **kwargs)

    if cv_dict is not None:
        self.cv = set_cv(cv_dict)
def __init__(self, data, Y, algorithm=None, cv_dict=None, mask=None,
             output_dir='.', **kwargs):
    """ Initialize Predict.

    Args:
        data: nibabel data instance, or a list of images that is
            concatenated with nib.concat_images
        Y: vector of training labels (a list is converted to a numpy array)
        algorithm: Algorithm to use for prediction.  Must be one of 'svm',
            'svr', 'linear', 'logistic', 'lasso', 'ridge',
            'ridgeClassifier','randomforest', or 'randomforestClassifier'
        cv_dict: Type of cross_validation to use. A dictionary of
            {'type': 'kfolds', 'n_folds': n},
            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
            {'type': 'loso', 'subject_id': holdout},
            where n = number of folds, and subject = vector of subject ids
            that corresponds to self.Y
        mask: binary nibabel mask; when omitted the
            'MNI152_T1_2mm_brain_mask.nii.gz' file under
            get_resource_path() is loaded instead
        output_dir: Directory to use for writing all outputs
        **kwargs: Additional keyword arguments to pass to the prediction
            algorithm

    Raises:
        ValueError: if mask or data is not a nibabel image, or if the
            number of masked samples differs from len(Y)
    """
    self.output_dir = output_dir

    if mask is None:
        mask = nib.load(os.path.join(get_resource_path(),
                                     'MNI152_T1_2mm_brain_mask.nii.gz'))
    elif not isinstance(mask, nib.nifti1.Nifti1Image):
        raise ValueError("mask is not a nibabel instance")
    self.mask = mask

    if isinstance(data, list):
        data = nib.concat_images(data)
    if not isinstance(data, (nib.nifti1.Nifti1Image,
                             nib.nifti1.Nifti1Pair)):
        raise ValueError("data is not a nibabel instance")

    # Use the resolved mask (self.mask), not the raw argument: passing the
    # raw argument handed None to the masker whenever the default mask was
    # selected, so the default mask was never actually applied.
    self.nifti_masker = NiftiMasker(mask_img=self.mask)
    self.data = self.nifti_masker.fit_transform(data)

    if isinstance(Y, list):
        Y = np.array(Y)
    if self.data.shape[0] != len(Y):
        raise ValueError("Y does not match the correct size of data")
    self.Y = Y

    # Algorithm and cross-validation are optional here; predict() can also
    # receive them.
    if algorithm is not None:
        self.set_algorithm(algorithm, **kwargs)

    if cv_dict is not None:
        self.cv = set_cv(cv_dict)
def predict(self, algorithm=None, cv_dict=None, plot=True, **kwargs):
    """ Run prediction

    Fits the chosen predictor on all of self.data, optionally repeats the
    fit within each cross-validation fold, prints summary statistics and
    collects everything in a dictionary.

    Args:
        algorithm: Algorithm to use for prediction.  Must be one of 'svm',
            'svr', 'linear', 'logistic', 'lasso', 'ridge',
            'ridgeClassifier','randomforest', or 'randomforestClassifier'.
            When omitted, 'svr' with a linear kernel is used (and **kwargs
            are not forwarded).
        cv_dict: Type of cross_validation to use. A dictionary of
            {'type': 'kfolds', 'n_folds': n},
            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
            {'type': 'loso', 'subject_id': holdout},
            where n = number of folds, and subject = vector of subject ids
            that corresponds to self.Y
        plot: Boolean indicating whether or not to create plots.
        **kwargs: Additional keyword arguments to pass to the prediction
            algorithm

    Returns:
        output: a dictionary of prediction parameters ('Y', 'yfit_all',
            'intercept', 'weight_map', the matching '*_xval' entries when
            cv_dict is given, and the printed summary statistics)
    """
    # Set algorithm
    if algorithm is not None:
        predictor_settings = set_algorithm(algorithm, **kwargs)
    else:
        # Use SVR as a default
        predictor_settings = set_algorithm('svr', kernel='linear')

    algo_name = predictor_settings['algorithm']
    is_classifier = predictor_settings['prediction_type'] == 'classification'
    is_regressor = predictor_settings['prediction_type'] == 'prediction'
    # These classifiers are scored through decision_function rather than
    # predict_proba.
    margin_based = ['svm', 'ridgeClassifier', 'ridgeClassifierCV']

    def fitted_weights(estimator):
        # Weights of the most recent fit; the two PCA-based algorithms are
        # projected back through the PCA components.
        if algo_name == 'lassopcr':
            return np.dot(predictor_settings['_pca'].components_.T,
                          predictor_settings['_lasso'].coef_)
        if algo_name == 'pcr':
            return np.dot(predictor_settings['_pca'].components_.T,
                          predictor_settings['_regress'].coef_)
        return estimator.coef_.squeeze()

    # Initialize output dictionary
    output = {}
    output['Y'] = np.array(self.Y).flatten()

    # Overall Fit for weight map
    predictor = predictor_settings['predictor']
    predictor.fit(self.data, output['Y'])
    output['yfit_all'] = predictor.predict(self.data)
    if is_classifier:
        if algo_name not in margin_based:
            output['prob_all'] = predictor.predict_proba(self.data)[:, 1]
        else:
            output['dist_from_hyperplane_all'] = \
                predictor.decision_function(self.data)
            if algo_name == 'svm' and predictor.probability:
                output['prob_all'] = predictor.predict_proba(self.data)[:, 1]

    output['intercept'] = predictor.intercept_

    # Weight map
    output['weight_map'] = self.empty()
    output['weight_map'].data = fitted_weights(predictor)

    # Cross-Validation Fit
    if cv_dict is not None:
        output['cv'] = set_cv(cv_dict)

        # NOTE: this is the same estimator object as `predictor`; each fold
        # refits it in place.
        predictor_cv = predictor_settings['predictor']
        output['yfit_xval'] = output['yfit_all'].copy()
        output['intercept_xval'] = []
        output['weight_map_xval'] = deepcopy(output['weight_map'])
        wt_map_xval = []
        if is_classifier:
            if algo_name not in margin_based:
                output['prob_xval'] = np.zeros(len(self.Y))
            else:
                output['dist_from_hyperplane_xval'] = np.zeros(len(self.Y))
                if algo_name == 'svm' and predictor_cv.probability:
                    output['prob_xval'] = np.zeros(len(self.Y))

        for train, test in output['cv']:
            # Index the flattened label array by position.  The fold indices
            # are positional (they also index self.data rows), so the
            # label-based self.Y.loc[train] was only right for a default
            # integer index.
            predictor_cv.fit(self.data[train], output['Y'][train])
            output['yfit_xval'][test] = predictor_cv.predict(self.data[test])
            if is_classifier:
                if algo_name not in margin_based:
                    output['prob_xval'][test] = \
                        predictor_cv.predict_proba(self.data[test])[:, 1]
                else:
                    output['dist_from_hyperplane_xval'][test] = \
                        predictor_cv.decision_function(self.data[test])
                    if algo_name == 'svm' and predictor_cv.probability:
                        output['prob_xval'][test] = \
                            predictor_cv.predict_proba(self.data[test])[:, 1]
            output['intercept_xval'].append(predictor_cv.intercept_)

            # Weight map
            wt_map_xval.append(fitted_weights(predictor_cv))

        # Stack the per-fold maps once, after the loop, instead of
        # rebuilding the array on every fold.
        if wt_map_xval:
            output['weight_map_xval'].data = np.array(wt_map_xval)

    # Print Results
    if is_classifier:
        output['mcr_all'] = np.mean(output['yfit_all'] == output['Y'])
        print('overall accuracy: %.2f' % output['mcr_all'])
        if cv_dict is not None:
            output['mcr_xval'] = np.mean(output['yfit_xval'] == output['Y'])
            print('overall CV accuracy: %.2f' % output['mcr_xval'])
    elif is_regressor:
        output['rmse_all'] = np.sqrt(
            np.mean((output['yfit_all'] - output['Y'])**2))
        output['r_all'] = np.corrcoef(output['Y'], output['yfit_all'])[0, 1]
        print('overall Root Mean Squared Error: %.2f' % output['rmse_all'])
        print('overall Correlation: %.2f' % output['r_all'])
        if cv_dict is not None:
            output['rmse_xval'] = np.sqrt(
                np.mean((output['yfit_xval'] - output['Y'])**2))
            output['r_xval'] = np.corrcoef(output['Y'],
                                           output['yfit_xval'])[0, 1]
            print('overall CV Root Mean Squared Error: %.2f'
                  % output['rmse_xval'])
            print('overall CV Correlation: %.2f' % output['r_xval'])

    # Plot
    if plot:
        if cv_dict is not None:
            if is_regressor:
                scatterplot(pd.DataFrame({'Y': output['Y'],
                                          'yfit_xval': output['yfit_xval']}))
            elif is_classifier:
                binary_outcome = output['Y'].astype('bool')
                if algo_name not in margin_based:
                    output['roc'] = Roc(input_values=output['prob_xval'],
                                        binary_outcome=binary_outcome)
                else:
                    output['roc'] = Roc(
                        input_values=output['dist_from_hyperplane_xval'],
                        binary_outcome=binary_outcome)
                    if algo_name == 'svm' and predictor_cv.probability:
                        output['roc'] = Roc(
                            input_values=output['prob_xval'],
                            binary_outcome=binary_outcome)
                output['roc'].plot()
        output['weight_map'].plot()

    return output
def predict(self, algorithm=None, cv_dict=None, save_images=True,
            save_output=True, save_plot=True, **kwargs):
    """ Run prediction

    Fits self.predictor on all of self.data, optionally repeats the fit
    within each cross-validation fold, saves the requested outputs and
    prints summary statistics. Results are stored as attributes
    (yfit_all, yfit_xval, prob_all, prob_xval, mcr_*/rmse_*/r_*).

    Args:
        algorithm: Algorithm to use for prediction.  Must be one of 'svm',
            'svr', 'linear', 'logistic', 'lasso', 'ridge',
            'ridgeClassifier','randomforest', or 'randomforestClassifier'.
            When given it replaces any previously configured algorithm.
        cv_dict: Type of cross_validation to use. A dictionary of
            {'type': 'kfolds', 'n_folds': n},
            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
            {'type': 'loso', 'subject_id': holdout},
            where n = number of folds, and subject = vector of subject ids
            that corresponds to self.Y
        save_images: Boolean indicating whether or not to save images to
            file.
        save_output: Boolean indicating whether or not to save prediction
            output to file.
        save_plot: Boolean indicating whether or not to create plots.
        **kwargs: Additional keyword arguments to pass to the prediction
            algorithm

    Raises:
        ValueError: if no algorithm is passed and none was set earlier
    """
    from copy import deepcopy

    # An explicitly requested algorithm always wins; previously it was
    # silently ignored once self.algorithm existed.
    if algorithm is not None:
        self.set_algorithm(algorithm, **kwargs)
    elif not hasattr(self, 'algorithm'):
        raise ValueError("Make sure you specify an 'algorithm' to use.")

    # These classifiers are scored through decision_function; svm also
    # exposes probabilities when built with probability=True.
    margin_based = ('svm', 'ridgeClassifier', 'ridgeClassifierCV')
    is_classifier = self.prediction_type == 'classification'
    wants_distance = is_classifier and self.algorithm in margin_based
    wants_proba = is_classifier and (
        self.algorithm not in margin_based
        or (self.algorithm == 'svm' and self.predictor.probability))

    # Overall Fit for weight map
    predictor = self.predictor
    predictor.fit(self.data, self.Y)
    self.yfit_all = predictor.predict(self.data)
    if wants_proba:
        # NOTE(review): this keeps the full predict_proba matrix, unlike
        # prob_xval below; left unchanged because other code may rely on
        # its shape -- confirm.
        self.prob_all = predictor.predict_proba(self.data)

    # Cross-Validation Fit
    if cv_dict is not None:
        self.cv = set_cv(cv_dict)

    dist_from_hyperplane_xval = None
    has_cv = hasattr(self, 'cv')
    if has_cv:
        # Refit a copy inside the folds so `predictor` keeps the fit on the
        # full data set; sharing one object meant the saved images came
        # from the last fold's fit.
        predictor_cv = deepcopy(self.predictor)
        self.yfit_xval = self.yfit_all.copy()
        n_obs = len(self.Y)
        if wants_distance:
            dist_from_hyperplane_xval = np.zeros(n_obs)
        if wants_proba:
            self.prob_xval = np.zeros(n_obs)

        for train, test in self.cv:
            predictor_cv.fit(self.data[train], self.Y[train])
            self.yfit_xval[test] = predictor_cv.predict(self.data[test])
            if wants_distance:
                dist_from_hyperplane_xval[test] = \
                    predictor_cv.decision_function(self.data[test])
            if wants_proba:
                # predict_proba is 2-D (one column per class); keep the
                # second column so it fits the 1-D prob_xval slice.
                self.prob_xval[test] = \
                    predictor_cv.predict_proba(self.data[test])[:, 1]

    # Save Outputs
    if save_images:
        self._save_image(predictor)

    if save_output:
        self._save_stats_output(dist_from_hyperplane_xval)

    if save_plot:
        if has_cv:
            self._save_plot(predictor_cv)
        else:
            self._save_plot(predictor)

    # Print Results
    if is_classifier:
        self.mcr_all = np.mean(self.yfit_all == self.Y)
        print('overall accuracy: %.2f' % self.mcr_all)
        if has_cv:
            self.mcr_xval = np.mean(self.yfit_xval == self.Y)
            print('overall CV accuracy: %.2f' % self.mcr_xval)
    elif self.prediction_type == 'prediction':
        self.rmse_all = np.sqrt(np.mean((self.yfit_all - self.Y)**2))
        self.r_all = np.corrcoef(self.Y, self.yfit_all)[0, 1]
        print('overall Root Mean Squared Error: %.2f' % self.rmse_all)
        print('overall Correlation: %.2f' % self.r_all)
        if has_cv:
            self.rmse_xval = np.sqrt(np.mean((self.yfit_xval - self.Y)**2))
            self.r_xval = np.corrcoef(self.Y, self.yfit_xval)[0, 1]
            print('overall CV Root Mean Squared Error: %.2f'
                  % self.rmse_xval)
            print('overall CV Correlation: %.2f' % self.r_xval)
def predict(self, algorithm=None, cv_dict=None, save_images=True,
            save_output=True, save_plot=True, **kwargs):
    """ Run prediction

    Fits self.predictor on all of self.data, optionally repeats the fit
    within each cross-validation fold, saves the requested outputs and
    prints summary statistics. Results are stored as attributes
    (yfit_all, yfit_xval, prob_all, prob_xval, mcr_*/rmse_*/r_*).

    Args:
        algorithm: Algorithm to use for prediction.  Must be one of 'svm',
            'svr', 'linear', 'logistic', 'lasso', 'ridge',
            'ridgeClassifier','randomforest', or 'randomforestClassifier'.
            When given it replaces any previously configured algorithm.
        cv_dict: Type of cross_validation to use. A dictionary of
            {'type': 'kfolds', 'n_folds': n},
            {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or
            {'type': 'loso', 'subject_id': holdout},
            where n = number of folds, and subject = vector of subject ids
            that corresponds to self.Y
        save_images: Boolean indicating whether or not to save images to
            file.
        save_output: Boolean indicating whether or not to save prediction
            output to file.
        save_plot: Boolean indicating whether or not to create plots.
        **kwargs: Additional keyword arguments to pass to the prediction
            algorithm

    Raises:
        ValueError: if no algorithm is passed and none was set earlier
    """
    from copy import deepcopy

    # An explicitly requested algorithm always wins; previously it was
    # silently ignored once self.algorithm existed.
    if algorithm is not None:
        self.set_algorithm(algorithm, **kwargs)
    elif not hasattr(self, 'algorithm'):
        raise ValueError("Make sure you specify an 'algorithm' to use.")

    # These classifiers are scored through decision_function; svm also
    # exposes probabilities when built with probability=True.
    margin_based = ('svm', 'ridgeClassifier', 'ridgeClassifierCV')
    is_classifier = self.prediction_type == 'classification'
    wants_distance = is_classifier and self.algorithm in margin_based
    wants_proba = is_classifier and (
        self.algorithm not in margin_based
        or (self.algorithm == 'svm' and self.predictor.probability))

    # Overall Fit for weight map
    predictor = self.predictor
    predictor.fit(self.data, self.Y)
    self.yfit_all = predictor.predict(self.data)
    if wants_proba:
        # NOTE(review): this keeps the full predict_proba matrix, unlike
        # prob_xval below; left unchanged because other code may rely on
        # its shape -- confirm.
        self.prob_all = predictor.predict_proba(self.data)

    # Cross-Validation Fit
    if cv_dict is not None:
        self.cv = set_cv(cv_dict)

    dist_from_hyperplane_xval = None
    has_cv = hasattr(self, 'cv')
    if has_cv:
        # Refit a copy inside the folds so `predictor` keeps the fit on the
        # full data set; sharing one object meant the saved images came
        # from the last fold's fit.
        predictor_cv = deepcopy(self.predictor)
        self.yfit_xval = self.yfit_all.copy()
        n_obs = len(self.Y)
        if wants_distance:
            dist_from_hyperplane_xval = np.zeros(n_obs)
        if wants_proba:
            self.prob_xval = np.zeros(n_obs)

        for train, test in self.cv:
            predictor_cv.fit(self.data[train], self.Y[train])
            self.yfit_xval[test] = predictor_cv.predict(self.data[test])
            if wants_distance:
                dist_from_hyperplane_xval[test] = \
                    predictor_cv.decision_function(self.data[test])
            if wants_proba:
                # predict_proba is 2-D (one column per class); keep the
                # second column so it fits the 1-D prob_xval slice.
                self.prob_xval[test] = \
                    predictor_cv.predict_proba(self.data[test])[:, 1]

    # Save Outputs
    if save_images:
        self._save_image(predictor)

    if save_output:
        self._save_stats_output(dist_from_hyperplane_xval)

    if save_plot:
        if has_cv:
            self._save_plot(predictor_cv)
        else:
            self._save_plot(predictor)

    # Print Results
    if is_classifier:
        self.mcr_all = np.mean(self.yfit_all == self.Y)
        print('overall accuracy: %.2f' % self.mcr_all)
        if has_cv:
            self.mcr_xval = np.mean(self.yfit_xval == self.Y)
            print('overall CV accuracy: %.2f' % self.mcr_xval)
    elif self.prediction_type == 'prediction':
        self.rmse_all = np.sqrt(np.mean((self.yfit_all - self.Y)**2))
        self.r_all = np.corrcoef(self.Y, self.yfit_all)[0, 1]
        print('overall Root Mean Squared Error: %.2f' % self.rmse_all)
        print('overall Correlation: %.2f' % self.r_all)
        if has_cv:
            self.rmse_xval = np.sqrt(np.mean((self.yfit_xval - self.Y)**2))
            self.r_xval = np.corrcoef(self.Y, self.yfit_xval)[0, 1]
            print('overall CV Root Mean Squared Error: %.2f'
                  % self.rmse_xval)
            print('overall CV Correlation: %.2f' % self.r_xval)