def _validate(self, simpleworc, *args, **kwargs):
    if simpleworc._add_evaluation:
        errstr = ('You have added the evaluation pipeline, but have not '
                  'provided images, which is currently required. We will '
                  'work on this option in a future release.')
        if not simpleworc._images_train:
            if hasattr(simpleworc, 'images_train'):
                if not simpleworc.images_train:
                    raise ae.WORCValueError(errstr)
            else:
                raise ae.WORCValueError(errstr)
def _validate(self, simpleworc):
    errstr = ''
    try:
        if simpleworc._labels_file_train:
            labels, subjects, _ = load_label_csv(simpleworc._labels_file_train)
        elif simpleworc.labels_file_train:
            labels, subjects, _ = load_label_csv(simpleworc.labels_file_train)
        else:
            raise ae.WORCValueError(
                'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.')
    except ae.WORCAssertionError as wae:
        if 'First column should be patient ID' in str(wae):
            # TODO: print the offending column name and file so that it is
            # clear what needs to be replaced in which file
            raise ae.WORCValueError(
                'First column in the file given to SimpleWORC().labels_from_this_file(**) needs to be named Patient.')

    # Check labels for substrings of each other
    labels_matches = self._get_all_substrings_for_array(labels)
    if labels_matches:
        # If not empty, we have a problem
        errstr += ("Found label(s) that are a substring of other label(s). "
                   "This is currently not allowed in WORC. Rename the following label(s):\n")
        for label, matches in labels_matches.items():
            for match in matches:
                errstr += f"{label} is a substring of {match}\n"

    # Check subject names for substrings of each other
    subjects_matches = self._get_all_substrings_for_array(subjects)
    if subjects_matches:
        # If not empty, we have a problem; append so any label message is kept
        errstr += ("Found subject(s) that are a substring of other subject(s). "
                   "This is currently not allowed in WORC. Rename the following subject(s):\n")
        for subject, matches in subjects_matches.items():
            for match in matches:
                errstr += f"{subject} is a substring of {match}\n"

    if errstr:
        raise ae.WORCValueError(errstr)
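# Note (illustration, not part of WORC): `_get_all_substrings_for_array` is not
# shown above. A minimal sketch of such a helper, mapping each entry to the
# other entries it is a proper substring of, could look like this:
def _get_all_substrings_for_array_sketch(values):
    """Map each value to the other values it is a proper substring of."""
    unique_values = sorted(set(str(v) for v in values))
    matches = {}
    for value in unique_values:
        hits = [other for other in unique_values
                if other != value and value in other]
        if hits:
            matches[value] = hits
    return matches

# Example: a label 'LGG' next to 'LGG-recurrent' would trigger the error above.
assert _get_all_substrings_for_array_sketch(['LGG', 'LGG-recurrent', 'GBM']) == \
    {'LGG': ['LGG-recurrent']}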
def load_labels(label_file, label_type):
    """Load the label data from a label file.

    Args:
        label_file (string): The path to the label file.
        label_type (list): List of the names of the labels to load.

    Returns:
        dict: A dict containing 'patient_IDs', 'label' and 'label_name'.

    """
    if not os.path.exists(label_file):
        raise ae.WORCKeyError(f'File {label_file} does not exist!')

    _, extension = os.path.splitext(label_file)
    if extension == '.txt':
        label_names, patient_IDs, label_status = load_label_txt(label_file)
    elif extension == '.csv':
        label_names, patient_IDs, label_status = load_label_csv(label_file)
    elif extension == '.ini':
        label_names, patient_IDs, label_status = load_label_XNAT(label_file)
    else:
        raise ae.WORCIOError(extension + ' is not a valid label file extension.')

    print("Label names to extract: " + str(label_type))
    labels = list()
    for i_label in label_type:
        label_index = np.where(label_names == i_label)[0]
        if label_index.size == 0:
            raise ae.WORCValueError('Could not find label: ' + str(i_label))
        else:
            labels.append(label_status[:, label_index])

    label_data = dict()
    label_data['patient_IDs'] = patient_IDs
    label_data['label'] = labels
    label_data['label_name'] = label_type

    return label_data
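# Usage sketch (illustration only): write a minimal label file and load one
# label. The layout assumed here (first column named "Patient", one column per
# label) follows the error messages above; the path and label name are made up.
import os, tempfile

_csv = os.path.join(tempfile.mkdtemp(), 'pinfo_example.csv')
with open(_csv, 'w') as f:
    f.write('Patient,malignant\n')
    f.write('Patient001,1\n')
    f.write('Patient002,0\n')

label_data = load_labels(_csv, ['malignant'])
print(label_data['patient_IDs'])   # e.g. ['Patient001', 'Patient002']
print(label_data['label'][0])      # values of the 'malignant' label, one per patient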
def _validate(self, simpleworc, *args, **kwargs):
    if not simpleworc._labels_file_train:
        if hasattr(simpleworc, 'labels_file_train'):
            if not simpleworc.labels_file_train:
                raise ae.WORCValueError(
                    'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.')
        else:
            raise ae.WORCValueError(
                'No labels, use SimpleWorc().labels_from_this_file(**) to add labels.')

    if not simpleworc._label_names:
        if not simpleworc.label_names:
            raise ae.WORCValueError(
                'No label(s) to predict selected. Use SimpleWorc().predict_labels(**) to select labels.')

    if not simpleworc._method:
        raise ae.WORCValueError(
            'No method selected. Call function binary_classification(**) or regression(**) or survival(**) on SimpleWorc().')

    if simpleworc._images_train:
        for num, (ims, segs) in enumerate(
                zip(simpleworc._images_train, simpleworc._segmentations_train)):
            if ims.keys() != segs.keys():
                raise ae.WORCValueError(
                    f'Subjects in images_train and segmentations_train are not the same for modality {num}.')

    if hasattr(simpleworc, 'images_train'):
        if simpleworc.images_train:
            for num, (ims, segs) in enumerate(
                    zip(simpleworc.images_train, simpleworc.segmentations_train)):
                if ims.keys() != segs.keys():
                    raise ae.WORCValueError(
                        f'Subjects in images_train and segmentations_train are not the same for modality {num}.')
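# Structure assumed by the check above (illustration, subject names and paths
# made up): one dict per modality, keyed by subject, with matching keys for
# images and segmentations. The validator only compares the keys.
images_train_example = [
    {'Patient001': '/data/Patient001/image_CT.nii.gz',
     'Patient002': '/data/Patient002/image_CT.nii.gz'},   # modality 0
]
segmentations_train_example = [
    {'Patient001': '/data/Patient001/seg_CT.nii.gz',
     'Patient002': '/data/Patient002/seg_CT.nii.gz'},     # modality 0
]
for num, (ims, segs) in enumerate(zip(images_train_example,
                                      segmentations_train_example)):
    assert ims.keys() == segs.keys(), f'Subject mismatch for modality {num}.'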
def _validate(self, simpleworc, *args, **kwargs):
    if simpleworc._num_subjects < min_subjects:
        raise ae.WORCValueError(
            f'Less than {min_subjects} subjects (you have '
            f'{simpleworc._num_subjects}) will probably make WORC crash due '
            f'to a split in the test/validation set having only one subject. '
            f'Use at least {min_subjects} subjects.')
def random_split_cross_validation(image_features, feature_labels, classes, patient_ids, n_iterations, param_grid, config, modus, test_size, start=0, save_data=None, tempsave=False, tempfolder=None, fixedsplits=None, fixed_seed=False, use_fastr=None, fastr_plugin=None): """Cross-validation in which data is randomly split in each iteration. Due to options of doing single-label and multi-label classification, stratified splitting, and regression, we use a manual loop instead of the default scikit-learn object. Parameters ------------ Returns ------------ """ print('Starting random-split cross-validation.') logging.debug('Starting random-split cross-validation.') if save_data is None: # Start from zero, thus empty list of previos data save_data = list() for i in range(start, n_iterations): print(('Cross-validation iteration {} / {} .').format( str(i + 1), str(n_iterations))) logging.debug(('Cross-validation iteration {} / {} .').format( str(i + 1), str(n_iterations))) timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime()) print(f'\t Time: {timestamp}.') logging.debug(f'\t Time: {timestamp}.') if fixed_seed: random_seed = i**2 else: random_seed = np.random.randint(5000) t = time.time() # Split into test and training set, where the percentage of each # label is maintained if any(clf in regressors for clf in param_grid['classifiers']): # We cannot do a stratified shuffle split with regression stratify = None else: if modus == 'singlelabel': classes_temp = stratify = classes.ravel() elif modus == 'multilabel': # Create a stratification object from the labels # Label = 0 means no label equals one # Other label numbers refer to the label name that is 1 stratify = list() for pnum in range(0, len(classes[0])): plabel = 0 for lnum, slabel in enumerate(classes): if slabel[pnum] == 1: plabel = lnum + 1 stratify.append(plabel) # Sklearn multiclass requires rows to be objects/patients classes_temp = np.zeros((classes.shape[1], classes.shape[0])) for n_patient in range(0, classes.shape[1]): for n_label in range(0, classes.shape[0]): classes_temp[n_patient, n_label] = classes[n_label, n_patient] else: raise ae.WORCKeyError('{} is not a valid modus!').format(modus) if fixedsplits is None: # Use Random Split. Split per patient, not per sample unique_patient_ids, unique_indices =\ np.unique(np.asarray(patient_ids), return_index=True) if any(clf in regressors for clf in param_grid['classifiers']): unique_stratify = None else: unique_stratify = [stratify[i] for i in unique_indices] try: unique_PID_train, indices_PID_test\ = train_test_split(unique_patient_ids, test_size=test_size, random_state=random_seed, stratify=unique_stratify) except ValueError as e: e = str(e) + ' Increase the size of your validation set.' 
raise ae.WORCValueError(e) # Check for all ids if they are in test or training indices_train = list() indices_test = list() patient_ID_train = list() patient_ID_test = list() for num, pid in enumerate(patient_ids): if pid in unique_PID_train: indices_train.append(num) # Make sure we get a unique ID if pid in patient_ID_train: n = 1 while str(pid + '_' + str(n)) in patient_ID_train: n += 1 pid = str(pid + '_' + str(n)) patient_ID_train.append(pid) else: indices_test.append(num) # Make sure we get a unique ID if pid in patient_ID_test: n = 1 while str(pid + '_' + str(n)) in patient_ID_test: n += 1 pid = str(pid + '_' + str(n)) patient_ID_test.append(pid) # Split features and labels accordingly X_train = [image_features[i] for i in indices_train] X_test = [image_features[i] for i in indices_test] if modus == 'singlelabel': Y_train = classes_temp[indices_train] Y_test = classes_temp[indices_test] elif modus == 'multilabel': Y_train = classes_temp[indices_train, :] Y_test = classes_temp[indices_test, :] else: raise ae.WORCKeyError('{} is not a valid modus!').format(modus) else: # Use pre defined splits train = fixedsplits[str(i) + '_train'].values test = fixedsplits[str(i) + '_test'].values # Convert the numbers to the correct indices ind_train = list() for j in train: success = False for num, p in enumerate(patient_ids): if j == p: ind_train.append(num) success = True if not success: raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!") ind_test = list() for j in test: success = False for num, p in enumerate(patient_ids): if j == p: ind_test.append(num) success = True if not success: raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!") X_train = [image_features[i] for i in ind_train] X_test = [image_features[i] for i in ind_test] patient_ID_train = patient_ids[ind_train] patient_ID_test = patient_ids[ind_test] if modus == 'singlelabel': Y_train = classes_temp[ind_train] Y_test = classes_temp[ind_test] elif modus == 'multilabel': Y_train = classes_temp[ind_train, :] Y_test = classes_temp[ind_test, :] else: raise ae.WORCKeyError('{} is not a valid modus!').format(modus) # Find best hyperparameters and construct classifier config['HyperOptimization']['use_fastr'] = use_fastr config['HyperOptimization']['fastr_plugin'] = fastr_plugin n_cores = config['General']['Joblib_ncores'] trained_classifier = random_search_parameters( features=X_train, labels=Y_train, param_grid=param_grid, n_cores=n_cores, random_seed=random_seed, **config['HyperOptimization']) # We only want to save the feature values and one label array X_train = [x[0] for x in X_train] X_test = [x[0] for x in X_test] temp_save_data = (trained_classifier, X_train, X_test, Y_train, Y_test, patient_ID_train, patient_ID_test, random_seed) save_data.append(temp_save_data) # Create a temporary save if tempsave: panda_labels = [ 'trained_classifier', 'X_train', 'X_test', 'Y_train', 'Y_test', 'config', 'patient_ID_train', 'patient_ID_test', 'random_seed', 'feature_labels' ] panda_data_temp =\ pd.Series([trained_classifier, X_train, X_test, Y_train, Y_test, config, patient_ID_train, patient_ID_test, random_seed, feature_labels], index=panda_labels, name='Constructed crossvalidation') panda_data = pd.DataFrame(panda_data_temp) n = 0 filename = os.path.join(tempfolder, 'tempsave_' + str(i) + '.hdf5') while os.path.exists(filename): n += 1 filename = os.path.join(tempfolder, 'tempsave_' + str(i + n) + '.hdf5') panda_data.to_hdf(filename, 'EstimatorData') del panda_data, panda_data_temp # Print elapsed time 
elapsed = int((time.time() - t) / 60.0) print(f'\t Fitting took {elapsed} minutes.') logging.debug(f'\t Fitting took {elapsed} minutes.') return save_data
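# Illustration of the multilabel stratification used above (standalone sketch,
# made-up data): `classes` has one row per label and one column per patient;
# each patient is mapped to a single integer so train_test_split can stratify.
# 0 means no label is positive; otherwise the value is the positive label index + 1.
import numpy as np

classes = np.array([[1, 0, 0, 0],    # label 1 status per patient
                    [0, 1, 0, 0]])   # label 2 status per patient
stratify = []
for pnum in range(classes.shape[1]):
    plabel = 0
    for lnum, slabel in enumerate(classes):
        if slabel[pnum] == 1:
            plabel = lnum + 1
    stratify.append(plabel)
print(stratify)  # [1, 2, 0, 0]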
def fit_and_score(X, y, scoring, train, test, parameters, fit_params=None, return_train_score=True, return_n_test_samples=True, return_times=True, return_parameters=False, return_estimator=False, error_score='raise', verbose=True, return_all=True): """Fit an estimator to a dataset and score the performance. The following methods can currently be applied as preprocessing before fitting, in this order: 0. Apply OneHotEncoder 1. Apply feature imputation 2. Select features based on feature type group (e.g. shape, histogram). 3. Scale features with e.g. z-scoring. 4. Apply feature selection based on variance of feature among patients. 5. Univariate statistical testing (e.g. t-test, Wilcoxon). 6. Use Relief feature selection. 7. Select features based on a fit with a LASSO model. 8. Select features using PCA. 9. Resampling 10. If a SingleLabel classifier is used for a MultiLabel problem, a OneVsRestClassifier is employed around it. All of the steps are optional. Parameters ---------- estimator: sklearn estimator, mandatory Unfitted estimator which will be fit. X: array, mandatory Array containingfor each object (rows) the feature values (1st Column) and the associated feature label (2nd Column). y: list(?), mandatory List containing the labels of the objects. scorer: sklearn scorer, mandatory Function used as optimization criterion for the hyperparamater optimization. train: list, mandatory Indices of the objects to be used as training set. test: list, mandatory Indices of the objects to be used as testing set. parameters: dictionary, mandatory Contains the settings used for the above preprocessing functions and the fitting. TODO: Create a default object and show the fields. fit_params:dictionary, default None Parameters supplied to the estimator for fitting. See the SKlearn site for the parameters of the estimators. return_train_score: boolean, default True Save the training score to the final SearchCV object. return_n_test_samples: boolean, default True Save the number of times each sample was used in the test set to the final SearchCV object. return_times: boolean, default True Save the time spend for each fit to the final SearchCV object. return_parameters: boolean, default True Return the parameters used in the final fit to the final SearchCV object. return_estimator : bool, default=False Whether to return the fitted estimator. error_score: numeric or "raise" by default Value to assign to the score if an error occurs in estimator fitting. If set to "raise", the error is raised. If a numeric value is given, FitFailedWarning is raised. This parameter does not affect the refit step, which will always raise the error. verbose: boolean, default=True If True, print intermediate progress to command line. Warnings are always printed. return_all: boolean, default=True If False, only the ret object containing the performance will be returned. If True, the ret object plus all fitted objects will be returned. Returns ---------- Depending on the return_all input parameter, either only ret or all objects below are returned. ret: list Contains optionally the train_scores and the test_scores, fit_time, score_time, parameters_est and parameters_all. GroupSel: WORC GroupSel Object Either None if the groupwise feature selection is not used, or the fitted object. VarSel: WORC VarSel Object Either None if the variance threshold feature selection is not used, or the fitted object. SelectModel: WORC SelectModel Object Either None if the feature selection based on a fittd model is not used, or the fitted object. 
feature_labels: list Labels of the features. Only one list is returned, not one per feature object, as we assume all samples have the same feature names. scaler: scaler object Either None if feature scaling is not used, or the fitted object. encoder: WORC Encoder Object Either None if feature OneHotEncoding is not used, or the fitted object. imputer: WORC Imputater Object Either None if feature imputation is not used, or the fitted object. pca: WORC PCA Object Either None if PCA based feature selection is not used, or the fitted object. StatisticalSel: WORC StatisticalSel Object Either None if the statistical test feature selection is not used, or the fitted object. ReliefSel: WORC ReliefSel Object Either None if the RELIEF feature selection is not used, or the fitted object. Sampler: WORC ObjectSampler Object Either None if no resampling is used, or an ObjectSampler object """ # We copy the parameter object so we can alter it and keep the original if verbose: print("\n") print('#######################################') print('Starting fit and score of new workflow.') para_estimator = parameters.copy() estimator = cc.construct_classifier(para_estimator) # Check the scorer scorers, __ = check_multimetric_scoring(estimator, scoring=scoring) para_estimator = delete_cc_para(para_estimator) # Get random seed from parameters random_seed = para_estimator['random_seed'] del para_estimator['random_seed'] # X is a tuple: split in two arrays feature_values = np.asarray([x[0] for x in X]) feature_labels = np.asarray([x[1] for x in X]) # Split in train and testing X_train, y_train = _safe_split(estimator, feature_values, y, train) X_test, y_test = _safe_split(estimator, feature_values, y, test, train) train = np.arange(0, len(y_train)) test = np.arange(len(y_train), len(y_train) + len(y_test)) # Set some defaults for if a part fails and we return a dummy fit_time = np.inf score_time = np.inf Sampler = None encoder = None imputer = None scaler = None GroupSel = None SelectModel = None pca = None StatisticalSel = None VarSel = None ReliefSel = None if isinstance(scorers, dict): test_scores = {name: np.nan for name in scorers} if return_train_score: train_scores = test_scores.copy() else: test_scores = error_score if return_train_score: train_scores = error_score # Initiate dummy return object for when fit and scoring failes: sklearn defaults ret = [train_scores, test_scores] if return_train_score else [test_scores] if return_n_test_samples: ret.append(_num_samples(X_test)) if return_times: ret.extend([fit_time, score_time]) if return_parameters: ret.append(para_estimator) if return_estimator: ret.append(estimator) # Additional to sklearn defaults: return all parameters ret.append(parameters) # ------------------------------------------------------------------------ # OneHotEncoder if 'OneHotEncoding' in para_estimator.keys(): if para_estimator['OneHotEncoding'] == 'True': if verbose: print(f'Applying OneHotEncoding, will ignore unknowns.') feature_labels_tofit =\ para_estimator['OneHotEncoding_feature_labels_tofit'] encoder =\ OneHotEncoderWrapper(handle_unknown='ignore', feature_labels_tofit=feature_labels_tofit, verbose=verbose) encoder.fit(X_train, feature_labels) if encoder.encoder is not None: # Encoder is fitted feature_labels = encoder.encoder.encoded_feature_labels X_train = encoder.transform(X_train) X_test = encoder.transform(X_test) del para_estimator['OneHotEncoding'] del para_estimator['OneHotEncoding_feature_labels_tofit'] # Delete the object if we do not need to return it if not 
return_all: del encoder # ------------------------------------------------------------------------ # Feature imputation if 'Imputation' in para_estimator.keys(): if para_estimator['Imputation'] == 'True': imp_type = para_estimator['ImputationMethod'] if verbose: print(f'Imputing NaN with {imp_type}.') imp_nn = para_estimator['ImputationNeighbours'] imputer = Imputer(missing_values=np.nan, strategy=imp_type, n_neighbors=imp_nn) imputer.fit(X_train) original_shape = X_train.shape X_train = imputer.transform(X_train) imputed_shape = X_train.shape X_test = imputer.transform(X_test) if original_shape != imputed_shape: removed_features = original_shape[1] - imputed_shape[1] raise ae.WORCValueError( f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn of imputation, or correct the feature.' ) del para_estimator['Imputation'] del para_estimator['ImputationMethod'] del para_estimator['ImputationNeighbours'] # Delete the object if we do not need to return it if not return_all: del imputer # Remove any NaN feature values if these are still left after imputation X_train = replacenan(X_train, verbose=verbose, feature_labels=feature_labels[0]) X_test = replacenan(X_test, verbose=verbose, feature_labels=feature_labels[0]) # ------------------------------------------------------------------------ # Groupwise feature selection if 'SelectGroups' in para_estimator: if verbose: print("Selecting groups of features.") del para_estimator['SelectGroups'] # TODO: more elegant way to solve this feature_groups = [ 'shape_features', 'histogram_features', 'orientation_features', 'texture_gabor_features', 'texture_glcm_features', 'texture_gldm_features', 'texture_glcmms_features', 'texture_glrlm_features', 'texture_glszm_features', 'texture_gldzm_features', 'texture_ngtdm_features', 'texture_ngldm_features', 'texture_lbp_features', 'dicom_features', 'semantic_features', 'coliage_features', 'vessel_features', 'phase_features', 'fractal_features', 'location_features', 'rgrd_features', 'original_features', 'wavelet_features', 'log_features' ] # First take out the toolbox selection, which is a list toolboxes = para_estimator['toolbox'] del para_estimator['toolbox'] # Check per feature group if the parameter is present parameters_featsel = dict() for group in feature_groups: if group not in para_estimator: # Default: do use the group, except for texture features if group == 'texture_features': value = 'False' else: value = 'True' else: value = para_estimator[group] del para_estimator[group] parameters_featsel[group] = value # Fit groupwise feature selection object GroupSel = SelectGroups(parameters=parameters_featsel, toolboxes=toolboxes) GroupSel.fit(feature_labels[0]) if verbose: print("\t Original Length: " + str(len(X_train[0]))) # Transform all objectd accordingly X_train = GroupSel.transform(X_train) X_test = GroupSel.transform(X_test) if verbose: print("\t New Length: " + str(len(X_train[0]))) feature_labels = GroupSel.transform(feature_labels) # Delete the object if we do not need to return it if not return_all: del GroupSel # Check whether there are any features left if len(X_train[0]) == 0: # TODO: Make a specific WORC exception for this warning. if verbose: print( '[WARNING]: No features are selected! Probably all feature groups were set to False. 
Parameters:' ) print(parameters) # Delete the non-used fields para_estimator = delete_nonestimator_parameters(para_estimator) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret # ------------------------------------------------------------------------ # Feature scaling if verbose and para_estimator['FeatureScaling'] != 'None': print(f'Fitting scaler and transforming features, method ' + f'{para_estimator["FeatureScaling"]}.') scaling_method = para_estimator['FeatureScaling'] if scaling_method == 'None': scaler = None else: skip_features = para_estimator['FeatureScaling_skip_features'] n_skip_feat = len([ i for i in feature_labels[0] if any(e in i for e in skip_features) ]) if n_skip_feat == len(X_train[0]): # Don't need to scale any features if verbose: print( '[WORC Warning] Skipping scaling, only skip features selected.' ) scaler = None else: scaler = WORCScaler(method=scaling_method, skip_features=skip_features) scaler.fit(X_train, feature_labels[0]) if scaler is not None: X_train = scaler.transform(X_train) X_test = scaler.transform(X_test) del para_estimator['FeatureScaling'] # Delete the object if we do not need to return it if not return_all: del scaler # -------------------------------------------------------------------- # Feature selection based on variance if para_estimator['Featsel_Variance'] == 'True': if verbose: print("Selecting features based on variance.") if verbose: print("\t Original Length: " + str(len(X_train[0]))) try: X_train, feature_labels, VarSel =\ selfeat_variance(X_train, feature_labels) X_test = VarSel.transform(X_test) except ValueError: if verbose: print( '[WARNING]: No features meet the selected Variance threshold! Skipping selection.' ) if verbose: print("\t New Length: " + str(len(X_train[0]))) del para_estimator['Featsel_Variance'] # Delete the object if we do not need to return it if not return_all: del VarSel # Check whether there are any features left if len(X_train[0]) == 0: # TODO: Make a specific WORC exception for this warning. if verbose: print( '[WARNING]: No features are selected! Probably your features have too little variance. Parameters:' ) print(parameters) para_estimator = delete_nonestimator_parameters(para_estimator) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret # -------------------------------------------------------------------- # Relief feature selection, possibly multi classself. # Needs to be done after scaling! 
# para_estimator['ReliefUse'] = 'True' if 'ReliefUse' in para_estimator.keys(): if para_estimator['ReliefUse'] == 'True': if verbose: print("Selecting features using relief.") # Get parameters from para_estimator n_neighbours = para_estimator['ReliefNN'] sample_size = para_estimator['ReliefSampleSize'] distance_p = para_estimator['ReliefDistanceP'] numf = para_estimator['ReliefNumFeatures'] # Fit RELIEF object ReliefSel = SelectMulticlassRelief(n_neighbours=n_neighbours, sample_size=sample_size, distance_p=distance_p, numf=numf, random_state=random_seed) ReliefSel.fit(X_train, y) if verbose: print("\t Original Length: " + str(len(X_train[0]))) # Transform all objects accordingly X_train = ReliefSel.transform(X_train) X_test = ReliefSel.transform(X_test) if verbose: print("\t New Length: " + str(len(X_train[0]))) feature_labels = ReliefSel.transform(feature_labels) del para_estimator['ReliefUse'] del para_estimator['ReliefNN'] del para_estimator['ReliefSampleSize'] del para_estimator['ReliefDistanceP'] del para_estimator['ReliefNumFeatures'] # Delete the object if we do not need to return it if not return_all: del ReliefSel # Check whether there are any features left if len(X_train[0]) == 0: # TODO: Make a specific WORC exception for this warning. if verbose: print( '[WARNING]: No features are selected! Probably RELIEF could not properly select features. Parameters:' ) print(parameters) para_estimator = delete_nonestimator_parameters(para_estimator) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret # ------------------------------------------------------------------------ # Perform feature selection using a model para_estimator['SelectFromModel'] = 'True' if 'SelectFromModel' in para_estimator.keys( ) and para_estimator['SelectFromModel'] == 'True': model = para_estimator['SelectFromModel_estimator'] if verbose: print(f"Selecting features using model {model}.") if model == 'Lasso': # Use lasso model for feature selection alpha = para_estimator['SelectFromModel_lasso_alpha'] selectestimator = Lasso(alpha=alpha) elif model == 'LR': # Use logistic regression model for feature selection selectestimator = LogisticRegression() elif model == 'RF': # Use random forest model for feature selection n_estimators = para_estimator['SelectFromModel_n_trees'] selectestimator = RandomForestClassifier(n_estimators=n_estimators) else: raise ae.WORCKeyError( f'Model {model} is not known for SelectFromModel. Use Lasso, LR, or RF.' ) # Prefit model selectestimator.fit(X_train, y_train) # Use fit to select optimal features SelectModel = SelectFromModel(selectestimator, prefit=True) if verbose: print("\t Original Length: " + str(len(X_train[0]))) X_train_temp = SelectModel.transform(X_train) if len(X_train_temp[0]) == 0: if verbose: print( '[WORC WARNING]: No features are selected! Probably your data is too noisy or the selection too strict. Skipping SelectFromModel.' 
) SelectModel = None parameters['SelectFromModel'] = 'False' else: X_train = SelectModel.transform(X_train) X_test = SelectModel.transform(X_test) feature_labels = SelectModel.transform(feature_labels) if verbose: print("\t New Length: " + str(len(X_train[0]))) if 'SelectFromModel' in para_estimator.keys(): del para_estimator['SelectFromModel'] del para_estimator['SelectFromModel_lasso_alpha'] del para_estimator['SelectFromModel_estimator'] del para_estimator['SelectFromModel_n_trees'] # Delete the object if we do not need to return it if not return_all: del SelectModel # Check whether there are any features left if len(X_train[0]) == 0: # TODO: Make a specific WORC exception for this warning. if verbose: print( '[WARNING]: No features are selected! Probably SelectFromModel could not properly select features. Parameters:' ) print(parameters) para_estimator = delete_nonestimator_parameters(para_estimator) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret # ---------------------------------------------------------------- # PCA dimensionality reduction # Principle Component Analysis if 'UsePCA' in para_estimator.keys( ) and para_estimator['UsePCA'] == 'True': if verbose: print('Fitting PCA') print("\t Original Length: " + str(len(X_train[0]))) if para_estimator['PCAType'] == '95variance': # Select first X components that describe 95 percent of the explained variance pca = PCA(n_components=None, random_state=random_seed) try: pca.fit(X_train) except (ValueError, LinAlgError) as e: if verbose: print( f'[WARNING]: skipping this setting due to PCA Error: {e}.' ) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret evariance = pca.explained_variance_ratio_ num = 0 sum = 0 while sum < 0.95: sum += evariance[num] num += 1 # Make a PCA based on the determined amound of components pca = PCA(n_components=num, random_state=random_seed) try: pca.fit(X_train) except (ValueError, LinAlgError) as e: if verbose: print( f'[WARNING]: skipping this setting due to PCA Error: {e}.' ) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret X_train = pca.transform(X_train) X_test = pca.transform(X_test) else: # Assume a fixed number of components: cannot be larger than # n_samples n_components = min(len(X_train), int(para_estimator['PCAType'])) if n_components >= len(X_train[0]): if verbose: print( f"[WORC WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA." ) else: pca = PCA(n_components=n_components, random_state=random_seed) pca.fit(X_train) X_train = pca.transform(X_train) X_test = pca.transform(X_test) if verbose: print("\t New Length: " + str(len(X_train[0]))) # Delete the object if we do not need to return it if not return_all: del pca if 'UsePCA' in para_estimator.keys(): del para_estimator['UsePCA'] del para_estimator['PCAType'] # -------------------------------------------------------------------- # Feature selection based on a statistical test if 'StatisticalTestUse' in para_estimator.keys(): if para_estimator['StatisticalTestUse'] == 'True': metric = para_estimator['StatisticalTestMetric'] threshold = para_estimator['StatisticalTestThreshold'] if verbose: print( f"Selecting features based on statistical test. 
Method {metric}, threshold {round(threshold, 5)}." ) print("\t Original Length: " + str(len(X_train[0]))) StatisticalSel = StatisticalTestThreshold(metric=metric, threshold=threshold) StatisticalSel.fit(X_train, y) X_train_temp = StatisticalSel.transform(X_train) if len(X_train_temp[0]) == 0: if verbose: print( '[WORC WARNING]: No features are selected! Probably your statistical test feature selection was too strict. Skipping thresholding.' ) StatisticalSel = None parameters['StatisticalTestUse'] = 'False' else: X_train = StatisticalSel.transform(X_train) X_test = StatisticalSel.transform(X_test) feature_labels = StatisticalSel.transform(feature_labels) if verbose: print("\t New Length: " + str(len(X_train[0]))) del para_estimator['StatisticalTestUse'] del para_estimator['StatisticalTestMetric'] del para_estimator['StatisticalTestThreshold'] # Delete the object if we do not need to return it if not return_all: del StatisticalSel # ------------------------------------------------------------------------ # Use object resampling if 'Resampling_Use' in para_estimator.keys(): if para_estimator['Resampling_Use'] == 'True': # Determine our starting balance pos_initial = int(np.sum(y_train)) neg_initial = int(len(y_train) - pos_initial) len_in = len(y_train) # Fit ObjectSampler and transform dataset # NOTE: need to save random state for this one as well! Sampler =\ ObjectSampler(method=para_estimator['Resampling_Method'], sampling_strategy=para_estimator['Resampling_sampling_strategy'], n_jobs=para_estimator['Resampling_n_cores'], n_neighbors=para_estimator['Resampling_n_neighbors'], k_neighbors=para_estimator['Resampling_k_neighbors'], threshold_cleaning=para_estimator['Resampling_threshold_cleaning'], verbose=verbose) try: Sampler.fit(X_train, y_train) X_train_temp, y_train_temp = Sampler.transform( X_train, y_train) except ae.WORCValueError as e: message = str(e) if verbose: print('[WORC WARNING] Skipping resampling: ' + message) Sampler = None parameters['Resampling_Use'] = 'False' except RuntimeError as e: if 'ADASYN is not suited for this specific dataset. Use SMOTE instead.' in str( e): # Seldomly occurs, therefore return performance dummy if verbose: print( f'[WARNING]: {e}. Returning dummies. Parameters: ') print(parameters) para_estimator = delete_nonestimator_parameters( para_estimator) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret else: raise e else: pos = int(np.sum(y_train_temp)) neg = int(len(y_train_temp) - pos) if pos < 10 or neg < 10: if verbose: print( f'[WORC WARNING] Skipping resampling: to few objects returned in one or both classes (pos: {pos}, neg: {neg}).' ) Sampler = None parameters['Resampling_Use'] = 'False' else: X_train = X_train_temp y_train = y_train_temp # Notify the user what the resampling did pos = int(np.sum(y_train)) neg = int(len(y_train) - pos) if verbose: message = f"Resampling from {len_in} ({pos_initial} pos," +\ f" {neg_initial} neg) to {len(y_train)} ({pos} pos, {neg} neg) patients." 
print(message) # Also reset train and test indices train = np.arange(0, len(y_train)) test = np.arange(len(y_train), len(y_train) + len(y_test)) del para_estimator['Resampling_Use'] del para_estimator['Resampling_Method'] del para_estimator['Resampling_sampling_strategy'] del para_estimator['Resampling_n_neighbors'] del para_estimator['Resampling_k_neighbors'] del para_estimator['Resampling_threshold_cleaning'] del para_estimator['Resampling_n_cores'] # Delete the object if we do not need to return it if not return_all: del Sampler # ---------------------------------------------------------------- # Fitting and scoring # Only when using fastr this is an entry if 'Number' in para_estimator.keys(): del para_estimator['Number'] # For certainty, we delete all parameters again para_estimator = delete_nonestimator_parameters(para_estimator) # NOTE: This just has to go to the construct classifier function, # although it is more convenient here due to the hyperparameter search if type(y) is list: labellength = 1 else: try: labellength = y.shape[1] except IndexError: labellength = 1 if labellength > 1 and type(estimator) not in [ RankedSVM, RandomForestClassifier ]: # Multiclass, hence employ a multiclass classifier for e.g. SVM, LR estimator.set_params(**para_estimator) estimator = OneVsRestClassifier(estimator) if verbose: print(f"Fitting ML method: {parameters['classifiers']}.") # Recombine feature values and label for train and test set feature_values = np.concatenate((X_train, X_test), axis=0) y = np.concatenate((y_train, y_test), axis=0) para_estimator = None try: ret = _fit_and_score(estimator, feature_values, y, scorers, train, test, verbose, para_estimator, fit_params, return_train_score=return_train_score, return_parameters=return_parameters, return_n_test_samples=return_n_test_samples, return_times=return_times, return_estimator=return_estimator, error_score=error_score) except (ValueError, LinAlgError) as e: if type(estimator) == LDA: if verbose: print( f'[WARNING]: skipping this setting due to LDA Error: {e}.') if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret else: raise e # Add original parameters to return object ret.append(parameters) if return_all: return ret, GroupSel, VarSel, SelectModel, feature_labels[ 0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler else: return ret
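# Illustration (made-up data): fit_and_score expects X as one tuple per object,
# pairing the feature values with the feature labels, which the function then
# unpacks into two arrays exactly as done near its top above.
import numpy as np

feature_labels_example = ['feature_volume', 'feature_mean_intensity']  # hypothetical names
X_example = [
    (np.array([12.3, 0.7]), feature_labels_example),   # object / patient 1
    (np.array([45.1, 1.2]), feature_labels_example),   # object / patient 2
]
feature_values = np.asarray([x[0] for x in X_example])
feature_labels = np.asarray([x[1] for x in X_example])
print(feature_values.shape)  # (2, 2): objects x features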
def crossval(config, label_data, image_features, param_grid=None, use_fastr=False, fastr_plugin=None, tempsave=False, fixedsplits=None, ensemble={'Use': False}, outputfolder=None, modus='singlelabel'): """ Constructs multiple individual classifiers based on the label settings Parameters ---------- config: dict, mandatory Dictionary with config settings. See the Github Wiki for the available fields and formatting. label_data: dict, mandatory Should contain the following: patient_IDs (list): IDs of the patients, used to keep track of test and training sets, and label data label (list): List of lists, where each list contains the label status for that patient for each label label_name (list): Contains the different names that are stored in the label object image_features: numpy array, mandatory Consists of a tuple of two lists for each patient: (feature_values, feature_labels) param_grid: dictionary, optional Contains the parameters and their values wich are used in the grid or randomized search hyperparamater optimization. See the construct_classifier function for some examples. use_fastr: boolean, default False If False, parallel execution through Joblib is used for fast execution of the hyperparameter optimization. Especially suited for execution on mutlicore (H)PC's. The settings used are specified in the config.ini file in the IOparser folder, which you can adjust to your system. If True, fastr is used to split the hyperparameter optimization in separate jobs. Parameters for the splitting can be specified in the config file. Especially suited for clusters. fastr_plugin: string, default None Determines which plugin is used for fastr executions. When None, uses the default plugin from the fastr config. tempsave: boolean, default False If True, create a .hdf5 file after each cross validation containing the classifier and results from that that split. This is written to the GSOut folder in your fastr output mount. If False, only the result of all combined cross validations will be saved to a .hdf5 file. This will also be done if set to True. fixedsplits: string, optional By default, random split cross validation is used to train and evaluate the machine learning methods. Optionally, you can provide a .xlsx file containing fixed splits to be used. See the Github Wiki for the format. ensemble: dictionary, optional Contains the configuration for constructing an ensemble. modus: string, default 'singlelabel' Determine whether one-vs-all classification (or regression) for each single label is used ('singlelabel') or if multilabel classification is performed ('multilabel'). Returns ---------- panda_data: pandas dataframe Contains all information on the trained classifier. 
""" if tempsave: import fastr # Define all possible regressors regressors = ['SVR', 'RFR', 'SGDR', 'Lasso', 'ElasticNet'] # Process input data patient_IDs = label_data['patient_IDs'] label_value = label_data['label'] label_name = label_data['label_name'] if outputfolder is None: logfilename = os.path.join(os.getcwd(), 'classifier.log') else: logfilename = os.path.join(outputfolder, 'classifier.log') print("Logging to file " + str(logfilename)) for handler in logging.root.handlers[:]: logging.root.removeHandler(handler) logging.basicConfig(filename=logfilename, level=logging.DEBUG) N_iterations = config['CrossValidation']['N_iterations'] test_size = config['CrossValidation']['test_size'] classifier_labelss = dict() logging.debug('Starting classifier') # We only need one label instance, assuming they are all the sample feature_labels = image_features[0][1] # Check if we need to use fixedsplits: if fixedsplits is not None and '.xlsx' in fixedsplits: # fixedsplits = '/home/mstarmans/Settings/RandomSufflingOfData.xlsx' wb = xlrd.open_workbook(fixedsplits) wb = wb.sheet_by_index(1) if modus == 'singlelabel': print('Performing Single class classification.') logging.debug('Performing Single class classification.') elif modus == 'multilabel': print('Performing Multi label classification.') logging.debug('Performing Multi class classification.') label_value = [label_value] label_name = [label_name] else: m = ('{} is not a valid modus!').format(modus) logging.debug(m) raise ae.WORCKeyError(m) for i_class, i_name in zip(label_value, label_name): if modus == 'singlelabel': i_class_temp = i_class.ravel() save_data = list() for i in range(0, N_iterations): print(('Cross validation iteration {} / {} .').format(str(i + 1), str(N_iterations))) logging.debug(('Cross validation iteration {} / {} .').format(str(i + 1), str(N_iterations))) random_seed = np.random.randint(5000) # Split into test and training set, where the percentage of each # label is maintained if any(clf in regressors for clf in param_grid['classifiers']): # We cannot do a stratified shuffle split with regression stratify = None else: if modus == 'singlelabel': stratify = i_class_temp elif modus == 'multilabel': # Create a stratification object from the labels # Label = 0 means no label equals one # Other label numbers refer to the label name that is 1 stratify = list() for pnum in range(0, len(i_class[0])): plabel = 0 for lnum, slabel in enumerate(i_class): if slabel[pnum] == 1: plabel = lnum + 1 stratify.append(plabel) # Sklearn multiclass requires rows to be objects/patients # i_class = i_class.reshape(i_class.shape[1], i_class.shape[0]) i_class_temp = np.zeros((i_class.shape[1], i_class.shape[0])) for n_patient in range(0, i_class.shape[1]): for n_label in range(0, i_class.shape[0]): i_class_temp[n_patient, n_label] = i_class[n_label, n_patient] i_class_temp = i_class_temp else: raise ae.WORCKeyError('{} is not a valid modus!').format(modus) if fixedsplits is None: # Use Random Split. Split per patient, not per sample unique_patient_IDs, unique_indices =\ np.unique(np.asarray(patient_IDs), return_index=True) if any(clf in regressors for clf in param_grid['classifiers']): unique_stratify = None else: unique_stratify = [stratify[i] for i in unique_indices] try: unique_PID_train, indices_PID_test\ = train_test_split(unique_patient_IDs, test_size=test_size, random_state=random_seed, stratify=unique_stratify) except ValueError as e: e = str(e) + ' Increase the size of your validation set.' 
raise ae.WORCValueError(e) # Check for all IDs if they are in test or training indices_train = list() indices_test = list() patient_ID_train = list() patient_ID_test = list() for num, pid in enumerate(patient_IDs): if pid in unique_PID_train: indices_train.append(num) # Make sure we get a unique ID if pid in patient_ID_train: n = 1 while str(pid + '_' + str(n)) in patient_ID_train: n += 1 pid = str(pid + '_' + str(n)) patient_ID_train.append(pid) else: indices_test.append(num) # Make sure we get a unique ID if pid in patient_ID_test: n = 1 while str(pid + '_' + str(n)) in patient_ID_test: n += 1 pid = str(pid + '_' + str(n)) patient_ID_test.append(pid) # Split features and labels accordingly X_train = [image_features[i] for i in indices_train] X_test = [image_features[i] for i in indices_test] if modus == 'singlelabel': Y_train = i_class_temp[indices_train] Y_test = i_class_temp[indices_test] elif modus == 'multilabel': Y_train = i_class_temp[indices_train, :] Y_test = i_class_temp[indices_test, :] else: raise ae.WORCKeyError('{} is not a valid modus!').format(modus) else: # Use pre defined splits indices = wb.col_values(i) indices = [int(j) for j in indices[1:]] # First element is "Iteration x" train = indices[0:121] test = indices[121:] # Convert the numbers to the correct indices ind_train = list() for j in train: success = False for num, p in enumerate(patient_IDs): if str(j).zfill(3) == p[0:3]: ind_train.append(num) success = True if not success: raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!") ind_test = list() for j in test: success = False for num, p in enumerate(patient_IDs): if str(j).zfill(3) == p[0:3]: ind_test.append(num) success = True if not success: raise ae.WORCIOError("Patient " + str(j).zfill(3) + " is not included!") X_train = np.asarray(image_features)[ind_train].tolist() Y_train = np.asarray(i_class_temp)[ind_train].tolist() patient_ID_train = patient_IDs[ind_train] X_test = np.asarray(image_features)[ind_test].tolist() Y_test = np.asarray(i_class_temp)[ind_test].tolist() patient_ID_test = patient_IDs[ind_test] # Find best hyperparameters and construct classifier config['HyperOptimization']['use_fastr'] = use_fastr config['HyperOptimization']['fastr_plugin'] = fastr_plugin n_cores = config['General']['Joblib_ncores'] trained_classifier = random_search_parameters(features=X_train, labels=Y_train, param_grid=param_grid, n_cores=n_cores, **config['HyperOptimization']) # Create an ensemble if required if ensemble['Use']: trained_classifier.create_ensemble(X_train, Y_train) # We only want to save the feature values and one label array X_train = [x[0] for x in X_train] X_test = [x[0] for x in X_test] temp_save_data = (trained_classifier, X_train, X_test, Y_train, Y_test, patient_ID_train, patient_ID_test, random_seed) save_data.append(temp_save_data) # Create a temporary save if tempsave: panda_labels = ['trained_classifier', 'X_train', 'X_test', 'Y_train', 'Y_test', 'config', 'patient_ID_train', 'patient_ID_test', 'random_seed'] panda_data_temp =\ pd.Series([trained_classifier, X_train, X_test, Y_train, Y_test, config, patient_ID_train, patient_ID_test, random_seed], index=panda_labels, name='Constructed crossvalidation') panda_data = pd.DataFrame(panda_data_temp) n = 0 filename = os.path.join(fastr.config.mounts['tmp'], 'GSout', 'RS_' + str(i) + '.hdf5') while os.path.exists(filename): n += 1 filename = os.path.join(fastr.config.mounts['tmp'], 'GSout', 'RS_' + str(i + n) + '.hdf5') if not os.path.exists(os.path.dirname(filename)): 
os.makedirs(os.path.dirname(filename)) panda_data.to_hdf(filename, 'SVMdata') del panda_data, panda_data_temp [classifiers, X_train_set, X_test_set, Y_train_set, Y_test_set, patient_ID_train_set, patient_ID_test_set, seed_set] =\ zip(*save_data) panda_labels = ['classifiers', 'X_train', 'X_test', 'Y_train', 'Y_test', 'config', 'patient_ID_train', 'patient_ID_test', 'random_seed', 'feature_labels'] panda_data_temp =\ pd.Series([classifiers, X_train_set, X_test_set, Y_train_set, Y_test_set, config, patient_ID_train_set, patient_ID_test_set, seed_set, feature_labels], index=panda_labels, name='Constructed crossvalidation') if modus == 'singlelabel': i_name = ''.join(i_name) elif modus == 'multilabel': i_name = ','.join(i_name) classifier_labelss[i_name] = panda_data_temp panda_data = pd.DataFrame(classifier_labelss) return panda_data
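# Illustration of how the result of crossval() is organised (mock values): one
# column per predicted label name, each column a pandas Series with the fields
# listed in `panda_labels` above. Real entries hold the fitted SearchCV objects
# and the per-iteration train/test splits instead of these placeholder strings.
import pandas as pd

mock_column = pd.Series(
    [['clf_iter0', 'clf_iter1'],       # classifiers, one per CV iteration
     'X_train', 'X_test', 'Y_train', 'Y_test',
     'config', 'patient_ID_train', 'patient_ID_test',
     'random_seed', 'feature_labels'],
    index=['classifiers', 'X_train', 'X_test', 'Y_train', 'Y_test', 'config',
           'patient_ID_train', 'patient_ID_test', 'random_seed',
           'feature_labels'],
    name='Constructed crossvalidation')
panda_data = pd.DataFrame({'malignant': mock_column})
print(panda_data['malignant']['classifiers'])  # -> ['clf_iter0', 'clf_iter1']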
def load_config(config_file_path):
    """Parse a segmentix configuration file.

    Arguments:
        config_file_path: path to the configuration file to be parsed.

    Returns:
        settings_dict: dictionary containing all parsed settings.

    """
    if not os.path.exists(config_file_path):
        e = f'File {config_file_path} does not exist!'
        raise ae.WORCKeyError(e)

    settings = configparser.ConfigParser()
    settings.read(config_file_path)

    settings_dict = {'Segmentix': dict(), 'Preprocessing': dict()}

    # Segmentation settings
    settings_dict['Segmentix']['type'] =\
        str(settings['Segmentix']['segtype'])
    settings_dict['Segmentix']['mask'] =\
        str(settings['Segmentix']['mask'])
    settings_dict['Segmentix']['radius'] =\
        int(settings['Segmentix']['segradius'])
    settings_dict['Segmentix']['N_blobs'] =\
        int(settings['Segmentix']['N_blobs'])
    settings_dict['Segmentix']['fillholes'] =\
        settings['Segmentix'].getboolean('fillholes')
    settings_dict['Segmentix']['remove_small_objects'] =\
        settings['Segmentix'].getboolean('remove_small_objects')
    settings_dict['Segmentix']['min_object_size'] =\
        int(settings['Segmentix']['min_object_size'])
    settings_dict['Segmentix']['AssumeSameImageAndMaskMetadata'] =\
        settings['General'].getboolean('AssumeSameImageAndMaskMetadata')

    # Check spacing
    settings_dict['Preprocessing']['CheckSpacing'] =\
        settings['Preprocessing'].getboolean('CheckSpacing')

    # Re-orientation
    settings_dict['Preprocessing']['CheckOrientation'] =\
        settings['Preprocessing'].getboolean('CheckOrientation')
    settings_dict['Preprocessing']['OrientationPrimaryAxis'] =\
        str(settings['Preprocessing']['OrientationPrimaryAxis'])

    # Resampling
    settings_dict['Preprocessing']['Resampling'] =\
        settings['Preprocessing'].getboolean('Resampling')
    settings_dict['Preprocessing']['Resampling_spacing'] =\
        [float(item) for item in
         settings['Preprocessing']['Resampling_spacing'].split(',')]

    if len(settings_dict['Preprocessing']['Resampling_spacing']) != 3:
        s = settings_dict['Preprocessing']['Resampling_spacing']
        raise ae.WORCValueError(
            f'Resampling spacing should be three elements, got {s}')

    return settings_dict
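# Usage sketch (illustrative values only): build a minimal configuration with
# the sections and keys read by the segmentix parser above and load it back.
import configparser, os, tempfile

cfg = configparser.ConfigParser()
cfg['Segmentix'] = {'segtype': 'None', 'mask': 'subtract', 'segradius': '5',
                    'N_blobs': '1', 'fillholes': 'True',
                    'remove_small_objects': 'False', 'min_object_size': '2'}
cfg['General'] = {'AssumeSameImageAndMaskMetadata': 'False'}
cfg['Preprocessing'] = {'CheckSpacing': 'False', 'CheckOrientation': 'False',
                        'OrientationPrimaryAxis': 'axial', 'Resampling': 'False',
                        'Resampling_spacing': '1, 1, 1'}

ini_path = os.path.join(tempfile.mkdtemp(), 'segmentix_config.ini')
with open(ini_path, 'w') as f:
    cfg.write(f)

settings = load_config(ini_path)
print(settings['Preprocessing']['Resampling_spacing'])  # [1.0, 1.0, 1.0]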
def load_config(config_file_path):
    """Parse a WORC configuration file.

    Arguments:
        config_file_path: path to the configuration file to be parsed.

    Returns:
        settings_dict: dictionary containing all parsed settings.

    """
    if not os.path.exists(config_file_path):
        e = f'File {config_file_path} does not exist!'
        raise ae.WORCKeyError(e)

    settings = configparser.ConfigParser()
    settings.read(config_file_path)

    settings_dict = {'Preprocessing': dict(),
                     'ImageFeatures': dict(),
                     'General': dict()}

    # General settings
    settings_dict['ImageFeatures']['image_type'] =\
        [str(item).strip() for item in
         settings['ImageFeatures']['image_type'].split(',')]
    settings_dict['General']['AssumeSameImageAndMaskMetadata'] =\
        settings['General'].getboolean('AssumeSameImageAndMaskMetadata')

    # Detect incorrect spacing
    settings_dict['Preprocessing']['CheckSpacing'] =\
        settings['Preprocessing'].getboolean('CheckSpacing')

    # Clipping
    settings_dict['Preprocessing']['Clipping'] =\
        settings['Preprocessing'].getboolean('Clipping')
    settings_dict['Preprocessing']['Clipping_Range'] =\
        [float(item) for item in
         settings['Preprocessing']['Clipping_Range'].split(',')]

    if len(settings_dict['Preprocessing']['Clipping_Range']) != 2:
        raise ae.WORCValueError(
            f"Clipping range should be two floats split by a comma, "
            f"got {settings['Preprocessing']['Clipping_Range']}.")

    # Normalization
    settings_dict['Preprocessing']['Normalize'] =\
        settings['Preprocessing'].getboolean('Normalize')
    settings_dict['Preprocessing']['Normalize_ROI'] =\
        str(settings['Preprocessing']['Normalize_ROI'])
    settings_dict['Preprocessing']['ROIdilate'] =\
        str(settings['Preprocessing']['ROIdilate'])
    settings_dict['Preprocessing']['ROIDetermine'] =\
        str(settings['Preprocessing']['ROIDetermine'])
    settings_dict['Preprocessing']['ROIdilateradius'] =\
        int(settings['Preprocessing']['ROIdilateradius'])
    settings_dict['Preprocessing']['Method'] =\
        str(settings['Preprocessing']['Method'])

    # Bias correction
    settings_dict['Preprocessing']['BiasCorrection'] =\
        settings['Preprocessing'].getboolean('BiasCorrection')
    settings_dict['Preprocessing']['BiasCorrection_Mask'] =\
        settings['Preprocessing'].getboolean('BiasCorrection_Mask')

    # Re-orientation
    settings_dict['Preprocessing']['CheckOrientation'] =\
        settings['Preprocessing'].getboolean('CheckOrientation')
    settings_dict['Preprocessing']['OrientationPrimaryAxis'] =\
        str(settings['Preprocessing']['OrientationPrimaryAxis'])

    # Resampling
    settings_dict['Preprocessing']['Resampling'] =\
        settings['Preprocessing'].getboolean('Resampling')
    settings_dict['Preprocessing']['Resampling_spacing'] =\
        [float(item) for item in
         settings['Preprocessing']['Resampling_spacing'].split(',')]

    if len(settings_dict['Preprocessing']['Resampling_spacing']) != 3:
        s = settings_dict['Preprocessing']['Resampling_spacing']
        raise ae.WORCValueError(
            f'Resampling spacing should be three elements, got {s}')

    return settings_dict
def findlabeldata(patientinfo, label_type, filenames=None,
                  objects=None, pids=None):
    """Load the label data and match it to the image features.

    Args:
        patientinfo (string): file with patient label data
        label_type (string): name of the label read out from patientinfo
        filenames (list): names of the patient feature files, used for matching
        objects (np.array or list): array of objects you want to order as well

    Returns:
        label_data (dict): contains patient IDs, their labels and the label name
        objects_out (list): the provided objects, ordered to match label_data

    """
    # Get the labels and patient IDs
    label_data_temp = load_labels(patientinfo, label_type)
    label_data = dict()
    patient_IDs = list()
    label_value = list()
    for i_len in range(len(label_data_temp['label_name'])):
        label_value.append(list())

    # Check per feature file / pid if there is a match in the label data
    if filenames:
        iterator = filenames
    elif pids:
        iterator = pids
    else:
        raise ae.WORCValueError('Either input pids or filenames for label matching!')

    objects_out = list()
    for i_feat, feat in enumerate(iterator):
        ifound = 0
        matches = list()
        for i_num, i_patient in enumerate(label_data_temp['patient_IDs']):
            if i_patient.lower() in str(feat).lower():
                # Match: add the patient ID to the IDs and to the matches
                patient_IDs.append(i_patient)
                matches.append(i_patient)

                # If there are feature files given, add it to the list
                if objects is not None:
                    objects_out.append(objects[i_feat])

                # For each label that we have, add the value to the label list
                for i_len in range(len(label_data_temp['label_name'])):
                    label_value[i_len].append(label_data_temp['label'][i_len][i_num])

                # Count how many matches we found for this (feature) file: should be one
                ifound += 1

        if ifound > 1:
            message = ('Multiple matches ({}) found in labeling for feature file {}.').format(str(matches), str(feat))
            raise ae.WORCValueError(message)
        elif ifound == 0:
            message = ('No entry found in labeling for feature file {}.').format(str(feat))
            raise ae.WORCKeyError(message)

    # Convert to arrays
    for i_len in range(len(label_value)):
        label_value[i_len] = np.asarray(label_value[i_len])

    label_data['patient_IDs'] = np.asarray(patient_IDs)
    label_data['label'] = np.asarray(label_value)
    label_data['label_name'] = label_data_temp['label_name']

    return label_data, objects_out
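# Usage sketch (hypothetical paths): patient IDs from the label file are matched
# by substring against the feature file names, so the IDs must appear in those
# names. Reuses the `_csv` label file from the load_labels sketch above.
feature_files = ['/tmp/features_Patient001.hdf5', '/tmp/features_Patient002.hdf5']
label_data, feature_files_sorted = findlabeldata(
    patientinfo=_csv,
    label_type=['malignant'],
    filenames=feature_files,
    objects=feature_files)
print(label_data['patient_IDs'])  # ['Patient001', 'Patient002']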
def combine_multiple_estimators(predictions, label_data, multilabel_type, label_types, ensemble=1, strategy='argmax', alpha=0.95): ''' Combine multiple estimators in a single model. Note: the multilabel_type labels should correspond to the ordering in label_types. Hence, if multilabel_type = 0, the prediction is label_type[0] etc. ''' # Load the multilabel label data label_data = lp.load_labels(label_data, multilabel_type) patient_IDs = label_data['patient_IDs'] labels = label_data['label'] # Initialize some objects y_truths = list() y_scores = list() y_predictions = list() pids = list() y_truths_train = list() y_scores_train = list() y_predictions_train = list() pids_train = list() accuracy = list() sensitivity = list() specificity = list() auc = list() f1_score_list = list() precision = list() npv = list() acc_av = list() # Extract all the predictions from the estimators for prediction, label_type in zip(predictions, label_types): y_truth, y_score, y_prediction, pid,\ y_truth_train, y_score_train, y_prediction_train, pid_train =\ plot_estimator_performance(prediction, label_data, label_type, ensemble=ensemble, output='allscores') y_truths.append(y_truth) y_scores.append(y_score) y_predictions.append(y_prediction) pids.append(pid) y_truths_train.append(y_truth_train) y_scores_train.append(y_score_train) y_predictions_train.append(y_prediction_train) pids_train.append(pid_train) # Combine the predictions for i_crossval in range(0, len(y_truths[0])): # Extract all values for this cross validation iteration from all objects y_truth = [t[i_crossval] for t in y_truths] y_score = [t[i_crossval] for t in y_scores] pid = [t[i_crossval] for t in pids] if strategy == 'argmax': # For each patient, take the maximum posterior y_prediction = np.argmax(y_score, axis=0) y_score = np.max(y_score, axis=0) elif strategy == 'decisiontree': # Fit a decision tree on the training set a = 1 else: raise ae.WORCValueError( f"{strategy} is not a valid estimation combining strategy! Should be one of [argmax]." 
) # Compute multilabel performance metrics y_truth = np.argmax(y_truth, axis=0) accuracy_temp, sensitivity_temp, specificity_temp, \ precision_temp, npv_temp, f1_score_temp, auc_temp, accav_temp = \ metrics.performance_multilabel(y_truth, y_prediction, y_score) print("Truth: " + str(y_truth)) print("Prediction: " + str(y_prediction)) print('AUC: ' + str(auc_temp)) # Append performance to lists for all cross validations accuracy.append(accuracy_temp) sensitivity.append(sensitivity_temp) specificity.append(specificity_temp) auc.append(auc_temp) f1_score_list.append(f1_score_temp) precision.append(precision_temp) npv.append(npv_temp) acc_av.append(acc_av_temp) # Extract sample size N_1 = float(len(train_patient_IDs)) N_2 = float(len(test_patient_IDs)) # Compute confidence intervals stats = dict() stats[ "Accuracy 95%:"] = f"{np.nanmean(accuracy)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}" stats[ "Average Accuracy 95%:"] = f"{np.nanmean(acc_av)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}" stats[ "AUC 95%:"] = f"{np.nanmean(auc)} {str(compute_confidence(auc, N_1, N_2, alpha))}" stats[ "F1-score 95%:"] = f"{np.nanmean(f1_score_list)} {str(compute_confidence(f1_score_list, N_1, N_2, alpha))}" stats[ "Precision 95%:"] = f"{np.nanmean(precision)} {str(compute_confidence(precision, N_1, N_2, alpha))}" stats[ "NPV 95%:"] = f"{np.nanmean(npv)} {str(compute_confidence(npv, N_1, N_2, alpha))}" stats[ "Sensitivity 95%: "] = f"{np.nanmean(sensitivity)} {str(compute_confidence(sensitivity, N_1, N_2, alpha))}" stats[ "Specificity 95%:"] = f"{np.nanmean(specificity)} {str(compute_confidence(specificity, N_1, N_2, alpha))}" # Print all CI's stats = OrderedDict(sorted(stats.items())) for k, v in stats.items(): print(f"{k} : {v}.") return stats
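# Illustration of the 'argmax' combining strategy above (made-up posteriors):
# y_score holds one array of posteriors per estimator/label; for each patient
# the label with the highest posterior wins and its score is kept.
import numpy as np

y_score = [np.array([0.9, 0.2, 0.4]),   # estimator for label 0
           np.array([0.1, 0.7, 0.5])]   # estimator for label 1
y_prediction = np.argmax(y_score, axis=0)  # -> [0, 1, 1]
y_score_max = np.max(y_score, axis=0)      # -> [0.9, 0.7, 0.5]
print(y_prediction, y_score_max)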
def plot_estimator_performance(prediction, label_data, label_type,
                               crossval_type=None, alpha=0.95,
                               ensemble=None, verbose=True,
                               ensemble_scoring=None, output=None,
                               modus=None, thresholds=None, survival=False,
                               shuffle_estimators=False, bootstrap=None,
                               bootstrap_N=None, overfit_scaler=None):
    """Plot the output of a single estimator, e.g. an SVM.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        Output of the trainclassifier function, either a pandas dataframe
        or an HDF5 file.

    label_data: string, mandatory
        Contains the path referring to a .txt file containing the
        patient label(s) and value(s) to be used for learning. See
        the Github Wiki for the format.

    label_type: string, mandatory
        Name of the label to extract from the label data to test the
        estimator on.

    alpha: float, default 0.95
        Significance level of the confidence intervals.

    ensemble: False, integer or 'Caruana'
        Determine whether an ensemble will be created. If so, either provide
        an integer to determine how many of the top performing classifiers
        should be in the ensemble, or use the string "Caruana" to use
        smart ensembling based on Caruana et al. 2004.

    verbose: boolean, default True
        Print intermediate messages.

    ensemble_scoring: string, default None
        Metric to be used for evaluating the ensemble. If None, the metric
        set in the prediction object will be used.

    output: string, default stats
        Determine which results are put out. If stats, the statistics of the
        estimator will be returned. If scores, the scores will be returned.

    thresholds: list of integer(s), default None
        If None, use the default sklearn threshold (0.5) on the posteriors to
        convert them to a binary prediction. If one integer is provided, use
        that one. If two integers are provided, posterior < thresh[0] = 0,
        posterior > thresh[1] = 1.

    Returns
    ----------
    Depending on the output parameter, the following outputs are returned:

    If output == 'stats':
    stats: dictionary
        Contains the confidence intervals of the performance metrics
        and the number of times each patient was classified correctly
        or incorrectly.

    If output == 'scores':
    y_truths: list
        Contains the true label for each object.

    y_scores: list
        Contains the score (e.g. posterior) for each object.

    y_predictions: list
        Contains the predicted label for each object.

    pids: list
        Contains the patient ID/name for each object.
    """
    # Load the prediction object if it's an HDF5 file
    if type(prediction) is not pd.core.frame.DataFrame:
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(f'{prediction} is not an existing file!')

    # Select the estimator from the pandas dataframe to use
    keys = prediction.keys()
    if label_type is None:
        label_type = keys[0]

    # Load the label data
    if type(label_data) is not dict:
        if os.path.isfile(label_data):
            if type(label_type) is not list:
                # Singlelabel: convert to list
                label_type = [[label_type]]
            label_data = lp.load_labels(label_data, label_type)
        else:
            raise ae.WORCValueError(
                f"Label data {label_data} incorrect: not a dictionary, or file does not exist.")

    n_labels = len(label_type)
    patient_IDs = label_data['patient_IDs']
    labels = label_data['label']

    if type(label_type) is list:
        # FIXME: Support for multiple label types not supported yet.
        print('[WORC Warning] Support for multiple label types not supported yet. '
              'Taking the first label for plot_estimator_performance.')
        label_type = keys[0]

    # Extract the estimators, features and labels
    regression = is_regressor(
        prediction[label_type]['classifiers'][0].best_estimator_)
    feature_labels = prediction[label_type]['feature_labels']

    # Get some configuration variables if present in the prediction
    config = prediction[label_type].config
    if ensemble is None:
        ensemble = int(config['Ensemble']['Use'])

    if modus is None:
        modus = config['Labels']['modus']

    if crossval_type is None:
        crossval_type = config['CrossValidation']['Type']

    if bootstrap is None:
        bootstrap = config['Bootstrap']['Use']

    if bootstrap_N is None:
        bootstrap_N = int(config['Bootstrap']['N_iterations'])

    if overfit_scaler is None:
        overfit_scaler = config['Evaluation']['OverfitScaler']

    ensemble_metric = config['Ensemble']['Metric']

    # Create lists for performance measures
    if not regression:
        sensitivity = list()
        specificity = list()
        precision = list()
        npv = list()
        accuracy = list()
        bca = list()
        auc = list()
        f1_score_list = list()

        if modus == 'multilabel':
            acc_av = list()

            # Also add scoring measures for all single-label scores
            sensitivity_single = [list() for _ in range(n_labels)]
            specificity_single = [list() for _ in range(n_labels)]
            precision_single = [list() for _ in range(n_labels)]
            npv_single = [list() for _ in range(n_labels)]
            accuracy_single = [list() for _ in range(n_labels)]
            bca_single = [list() for _ in range(n_labels)]
            auc_single = [list() for _ in range(n_labels)]
            f1_score_list_single = [list() for _ in range(n_labels)]
    else:
        r2score = list()
        MSE = list()
        coefICC = list()
        PearsonC = list()
        PearsonP = list()
        SpearmanC = list()
        SpearmanP = list()

    patient_classification_list = dict()
    percentages_selected = list()

    if output in ['scores', 'decision'] or crossval_type == 'LOO':
        # Keep track of all ground truths and scores
        y_truths = list()
        y_scores = list()
        y_predictions = list()
        pids = list()

    # Extract sample size
    N_1 = float(len(prediction[label_type]['patient_ID_train'][0]))
    N_2 = float(len(prediction[label_type]['patient_ID_test'][0]))

    # Convert tuples to lists if required
    if type(prediction[label_type]['X_test']) is tuple:
        prediction[label_type]['X_test'] = list(
            prediction[label_type]['X_test'])
        prediction[label_type]['X_train'] = list(
            prediction[label_type]['X_train'])
        prediction[label_type]['Y_train'] = list(
            prediction[label_type]['Y_train'])
        prediction[label_type]['Y_test'] = list(
            prediction[label_type]['Y_test'])
        prediction[label_type]['patient_ID_test'] = list(
            prediction[label_type]['patient_ID_test'])
        prediction[label_type]['patient_ID_train'] = list(
            prediction[label_type]['patient_ID_train'])
        prediction[label_type]['classifiers'] = list(
            prediction[label_type]['classifiers'])

    # Loop over the test sets, which correspond to cross-validation
    # or bootstrapping iterations
    n_iter = len(prediction[label_type]['Y_test'])
    if bootstrap:
        iterobject = range(0, bootstrap_N)
    else:
        iterobject = range(0, n_iter)

    for i in iterobject:
        print("\n")
        if bootstrap:
            print(f"Bootstrap {i + 1} / {bootstrap_N}.")
        else:
            print(f"Cross-validation {i + 1} / {n_iter}.")

        test_indices = list()

        # When bootstrapping, there is only a single train/test set.
        if bootstrap:
            if i == 0:
                X_test_temp_or = prediction[label_type]['X_test'][0]
                X_train_temp = prediction[label_type]['X_train'][0]
                Y_train_temp = prediction[label_type]['Y_train'][0]
                Y_test_temp_or = prediction[label_type]['Y_test'][0]
                test_patient_IDs_or = prediction[label_type]['patient_ID_test'][0]
                train_patient_IDs = prediction[label_type]['patient_ID_train'][0]
                fitted_model = prediction[label_type]['classifiers'][0]

                # Objects required for the first iteration
                test_patient_IDs = test_patient_IDs_or[:]
                X_test_temp = X_test_temp_or[:]
                Y_test_temp = Y_test_temp_or[:]
        else:
            X_test_temp = prediction[label_type]['X_test'][i]
            X_train_temp = prediction[label_type]['X_train'][i]
            Y_train_temp = prediction[label_type]['Y_train'][i]
            Y_test_temp = prediction[label_type]['Y_test'][i]
            test_patient_IDs = prediction[label_type]['patient_ID_test'][i]
            train_patient_IDs = prediction[label_type]['patient_ID_train'][i]
            fitted_model = prediction[label_type]['classifiers'][i]

        # Check which patients are in the test set.
        if output == 'stats' and crossval_type != 'LOO':
            for i_ID in test_patient_IDs:
                # Initiate counting how many times a patient is classified correctly
                if i_ID not in patient_classification_list:
                    patient_classification_list[i_ID] = dict()
                    patient_classification_list[i_ID]['N_test'] = 0
                    patient_classification_list[i_ID]['N_correct'] = 0
                    patient_classification_list[i_ID]['N_wrong'] = 0

                patient_classification_list[i_ID]['N_test'] += 1

                # Check if this is exactly the label of the patient within the label file
                if i_ID not in patient_IDs:
                    print(f'[WORC WARNING] Patient {i_ID} is not found in the patient labels, removing underscore.')
                    i_ID = i_ID.split("_")[0]
                    if i_ID not in patient_IDs:
                        print(f'[WORC WARNING] Did not help, excluding patient {i_ID}.')
                        continue

                test_indices.append(np.where(patient_IDs == i_ID)[0][0])

        # Extract ground truth
        y_truth = Y_test_temp

        # If required, shuffle estimators for "Random" ensembling
        if shuffle_estimators:
            print('Shuffling estimators for random ensembling.')
            shuffle(fitted_model.cv_results_['params'])

        # If requested, first let the SearchCV object create an ensemble
        if bootstrap and i > 0:
            # For bootstrapping, the ensemble is only created at the first iteration
            pass
        elif not fitted_model.ensemble:
            # If required, rank according to the generalization score instead
            # of the mean validation score
            if ensemble_metric == 'generalization':
                print('Using generalization score for estimator ranking.')
                indices = fitted_model.cv_results_['rank_generalization_score']
                fitted_model.cv_results_['params'] = [
                    fitted_model.cv_results_['params'][j] for j in indices[::-1]]
            elif ensemble_metric != 'Default':
                raise ae.WORCKeyError(
                    f'Metric {ensemble_metric} is not known: use Default or generalization.')

            # NOTE: Added for backwards compatibility
            if not hasattr(fitted_model, 'cv_iter'):
                cv_iter = list(fitted_model.cv.split(X_train_temp, Y_train_temp))
                fitted_model.cv_iter = cv_iter

            # Create the ensemble
            X_train_temp = [(x, feature_labels) for x in X_train_temp]
            fitted_model.create_ensemble(X_train_temp, Y_train_temp,
                                         method=ensemble, verbose=verbose,
                                         scoring=ensemble_scoring,
                                         overfit_scaler=overfit_scaler)

        # If bootstrapping, generate a bootstrapped sample
        if bootstrap and i > 0:
            y_truth, y_prediction, y_score, test_patient_IDs =\
                resample(y_truth_all, y_prediction_all,
                         y_score_all, test_patient_IDs_or)
        else:
            # Create prediction
            y_prediction = fitted_model.predict(X_test_temp)

            if regression:
                y_score = y_prediction
            elif modus == 'multilabel':
                y_score = fitted_model.predict_proba(X_test_temp)
            else:
                y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

            # Create a new binary score based on the thresholds if given
            if thresholds is not None:
                if len(thresholds) == 1:
                    y_prediction = y_score >= thresholds[0]
                elif len(thresholds) == 2:
                    # X_train_temp = [x[0] for x in X_train_temp]

                    y_score_temp = list()
                    y_prediction_temp = list()
                    y_truth_temp = list()
                    test_patient_IDs_temp = list()

                    thresholds_val = fit_thresholds(thresholds, fitted_model,
                                                    X_train_temp, Y_train_temp,
                                                    ensemble, ensemble_scoring)
                    for pnum in range(len(y_score)):
                        if y_score[pnum] <= thresholds_val[0] or y_score[pnum] > thresholds_val[1]:
                            y_score_temp.append(y_score[pnum])
                            y_prediction_temp.append(y_prediction[pnum])
                            y_truth_temp.append(y_truth[pnum])
                            test_patient_IDs_temp.append(test_patient_IDs[pnum])

                    perc = float(len(y_prediction_temp)) / float(len(y_prediction))
                    percentages_selected.append(perc)
                    print(f"Selected {len(y_prediction_temp)} from {len(y_prediction)} ({perc}%) patients using two thresholds.")
                    y_score = y_score_temp
                    y_prediction = y_prediction_temp
                    y_truth = y_truth_temp
                    test_patient_IDs = test_patient_IDs_temp
                else:
                    raise ae.WORCValueError(
                        f"Need None, one or two thresholds on the posterior; got {len(thresholds)}.")

        # If all scores are NaN, the classifier cannot produce probabilities,
        # thus use the hard predictions
        if np.sum(np.isnan(y_score)) == len(y_prediction):
            print('[WORC Warning] All scores NaN, replacing with prediction.')
            y_score = y_prediction

        if bootstrap and i == 0:
            # Save objects for re-use
            y_truth_all = y_truth[:]
            y_prediction_all = y_prediction[:]
            y_score_all = y_score[:]

        print("Truth: " + str(y_truth))
        print("Prediction: " + str(y_prediction))
        print("Score: " + str(y_score))

        if output == 'stats' and crossval_type != 'LOO':
            # Count whether each patient was classified correctly or not
            for i_truth, i_predict, i_test_ID in zip(y_truth, y_prediction,
                                                     test_patient_IDs):
                if modus == 'multilabel':
                    success = (i_truth == i_predict).all()
                else:
                    success = i_truth == i_predict

                if success:
                    patient_classification_list[i_test_ID]['N_correct'] += 1
                else:
                    patient_classification_list[i_test_ID]['N_wrong'] += 1

        if output in ['decision', 'scores'] or crossval_type == 'LOO':
            # Output the posteriors
            y_scores.append(y_score)
            y_truths.append(y_truth)
            y_predictions.append(y_prediction)
            pids.append(test_patient_IDs)

        elif output == 'stats':
            # Compute statistics
            print('Computing performance statistics.')
            # Compute confusion matrix and use for sensitivity/specificity
            performances = compute_statistics(y_truth, y_score, y_prediction,
                                              modus, regression)

            # Print AUC to keep you up to date
            if not regression:
                if modus == 'singlelabel':
                    accuracy_temp, bca_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp = performances
                else:
                    accuracy_temp, sensitivity_temp,\
                        specificity_temp, precision_temp, npv_temp,\
                        f1_score_temp, auc_temp, acc_av_temp,\
                        accuracy_temp_single,\
                        bca_temp_single, sensitivity_temp_single,\
                        specificity_temp_single, precision_temp_single,\
                        npv_temp_single, f1_score_temp_single,\
                        auc_temp_single = performances

                print('AUC: ' + str(auc_temp))

                # Append performance to lists for all cross-validations
                accuracy.append(accuracy_temp)
                if modus == 'singlelabel':
                    # Overall BCA is only computed in the singlelabel modus
                    bca.append(bca_temp)
                sensitivity.append(sensitivity_temp)
                specificity.append(specificity_temp)
                auc.append(auc_temp)
                f1_score_list.append(f1_score_temp)
                precision.append(precision_temp)
                npv.append(npv_temp)

                if modus == 'multilabel':
                    acc_av.append(acc_av_temp)
                    for j in range(n_labels):
                        accuracy_single[j].append(accuracy_temp_single[j])
                        bca_single[j].append(bca_temp_single[j])
                        sensitivity_single[j].append(sensitivity_temp_single[j])
                        specificity_single[j].append(specificity_temp_single[j])
                        auc_single[j].append(auc_temp_single[j])
                        f1_score_list_single[j].append(f1_score_temp_single[j])
                        precision_single[j].append(precision_temp_single[j])
                        npv_single[j].append(npv_temp_single[j])

            else:
                r2score_temp, MSE_temp, coefICC_temp, PearsonC_temp,\
                    PearsonP_temp, SpearmanC_temp,\
                    SpearmanP_temp = performances
                print('R2 Score: ' + str(r2score_temp))

                r2score.append(r2score_temp)
                MSE.append(MSE_temp)
                coefICC.append(coefICC_temp)
                PearsonC.append(PearsonC_temp)
                PearsonP.append(PearsonP_temp)
                SpearmanC.append(SpearmanC_temp)
                SpearmanP.append(SpearmanP_temp)

        # Delete some objects to save memory in cross-validation
        if not bootstrap:
            del fitted_model, X_test_temp, X_train_temp, Y_train_temp
            del Y_test_temp, test_patient_IDs, train_patient_IDs
            prediction[label_type]['X_test'][i] = None
            prediction[label_type]['X_train'][i] = None
            prediction[label_type]['Y_train'][i] = None
            prediction[label_type]['Y_test'][i] = None
            prediction[label_type]['patient_ID_test'][i] = None
            prediction[label_type]['patient_ID_train'][i] = None
            prediction[label_type]['classifiers'][i] = None

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, pids

    elif output == 'stats':
        # Compute statistics
        stats = dict()
        output = dict()
        if crossval_type == 'LOO':
            performances = compute_statistics(y_truths, y_scores,
                                              y_predictions, modus, regression)

            if not regression:
                metric_names_single = ['Accuracy', 'BCA', 'Sensitivity',
                                       'Specificity', 'Precision', 'NPV',
                                       'F1-score', 'AUC']
                if modus == 'singlelabel':
                    metric_names = metric_names_single
                elif modus == 'multilabel':
                    metric_names_multi = ['Accuracy', 'Sensitivity',
                                          'Specificity', 'Precision', 'NPV',
                                          'F1-score', 'AUC', 'Average Accuracy']
                    metric_names = metric_names_multi + metric_names_single
            else:
                # Regression
                metric_names = ['R2-score', 'MSE', 'ICC', 'PearsonC',
                                'PearsonP', 'SpearmanC', 'SpearmanP']

            # Put all metrics with their names in the statistics dict
            for k, v in zip(metric_names, performances):
                stats[k] = str(v)

            if thresholds is not None:
                if len(thresholds) == 2:
                    # Compute the percentage of patients that was selected
                    stats["Percentage Selected"] = str(percentages_selected[0])

            output['Statistics'] = stats
        else:
            # Compute alpha confidence intervals (CIs)
            # FIXME: multilabel performance per single label not included
            # FIXME: multilabel not working in bootstrap
            # FIXME: bootstrap not done in regression
            if not regression:
                metric_names_single = ['Accuracy', 'BCA', 'Sensitivity',
                                       'Specificity', 'Precision', 'NPV',
                                       'F1-score', 'AUC']

                if bootstrap:
                    # Compute the performance once on the real test set
                    X_test_temp = prediction[label_type]['X_test'][0]
                    y_truth = prediction[label_type]['Y_test'][0]
                    y_prediction = fitted_model.predict(X_test_temp)
                    y_score = fitted_model.predict_proba(X_test_temp)[:, 1]

                    performances_test =\
                        metrics.performance_singlelabel(y_truth, y_prediction,
                                                        y_score, regression)

                    # Aggregate the bootstrapped performances
                    performances_bootstrapped =\
                        [accuracy, bca, sensitivity, specificity, precision,
                         npv, f1_score_list, auc]

                    # Compute confidence intervals for all metrics
                    for p in range(len(metric_names_single)):
                        k = metric_names_single[p] + ' 95%:'
                        perf = performances_bootstrapped[p]
                        perf_test = performances_test[p]
                        stats[k] = f"{perf_test} {str(compute_confidence_bootstrap(perf, perf_test, N_1, alpha))}"
                else:
                    stats["Accuracy 95%:"] = f"{np.nanmean(accuracy)} {str(compute_confidence(accuracy, N_1, N_2, alpha))}"
                    stats["BCA 95%:"] = f"{np.nanmean(bca)} {str(compute_confidence(bca, N_1, N_2, alpha))}"
                    stats["AUC 95%:"] = f"{np.nanmean(auc)} {str(compute_confidence(auc, N_1, N_2, alpha))}"
                    stats["F1-score 95%:"] = f"{np.nanmean(f1_score_list)} {str(compute_confidence(f1_score_list, N_1, N_2, alpha))}"
                    stats["Precision 95%:"] = f"{np.nanmean(precision)} {str(compute_confidence(precision, N_1, N_2, alpha))}"
                    stats["NPV 95%:"] = f"{np.nanmean(npv)} {str(compute_confidence(npv, N_1, N_2, alpha))}"
                    stats["Sensitivity 95%:"] = f"{np.nanmean(sensitivity)} {str(compute_confidence(sensitivity, N_1, N_2, alpha))}"
                    stats["Specificity 95%:"] = f"{np.nanmean(specificity)} {str(compute_confidence(specificity, N_1, N_2, alpha))}"

                    if modus == 'multilabel':
                        stats["Average Accuracy 95%:"] = f"{np.nanmean(acc_av)} {str(compute_confidence(acc_av, N_1, N_2, alpha))}"

                if thresholds is not None:
                    if len(thresholds) == 2:
                        # Compute the percentage of patients that was selected
                        stats["Percentage Selected 95%:"] = f"{np.nanmean(percentages_selected)} {str(compute_confidence(percentages_selected, N_1, N_2, alpha))}"

                # Extract statistics on how often patients got classified correctly
                rankings = dict()
                alwaysright = dict()
                alwayswrong = dict()
                percentages = dict()
                timesintestset = dict()
                for i_ID in patient_classification_list:
                    percentage_right = \
                        patient_classification_list[i_ID]['N_correct'] / \
                        float(patient_classification_list[i_ID]['N_test'])

                    if i_ID in patient_IDs:
                        label = labels[0][np.where(i_ID == patient_IDs)]
                    else:
                        # Multiple instances of one patient
                        label = labels[0][np.where(i_ID.split('_')[0] == patient_IDs)]

                    label = label[0][0]
                    percentages[i_ID] = str(label) + ': ' + \
                        str(round(percentage_right, 2) * 100) + '%'
                    if percentage_right == 1.0:
                        alwaysright[i_ID] = label
                        print(f"Always Right: {i_ID}, label {label}.")
                    elif percentage_right == 0:
                        alwayswrong[i_ID] = label
                        print(f"Always Wrong: {i_ID}, label {label}.")

                    timesintestset[i_ID] = \
                        patient_classification_list[i_ID]['N_test']

                rankings["Always right"] = alwaysright
                rankings["Always wrong"] = alwayswrong
                rankings['Percentages'] = percentages
                rankings['timesintestset'] = timesintestset

                output['Rankings'] = rankings

            else:
                # Regression
                stats['R2-score 95%:'] = f"{np.nanmean(r2score)} {str(compute_confidence(r2score, N_1, N_2, alpha))}"
                stats['MSE 95%:'] = f"{np.nanmean(MSE)} {str(compute_confidence(MSE, N_1, N_2, alpha))}"
                stats['ICC 95%:'] = f"{np.nanmean(coefICC)} {str(compute_confidence(coefICC, N_1, N_2, alpha))}"
                stats['PearsonC 95%:'] = f"{np.nanmean(PearsonC)} {str(compute_confidence(PearsonC, N_1, N_2, alpha))}"
                stats['PearsonP 95%:'] = f"{np.nanmean(PearsonP)} {str(compute_confidence(PearsonP, N_1, N_2, alpha))}"
                stats['SpearmanC 95%:'] = f"{np.nanmean(SpearmanC)} {str(compute_confidence(SpearmanC, N_1, N_2, alpha))}"
                stats['SpearmanP 95%:'] = f"{np.nanmean(SpearmanP)} {str(compute_confidence(SpearmanP, N_1, N_2, alpha))}"

        # Print all confidence intervals and add them to the output
        stats = OrderedDict(sorted(stats.items()))
        for k, v in stats.items():
            print(f"{k} : {v}.")

        output['Statistics'] = stats
        return output

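# Hedged usage sketch for plot_estimator_performance. The paths and label name
# below are hypothetical placeholders: 'estimator.hdf5' is assumed to be the
# HDF5 output of WORC's trainclassifier and 'pinfo.csv' a label file in the
# format documented on the WORC wiki.
def _example_plot_estimator_performance():
    """Sketch: get confidence intervals versus raw per-iteration scores."""
    # Default usage: return performance statistics and patient rankings
    performance = plot_estimator_performance('estimator.hdf5', 'pinfo.csv',
                                             label_type='Label1',
                                             output='stats')
    print(performance['Statistics'])

    # Alternative usage: return the truths, scores, predictions and patient
    # IDs per cross-validation iteration
    y_truths, y_scores, y_predictions, pids = \
        plot_estimator_performance('estimator.hdf5', 'pinfo.csv',
                                   label_type='Label1', output='scores')
    return performance, y_truths, y_scores, y_predictions, pids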