def support_vector_machine(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("SUPPORT VECTOR MACHINE.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))

    # build train/test sets restricted to the features of this sensors set
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)

    # fit the scaler on the training features and apply the same scaling to the test features
    train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)

    # train an SVM with the hyperparameters tuned for this sensors set
    classifier_svm = SVC(C=const.PAR_SVM_C[sensors_set],
                         gamma=const.PAR_SVM_GAMMA[sensors_set],
                         verbose=False)
    classifier_svm.fit(train_features_scaled, train_classes)

    test_prediction = classifier_svm.predict(test_features_scaled)
    acc = accuracy_score(test_classes, test_prediction)
    print("ACCURACY : " + str(acc))
    print("END SUPPORT VECTOR MACHINE.....")

    # write the accuracy to the results file for this sensors set
    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    file_content = "acc\n" + str(acc)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_SUPPORT_VECTOR_MACHINE_RESULTS, 'w') as f:
        f.write(file_content)
def neural_network(self, sensors_set):
    features = list(self.dataset.get_sensors_set_features(sensors_set))
    print("NEURAL NETWORK.....")
    print("CLASSIFICATION BASED ON THESE SENSORS: ", self.dataset.get_remained_sensors(sensors_set))
    print("NUMBER OF FEATURES: ", len(features))

    # build train/test sets restricted to the features of this sensors set
    train_features, train_classes, test_features, test_classes = self.__get_sets_for_classification(
        self.dataset.get_train, self.dataset.get_test, features)

    # fit the scaler on the training features and apply the same scaling to the test features
    train_features_scaled, test_features_scaled = util.scale_features(train_features, test_features)

    # train an MLP with the hyperparameters tuned for this sensors set
    classifier_nn = MLPClassifier(hidden_layer_sizes=(const.PAR_NN_NEURONS[sensors_set],),
                                  alpha=const.PAR_NN_ALPHA[sensors_set],
                                  max_iter=const.PAR_NN_MAX_ITER,
                                  tol=const.PAR_NN_TOL)
    classifier_nn.fit(train_features_scaled, train_classes)

    test_prediction = classifier_nn.predict(test_features_scaled)
    acc = accuracy_score(test_classes, test_prediction)
    print("ACCURACY : " + str(acc))
    print("END NEURAL NETWORK.....")

    # write the accuracy to the results file for this sensors set
    if not os.path.exists(const.DIR_RESULTS):
        os.makedirs(const.DIR_RESULTS)
    file_content = "acc\n" + str(acc)
    with open(const.DIR_RESULTS + "/" + str(sensors_set) + const.FILE_NEURAL_NETWORK_RESULTS, 'w') as f:
        f.write(file_content)
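# Both methods above delegate feature scaling to util.scale_features(train, test). Below is a
# minimal sketch of such a helper as it might live in util.py: fit the scaler on the training
# features only and reuse it for the test features, so that no test-set statistics leak into
# training. The use of sklearn's MinMaxScaler here is an assumption, not confirmed by this
# repository.
from sklearn.preprocessing import MinMaxScaler

def scale_features(train_features, test_features):
    # fit the scaler on the training features only
    scaler = MinMaxScaler()
    train_scaled = scaler.fit_transform(train_features)
    # apply the identical transformation to the test features
    test_scaled = scaler.transform(test_features)
    return train_scaled, test_scaled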
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using logistic regression.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Logistic regression classifier model building
        - Segmentation using logistic regression on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector precomputed by preprocessAndStore.py
    with open('data_train.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)
    with open('labels_train.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)

    # perform a grid search over the parameter grid and choose the optimal parameter C
    param_grid = {'C': [0.5, 1, 2.5, 50, 1000]}  # candidate values for the regularization parameter C
    log_reg_classifier = model_selection.GridSearchCV(sk.LogisticRegression(class_weight='balanced'),
                                                      param_grid, refit=True)

    # fit the scaler on the training data and keep it for scaling the test data later
    data_train_scaled, scaler = util.scale_features(data_train)

    start_time = timeit.default_timer()
    log_reg_classifier.fit(data_train_scaled, labels_train)

    util.print_feature_importance(log_reg_classifier.best_estimator_.coef_)
    util.print_class_count(labels_train)
    print("best estimator: ", log_reg_classifier.best_estimator_)
    print("best parameter: ", log_reg_classifier.best_params_)

    # store the trained logistic regression classifier and the scaler
    with open('log_regr.pckl', 'wb') as file_id:
        pickle.dump(log_reg_classifier, file_id)
    with open('scaler.pckl', 'wb') as file_id:
        pickle.dump(scaler, file_id)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
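# The segmentation scripts call util.scale_features in two modes: scale_features(data) at
# training time, returning the scaled data together with the fitted scaler, and
# scale_features(data, scaler) at test time, reusing the stored scaler (see the testing
# scripts below). A minimal sketch of such a helper; the use of sklearn's StandardScaler is
# an assumption, not confirmed by this repository.
from sklearn.preprocessing import StandardScaler

def scale_features(data, scaler=None):
    if scaler is None:
        # training mode: fit a new scaler on the data
        scaler = StandardScaler()
        data_scaled = scaler.fit_transform(data)
    else:
        # testing mode: reuse the scaler fitted on the training data
        data_scaled = scaler.transform(data)
    return data_scaled, scaler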
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using a pre-trained RBF SVM.

    The main routine executes the medical image analysis pipeline:

        - Segmentation using the RBF SVM classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0003, 0.004, 0.003, 0.04, 0.04, 0.02]}
                          # alternative: [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]

    print('-' * 5, 'Testing...')

    # load the trained classifier and the scaler fitted on the training data
    with open('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        svm_rbf_classifier = pickle.load(file_id)
    with open('scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl', 'rb') as file_id:
        scaler = pickle.load(file_id)

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        # scale the features with the scaler fitted on the training data
        scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        start_time = timeit.default_timer()
        predictions = svm_rbf_classifier.predict(scaled_features)
        # probabilities = svm_rbf_classifier.predict_proba(scaled_features)  # requires probability=True at training
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction (and, if computed, probabilities) back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        # image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        # images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing (disabled in this variant)
    # post_process_params = {'crf_post': False}
    # images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
    #                                                  post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        # evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
        #                    img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i],
                        os.path.join(result_dir,
                                     images_test[i].id_ + '_SEG_SVM_fullset_C15_G5_lotofpointspersample.mha'),
                        True)
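# The testing script above loads a classifier/scaler pair from pickle files with a recurring
# open/load pattern. A small hypothetical convenience helper for this (not part of the
# repository):
import pickle

def load_model(classifier_path, scaler_path):
    # load the trained classifier and the scaler fitted on the training data
    with open(classifier_path, 'rb') as f:
        classifier = pickle.load(f)
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    return classifier, scaler

# usage sketch:
# svm_rbf_classifier, scaler = load_model('svm_rbf_fullset_C15_G5_lotofpointspersample.pckl',
#                                         'scaler_rbf_fullset_C15_G5_lotofpointspersample.pckl')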
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str, ml_method: str,
         verbose: bool):
    """Brain tissue segmentation using a selectable classifier.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Classifier model building (random forest, linear SVM, RBF SVM, or logistic regression)
        - Segmentation using the classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training ' + ml_method + '...')

    # crawl the training image directories
    crawler = load.FileSystemDataCrawler(data_train_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    pre_process_params = {'zscore_pre': True,
                          'registration_pre': False,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'second_oder_coordinate_feature': False,
                          'label_percentages': [0.0005, 0.005, 0.005, 0.05, 0.09, 0.022]}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()

    if verbose:
        util.print_class_count(labels_train)

    start_time = timeit.default_timer()

    if ml_method == 'random_forest':
        classifier = sk_ensemble.RandomForestClassifier(max_features=images[0].feature_matrix[0].shape[1],
                                                        n_estimators=20,
                                                        max_depth=25)
        data_train_scaled = data_train  # do not scale features to keep the original RF behavior
    elif ml_method == 'svm_linear':
        classifier = svm.SVC(kernel='linear', C=1, class_weight='balanced',
                             probability=True)  # probability=True enables predict_proba for post-processing
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'svm_rbf':
        classifier = svm.SVC(kernel='rbf', C=15, gamma=5, class_weight='balanced',
                             decision_function_shape='ovo',
                             probability=True)  # probability=True enables predict_proba for post-processing
        data_train_scaled, scaler = util.scale_features(data_train)
    elif ml_method == 'logistic_regression':
        classifier = linear_model.LogisticRegression(class_weight='balanced')
        data_train_scaled, scaler = util.scale_features(data_train)
    else:
        raise ValueError('No valid segmentation algorithm selected in argument ml_method')

    classifier.fit(data_train_scaled, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    # print and plot feature importance for each structure
    if verbose:
        if ml_method == 'svm_linear':
            util.print_feature_importance(classifier.coef_)
            util.plot_feature_importance(classifier.coef_, result_dir)
        if ml_method == 'random_forest':
            util.print_feature_importance(classifier.feature_importances_)
            util.plot_feature_importance(classifier.feature_importances_, result_dir)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator(result_dir)

    # crawl the test image directories
    crawler = load.FileSystemDataCrawler(data_test_dir,
                                         IMAGE_KEYS,
                                         futil.BrainImageFilePathGenerator(),
                                         futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=True)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()

        if ml_method == 'random_forest':
            scaled_features = img.feature_matrix[0]  # the random forest was trained on unscaled features
        else:
            scaled_features, _ = util.scale_features(img.feature_matrix[0], scaler)

        predictions = classifier.predict(scaled_features)
        probabilities = classifier.predict_proba(scaled_features)
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'crf_post': False}
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'),
                        True)
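# A plausible command-line entry point for the pipeline above. The argument names mirror the
# signature of main(); the default paths are assumptions and would need to match the actual
# repository layout.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Brain tissue segmentation pipeline')
    parser.add_argument('--result_dir', type=str, default='./results')
    parser.add_argument('--data_atlas_dir', type=str, default='./data/atlas')
    parser.add_argument('--data_train_dir', type=str, default='./data/train')
    parser.add_argument('--data_test_dir', type=str, default='./data/test')
    parser.add_argument('--ml_method', type=str, default='random_forest',
                        choices=['random_forest', 'svm_linear', 'svm_rbf', 'logistic_regression'])
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    main(args.result_dir, args.data_atlas_dir, args.data_train_dir, args.data_test_dir,
         args.ml_method, args.verbose)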
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using an RBF SVM.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - RBF SVM classifier model building
        - Segmentation using the RBF SVM classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector precomputed by preprocessAndStore.py
    with open('data_train.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)
    with open('labels_train.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)

    # optional: perform a grid search over the parameter grid and choose the optimal parameters
    # Cs = [10, 12, 15, 20]        # best found: 15
    # gammas = [1, 2, 3, 5, 10]    # best found: 10
    # param_grid = {'C': Cs, 'gamma': gammas}
    # svm_rbf_classifier = model_selection.GridSearchCV(svm.SVC(kernel='rbf'), param_grid, verbose=1)

    data_train_scaled, scaler = util.scale_features(data_train)

    # print how many labels of each class were selected by the mask
    util.print_class_count(labels_train)

    # used if GridSearchCV is not used
    svm_rbf_classifier = svm.SVC(kernel='rbf', C=15, gamma=10, class_weight='balanced',
                                 decision_function_shape='ovo')

    start_time = timeit.default_timer()
    print("start training")
    # for position features only: svm_rbf_classifier.fit(data_train_scaled[:, 0:3], labels_train)
    svm_rbf_classifier.fit(data_train_scaled, labels_train)

    # note: coef_ is only defined for linear kernels, so feature importance cannot be printed
    # for the RBF SVM; with GridSearchCV, the best estimator and parameters could be printed:
    # print("best estimator: ", svm_rbf_classifier.best_estimator_)
    # print("best parameter: ", svm_rbf_classifier.best_params_)

    # without GridSearchCV:
    print("best estimator: ", svm_rbf_classifier)
    print("estimator dual_coef_: ", svm_rbf_classifier.dual_coef_)

    # store the trained classifier and the scaler
    with open('svm_rbf_fullset_C15_G5.pckl', 'wb') as file_id:
        pickle.dump(svm_rbf_classifier, file_id)
    with open('scaler_rbf_fullset_C15_G5.pckl', 'wb') as file_id:
        pickle.dump(scaler, file_id)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
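# util.print_class_count reports how many samples of each tissue class were selected by the
# sampling mask. A minimal sketch of what such a helper is assumed to do (an assumption, not
# confirmed by this repository):
import numpy as np

def print_class_count(labels):
    # count the occurrences of each class label in the training vector
    classes, counts = np.unique(labels, return_counts=True)
    for c, n in zip(classes, counts):
        print('label', c, ':', n, 'samples')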
def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using a linear SVM.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Linear SVM classifier model building
        - Segmentation using the linear SVM classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """

    # load atlas images
    putil.load_atlas_images(data_atlas_dir)

    print('-' * 5, 'Training...')

    # load feature matrix and label vector precomputed by preprocessAndStore.py
    with open('data_train_reduced2.pckl', 'rb') as file_id:
        data_train = pickle.load(file_id)
    with open('labels_train_reduced.pckl', 'rb') as file_id:
        labels_train = pickle.load(file_id)

    # optional: perform a grid search over the parameter grid and choose the optimal parameter C
    # param_grid = {'C': [2, 3, 4, 5, 10, 20, 100]}  # candidate values for the regularization parameter C
    # svm_classifier = model_selection.GridSearchCV(svm.LinearSVC(C=1, class_weight='balanced', dual=False),
    #                                               param_grid, verbose=1)

    data_train_scaled, scaler = util.scale_features(data_train)
    util.print_class_count(labels_train)

    # use balanced class weights to include classes with small sample size;
    # solve the primal problem since n_features < n_samples
    svm_classifier = svm.LinearSVC(C=1, class_weight='balanced', dual=False)

    start_time = timeit.default_timer()
    svm_classifier.fit(data_train_scaled, labels_train)

    # util.print_feature_importance(svm_classifier.coef_)
    util.plot_feature_importance(svm_classifier.coef_)
    # with GridSearchCV:
    # print(svm_classifier.best_params_)
    # print(svm_classifier.best_estimator_)

    # store the trained SVM and the scaler
    with open('svm_linear.pckl', 'wb') as file_id:
        pickle.dump(svm_classifier, file_id)
    with open('scaler.pckl', 'wb') as file_id:
        pickle.dump(scaler, file_id)

    print(' Time elapsed:', timeit.default_timer() - start_time, 's')
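# util.plot_feature_importance visualizes the per-class coefficients of the linear SVM. A
# minimal sketch of such a plot, assuming matplotlib and one coefficient row per class (both
# assumptions, not confirmed by this repository):
import matplotlib.pyplot as plt
import numpy as np

def plot_feature_importance(coef, out_path='feature_importance.png'):
    coef = np.atleast_2d(coef)
    fig, ax = plt.subplots()
    for class_idx, row in enumerate(coef):
        # plot the absolute coefficient magnitude per feature for each class
        ax.plot(np.abs(row), label='class {}'.format(class_idx))
    ax.set_xlabel('feature index')
    ax.set_ylabel('|coefficient|')
    ax.legend()
    fig.savefig(out_path)
    plt.close(fig)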