def train_and_test_model_svm_sklearn(clf,datasets): testset = datasets['testset'] finaltrainset = datasets['finaltrainset'] X_test = np.array([x for x,y in testset]) Y_test = np.array([y for x,y in testset]) X_finaltrain = np.array([x for x,y in finaltrainset]) Y_finaltrain = np.array([y for x,y in finaltrainset]) st_time = time.time() clf.fit(X_finaltrain, Y_finaltrain) print 'Testing...' outputs = np.zeros(len(X_test)) probabilities = np.zeros((len(X_test),len(clf.classes_))) minibatch_size = int(len(X_test)/200000)+1; #minibatch_size = 5 chunked_testset = np.array_split( X_test,minibatch_size) outputs = np.array([]).reshape(1,-1) probabilities = np.array([]).reshape(-1,len(clf.classes_)) for i,test_batch in enumerate(chunked_testset): output_batch = clf.predict(test_batch) outputs = np.c_[outputs, output_batch.reshape(1,-1)] probabilities_batch = clf.predict_proba(test_batch) probabilities = np.r_[probabilities, probabilities_batch.reshape(-1, len(clf.classes_))] ed_time = time.time() print 'timertookd='+ str(ed_time - st_time) outputs = outputs[0] id_to_class = {} for label, id in testset.class_to_id.iteritems(): id_to_class[id] = label # Ground truth lbl = datasets['ground_truth'] auto_lbl = np.array([int(id_to_class[output]) for output in outputs]) # Predicted labels len_bg = testset.metadata['len_bg'] lbl = np.append(lbl, [0]*len_bg) auto_lbl = np.append(auto_lbl, [0]*len_bg) (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl) dice = dice[~np.isnan(dice)] print dice.mean() return dice.mean()
def train_and_test(svm, datasets):
    """Train ``svm`` on the final training set, test it, and return the
    mean Dice score with NaN entries excluded.

    ``datasets`` must provide 'finaltrainset', 'testset' (exposing
    ``class_to_id`` and ``metadata['len_bg']``) and 'ground_truth'.
    """
    testset = datasets['testset']
    finaltrainset = datasets['finaltrainset']

    svm.train(finaltrainset)
    outputs, costs = svm.test(testset)

    # Invert the label -> id mapping so numeric predictions can be decoded.
    id_to_class = dict((id, label) for label, id in testset.class_to_id.iteritems())

    lbl = datasets['ground_truth']  # Ground truth
    auto_lbl = np.array([int(id_to_class[output[0]]) for output in outputs])  # Predicted labels

    # Background points are label 0 in both the truth and the prediction.
    len_bg = testset.metadata['len_bg']
    lbl = np.append(lbl, [0] * len_bg)
    auto_lbl = np.append(auto_lbl, [0] * len_bg)

    (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)
    return dice[~np.isnan(dice)].mean()
# NOTE(review): this flattened line contains a complete helper, measure_to_string
# (fixed-width formatting of metric values), immediately followed by statements that
# reference names not defined at this level (testset, lbl, auto_lbl, string_debug,
# results_path, dataset_name, best_hyperparams, hyper_to_string) — presumably the
# tail of an enclosing SVM reporting function lost when the file was flattened;
# confirm against the original layout before reformatting. This line is a
# near-duplicate of the following one.
def measure_to_string(measure): result = '' for value in measure: result += str(value)[:5].rjust(6) return result result = '' # Fill prediction / ground truth with zeros for all background points len_bg = testset.metadata['len_bg'] lbl = np.append(lbl, [0] * len_bg) auto_lbl = np.append(auto_lbl, [0] * len_bg) (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl) lbl_result = string_debug(lbl) auto_lbl_result = string_debug(auto_lbl) results_file = 'libsvm_measures.txt' if not os.path.exists(results_path + results_file): result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n' result += 'Dataset : ' + dataset_name + '\n' result += 'Model : SVM\n' result += 'Best hyperparameters : ' + hyper_to_string(best_hyperparams) + '\n' result += 'Ground truth : \n' + lbl_result result += 'Prediction : \n' + auto_lbl_result
# NOTE(review): near-duplicate of the previous flattened line (only the `[0]*len_bg`
# spacing differs) — likely two copies of the same SVM report-building function tail
# from different revisions. As above, the statements after `return result` reference
# names undefined at this level (testset, lbl, string_debug, results_path, ...),
# so the enclosing function header is not visible here; verify before refactoring
# or de-duplicating.
def measure_to_string(measure): result = '' for value in measure: result += str(value)[:5].rjust(6) return result result = '' # Fill prediction / ground truth with zeros for all background points len_bg = testset.metadata['len_bg'] lbl = np.append(lbl, [0]*len_bg) auto_lbl = np.append(auto_lbl, [0]*len_bg) (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl) lbl_result = string_debug(lbl) auto_lbl_result = string_debug(auto_lbl) results_file = 'libsvm_measures.txt' if not os.path.exists(results_path + results_file): result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n' result += 'Dataset : ' + dataset_name + '\n' result += 'Model : SVM\n' result += 'Best hyperparameters : ' + hyper_to_string(best_hyperparams) + '\n' result += 'Ground truth : \n' + lbl_result result += 'Prediction : \n' + auto_lbl_result
# NOTE(review): fragment — no `def` header is visible, so this is the body tail of a
# measure_to_string-style helper plus the KNN variant of the report builder (writes
# 'sklearn_knn_measures.txt', hyperparameter K). The background-padding statements
# are commented out here via a triple-quoted string, unlike the SVM variants above —
# confirm whether that is intentional. Do not reformat without recovering the
# enclosing definition from the original file.
result = '' for value in measure: result += str(value)[:5].rjust(6) return result result = '' # Fill prediction / ground truth with zeros for all background points """ len_bg = testset.metadata['len_bg'] lbl = np.append(lbl, [0]*len_bg) auto_lbl = np.append(auto_lbl, [0]*len_bg) """ (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl) lbl_result = string_debug(lbl) auto_lbl_result = string_debug(auto_lbl) results_file = 'sklearn_knn_measures.txt' if not os.path.exists(results_path + results_file): result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n' result += 'Dataset : ' + dataset_name + '\n' result += 'Model : KNN\n' result += 'Best hyperparameters : ' + hyper_to_string(K) + '\n' result += 'Ground truth : \n' + lbl_result result += 'Prediction : \n' + auto_lbl_result
def svm_model(datasets): print "Setting hyperparameters gridsearch..." best_hyperparams = None best_val_error = np.inf finaltrainset = datasets['finaltrainset'] trainset = datasets['trainset'] validset = datasets['validset'] testset = datasets['testset'] lbl = datasets['ground_truth'] output_probabilities = True # Or False! kernels = ['rbf','sigmoid'] #degrees = [1,2,3,4,5,7,10,15] gammas = [0.01,0.1,1,5,10,50,100,200,500,1000] #coef0s = [-10,-1,-0.1,-0.01,0,0.001,0.01,0.1,1,2,5,10,20] Cs = [1,5,10,25,50,75,100,200,500,1000,1500] hyperparams_grid = [] # Rbf kernel parameters start_time = time.clock() for gamma in gammas: for C in Cs: hyperparams_grid.append(['rbf', 3, gamma, 0, C]) use_weights = None if use_weights: label_weights = finaltrainset.metadata['label_weights'] else: label_weights = None output_probabilities = False # Or False! print "Pretraining..." for params in hyperparams_grid: try: # Create SVMClassifier with hyper-parameters svm = SVMClassifier(shrinking=True, kernel=params[0],degree=params[1],gamma=params[2],coef0=params[3],C=params[4],label_weights=label_weights, output_probabilities=output_probabilities) except Exception as inst: print "Error while instantiating SVMClassifier (required hyper-parameters are probably missing)" print inst sys.exit() svm.train(trainset) outputs, costs = svm.test(validset) errors = compute_error_mean_and_sterror(costs) error = errors[0] if error < best_val_error: best_val_error = error best_hyperparams = params print print 'Classification error on valid set : ' + str(best_val_error) print print "Training..." # Train SVM with best hyperparams on train + validset #gamma = 5.0 #C = 1 #best_hyperparams = ['rbf',3,gamma,0,C] best_svm = SVMClassifier(shrinking=True, kernel=best_hyperparams[0],degree=best_hyperparams[1],gamma=best_hyperparams[2],coef0=best_hyperparams[3],C=best_hyperparams[4],label_weights=label_weights, output_probabilities=output_probabilities) best_svm.train(finaltrainset) print 'Testing...' 
outputs, costs = best_svm.test(testset) end_time = time.clock() processing_time = end_time - start_time errors = compute_error_mean_and_sterror(costs) error = errors[0] print print 'Classification error on test set : ' + str(error) print "****************************************" # Evaluation (compute_statistics.py) id_to_class = {} for label, id in testset.class_to_id.iteritems(): id_to_class[id] = label auto_lbl = np.array([int(id_to_class[output[0]]) for output in outputs]) # Predicted labels len_bg = testset.metadata['len_bg'] lbl = np.append(lbl, [0]*len_bg) auto_lbl = np.append(auto_lbl, [0]*len_bg) (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl) dice = dice[~np.isnan(dice)] return [dice.mean(), processing_time]