Example #1
import time

import numpy as np

import compute_statistics  # project module providing the evaluation metrics


def train_and_test_model_svm_sklearn(clf, datasets):

    testset = datasets['testset']
    finaltrainset = datasets['finaltrainset']
    X_test = np.array([x for x,y in testset])
    Y_test = np.array([y for x,y in testset])
    X_finaltrain = np.array([x for x,y in finaltrainset])
    Y_finaltrain = np.array([y for x,y in finaltrainset])
    st_time = time.time()
    clf.fit(X_finaltrain, Y_finaltrain)

    print('Testing...')

    # Split the test set into chunks of at most ~200000 rows so prediction
    # does not exhaust memory; np.array_split takes the number of sections.
    n_chunks = int(len(X_test) / 200000) + 1
    chunked_testset = np.array_split(X_test, n_chunks)

    outputs = np.array([]).reshape(1, -1)
    probabilities = np.array([]).reshape(-1, len(clf.classes_))
     
    for test_batch in chunked_testset:
        # Accumulate hard predictions and class probabilities chunk by chunk.
        output_batch = clf.predict(test_batch)
        outputs = np.c_[outputs, output_batch.reshape(1, -1)]

        probabilities_batch = clf.predict_proba(test_batch)
        probabilities = np.r_[probabilities, probabilities_batch.reshape(-1, len(clf.classes_))]

    ed_time = time.time()
    print('Time taken = ' + str(ed_time - st_time))
    outputs = outputs[0]  # Flatten the (1, n) row back to a 1-D array
    
    # Invert the class mapping so integer outputs map back to label names.
    id_to_class = {}
    for label, id in testset.class_to_id.items():
        id_to_class[id] = label

    # Ground truth
    lbl = datasets['ground_truth']
    auto_lbl = np.array([int(id_to_class[output]) for output in outputs])  # Predicted labels

    # Fill prediction / ground truth with zeros for all background points
    len_bg = testset.metadata['len_bg']
    lbl = np.append(lbl, [0] * len_bg)
    auto_lbl = np.append(auto_lbl, [0] * len_bg)

    (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)
    dice = dice[~np.isnan(dice)]  # Drop labels absent from both arrays
    print(dice.mean())
    return dice.mean()
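
A minimal usage sketch for the function above (hypothetical driver code): datasets must follow the structure the function reads, i.e. a 'testset' iterable of (x, y) pairs exposing class_to_id and metadata, plus 'finaltrainset' and 'ground_truth' entries. predict_proba requires an SVC constructed with probability=True.

from sklearn.svm import SVC

clf = SVC(kernel='rbf', gamma=0.1, C=10, probability=True)  # probability=True enables predict_proba
mean_dice = train_and_test_model_svm_sklearn(clf, datasets)  # datasets assembled by the caller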
Example #2
def train_and_test(svm, datasets):

    testset = datasets['testset']
    finaltrainset = datasets['finaltrainset']
    svm.train(finaltrainset)
    outputs, costs = svm.test(testset)
    

    id_to_class = {}
    for label, id in testset.class_to_id.items():
        id_to_class[id] = label

    # Ground truth
    lbl = datasets['ground_truth']
    auto_lbl = np.array([int(id_to_class[output[0]]) for output in outputs])  # Predicted labels

    # Fill prediction / ground truth with zeros for all background points
    len_bg = testset.metadata['len_bg']
    lbl = np.append(lbl, [0] * len_bg)
    auto_lbl = np.append(auto_lbl, [0] * len_bg)

    (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)
    dice = dice[~np.isnan(dice)]  # Drop labels absent from both arrays
    return dice.mean()
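
compute_eval_multilabel_metrics is project code and is not shown in these examples. As a point of reference, a per-label Dice score can be sketched as follows, under the assumption that the function scores each label independently and returns NaN for labels absent from both arrays (which is why callers filter with ~np.isnan):

import numpy as np

def dice_per_label(auto_lbl, lbl, labels):
    # Dice = 2 * |A & B| / (|A| + |B|) per label; NaN when the label occurs
    # in neither the prediction nor the ground truth.
    scores = []
    for k in labels:
        pred, truth = (auto_lbl == k), (lbl == k)
        denom = pred.sum() + truth.sum()
        scores.append(2.0 * (pred & truth).sum() / denom if denom else np.nan)
    return np.array(scores)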
Example #3
import os

import numpy as np


def measure_to_string(measure):
    # Truncate each value to five characters and right-justify it in a
    # six-character column.
    result = ''
    for value in measure:
        result += str(value)[:5].rjust(6)
    return result
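
# Quick usage check (hypothetical values): each entry is truncated to five
# characters and right-justified in a six-character column, e.g.
#   measure_to_string([0.91234, 0.8, 0.72345])  ->  ' 0.912   0.8 0.723'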


result = ''

# Fill prediction / ground truth with zeros for all background points
len_bg = testset.metadata['len_bg']
lbl = np.append(lbl, [0] * len_bg)
auto_lbl = np.append(auto_lbl, [0] * len_bg)

(dice, jaccard, precision,
 recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)

lbl_result = string_debug(lbl)
auto_lbl_result = string_debug(auto_lbl)

results_file = 'libsvm_measures.txt'
if not os.path.exists(os.path.join(results_path, results_file)):
    result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n'

result += 'Dataset : ' + dataset_name + '\n'
result += 'Model : SVM\n'

result += 'Best hyperparameters : ' + hyper_to_string(best_hyperparams) + '\n'

result += 'Ground truth : \n' + lbl_result
result += 'Prediction : \n' + auto_lbl_result
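
The snippet assembles result but stops before persisting it. A minimal sketch of the write-out step, assuming the intent is to append to results_file under results_path (both defined above):

with open(os.path.join(results_path, results_file), 'a') as f:
    f.write(result + '\n')  # append so successive runs accumulate in one file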
Example #4
def measure_to_string(measure):
    result = ''
    for value in measure:
        result += str(value)[:5].rjust(6)
    return result


result = ''

# Fill prediction / ground truth with zeros for all background points
len_bg = testset.metadata['len_bg']
lbl = np.append(lbl, [0]*len_bg)
auto_lbl = np.append(auto_lbl, [0]*len_bg)

(dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)

lbl_result = string_debug(lbl)
auto_lbl_result = string_debug(auto_lbl)

results_file = 'libsvm_measures.txt'
if not os.path.exists(os.path.join(results_path, results_file)):
    result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n'
    
result += 'Dataset : ' + dataset_name + '\n'
result += 'Model : SVM\n'

result += 'Best hyperparameters : ' + hyper_to_string(best_hyperparams) + '\n'

result += 'Ground truth : \n' + lbl_result
result += 'Prediction : \n' + auto_lbl_result
Example #5
def measure_to_string(measure):
    result = ''
    for value in measure:
        result += str(value)[:5].rjust(6)
    return result


result = ''

# Fill prediction / ground truth with zeros for all background points
# (disabled in this variant):
# len_bg = testset.metadata['len_bg']
# lbl = np.append(lbl, [0]*len_bg)
# auto_lbl = np.append(auto_lbl, [0]*len_bg)

(dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)

lbl_result = string_debug(lbl)
auto_lbl_result = string_debug(auto_lbl)

results_file = 'sklearn_knn_measures.txt'
if not os.path.exists(os.path.join(results_path, results_file)):
    result += 'Results = [Edema, non-enhanced tumor, enhanced tumor, complete (abnormality vs healthy)]\n'
    
result += 'Dataset : ' + dataset_name + '\n'
result += 'Model : KNN\n'

result += 'Best hyperparameters : ' + hyper_to_string(K) + '\n'

result += 'Ground truth : \n' + lbl_result
result += 'Prediction : \n' + auto_lbl_result
Example #6
import sys
import time

import numpy as np

import compute_statistics  # project module providing the evaluation metrics
# SVMClassifier and compute_error_mean_and_sterror are assumed to be in scope
# (project code; their import paths are not shown in this snippet).


def svm_model(datasets):
    print("Setting up the hyperparameter grid search...")
    best_hyperparams = None
    best_val_error = np.inf

    finaltrainset = datasets['finaltrainset']
    trainset = datasets['trainset']
    validset = datasets['validset']
    testset = datasets['testset']
    lbl = datasets['ground_truth']

    kernels = ['rbf', 'sigmoid']  # Only the RBF grid is built below
    #degrees = [1,2,3,4,5,7,10,15]
    gammas = [0.01, 0.1, 1, 5, 10, 50, 100, 200, 500, 1000]
    #coef0s = [-10,-1,-0.1,-0.01,0,0.001,0.01,0.1,1,2,5,10,20]
    Cs = [1, 5, 10, 25, 50, 75, 100, 200, 500, 1000, 1500]

    hyperparams_grid = []
    # RBF kernel parameters: [kernel, degree, gamma, coef0, C]
    start_time = time.perf_counter()
    for gamma in gammas:
        for C in Cs:
            hyperparams_grid.append(['rbf', 3, gamma, 0, C])
    
    use_weights = False
    if use_weights:
        label_weights = finaltrainset.metadata['label_weights']
    else:
        label_weights = None

    output_probabilities = False  # Or True!

    print "Pretraining..."
    for params in hyperparams_grid:
        try:
            # Create SVMClassifier with hyper-parameters
            svm = SVMClassifier(shrinking=True, kernel=params[0],degree=params[1],gamma=params[2],coef0=params[3],C=params[4],label_weights=label_weights, output_probabilities=output_probabilities)
        except Exception as inst:
            print "Error while instantiating SVMClassifier (required hyper-parameters are probably missing)"
            print inst
            sys.exit()
        svm.train(trainset)
        outputs, costs = svm.test(validset)
        
        errors = compute_error_mean_and_sterror(costs)
        error = errors[0]
        
        if error < best_val_error:
            best_val_error = error
            best_hyperparams = params

    print()
    print('Classification error on valid set : ' + str(best_val_error))
    print()

    print "Training..."
    # Train SVM with best hyperparams on train + validset
    #gamma = 5.0
    #C = 1
    #best_hyperparams = ['rbf',3,gamma,0,C]
    best_svm = SVMClassifier(shrinking=True, kernel=best_hyperparams[0],degree=best_hyperparams[1],gamma=best_hyperparams[2],coef0=best_hyperparams[3],C=best_hyperparams[4],label_weights=label_weights, output_probabilities=output_probabilities)
    best_svm.train(finaltrainset)

    print('Testing...')
    outputs, costs = best_svm.test(testset)
    end_time = time.perf_counter()
    processing_time = end_time - start_time
    
    errors = compute_error_mean_and_sterror(costs)
    error = errors[0]

    print()
    print('Classification error on test set : ' + str(error))
    print("****************************************")


    # Evaluation (compute_statistics.py)
    id_to_class = {}
    for label, id in testset.class_to_id.items():
        id_to_class[id] = label

    auto_lbl = np.array([int(id_to_class[output[0]]) for output in outputs])  # Predicted labels

    # Fill prediction / ground truth with zeros for all background points
    len_bg = testset.metadata['len_bg']
    lbl = np.append(lbl, [0] * len_bg)
    auto_lbl = np.append(auto_lbl, [0] * len_bg)
    (dice, jaccard, precision, recall) = compute_statistics.compute_eval_multilabel_metrics(auto_lbl, lbl)
    dice = dice[~np.isnan(dice)]  # Drop labels absent from both arrays
    return [dice.mean(), processing_time]
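
A hedged usage sketch for svm_model, assuming datasets carries the keys the function reads ('trainset', 'validset', 'testset', 'finaltrainset', 'ground_truth'); load_datasets is a hypothetical stand-in for however the project assembles that dict:

datasets = load_datasets()  # hypothetical loader
mean_dice, seconds = svm_model(datasets)
print('Mean Dice = ' + str(mean_dice) + ' (' + str(seconds) + ' s)')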