def predat(numOtr, cls):
    """
    Randomly subset the dataset, then convert it to numpy format for
    further processing.
    -------------------------------------------
    input:
        numOtr  -- total number of training samples
        cls     -- number of classes (returned unchanged)
    output:
        sam_tr, ind_tr  -- numpy format training samples and their labels
        sam_te, ind_te  -- numpy format testing samples and their labels
        cls             -- number of classes
    """
    # Split the scaled dataset into a training file and a testing file.
    subset.main('dataset/YaleB.scale', numOtr, 0,
                'dataset/YaleB.scale.tr', 'dataset/YaleB.scale.te')
    lb_tr, ins_tr = svmutil.svm_read_problem('dataset/YaleB.scale.tr')
    lb_te, ins_te = svmutil.svm_read_problem('dataset/YaleB.scale.te')

    # NOTE(review): the feature columns are inferred separately for the
    # training and testing frames; presumably both files expose the same
    # feature indices -- confirm, otherwise the two matrices can differ in
    # their number of rows.

    # Change training data to numpy format. Features absent from a sparse
    # instance become 0, and the transpose puts one sample per column.
    # `.values` replaces `DataFrame.as_matrix`, which was removed in pandas 1.0.
    sam_tr = pd.DataFrame(ins_tr).fillna(0).values.T
    ind_tr = np.array(lb_tr)

    # Change testing data to numpy format (same conversion as above).
    sam_te = pd.DataFrame(ins_te).fillna(0).values.T
    ind_te = np.array(lb_te)

    return sam_tr, ind_tr, sam_te, ind_te, cls
def predictByFile(test_file_name, model):
    """Run `model` on a libsvm-format file and print the prediction count.

    test_file_name -- path of the libsvm-format file to read
    model          -- model object handed to svmutil.svm_predict

    Nothing is returned; the number of predicted labels is printed.
    """
    y, X = svmutil.svm_read_problem(test_file_name)
    # Only the predicted labels are used; the other two results are ignored.
    p_label, _p_acc, _p_val = svmutil.svm_predict(y, X, model)
    # Parenthesised form prints the same text on Python 2 and Python 3,
    # whereas the bare `print expr` statement is a SyntaxError on Python 3.
    print(len(p_label))
def trainByFile(train_file_name):
    """Read a libsvm-format training file and return the model trained on it.

    train_file_name -- path of the libsvm-format training file

    The labels and instances read from the file are passed straight to
    `train`, whose result is returned unchanged.
    """
    labels, instances = svmutil.svm_read_problem(train_file_name)
    return train(labels, instances)
# -*- coding: utf-8 -*-
"""Exercise 6.2"""
import os

import numpy as np
from libsvm.python import svm, svmutil

# data.txt is expected next to this script. The script path is made absolute
# first because __file__ can be a relative path; the directory is no longer
# stored in a variable named `dir`, which shadowed the builtin.
abspath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data.txt')

# prob[0] holds the labels and prob[1] the instances read from the file.
prob = svmutil.svm_read_problem(abspath)

# Train on the same data twice, once per kernel type option.
print('Linear\n')
svmutil.svm_train(prob[0], prob[1], '-t 0')

print('RBF\n')
svmutil.svm_train(prob[0], prob[1], '-t 2')
# Step 1: turn the CSV inputs into libsvm-format files. The two
# "precomputed" inputs are converted with isKernel=True.
print('converting to libsvm-format...')
gen_libsvm_format_data(
    'dataset/X_train_precomputed.csv',
    'dataset/Y_train.csv',
    'train_precomputed.csv',
    isKernel=True,
)
gen_libsvm_format_data(
    'dataset/X_test_precomputed.csv',
    'dataset/Y_test.csv',
    'test_precomputed.csv',
    isKernel=True,
)
gen_libsvm_format_data(
    'dataset/X_train.csv',
    'dataset/Y_train.csv',
    'train.csv',
)
gen_libsvm_format_data(
    'dataset/X_test.csv',
    'dataset/Y_test.csv',
    'test.csv',
)

# Step 2: load the freshly written training files and wrap each one in an
# svm_problem (the precomputed variant is flagged with isKernel=True).
print('opening training data...')
y_train, x_train = svmutil.svm_read_problem('train.csv')
prob = svmutil.svm_problem(y_train, x_train)
y1, x_train_precomputed = svmutil.svm_read_problem('train_precomputed.csv')
prob_precomputed = svmutil.svm_problem(y1, x_train_precomputed, isKernel=True)

# Step 3: option strings for the grid search, keyed by kernel name.
# NOTE(review): the -log2* triples look like begin,end,step ranges -- confirm
# against the grid-search routine that consumes these strings.
print('grid searching...')
grid_search = {
    'linear': '-t 0 -log2c -5,15,2',
    'polynomial': '-t 1 -log2c -5,15,2 -log2g -5,15,2 -log2r -3,5,2 -d 4',
    'RBF': '-t 2 -log2c -5,15,2 -log2g -5,15,2',
}
grid_search_precomputed = {'linear+RBF': '-t 4 -log2c -5,15,2'}

# Two empty lists, filled by code outside this section.
best_param, kernel_tab = [], []
def predictByFile(test_file_name, model):
    """Run `model` on a libsvm-format file and print the prediction count.

    test_file_name -- path of the libsvm-format file to read
    model          -- model object handed to svmutil.svm_predict

    Nothing is returned; the number of predicted labels is printed.
    """
    y, X = svmutil.svm_read_problem(test_file_name)
    # Only the predicted labels are used; the other two results are ignored.
    p_label, _p_acc, _p_val = svmutil.svm_predict(y, X, model)
    # Parenthesised form prints the same text on Python 2 and Python 3,
    # whereas the bare `print expr` statement is a SyntaxError on Python 3.
    print(len(p_label))
def trainByFile(train_file_name):
    """Read a libsvm-format training file and return the model trained on it.

    train_file_name -- path of the libsvm-format training file

    The labels and instances read from the file are passed straight to
    `train`, whose result is returned unchanged.
    """
    labels, instances = svmutil.svm_read_problem(train_file_name)
    return train(labels, instances)