def load_data(self):
    """Load training/testing data according to the configured mode.

    Modes (mutually exclusive flags on self):
      * plain (not split, not bulk): load training and testing files separately.
      * split (split, not bulk): load only the training file and 80/20-split it.
      * bulk (bulk, not split): load only the training file; the (larger)
        testing set is handled elsewhere.

    Returns:
        (x_training, y_training) in bulk mode, otherwise
        (x_training, y_training, x_testing, y_testing).
    """
    training_filepath = self.training_file
    testing_filepath = self.testing_file
    if not self.split and not self.bulk:
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        testing_loader = LoadLibsvm.LoadLibSVM(filename=testing_filepath,
                                               n_features=self.n_features)
        self.x_training, self.y_training = training_loader.load_all_data()
        self.x_testing, self.y_testing = testing_loader.load_all_data()
    if self.split and not self.bulk:
        Print.Print.result2("Data Splitting ...")
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        x_all, y_all = training_loader.load_all_data()
        ratio = 0.8  # 80% train / 20% test
        split_index = int(len(x_all) * ratio)
        self.x_training = x_all[:split_index]
        self.x_testing = x_all[split_index:]
        self.y_training = y_all[:split_index]
        self.y_testing = y_all[split_index:]
        if self.labelfix:
            # BUG FIX: the original chained assignment
            #   self.y_training = self.y_training[self.y_training == 2] = -1
            # rebound y_training/y_testing to the scalar -1. Remap label
            # 2 -> -1 in place instead.
            self.y_training[self.y_training == 2] = -1
            self.y_testing[self.y_testing == 2] = -1
    if self.bulk and not self.split:
        # Bulk mode: only the training file is loaded here; a larger
        # testing data set is consumed elsewhere.
        print("Loading Bulk Training File")
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        self.x_training, self.y_training = training_loader.load_all_data()
        return self.x_training, self.y_training
    # BUG FIX: this message was unreachable in the original (it followed
    # both return statements); log it before returning.
    print("Training and Testing data loaded ...")
    return self.x_training, self.y_training, self.x_testing, self.y_testing
def load_light_data(training_file, testing_file, split=False):
    """Load libsvm-format data, either from two files or by 80/20-splitting
    the training file when ``split`` is true.

    Returns (x_training, y_training, x_testing, y_testing).
    """
    # NOTE(review): `features` is read from an enclosing/global scope here,
    # not passed in — confirm it is defined where this function is used.
    x_training, y_training = [], []
    x_testing, y_testing = [], []
    if split:
        loader = LoadLibsvm.LoadLibSVM(filename=training_file,
                                       n_features=features)
        x_all, y_all = loader.load_all_data()
        cut = int(len(x_all) * 0.8)  # 80% train / 20% test
        x_training, x_testing = x_all[:cut], x_all[cut:]
        y_training, y_testing = y_all[:cut], y_all[cut:]
    else:
        train_loader = LoadLibsvm.LoadLibSVM(filename=training_file,
                                             n_features=features)
        test_loader = LoadLibsvm.LoadLibSVM(filename=testing_file,
                                            n_features=features)
        x_training, y_training = train_loader.load_all_data()
        x_testing, y_testing = test_loader.load_all_data()
    print("Training and Testing data loaded ...")
    return x_training, y_training, x_testing, y_testing
def load_data(self):
    """Load training/testing data, optionally 80/20-splitting the training file.

    When self.split is false, the training and testing files are loaded
    separately; when true, only the training file is loaded and split.
    When self.labelfix is set, label 2 is remapped to -1 on the split data.

    Returns:
        (x_training, y_training, x_testing, y_testing)
    """
    training_filepath = self.training_file
    testing_filepath = self.testing_file
    if not self.split:
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        testing_loader = LoadLibsvm.LoadLibSVM(filename=testing_filepath,
                                               n_features=self.n_features)
        self.x_training, self.y_training = training_loader.load_all_data()
        self.x_testing, self.y_testing = testing_loader.load_all_data()
    if self.split:
        Print.Print.result2("Data Splitting ...")
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        x_all, y_all = training_loader.load_all_data()
        ratio = 0.8  # 80% train / 20% test
        split_index = int(len(x_all) * ratio)
        self.x_training = x_all[:split_index]
        self.x_testing = x_all[split_index:]
        self.y_training = y_all[:split_index]
        self.y_testing = y_all[split_index:]
        if self.labelfix:
            # BUG FIX: the original chained assignment
            #   self.y_training = self.y_training[self.y_training == 2] = -1
            # rebound y_training/y_testing to the scalar -1. Remap label
            # 2 -> -1 in place instead.
            self.y_training[self.y_training == 2] = -1
            self.y_testing[self.y_testing == 2] = -1
    return self.x_training, self.y_training, self.x_testing, self.y_testing
def load_data(training_file, testing_file, split=False, n_features=10):
    """Load libsvm training/testing data.

    With ``split`` false both files are loaded; with ``split`` true only
    the training file is loaded and divided 60/40 into train/test.

    Returns (x_training, y_training, x_testing, y_testing).
    """
    if split:
        print("Data Splitting ...")
        loader = LoadLibsvm.LoadLibSVM(filename=training_file,
                                       n_features=n_features)
        x_all, y_all = loader.load_all_data()
        cut = int(len(x_all) * 0.6)  # 60% train / 40% test
        return x_all[:cut], y_all[:cut], x_all[cut:], y_all[cut:]
    train_loader = LoadLibsvm.LoadLibSVM(filename=training_file,
                                         n_features=n_features)
    test_loader = LoadLibsvm.LoadLibSVM(filename=testing_file,
                                        n_features=n_features)
    x_training, y_training = train_loader.load_all_data()
    x_testing, y_testing = test_loader.load_all_data()
    return x_training, y_training, x_testing, y_testing
def load_svm_data(self, n_features=22):
    """Load the configured training and testing libsvm files.

    Args:
        n_features: feature-vector width passed to the loader. Defaults
            to 22 — the value that was previously hard-coded — so existing
            callers are unaffected.

    Returns:
        (x_training, y_training, x_testing, y_testing)
    """
    training_loader = LoadLibsvm.LoadLibSVM(filename=self.training_file,
                                            n_features=n_features)
    testing_loader = LoadLibsvm.LoadLibSVM(filename=self.testing_file,
                                           n_features=n_features)
    x_training, y_training = training_loader.load_all_data()
    x_testing, y_testing = testing_loader.load_all_data()
    return x_training, y_training, x_testing, y_testing
def advanced_test(self, x_testing, y_testing, w):
    """Score weight vector ``w``.

    In bulk mode, every file under self.testing_file is loaded and the
    per-file accuracies are averaged; otherwise the given x/y arrays are
    scored directly. Stores the result in self.acc and returns it.
    """
    self.acc = 0
    if not self.bulk:
        predictions = self.svm.custom_predict(X=x_testing, w=w)
        self.acc = self.svm.get_accuracy(y_test=y_testing, y_pred=predictions)
        return self.acc
    testing_filepath = self.testing_file
    print("Loading Bulk Testing Files")
    files = os.listdir(testing_filepath)
    print("File Path : " + testing_filepath)
    print(files)
    self.bulk_testing_x = []
    self.bulk_testing_y = []
    self.acc = 0
    for bulk_file in files:
        print("Loading Testing Bulk File : " + bulk_file)
        loader = LoadLibsvm.LoadLibSVM(
            filename=testing_filepath + "/" + bulk_file,
            n_features=self.n_features)
        x_bulk, y_bulk = loader.load_all_data()
        predictions = self.svm.custom_predict(x_bulk, w=w)
        self.acc += self.svm.get_accuracy(y_test=y_bulk, y_pred=predictions)
    # Average accuracy across all bulk testing files.
    self.acc = self.acc / len(files)
    return self.acc
def load_data(self):
    """Load training/testing data per the configured mode (plain, split, bulk).

    Modes (mutually exclusive flags on self):
      * plain: load training and testing files separately.
      * split: load only the training file and 80/20-split it; when
        self.labelfix is set, remap label 2 to -1.
      * bulk: load only the training file; a larger testing set is
        consumed elsewhere.

    Returns:
        (x_training, y_training) in bulk mode, otherwise
        (x_training, y_training, x_testing, y_testing).
    """
    Print.Print.info1("Loading Data")
    training_filepath = self.training_file
    testing_filepath = self.testing_file
    if not self.split and not self.bulk:
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        testing_loader = LoadLibsvm.LoadLibSVM(filename=testing_filepath,
                                               n_features=self.n_features)
        self.x_training, self.y_training = training_loader.load_all_data()
        self.x_testing, self.y_testing = testing_loader.load_all_data()
    if self.split and not self.bulk:
        Print.Print.info1("Splitting data ...")
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        x_all, y_all = training_loader.load_all_data()
        ratio = 0.8  # 80% train / 20% test
        split_index = int(len(x_all) * ratio)
        self.x_training = x_all[:split_index]
        self.x_testing = x_all[split_index:]
        self.y_training = y_all[:split_index]
        self.y_testing = y_all[split_index:]
        if self.labelfix:
            # This logic can vary depending on the target labels.
            # TODO : Change the logic
            # BUG FIX: the original chained assignment
            #   self.y_training = self.y_training[self.y_training == 2] = -1
            # rebound y_training/y_testing to the scalar -1. Remap label
            # 2 -> -1 in place instead.
            self.y_training[self.y_training == 2] = -1
            self.y_testing[self.y_testing == 2] = -1
    if self.bulk and not self.split:
        # Bulk mode: only the training file is loaded here.
        print("Loading Bulk Training File")
        training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                                n_features=self.n_features)
        self.x_training, self.y_training = training_loader.load_all_data()
        return self.x_training, self.y_training
    # BUG FIX: this message was unreachable in the original (it followed
    # both return statements); log it before returning.
    print("Training and Testing data loaded ...")
    return self.x_training, self.y_training, self.x_testing, self.y_testing
def load_training_data(training_file, split=False):
    """Load training data from a libsvm file.

    When ``split`` is true nothing is loaded and empty lists are returned
    (the flag acts only as a guard here).
    """
    # NOTE(review): `features` is read from an enclosing/global scope —
    # confirm it is defined where this function is used.
    x_training, y_training = [], []
    if not split:
        loader = LoadLibsvm.LoadLibSVM(filename=training_file,
                                       n_features=features)
        x_training, y_training = loader.load_all_data()
    print("Training data loaded ...")
    return x_training, y_training
def load_random_test_data(testing_filepath):
    """Load one randomly chosen bulk-testing file from a directory.

    Returns (x_testing, y_testing) from the selected file.
    """
    print("Loading Bulk Testing Files")
    files = os.listdir(testing_filepath)
    print("File Path : " + testing_filepath)
    print(files)
    bulk_testing_x = []  # kept from original; not populated here
    bulk_testing_y = []  # kept from original; not populated here
    file_size = len(files)
    random_id = np.random.randint(0, file_size)
    chosen = files[random_id]
    print("Loading Testing Bulk File : " + chosen)
    loader = LoadLibsvm.LoadLibSVM(filename=testing_filepath + "/" + chosen,
                                   n_features=1)
    x_testing, y_testing = loader.load_all_data()
    return x_testing, y_testing
def case3():
    """Train an SVM on the ijcnn1 data, print accuracy, and append the
    run's hyperparameters and accuracy to logs/results.txt."""
    training_filepath = "/home/vibhatha/data/svm/ijcnn1/ijcnn1_training"
    testing_filepath = "/home/vibhatha/data/svm/ijcnn1/ijcnn1_testing"
    loader = LoadLibsvm.LoadLibSVM(filename=training_filepath, n_features=22)
    x_training, y_training, x_testing, y_testing = loader.load_data()
    svm = SVM.SVM(X=x_training, y=y_training, alpha=1, n_features=22)
    # alpha=0.0003, epochs=1000 also worked well previously
    alpha = 0.1
    epochs = 500
    svm.train(X=x_training, y=y_training, alpha=alpha, epochs=epochs)
    y_pred = svm.predict(X=x_testing)
    acc = svm.get_accuracy(y_test=y_testing, y_pred=y_pred)
    print("Accuracy : " + str(acc) + "%")
    with open("logs/results.txt", "a") as fp:
        fp.write("alpha : " + str(alpha) + ", epochs : " + str(epochs) +
                 ", accuracy : " + str(acc) + "%\n")
def test(self):
    """Evaluate the momentum (w_m) and SGD (w_sgd) weight vectors.

    In bulk mode, averages accuracy over every file in self.testing_file;
    otherwise scores self.x_testing/self.y_testing directly.

    Returns (acc_m, acc_sgd).
    """
    self.acc_m = 0
    self.acc_sgd = 0
    if not self.bulk:
        pred_m = self.svm.custom_predict(X=self.x_testing, w=self.w_m)
        self.acc_m = self.svm.get_accuracy(y_test=self.y_testing, y_pred=pred_m)
        pred_sgd = self.svm.custom_predict(X=self.x_testing, w=self.w_sgd)
        self.acc_sgd = self.svm.get_accuracy(y_test=self.y_testing, y_pred=pred_sgd)
        return self.acc_m, self.acc_sgd
    testing_filepath = self.testing_file
    print("Loading Bulk Testing Files")
    files = os.listdir(testing_filepath)
    print("File Path : " + testing_filepath)
    print(files)
    self.bulk_testing_x = []
    self.bulk_testing_y = []
    for bulk_file in files:
        print("Loading Testing Bulk File : " + bulk_file)
        loader = LoadLibsvm.LoadLibSVM(
            filename=testing_filepath + "/" + bulk_file,
            n_features=self.n_features)
        x_bulk, y_bulk = loader.load_all_data()
        pred_m = self.svm.custom_predict(x_bulk, w=self.w_m)
        self.acc_m += self.svm.get_accuracy(y_test=y_bulk, y_pred=pred_m)
        pred_sgd = self.svm.custom_predict(x_bulk, w=self.w_sgd)
        self.acc_sgd += self.svm.get_accuracy(y_test=y_bulk, y_pred=pred_sgd)
    # Average across all bulk testing files.
    self.acc_m = self.acc_m / len(files)
    self.acc_sgd = self.acc_sgd / len(files)
    return self.acc_m, self.acc_sgd
from operations import LoadLibsvm
import numpy as np
from matplotlib import pyplot as plt

# Small hand-crafted toy dataset (superseded by the libsvm data below).
X = np.array([[1, 1], [2, 1], [3, 1], [4, 1], [1, 5], [2, 6], [3, 7], [4, 5]])
y = np.array([1, 1, 1, 1, -1, -1, -1, -1])
X_test = np.array([[1, 1.25], [2.1, 1.15], [3.1, 1.45], [4.23, 1.21],
                   [1.3, 5.25], [2.11, 6.24], [3.3, 7.24], [4.212, 5.78]])
# plt.scatter(X[:, 0], X[:, 1])
# plt.show()

dataset = 'ijcnn1'
training_filepath = '/home/vibhatha/data/svm/' + dataset + '/training.csv'
testing_filepath = '/home/vibhatha/data/svm/' + dataset + '/testing.csv'
n_features = 300
split = False

training_loader = LoadLibsvm.LoadLibSVM(filename=training_filepath,
                                        n_features=n_features)
x_training = []
y_training = []
x_testing = []
y_testing = []
# IDIOM FIX: was `if split == True:` — test truthiness directly.
if split:
    x_all, y_all = training_loader.load_all_data()
    ratio = 0.8  # 80% train / 20% test
    size = len(x_all)
    split_index = int(size * ratio)
    x_training = x_all[:split_index]
    x_testing = x_all[split_index:]
    y_training = y_all[:split_index]
    y_testing = y_all[split_index:]
## iterate through all the test samples. overall_accuracy_sgd = 0 overall_accuracy_mom = 0 overall_accuracy_ada = 0 overall_accuracy_rmsprop = 0 overall_accuracy_adam = 0 files = os.listdir(testing_file) print("File Path : " + testing_file) print(files) bulk_testing_x = [] bulk_testing_y = [] file_size = len(files) random_id = np.random.randint(0, file_size) print("Loading Testing Bulk File : " + files[random_id]) for file in files: testing_loader = LoadLibsvm.LoadLibSVM(filename=testing_file + "/" + file, n_features=1) x_testing, y_testing = testing_loader.load_all_data() overall_accuracy_sgd += bmsvmsgd.advanced_test(x_testing=x_testing, y_testing=y_testing, w=w_final_sgd) overall_accuracy_mom += bmsvmsgdmomentum.advanced_test(x_testing=x_testing, y_testing=y_testing, w=w_final_mom) overall_accuracy_ada += bmsvmsgdada.advanced_test(x_testing=x_testing, y_testing=y_testing, w=w_final_ada) overall_accuracy_rmsprop += bmsvmsgdrmsprop.advanced_test(x_testing=x_testing, y_testing=y_testing, w=w_final_rmsprop) overall_accuracy_adam += bmsvmsgdadam.advanced_test(x_testing=x_testing, y_testing=y_testing, w=w_final_adam) overall_accuracy_sgd = overall_accuracy_sgd/ file_size overall_accuracy_mom = overall_accuracy_mom / file_size overall_accuracy_ada = overall_accuracy_ada / file_size overall_accuracy_rmsprop = overall_accuracy_rmsprop / file_size overall_accuracy_adam = overall_accuracy_adam / file_size ## stats