def tune_svm_using_10_fold():
    """Estimate SVM accuracy via 10-fold cross-validation on the train set.

    For each fold: fit feature selection on the training split only, project
    the tuning split onto the selected features, train an SVM, and record its
    accuracy.  Prints the mean accuracy over all folds.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()
    data_sets = dh.get_cross_validation_data_sets(10, train_features, train_prediction_labels)
    accuracy = []
    for data_set_number in data_sets:
        data_set = data_sets.get(data_set_number)
        training_set = data_set[0]
        tuning_set = data_set[1]
        train_features = training_set["data_points"]
        train_prediction_labels = training_set["labels"]
        # Feature selection is fit on the training split only; the returned
        # indices are reused to transform the tuning split below.
        train_features, selected_features = dh.get_k_best_features(
            len(train_features[0]), train_features, train_prediction_labels)
        test_features = tuning_set["data_points"]
        test_prediction_labels = tuning_set["labels"]
        test_features = dh.get_new_feature_vec(test_features, selected_features)
        svm = Svm(train_features, train_prediction_labels, 200, 1, 2)
        svm.train()
        eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
        # Renamed from `eval`, which shadowed the builtin of the same name.
        results = eval_metrics.evaluate()
        accuracy.append(results['accuracy'])
    average_accuracy = sum(accuracy) / len(accuracy)
    # Was a Python-2 `print average_accuracy` statement — a SyntaxError under
    # Python 3, which the rest of this file targets.
    print(average_accuracy)
def __init__(self, features, false_positive_loss=1, false_negative_loss=1):
    """Set up the wrapped Svm and an empty example matrix.

    :param features: feature extractors; one matrix column per feature.
    :param false_positive_loss: loss weight forwarded to the Svm.
    :param false_negative_loss: loss weight forwarded to the Svm.
    """
    super().__init__()
    self._features = features
    # Each stored example row holds one value per feature plus a label column.
    self._row_len = len(features) + 1
    self.examples = np.empty((0, self._row_len))
    self.svm = Svm()
    self.svm.false_positive_loss = false_positive_loss
    self.svm.false_negative_loss = false_negative_loss
def evaluate_svm():
    """Train an SVM on the train set and evaluate it on the held-out test set.

    Feature selection is fit on the training data; the selected feature
    indices are then applied to the test data before evaluation.  Produces
    an AUC plot and an AU-ROC score via EvaluationMetrics.
    """
    dh = DataHandler('data/train-set-feature-engineered.csv', 'prediction_label')
    headers, train_features, train_prediction_labels = dh.get_numeric_data_set()
    # Feature selection: fit on training data, keep the indices for the test set.
    train_features, selected_features = dh.get_k_best_features(
        len(train_features[0]), train_features, train_prediction_labels)
    svm = Svm(train_features, train_prediction_labels, 20, 0)
    svm.train()
    dh_test = DataHandler('data/test-set-feature-engineered.csv', 'prediction_label')
    headers, test_features, test_prediction_labels = dh_test.get_numeric_data_set()
    # Project the test data onto the same selected features.
    test_features = dh_test.get_new_feature_vec(test_features, selected_features)
    eval_metrics = EvaluationMetrics(svm, test_features, test_prediction_labels)
    # Renamed from `eval`, which shadowed the builtin of the same name.
    results = eval_metrics.evaluate()
    eval_metrics.compute_and_plot_auc(results['predicted'], test_prediction_labels)
    eval_metrics.compute_au_roc(results['predicted'], test_prediction_labels)
class _SubClassifier(AbstractClassfier):
    """Classifier that feeds extracted image features into a wrapped Svm.

    Collects labelled feature rows via ``add_current_image``, trains with
    ``learn``, and delegates classification/serialization to the inner Svm.
    """

    def __init__(self, features, false_positive_loss=1, false_negative_loss=1):
        """Store the feature extractors and configure the inner Svm losses."""
        super().__init__()
        self.svm = Svm()
        self._features = features
        self.svm.false_negative_loss = false_negative_loss
        self.svm.false_positive_loss = false_positive_loss
        # One column per feature plus a trailing label column.
        self._row_len = len(features) + 1
        self.examples = np.empty((0, self._row_len))

    def calc_features(self, rect=None):
        """Evaluate every feature extractor at the rect's top-left corner.

        With no rect, features are evaluated at the image origin (0, 0).
        """
        start = (0, 0) if rect is None else (rect[0][0], rect[1][0])
        return np.array([f.calc(start) for f in self._features])

    def add_current_image(self, y):
        """Append the current image's feature vector with label ``y``."""
        row = np.empty(self._row_len)
        row[-1] = y
        row[:-1] = self.calc_features()
        self.examples = np.vstack((self.examples, row))

    def learn(self):
        """Train the inner Svm on a copy of the collected examples."""
        self.svm.learn(np.copy(self.examples),
                       c_arr=2**np.arange(-5, 15, 2.0),
                       epoch=15,
                       cross_validation_times=5,
                       learning_part=0.7)

    def classify(self, rect):
        """Classify the features extracted from ``rect``."""
        return self.svm.classify(self.calc_features(rect))

    def classify_vec(self, rect, axis=-1):
        """Vectorized classification of the features extracted from ``rect``."""
        return self.svm.classify_vec(self.calc_features(rect), axis)

    def valuefy(self, rect):
        """Return the inner Svm's raw decision value for ``rect``."""
        return self.svm.valuefy(self.calc_features(rect))

    def to_list(self):
        """Serialize the inner Svm to a list."""
        return self.svm.to_list()

    def from_list(self, mat):
        """Restore the inner Svm from a previously serialized list."""
        self.svm.from_list(mat)
# Performance-evaluation pass: each line of the input file names one fold's
# artifacts (pickled dataset, id file, dat file, prediction file).
# NOTE(review): this chunk appears truncated — `prediction` is initialized but
# never filled within the visible code; confirm against the full file.
print('\nPerformance is being computed:')
with open(filein) as f:
    for line in f:
        items = line.split()
        print(items)
        fold = items[0]        # fold identifier (unused in the visible code)
        data_pkl = items[1]    # path to the pickled dataset
        data_id = items[2]     # path to the id file
        data_dat = items[3]    # path to the .dat dataset (unused here)
        pred_file = items[4]   # path to the prediction file
        ### Build the dataset from dataset.pkl ###
        model = Svm() \
            .load(dataset=data_pkl, id_file=data_id, encode=True, pkl=True)\
            .decoding(prediction_file=pred_file)
        dictionary = model.fetch_dictionary()
        ##############################################
        ## Performance Steps:                       ##
        ## 1. Confusion Matrix and related scores   ##
        ## 2. Segment OVerlapping score             ##
        ##############################################
        prediction = []
        expectation = []
        # Collect the expected (DSSP) labels for every entry in the model's
        # dictionary; presumably `prediction` is filled past this chunk.
        for key in dictionary:
            expectation.append(dictionary[key]['dssp'])
    # Argument parsing — the enclosing `try:` begins before this chunk.
    filein = sys.argv[1]   # text file listing one dataset's paths per line
    setype = sys.argv[2]   # dataset kind: 'trainingset' or 'testset'
except:  # NOTE(review): bare except — should likely be `except IndexError`; confirm
    print(
        'Program Usage: python3 svm_encode.py <file.txt> <set type (trainingset, testset)>'
    )
    raise SystemExit
else:
    with open(filein) as f:
        for line in f:
            items = line.split()
            print(items)
            path = items[0]       # output path for the .dat encoding
            data_pkl = items[1]   # output path for the .pkl encoding
            data_id = items[2]    # id file used by Pssm/Dssp/Dataset
            data_dat = items[3]   # unused in the visible code
            # Build the dataset from scratch: profile + DSSP -> Dataset dict.
            prof = Pssm(data_id, setype=setype, raw_file=False).parse(normalize=False)
            dict_prof = prof.fetch_dict()
            dssp = Dssp(data_id, setype=setype, raw_file=False).parse()
            dict_dssp = dssp.fetch_dict()
            dataset = Dataset(data_id, setype=setype).build(
                profile=dict_prof, dssp=dict_dssp).fetch_dict()
            # Encode and persist the model in both formats.
            model = Svm(id_file=data_id, setype=setype)\
                .load(dataset=dataset, id_file=data_id, encode=True)\
                .save(path=path, format='dat') \
                .save(path=data_pkl, format='pkl')
def main(argv):
    """Train Perceptron, SVM and PA models and print their test predictions.

    Reads the training features/labels and test features from the paths in
    ``sys.argv[1..3]``.  Perceptron and SVM are trained on min-max-normalized
    data while PA is trained on z-score-normalized data; normalization
    parameters are fit on the training set and reused for the test set.

    NOTE(review): ``argv`` is accepted but the body reads ``sys.argv``
    directly — kept as-is to preserve behavior.
    """
    train_x = read_from_file(sys.argv[1])
    train_x = one_hot_encode(train_x).astype(float)
    train_y = read_from_file(sys.argv[2])
    train_y = train_y.astype(float).astype(int)
    num_of_labels = len(Counter(train_y).keys())

    test_x = read_from_file(sys.argv[3])
    test_x = one_hot_encode(test_x).astype(float)

    # Fit normalization on the training set only, then apply the same
    # parameters to the test set.
    train_x_z_score, mean, std_dev = z_score_norm(train_x)
    train_x_min_max, min_train, max_train = min_max_norm(train_x)
    test_x_z_score = z_score_norm_by_mean_std(test_x, mean, std_dev)
    test_x_min_max = min_max_norm_by_min_max(test_x, min_train, max_train)

    # Training: Perceptron/SVM use min-max data, PA uses z-score data.
    perceptron = Perceptron(train_x_min_max, train_y, num_of_labels)
    svm = Svm(train_x_min_max, train_y, num_of_labels)
    pa = Pa(train_x_z_score, train_y, num_of_labels)
    perceptron.train()
    svm.train()
    pa.train()

    # Prediction: each model sees the normalization it was trained with.
    predict_perceptron = []
    predict_svm = []
    predict_pa = []
    for test_min_max, test_z_score in zip(test_x_min_max, test_x_z_score):
        predict_perceptron.append(perceptron.predict(test_min_max))
        predict_svm.append(svm.predict(test_min_max))
        predict_pa.append(pa.predict(test_z_score))
    print_predict(predict_perceptron, predict_svm, predict_pa)