def test_svm():
    """Load the train/test feature files and pass them to ``svm_with_diff_c``.

    Features and labels are read with ``load_features`` from
    ``../data/train.txt`` and ``../data/test.txt`` (relative paths, so they
    resolve against the current working directory). Each array is converted
    with ``.tolist()`` before the call; labels are passed before data.
    """
    traindataloc, testdataloc = "../data/train.txt", "../data/test.txt"

    train_data, train_label = load_features(traindataloc)
    test_data, test_label = load_features(testdataloc)

    svm_with_diff_c(
        train_label.tolist(),
        train_data.tolist(),
        test_label.tolist(),
        test_data.tolist(),
    )

    # Disabled experiment. It was previously parked inside a ''' string
    # literal that was never closed; kept here as a plain comment instead:
    # svm_with_diff_kernel(train_label.tolist(), train_data.tolist(), ...
def train(self, X, Y):
    """Train a cascade of boosted strong classifiers and store it on ``self``.

    The samples are divided into a training and a development part with
    ``self.split``; the training part is further divided into positives and
    negatives with ``self.get_positive_negative_set``.

    Stages are added while the false-positive rate recorded for the latest
    stage exceeds ``self.F_thresh`` and the sum of the latest stage's
    predictions on the remaining training negatives is non-zero. Within a
    stage, the feature budget starts at the previous stage's budget (seed: 5)
    and grows by 10, then 12, 14, ... per boosting run; boosting is repeated
    until the stage's false-positive rate is no longer above
    ``self.f * previous_rate`` or no longer above ``self.F_thresh``. After
    each boosting run the strong-classifier threshold starts at
    ``sum(alphas) / 2`` and is lowered in steps of 1% of that value until the
    detection rate on the development part is at least
    ``self.d * previous_detection_rate``.

    After a stage is finished, only the training negatives for which the new
    stage predicts ``1`` are kept for the next stage.

    Args:
        X: Samples, forwarded unchanged to ``self.split``.
        Y: Labels for ``X``, forwarded unchanged to ``self.split``.

    Returns:
        None. The trained stages are stored, in order, in
        ``self.classifiers``.
    """
    # Index 0 of each list is the seed value for "no stages yet";
    # index k (k >= 1) belongs to stage k.
    fp_rates = [1]
    det_rates = [1]
    feature_counts = [5]
    stage = 0

    features = load_features(self.feature_type)
    X_train, Y_train, X_dev, Y_dev = self.split(X, Y)
    PX, PY, NX, NY = self.get_positive_negative_set(X_train, Y_train)

    # Wrap the raw samples as integral images.
    PX, NX = IntegralImage(PX), IntegralImage(NX)
    X_dev = IntegralImage(X_dev)

    cascade = []
    y_pred = [1]  # non-zero sum, so the first stage is always attempted

    while fp_rates[stage] > self.F_thresh and sum(y_pred) != 0:
        stage += 1
        feature_counts.append(feature_counts[stage - 1])
        det_rates.append(0)
        fp_rates.append(fp_rates[stage - 1])
        increment = 10

        while (fp_rates[stage] > self.f * fp_rates[stage - 1]
               and fp_rates[stage] > self.F_thresh):
            feature_counts[stage] += increment

            # Boost a new strong classifier on the positives plus the
            # negatives that survived the previous stages.
            print("Training on", PX.ii.shape[0], "positive examples and",
                  NX.ii.shape[0], "negative examples.")
            X_train = IntegralImage.from_array(
                np.concatenate((PX.ii, NX.ii)))
            Y_train = np.concatenate((PY, NY))
            weak_classifiers, alphas = self.boosting_algorithm.boost(
                X_train,
                Y_train,
                self.select_features(
                    features, stage=stage, num=feature_counts[stage]),
                iter=self.T)

            threshold = sum(alphas) / 2.0
            step = threshold / 100.0

            # Reset so the threshold search below runs at least once for
            # this freshly boosted classifier.
            det_rates[stage] = 0

            # Evaluate the cascade-so-far plus the candidate stage on the
            # development part, lowering the threshold one step at a time
            # until the detection-rate target is met.
            # NOTE(review): the two rate denominators are not guarded; if the
            # development part yields FP + TN == 0 or FN + TP == 0 this
            # presumably fails or produces nan, depending on the count types
            # returned by self.evaluate -- confirm.
            while det_rates[stage] < self.d * det_rates[stage - 1]:
                c = StrongClassifier(weak_classifiers, alphas, threshold)
                TP, FP, TN, FN = self.evaluate(cascade + [c], X_dev, Y_dev)
                fp_rates[stage] = FP / float(FP + TN)
                det_rates[stage] = TP / float(FN + TP)
                if self.verbose:
                    print("F", fp_rates[stage], "D", det_rates[stage])
                if det_rates[stage] < self.d * det_rates[stage - 1]:
                    threshold -= step

            # Ask for a larger jump in feature count on the next attempt.
            increment += 2

        # Keep only the negatives the new stage still predicts as 1; they
        # are the training negatives for the next stage.
        y_pred = c.predict(NX)
        kept = y_pred == 1
        NX.ii = NX.ii[kept]
        NY = NY[kept]
        cascade.append(c)

    if self.verbose:
        print("Detection Rates:", det_rates)
        print("False Positive Rates:", fp_rates)
        # Report every trained stage (index 0 is only the seed value).
        print("Number of Features per Cascade", feature_counts[1:])

    self.classifiers = cascade