def _create_clf(dnn):
    """Build a `CClassifierDNR` that shares one `layer_clf` across layers.

    The combiner and the layer classifier are two separate SVM instances
    with identical settings (RBF kernel with gamma=1, C=1). The layers
    monitored on `dnn` are 'conv2' and 'relu'.
    """
    def _rbf_svm():
        # A fresh instance per call: combiner and layer classifier must
        # not be the same object.
        return CClassifierSVM(kernel=CKernelRBF(gamma=1), C=1)

    monitored_layers = ['conv2', 'relu']
    combiner_clf = _rbf_svm()
    per_layer_clf = _rbf_svm()
    # NOTE(review): the last positional argument (-inf) presumably is the
    # rejection threshold - confirm against CClassifierDNR's signature.
    return CClassifierDNR(
        combiner_clf, per_layer_clf, dnn, monitored_layers, -inf)
def test_grad_tr_params_linear(self):
    """Run the `grad_tr_params` check on an SVM built without a kernel.

    The check is repeated without preprocessing and with a min-max
    normalizer configured with (-10, 10).
    """
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        svm = CClassifierSVM(store_dual_vars=True, preprocess=preprocess)
        svm.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(svm)
def test_alignment(self):
    """Check that Sec-SVM and a standard SVM agree on the training set.

    Both classifiers are trained with C=1 on the same randomly generated
    dataset (100 samples, 500 features, fixed random state).
    `_compute_alignment` is then run on the pair, and finally the labels
    each classifier predicts for the training samples are logged and
    asserted to be (almost) equal.
    """
    ds = CDLRandom(n_samples=100,
                   n_features=500,
                   n_redundant=0,
                   n_informative=10,
                   n_clusters_per_class=1,
                   random_state=0).load()

    self.logger.info("Train Sec SVM")
    sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-2, lb=-0.1, ub=0.5)
    sec_svm.verbose = 2
    sec_svm.fit(ds.X, ds.Y)

    self.logger.info("Train SVM")
    svm = CClassifierSVM(C=1)
    svm.fit(ds.X, ds.Y)

    self._compute_alignment(ds, sec_svm, svm)

    # Each prediction must come from its own classifier. Previously both
    # were produced by `sec_svm`, so the assertion below compared Sec-SVM
    # with itself and could never fail.
    svm_pred = svm.predict(ds.X)
    secsvm_pred = sec_svm.predict(ds.X)

    self.logger.info("SVM pred:\n{:}".format(svm_pred))
    self.logger.info("Sec-SVM pred:\n{:}".format(secsvm_pred))

    self.assert_array_almost_equal(secsvm_pred, svm_pred)
def test_grad_tr_params_nonlinear(self):
    """Run the `grad_tr_params` check on an SVM with the 'rbf' kernel.

    The check is repeated without preprocessing and with a min-max
    normalizer configured with (-10, 10).
    """
    preprocessors = (None, CNormalizerMinMax((-10, 10)))
    for preprocess in preprocessors:
        svm = CClassifierSVM(kernel='rbf', preprocess=preprocess)
        svm.fit(self.ds.X, self.ds.Y)
        self._test_grad_tr_params(svm)
def test_performance(self):
    """Check that SVM and SGD both score above 0.90 on the training set.

    For every SGD classifier in `self.sgds`, an SVM is trained with a deep
    copy of the SGD's `preprocess` passed as kernel (or no kernel when it
    is None). Both classifiers are scored with the 'f1' metric on the
    training data.
    """
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for sgd in self.sgds:
        self.logger.info("SGD kernel: {:}".format(sgd.preprocess))

        # The SVM gets its own copy so the two classifiers share no state
        svm_kernel = (sgd.preprocess.deepcopy()
                      if sgd.preprocess is not None else None)

        svm = CClassifierSVM(kernel=svm_kernel)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, _ = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_sgd, _ = sgd.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_sgd = CMetric.create('f1').performance_score(
            self.dataset.Y, label_sgd)

        svm_report = "Accuracy of SVM: {:}".format(acc_svm)
        self.logger.info(svm_report)
        self.assertGreater(acc_svm, 0.90, svm_report)

        sgd_report = "Accuracy of SGD: {:}".format(acc_sgd)
        self.logger.info(sgd_report)
        self.assertGreater(acc_sgd, 0.90, sgd_report)
def setUp(self):
    """Prepare two ROC objects computed from two loads of one loader.

    An SVM (C=1e-7) is trained on the first dataset and used to score
    both. Column 1 of the decision-function output is kept as the score.
    `roc_nomean` is left un-averaged; `roc_wmean` is averaged too.
    """
    self.ds_loader = CDLRandom(n_features=1000,
                               n_redundant=200,
                               n_informative=250,
                               n_clusters_per_class=2)
    self.ds1 = self.ds_loader.load()
    self.ds2 = self.ds_loader.load()

    self.y1 = self.ds1.Y
    self.y2 = self.ds2.Y

    self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

    def _scores_of_class_1(samples):
        # `predict` returns (labels, scores); only column 1 is needed
        scores = self.svm.predict(
            samples, return_decision_function=True)[1]
        return scores[:, 1].ravel()

    self.s1 = _scores_of_class_1(self.ds1.X)
    self.s2 = _scores_of_class_1(self.ds2.X)

    # ROC over the 2 repetitions, average not computed
    self.roc_nomean = CRoc()
    self.roc_nomean.compute([self.y1, self.y2], [self.s1, self.s2])

    # ROC over the 2 repetitions, average computed
    self.roc_wmean = CRoc()
    self.roc_wmean.compute([self.y1, self.y2], [self.s1, self.s2])
    self.roc_wmean.average()
class TestCPlot(CUnitTest):
    """Unit test for TestCPlot."""

    def setUp(self):
        """Train an SVM on a min-max normalized 2-feature random dataset."""
        random_ds = CDLRandom(n_features=2,
                              n_redundant=0,
                              n_informative=1,
                              n_clusters_per_class=1).load()
        random_ds.X = CNormalizerMinMax().fit_transform(random_ds.X)
        self.dataset = random_ds

        self.clf = CClassifierSVM()
        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_fun(self):
        """Test for CPlotFunction.plot_fun method."""
        figure = CFigure()
        subplot = figure.sp
        subplot.plot_ds(self.dataset)
        subplot.plot_fun(self.clf.decision_function, y=1)
        figure.show()

    def test_fgrads(self):
        """Test for CPlotFunction.plot_fgrads method."""
        def grad_wrt_class_1(x):
            # Gradient of the decision function with y fixed to 1
            return self.clf.grad_f_x(x, y=1)

        figure = CFigure()
        subplot = figure.sp
        subplot.plot_ds(self.dataset)
        subplot.plot_fun(self.clf.decision_function, y=1)
        subplot.plot_fgrads(grad_wrt_class_1)
        figure.show()
def test_performance(self):
    """Check that SVM and ridge both score above 0.90 on the training set.

    For every ridge classifier in `self.ridges`, an SVM is trained with a
    deep copy of the ridge's `preprocess` passed as kernel (or no kernel
    when it is None). Both classifiers are scored with the 'f1' metric on
    the training data.
    """
    self.logger.info("Testing error performance of the "
                     "classifiers on the training set")

    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        # The SVM gets its own copy so the two classifiers share no state
        kernel_copy = (ridge.preprocess.deepcopy()
                       if ridge.preprocess is not None else None)

        svm = CClassifierSVM(kernel=kernel_copy)
        svm.fit(self.dataset.X, self.dataset.Y)

        label_svm, _ = svm.predict(
            self.dataset.X, return_decision_function=True)
        label_ridge, _ = ridge.predict(
            self.dataset.X, return_decision_function=True)

        acc_svm = CMetric.create('f1').performance_score(
            self.dataset.Y, label_svm)
        acc_ridge = CMetric.create('f1').performance_score(
            self.dataset.Y, label_ridge)

        svm_report = "Accuracy of SVM: {:}".format(acc_svm)
        self.logger.info(svm_report)
        self.assertGreater(acc_svm, 0.90, svm_report)

        ridge_report = "Accuracy of ridge: {:}".format(acc_ridge)
        self.logger.info(ridge_report)
        self.assertGreater(acc_ridge, 0.90, ridge_report)
def setUp(self):
    """Train an SVM on a min-max normalized 2-feature random dataset."""
    random_ds = CDLRandom(n_features=2,
                          n_redundant=0,
                          n_informative=1,
                          n_clusters_per_class=1).load()
    # Normalize the features before handing them to the classifier
    random_ds.X = CNormalizerMinMax().fit_transform(random_ds.X)
    self.dataset = random_ds

    self.clf = CClassifierSVM()
    self.clf.fit(self.dataset.X, self.dataset.Y)
def setUp(self):
    """Train an SVM on digits 0 and 1 and wrap it in a gradient explainer."""
    # Two-class digits dataset, loaded with `zero_one=True`
    digits_loader = CDLDigits(class_list=[0, 1], zero_one=True)
    self.ds = digits_loader.load()

    # The explainer is built on the trained classifier
    svm = CClassifierSVM()
    svm.fit(self.ds.X, self.ds.Y)
    self.clf = svm

    self.explainer = CExplainerGradient(self.clf)
def setUp(self):
    """Train an SVM on a 50-sample random dataset and store its predictions.

    The predicted labels and decision-function scores on the training
    samples are stored in `self.labels` and `self.scores`.
    """
    loader = CDLRandom(n_samples=50, random_state=0)
    self.ds = loader.load()

    self.logger.info("Train an SVM and classify dataset...")
    self.svm = CClassifierSVM()
    self.svm.fit(self.ds.X, self.ds.Y)

    prediction = self.svm.predict(
        self.ds.X, return_decision_function=True)
    self.labels, self.scores = prediction
class TestCRoc(CUnitTest):
    """Unit test for CRoc."""

    def setUp(self):
        """Score two random datasets with an SVM trained on the first one."""
        shared_params = dict(n_features=1000,
                             n_redundant=200,
                             n_informative=250,
                             n_clusters_per_class=2)
        # The two loaders differ only in their random state
        self.dl1 = CDLRandom(random_state=0, **shared_params)
        self.dl2 = CDLRandom(random_state=1000, **shared_params)

        self.ds1 = self.dl1.load()
        self.ds2 = self.dl2.load()

        self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

        prediction_1 = self.svm.predict(
            self.ds1.X, return_decision_function=True)
        self.y1, self.s1 = prediction_1

        prediction_2 = self.svm.predict(
            self.ds2.X, return_decision_function=True)
        self.y2, self.s2 = prediction_2

        self.roc = CRoc()

    def test_roc_1sample(self):
        """Compute and average a ROC from a single (label, score) pair."""
        single_label = CArray([1])
        single_score = CArray([0])
        self.roc.compute(single_label, single_score)
        self.roc.average()

        # Testing 3 and not 1 as roc is bounded
        # (we add a first and last point)
        for rate in (self.roc.fpr, self.roc.tpr):
            self.assertEqual(rate.size, 3)

    def test_compute(self):
        """Compute a ROC on the first dataset and plot it on a log x-axis."""
        class_1_scores = self.s1[:, 1].ravel()
        self.roc.compute(self.ds1.Y, class_1_scores)

        figure = CFigure()
        figure.sp.semilogx(self.roc.fpr, self.roc.tpr)
        figure.sp.grid()
        figure.show()

    def test_mean(self):
        """Plot the averaged ROC, with error bars, over each repetition."""
        true_labels = [self.ds1.Y, self.ds2.Y]
        class_1_scores = [self.s1[:, 1].ravel(), self.s2[:, 1].ravel()]
        self.roc.compute(true_labels, class_1_scores)

        mean_fp, mean_tp, mean_std = self.roc.average(return_std=True)

        figure = CFigure(linewidth=2)
        figure.sp.errorbar(
            self.roc.mean_fpr, self.roc.mean_tpr, yerr=mean_std)

        # One curve per repetition, drawn before the mean curve
        for rep_idx in range(self.roc.n_reps):
            figure.sp.semilogx(self.roc.fpr[rep_idx], self.roc.tpr[rep_idx])

        figure.sp.semilogx(mean_fp, mean_tp)
        figure.sp.grid()
        figure.show()
def setUp(self):
    """Build the security-evaluation objects used by the tests.

    A linear SVM (C=1.0) is trained on the first 20 samples of a 2-feature
    blobs dataset; the remaining 10 samples are the attack set. For each
    attack factory one `CSecEval` is appended to `self.sec_eval`, with the
    matching dataset in `self.attack_ds`. If `cleverhans` cannot be
    imported, no attack is registered at all.
    """
    self.classifier = CClassifierSVM(kernel='linear', C=1.0)

    self.lb = -2
    self.ub = +2

    n_tr, n_ts = 20, 10
    n_features = 2
    n_reps = 1

    self.sec_eval = []
    self.attack_ds = []

    for rep in range(n_reps):
        self.logger.info(
            "Loading `random_blobs` with seed: {:}".format(rep))

        blobs = CDLRandomBlobs(
            n_samples=n_tr + n_ts,
            n_features=n_features,
            centers=[(-0.5, -0.5), (+0.5, +0.5)],
            center_box=(-0.5, 0.5),
            cluster_std=0.5,
            random_state=rep * 100 + 10).load()

        self.tr = blobs[:n_tr, :]
        self.ts = blobs[n_tr:, :]

        self.classifier.fit(self.tr.X, self.tr.Y)

        # only manipulate positive samples, targeting negative ones
        self.y_target = None
        self.attack_classes = CArray([1])

        attack_factories = (self._attack_pgd_ls, self._attack_cleverhans)
        for make_attack in attack_factories:
            # TODO: REFACTOR THESE UNITTESTS REMOVING THE FOR LOOP
            try:
                import cleverhans  # noqa: F401 - availability probe only
            except ImportError:
                # Without cleverhans every attack is skipped, including
                # the one built by `_attack_pgd_ls`
                continue

            self.attack_ds.append(self.ts)
            attack, param_name, param_values = make_attack()

            # set sec eval object
            evaluation = CSecEval(
                attack=attack,
                param_name=param_name,
                param_values=param_values,
            )
            self.sec_eval.append(evaluation)
def setUp(self):
    """Load training/test datasets and create an (unfitted) RBF SVM."""
    # Dummy datasets: both come from two `load()` calls on one loader.
    # NOTE(review): the original comment wanted a test set different from
    # the training set - confirm that a second `load()` on a loader with
    # a fixed random_state actually yields different data.
    ds_loader = CDLRandom(random_state=50000)
    self.training_dataset = ds_loader.load()
    self.test_dataset = ds_loader.load()

    # Classifier under test
    self.svm = CClassifierSVM(kernel=CKernel.create('rbf'))
    self.svm.verbose = 1

    kernel_type = self.svm.kernel.class_type
    self.logger.info("Using kernel {:}".format(kernel_type))
def _prepare_tree_nonlinear_svm(self, sparse, seed):
    """Prepare the objects needed to attack a decision tree.

    The attack uses a nonlinear surrogate SVM.

      - load a 2-feature, 2-cluster blob dataset
      - create a decision tree classifier seeded with `seed`
      - create a surrogate SVM with the 'rbf' kernel and C=1
        (no other kernel parameter is set here)

    None of the classifiers is fitted by this method.

    Parameters
    ----------
    sparse : bool
        Forwarded to `_load_blobs`.
    seed : int or None
        Forwarded to `_load_blobs` and used as the tree's `random_state`.

    Returns
    -------
    ds : CDataset
    clf : CClassifierDecisionTree
    clf_surr : CClassifierSVM

    """
    target_tree = CClassifierDecisionTree(random_state=seed)
    surrogate_svm = CClassifierSVM(kernel='rbf', C=1)

    blobs = self._load_blobs(
        n_feats=2,  # Number of dataset features
        n_clusters=2,  # Number of dataset clusters
        sparse=sparse,
        seed=seed)

    return blobs, target_tree, surrogate_svm
def test_explanation_svm_rbf(self):
    """Run the simple-classifier explanation test on an RBF SVM.

    The SVM uses C=10, an RBF kernel with gamma=0.01 and stores its dual
    variables. It is registered under the index 'rbf-svm'.
    """
    rbf_svm = CClassifierSVM(kernel=CKernelRBF(gamma=0.01), C=10)
    # gamma is set again explicitly on the kernel held by the classifier
    rbf_svm.kernel.gamma = 0.01
    rbf_svm.store_dual_vars = True

    self._clf = rbf_svm
    self._clf_idx = 'rbf-svm'

    self._test_explanation_simple_clf()
def setUp(self):
    """Create the dense/sparse datasets and one SVM per kernel type.

    `self.svms` holds, in order, an SVM with no kernel and SVMs with
    default-constructed linear, RBF and polynomial kernels. All of them
    get `verbose = 2`.
    """
    # generate synthetic data
    self.dataset = CDLRandom(n_features=2,
                             n_redundant=0,
                             n_informative=1,
                             n_clusters_per_class=1,
                             random_state=1).load()
    self.dataset_sparse = self.dataset.tosparse()

    kernel_types = (None, CKernelLinear, CKernelRBF, CKernelPoly)

    classifiers = []
    for kernel_cls in kernel_types:
        # `None` means "no kernel"; anything else is a class to instantiate
        kernel_obj = None if kernel_cls is None else kernel_cls()
        classifiers.append(CClassifierSVM(kernel=kernel_obj))
    self.svms = classifiers

    self.logger.info("Testing SVM with kernel functions: %s",
                     str(kernel_types))

    # Enabling debug output for each classifier
    for classifier in self.svms:
        classifier.verbose = 2

    self.logger.info("." * 50)
    self.logger.info("Number of Patterns: %s",
                     str(self.dataset.num_samples))
    self.logger.info("Features: %s", str(self.dataset.num_features))
def train_clf(tr):
    """Train a linear SVM (C=100) on `tr` and return it.

    Parameters
    ----------
    tr
        Training set, passed unchanged to the classifier's `fit`.

    Returns
    -------
    CClassifierSVM
        The classifier after `fit(tr)` has been called on it.

    """
    # The classifier used to be bound to `clf_` while the statements below
    # used `clf`, a name never assigned in this function (its only
    # assignment was commented out). That raised NameError, or silently
    # trained an unrelated module-level object. Bind it to `clf` directly.
    clf = CClassifierSVM(C=100, kernel="linear")

    print("Training of classifier...")
    clf.fit(tr)

    return clf
def _prepare_linear_svm(self, sparse, seed):
    """Prepare the objects needed to attack a linear SVM.

      - load a 2-feature, 2-cluster blob dataset
      - create an SVM (C=1.0) whose preprocessor is a min-max
        normalizer with feature range (-1, 1)

    The classifier is not fitted by this method.

    Parameters
    ----------
    sparse : bool
        Forwarded to `_load_blobs`.
    seed : int or None
        Forwarded to `_load_blobs`.

    Returns
    -------
    ds : CDataset
    clf : CClassifierSVM

    """
    blobs = self._load_blobs(
        n_feats=2,  # Number of dataset features
        n_clusters=2,  # Number of dataset clusters
        sparse=sparse,
        seed=seed)

    svm = CClassifierSVM(
        C=1.0, preprocess=CNormalizerMinMax(feature_range=(-1, 1)))

    return blobs, svm
def test_time(self):
    """Compare execution time of ridge and SVM.

    For every ridge classifier in `self.ridges`, an SVM is built with the
    same kernel (taken from `ridge.preprocess`). Both are fitted on the
    full dataset inside `self.timer()` and the measured intervals are
    logged. Nothing is asserted on the timings.
    """
    self.logger.info("Testing training speed of ridge compared to SVM ")

    for ridge in self.ridges:
        self.logger.info("RIDGE kernel: {:}".format(ridge.preprocess))

        # Pass the kernel by keyword, exactly as `test_performance` does.
        # A positional argument binds to whichever constructor parameter
        # happens to come first, which is not guaranteed to be the kernel.
        # The deep copy keeps the SVM from sharing (and refitting) the
        # very object the ridge classifier uses.
        if ridge.preprocess is not None:
            svm_kernel = ridge.preprocess.deepcopy()
        else:
            svm_kernel = None

        svm = CClassifierSVM(kernel=svm_kernel)

        with self.timer() as t_svm:
            svm.fit(self.dataset.X, self.dataset.Y)
        self.logger.info(
            "Execution time of SVM: {:}".format(t_svm.interval))

        with self.timer() as t_ridge:
            ridge.fit(self.dataset.X, self.dataset.Y)
        self.logger.info(
            "Execution time of ridge: {:}".format(t_ridge.interval))
def setUp(self):
    """Train an SVM (C=1.0) on the first 40 samples of a blobs dataset.

    The dataset has 10 features and is loaded dense with seed 0. Samples
    after the first 40 are kept as the test split.
    """
    self.clf = CClassifierSVM(C=1.0)

    self.n_tr = 40
    self.n_features = 10
    self.seed = 0

    self.logger.info(
        "Loading `random_blobs` with seed: {:}".format(self.seed))
    blobs = self._load_blobs(
        self.n_features, 2, sparse=False, seed=self.seed)
    self.ds = blobs

    # Train/test split by position: first `n_tr` samples vs. the rest
    split_at = self.n_tr
    self.tr = self.ds[:split_at, :]
    self.ts = self.ds[split_at:, :]

    self.clf.fit(self.tr.X, self.tr.Y)
def _test_model_clf():
    """Return the model used to test the `load_model` functionality.

    The classifier is created with default parameters. The pre-saved
    state will set "C=100", so restoring it can be verified by checking
    that value.
    """
    default_svm = CClassifierSVM()
    return default_svm
def test_plot(self):
    """Plot the predictions of SVM and Sec-SVM side by side.

    Both classifiers are trained with C=1 on the same 2-feature random
    dataset and `_compute_alignment` is run on the pair. Each subplot
    shows the dataset points over the classifier's `predict` output.
    """
    ds = CDLRandom(n_samples=100,
                   n_features=2,
                   n_redundant=0,
                   random_state=100).load()

    self.logger.info("Train Sec SVM")
    sec_svm = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
    sec_svm.verbose = 2
    sec_svm.fit(ds.X, ds.Y)

    self.logger.info("Train SVM")
    svm = CClassifierSVM(C=1)
    svm.fit(ds.X, ds.Y)

    self._compute_alignment(ds, sec_svm, svm)

    fig = CFigure(height=5, width=8)

    # Left panel: standard SVM; right panel: Sec-SVM
    panels = ((svm.predict, "SVM"), (sec_svm.predict, "Sec-SVM"))
    for position, (predict_fn, panel_title) in enumerate(panels, start=1):
        fig.subplot(1, 2, position)
        # Plot dataset points
        fig.sp.plot_ds(ds)
        # Plot objective function
        fig.sp.plot_fun(predict_fn,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=ds.get_bounds())
        fig.sp.title(panel_title)

    fig.show()
class TestCFigure(CUnitTest):
    """Unittest for CFigure."""

    def test_svm(self):
        """Plot a dataset, a path and an SVM decision function together."""
        self.X = CArray([[1, 2], [3, 4], [5, 6], [7, 8]])
        self.Y = CArray([[0], [1], [1], [0]]).ravel()
        self.dataset = CDataset(self.X, self.Y)

        self.classifier = CClassifierSVM(kernel=CKernelRBF())
        self.classifier.fit(self.dataset)

        # Axis limits: range of each feature, padded by 1 on both sides
        first_feature = self.X[:, [0]]
        self.x_min = first_feature.min() - 1
        self.x_max = first_feature.max() + 1
        second_feature = self.X[:, [1]]
        self.y_min = second_feature.min() - 1
        self.y_max = second_feature.max() + 1

        self.fig = CFigure(height=7,
                           width=10,
                           linewidth=5,
                           fontsize=24,
                           markersize=20)
        subplot = self.fig.sp
        subplot.title("Svm Test")

        self.logger.info("Test plot dataset method...")
        subplot.plot_ds(self.dataset)

        self.logger.info("Test plot path method...")
        waypoints = CArray([[1, 2], [1, 3], [1.5, 5]])
        subplot.plot_path(waypoints)

        self.logger.info("Test plot function method...")
        grid_bounds = [(self.x_min, self.x_max), (self.y_min, self.y_max)]
        subplot.plot_fun(self.classifier.decision_function,
                         plot_levels=False,
                         grid_limits=grid_bounds,
                         y=1)

        subplot.xlim(self.x_min, self.x_max)
        subplot.ylim(self.y_min, self.y_max)

        self.fig.show()
class TestCExplainerGradient(CUnitTest):
    """Unittests for CExplainerGradient"""

    def setUp(self):
        """Train an SVM on digits 0 and 1 and build the explainer on it."""
        self.clf = CClassifierSVM()

        # Two-class digits dataset; every sample is treated as an 8x8
        # image (64 features) by `test_explain`
        self.ds = CDLDigits(class_list=[0, 1], zero_one=True).load()

        # Training classifier
        self.clf.fit(self.ds)

        self.explainer = CExplainerGradient(self.clf)

    def test_explain(self):
        """Unittest for explain method.

        Explains sample 67 with respect to class 1 and checks that the
        result is a CArray holding one attribution per input feature.
        The sample and its attributions are then plotted side by side.
        """
        i = 67
        x = self.ds.X[i, :]

        attr = self.explainer.explain(x, y=1)

        self.logger.info("Attributions:\n{:}".format(attr.tolist()))

        self.assertIsInstance(attr, CArray)
        # The old check compared `attr.shape` with itself and always
        # passed. Compare element counts instead of shapes, so the check
        # holds whether the attributions come back flat or as a row.
        self.assertEqual(attr.size, x.size)

        fig = CFigure(height=3, width=6)

        # Plotting original image (the sample itself, not its attributions)
        fig.subplot(1, 2, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')

        # Symmetric color range so that zero attribution maps to the
        # center of the diverging colormap
        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        fig.subplot(1, 2, 2)
        fig.sp.imshow(attr.reshape((8, 8)),
                      cmap='seismic',
                      vmin=-1 * th,
                      vmax=th)

        fig.show()
def test_draw(self):
    """Plot SVM and SGD decision functions one above the other.

    Both classifiers are trained on a min-max normalized 2-feature random
    dataset. The figure is saved as 'test_c_classifier_sgd1.pdf' in a
    'figs' folder resolved from this file's path.
    """
    self.logger.info("Testing classifiers graphically")

    # generate 2D synthetic data
    dataset = CDLRandom(n_features=2,
                        n_redundant=1,
                        n_informative=1,
                        n_clusters_per_class=1).load()
    dataset.X = CNormalizerMinMax().fit_transform(dataset.X)

    first_sgd = self.sgds[0]
    first_sgd.fit(dataset.X, dataset.Y)

    svm = CClassifierSVM()
    svm.fit(dataset.X, dataset.Y)

    fig = CFigure(width=10, markersize=8)

    # Top panel: SVM; bottom panel: first SGD classifier
    panels = ((svm.decision_function, 'SVM'),
              (first_sgd.decision_function, 'SGD Classifier'))
    for row, (decision_fn, panel_title) in enumerate(panels, start=1):
        fig.subplot(2, 1, row)
        # Plot dataset points
        fig.sp.plot_ds(dataset)
        # Plot objective function
        fig.sp.plot_fun(decision_fn,
                        grid_limits=dataset.get_bounds(),
                        y=1)
        fig.sp.title(panel_title)

    output_path = fm.join(
        fm.abspath(__file__), 'figs', 'test_c_classifier_sgd1.pdf')
    fig.savefig(output_path)
def setUp(self):
    """Score two random datasets with an SVM trained on the first one.

    The two loaders share every setting except `random_state` (0 and
    1000). Predicted labels and decision-function scores are stored as
    `y1`/`s1` and `y2`/`s2`, and an empty `CRoc` as `self.roc`.
    """
    shared_params = dict(n_features=1000,
                         n_redundant=200,
                         n_informative=250,
                         n_clusters_per_class=2)
    self.dl1 = CDLRandom(random_state=0, **shared_params)
    self.dl2 = CDLRandom(random_state=1000, **shared_params)

    self.ds1 = self.dl1.load()
    self.ds2 = self.dl2.load()

    self.svm = CClassifierSVM(C=1e-7).fit(self.ds1.X, self.ds1.Y)

    prediction_1 = self.svm.predict(
        self.ds1.X, return_decision_function=True)
    self.y1, self.s1 = prediction_1

    prediction_2 = self.svm.predict(
        self.ds2.X, return_decision_function=True)
    self.y2, self.s2 = prediction_2

    self.roc = CRoc()
def test_preprocess(self):
    """Test classifier with preprocessors inside."""
    dataset = CDLRandom().load()
    svm = CClassifierSVM()

    def linear_chain():
        # Fresh lists on every call, so each helper receives its own
        # copies, as it did with the original inline literals
        return ['min-max', 'mean-std'], [{'feature_range': (-1, 1)}, {}]

    # All linear transformations with gradient implemented
    names, params = linear_chain()
    self._test_preprocess(dataset, svm, names, params)

    names, params = linear_chain()
    self._test_preprocess_grad(dataset, svm, names, params)

    # Mixed linear/nonlinear transformations without gradient
    self._test_preprocess(dataset, svm, ['pca', 'unit-norm'], [{}, {}])
def test_multiclass(self):
    """Test multiclass SVM on MNIST digits.

    An explicit one-vs-all wrapper around `CClassifierSVM` and a plain
    `CClassifierSVM` are built from the same parameters. Their test
    accuracy and the gradient of class 1 at one test sample must match.
    """
    self.logger.info("Testing multiclass SVM.")

    digits = tuple(range(10))
    n_tr = 100  # Number of training set samples
    n_ts = 200  # Number of test set samples

    mnist = CDataLoaderMNIST()
    tr = mnist.load('training', digits=digits, num_samples=n_tr)
    ts = mnist.load('testing', digits=digits, num_samples=n_ts)

    # Normalize the features in `[0, 1]`
    tr.X /= 255
    ts.X /= 255

    svm_params = {
        'kernel': CKernelRBF(gamma=0.1),
        'C': 10,
        'class_weight': {0: 1, 1: 1},
        'n_jobs': 2
    }
    ova_svm = CClassifierMulticlassOVA(CClassifierSVM, **svm_params)
    plain_svm = CClassifierSVM(**svm_params)

    accuracies = []
    gradients = []
    for classifier in (ova_svm, plain_svm):
        classifier.verbose = 1
        # We can now fit the classifier
        classifier.fit(tr.X, tr.Y)
        # Compute predictions on a test set
        predicted, _ = classifier.predict(
            ts.X, return_decision_function=True)
        # Evaluate the accuracy of the classifier
        accuracy = CMetricAccuracy().performance_score(
            y_true=ts.Y, y_pred=predicted)
        accuracies.append(accuracy)
        gradients.append(classifier.grad_f_x(ts.X[1, :], 1))

    self.assertAlmostEqual(accuracies[0], accuracies[1])
    self.assert_array_almost_equal(gradients[0], gradients[1])
def test_linear_svm(self):
    """Performs tests on linear SVM.

    An SVM created with `kernel=None` is compared against an SVM from
    `self.svms` that carries a kernel object. Both are trained on the
    dense dataset and then on the sparse one, and each time their
    predicted labels must be identical. After sparse training, the
    weights of the `kernel=None` SVM must be sparse as well.
    """
    self.logger.info("Testing SVM linear variants (kernel and not)")

    # Instancing a linear SVM and an SVM with linear kernel
    linear_svm = CClassifierSVM(kernel=None)

    # Take the first prepared SVM that actually has a kernel. A fixed
    # `self.svms[0]` picks the `kernel=None` classifier whenever the list
    # starts with it, and the test then compares two identical setups.
    # NOTE(review): assumes the first kernelized entry of `self.svms` is
    # the linear-kernel one - confirm against the fixture's ordering.
    kernel_linear_svm = next(
        (svm for svm in self.svms if svm.kernel is not None), None)
    self.assertIsNotNone(
        kernel_linear_svm, "No SVM with a kernel available in self.svms")

    self.logger.info("SVM w/ linear kernel in the primal")
    self.assertIsNone(linear_svm.kernel)

    self.logger.info("Training both classifiers on dense data")
    linear_svm.fit(self.dataset.X, self.dataset.Y)
    kernel_linear_svm.fit(self.dataset.X, self.dataset.Y)

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset.X, return_decision_function=True)

    # check prediction
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)

    self.logger.info("Training both classifiers on sparse data")
    linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)
    kernel_linear_svm.fit(self.dataset_sparse.X, self.dataset_sparse.Y)

    self.assertTrue(
        linear_svm.w.issparse, "Weights vector is not sparse even "
        "if training data is sparse")

    linear_svm_pred_y, linear_svm_pred_score = linear_svm.predict(
        self.dataset_sparse.X, return_decision_function=True)
    kernel_linear_svm_pred_y, \
        kernel_linear_svm_pred_score = kernel_linear_svm.predict(
            self.dataset_sparse.X, return_decision_function=True)

    # check prediction
    self.assert_array_equal(linear_svm_pred_y, kernel_linear_svm_pred_y)