def test_plot_decision_function(self):
    """Plot the decision regions of a one-vs-all multiclass SVM.

    A synthetic 3-class, 2-feature dataset is loaded and a
    CClassifierMulticlassOVA built on CClassifierSVM is fitted on it.
    For each class the figure shows the line of the corresponding binary
    classifier, the samples of that class and the samples predicted as
    that class; the multiclass decision regions are then drawn.
    """
    # Synthetic dataset
    dataset = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                        n_clusters_per_class=1, class_sep=1,
                        random_state=0).load()

    ova = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                   class_weight='balanced',
                                   preprocess='min-max')

    # Fit, then classify the training samples themselves
    ova.fit(dataset.X, dataset.Y)
    predicted, _ = ova.predict(dataset.X, return_decision_function=True)

    def draw_boundary(figure, binary_clf, lower, upper, fmt, legend_label):
        """Draw the line ``w[0] * x + w[1] * y + b = 0`` of `binary_clf`."""
        # Widen the range by 5 on both sides so the line is long enough
        xs = CArray.linspace(lower - 5, upper + 5)
        ys = -(binary_clf.w[0] * xs + binary_clf.b) / binary_clf.w[1]
        figure.sp.plot(xs, ys, fmt, label=legend_label)

    fig = CFigure(height=7, width=8)
    fig.sp.title('{:} ({:})'.format(ova.__class__.__name__,
                                    ova.classifier.__name__))

    x_bounds, y_bounds = dataset.get_bounds()

    # One matplotlib-like format string per class; its first character
    # is reused as the scatter color
    styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.']

    for position, label in enumerate(dataset.classes):
        fmt = styles[position]
        color = fmt[0]

        # Boundary of the binary classifier trained for this class
        draw_boundary(fig, ova._binary_classifiers[position],
                      x_bounds[0], x_bounds[1], fmt,
                      'Boundary\nfor class {:}'.format(label))

        # Filled markers: samples whose true label is `label`
        is_true = dataset.Y == label
        fig.sp.scatter(dataset.X[is_true, 0], dataset.X[is_true, 1],
                       s=40, c=color)

        # Hollow, larger markers: samples predicted as `label`
        is_predicted = predicted == label
        fig.sp.scatter(dataset.X[is_predicted, 0],
                       dataset.X[is_predicted, 1],
                       s=160, edgecolors=color,
                       facecolors='none', linewidths=2)

    # Decision regions of the whole multiclass classifier
    fig.sp.plot_decision_regions(ova, n_grid_points=100,
                                 grid_limits=dataset.get_bounds(offset=5))

    # Axis limits are padded by half of the upper bound on each side
    x_pad = .5 * x_bounds[1]
    y_pad = .5 * y_bounds[1]
    fig.sp.xlim(x_bounds[0] - x_pad, x_bounds[1] + x_pad)
    fig.sp.ylim(y_bounds[0] - y_pad, y_bounds[1] + y_pad)

    fig.sp.legend(loc=4)  # lower, right

    fig.show()
def test_plot(self):
    """Plot the predictions of a SVM and of a Sec-SVM side by side.

    Both classifiers are trained on the same 2-feature synthetic
    dataset, handed to `self._compute_alignment`, and then drawn in a
    1x2 figure (SVM on the left, Sec-SVM on the right).
    """
    data = CDLRandom(n_samples=100, n_features=2, n_redundant=0,
                     random_state=100).load()

    self.logger.info("Train Sec SVM")
    secure_clf = CClassifierSecSVM(C=1, eta=0.1, eps=1e-3, lb=-0.1, ub=0.5)
    secure_clf.verbose = 2
    secure_clf.fit(data.X, data.Y)

    self.logger.info("Train SVM")
    plain_clf = CClassifierSVM(C=1)
    plain_clf.fit(data.X, data.Y)

    self._compute_alignment(data, secure_clf, plain_clf)

    fig = CFigure(height=5, width=8)

    panels = (("SVM", plain_clf), ("Sec-SVM", secure_clf))
    for column, (caption, clf) in enumerate(panels, start=1):
        fig.subplot(1, 2, column)
        # Dataset points first, then the predicted label as background
        fig.sp.plot_ds(data)
        fig.sp.plot_fun(clf.predict,
                        multipoint=True,
                        plot_background=True,
                        plot_levels=False,
                        n_grid_points=100,
                        grid_limits=data.get_bounds())
        fig.sp.title(caption)

    fig.show()
def test_draw(self):
    """Compare the SVM and the first SGD classifier graphically.

    Both are trained on the same min-max normalized 2-feature dataset;
    their decision functions (called with ``y=1``) are drawn in a 2x1
    figure that is saved as 'figs/test_c_classifier_sgd1.pdf'.
    """
    self.logger.info("Testing classifiers graphically")

    # 2D synthetic data, normalized with CNormalizerMinMax
    data = CDLRandom(n_features=2, n_redundant=1, n_informative=1,
                     n_clusters_per_class=1).load()
    data.X = CNormalizerMinMax().fit_transform(data.X)

    sgd = self.sgds[0]
    sgd.fit(data.X, data.Y)

    svm = CClassifierSVM()
    svm.fit(data.X, data.Y)

    fig = CFigure(width=10, markersize=8)

    panels = (('SVM', svm), ('SGD Classifier', sgd))
    for row, (caption, clf) in enumerate(panels, start=1):
        fig.subplot(2, 1, row)
        # Dataset points first, then the decision function
        fig.sp.plot_ds(data)
        fig.sp.plot_fun(clf.decision_function,
                        grid_limits=data.get_bounds(), y=1)
        fig.sp.title(caption)

    target = fm.join(fm.abspath(__file__), 'figs',
                     'test_c_classifier_sgd1.pdf')
    fig.savefig(target)
class TestCAttackEvasionPGDLSMNIST(CAttackEvasionTestCases):
    """Unittests for CAttackEvasionPGDLS on MULTICLASS dataset."""

    def setUp(self):
        """Build the dataset and train the multiclass classifier.

        Sets `self.ds`, `self.kernel`, `self.normalizer`,
        `self.multiclass`, `self.y_pred` and `self.score_pred`.
        """
        import numpy as np
        np.random.seed(12345678)

        # Synthetic 3-class, 2-feature dataset
        self.ds = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                            n_clusters_per_class=1, class_sep=1,
                            random_state=0).load()

        # Carve an extra class out of the samples having
        # feature 0 > 0 and feature 1 > 1
        new_label = self.ds.num_classes
        in_corner = (self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()
        self.ds.Y[in_corner] = new_label

        # Kernel of the binary SVMs; a `None` kernel is labelled 'lin'
        # in the name of the saved figure
        self.kernel = CKernelRBF(gamma=10)

        # Data normalization
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess=None,
            kernel=self.kernel)
        self.multiclass.verbose = 0

        # Training, then classification of the training samples
        self.multiclass.fit(self.ds.X, self.ds.Y)
        self.y_pred, self.score_pred = self.multiclass.predict(
            self.ds.X, return_decision_function=True)

    def test_indiscriminate(self):
        """Test indiscriminate evasion."""
        self.y_target = None
        self.logger.info("Test indiscriminate evasion ")

        self._test_evasion_multiclass(CArray([0.1783, 0.6249]))

    def test_targeted(self):
        """Test targeted evasion."""
        self.y_target = 2
        message = "Test target evasion (with target class {:}) "
        self.logger.info(message.format(self.y_target))

        self._test_evasion_multiclass(CArray([0.9347, 0.3976]))

    def _test_evasion_multiclass(self, expected_x):
        """Run the attack for dmax = 0, 1, 2 and check the final point.

        Parameters
        ----------
        expected_x : CArray
            Point that `eva.x_opt` must match (4 decimals) after the
            last run.

        """
        # EVASION
        self.multiclass.verbose = 2

        # Box constraint taken from the normalizer range, if any
        lb = ub = None
        if self.normalizer is not None:
            lb = self.normalizer.feature_range[0]
            ub = self.normalizer.feature_range[1]

        dmax = 2

        self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

        eva = CAttackEvasionPGDLS(classifier=self.multiclass,
                                  surrogate_classifier=self.multiclass,
                                  surrogate_data=self.ds,
                                  distance='l2', dmax=dmax, lb=lb, ub=ub,
                                  solver_params=self.solver_params,
                                  y_target=self.y_target)
        eva.verbose = 0  # alternative value noted by the author: 2

        # Index of the starting sample. Alternatives noted by the author:
        #   class 2 region: 0
        #   class 1 region: 68
        #   class 3 region: 1 (wrong classified point, used here),
        #                   53 (evasion goes up usually)
        #   class 0 region: 49 (wrong classified point),
        #                   27 (correctly classified point)
        p_idx = 1

        start = self.ds.X[p_idx, :]
        true_label = self.ds.Y[p_idx].item()

        x_seq = CArray.empty((0, start.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        # Score column to track: the true class for indiscriminate
        # evasion, the target class otherwise
        if self.y_target is None:
            tracked = true_label
        else:
            tracked = self.y_target

        # Each run is initialized with the point found by the previous one
        current = start
        for radius in range(dmax + 1):
            self.logger.info("Evasion at dmax: " + str(radius))

            eva.dmax = radius
            current, f_opt = eva._run(
                x0=start, y0=true_label, x_init=current)
            y_pred, score = self.multiclass.predict(
                current, return_decision_function=True)

            # Only the values reached at each dmax are stored; the full
            # per-iteration history would be eva.x_seq and eva.f_seq
            f_seq = f_seq.append(f_opt)
            x_seq = x_seq.append(current, axis=0)

            s = score[:, tracked]
            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: " + str(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info(
            "Objective function after evasion: {:}".format(f_opt))

        # Compare optimal point with expected
        self.assert_array_almost_equal(
            eva.x_opt.todense().ravel(), expected_x, decimal=4)

        self._make_plots(x_seq, dmax, eva, start, scores, f_seq)

    def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq):
        """Save a 2x2 figure summarizing the evasion.

        Panels: decision regions with the path, objective function with
        its gradients and the path, tracked classifier score per dmax,
        objective function value per dmax. Nothing is drawn when
        `self.make_figures` is exactly `False`.
        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

        # Plot bounds must cover both the dataset and the evasion path
        (low_x, high_x), (low_y, high_y) = self.ds.get_bounds()
        path_x = x_seq[:, 0]
        path_y = x_seq[:, 1]
        low_x = min(low_x, path_x.min())
        high_x = max(high_x, path_x.max())
        low_y = min(low_y, path_y.min())
        high_y = max(high_y, path_y.max())
        ds_bounds = [(low_x, high_x), (low_y, high_y)]

        # Same path appearance in both 2D panels
        path_kwargs = dict(path_style='-',
                           start_style='o', start_facecolor='w',
                           start_edgewidth=2, final_style='o',
                           final_facecolor='k', final_edgewidth=2)

        # Panel 1: multiclass decision regions
        fig.subplot(2, 2, 1)
        fig = self._plot_decision_function(fig, plot_background=True)
        fig.sp.plot_path(x_seq, **path_kwargs)
        # distance constraint (zero level of the rescaled distance)
        fig.sp.plot_fun(func=self._rescaled_distance,
                        multipoint=True,
                        plot_background=False,
                        n_grid_points=20, levels_color='k',
                        grid_limits=ds_bounds,
                        levels=[0], colorbar=False,
                        levels_linewidth=2.0, levels_style=':',
                        alpha_levels=.4, c=x0, r=dmax)
        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panel 2: multiclass evasion objective function
        fig.subplot(2, 2, 2)
        fig = self._plot_decision_function(fig)
        fig.sp.plot_fgrads(eva._objective_function_gradient,
                           grid_limits=ds_bounds, n_grid_points=20,
                           color='k', alpha=.5)
        fig.sp.plot_path(x_seq, **path_kwargs)
        # distance constraint (zero level of the rescaled distance)
        fig.sp.plot_fun(func=self._rescaled_distance,
                        multipoint=True,
                        plot_background=False,
                        n_grid_points=20, levels_color='w',
                        grid_limits=ds_bounds,
                        levels=[0], colorbar=False,
                        levels_style=':', levels_linewidth=2.0,
                        alpha_levels=.5, c=x0, r=dmax)
        fig.sp.plot_fun(lambda z: eva._objective_function(z),
                        multipoint=True,
                        grid_limits=ds_bounds,
                        colorbar=False, n_grid_points=20,
                        plot_levels=False)
        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panels 3 and 4: values collected at each dmax
        if self.y_target is not None:
            score_title = \
                "Classifier Score for Target Class (Targ. Evasion)"
        else:
            score_title = \
                "Classifier Score for True Class (Indiscr. Evasion)"

        curves = ((3, score_title, scores),
                  (4, "Objective Function", f_seq))
        for position, caption, values in curves:
            fig.subplot(2, 2, position)
            fig.sp.title(caption)
            fig.sp.plot(values)
            fig.sp.grid()
            fig.sp.xlim(0, dmax)
            fig.sp.xlabel("dmax")

        fig.tight_layout()

        k_name = 'lin' if self.kernel is None else self.kernel.class_type
        file_name = \
            "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format(
                self.ds.num_classes, k_name, self.y_target)
        fig.savefig(fm.join(self.images_folder, file_name))

    def _rescaled_distance(self, x, c, r):
        """Rescale distance for plot.

        When a normalizer is set, `c` and `x` are first passed through
        its `inverse_transform`. The `constraint` method of a
        CConstraintL2 with center `c` and radius `r` is then applied to
        `x` along axis 1 and the result returned.
        """
        if self.normalizer is not None:
            c = self.normalizer.inverse_transform(c)
            x = self.normalizer.inverse_transform(x)
        l2_ball = CConstraintL2(center=c, radius=r)
        return x.apply_along_axis(l2_ball.constraint, axis=1)

    def _get_style(self):
        """Define the style vector for the different classes.

        Returns a list of tuples whose first element is the color used
        for the class with the same position in `self.ds.classes`.
        """
        n_classes = self.ds.num_classes

        if n_classes == 3:
            return [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')]

        if n_classes == 4:
            return [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                    ('y', 's', '-.'), ('gray', 'D', '--')]

        return [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                ('y', 's', '-.'), ('gray', 'D', '--'),
                ('c', '-.'), ('m', '-'), ('y', '-.')]

    def _plot_decision_function(self, fig, plot_background=False):
        """Plot the decision function of a multiclass classifier.

        Draws on the current subplot of `fig` the dataset samples (one
        color per class) and the labels predicted by `self.multiclass`
        over a grid, then returns `fig`.
        """
        fig.sp.title('{:} ({:})'.format(
            self.multiclass.__class__.__name__,
            self.multiclass.classifier.__name__))

        x_bounds, y_bounds = self.ds.get_bounds()

        styles = self._get_style()

        for position, label in enumerate(self.ds.classes):
            members = self.ds.Y == label
            fig.sp.scatter(self.ds.X[members, 0],
                           self.ds.X[members, 1],
                           s=20, c=styles[position][0], edgecolors='k',
                           facecolors='none', linewidths=1,
                           label='c {:}'.format(label))

        # Predicted label evaluated on a grid wider than the dataset
        fig.sp.plot_fun(lambda x: self.multiclass.predict(x),
                        multipoint=True, cmap='Set2',
                        grid_limits=self.ds.get_bounds(offset=5),
                        colorbar=False, n_grid_points=300,
                        plot_levels=True,
                        plot_background=plot_background,
                        levels=[-1, 0, 1, 2],
                        levels_color='gray', levels_style='--')

        fig.sp.xlim(x_bounds[0] - .05, x_bounds[1] + .05)
        fig.sp.ylim(y_bounds[0] - .05, y_bounds[1] + .05)

        fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1)

        return fig