Exemplo n.º 1
0
 def setUp(self):
     """Fixture: an RBF-kernel OVA SVM fitted on min-max scaled blobs."""
     ova_svm = CClassifierMulticlassOVA(
         classifier=CClassifierSVM, kernel='rbf')
     self.clf = ova_svm

     # Random blobs dataset (2 features, centers=4) with a fixed seed
     blobs_loader = CDLRandomBlobs(random_state=3, n_features=2, centers=4)
     self.dataset = blobs_loader.load()

     # Rescale the features with a min-max normalizer before training
     scaler = CNormalizerMinMax()
     self.dataset.X = scaler.fit_transform(self.dataset.X)

     self.clf.fit(self.dataset.X, self.dataset.Y)
    def test_multiclass_gradient(self):
        """Compare the class-weighted gradient with a numerical estimate.

        The objective is the mean over classes of the decision scores divided
        element-wise by a random vector `div`; its analytical gradient is
        requested from the classifier through the `w` weights.
        """
        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')
        ova.fit(self.dataset.X, self.dataset.Y)

        # One random divisor per class (seeded, so the test is repeatable)
        div = CArray.rand(shape=ova.n_classes, random_state=0)

        def f_x(x):
            """Mean over classes of score / div, wrapped in a CArray."""
            _, scores = ova.predict(x, return_decision_function=True)
            return CArray((scores / div).mean())

        def grad_f_x(x):
            """Gradient of `f_x`: each class weighted by 1 / (div * n)."""
            n_cls = ova.n_classes
            weights = CArray.ones(shape=n_cls) / (div * n_cls)
            return ova.gradient(x, w=weights)

        sample_idx = 5  # Sample to test
        x = self.dataset.X[sample_idx, :]

        from secml.optim.function import CFunction
        fun = CFunction(f_x, grad_f_x)
        check_grad_val = fun.check_grad(x, epsilon=1e-1)
        self.logger.info(
            "norm(grad - num_grad): %s", str(check_grad_val))
        self.assertLess(check_grad_val, 1e-3)
Exemplo n.º 3
0
class TestCPlotClassifier(CUnitTest):
    """Unit test for CPlotClassifier."""

    def setUp(self):
        """Fit an RBF-kernel OVA SVM on min-max scaled random blobs."""
        self.clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, kernel='rbf')

        blobs_loader = CDLRandomBlobs(
            random_state=3, n_features=2, centers=4)
        self.dataset = blobs_loader.load()

        scaler = CNormalizerMinMax()
        self.dataset.X = scaler.fit_transform(self.dataset.X)

        self.clf.fit(self.dataset.X, self.dataset.Y)

    def test_plot_decision_regions(self):
        """Test for `.plot_decision_regions` method."""
        fig = CFigure(width=10, height=5)

        # Left panel: background disabled; right panel: default options
        panels = (
            (1, {'plot_background': False}),
            (2, {}),
        )
        for position, extra_args in panels:
            fig.subplot(1, 2, position)
            fig.sp.plot_ds(self.dataset)
            fig.sp.plot_decision_regions(
                self.clf, n_grid_points=200, **extra_args)

        fig.show()
Exemplo n.º 4
0
    def setUp(self):
        """Fixture: a 4-label dataset and a fitted RBF-kernel OVA SVM.

        Stores the dataset, kernel, normalizer, classifier and the
        predictions/scores on the training data as instance attributes.
        """
        import numpy as np
        np.random.seed(12345678)

        # Synthetic 3-class, 2-feature data
        loader = CDLRandom(
            n_classes=3, n_features=2, n_redundant=0,
            n_clusters_per_class=1, class_sep=1, random_state=0)
        self.ds = loader.load()

        # Samples with first feature > 0 and second feature > 1 are moved
        # to a brand new label (the current number of classes)
        relabel_mask = (self.ds.X[:, 0] > 0).logical_and(
            self.ds.X[:, 1] > 1).ravel()
        self.ds.Y[relabel_mask] = self.ds.num_classes

        self.kernel = CKernelRBF(gamma=10)

        # Normalization is done outside the classifier (preprocess=None)
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        self.multiclass = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess=None,
            kernel=self.kernel)
        self.multiclass.verbose = 0

        # Train, then classify the very same samples
        self.multiclass.fit(self.ds.X, self.ds.Y)
        predictions = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
        self.y_pred, self.score_pred = predictions
    def test_plot_decision_function(self):
        """Test plot of multiclass classifier decision function."""
        # Synthetic 3-class, 2-feature data
        loader = CDLRandom(n_classes=3, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, class_sep=1,
                           random_state=0)
        ds = loader.load()

        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess='min-max')

        # Train and classify the training samples
        ova.fit(ds.X, ds.Y)
        y_pred, _ = ova.predict(ds.X, return_decision_function=True)

        def plot_hyperplane(img, clf, min_v, max_v, linestyle, label):
            """Draw the line w[0]*x + w[1]*y + b = 0 of a binary clf."""
            # extend the range by 5 on both sides so the line is long enough
            xs = CArray.linspace(min_v - 5, max_v + 5)
            ys = -(clf.w[0] * xs + clf.b) / clf.w[1]
            img.sp.plot(xs, ys, linestyle, label=label)

        fig = CFigure(height=7, width=8)
        title = '{:} ({:})'.format(
            ova.__class__.__name__, ova.classifier.__name__)
        fig.sp.title(title)

        x_bounds, y_bounds = ds.get_bounds()

        # One matplotlib-like style string per class; its first character
        # is reused as the color of the scatter markers
        styles = ['go-', 'yp--', 'rs-.', 'bD--', 'c-.', 'm-', 'y-.']

        for c_idx, c in enumerate(ds.classes):
            style = styles[c_idx]
            color = style[0]

            # Boundary of the binary classifier trained for this class
            plot_hyperplane(fig, ova._binary_classifiers[c_idx],
                            x_bounds[0], x_bounds[1], style,
                            'Boundary\nfor class {:}'.format(c))

            # Filled markers: samples whose true label is `c`
            is_true_c = ds.Y == c
            fig.sp.scatter(ds.X[is_true_c, 0], ds.X[is_true_c, 1],
                           s=40, c=color)

            # Hollow, larger markers: samples predicted as `c`
            is_pred_c = y_pred == c
            fig.sp.scatter(ds.X[is_pred_c, 0], ds.X[is_pred_c, 1],
                           s=160, edgecolors=color,
                           facecolors='none', linewidths=2)

        # Multiclass decision regions on a grid wider than the data bounds
        fig.sp.plot_decision_regions(
            ova, n_grid_points=100, grid_limits=ds.get_bounds(offset=5))

        # Axis limits: bounds widened by half of the upper bound
        x_margin = .5 * x_bounds[1]
        y_margin = .5 * y_bounds[1]
        fig.sp.xlim(x_bounds[0] - x_margin, x_bounds[1] + x_margin)
        fig.sp.ylim(y_bounds[0] - y_margin, y_bounds[1] + y_margin)

        fig.sp.legend(loc=4)  # lower, right

        fig.show()
Exemplo n.º 6
0
    def setUp(self):
        """Fit an OVA SVM on random 3-class data and store its outputs."""
        loader = CDLRandom(
            n_classes=3, n_samples=50, random_state=0, n_informative=3)
        self.ds = loader.load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds.X, self.ds.Y)

        # Predicted labels and decision scores on the training samples
        outputs = self.ova.predict(self.ds.X, return_decision_function=True)
        self.labels, self.scores = outputs
    def setUp(self):
        """Train an RBF-kernel OVA SVM on the digits data and wrap it."""
        digits_loader = CDLDigits()
        self.ds = digits_loader.load()

        rbf = CKernelRBF(gamma=1e-3)
        self.clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=rbf)

        # The explainer is built on the already trained classifier
        self.clf.fit(self.ds.X, self.ds.Y)
        self.explainer = CExplainerGradientInput(self.clf)
Exemplo n.º 8
0
    def _prepare_multiclass_svm(self, sparse, seed):
        """Prepare what is required for attacking a MULTICLASS SVM.

        - call `self._load_mnist49(sparse, seed)` (its result is not used
          here; presumably it stores the data on the instance - confirm)
        - create a MULTICLASS SVM with RBF kernel (C=1, gamma=0.01)

        Parameters
        ----------
        sparse : bool
        seed : int or None

        Returns
        -------
        clf : CClassifierMulticlassOVA
            One-vs-all wrapper around CClassifierSVM. It is returned
            without being fitted.

        """
        self._load_mnist49(sparse, seed)

        rbf_kernel = CKernel.create('rbf', gamma=0.01)
        return CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            C=1.0,
            kernel=rbf_kernel,
        )
    def test_fun(self):
        """Test for decision_function() and predict() methods."""
        self.logger.info("Test for decision_function() and predict() methods.")

        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')

        # Same classifier, dense then sparse version of the dataset
        dense_scores = self._test_fun(ova, self.dataset.todense())
        sparse_scores = self._test_fun(ova, self.dataset.tosparse())

        # Both data formats must lead to (almost) the same scores
        self.assert_array_almost_equal(dense_scores, sparse_scores)
    def setUpClass(cls):
        """Class-level fixture: RBF-kernel OVA SVM fitted on digits data."""
        CUnitTest.setUpClass()

        digits_loader = CDLDigits()
        cls.ds = digits_loader.load()

        rbf = CKernelRBF(gamma=1e-3)
        cls.clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=rbf)

        # Train once for all the tests of the class
        cls.clf.fit(cls.ds.X, cls.ds.Y)
Exemplo n.º 11
0
    def test_params_multiclass(self):
        """Parameter estimation for multiclass classifiers."""
        # Create dummy dataset (we want a test different from train)
        tr = CDLRandom(n_classes=4, n_clusters_per_class=1,
                       random_state=50000).load()

        def _estimate_and_check(**clf_extra):
            """Run the estimation on a fresh RBF-kernel OVA SVM."""
            rbf = CKernel.create('rbf')
            ova = CClassifierMulticlassOVA(
                CClassifierSVM, C=1, kernel=rbf, **clf_extra)
            ova.verbose = 1

            search_grid = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}
            best_expected = {'C': 10.0, 'kernel.gamma': 0.1}

            self._run_multiclass(tr, ova, search_grid, best_expected)

        # First without any preprocessor ...
        _estimate_and_check()

        # ... then with a min-max preprocessor inside the classifier
        self.logger.info("Testing with preprocessor")
        _estimate_and_check(preprocess='min-max')
    def test_predict_withsvm(self):
        """Compare the OVA SVM accuracy with sklearn's one-vs-rest SVC.

        Both models are trained and evaluated on `self.dataset`; the two
        accuracies must differ by less than 0.01.
        """
        sk_ova = OneVsRestClassifier(
            SVC(kernel='linear', class_weight='balanced'))
        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')
        ova.verbose = 2

        ova.fit(self.dataset, n_jobs=2)
        class_pred, _ = ova.predict(
            self.dataset.X, return_decision_function=True)

        self.logger.info("Predicted: \n{:}".format(class_pred))
        self.logger.info("Real: \n{:}".format(self.dataset.Y))

        acc = CMetric.create('accuracy').performance_score(
            self.dataset.Y, class_pred)
        self.logger.info("Accuracy: {:}".format(acc))

        # sklearn side: works on the raw underlying arrays
        sk_ova.fit(self.dataset.X.get_data(), self.dataset.Y.tondarray())
        sk_labels = sk_ova.predict(self.dataset.X.get_data())

        acc_sklearn = CMetric.create('accuracy').performance_score(
            self.dataset.Y, CArray(sk_labels))
        self.logger.info("Accuracy Sklearn: {:}".format(acc_sklearn))

        accuracy_gap = abs(acc - acc_sklearn)
        self.assertLess(accuracy_gap, 0.01)
Exemplo n.º 13
0
    def test_params_multiclass(self):
        """Parameter estimation for multiclass classifiers."""

        def _estimate_and_check(best_expected, **clf_extra):
            """Run the estimation on a fresh RBF-kernel OVA SVM."""
            rbf = CKernel.create('rbf')
            ova = CClassifierMulticlassOVA(
                CClassifierSVM, C=1, kernel=rbf, **clf_extra)
            ova.verbose = 1

            search_grid = {'C': [1, 10, 100], 'kernel.gamma': [0.1, 1]}

            self._run_multiclass(ova, search_grid, best_expected)

        # Expected values are lists of four entries per parameter
        _estimate_and_check(
            {'C': [1.0, 1.0, 10.0, 10.0],
             'kernel.gamma': [0.1, 0.1, 0.1, 0.1]})

        self.logger.info("Testing with preprocessor")

        _estimate_and_check(
            {'C': [100, 10, 10, 1],
             'kernel.gamma': [0.1, 0.1, 0.1, 0.1]},
            preprocess='min-max')
    def setUpClass(cls):
        """Class-level fixture: MNIST splits and a fitted OVA SVM."""
        CAttackEvasionCleverhansTestCases.setUpClass()

        cls.seed = 0

        # `_load_mnist` returns six values, stored as class attributes
        mnist_parts = cls._load_mnist()
        (cls.tr, cls.val, cls.ts, cls.digits,
         cls.img_w, cls.img_h) = mnist_parts

        ova = CClassifierMulticlassOVA(CClassifierSVM)
        cls.clf = ova
        cls.clf.fit(cls.tr)

        cls.x0_img_class = 1
        cls.y_target = 2  # Target class for targeted tests
    def test_normalization(self):
        """Test data normalization inside CClassifierMulticlassOVA.

        Normalizing the data externally and training without preprocess
        must give the same predictions as training on the raw data with
        `preprocess='min-max'`.
        """
        from secml.ml.features.normalization import CNormalizerMinMax
        from secml.data import CDataset

        # External normalization + classifier without preprocess
        scaled_x = CNormalizerMinMax().fit_transform(self.dataset.X)
        external_clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')
        external_clf.fit(CDataset(scaled_x, self.dataset.Y))
        pred_external = external_clf.predict(scaled_x)

        # Raw data + classifier with internal min-max preprocess
        internal_clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM,
            class_weight='balanced',
            preprocess='min-max')
        internal_clf.fit(self.dataset)
        pred_internal = internal_clf.predict(self.dataset.X)

        self.logger.info(
            "Predictions with internal norm:\n{:}".format(pred_internal))
        self.logger.info(
            "Predictions with external norm:\n{:}".format(pred_external))

        mismatches = pred_external != pred_internal
        self.assertFalse(mismatches.any())
    def test_apply_method(self):
        """Check that `apply_method` sets C on every binary classifier."""
        new_c = 150

        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')
        ova.fit(self.dataset.X, self.dataset.Y)

        # Call CClassifierSVM.set(param_name='C', param_value=...) through
        # the multiclass wrapper
        ova.apply_method(
            CClassifierSVM.set, param_name='C', param_value=150)

        clf_idx = 0
        while clf_idx < ova.num_classifiers:
            self.assertEqual(ova._binary_classifiers[clf_idx].C, new_c)
            clf_idx += 1
    def test_preprocess(self):
        """Test classifier with preprocessors inside."""
        ova = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced')

        # All linear transformations with gradient implemented: run both
        # the plain check and the gradient check with the same chain.
        # The lists are rebuilt at each iteration, so every call receives
        # its own fresh objects.
        for checker in (self._test_preprocess, self._test_preprocess_grad):
            checker(self.dataset, ova,
                    ['min-max', 'mean-std'],
                    [{'feature_range': (-1, 1)}, {}])

        # Mixed linear/nonlinear transformations without gradient
        nonlinear_chain = ['pca', 'unit-norm']
        nonlinear_params = [{}, {}]
        self._test_preprocess(
            self.dataset, ova, nonlinear_chain, nonlinear_params)
Exemplo n.º 18
0
    def test_multiclass(self):
        """Test multiclass SVM on MNIST digits.

        An OVA wrapper around CClassifierSVM and a plain CClassifierSVM
        are built with the same parameters; their test accuracy and the
        gradient on one test sample must match.
        """
        self.logger.info("Testing multiclass SVM.")

        digits = tuple(range(10))

        loader = CDataLoaderMNIST()
        # 100 training samples, 200 test samples
        tr = loader.load('training', digits=digits, num_samples=100)
        ts = loader.load('testing', digits=digits, num_samples=200)

        # Normalize the features in `[0, 1]`
        for split in (tr, ts):
            split.X /= 255

        svm_params = dict(
            kernel=CKernelRBF(gamma=0.1),
            C=10,
            class_weight={0: 1, 1: 1},
            n_jobs=2,
        )
        candidates = (
            CClassifierMulticlassOVA(CClassifierSVM, **svm_params),
            CClassifierSVM(**svm_params),
        )

        accuracies = []
        gradients = []
        for clf in candidates:
            clf.verbose = 1
            clf.fit(tr.X, tr.Y)

            # Predictions on the test set and their accuracy
            y_pred, _ = clf.predict(ts.X, return_decision_function=True)
            metric = CMetricAccuracy()
            accuracies.append(
                metric.performance_score(y_true=ts.Y, y_pred=y_pred))

            # Gradient w.r.t. the second test sample, for class 1
            gradients.append(clf.grad_f_x(ts.X[1, :], 1))

        ova_acc, svm_acc = accuracies
        ova_grad, svm_grad = gradients
        self.assertAlmostEqual(ova_acc, svm_acc)
        self.assert_array_almost_equal(ova_grad, svm_grad)
Exemplo n.º 19
0
    def setUp(self):
        """Fixture: an OVA SVM wrapped by a reject-threshold classifier."""
        import numpy as np
        np.random.seed(12345678)

        self._dataset_creation()

        self.kernel = CKernelRBF(gamma=1)

        inner_clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, class_weight='balanced',
            preprocess=None, kernel=self.kernel)
        inner_clf.verbose = 0

        # The attribute holds the rejecting wrapper (second argument 0.6),
        # not the bare OVA classifier
        self.multiclass = CClassifierRejectThreshold(inner_clf, 0.6)

        # Train, then classify the very same samples
        self.multiclass.fit(self.ds.X, self.ds.Y)
        outputs = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
        self.y_pred, self.score_pred = outputs
    def setUpClass(cls):
        """Class-level fixture: OVA SVM on 3 tight blobs plus a start point."""
        CAttackEvasionCleverhansTestCases.setUpClass()

        cls.seed = 0
        cls.y_target = None

        rbf = CKernelRBF(gamma=10)
        cls.clf = CClassifierMulticlassOVA(
            CClassifierSVM,
            kernel=rbf,
            C=0.1,
            preprocess=CNormalizerMinMax())

        # Three explicit 2D centers, with a very small spread
        blob_centers = [[0.1, 0.1], [0.5, 0], [0.8, 0.8]]
        blobs_loader = CDLRandomBlobs(
            n_features=0,
            centers=blob_centers,
            cluster_std=0.01,
            n_samples=100,
            random_state=cls.seed)
        cls.ds = blobs_loader.load()

        cls.clf.fit(cls.ds.X, cls.ds.Y)

        # Initial point and the label the trained classifier gives to it
        cls.x0 = CArray([0.6, 0.2])
        cls.y0 = CArray(cls.clf.predict(cls.x0))
    def test_set_get_state(self):
        """Test for set_state and get_state."""

        def _new_ova():
            """Build an untrained OVA SVM with a fresh pca/mean-std chain."""
            chain = CPreProcess.create_chain(['pca', 'mean-std'], [{}, {}])
            return CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                            kernel='rbf',
                                            class_weight='balanced',
                                            preprocess=chain)

        source_clf = _new_ova()

        # Give each binary classifier its own C and kernel gamma
        source_clf.prepare(num_classes=4)
        different_c = (10, 20, 30, 40)
        different_gamma = (50, 60, 70, 80)
        source_clf.set('C', different_c)
        source_clf.set('kernel.gamma', different_gamma)

        source_clf.fit(self.dataset)
        pred_y = source_clf.predict(self.dataset.X)
        self.logger.info(
            "Predictions before restoring state:\n{:}".format(pred_y))

        state = source_clf.get_state()
        self.logger.info("State of multiclass:\n{:}".format(state))

        # Create an entirely new clf
        restored_clf = _new_ova()

        # Restoring before `prepare` must fail: not enough binary
        # classifiers are available yet
        with self.assertRaises(ValueError):
            restored_clf.set_state(state)

        # Restore state
        restored_clf.prepare(num_classes=4)
        restored_clf.set_state(state)

        # Every binary classifier must have received its own parameters
        binary_clfs = restored_clf._binary_classifiers
        for clf_idx, binary_clf in enumerate(binary_clfs):
            self.assertEqual(binary_clf.C, different_c[clf_idx])
            self.assertEqual(
                binary_clf.kernel.gamma, different_gamma[clf_idx])

        pred_y_post = restored_clf.predict(self.dataset.X)
        self.logger.info(
            "Predictions after restoring state:\n{:}".format(pred_y_post))

        self.assert_array_equal(pred_y, pred_y_post)
def attack_keras_model(X, Y, S, nb_attack=25, dmax=0.1):
    """
    Generates an adversarial attack on a general model.

    A surrogate multiclass (one-vs-all) SVM with RBF kernel is trained on a
    random 66% split of the data, its `C` / `kernel.gamma` are selected by
    3-fold cross-validation, and a PGD-LS evasion attack is then run from
    `nb_attack` randomly drawn points of the remaining split.

    :param X: Original inputs on which the model is trained
        (must expose `.shape`; converted to a float CArray)
    :param Y: Original outputs on which the model is trained
    :param S: Original protected attributes on which the model is trained
        (currently not used by this function)
    :param nb_attack: Number of attacks to run, i.e. number of rows of the
        returned arrays
    :param dmax: Maximum perturbation, forwarded to the attack as `dmax`
    :return: Tuple `(result_pts, result_class, labels)`: the adversarial
        points as a `(nb_attack, n_features)` numpy array, the class
        predicted for each of them as a `(nb_attack, 1)` numpy array, and
        the labels of the first `nb_attack` samples of the internal test
        split
    """

    from secml.data import CDataset
    from secml.array import CArray

    # secML wants all dimensions to be homogeneous (we had previously float and int in X)
    data_set_encoded_secML = CArray(X, dtype=float, copy=True)
    data_set_encoded_secML = CDataset(data_set_encoded_secML, Y)

    n_tr = round(0.66 * X.shape[0])
    n_ts = X.shape[0] - n_tr

    logger.debug(X.shape)
    logger.debug(n_tr)
    logger.debug(n_ts)

    from secml.data.splitter import CTrainTestSplit
    splitter = CTrainTestSplit(train_size=n_tr, test_size=n_ts)

    # Use training set for the classifier and then pick points from an internal test set.
    tr_set_secML, ts_set_secML = splitter.split(data_set_encoded_secML)

    # Create a surrogate classifier

    # Creation of the multiclass classifier
    from secml.ml.classifiers import CClassifierSVM
    from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
    from secml.ml.kernel import CKernelRBF
    clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

    # Parameters for the Cross-Validation procedure
    xval_params = {'C': [1e-4, 1e-3, 1e-2, 0.1, 1], 'kernel.gamma': [0.01, 0.1, 1, 10, 100, 1e3]}

    # Let's create a 3-Fold data splitter
    random_state = 999

    from secml.data.splitter import CDataSplitterKFold
    xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

    # Select and set the best training parameters for the classifier
    logger.debug("Estimating the best training parameters...")
    best_params = clf.estimate_parameters(
        dataset=tr_set_secML,
        parameters=xval_params,
        splitter=xval_splitter,
        metric='accuracy',
        perf_evaluator='xval'
    )
    # The parameters are passed as a lazy %-argument: without the `%s`
    # placeholder the logging module cannot format the record and reports
    # a formatting error instead of the message.
    logger.debug("The best training parameters are: %s", best_params)

    logger.debug(clf.get_params())
    logger.debug(clf.num_classifiers)

    # Metric to use for training and performance evaluation
    from secml.ml.peval.metrics import CMetricAccuracy
    metric = CMetricAccuracy()

    # Train the classifier
    clf.fit(tr_set_secML)
    logger.debug(clf.num_classifiers)

    # Compute predictions on a test set
    y_pred = clf.predict(ts_set_secML.X)

    # Evaluate the accuracy of the classifier
    acc = metric.performance_score(y_true=ts_set_secML.Y, y_pred=y_pred)

    logger.debug("Accuracy on test set: {:.2%}".format(acc))

    # Prepare attack configuration

    noise_type = 'l2'   # Type of perturbation 'l1' or 'l2'
    lb, ub = 0, 1       # Bounds of the attack space. Can be set to `None` for unbounded
    y_target = None     # None if `error-generic` or a class label for `error-specific`

    # Should be chosen depending on the optimization problem
    solver_params = {
        'eta': 0.1,         # grid search resolution
        'eta_min': 0.1,
        'eta_max': None,    # None should be ok
        'max_iter': 1000,
        'eps': 1e-2         # Tolerance on the stopping crit.
    }

    # Run attack

    # The trained classifier is used both as target and as surrogate
    from secml.adv.attacks.evasion import CAttackEvasionPGDLS
    pgd_ls_attack = CAttackEvasionPGDLS(
        classifier=clf,
        surrogate_classifier=clf,
        surrogate_data=tr_set_secML,
        distance=noise_type,
        dmax=dmax,
        lb=lb, ub=ub,
        solver_params=solver_params,
        y_target=y_target)

    nb_feat = X.shape[1]

    # NOTE(review): `np.empty` leaves the rows uninitialised, so any
    # iteration that ends in the `except ValueError` branch below returns
    # arbitrary values for that row - confirm callers can tolerate this.
    result_pts = np.empty([nb_attack, nb_feat])
    result_class = np.empty([nb_attack, 1])

    # take a point at random being the starting point of the attack and run the attack
    import random
    for nb_iter in range(nb_attack):
        rn = random.randint(0, ts_set_secML.num_samples - 1)
        start_sample = ts_set_secML[rn, :]
        x0, y0 = start_sample.X, start_sample.Y

        try:
            y_pred_pgdls, _, adv_ds_pgdls, _ = pgd_ls_attack.run(x0, y0)
            adv_pt = adv_ds_pgdls.X.get_data()
            result_pts[nb_iter] = adv_pt
            result_class[nb_iter] = y_pred_pgdls.get_data()[0]
        except ValueError:
            # Best effort: a failed attack is reported and skipped
            logger.warning("value error on {}".format(nb_iter))

    # NOTE(review): the returned labels are those of the FIRST `nb_attack`
    # test samples, not of the randomly drawn starting points `rn` - verify
    # this is what callers expect.
    return result_pts, result_class, ts_set_secML[:nb_attack, :].Y
Exemplo n.º 23
0
class TestCExplainerGradientInput(CUnitTest):
    """Unittests for CExplainerGradientInput"""

    def setUp(self):
        """Train an RBF-kernel OVA SVM on the digits data and wrap it."""
        # Digits dataset: `test_explain` reshapes each sample to an 8x8 image
        self.ds = CDLDigits().load()

        self.clf = CClassifierMulticlassOVA(CClassifierSVM,
                                            kernel=CKernelRBF(gamma=1e-3))

        # Training classifier
        self.clf.fit(self.ds)

        self.explainer = CExplainerGradientInput(self.clf)

    def test_explain(self):
        """Unittest for explain method.

        Explains one sample w.r.t. every class, checks type and size of
        each attribution and plots the sample next to the attributions.
        """
        i = 67
        ds_i = self.ds[i, :]
        x, y_true = ds_i.X, ds_i.Y.item()

        self.logger.info("Explaining P{:} c{:}".format(i, y_true))

        x_pred, x_score = self.clf.predict(x, return_decision_function=True)

        self.logger.info("Predicted class {:}, scores:\n{:}".format(
            x_pred.item(), x_score))
        self.logger.info("Candidates: {:}".format(x_score.argsort()[::-1]))

        fig = CFigure(height=1.5, width=12)

        # Plotting original image
        fig.subplot(1, self.ds.num_classes + 1, 1)
        fig.sp.imshow(x.reshape((8, 8)), cmap='gray')
        fig.sp.title("Origin c{:}".format(y_true))
        fig.sp.yticks([])
        fig.sp.xticks([])

        # One row of attributions per class
        attr = CArray.empty(shape=(self.ds.num_classes, x.size))

        # Computing attributions
        for c in self.ds.classes:

            attr_c = self.explainer.explain(x, y=c)

            # Validate the attribution of THIS class. The checks used to be
            # made on the pre-allocated `attr` matrix, comparing its shape
            # with itself, so they could never fail.
            self.assertIsInstance(attr_c, CArray)
            self.assertEqual(attr_c.size, x.size)

            attr[c, :] = attr_c
            self.logger.info("Attributions class {:}:\n{:}".format(
                c, attr_c.tolist()))

        # Symmetric color scale around zero, shared by all the classes
        th = max(abs(attr.min()), abs(attr.max()))

        # Plotting attributions
        for c in self.ds.classes:

            fig.subplot(1, self.ds.num_classes + 1, 2 + c)
            fig.sp.imshow(attr[c, :].reshape((8, 8)),
                          cmap='seismic',
                          vmin=-1 * th,
                          vmax=th)
            fig.sp.title("Attr c{:}".format(c))
            fig.sp.yticks([])
            fig.sp.xticks([])

        fig.tight_layout()

        fig.show()
Exemplo n.º 24
0
    def test_aspreprocess(self):
        """Test for normalizer used as preprocess."""
        from secml.ml.classifiers import CClassifierSVM
        from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA

        # Small PyTorch network, trained and then used as a normalizer
        model = mlp(input_dims=20, hidden_dims=(40,), output_dims=3)
        loss = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=1e-1)
        net = CClassifierPyTorch(model=model, loss=loss,
                                 optimizer=optimizer, random_state=0,
                                 epochs=10, preprocess='min-max')
        net.fit(self.ds.X, self.ds.Y)

        norm = CNormalizerDNN(net=net)

        clf = CClassifierMulticlassOVA(
            classifier=CClassifierSVM, preprocess=norm)

        def _fit_predict_and_log():
            """(Re)train `clf`, classify the dataset and log the outcome."""
            clf.fit(self.ds.X, self.ds.Y)
            y_pred, scores = clf.predict(
                self.ds.X, return_decision_function=True)
            self.logger.info("TRUE:\n{:}".format(self.ds.Y.tolist()))
            self.logger.info("Predictions:\n{:}".format(y_pred.tolist()))
            self.logger.info("Scores:\n{:}".format(scores))

        def _check_grad_shape(gradient, sample):
            """The gradient must be vector-like, as large as the sample."""
            self.assertTrue(gradient.is_vector_like)
            self.assertEqual(sample.size, gradient.size)

        self.logger.info("Testing last layer")
        _fit_predict_and_log()

        x = self.ds.X[0, :]

        self.logger.info("Testing last layer gradient")

        for c in self.ds.classes:
            self.logger.info("Gradient w.r.t. class {:}".format(c))

            grad = clf.grad_f_x(x, y=c)

            self.logger.info("Output of grad_f_x:\n{:}".format(grad))

            # Analytical gradient against its numerical approximation
            fun = CFunction(clf.decision_function, clf.grad_f_x)
            check_grad_val = fun.check_grad(x, y=c, epsilon=1e-1)
            self.logger.info(
                "norm(grad - num_grad): %s", str(check_grad_val))
            self.assertLess(check_grad_val, 1e-3)

            _check_grad_shape(grad, x)

        # Switch the normalizer output to an inner layer, then retrain
        layer = 'linear1'
        norm.out_layer = layer

        self.logger.info("Testing layer {:}".format(norm.out_layer))
        _fit_predict_and_log()

        self.logger.info("Testing 'linear1' layer gradient")
        grad = clf.grad_f_x(x, y=0)  # y is required for multiclassova
        self.logger.info("Output of grad_f_x:\n{:}".format(grad))

        _check_grad_shape(grad, x)
Exemplo n.º 25
0
class TestCLossCrossEntropy(CUnitTest):
    """Unittests for CLossCrossEntropy and softmax."""

    def setUp(self):
        """Fit an OVA SVM on random 3-class data and store its outputs."""
        loader = CDLRandom(
            n_classes=3, n_samples=50, random_state=0, n_informative=3)
        self.ds = loader.load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds.X, self.ds.Y)

        outputs = self.ova.predict(self.ds.X, return_decision_function=True)
        self.labels, self.scores = outputs

    def test_in_out(self):
        """Unittest for input and output to CCrossEntropy"""

        def _check_loss(l, n_samples):
            """Loss must be a dense, flat, float CArray of `n_samples`."""
            self.assertIsInstance(l, CArray)
            self.assertTrue(l.isdense)
            self.assertEqual(1, l.ndim)
            self.assertEqual(n_samples, l.size)
            self.assertIsSubDtype(l.dtype, float)

        loss_class = CLossCrossEntropy()
        loss_name = loss_class.__class__.__name__

        # (labels, scores, log template, expected number of loss values):
        # first the whole dataset, then a single sample
        cases = (
            (self.ds.Y, self.scores,
             "{:}.loss(y_true, scores).mean():\n{:}",
             self.ds.Y.size),
            (self.ds.Y[0], self.scores[0, :],
             "{:}.loss(y_true[0], scores[0,:]).mean():\n{:}",
             1),
        )
        for y_true, scores, template, expected_size in cases:
            loss = loss_class.loss(y_true, scores)
            loss_mean = loss.mean()
            self.logger.info(template.format(loss_name, loss_mean))
            _check_loss(loss, expected_size)

    def test_grad(self):
        """Compare analytical gradients with its numerical approximation."""
        def _loss_wrapper(scores, loss, true_labels):
            """Expose the loss as a function of the scores only."""
            return loss.loss(true_labels, scores)

        loss_class = CLossCrossEntropy()

        # One random label and one random row of three scores
        y_true = CArray.randint(0, 2, 1)
        score = CArray.randn((1, 3))

        self.logger.info("Y_TRUE: {:} SCORES: {:}".format(y_true, score))

        for pos_label in (None, 0, 1, 2):
            self.logger.info("POS_LABEL: {:}".format(pos_label))

            # real value of the gradient on x
            grad = loss_class.dloss(y_true, score, pos_label)

            self.logger.info("GRAD: {:}".format(grad))

            numerical = CFunction(_loss_wrapper).approx_fprime(
                score, eps, loss_class, y_true)
            self.logger.info("APPROX (FULL): {:}".format(numerical))

            # Without an explicit pos_label, the true label is the reference
            if pos_label is None:
                ref_idx = y_true.item()
            else:
                ref_idx = pos_label
            approx = numerical[ref_idx]

            self.logger.info("APPROX (POS_LABEL): {:}".format(approx))

            check_grad_val = (grad - approx).norm()

            self.logger.info("Gradient difference between analytical svm "
                             "gradient and numerical gradient: %s",
                             str(check_grad_val))
            self.assertLess(check_grad_val, 1e-4,
                            "the gradient is wrong {:}".format(check_grad_val))
class TestCSoftmax(CUnitTest):
    """Unit tests for CSoftmax, run on the scores of a multiclass SVM."""

    def setUp(self):
        """Load a random 3-class dataset and store the OVA-SVM predictions."""
        self.ds = CDLRandom(
            n_classes=3, n_samples=50, random_state=0, n_informative=3).load()

        self.logger.info("Fit an SVM and classify dataset...")
        self.ova = CClassifierMulticlassOVA(CClassifierSVM)
        self.ova.fit(self.ds)

        prediction = self.ova.predict(
            self.ds.X, return_decision_function=True)
        self.labels, self.scores = prediction

    def test_softmax(self):
        """Compare `CSoftmax.softmax` with the scikit-learn softmax.

        The comparison is run on the whole score matrix first and on its
        first row afterwards.
        """
        from sklearn.utils.extmath import softmax as softmax_sk

        def _assert_matches_sklearn(scores):
            """Assert both implementations agree after rounding."""
            ours = CSoftmax().softmax(scores)
            reference = softmax_sk(scores.tondarray())

            self.logger.info("Our softmax.max():\n{:}".format(ours.max()))
            self.logger.info(
                "SKlearn softmax.max():\n{:}".format(reference.max()))

            # Values are compared after rounding to 4 decimals
            mismatch = ours.round(4) != CArray(reference).round(4)
            self.assertFalse(mismatch.any())

        _assert_matches_sklearn(self.scores)

        self.logger.info("Testing a single point...")
        _assert_matches_sklearn(self.scores[0, :])

    def test_softmax_gradient(self):
        """Compare the analytical softmax gradient with a numerical one."""
        self.softmax = CSoftmax()

        def _sigma_pos_label(s, y):
            """Return the y-th element of the softmax of the scores.

            Parameters
            ----------
            s : CArray
                Scores.
            y : int
                Index of the softmax output to return.

            Returns
            -------
            The y-th element of the flattened softmax output.

            """
            return self.softmax.softmax(s).ravel()[y]

        score = self.scores[0, :]

        for pos_label in range(3):
            self.logger.info("POS_LABEL: {:}".format(pos_label))

            # Analytical gradient at the selected point
            analytical = self.softmax.gradient(score, pos_label)
            self.logger.info("ANALITICAL GRAD: {:}".format(analytical))

            # Finite-difference gradient of the selected softmax output
            numerical = CFunction(_sigma_pos_label).approx_fprime(
                score, 1e-5, pos_label)
            self.logger.info("NUMERICAL GRADIENT: {:}".format(numerical))

            grad_gap = (analytical - numerical).norm()
            self.logger.info(
                "The norm of the difference bettween the "
                "analytical and the numerical gradient is: %s",
                str(grad_gap))

            self.assertLess(grad_gap, 1e-4,
                            "the gradient is wrong {:}".format(grad_gap))
# Script fragment: normalizes the data, builds a multiclass (OVA) SVM with an
# RBF kernel and prepares a cross-validated search of its parameters.
# NOTE(review): `tr`, `ts` and `random_state` are not defined in this
# fragment; presumably they are created earlier in the script - confirm.

# Normalize the data: the normalizer is fitted on the training set `tr` and
# the already-fitted transform is then applied to the test set `ts`
from secml.ml.features import CNormalizerMinMax
nmz = CNormalizerMinMax()
tr.X = nmz.fit_transform(tr.X)
ts.X = nmz.transform(ts.X)

# Metric to use for training and performance evaluation
from secml.ml.peval.metrics import CMetricAccuracy
metric = CMetricAccuracy()

# Creation of the multiclass classifier
# (CClassifierSVM is passed as a class, together with an RBF kernel instance)
from secml.ml.classifiers import CClassifierSVM
from secml.ml.classifiers.multiclass import CClassifierMulticlassOVA
from secml.ml.kernel import CKernelRBF
clf = CClassifierMulticlassOVA(CClassifierSVM, kernel=CKernelRBF())

# Parameters for the Cross-Validation procedure:
# candidate values for 'C' and for the 'kernel.gamma' parameter
xval_params = {'C': [1e-2, 0.1, 1], 'kernel.gamma': [10, 100, 1e3]}

# Let's create a 3-Fold data splitter
from secml.data.splitter import CDataSplitterKFold
xval_splitter = CDataSplitterKFold(num_folds=3, random_state=random_state)

# Select and set the best training parameters for the classifier
print("Estimating the best training parameters...")
best_params = clf.estimate_parameters(
    dataset=tr,
    parameters=xval_params,
    splitter=xval_splitter,
    metric='accuracy',
    def test_gradient(self):
        """Unittests for gradient() function.

        The gradient of the multiclass classifier is checked on the same
        sample for both dense and sparse data:
         - against its numerical approximation;
         - against the gradient of each single binary classifier;
         - dense results against sparse results.
        Finally, out-of-range class indices are expected to raise
        ValueError.
        """
        multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                              class_weight='balanced')

        sample_idx = 5  # Sample to test

        self.logger.info("Testing with dense data...")
        ds = self.dataset.todense()
        multiclass.fit(ds)

        pattern = ds.X[sample_idx, :]

        # Compare with numerical gradient
        grads_d = self._test_gradient_numerical(multiclass, pattern)

        # Check if we can return the gradient of each binary classifier.
        # A dedicated loop variable is used so the sample index is not
        # overwritten and the sparse run below works on the same sample
        for clf_idx in range(multiclass.num_classifiers):

            ova_grad = multiclass._binary_classifiers[clf_idx].grad_f_x(
                pattern)

            gradient = multiclass.grad_f_x(pattern, y=clf_idx)
            self.logger.info("Gradient of {:}^th sub-clf is:\n{:}".format(
                clf_idx, gradient))

            self.assert_array_equal(gradient, ova_grad)

        self.logger.info("Testing with sparse data...")
        ds = self.dataset.tosparse()
        multiclass.fit(ds)

        pattern = ds.X[sample_idx, :]

        # Compare with numerical gradient
        grads_s = self._test_gradient_numerical(multiclass, pattern)

        # Compare dense gradients with sparse gradients
        for grad_i, grad in enumerate(grads_d):
            self.assert_array_almost_equal(grad.atleast_2d(), grads_s[grad_i])

        # Test error raise
        with self.assertRaises(ValueError):
            multiclass.grad_f_x(pattern, y=-1)
        with self.assertRaises(ValueError):
            multiclass.grad_f_x(pattern, y=100)
    def test_set(self):
        """Unittests for setting parameters of the binary classifiers.

        Checks that parameters set on the multiclass classifier reach
        every binary classifier, both before and after training, that a
        tuple of values assigns one value per binary classifier, and that
        a tuple of the wrong length raises ValueError.
        """
        from secml.ml.kernels import CKernelRBF
        multiclass = CClassifierMulticlassOVA(classifier=CClassifierSVM,
                                              C=1,
                                              kernel=CKernelRBF())
        # Test set before training
        multiclass.set_params({'C': 100, 'kernel.gamma': 20})
        for clf in multiclass._binary_classifiers:
            self.assertEqual(clf.C, 100.0)
            self.assertEqual(clf.kernel.gamma, 20.0)

        # Restoring kernel
        multiclass.set('kernel', CKernelRBF(gamma=50))

        # Setting a different parameter value for each binary classifier
        multiclass.prepare(num_classes=4)
        different_c = (10, 20, 30, 40)
        multiclass.set('C', different_c)
        different_gamma = (50, 60, 70, 80)
        multiclass.set('kernel.gamma', different_gamma)

        # Fit multiclass classifier, then check the values after training
        multiclass.fit(self.dataset)

        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, different_c[clf_idx])
            self.assertEqual(clf.kernel.gamma, different_gamma[clf_idx])

        # Test set after training
        multiclass.set_params({'C': 30, 'kernel.gamma': 200})
        for clf in multiclass._binary_classifiers:
            self.assertEqual(clf.C, 30.0)
            self.assertEqual(clf.kernel.gamma, 200.0)

        # Setting a parameter in a single binary classifier
        # must not change the other ones
        multiclass._binary_classifiers[0].kernel.gamma = 300
        for i in range(1, multiclass.num_classifiers):
            self.assertNotEqual(multiclass._binary_classifiers[i].kernel.gamma,
                                300.0)

        # Setting a different parameter value for each binary classifier
        different_c = (100, 200, 300)

        # ValueError is raised as the number of values does not match
        # the number of available binary classifiers
        with self.assertRaises(ValueError):
            multiclass.set('C', different_c)

        multiclass.prepare(num_classes=3)
        multiclass.set('C', different_c)
        for clf_idx, clf in enumerate(multiclass._binary_classifiers):
            self.assertEqual(clf.C, different_c[clf_idx])
Exemplo n.º 30
0
class TestCAttackEvasionPGDLSMNIST(CAttackEvasionTestCases):
    """Unittests for CAttackEvasionPGDLS on MULTICLASS dataset."""
    def setUp(self):
        """Create the dataset and train the multiclass classifier to attack.

        A random 2-feature dataset is loaded, part of it is relabelled as
        an additional class, features are min-max normalized and an OVA SVM
        with RBF kernel is fitted. Predictions on the training data are
        stored in `self.y_pred` and `self.score_pred`.
        """
        import numpy as np
        np.random.seed(12345678)

        # Synthetic data: 3 classes, 2 features, one cluster per class
        self.ds = CDLRandom(
            n_classes=3, n_features=2, n_redundant=0,
            n_clusters_per_class=1, class_sep=1, random_state=0).load()

        # Samples having first feature > 0 and second feature > 1
        # are relabelled as a new class
        first_positive = self.ds.X[:, 0] > 0
        second_above_one = self.ds.X[:, 1] > 1
        new_class_mask = first_positive.logical_and(second_above_one).ravel()
        self.ds.Y[new_class_mask] = self.ds.num_classes

        self.kernel = CKernelRBF(gamma=10)

        # Min-max normalization of the features
        self.normalizer = CNormalizerMinMax()
        self.ds.X = self.normalizer.fit_transform(self.ds.X)

        ova_params = {
            'classifier': CClassifierSVM,
            'class_weight': 'balanced',
            'preprocess': None,
            'kernel': self.kernel,
        }
        self.multiclass = CClassifierMulticlassOVA(**ova_params)
        self.multiclass.verbose = 0

        # Train, then classify the training data
        self.multiclass.fit(self.ds.X, self.ds.Y)

        prediction = self.multiclass.predict(
            self.ds.X, return_decision_function=True)
        self.y_pred, self.score_pred = prediction

    def test_indiscriminate(self):
        """Run the evasion without a target class and check the final point."""
        # No target class: indiscriminate evasion
        self.y_target = None
        self.logger.info("Test indiscriminate evasion ")

        self._test_evasion_multiclass(CArray([0.1783, 0.6249]))

    def test_targeted(self):
        """Run the evasion towards class 2 and check the final point."""
        self.y_target = 2

        message = "Test target evasion (with target class {:}) ".format(
            self.y_target)
        self.logger.info(message)

        self._test_evasion_multiclass(CArray([0.9347, 0.3976]))

    def _test_evasion_multiclass(self, expected_x):
        """Run the PGD-LS evasion for growing `dmax` and check the result.

        The attack is run for each integer `dmax` from 0 to 2, each run
        starting from the point returned by the previous one. The final
        optimal point is compared with `expected_x`, then the plots are
        produced.

        Parameters
        ----------
        expected_x : CArray
            Expected optimal point, compared up to the 4th decimal.

        """
        self.multiclass.verbose = 2

        # Box constraints come from the normalizer feature range, if any
        lb = ub = None
        if self.normalizer is not None:
            feature_range = self.normalizer.feature_range
            lb, ub = feature_range[0], feature_range[1]

        dmax = 2

        self.solver_params = {'eta': 1e-1, 'eta_min': 1.0}

        attack_params = {
            'classifier': self.multiclass,
            'surrogate_classifier': self.multiclass,
            'surrogate_data': self.ds,
            'distance': 'l2',
            'dmax': dmax,
            'lb': lb,
            'ub': ub,
            'solver_params': self.solver_params,
            'y_target': self.y_target,
        }
        eva = CAttackEvasionPGDLS(**attack_params)
        eva.verbose = 0  # 2

        # Index of the sample to attack. Original notes on other indices:
        #  - class 2 region: 0
        #  - class 1 region: 68
        #  - class 3 region: 1 (wrong classified point, used here),
        #    53 (evasion goes up usually)
        #  - class 0 region: 49 (wrong classified point),
        #    27 (correctly classified point)
        p_idx = 1

        x0 = self.ds.X[p_idx, :]
        y0 = self.ds.Y[p_idx].item()

        # Score of the target class if any, of the true class otherwise
        monitored_class = y0 if self.y_target is None else self.y_target

        x_seq = CArray.empty((0, x0.shape[1]))
        scores = CArray([])
        f_seq = CArray([])

        x = x0
        for d in range(dmax + 1):

            self.logger.info("Evasion at dmax: " + str(d))

            eva.dmax = d
            x, f_opt = eva._run(x0=x0, y0=y0, x_init=x)
            y_pred, score = self.multiclass.predict(
                x, return_decision_function=True)

            # Only the values at each dmax are stored, not all the
            # iterations (eva.x_seq and eva.f_seq should be used for those)
            f_seq = f_seq.append(f_opt)
            x_seq = x_seq.append(x, axis=0)

            s = score[:, monitored_class]
            scores = scores.append(s)

        self.logger.info("Predicted label after evasion: " + str(y_pred))
        self.logger.info("Score after evasion: {:}".format(s))
        self.logger.info("Objective function after evasion: {:}".format(f_opt))

        # Compare optimal point with expected
        found_x = eva.x_opt.todense().ravel()
        self.assert_array_almost_equal(found_x, expected_x, decimal=4)

        self._make_plots(x_seq, dmax, eva, x0, scores, f_seq)

    def _make_plots(self, x_seq, dmax, eva, x0, scores, f_seq):
        """Save a 2x2 figure summarizing the evasion, unless disabled.

        Panels: decision regions with the evasion path, attack objective
        function with its gradients and the path, monitored classifier
        score, objective function value. Nothing is done if
        `self.make_figures` is False.

        Parameters
        ----------
        x_seq : CArray
            Evasion path, one point per row.
        dmax : scalar
            Radius of the plotted distance constraint and upper limit of
            the x axis in the last two panels.
        eva : CAttackEvasionPGDLS
            Attack providing the objective function and its gradient.
        x0 : CArray
            Center of the plotted distance constraint.
        scores : CArray
            Values plotted in the classifier score panel.
        f_seq : CArray
            Values plotted in the objective function panel.

        """
        if self.make_figures is False:
            self.logger.debug("Skipping figures...")
            return

        fig = CFigure(height=9, width=10, markersize=6, fontsize=12)

        # Plot bounds must contain both the dataset and the evasion path
        (x_lo, x_hi), (y_lo, y_hi) = self.ds.get_bounds()
        path_x, path_y = x_seq[:, 0], x_seq[:, 1]
        ds_bounds = [
            (min(x_lo, path_x.min()), max(x_hi, path_x.max())),
            (min(y_lo, path_y.min()), max(y_hi, path_y.max())),
        ]

        def _draw_path():
            """Draw the evasion path on the current subplot."""
            fig.sp.plot_path(x_seq,
                             path_style='-',
                             start_style='o',
                             start_facecolor='w',
                             start_edgewidth=2,
                             final_style='o',
                             final_facecolor='k',
                             final_edgewidth=2)

        def _draw_distance_constraint(color, alpha):
            """Draw the zero level of the distance constraint."""
            fig.sp.plot_fun(func=self._rescaled_distance,
                            multipoint=True,
                            plot_background=False,
                            n_grid_points=20,
                            levels_color=color,
                            grid_limits=ds_bounds,
                            levels=[0],
                            colorbar=False,
                            levels_linewidth=2.0,
                            levels_style=':',
                            alpha_levels=alpha,
                            c=x0,
                            r=dmax)

        # Panel 1: multiclass decision regions
        fig.subplot(2, 2, 1)
        fig = self._plot_decision_function(fig, plot_background=True)
        _draw_path()
        _draw_distance_constraint('k', .4)
        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panel 2: multiclass evasion objective function
        fig.subplot(2, 2, 2)
        fig = self._plot_decision_function(fig)
        fig.sp.plot_fgrads(eva._objective_function_gradient,
                           grid_limits=ds_bounds,
                           n_grid_points=20,
                           color='k',
                           alpha=.5)
        _draw_path()
        _draw_distance_constraint('w', .5)
        fig.sp.plot_fun(lambda z: eva._objective_function(z),
                        multipoint=True,
                        grid_limits=ds_bounds,
                        colorbar=False,
                        n_grid_points=20,
                        plot_levels=False)
        fig.sp.grid(linestyle='--', alpha=.5, zorder=0)

        # Panels 3 and 4: monitored score and objective function vs dmax
        if self.y_target is None:
            score_title = "Classifier Score for True Class (Indiscr. Evasion)"
        else:
            score_title = "Classifier Score for Target Class (Targ. Evasion)"

        panels = ((3, score_title, scores),
                  (4, "Objective Function", f_seq))
        for position, title, values in panels:
            fig.subplot(2, 2, position)
            fig.sp.title(title)
            fig.sp.plot(values)

            fig.sp.grid()
            fig.sp.xlim(0, dmax)
            fig.sp.xlabel("dmax")

        fig.tight_layout()

        # The file name reports number of classes, kernel and target class
        if self.kernel is None:
            k_name = 'lin'
        else:
            k_name = self.kernel.class_type
        file_name = "pgd_ls_multiclass_{:}c_kernel-{:}_target-{:}.pdf".format(
            self.ds.num_classes, k_name, self.y_target)
        fig.savefig(fm.join(self.images_folder, file_name))

    def _rescaled_distance(self, x, c, r):
        """Evaluate the L2 constraint of center `c` and radius `r` on `x`.

        If a normalizer is set, both `x` and `c` are mapped back through
        its inverse transform first. The constraint is evaluated
        separately on each row of `x`.
        """
        normalizer = self.normalizer
        if normalizer is not None:
            c = normalizer.inverse_transform(c)
            x = normalizer.inverse_transform(x)

        l2_constraint = CConstraintL2(center=c, radius=r)
        return x.apply_along_axis(l2_constraint.constraint, axis=1)

    def _get_style(self):
        """Return the list of per-class plot styles.

        The list is chosen depending on the number of classes of
        `self.ds` (3, 4, or any other value).
        """
        n_classes = self.ds.num_classes

        if n_classes == 3:
            return [('b', 'o', '-'), ('g', 'p', '--'), ('r', 's', '-.')]

        if n_classes == 4:
            return [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                    ('y', 's', '-.'), ('gray', 'D', '--')]

        # NOTE(review): the last three entries have 2 elements only,
        # while all the other ones have 3 - confirm if this is intended
        return [('saddlebrown', 'o', '-'), ('g', 'p', '--'),
                ('y', 's', '-.'), ('gray', 'D', '--'), ('c', '-.'),
                ('m', '-'), ('y', '-.')]

    def _plot_decision_function(self, fig, plot_background=False):
        """Draw dataset and multiclass predictions on the current subplot.

        Parameters
        ----------
        fig : CFigure
            Figure whose current subplot (`fig.sp`) is drawn on.
        plot_background : bool, optional
            Forwarded to `plot_fun` when plotting the predictions.
            Default False.

        Returns
        -------
        CFigure
            The input figure.

        """
        title = '{:} ({:})'.format(self.multiclass.__class__.__name__,
                                   self.multiclass.classifier.__name__)
        fig.sp.title(title)

        x_bounds, y_bounds = self.ds.get_bounds()

        styles = self._get_style()

        # One scatter per class, colored with the first item of its style
        for c_idx, c in enumerate(self.ds.classes):
            color = styles[c_idx][0]
            fig.sp.scatter(self.ds.X[self.ds.Y == c, 0],
                           self.ds.X[self.ds.Y == c, 1],
                           s=20,
                           c=color,
                           edgecolors='k',
                           facecolors='none',
                           linewidths=1,
                           label='c {:}'.format(c))

        # Predictions of the multiclass classifier on a grid of points
        prediction_plot_params = {
            'multipoint': True,
            'cmap': 'Set2',
            'grid_limits': self.ds.get_bounds(offset=5),
            'colorbar': False,
            'n_grid_points': 300,
            'plot_levels': True,
            'plot_background': plot_background,
            'levels': [-1, 0, 1, 2],
            'levels_color': 'gray',
            'levels_style': '--',
        }
        fig.sp.plot_fun(lambda x: self.multiclass.predict(x),
                        **prediction_plot_params)

        # Axis limits: dataset bounds plus a small margin
        margin = .05
        fig.sp.xlim(x_bounds[0] - margin, x_bounds[1] + margin)
        fig.sp.ylim(y_bounds[0] - margin, y_bounds[1] + margin)

        fig.sp.legend(loc=9, ncol=5, mode="expand", handletextpad=.1)

        return fig