示例#1
0
    def load(self):
        """Build a dataset filled with random integers.

        Both patterns and labels are drawn through ``CArray.randint(2, ...)``:
        patterns as a ``(n_samples, n_features)`` array and labels as a
        ``(1, n_samples)`` array.

        Returns
        -------
        dataset : CDataset
            The randomly generated dataset.

        """
        sample_count = self.n_samples
        pattern_shape = (sample_count, self.n_features)
        random_patterns = CArray.randint(2, shape=pattern_shape)
        random_labels = CArray.randint(2, shape=(1, sample_count))
        return CDataset(random_patterns, random_labels)
示例#2
0
    def test_grad(self):
        """Check analytical loss gradients against a numerical approximation.

        For each loss identifier under test, one random label/score pair
        is drawn and ``CFunction.check_grad`` is used to compare ``dloss``
        with the numerical gradient of ``loss``. The reported difference
        must be lower than 1e-4.
        """
        # The keyword names `loss` and `true_labels` must match the
        # keyword arguments forwarded through `check_grad` below.
        def _eval_loss(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        def _eval_dloss(scores, loss, true_labels):
            return loss.dloss(true_labels, scores)

        loss_ids = ('hinge', 'hinge-squared', 'square', 'log')
        for loss_id in loss_ids:
            self.logger.info("Creating loss: {:}".format(loss_id))
            loss_obj = CLoss.create(loss_id)

            # Labels are drawn before scores, keeping the RNG call order
            n_points = 1
            labels = CArray.randint(0, 2, n_points).todense()
            scores = CArray.randn((n_points, ))

            grad_fun = CFunction(_eval_loss, _eval_dloss)
            grad_gap = grad_fun.check_grad(
                scores, 1e-8, loss=loss_obj, true_labels=labels)

            self.logger.info(
                "Gradient difference between analytical svm "
                "gradient and numerical gradient: %s", str(grad_gap))

            failure_msg = "the gradient is wrong {:} for {:} loss".format(
                grad_gap, loss_id)
            self.assertLess(grad_gap, 1e-4, failure_msg)
示例#3
0
    def test_randint(self):
        """Test for CArray.randint() classmethod.

        For every combination of interval, shape and storage format the
        returned array is checked for type, dense/sparse flags, shape,
        integer dtype and value range. An integer interval ``n`` is
        expected to produce values in ``[0, n)``; a tuple ``(low, high)``
        is expected to produce values in ``[low, high)``.
        """
        self.logger.info("Test for CArray.randint() classmethod.")

        for inter in [1, 2, (0, 1), (0, 2), (1, 3)]:
            for shape in [1, 2, (1, 2), (2, 1), (2, 2)]:
                for sparse in [False, True]:
                    if not isinstance(inter, tuple):
                        res = CArray.randint(inter, shape=shape, sparse=sparse)
                    else:
                        res = CArray.randint(*inter,
                                             shape=shape,
                                             sparse=sparse)
                    self.logger.info(
                        "CArray.randint({:}, shape={:}, sparse={:}):"
                        "\n{:}".format(inter, shape, sparse, res))

                    self.assertIsInstance(res, CArray)
                    self.assertEqual(res.isdense, not sparse)
                    self.assertEqual(res.issparse, sparse)
                    if isinstance(shape, tuple):
                        self.assertEqual(res.shape, shape)
                    else:
                        # An integer shape yields a flat dense array
                        # or a single-row sparse array
                        if sparse is True:
                            self.assertEqual(res.shape, (1, shape))
                        else:
                            self.assertEqual(res.shape, (shape, ))
                    self.assertIsSubDtype(res.dtype, int)

                    # Checking intervals. This must stay inside the
                    # `sparse` loop so that both the dense and the sparse
                    # result are validated, not only the last one.
                    if not isinstance(inter, tuple):
                        self.assertFalse((res < 0).any())
                        self.assertFalse((res >= inter).any())
                    else:
                        if inter[0] > 0:
                            # Comparing a sparse matrix with a scalar
                            # greater than zero using < is inefficient,
                            # use >=
                            self.assertTrue((res >= inter[0]).all())
                        else:
                            self.assertFalse((res < inter[0]).any())
                        self.assertFalse((res >= inter[1]).any())
示例#4
0
    def test_linear_l1_discrete_10d(self):
        """Evade a 10-feature linear classifier with a discrete L1 attack.

        A few classifier weights are forced to the same (maximum) value
        in order to cover a special case of the l1 projection, where
        multiple features share the same max value. The optimizer should
        change one of them at each iteration.
        """
        step = 0.5
        use_sparse = True
        rng_seed = 10

        data, svm = self._prepare_linear_svm_10d(use_sparse, rng_seed)
        data = self._discretize_data(data, step)

        solver_cfg = {"eta": step, "eta_min": None, "eta_max": None}
        attack_cfg = {
            "classifier": svm,
            "double_init_ds": data,
            "distance": 'l1',
            "dmax": 5,
            "lb": -2,
            "ub": 2,
            "attack_classes": CArray([1]),
            "y_target": 0,
            "solver_params": solver_cfg,
        }

        attack, start_x, start_y = self._set_evasion(data, attack_cfg)

        # Overwrite up to three randomly indexed weights with the current
        # maximum, so that several features tie for the max value
        tied_w = svm.w.deepcopy()
        max_w = svm.w.max()
        tie_idx = CArray.randint(svm.w.size, shape=3, random_state=rng_seed)
        tied_w[tie_idx] = max_w
        svm._w = tied_w

        # Expected final optimal point
        # CAttackEvasionPGDExp uses CLineSearchBisectProj
        # which brings the point outside of the grid
        target_x = CArray(
            [-1.8333, -1.8333, 1.8333, 0, -0.5, 0, 0.5, -0.5, 1, 0.5])
        target_y = 0

        self._run_evasion(attack, start_x, start_y, target_x, target_y)
示例#5
0
    def test_attack_pgd_ls_discrete(self):
        """Run SecEval with CAttackEvasionPGDLS on a hard discrete L1 case.

        The classifier is altered so that many weights have the same
        value. The optimizer should still be able to evade it by changing
        one feature per iteration; changing all the equally-weighted
        features at once would violate the L1 constraint and make the
        evasion always fail.
        """
        # Discretize the data, then clip the features to [-1, 1]
        self.ds = self._discretize_data(self.ds, eta=1)
        self.ds.X[self.ds.X > 1] = 1
        self.ds.X[self.ds.X < -1] = -1

        n_train = self.n_tr
        self.tr = self.ds[:n_train, :]
        self.ts = self.ds[n_train:, :]

        self.clf.fit(self.tr.X, self.tr.Y)

        # Overwrite up to five randomly indexed weights with the current
        # maximum, so that several features tie for the max value
        classifier = self.clf
        tied_w = classifier.w.deepcopy()
        max_w = classifier.w.max()
        tie_idx = CArray.randint(classifier.w.size, shape=5, random_state=0)
        tied_w[tie_idx] = max_w
        classifier._w = tied_w

        attack = CAttackEvasionPGDLS(
            classifier=classifier,
            double_init=False,
            distance='l1',
            lb=-1,
            ub=1,
            y_target=None,
            solver_params={'eta': 1, 'eps': 1e-2},
        )
        attack.verbose = 1

        # Security evaluation is run over the `dmax` attack parameter
        self._set_and_run(attack, 'dmax', dmax_step=1)
示例#6
0
    def test_grad(self):
        """Check the cross-entropy gradient against a numerical estimate.

        A single random label and a (1, 3) array of random scores are
        drawn once. For each `pos_label` the output of `dloss` is compared
        with the matching entry of the gradient approximated through
        `CFunction.approx_fprime`; the norm of their difference must be
        lower than 1e-4.
        """
        def _eval_loss(scores, loss, true_labels):
            return loss.loss(true_labels, scores)

        ce_loss = CLossCrossEntropy()

        labels = CArray.randint(0, 2, 1)
        rand_scores = CArray.randn((1, 3))

        self.logger.info(
            "Y_TRUE: {:} SCORES: {:}".format(labels, rand_scores))

        for pos_label in (None, 0, 1, 2):
            self.logger.info("POS_LABEL: {:}".format(pos_label))

            # Analytical gradient as returned by the loss
            analytic = ce_loss.dloss(labels, rand_scores, pos_label)
            self.logger.info("GRAD: {:}".format(analytic))

            numeric_full = CFunction(_eval_loss).approx_fprime(
                rand_scores, eps, ce_loss, labels)
            self.logger.info("APPROX (FULL): {:}".format(numeric_full))

            # With no explicit pos_label, the true label selects the entry
            if pos_label is None:
                pos_label = labels.item()
            numeric = numeric_full[pos_label]
            self.logger.info("APPROX (POS_LABEL): {:}".format(numeric))

            grad_gap = (analytic - numeric).norm()

            self.logger.info("Gradient difference between analytical svm "
                             "gradient and numerical gradient: %s",
                             str(grad_gap))
            failure_msg = "the gradient is wrong {:}".format(grad_gap)
            self.assertLess(grad_gap, 1e-4, failure_msg)
        def _check_repeat(array):
            """Check `array.repeat()` over several axes and repeat counts.

            For each axis in (None, 0, 1) and each `repeats` value (the
            scalars 0, 1, 2 and a randomly generated array of counts), the
            result is checked for type, dense/sparse flags, dtype, shape
            and size, and its values are compared with the output of
            `repeat` called on `array.tondarray()`.

            Parameters
            ----------
            array : CArray
                Array to test. NOTE(review): type presumed from the
                attributes used here (isdense, issparse, tondarray) -
                confirm against the caller.

            """
            self.logger.info("Array:\n{:}".format(array))

            for axis in (None, 0, 1):

                # Random per-element repeat counts, sized to match the
                # number of elements along the axis being repeated
                if axis is None or array.ndim < 2:
                    repeats_add = CArray.randint(2, shape=array.size)
                elif axis == 0:
                    repeats_add = CArray.randint(2, shape=array.shape[0])
                elif axis == 1:
                    repeats_add = CArray.randint(2, shape=array.shape[1])
                else:
                    # NOTE(review): not reachable with axis in (None, 0, 1)
                    repeats_add = None

                for repeats in (0, 1, 2, repeats_add):

                    # Passing a numpy array as `repeats` is expected
                    # to raise TypeError
                    with self.assertRaises(TypeError):
                        array.repeat(repeats=np.array([1, 2]), axis=axis)

                    if axis == 1 and array.ndim < 2:
                        # No columns to repeat
                        with self.assertRaises(ValueError):
                            array.repeat(repeats=repeats, axis=axis)
                        continue

                    res = array.repeat(repeats=repeats, axis=axis)
                    self.logger.info("array.repeat({:}, axis={:}):"
                                     "\n{:}".format(repeats, axis, res))

                    self.assertIsInstance(res, CArray)
                    self.assertEqual(res.isdense, array.isdense)
                    self.assertEqual(res.issparse, array.issparse)
                    self.assertEqual(res.dtype, array.dtype)

                    # Expected shape: the repeated dimension becomes
                    # (length * repeats) for a scalar, or the sum of
                    # the counts for an array of repeats
                    if axis is None or array.ndim < 2:
                        # A flat array is always returned
                        if is_scalar(repeats):
                            repeats_mul = array.size * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape, (repeats_mul, ))
                    elif axis == 0:
                        if is_scalar(repeats):
                            repeats_mul = array.shape[0] * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape,
                                         (repeats_mul, array.shape[1]))
                    elif axis == 1:
                        if is_scalar(repeats):
                            repeats_mul = array.shape[1] * repeats
                        else:
                            repeats_mul = repeats.sum()
                        self.assertEqual(res.shape,
                                         (array.shape[0], repeats_mul))

                    # Expected total number of elements in the result
                    if is_scalar(repeats):
                        repeats_size = array.size * repeats
                    else:
                        if axis is None or array.ndim < 2:
                            repeats_size = repeats.sum()
                        elif axis == 0:
                            repeats_size = repeats.sum() * array.shape[1]
                        elif axis == 1:
                            repeats_size = repeats.sum() * array.shape[0]
                        else:
                            # NOTE(review): not reachable with axis in
                            # (None, 0, 1)
                            repeats_size = None
                    self.assertEqual(res.size, repeats_size)

                    # Compare the values with the result of repeat() on
                    # the ndarray; array-like repeats are converted first
                    if not is_scalar(repeats):
                        repeats = repeats.tondarray()
                    np_res = array.tondarray().repeat(repeats=repeats,
                                                      axis=axis)
                    self.assertFalse((res.tondarray() != np_res).any())