def __init__(self, classifier, step, eps, x_train, y_train, x_val, y_val,
             max_iter=100, **kwargs):
        """
        Set up an SVM poisoning attack.

        A classifier wrapping a `LinearSVC` is replaced by a freshly fitted `SVC` with a linear kernel (same `C`,
        same clip values); any wrapped model that is neither `LinearSVC` nor `SVC` is rejected.

        :param classifier: A trained ScikitlearnSVC classifier
        :type classifier: `art.classifiers.scikitlearn.ScikitlearnSVC`
        :param step: The step size of the attack update
        :type step: `float`
        :param eps: The minimum difference in loss before convergence
        :type eps: `float`
        :param x_train: The training data used for classification
        :type x_train: `np.ndarray`
        :param y_train: The training labels used for classification
        :type y_train: `np.ndarray`
        :param x_val: The validation data used to test the attack
        :type x_val: `np.ndarray`
        :param y_val: The validation labels used to test the attack
        :type y_val: `np.ndarray`
        :param max_iter: The maximum number of iterations for the attack
        :type max_iter: `int`
        :param kwargs: Extra optional keyword arguments, forwarded to `set_params`
        :raises TypeError: if `classifier` is not a `ScikitlearnSVC`
        :raises NotImplementedError: if the wrapped model is neither `LinearSVC` nor `SVC`
        """
        # pylint: disable=W0212
        from sklearn.svm import LinearSVC, SVC

        super(PoisoningAttackSVM, self).__init__(classifier)

        if not isinstance(classifier, ScikitlearnSVC):
            raise TypeError("Classifier must be a SVC")

        wrapped_model = self.classifier._model
        if isinstance(wrapped_model, LinearSVC):
            # Swap the LinearSVC for a linear-kernel SVC and retrain it on the supplied training data
            linear_kernel_svc = SVC(C=wrapped_model.C, kernel="linear")
            self.classifier = ScikitlearnSVC(model=linear_kernel_svc, clip_values=self.classifier.clip_values)
            self.classifier.fit(x_train, y_train)
        elif not isinstance(wrapped_model, SVC):
            raise NotImplementedError("Model type '{}' not yet supported".format(type(wrapped_model)))

        # Attack hyper-parameters
        self.step = step
        self.eps = eps
        self.max_iter = max_iter

        # Data the attack works with
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val

        self.set_params(**kwargs)
    def setUpClass(cls):
        """
        Class-level fixture: call `master_seed(seed=1234)`, run the parent `setUpClass`, then wrap a default
        `LinearSVC` in a `ScikitlearnSVC` and fit it on the class attributes `x_train_iris` / `y_train_iris`.
        """
        master_seed(seed=1234)
        super().setUpClass()

        linear_svc = LinearSVC()
        cls.sklearn_model = linear_svc
        cls.classifier = ScikitlearnSVC(model=linear_svc)
        cls.classifier.fit(x=cls.x_train_iris, y=cls.y_train_iris)
Exemplo n.º 3
0
    def setUpClass(cls):
        """
        Class-level fixture: seed NumPy, wrap a default `LinearSVC` in a `ScikitlearnSVC`, check that the wrapper
        has exactly the type produced by `SklearnClassifier` for the same model, then fit it on the module-level
        `x_train` / `y_train`.
        """
        np.random.seed(seed=1234)

        linear_svc = LinearSVC()
        cls.classifier = ScikitlearnSVC(model=linear_svc)

        # Exact type comparison (not isinstance): both construction routes must yield the same wrapper class
        generic_wrapper_type = type(SklearnClassifier(model=linear_svc))
        assert type(cls.classifier) == generic_wrapper_type

        cls.classifier.fit(x=x_train, y=y_train)
class PoisoningAttackSVM(Attack):
    """
    Close implementation of poisoning attack on Support Vector Machines (SVM) by Biggio et al.

    | Paper link: https://arxiv.org/pdf/1206.6389.pdf
    """
    # 'max_iter' is listed so that it can be updated through `set_params` like the other hyper-parameters
    # (assumes the base `set_params` only accepts names listed here -- the base class is not visible in this file).
    attack_params = ['classifier', 'step', 'eps', 'x_train', 'y_train', 'x_val', 'y_val', 'max_iter']

    def __init__(self, classifier, step, eps, x_train, y_train, x_val, y_val, max_iter=10, **kwargs):
        """
        Initialize an SVM poisoning attack

        A classifier wrapping a `LinearSVC` is replaced by a `SVC` with a linear kernel (same `C`, same clip
        values) that is fitted on `x_train` / `y_train`.

        :param classifier: A trained ScikitlearnSVC classifier
        :type classifier: `art.classifiers.scikitlearn.ScikitlearnSVC`
        :param step: The step size of the attack update
        :type step: `float`
        :param eps: The minimum difference in loss before convergence of the attack
        :type eps: `float`
        :param x_train: The training data used for classification
        :type x_train: `np.ndarray`
        :param y_train: The training labels used for classification; `generate_attack_point` takes
                        `np.argmax(y_train, axis=1)`, so a two-dimensional array is required
        :type y_train: `np.ndarray`
        :param x_val: The validation data used to test the attack
        :type x_val: `np.ndarray`
        :param y_val: The validation labels used to test the attack
        :type y_val: `np.ndarray`
        :param max_iter: The maximum number of iterations for the attack
        :type max_iter: `int`
        :param kwargs: Extra optional keyword arguments, forwarded to `set_params`
        :raises TypeError: if `classifier` is not a `ScikitlearnSVC`
        :raises NotImplementedError: if the wrapped model is neither `LinearSVC` nor `SVC`
        """
        # pylint: disable=W0212
        from sklearn.svm import LinearSVC, SVC

        super(PoisoningAttackSVM, self).__init__(classifier)

        if not isinstance(classifier, ScikitlearnSVC):
            raise TypeError('Classifier must be a SVC')
        if isinstance(self.classifier._model, LinearSVC):
            self.classifier = ScikitlearnSVC(model=SVC(C=self.classifier._model.C, kernel='linear'),
                                             clip_values=self.classifier.clip_values)
            self.classifier.fit(x_train, y_train)
        elif not isinstance(self.classifier._model, SVC):
            raise NotImplementedError("Model type '{}' not yet supported".format(type(self.classifier._model)))

        self.step = step
        self.eps = eps
        self.x_train = x_train
        self.y_train = y_train
        self.x_val = x_val
        self.y_val = y_val
        self.max_iter = max_iter
        self.set_params(**kwargs)

    def generate(self, x, y=None, **kwargs):
        """
        Iteratively finds optimal attack points starting at values at x

        The input array `x` is not modified.

        :param x: An array with the points that initialize attack points.
        :type x: `np.ndarray`
        :param y: The labels assigned to the attack points; when `None`, the classifier's predictions for `x`
                  are used.
        :type y: `np.ndarray`
        :return: An array holding the adversarial examples, one row per row of `x`.
        :rtype: `np.ndarray`
        :raises ValueError: if `x` contains no points
        """
        num_poison = len(x)

        # Validate before touching the classifier so that empty input always yields this error
        if num_poison == 0:
            raise ValueError("Must input at least one poison point")

        if y is None:
            y_attack = self.classifier.predict(x)
        else:
            y_attack = np.copy(y)

        num_features = len(x[0])
        all_poison = [self.generate_attack_point(attack_point, attack_label)
                      for attack_point, attack_label in zip(x, y_attack)]

        return np.array(all_poison).reshape((num_poison, num_features))

    def set_params(self, **kwargs):
        """
        Take in a dictionary of parameters and apply attack-specific checks before saving them as attributes.

        :param kwargs: a dictionary of attack-specific parameters
        :type kwargs: `dict`
        :return: `True` when parsing was successful
        :raises ValueError: if `step`, `eps` or `max_iter` is not strictly positive
        """
        super(PoisoningAttackSVM, self).set_params(**kwargs)
        if self.step <= 0:
            raise ValueError("Step size must be strictly positive")
        if self.eps <= 0:
            raise ValueError("Value of eps must be strictly positive")
        if self.max_iter <= 0:
            raise ValueError("Value of max_iter must be strictly positive")
        return True

    def generate_attack_point(self, x_attack, y_attack):
        """
        Generate a single poison attack the model, using `x_val` and `y_val` as validation points.
        The attack begins at the point `x_attack` and is labelled with `y_attack`.

        Side effect: the model wrapped by `self.classifier` is refitted in place, last on the training data plus
        the returned attack point. `x_attack` itself is left untouched.

        :param x_attack: the initial attack point
        :type x_attack: `np.ndarray`
        :param y_attack: the initial attack label; its `argmax` is used as the class index
        :type y_attack: `np.ndarray`
        :return: the final attack point, with a leading axis of length 1
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        from sklearn.preprocessing import normalize

        poisoned_model = self.classifier._model
        y_t = np.argmax(self.y_train, axis=1)
        poisoned_model.fit(self.x_train, y_t)
        y_a = np.argmax(y_attack)
        # `np.expand_dims` returns a view; copy it so the in-place step below cannot write into the caller's array
        attack_point = np.copy(np.expand_dims(x_attack, axis=0))
        var_g = poisoned_model.decision_function(self.x_val)
        k_values = np.where(-var_g > 0)
        new_p = np.sum(var_g[k_values])
        old_p = np.copy(new_p)
        i = 0

        # Neither the poisoned label vector nor the clip range changes between iterations
        poisoned_labels = np.append(y_t, y_a)
        clip_values = self.classifier.clip_values

        while new_p - old_p < self.eps and i < self.max_iter:
            old_p = new_p
            poisoned_input = np.vstack([self.x_train, attack_point])
            poisoned_model.fit(poisoned_input, poisoned_labels)

            unit_grad = normalize(self.attack_gradient(attack_point))
            attack_point += self.step * unit_grad
            if clip_values is not None:
                # Keep the attack point inside the classifier's valid feature range
                lower, upper = clip_values
                attack_point = np.clip(attack_point, lower, upper)

            new_g = poisoned_model.decision_function(self.x_val)
            k_values = np.where(-new_g > 0)
            new_p = np.sum(new_g[k_values])
            i += 1

        # Leave the wrapped model fitted on the training data plus the final attack point
        poisoned_input = np.vstack([self.x_train, attack_point])
        poisoned_model.fit(poisoned_input, poisoned_labels)
        return attack_point

    def predict_sign(self, vec):
        """
        Predicts the inputs by binary classifier and outputs -1 and 1 instead of 0 and 1

        Entries whose prediction is neither 0 nor 1 are left at 0.

        :param vec: an input array
        :type vec: `np.ndarray`
        :return: an array of -1/1 predictions
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        preds = self.classifier._model.predict(vec)
        signs = np.zeros(preds.shape[0])
        signs[preds == 1] = 1
        signs[preds == 0] = -1
        return signs

    def attack_gradient(self, attack_point):
        """
        Calculates the attack gradient, or ∂P for this attack.
        See equation 8 in Biggio et al. Ch. 14

        A zero gradient is returned when the attack point is not found among the model's support vectors.

        :param attack_point: the current attack point
        :type attack_point: `np.ndarray`
        :return: The attack gradient, of shape `(1, x_val.shape[1])`
        :rtype: `np.ndarray`
        """
        # pylint: disable=W0212
        art_model = self.classifier
        model = self.classifier._model
        grad = np.zeros((1, self.x_val.shape[1]))
        support_vectors = model.support_vectors_
        num_support = len(support_vectors)
        support_labels = np.expand_dims(self.predict_sign(support_vectors), axis=1)
        # NOTE(review): `np.isin` tests element membership, not row equality, so a support vector whose entries
        # all occur somewhere in `attack_point` also matches -- confirm this loose match is intended.
        c_idx = np.isin(support_vectors, attack_point).all(axis=1)

        if not c_idx.any():
            return grad

        # Index of the first support vector matching the attack point
        c_idx = np.flatnonzero(c_idx)[0]
        alpha_c = model.dual_coef_[0, c_idx]

        assert support_labels.shape == (num_support, 1)
        qss = art_model.q_submatrix(support_vectors, support_vectors)
        # A small random perturbation is added before inverting
        qss_inv = np.linalg.inv(qss + np.random.uniform(0, 0.01 * np.min(qss), (num_support, num_support)))
        nu_k = np.matmul(qss_inv, support_labels)
        zeta = np.matmul(support_labels.T, nu_k)

        # Terms that do not depend on the validation point are computed once, outside the loop
        inv_zeta = 1.0 / zeta
        core = zeta * qss_inv - np.matmul(nu_k, nu_k.T)
        d_q_sc = np.fromfunction(lambda i: art_model._get_kernel_gradient_sv(i, attack_point),
                                 (len(support_vectors),), dtype=int)

        for x_k, y_k in zip(self.x_val, self.y_val):
            y_k = np.expand_dims(np.argmax(y_k), axis=0)

            q_ks = art_model.q_submatrix(np.array([x_k]), support_vectors)
            m_k = inv_zeta * np.matmul(q_ks, core) + np.matmul(y_k, nu_k.T)
            d_q_kc = art_model._kernel_grad(x_k, attack_point)
            grad += (np.matmul(m_k, d_q_sc) + d_q_kc) * alpha_c

        return grad
Exemplo n.º 5
0
    def setUpClass(cls):
        """
        Class-level fixture for the provenance-defence tests.

        Loads MNIST, keeps only digits 0 and 4 (relabelled as two-column one-hot rows), flattens the images,
        carves trusted and validation subsets off the front of the test split, poisons the training set with
        `PoisoningAttackSVM` and stores the resulting data, a classifier fitted on it and two
        `ProvenanceDefense` instances (with and without trusted data) on the class.
        """
        master_seed(301)
        (x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

        def _zero_vs_four(images, one_hot):
            # Keep digits 0 and 4 only; relabel them as [1, 0] (digit 0) and [0, 1] (digit 4)
            digits = np.argmax(one_hot, axis=1)
            keep = np.logical_or(digits == 4, digits == 0)
            images = images[keep]
            digits = digits[keep]
            binary = np.zeros((digits.shape[0], 2))
            binary[digits == 0] = np.array([1, 0])
            binary[digits == 4] = np.array([0, 1])
            return images, binary

        def _flatten(images):
            # Collapse axes 1..3 into a single feature axis
            n_samples = images.shape[0]
            n_features = images.shape[1] * images.shape[2] * images.shape[3]
            return images.reshape(n_samples, n_features)

        x_train, y_train = _zero_vs_four(x_train, y_train)
        x_test, y_test = _zero_vs_four(x_test, y_test)

        x_train = _flatten(x_train)[:NB_TRAIN]
        y_train = y_train[:NB_TRAIN]
        x_test = _flatten(x_test)

        # Peel the trusted subset, then the validation subset, off the front of the test split
        trusted_data, x_test = x_test[:NB_TRUSTED], x_test[NB_TRUSTED:]
        trusted_labels, y_test = y_test[:NB_TRUSTED], y_test[NB_TRUSTED:]
        valid_data, x_test = x_test[:NB_VALID], x_test[NB_VALID:]
        valid_labels, y_test = y_test[:NB_VALID], y_test[NB_VALID:]

        # Clean samples get a random provenance among the first NB_DEVICES - 1 devices (one-hot encoded)
        clean_prov = np.random.randint(NB_DEVICES - 1, size=x_train.shape[0])
        p_train = np.eye(NB_DEVICES)[clean_prov]

        no_defense = ScikitlearnSVC(model=SVC(kernel=kernel), clip_values=(min_, max_))
        no_defense.fit(x=x_train, y=y_train)

        # Start the poison points from randomly chosen support vectors, labelled opposite to the prediction
        support_vectors = no_defense._model.support_vectors_
        poison_points = np.random.randint(support_vectors.shape[0], size=NB_POISON)
        all_poison_init = np.copy(support_vectors[poison_points])
        poison_labels = np.array([1, 1]) - no_defense.predict(all_poison_init)

        # NOTE(review): the constructor's parameter is spelled `max_iter`; `max_iters` lands in **kwargs and is
        # only forwarded to `set_params`, so the default iteration cap presumably applies -- confirm the intent.
        svm_attack = PoisoningAttackSVM(classifier=no_defense, x_train=x_train, y_train=y_train,
                                        step=0.1, eps=1.0, x_val=valid_data, y_val=valid_labels, max_iters=200)
        poisoned_data = svm_attack.generate(all_poison_init, y=poison_labels)

        # Append the poison to the clean data; every poisoned sample is attributed to the last device
        all_data = np.vstack([x_train, poisoned_data])
        all_labels = np.vstack([y_train, poison_labels])
        poison_prov = np.zeros((NB_POISON, NB_DEVICES))
        poison_prov[:, NB_DEVICES - 1] = 1
        all_p = np.vstack([p_train, poison_prov])

        model = SVC(kernel=kernel)
        cls.mnist = ((all_data, all_labels, all_p), (x_test, y_test), (trusted_data, trusted_labels),
                     (valid_data, valid_labels), (min_, max_))
        cls.classifier = SklearnClassifier(model=model, clip_values=(min_, max_))
        cls.classifier.fit(all_data, all_labels)

        cls.defence_trust = ProvenanceDefense(cls.classifier, all_data, all_labels, all_p,
                                              x_val=trusted_data, y_val=trusted_labels, eps=0.1)
        cls.defence_no_trust = ProvenanceDefense(cls.classifier, all_data, all_labels, all_p, eps=0.1)
Exemplo n.º 6
0
    def setUpClass(cls):
        """
        Class-level fixture: seed NumPy, then wrap a default `LinearSVC` in a `ScikitlearnSVC` and fit it on the
        module-level `x_train` / `y_train`.
        """
        np.random.seed(seed=1234)

        linear_svc = LinearSVC()
        cls.sklearn_model = linear_svc
        cls.classifier = ScikitlearnSVC(model=linear_svc)
        cls.classifier.fit(x=x_train, y=y_train)
Exemplo n.º 7
0
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

# The Keras training / evaluation path below is disabled; only the SVC path further down is active.
# model.compile(optimizer='adam',
#               loss='sparse_categorical_crossentropy',
#               metrics=['accuracy'])
#
# model.fit(x_train, y_train, epochs=3)
#
# loss_test, accuracy_test = model.evaluate(x_test, y_test)
# print('Accuracy on test data: {:4.2f}%'.format(accuracy_test * 100))
#
# classifier = KerasClassifier(model=model, clip_values=(0, 1))
# Wrap the scikit-learn SVC (`model_svc`, defined outside this excerpt) for use with ART; features are clipped
# to [0, 1]. NOTE: the variable name is spelled with three 's' and is used as such below.
classsifierSVC = ScikitlearnSVC(model=model_svc, clip_values=(0, 1))


# attack_PGD = ProjectedGradientDescent(classifier=classifier)
# attack_PSVM = PoisoningAttackSVM(classifier=classsifierSVC, eps=.3, step=.1, x_train=x_train, y_train=y_train, x_val=x_test[101:200], y_val=None)
# NOTE(review): `y_val=None` -- if this is the PoisoningAttackSVM whose `attack_gradient` iterates over
# `zip(self.x_val, self.y_val)`, gradient computation will fail on `None`; confirm real labels are not needed.
# NOTE(review): `x_val` is sliced from `x_test` while `generate` below is fed `x_test_svc` -- presumably the
# `_svc` arrays are reshaped copies; verify the validation data has the same feature layout as `x_train_svc`.
attack_PSVM = PoisoningAttackSVM(classifier=classsifierSVC, eps=.3, step=.1, x_train=x_train_svc, y_train=y_train, x_val=x_test[101:200], y_val=None)
#
# attack_SMM = SaliencyMapMethod(classifier=classifier)
# attack_STran = SpatialTransformation(classifier=classifier)

# test_PGD = attack_PGD.generate(x_test)
# test_PSVM = attack_PSVM.generate(x_test)
# Run the poisoning attack, using the points in `x_test_svc` as initial attack points.
test_PSVM = attack_PSVM.generate(x_test_svc)

# test_SMM = attack_SMM.generate(x_test)
# test_STran = attack_STran.generate(x_test)