Exemplo n.º 1
0
    def _to_art_classifier(
        classifier: Union[tf.keras.Model, torch.nn.Module],
        nb_classes: int,
        input_shape: Tuple[int, ...],
    ) -> Union[TensorFlowV2Classifier, PyTorchClassifier]:
        """Wrap a PyTorch or TensorFlow model in the matching ART classifier.

        :param classifier: Model to wrap. Either a ``torch.nn.Module`` or a ``tf.keras.Model``.
        :param nb_classes: Number of classes that were used to train the classifier.
        :param input_shape: Input shape of a single data point of the classifier.
        :return: A ``PyTorchClassifier`` (built with ``loss=None``) for PyTorch modules,
            otherwise a ``TensorFlowV2Classifier``.
        :raises TypeError: If ``classifier`` is neither of the two supported model types.
        """
        is_torch_model = isinstance(classifier, torch.nn.Module)

        # Guard clause: reject anything that is neither a PyTorch nor a Keras model.
        if not is_torch_model and not isinstance(classifier, tf.keras.Model):
            raise TypeError(
                f"Expected classifier to be an instance of {str(torch.nn.Module)} or {str(tf.keras.Model)}, received {str(type(classifier))} instead."
            )

        # Arguments that both ART wrappers receive.
        wrapper_kwargs = {
            "model": classifier,
            "nb_classes": nb_classes,
            "input_shape": input_shape,
        }

        # The PyTorch check takes precedence over the TensorFlow one.
        if is_torch_model:
            return PyTorchClassifier(loss=None, **wrapper_kwargs)
        return TensorFlowV2Classifier(**wrapper_kwargs)
Exemplo n.º 2
0
def adv_retrain(attack_name, dataset, model_name, nb_epochs=80, batch_size=512, overwrite=False):
    """Adversarially retrain a saved Keras model and print its accuracy before and after.

    The pretrained model ``<MODEL_DIR><dataset>/<model_name>.h5`` is loaded, wrapped in a
    ``TensorFlowV2Classifier``, retrained with ``AdversarialTrainer`` (``ratio=1.0``) and saved as
    ``adv_<model_name>_<attack_name>.h5`` in the same directory. Nothing is done when that file
    already exists and ``overwrite`` is false.

    :param attack_name: Key of the attack in ``param.attack_params``; also used in the output file name.
    :param dataset: Dataset name; used for the model directory, for ``load_data`` and as the key into
        ``param.classifier_params`` / ``param.attack_params``.
    :param model_name: Base name (without ``.h5``) of the pretrained model file.
    :param nb_epochs: Number of epochs of adversarial training.
    :param batch_size: Batch size used both by the attack and by the adversarial training.
    :param overwrite: Retrain even if the defended model file already exists.
    :return: None. Results are printed and the retrained model is written to disk.
    """
    defended_model_path = "{}{}/".format(param.MODEL_DIR, dataset)
    filename = 'adv_' + model_name + '_' + attack_name + '.h5'
    fpath = os.path.join(defended_model_path, filename)

    if os.path.exists(fpath) and not overwrite:
        print("Adversarial defended model is already trained")
        print("please check at: ", fpath)
        return

    x_train, y_train, x_test, y_test = load_data(dataset)
    labels_true = np.argmax(y_test, axis=1)

    def accuracy_percent(predictions):
        """Return the percentage of test samples whose arg-max prediction equals the true label."""
        return np.sum(np.argmax(predictions, axis=1) == labels_true) / x_test.shape[0] * 100

    # Load the Keras model pretrained for this dataset.
    model_path = "{}{}/{}.h5".format(param.MODEL_DIR, dataset, model_name)
    model = load_model(model_path)
    print('Accuracy test set: %.2f%%' % accuracy_percent(model.predict(x_test)))

    # The configuration is keyed by the `dataset` argument; the previous code indexed it with
    # `dataset_name`, a name that is not defined in this function.
    classifier = TensorFlowV2Classifier(model=model, **param.classifier_params[dataset])

    # Work on a copy so the shared configuration in `param` is not modified by this call.
    attack_param = dict(param.attack_params[attack_name][dataset])
    attack_param["batch_size"] = batch_size
    if attack_name not in (param.FGSM, param.BIM):  # some attacks don't have a verbose parameter, e.g. bim
        attack_param["verbose"] = VERBOSE

    attack = call_function_by_attack_name(attack_name)(classifier, **attack_param)

    # Adversarial samples crafted against the model before retraining.
    x_test_adv = attack.generate(x_test, y_test)
    print('Accuracy on original %s adversarial samples: %.2f%%'
          % (attack_name, accuracy_percent(classifier.predict(x_test_adv))))

    # Adversarial training
    trainer = AdversarialTrainer(classifier, attack, ratio=1.0)
    trainer.fit(x_train, y_train, nb_epochs=nb_epochs, batch_size=batch_size)

    # Save model
    classifier.save(filename=filename, path=defended_model_path)

    # Evaluate the adversarially trained model on the clean test set
    print('Accuracy test set: %.2f%%' % accuracy_percent(classifier.predict(x_test)))

    # Evaluate the adversarially trained model on the adversarial samples crafted before retraining
    print('Accuracy on original %s adversarial samples: %.2f%%'
          % (attack_name, accuracy_percent(classifier.predict(x_test_adv))))

    # Evaluate the adversarially trained model on fresh adversarial samples crafted against it
    x_test_adv = attack.generate(x_test, y_test)
    print('Accuracy on new %s adversarial samples: %.2f%%'
          % (attack_name, accuracy_percent(classifier.predict(x_test_adv))))
Exemplo n.º 3
0
def fix_make_dummy_model():
    """
    Build an untrained ART classifier together with a random embedding matrix for testing.

    :return: Tuple ``(classifier, model_weights)`` where ``classifier`` is a ``TensorFlowV2Classifier``
        around the gated-convolution model below and ``model_weights`` is a normally distributed
        array of shape ``(input_dim, embedding_size)``.
    """
    def get_prediction_model(param_dic):
        """
        Model going from embeddings to predictions, so the malware embedding can be optimised directly.
        Needs to have the same structure as the target model.
        Populated here with "standard" parameters.
        """
        embedded_input = tf.keras.layers.Input(
            shape=(param_dic["maxlen"], param_dic["embedding_size"]))

        # Settings shared by the two parallel convolutions.
        conv_settings = dict(
            filters=128,
            kernel_size=500,
            strides=500,
            use_bias=True,
            padding="valid",
        )
        filter_branch = tf.keras.layers.Conv1D(
            activation="relu", name="filt_layer", **conv_settings)(embedded_input)
        attention_branch = tf.keras.layers.Conv1D(
            activation="sigmoid", name="attn_layer", **conv_settings)(embedded_input)

        # Gate the filter branch with the sigmoid branch, then pool over the sequence axis.
        gated = tf.keras.layers.Multiply()([filter_branch, attention_branch])
        pooled = tf.keras.layers.GlobalMaxPooling1D()(gated)
        hidden = tf.keras.layers.Dense(
            128, activation="relu", name="dense_layer")(pooled)
        logit = tf.keras.layers.Dense(1, name="output_layer")(hidden)
        return tf.keras.Model(inputs=embedded_input, outputs=logit)

    param_dic = {"maxlen": 2**20, "input_dim": 257, "embedding_size": 8}
    prediction_model = get_prediction_model(param_dic)

    # Random stand-in for an embedding matrix: one row per input token value.
    model_weights = np.random.normal(
        0, 1.0, (param_dic["input_dim"], param_dic["embedding_size"]))

    classifier = TensorFlowV2Classifier(
        model=prediction_model,
        nb_classes=2,
        loss_object=tf.keras.losses.BinaryCrossentropy(from_logits=True),
        input_shape=(param_dic["maxlen"], param_dic["embedding_size"]),
    )
    return classifier, model_weights
def boundary_attack_run(model_to_attack, target_image, iterations=100):
    """
    Run the black-box boundary attack step by step and record the degree of change.

    The attack is created with ``max_iter=0`` and switched to one iteration per call after the
    first call; each call is warm-started from the previous adversarial image and the attack's
    current ``delta`` / ``epsilon``. Every step is printed and plotted.

    inputs:
    -model_to_attack (tensorflow Model instance): model that will be attacked
    -target_image (numpy array): image that will be attacked; the classifier is built with
        input shape (32, 32, 3) and clip values (0, 255)
    -iterations (int): maximum number of times to run the attack

    output:
    -final_degree_of_change (dict): keys: the step number, values: the result of
        ``degree_of_change`` between the adversarial image of that step and the target image
    """
    classifier = TensorFlowV2Classifier(
        model=model_to_attack,
        input_shape=(32, 32, 3),
        clip_values=(0, 255),
        nb_classes=10,
    )
    attack = BoundaryAttack(estimator=classifier,
                            targeted=False,
                            max_iter=0,
                            delta=0.001,
                            epsilon=0.01)
    iter_step = 1
    final_degree_of_change = {}
    x_adv = None
    for i in range(iterations):
        step = i * iter_step
        x_adv = attack.generate(x=np.array([target_image]), x_adv_init=x_adv)
        adv_image = x_adv[0]

        print(
            "Adversarial image at step %d." % step,
            "L2 error",
            np.linalg.norm(np.reshape(adv_image - target_image, [-1])),
            "and class label %d." % np.argmax(classifier.predict(x_adv)[0]),
        )
        # The channel axis is reversed for display.
        # NOTE(review): presumably a BGR -> RGB conversion - confirm against the caller's image format.
        plt.imshow(adv_image[..., ::-1].astype("int32"))
        plt.show(block=False)
        final_degree_of_change[step] = degree_of_change([adv_image],
                                                        [target_image])

        # Carry the adapted step sizes over to the next call; stop if the attack does not expose them.
        if not (hasattr(attack, "curr_delta") and hasattr(attack, "curr_epsilon")):
            break
        attack.max_iter = iter_step
        attack.delta = attack.curr_delta
        attack.epsilon = attack.curr_epsilon
    return final_degree_of_change
Exemplo n.º 5
0
def Deepfool(points=2, steps=0.05):
    """Evaluate the module-level ``model`` against the NewtonFool attack and plot the results.

    Despite its name, this function crafts the adversarial samples with ART's ``NewtonFool``.
    It relies on the module-level names ``model``, ``x_test``, ``y_test``, ``plot_accuracies``
    and ``plot_examples``, and only uses the first 1000 test samples.

    :param points: Number of epsilon values ``0.1, 0.3, ...`` to generate. The first value is
        paired with the clean accuracy; one attack run is made for each remaining value.
    :param steps: Unused; kept so existing callers keep working.
    :return: None. Accuracies are printed and the results are passed to the plotting helpers.
    """
    from art.attacks.evasion import NewtonFool
    from art.estimators.classification import TensorFlowV2Classifier

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    classifier = TensorFlowV2Classifier(model=model,
                                        nb_classes=10,
                                        input_shape=(28, 28, 1),
                                        loss_object=loss_object,
                                        clip_values=(0, 1),
                                        channels_first=False)

    # Evaluation subset and its true labels, computed once instead of on every use.
    x_eval = x_test[:1000]
    y_eval = y_test[:1000]
    labels = np.argmax(y_eval, axis=1)
    n_eval = y_eval.shape[0]

    epsilons = [0.2 * i + 0.1 for i in range(points)]  # Values used for reporting and plotting

    preds = np.argmax(classifier.predict(x_eval), axis=1)
    acc = np.sum(preds == labels) / n_eval
    print("\nTest accuracy on normal sample: %.2f%% eps: %.2f" %
          (acc * 100, 0))
    accuracies = [acc]
    examples = []

    adv_crafter = NewtonFool(classifier)
    for epsilon in epsilons[1:]:
        # NOTE(review): `epsilon` is not passed to the attack, so every iteration runs the
        # attack with identical settings; it only labels the printed and plotted results.
        x_test_adv = adv_crafter.generate(x=x_eval, y=y_eval)

        # Evaluate the classifier on the adversarial examples (one prediction pass, reused below)
        preds = np.argmax(classifier.predict(x_test_adv), axis=1)
        acc = np.sum(preds == labels) / n_eval
        print("\nTest accuracy on adversarial sample: %.2f%% eps: %.2f" %
              (acc * 100, epsilon))
        accuracies.append(acc)

        # Keep up to five misclassified samples as (true label, predicted label, image).
        example = []
        for predicted, true_label, adv_sample in zip(preds, labels, x_test_adv):
            if predicted != true_label:
                example.append((true_label, predicted, adv_sample))
            if len(example) == 5:
                break
        examples.append(example)
    plot_accuracies(epsilons, accuracies)
    plot_examples(epsilons[1:], examples)
Exemplo n.º 6
0
def gen_adv_data(model, x, y, attack_name, dataset_name, batch_size=1024):
    """Generate adversarial samples for ``x`` with the configured attack.

    The model is wrapped in a ``TensorFlowV2Classifier`` using ``classifier_params[dataset_name]``
    and attacked with the attack selected by ``attack_name``, configured from
    ``attack_params[attack_name][dataset_name]``. Root-logger output below CRITICAL is suppressed
    while the attack is built and run.

    :param model: Model to attack; passed as ``model`` to ``TensorFlowV2Classifier``.
    :param x: Samples to perturb; passed as ``x`` to ``attack.generate``.
    :param y: Labels for ``x``; passed as ``y`` to ``attack.generate``.
    :param attack_name: Key of the attack in ``attack_params``.
    :param dataset_name: Key of the dataset in ``classifier_params`` and ``attack_params``.
    :param batch_size: Batch size handed to the attack.
    :return: The adversarial samples returned by ``attack.generate``.
    """
    root_logger = logging.getLogger()
    previous_level = root_logger.level
    root_logger.setLevel(logging.CRITICAL)
    try:
        classifier = TensorFlowV2Classifier(model=model, **classifier_params[dataset_name])

        # Work on a copy so the shared configuration is not modified by this call.
        attack_param = dict(attack_params[attack_name][dataset_name])
        attack_param["batch_size"] = batch_size
        if attack_name not in (param.FGSM, param.BIM):  # some attacks don't have a verbose parameter, e.g. bim
            attack_param["verbose"] = VERBOSE

        attack = call_function_by_attack_name(attack_name)(classifier, **attack_param)
        return attack.generate(x=x, y=y)
    finally:
        # Restore the caller's log level even when the attack raises, instead of leaving the
        # root logger at CRITICAL or forcing it to INFO.
        root_logger.setLevel(previous_level)
    with tf.GradientTape() as tape:
        predictions = model(images, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))


# Instantiate the network and the loss. `TensorFlowModel` is defined outside this excerpt.
# `from_logits=True` tells the loss that the model output is not yet normalised to probabilities.
model = TensorFlowModel()
loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

# Step 3: Create the ART classifier

# `train_step` is the custom training function handed to ART; its definition is outside this excerpt.
classifier = TensorFlowV2Classifier(
    model=model,
    loss_object=loss_object,
    train_step=train_step,
    nb_classes=10,
    input_shape=(28, 28, 1),
    clip_values=(0, 1),
)

# Step 4: Train the ART classifier

classifier.fit(x_train, y_train, batch_size=64, nb_epochs=3)

# Step 5: Evaluate the ART classifier on benign test examples

# Accuracy = fraction of test samples whose arg-max prediction equals the arg-max of the label row.
predictions = classifier.predict(x_test)
accuracy = np.sum(
    np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))
    def __init__(
        self,
        estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
        norm: Union[int, float, str] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        nb_random_init: int = 5,
        batch_size: int = 32,
        loss_type: Optional[str] = None,
        verbose: bool = True,
    ):
        """
        Create a :class:`.AutoProjectedGradientDescent` instance.

        When `loss_type` is None the given estimator is used as is. Otherwise the estimator is re-wrapped in a new
        classifier of the same framework (`TensorFlowClassifier`, `TensorFlowV2Classifier` or `PyTorchClassifier`)
        that uses the selected loss, and that wrapper is passed to the parent constructor.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param nb_random_init: Number of random initialisations within the epsilon ball. For nb_random_init=0
            starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param loss_type: Defines the loss to attack. Available options: None (Use loss defined by estimator),
            "cross_entropy", or "difference_logits_ratio"
        :param verbose: Show progress bars.
        :raises ValueError: If `loss_type` is not in `self._predefined_losses`; if the estimator seems to predict
            probabilities where logits are required; if `loss_type="difference_logits_ratio"` is requested for a
            `TensorFlowClassifier`; or if a `loss_type` is given for an estimator that is none of the three
            supported classifier types.
        :raises NotImplementedError: If `loss_type="cross_entropy"` is requested for a `TensorFlowClassifier`
            that seems to predict probabilities.
        """
        # Imported here rather than at module level.
        # NOTE(review): presumably to avoid a circular or heavyweight import - confirm.
        from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier

        if loss_type not in self._predefined_losses:
            raise ValueError(
                "The argument loss_type has an invalid value. The following options for `loss_type` are currently "
                "supported: {}".format(self._predefined_losses)
            )

        if loss_type is None:
            # Keep the estimator's own loss, but probe it with an all-ones input and reject it if the
            # output looks like probabilities.
            if hasattr(estimator, "predict") and is_probability(
                estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32))
            ):
                raise ValueError(
                    "AutoProjectedGradientDescent is expecting logits as estimator output, the provided "
                    "estimator seems to predict probabilities."
                )

            estimator_apgd = estimator
        else:
            # A specific loss was requested: build it for the estimator's framework and re-wrap the model.
            if isinstance(estimator, TensorFlowClassifier):
                import tensorflow as tf

                if loss_type == "cross_entropy":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise NotImplementedError("Cross-entropy loss is not implemented for probability output.")

                    # Loss tensor built from the estimator's existing output and label placeholder.
                    self._loss_object = tf.reduce_mean(
                        tf.keras.losses.categorical_crossentropy(
                            y_pred=estimator._output, y_true=estimator._labels_ph, from_logits=True
                        )
                    )

                elif loss_type == "difference_logits_ratio":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    # This loss is deliberately disabled for TensorFlowClassifier: the branch always raises.
                    # The unvalidated draft implementation is kept below, commented out, for future work.
                    raise ValueError(
                        "The loss `difference_logits_ratio` has not been validate completely. It seems that the "
                        "commented implemented below is failing to selected the second largest logit for cases "
                        "where the largest logit is the true logit. For future work `difference_logits_ratio` and "
                        "loss_fn should return the same loss value."
                    )

                    # def difference_logits_ratio(y_true, y_pred):
                    #     i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32)
                    #     i_y_pred_arg = tf.argsort(y_pred, axis=1)
                    #     # Not completely sure if the following line is correct.
                    #     # `i_y_pred_arg[:, -2], i_y_pred_arg[:, -1]` seems closer to the output of `loss_fn` than
                    #     # `i_y_pred_arg[:, -1], i_y_pred_arg[:, -2]`
                    #     i_z_i = tf.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -2],
                    #                      i_y_pred_arg[:, -1])
                    #
                    #     z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0)
                    #     z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0)
                    #     z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                    #     z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)
                    #
                    #     z_1 = tf.linalg.diag_part(z_1)
                    #     z_3 = tf.linalg.diag_part(z_3)
                    #     z_i = tf.linalg.diag_part(z_i)
                    #     z_y = tf.linalg.diag_part(z_y)
                    #
                    #     dlr = -(z_y - z_i) / (z_1 - z_3)
                    #
                    #     return tf.reduce_mean(dlr)
                    #
                    # def loss_fn(y_true, y_pred):
                    #     i_y_true = np.argmax(y_true, axis=1)
                    #     i_y_pred_arg = np.argsort(y_pred, axis=1)
                    #     i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:], i_y_pred_arg[:, -1],
                    #                      i_y_pred_arg[:, -2])
                    #
                    #     z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                    #     z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                    #     z_i = y_pred[:, i_z_i]
                    #     z_y = y_pred[:, i_y_true]
                    #
                    #     z_1 = np.diag(z_1)
                    #     z_3 = np.diag(z_3)
                    #     z_i = np.diag(z_i)
                    #     z_y = np.diag(z_y)
                    #
                    #     dlr = -(z_y - z_i) / (z_1 - z_3)
                    #
                    #     return np.mean(dlr)
                    #
                    # self._loss_fn = loss_fn
                    # self._loss_object = difference_logits_ratio(y_true=estimator._labels_ph,
                    #                                             y_pred=estimator._output)

                # Rebuild the TF1 classifier around the same graph tensors and session, swapping in the new loss.
                estimator_apgd = TensorFlowClassifier(
                    input_ph=estimator._input_ph,
                    output=estimator._output,
                    labels_ph=estimator._labels_ph,
                    train=estimator._train,
                    loss=self._loss_object,
                    learning=estimator._learning,
                    sess=estimator._sess,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                    feed_dict=estimator._feed_dict,
                )

            elif isinstance(estimator, TensorFlowV2Classifier):
                import tensorflow as tf

                if loss_type == "cross_entropy":
                    # Unlike the other frameworks, probability outputs are accepted here: `from_logits`
                    # is chosen to match what the estimator seems to predict.
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
                    else:
                        self._loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
                elif loss_type == "difference_logits_ratio":
                    if is_probability(estimator.predict(x=np.ones(shape=(1, *estimator.input_shape)))):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    # Callable loss object computing the mean of -(z_y - z_i) / (z_1 - z_3) over the batch.
                    # Assumes y_true and y_pred are 2-D (sample, class) - TODO confirm.
                    class difference_logits_ratio:
                        def __init__(self):
                            self.reduction = "mean"

                        def __call__(self, y_true, y_pred):
                            # Index of the true class per sample, and class indices sorted by logit.
                            i_y_true = tf.cast(tf.math.argmax(tf.cast(y_true, tf.int32), axis=1), tf.int32)
                            i_y_pred_arg = tf.argsort(y_pred, axis=1)
                            i_z_i_list = list()

                            # Per sample: take the last sorted index unless it is the true class,
                            # in which case take the second to last.
                            for i in range(y_true.shape[0]):
                                if i_y_pred_arg[i, -1] != i_y_true[i]:
                                    i_z_i_list.append(i_y_pred_arg[i, -1])
                                else:
                                    i_z_i_list.append(i_y_pred_arg[i, -2])

                            i_z_i = tf.stack(i_z_i_list)

                            # Gathering along axis 1 with one index per sample yields a square matrix;
                            # the diagonal taken below keeps each sample's own entry.
                            z_1 = tf.gather(y_pred, i_y_pred_arg[:, -1], axis=1, batch_dims=0)
                            z_3 = tf.gather(y_pred, i_y_pred_arg[:, -3], axis=1, batch_dims=0)
                            z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                            z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                            z_1 = tf.linalg.diag_part(z_1)
                            z_3 = tf.linalg.diag_part(z_3)
                            z_i = tf.linalg.diag_part(z_i)
                            z_y = tf.linalg.diag_part(z_y)

                            dlr = -(z_y - z_i) / (z_1 - z_3)

                            return tf.reduce_mean(dlr)

                    # Two separate instances: one stored as `_loss_fn`, one used as the classifier's loss object.
                    self._loss_fn = difference_logits_ratio()
                    self._loss_object = difference_logits_ratio()

                # Rebuild the TF2 classifier around the same model, swapping in the new loss object.
                estimator_apgd = TensorFlowV2Classifier(
                    model=estimator.model,
                    nb_classes=estimator.nb_classes,
                    input_shape=estimator.input_shape,
                    loss_object=self._loss_object,
                    train_step=estimator._train_step,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                )
            elif isinstance(estimator, PyTorchClassifier):
                import torch

                if loss_type == "cross_entropy":
                    if is_probability(
                        estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=np.float32))
                    ):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. If loss_type='cross_entropy' "
                            "the estimator has to to predict logits."
                        )

                    self._loss_object = torch.nn.CrossEntropyLoss(reduction="mean")
                elif loss_type == "difference_logits_ratio":
                    if is_probability(
                        estimator.predict(x=np.ones(shape=(1, *estimator.input_shape), dtype=ART_NUMPY_DTYPE))
                    ):
                        raise ValueError(
                            "The provided estimator seems to predict probabilities. "
                            "If loss_type='difference_logits_ratio' the estimator has to to predict logits."
                        )

                    # PyTorch counterpart of the loss above. Note the argument order is (y_pred, y_true),
                    # the reverse of the TensorFlow v2 version.
                    class difference_logits_ratio:
                        def __init__(self):
                            self.reduction = "mean"

                        def __call__(self, y_pred, y_true):  # type: ignore
                            # Accept NumPy arrays as well as tensors.
                            if isinstance(y_true, np.ndarray):
                                y_true = torch.from_numpy(y_true)
                            if isinstance(y_pred, np.ndarray):
                                y_pred = torch.from_numpy(y_pred)

                            y_true = y_true.float()

                            i_y_true = torch.argmax(y_true, axis=1)
                            i_y_pred_arg = torch.argsort(y_pred, axis=1)
                            i_z_i_list = list()

                            # Per sample: take the last sorted index unless it is the true class,
                            # in which case take the second to last.
                            for i in range(y_true.shape[0]):
                                if i_y_pred_arg[i, -1] != i_y_true[i]:
                                    i_z_i_list.append(i_y_pred_arg[i, -1])
                                else:
                                    i_z_i_list.append(i_y_pred_arg[i, -2])

                            i_z_i = torch.stack(i_z_i_list)

                            # Column indexing with one index per sample yields a square matrix;
                            # the diagonal taken below keeps each sample's own entry.
                            z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                            z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                            z_i = y_pred[:, i_z_i]
                            z_y = y_pred[:, i_y_true]

                            z_1 = torch.diagonal(z_1)
                            z_3 = torch.diagonal(z_3)
                            z_i = torch.diagonal(z_i)
                            z_y = torch.diagonal(z_y)

                            dlr = -(z_y - z_i) / (z_1 - z_3)

                            return torch.mean(dlr.float())

                    self._loss_object = difference_logits_ratio()

                # Rebuild the PyTorch classifier around the same model with the new loss and no optimizer.
                estimator_apgd = PyTorchClassifier(
                    model=estimator.model,
                    loss=self._loss_object,
                    input_shape=estimator.input_shape,
                    nb_classes=estimator.nb_classes,
                    optimizer=None,
                    channels_first=estimator.channels_first,
                    clip_values=estimator.clip_values,
                    preprocessing_defences=estimator.preprocessing_defences,
                    postprocessing_defences=estimator.postprocessing_defences,
                    preprocessing=estimator.preprocessing,
                    device_type=estimator._device,
                )

            else:
                # A loss type was requested for an estimator that is none of the three classifier types above.
                raise ValueError("The loss type {} is not supported for the provided estimator.".format(loss_type))

        # Register the (possibly re-wrapped) estimator with the parent class, then store the attack parameters.
        super().__init__(estimator=estimator_apgd)
        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.nb_random_init = nb_random_init
        self.batch_size = batch_size
        self.loss_type = loss_type
        self.verbose = verbose
        # `_check_params` is defined outside this excerpt.
        self._check_params()
Exemplo n.º 9
0
def natual(eps):
    """Train a small CNN on MNIST and print its accuracy on clean and PGD-perturbed test data.

    :param eps: Maximum perturbation passed to ``ProjectedGradientDescent``; the attack step size
        is ``eps / 3``.
    :return: None. Both accuracies are printed.
    """
    # Step 1: Load the MNIST dataset

    (x_train, y_train), (x_test, y_test), _min_pixel, _max_pixel = load_mnist()

    # Step 2: Create the model

    import tensorflow as tf
    from tensorflow.keras import Model
    from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D

    class TensorFlowModel(Model):
        """
        Standard TensorFlow model for unit testing.
        """
        def __init__(self):
            super().__init__()
            self.conv1 = Conv2D(filters=4, kernel_size=5, activation="relu")
            self.conv2 = Conv2D(filters=10, kernel_size=5, activation="relu")
            self.maxpool = MaxPool2D(pool_size=(2, 2),
                                     strides=(2, 2),
                                     padding="valid",
                                     data_format=None)
            self.flatten = Flatten()
            self.dense1 = Dense(100, activation="relu")
            self.logits = Dense(10, activation="linear")

        def call(self, x):
            """
            Run the forward pass: two conv + max-pool stages, then two dense layers.
            :param x: Input to the model
            :return: Prediction of the model
            """
            pooled = self.maxpool(self.conv1(x))
            pooled = self.maxpool(self.conv2(pooled))
            return self.logits(self.dense1(self.flatten(pooled)))

    model = TensorFlowModel()
    loss_object = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

    def train_step(model, images, labels):
        # One optimisation step: forward pass under the tape, then apply the gradients.
        with tf.GradientTape() as tape:
            loss = loss_object(labels, model(images, training=True))
        trainable = model.trainable_variables
        optimizer.apply_gradients(zip(tape.gradient(loss, trainable), trainable))

    # Step 3: Create the ART classifier

    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )

    def report_accuracy(inputs, message):
        # Print the share of test samples whose arg-max prediction matches the arg-max label.
        scores = classifier.predict(inputs)
        hits = np.argmax(scores, axis=1) == np.argmax(y_test, axis=1)
        print(message.format(np.sum(hits) / len(y_test) * 100))

    # Step 4: Train the ART classifier

    classifier.fit(x_train, y_train, batch_size=64, nb_epochs=10)

    # Step 5: Evaluate the ART classifier on benign test examples

    report_accuracy(x_test, "Accuracy on benign test examples: {}%")

    # Step 6: Generate adversarial test examples
    attack = ProjectedGradientDescent(estimator=classifier,
                                      eps=eps,
                                      eps_step=eps / 3,
                                      max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    # Step 7: Evaluate the ART classifier on adversarial test examples

    report_accuracy(x_test_adv, "Accuracy on adversarial test examples: {}%")
Exemplo n.º 10
0
def main():
    """Run the full pipeline: contrastive pre-training, classifier training, PGD attack.

    Steps, in order:

    0. Parse the options (``parse_option``), build the optimizers and the
       ``tf.data`` pipelines.
    1. Stage 1: train ``Encoder`` + ``Projector`` with the contrastive loss
       selected by ``args.loss``; optionally save PCA plots of the projected
       test data under ``figs/``.
    2. Stage 2: train a ``SoftmaxPred`` head on the encoder output. Gradients
       are taken only with respect to the head's variables.
    3. Wrap encoder + head in an ART ``TensorFlowV2Classifier``, print the
       accuracy on the clean test set, generate ``ProjectedGradientDescent``
       examples with ``eps=args.eps`` and print the accuracy on them.
    4. Call ``natual(args.eps)``.

    :raises ValueError: If ``args.loss``, ``args.optimizer`` or ``args.data``
        is not one of the supported values.
    """
    # Local import: only needed to make sure the figure directory exists.
    import os

    args = parse_option()
    print(args)

    # check args
    if args.loss not in LOSS_NAMES:
        raise ValueError('Unsupported loss function type {}'.format(args.loss))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    if args.optimizer == 'adam':
        optimizer1 = tf.keras.optimizers.Adam(learning_rate=args.lr_1)
    elif args.optimizer == 'lars':
        from lars_optimizer import LARSOptimizer
        # not compatible with tf2
        optimizer1 = LARSOptimizer(
            args.lr_1,
            exclude_from_weight_decay=['batch_normalization', 'bias'])
    elif args.optimizer == 'sgd':
        optimizer1 = tfa.optimizers.SGDW(learning_rate=args.lr_1,
                                         momentum=0.9,
                                         weight_decay=1e-4)
    else:
        # Fail here instead of with an unbound-name error in the first
        # training step.
        raise ValueError('Unsupported optimizer type {}'.format(
            args.optimizer))
    optimizer2 = tf.keras.optimizers.Adam(learning_rate=args.lr_2)

    model_name = '{}_model-bs_{}-lr_{}'.format(args.loss, args.batch_size_1,
                                               args.lr_1)

    # 0. Load data
    if args.data == 'mnist':
        mnist = tf.keras.datasets.mnist
    elif args.data == 'fashion_mnist':
        mnist = tf.keras.datasets.fashion_mnist
    else:
        raise ValueError('Unsupported dataset {}'.format(args.data))
    print('Loading {} data...'.format(args.data))
    # Integer class labels come from the Keras dataset ...
    (_, y_train), (_, y_test) = mnist.load_data()
    # ... while the images come from load_mnist().
    # NOTE(review): load_mnist() takes no dataset argument, so it presumably
    # always returns MNIST images; with args.data == 'fashion_mnist' the
    # images and labels would then belong to different datasets - confirm.
    (x_train, _), (x_test, _), _, _ = load_mnist()
    print(x_train.shape, x_test.shape)
    print(x_train.shape, y_train.shape)

    train_ds = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_1)

    train_ds2 = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(5000).batch(args.batch_size_2)

    test_ds = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(args.batch_size_1)

    # 1. Stage 1: train encoder with multiclass N-pair loss
    encoder = Encoder(normalize=True, activation=args.activation)
    projector = Projector(args.projection_dim,
                          normalize=True,
                          activation=args.activation)

    if args.loss == 'max_margin':

        def loss_func(z, y):
            return losses.max_margin_contrastive_loss(z,
                                                      y,
                                                      margin=args.margin,
                                                      metric=args.metric)
    elif args.loss == 'npairs':
        loss_func = losses.multiclass_npairs_loss
    elif args.loss == 'sup_nt_xent':

        def loss_func(z, y):
            return losses.supervised_nt_xent_loss(
                z,
                y,
                temperature=args.temperature,
                base_temperature=args.base_temperature)
    elif args.loss.startswith('triplet'):
        # The part after the dash selects the triplet variant.
        triplet_kind = args.loss.split('-')[1]

        def loss_func(z, y):
            return losses.triplet_loss(z,
                                       y,
                                       kind=triplet_kind,
                                       margin=args.margin)
    else:
        # A name listed in LOSS_NAMES but not handled above would otherwise
        # leave loss_func undefined.
        raise ValueError('Unsupported loss function type {}'.format(args.loss))

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    test_loss = tf.keras.metrics.Mean(name='test_loss')

    # train step for the contrastive loss
    @tf.function
    def train_step_stage1(x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            r = encoder(x, training=True)
            z = projector(r, training=True)
            loss = loss_func(z, y)

        # Encoder and projector are optimised jointly in this stage.
        variables = (encoder.trainable_variables +
                     projector.trainable_variables)
        gradients = tape.gradient(loss, variables)
        optimizer1.apply_gradients(zip(gradients, variables))
        train_loss(loss)

    @tf.function
    def test_step_stage1(x, y):
        r = encoder(x, training=False)
        z = projector(r, training=False)
        t_loss = loss_func(z, y)
        test_loss(t_loss)

    print('Stage 1 training ...')
    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        test_loss.reset_states()

        for x, y in train_ds:
            train_step_stage1(x, y)

        for x_te, y_te in test_ds:
            test_step_stage1(x_te, y_te)

    if args.draw_figures:
        # savefig() does not create missing directories.
        os.makedirs('figs', exist_ok=True)

        # projecting data with the trained encoder, projector
        x_tr_proj = projector(encoder(x_train))
        x_te_proj = projector(encoder(x_test))
        # convert tensor to np.array
        x_tr_proj = x_tr_proj.numpy()
        x_te_proj = x_te_proj.numpy()
        print(x_tr_proj.shape, x_te_proj.shape)

        # check learned embedding using PCA
        pca = PCA(n_components=2)
        pca.fit(x_tr_proj)
        x_te_proj_pca = pca.transform(x_te_proj)

        x_te_proj_pca_df = pd.DataFrame(x_te_proj_pca, columns=['PC1', 'PC2'])
        x_te_proj_pca_df['label'] = y_test
        # PCA scatter plot; x/y are passed by keyword because recent seaborn
        # releases no longer accept them positionally.
        fig, ax = plt.subplots()
        ax = sns.scatterplot(x='PC1',
                             y='PC2',
                             data=x_te_proj_pca_df,
                             palette='tab10',
                             hue='label',
                             linewidth=0,
                             alpha=0.6,
                             ax=ax)

        # Shrink the axes horizontally to leave room for the legend.
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        title = 'Data: {}\nEmbedding: {}\nbatch size: {}; LR: {}'.format(
            args.data, LOSS_NAMES[args.loss], args.batch_size_1, args.lr_1)
        ax.set_title(title)
        fig.savefig('figs/PCA_plot_{}_{}_embed.png'.format(
            args.data, model_name))

        # density plot for PCA
        g = sns.jointplot(x='PC1',
                          y='PC2',
                          data=x_te_proj_pca_df,
                          kind="hex")
        plt.subplots_adjust(top=0.95)
        g.fig.suptitle(title)

        g.savefig('figs/Joint_PCA_plot_{}_{}_embed.png'.format(
            args.data, model_name))

    # Stage 2: freeze the learned representations and then learn a classifier
    # on a linear layer using a softmax loss
    softmax = SoftmaxPred()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='train_ACC')
    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_acc = tf.keras.metrics.SparseCategoricalAccuracy(name='test_ACC')

    cce_loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    # train step for the 2nd stage; the (model, x, y) signature is also what
    # is handed to TensorFlowV2Classifier as `train_step` below.
    @tf.function
    def train_step(model, x, y):
        '''
        x: data tensor, shape: (batch_size, data_dim)
        y: data labels, shape: (batch_size, )
        '''
        with tf.GradientTape() as tape:
            # layers[0] is the encoder, layers[1] the softmax head (see the
            # Sequential model built below).
            r = model.layers[0](x, training=False)
            y_preds = model.layers[1](r, training=True)
            loss = cce_loss_obj(y, y_preds)

        # freeze the encoder, only train the softmax layer
        head_variables = model.layers[1].trainable_variables
        gradients = tape.gradient(loss, head_variables)
        optimizer2.apply_gradients(zip(gradients, head_variables))
        train_loss(loss)
        train_acc(y, y_preds)

    @tf.function
    def test_step(x, y):
        r = encoder(x, training=False)
        y_preds = softmax(r, training=False)
        t_loss = cce_loss_obj(y, y_preds)
        test_loss(t_loss)
        test_acc(y, y_preds)

    if args.write_summary:
        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = 'logs/{}/{}/{}/train'.format(model_name, args.data,
                                                     current_time)
        test_log_dir = 'logs/{}/{}/{}/test'.format(model_name, args.data,
                                                   current_time)
        train_summary_writer = tf.summary.create_file_writer(train_log_dir)
        test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    print('Stage 2 training ...')
    model = tf.keras.Sequential([encoder, softmax])
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True)

    classifier = TensorFlowV2Classifier(
        model=model,
        loss_object=loss_object,
        train_step=train_step,
        nb_classes=10,
        input_shape=(28, 28, 1),
        clip_values=(0, 1),
    )

    # The model is trained with the explicit loop below rather than through
    # classifier.fit(), so that metrics and summaries can be recorded.
    for epoch in range(args.epoch):
        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        train_acc.reset_states()
        test_loss.reset_states()
        test_acc.reset_states()

        for x, y in train_ds2:
            train_step(model, x, y)

        if args.write_summary:
            with train_summary_writer.as_default():
                tf.summary.scalar('loss', train_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', train_acc.result(), step=epoch)

        for x_te, y_te in test_ds:
            test_step(x_te, y_te)

        if args.write_summary:
            with test_summary_writer.as_default():
                tf.summary.scalar('loss', test_loss.result(), step=epoch)
                tf.summary.scalar('accuracy', test_acc.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Acc: {}, Test Loss: {}, Test Acc: {}'
        print(
            template.format(epoch + 1, train_loss.result(),
                            train_acc.result() * 100, test_loss.result(),
                            test_acc.result() * 100))

    # y_test holds integer class ids, so it is compared to the argmax directly.
    predictions = classifier.predict(x_test)
    print(predictions.shape, y_test.shape)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    print('Stage 3 attacking ...')

    attack = ProjectedGradientDescent(estimator=classifier,
                                      eps=args.eps,
                                      eps_step=args.eps / 3,
                                      max_iter=20)
    x_test_adv = attack.generate(x=x_test)

    print('Stage 4 attacking ...')

    predictions = classifier.predict(x_test_adv)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))

    natual(args.eps)
Exemplo n.º 11
0
    # Split each table column-wise at index 25: the first 25 columns become
    # the data array, the remaining columns the labels array.
    train_data, train_labels = np.split(train.to_numpy(), [25], axis=1)
    test_data, test_labels = np.split(test.to_numpy(), [25], axis=1)
    # Class index per row via argmax - presumably the label columns are
    # one-hot encoded (nb_classes=5 below); TODO confirm.
    non_encoded_test_labels = tf.argmax(test_labels, axis=1)
    non_encoded_train_labels = tf.argmax(train_labels,axis=1)
    
    # Baseline: accuracy of the unmodified model on the unperturbed test data.
    print("Evaluating clean samples on clean model...\n")
    _, orig_acc = new_model.evaluate(test_data, test_labels, verbose=1)
    logger.info("Classifier with original training")
    logger.info("Accuracy on clean test samples: %.2f%%", (orig_acc * 100))
    logger.info("="*50)
    

    # Wrap the model in an ART classifier so that ART attacks can use it.
    # loss_object and train_step are defined outside this fragment.
    print("Creating classifier...\n")
    adv_classifier = TensorFlowV2Classifier(model=new_model,
                                            loss_object=loss_object,
                                            train_step=train_step,
                                            nb_classes=5,
                                            input_shape=(1,25),
                                            clip_values=(0, 1))
    
    
    # Untargeted L-infinity FGSM attack with budget `eps` (defined outside
    # this fragment).
    print("Creating adversarial attack object...\n")
    fgsm = FastGradientMethod(adv_classifier, 
                              norm=np.inf, 
                              eps=eps, 
                              eps_step=0.001, 
                              targeted=False, 
                              batch_size=2048, 
                              num_random_init=27)


    print("Generating adversarial samples...\n")
Exemplo n.º 12
0
    def __init__(
        self,
        estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
        norm: Union[int, float, str] = np.inf,
        eps: float = 0.3,
        eps_step: float = 0.1,
        max_iter: int = 100,
        targeted: bool = False,
        nb_random_init: int = 5,
        batch_size: int = 32,
        loss_type: Optional[str] = None,
    ):
        """
        Create a :class:`.AutoProjectedGradientDescent` instance.

        For TensorFlow (v1 and v2) and PyTorch classifiers a new classifier of the same kind is built around the
        given estimator's model, using the loss selected by `loss_type`, and that copy is handed to the parent
        constructor. For any other estimator type `None` is handed to the parent constructor.

        :param estimator: A trained estimator.
        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
        :param nb_random_init: Number of random initialisations within the epsilon ball. For nb_random_init=0
            starting at the original input.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param loss_type: Loss to attack: `None` (reuse the loss object of `estimator`), "cross_entropy" or
            "difference_logits_ratio". With `None` only `self._loss_object` is set, `self._loss_fn` is not.
        :raises ValueError: If `loss_type` is not one of the supported values, or if the selected loss needs logits
            but the estimator appears to predict probabilities.
        :raises NotImplementedError: If `loss_type` is "cross_entropy" and a TensorFlow v1 estimator appears to
            predict probabilities.
        """
        from art.estimators.classification import TensorFlowClassifier, TensorFlowV2Classifier, PyTorchClassifier

        if isinstance(estimator, TensorFlowClassifier):
            import tensorflow as tf

            if loss_type == "cross_entropy":
                # Probe the estimator with an all-ones input to find out whether it outputs probabilities.
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise NotImplementedError(
                        "Cross-entropy loss is not implemented for probability output."
                    )
                else:
                    # Graph-mode loss tensor built on the estimator's own placeholders.
                    self._loss_object = tf.reduce_mean(
                        tf.keras.losses.categorical_crossentropy(
                            y_pred=estimator._output,
                            y_true=estimator._labels_ph,
                            from_logits=True))

                    def loss_fn(y_true, y_pred):
                        # NumPy counterpart of the loss; subtracting the row maximum keeps exp() from overflowing.
                        y_pred_norm = y_pred - np.amax(
                            y_pred, axis=1, keepdims=True)
                        loss_value = -(y_true * y_pred_norm - np.log(
                            np.sum(np.exp(y_pred_norm), axis=1,
                                   keepdims=True)))
                        return np.mean(loss_value)

                    self._loss_fn = loss_fn
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to to predict logits.")
                else:

                    def difference_logits_ratio(y_true, y_pred):
                        i_y_true = tf.cast(
                            tf.math.argmax(tf.cast(y_true, tf.int32), axis=1),
                            tf.int32)
                        # Class indices sorted by ascending logit: [:, -1] is the top-1 class.
                        i_y_pred_arg = tf.argsort(y_pred, axis=1)
                        # Index of the largest logit among the classes other than the true one: the top-1 class
                        # unless that is the true class, in which case the runner-up. `tf.not_equal` is used
                        # because `!=` is not an element-wise comparison on TensorFlow v1 graph tensors, and the
                        # branch order matches the NumPy `loss_fn` below.
                        i_z_i = tf.where(
                            tf.not_equal(i_y_pred_arg[:, -1], i_y_true),
                            i_y_pred_arg[:, -1],
                            i_y_pred_arg[:, -2])

                        # Gathering along axis 1 yields one column per sample; the diagonal picks, for every
                        # row, the entry addressed by that row's own index.
                        z_1 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -1],
                                        axis=1,
                                        batch_dims=0)
                        z_3 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -3],
                                        axis=1,
                                        batch_dims=0)
                        z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                        z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                        z_1 = tf.linalg.diag_part(z_1)
                        z_3 = tf.linalg.diag_part(z_3)
                        z_i = tf.linalg.diag_part(z_i)
                        z_y = tf.linalg.diag_part(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return tf.reduce_mean(dlr)

                    def loss_fn(y_true, y_pred):
                        # NumPy counterpart of `difference_logits_ratio`.
                        i_y_true = np.argmax(y_true, axis=1)
                        i_y_pred_arg = np.argsort(y_pred, axis=1)
                        i_z_i = np.where(i_y_pred_arg[:, -1] != i_y_true[:],
                                         i_y_pred_arg[:, -1], i_y_pred_arg[:,
                                                                           -2])

                        z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                        z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                        z_i = y_pred[:, i_z_i]
                        z_y = y_pred[:, i_y_true]

                        z_1 = np.diag(z_1)
                        z_3 = np.diag(z_3)
                        z_i = np.diag(z_i)
                        z_y = np.diag(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return np.mean(dlr)

                    self._loss_fn = loss_fn
                    self._loss_object = difference_logits_ratio(
                        y_true=estimator._labels_ph, y_pred=estimator._output)
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            # Same graph, session and pre-/post-processing as the given estimator, only the loss differs.
            estimator_apgd = TensorFlowClassifier(
                input_ph=estimator._input_ph,
                output=estimator._output,
                labels_ph=estimator._labels_ph,
                train=estimator._train,
                loss=self._loss_object,
                learning=estimator._learning,
                sess=estimator._sess,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
                feed_dict=estimator._feed_dict,
            )

        elif isinstance(estimator, TensorFlowV2Classifier):
            import tensorflow as tf

            if loss_type == "cross_entropy":
                # Choose `from_logits` according to what the estimator appears to output.
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    self._loss_object = tf.keras.losses.CategoricalCrossentropy(
                        from_logits=False)
                    self._loss_fn = self._loss_object
                else:
                    self._loss_object = tf.keras.losses.CategoricalCrossentropy(
                        from_logits=True)
                    self._loss_fn = self._loss_object
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(x=np.ones(
                            shape=(1, *estimator.input_shape)))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to to predict logits.")
                else:

                    def difference_logits_ratio(y_true, y_pred):
                        i_y_true = tf.cast(
                            tf.math.argmax(tf.cast(y_true, tf.int32), axis=1),
                            tf.int32)
                        i_y_pred_arg = tf.argsort(y_pred, axis=1)
                        i_z_i_list = list()

                        # Per sample: top-1 class unless it is the true class, then the runner-up.
                        for i in range(y_true.shape[0]):
                            if i_y_pred_arg[i, -1] != i_y_true[i]:
                                i_z_i_list.append(i_y_pred_arg[i, -1])
                            else:
                                i_z_i_list.append(i_y_pred_arg[i, -2])

                        i_z_i = tf.stack(i_z_i_list)

                        z_1 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -1],
                                        axis=1,
                                        batch_dims=0)
                        z_3 = tf.gather(y_pred,
                                        i_y_pred_arg[:, -3],
                                        axis=1,
                                        batch_dims=0)
                        z_i = tf.gather(y_pred, i_z_i, axis=1, batch_dims=0)
                        z_y = tf.gather(y_pred, i_y_true, axis=1, batch_dims=0)

                        z_1 = tf.linalg.diag_part(z_1)
                        z_3 = tf.linalg.diag_part(z_3)
                        z_i = tf.linalg.diag_part(z_i)
                        z_y = tf.linalg.diag_part(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return tf.reduce_mean(dlr)

                    self._loss_fn = difference_logits_ratio
                    self._loss_object = difference_logits_ratio
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            estimator_apgd = TensorFlowV2Classifier(
                model=estimator.model,
                nb_classes=estimator.nb_classes,
                input_shape=estimator.input_shape,
                loss_object=self._loss_object,
                train_step=estimator._train_step,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
            )
        elif isinstance(estimator, PyTorchClassifier):
            import torch

            if loss_type == "cross_entropy":
                if is_probability(
                        estimator.predict(
                            x=np.ones(shape=(1, *estimator.input_shape),
                                      dtype=np.float32))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='cross_entropy' "
                        "the estimator has to to predict logits.")
                else:

                    def loss_fn(y_true, y_pred):
                        # torch's CrossEntropyLoss takes class indices, so the labels are reduced with argmax.
                        return torch.nn.CrossEntropyLoss()(
                            torch.from_numpy(y_pred),
                            torch.from_numpy(np.argmax(y_true, axis=1)))

                    self._loss_fn = loss_fn
                    self._loss_object = torch.nn.CrossEntropyLoss()
            elif loss_type == "difference_logits_ratio":
                if is_probability(
                        estimator.predict(
                            x=np.ones(shape=(1, *estimator.input_shape),
                                      dtype=ART_NUMPY_DTYPE))):
                    raise ValueError(
                        "The provided estimator seems to predict probabilities. If loss_type='difference_logits_ratio' "
                        "the estimator has to to predict logits.")
                else:

                    # Argument order is (y_pred, y_true) here, unlike the TensorFlow variants above.
                    def difference_logits_ratio(y_pred,
                                                y_true):  # type: ignore
                        if isinstance(y_true, np.ndarray):
                            y_true = torch.from_numpy(y_true)
                        if isinstance(y_pred, np.ndarray):
                            y_pred = torch.from_numpy(y_pred)

                        y_true = y_true.float()

                        i_y_true = torch.argmax(y_true, axis=1)
                        i_y_pred_arg = torch.argsort(y_pred, axis=1)
                        i_z_i_list = list()

                        # Per sample: top-1 class unless it is the true class, then the runner-up.
                        for i in range(y_true.shape[0]):
                            if i_y_pred_arg[i, -1] != i_y_true[i]:
                                i_z_i_list.append(i_y_pred_arg[i, -1])
                            else:
                                i_z_i_list.append(i_y_pred_arg[i, -2])

                        i_z_i = torch.stack(i_z_i_list)

                        z_1 = y_pred[:, i_y_pred_arg[:, -1]]
                        z_3 = y_pred[:, i_y_pred_arg[:, -3]]
                        z_i = y_pred[:, i_z_i]
                        z_y = y_pred[:, i_y_true]

                        z_1 = torch.diagonal(z_1)
                        z_3 = torch.diagonal(z_3)
                        z_i = torch.diagonal(z_i)
                        z_y = torch.diagonal(z_y)

                        dlr = -(z_y - z_i) / (z_1 - z_3)

                        return torch.mean(dlr.float())

                    self._loss_fn = difference_logits_ratio
                    self._loss_object = difference_logits_ratio
            elif loss_type is None:
                self._loss_object = estimator._loss_object
            else:
                raise ValueError(
                    "The argument loss_type has an invalid value. The following options for loss_type are "
                    "supported: {}".format(
                        [None, "cross_entropy", "difference_logits_ratio"]))

            estimator_apgd = PyTorchClassifier(
                model=estimator.model,
                loss=self._loss_object,
                input_shape=estimator.input_shape,
                nb_classes=estimator.nb_classes,
                optimizer=None,
                channels_first=estimator.channels_first,
                clip_values=estimator.clip_values,
                preprocessing_defences=estimator.preprocessing_defences,
                postprocessing_defences=estimator.postprocessing_defences,
                preprocessing=estimator.preprocessing,
                # NOTE(review): `device_type` is presumably a string such as "cpu"/"gpu" while `_device` is
                # presumably a `torch.device`; passing its string form keeps a CPU estimator on the CPU - confirm
                # against the PyTorchClassifier version in use.
                device_type=str(estimator._device),
            )

        else:
            # Unsupported estimator type: the parent constructor receives None.
            estimator_apgd = None

        super().__init__(estimator=estimator_apgd)
        self.norm = norm
        self.eps = eps
        self.eps_step = eps_step
        self.max_iter = max_iter
        self.targeted = targeted
        self.nb_random_init = nb_random_init
        self.batch_size = batch_size
        self.loss_type = loss_type
        self._check_params()
Exemplo n.º 13
0
    def test_1_tf(self):
        """
        Test with a TensorFlow Classifier.

        Builds a small convolutional Keras model, wraps it in a `TensorFlowV2Classifier` and a
        `DeepPartitionEnsemble`, then checks `predict`, `loss_gradient` and `fit`. The body only runs when the
        installed TensorFlow has major version 2; otherwise the test does nothing.

        :return:
        """
        # Only the major version matters. Converting just that component to int also works for version strings
        # with a pre-release or local suffix (e.g. "2.4.0-rc1", "2.3.0+nv"), where converting every component
        # raises ValueError.
        tf_major_version = int(tf.__version__.split(".")[0])
        if tf_major_version == 2:

            # Get MNIST
            (x_train, y_train), (x_test, y_test) = self.mnist

            # Create a model from scratch
            from tensorflow.keras import Model
            from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D

            class TensorFlowModel(Model):
                """
                Standard TensorFlow model for unit testing.
                """
                def __init__(self):
                    super().__init__()
                    self.conv1 = Conv2D(filters=4,
                                        kernel_size=5,
                                        activation="relu")
                    self.conv2 = Conv2D(filters=10,
                                        kernel_size=5,
                                        activation="relu")
                    self.maxpool = MaxPool2D(pool_size=(2, 2),
                                             strides=(2, 2),
                                             padding="valid",
                                             data_format=None)
                    self.flatten = Flatten()
                    self.dense1 = Dense(100, activation="relu")
                    self.logits = Dense(10, activation="linear")

                def call(self, x):
                    """
                    Call function to evaluate the model.

                    :param x: Input to the model
                    :return: Prediction of the model
                    """
                    x = self.conv1(x)
                    x = self.maxpool(x)
                    x = self.conv2(x)
                    x = self.maxpool(x)
                    x = self.flatten(x)
                    x = self.dense1(x)
                    x = self.logits(x)
                    return x

            optimizer = Adam(learning_rate=0.01)

            def train_step(model, images, labels):
                # `loss_object` is bound below, before this function is first called.
                with tf.GradientTape() as tape:
                    predictions = model(images, training=True)
                    loss = loss_object(labels, predictions)
                gradients = tape.gradient(loss, model.trainable_variables)
                optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))

            model = TensorFlowModel()
            loss_object = tf.keras.losses.CategoricalCrossentropy(
                from_logits=True)

            classifier = TensorFlowV2Classifier(
                model=model,
                loss_object=loss_object,
                train_step=train_step,
                nb_classes=10,
                input_shape=(28, 28, 1),
                clip_values=(0, 1),
            )

            # Initialize DPA Classifier
            dpa = DeepPartitionEnsemble(
                classifiers=classifier,
                ensemble_size=ENSEMBLE_SIZE,
                channels_first=classifier.channels_first,
                clip_values=classifier.clip_values,
                preprocessing_defences=classifier.preprocessing_defences,
                postprocessing_defences=classifier.postprocessing_defences,
                preprocessing=classifier.preprocessing,
            )

            # Check basic functionality of DPA Classifier
            # check predict: same shape as the labels, and no row sums to more than ENSEMBLE_SIZE
            y_test_dpa = dpa.predict(x=x_test)
            self.assertEqual(y_test_dpa.shape, y_test.shape)
            self.assertTrue(
                (np.sum(y_test_dpa, axis=1) <= ENSEMBLE_SIZE * np.ones(
                    (NB_TEST, ))).all())

            # loss gradient
            grad = dpa.loss_gradient(x=x_test, y=y_test, sampling=True)
            self.assertEqual(grad.shape, (10, 28, 28, 1))

            # fit
            dpa.fit(x=x_train, y=y_train)