import tensorflow as tf

# The project-specific helpers referenced below (load_mnist,
# load_existing_model, get_trained_model, FGSMAttacker, PGDAttacker) are
# assumed to come from this repository's own modules; they are not shown in
# this listing.


def main():
    # Load MNIST data as tf.data.Dataset.
    batch_size = 32
    _, test_data = load_mnist(batch_size)
    # Flatten the batched test set into single feature and label tensors.
    # tf.concat tolerates a smaller final batch, which tf.stack would reject.
    test_features = tf.concat([features for features, _ in test_data], axis=0)
    test_features = tf.reshape(test_features, [-1, 28, 28])
    test_labels = tf.concat([label for _, label in test_data], axis=0)
    test_labels = tf.reshape(test_labels, [-1])

    # First, create the adversarial examples based on a trained classifier.
    # This classifier has the same architecture and hyperparameters as the
    # classifiers being evaluated.
    attacked_model_path = "../models/mnist/fully_connected/model-1-10"
    attacked_classifier = load_existing_model(attacked_model_path)
    attacker = FGSMAttacker(attacked_classifier, epsilon=0.1)
    perturbed_features = attacker.generate_adversarial_examples(
        test_features, test_labels)
    perturbed_data = tf.data.Dataset.from_tensor_slices(
        (perturbed_features, test_labels))
    perturbed_data = perturbed_data.shuffle(buffer_size=500).batch(batch_size)

    # Then, evaluate the performance of a trained vanilla classifier, on both
    # the original test data and perturbed data.
    print("\nStart evaluating the vanilla classifier.")
    eval_vanilla_model_path = "../models/mnist/fully_connected/model-2-10"
    eval_vanilla_classifier = load_existing_model(eval_vanilla_model_path)
    _, eval_vanilla_acc = eval_vanilla_classifier.evaluate(test_data)
    print("Accuracy on test data for vanilla model:", eval_vanilla_acc)
    _, eval_vanilla_perturbed_acc = eval_vanilla_classifier.evaluate(
        perturbed_data)
    print("Accuracy on perturbed test data for vanilla model:",
          eval_vanilla_perturbed_acc)

    # Then, evaluate the performance of an adversarially trained classifier.
    print("\nStart evaluating the adversarially trained classifier.")
    eval_adv_trained_model_path = "../models/mnist/adv_training_with_pgd_fully_connected/model-1-10"
    eval_adv_trained_classifier = load_existing_model(
        eval_adv_trained_model_path)
    _, eval_adv_trained_acc = eval_adv_trained_classifier.evaluate(test_data)
    print("Accuracy on test data for adversarially trained model:",
          eval_adv_trained_acc)
    _, eval_adv_trained_perturbed_acc = eval_adv_trained_classifier.evaluate(
        perturbed_data)
    print("Accuracy on perturbed test data for adversarially trained model:",
          eval_adv_trained_perturbed_acc)
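

# --- Sketch: one way FGSMAttacker could be implemented ---
# The FGSMAttacker used above is not shown in this listing. The class below is
# a minimal sketch, assuming a Keras classifier that outputs softmax
# probabilities and integer labels; the class name, loss choice, and [0, 1]
# pixel range are assumptions rather than the project's actual code.
import tensorflow as tf


class FGSMAttackerSketch:
    """Fast gradient sign method: x_adv = x + epsilon * sign(grad_x loss)."""

    def __init__(self, classifier, epsilon=0.1):
        self.classifier = classifier
        self.epsilon = epsilon
        self.loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()

    def generate_adversarial_examples(self, features, labels):
        features = tf.convert_to_tensor(features, dtype=tf.float32)
        with tf.GradientTape() as tape:
            tape.watch(features)
            loss = self.loss_fn(labels, self.classifier(features))
        gradients = tape.gradient(loss, features)
        # Take a single step that increases the loss, then clip back to the
        # assumed valid pixel range.
        perturbed = features + self.epsilon * tf.sign(gradients)
        return tf.clip_by_value(perturbed, 0.0, 1.0)

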
def main():
    # Load MNIST data as tf.data.Dataset.
    batch_size = 32
    train_data, test_data = load_mnist(batch_size)

    model_num_for_experiment = 3
    total_model_num = 10
    paths = [
        "../models/mnist/fully_connected/model-{}-{}".format(
            i + 1, total_model_num) for i in range(model_num_for_experiment)
    ]

    for path in paths:
        print("Evaluating model from {}".format(path))
        classifier = load_existing_model(path)
        # Evaluate the trained classifier on test data.
        # Uncomment to find out the accuracy and loss for the entire test set.
        # loss, accuracy = classifier.evaluate(test_data)
        # print("Test loss :", loss)
        # print("Test accuracy :", accuracy)
        sample_batch_num = 2

        sample_data = test_data.take(sample_batch_num)
        sample_loss, sample_accuracy = classifier.evaluate(sample_data)
        print("Accuracy on sampled test data:", sample_accuracy)

        # Flatten the sampled batches into single feature and label tensors.
        # tf.concat tolerates a smaller final batch, which tf.stack would
        # reject.
        sample_features = tf.concat(
            [features for features, _ in sample_data], axis=0)
        sample_features = tf.reshape(sample_features, [-1, 28, 28])
        sample_labels = tf.concat([label for _, label in sample_data], axis=0)
        sample_labels = tf.reshape(sample_labels, [-1])

        # Perform a projected gradient descent (PGD) attack on a sample of the
        # test data.
        attacker = PGDAttacker(classifier, epsilon=0.1, n_iter=20)
        perturbed_features = attacker.generate_adversarial_examples(
            sample_features, sample_labels)

        # Evaluate the trained classifier on perturbed sample data.
        perturbed_data = tf.data.Dataset.from_tensor_slices(
            (perturbed_features, sample_labels))
        perturbed_data = perturbed_data.shuffle(
            buffer_size=500).batch(batch_size)
        print("Perturbed data: {}".format(perturbed_data))
        perturbed_loss, perturbed_accuracy = classifier.evaluate(
            perturbed_data)
        print("Accuracy on perturbed test data:", perturbed_accuracy)
def main():
    # Load MNIST data as tf.data.Dataset.
    batch_size = 32
    train_data, test_data = load_mnist(batch_size)

    model_num = 10
    paths = ["../models/mnist/fully_connected/model-{}-{}"
        .format(i+1, model_num) for i in range(model_num)]

    print("Paths: {}".format(paths))

    epochs = 5
    for path in paths:
        # Compile and train a simple fully connected MNIST classifier.
        classifier = get_trained_model(train_data, epochs=epochs)
        print("Saving model to path {}.".format(path))
        classifier.save_to_file(path)
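

# --- Sketch: plausible load_mnist / get_trained_model helpers ---
# load_mnist, get_trained_model, load_existing_model, and save_to_file are
# referenced above but not shown. The functions below are a minimal sketch of
# what they might look like, assuming a small fully connected Keras model and
# pixels scaled to [0, 1]; the architecture and preprocessing are assumptions,
# not the project's actual code.
import tensorflow as tf


def load_mnist_sketch(batch_size):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    train_data = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).shuffle(10000).batch(batch_size)
    test_data = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(batch_size)
    return train_data, test_data


def get_trained_model_sketch(train_data, epochs=5):
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dense(10, activation="softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(train_data, epochs=epochs)
    return model

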
def main():
    # Load MNIST data as tf.data.Dataset.
    batch_size = 32
    train_data, test_data = load_mnist(batch_size)

    model_num = 10
    paths = ["../models/mnist/adv_training_with_pgd_fully_connected/model-{}-{}"
             .format(i + 1, model_num) for i in range(model_num)]

    epochs = 5
    for path in paths:
        # Compile and train a simple fully connected MNIST classifier on
        # adversarial examples generated by the projected gradient descent
        # (PGD) attacker.
        classifier = get_trained_model(train_data, epochs=epochs)
        print("Saving adversarially trained model to path {}.".format(path))
        classifier.save_to_file(path)
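

# --- Sketch: adversarial training with PGD ---
# The loop above reuses get_trained_model, so the adversarial-training step
# itself is not visible in this listing. The function below is a minimal
# sketch of the usual recipe, assuming a Keras classifier and an attacker with
# the generate_adversarial_examples(features, labels) interface used above;
# the optimizer, loss, and schedule are assumptions.
import tensorflow as tf


def adversarially_train_sketch(model, train_data, attacker, epochs=5):
    """Train the model on adversarial examples crafted against its current weights."""
    optimizer = tf.keras.optimizers.Adam()
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    for epoch in range(epochs):
        for features, labels in train_data:
            # Craft adversarial examples against the current weights.
            adv_features = attacker.generate_adversarial_examples(
                features, labels)
            with tf.GradientTape() as tape:
                predictions = model(adv_features, training=True)
                loss = loss_fn(labels, predictions)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(
                zip(gradients, model.trainable_variables))
        print("Finished adversarial training epoch {}.".format(epoch + 1))
    return model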