Example No. 1
def generate_jsma_examples(sess, x, predictions, X_test, Y_test, nb_examples):
    print('Crafting ' + str(nb_examples) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, nb_examples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, nb_examples), dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x)

    X_adv_list = []
    perterb_list = []

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(nb_examples):
        print("=================Working on %d/%d..." %
              (sample_ind + 1, nb_examples))
        # Define the target class as the next class (cyclically) after the true label.
        target = (int(np.argmax(Y_test[sample_ind])) + 1) % 10

        print('--------------------------------------')
        print('Creating adversarial example for target class ' + str(target))
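        # theta sets the perturbation added to each selected feature and gamma
        # caps the fraction of features that may be modified; increase=True
        # only ever raises pixel values, clipped to [clip_min, clip_max].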
        _X_adv, result, percentage_perterb = jsma(
            sess,
            x,
            predictions,
            grads,
            X_test[sample_ind:(sample_ind + 1)],
            target,
            theta=1,
            gamma=0.1,
            increase=True,
            back='tf',
            clip_min=0,
            clip_max=1)

        # Update the arrays for later analysis
        results[target, sample_ind] = result
        perturbations[target, sample_ind] = percentage_perterb
        X_adv_list.append(_X_adv)
        perterb_list.append(percentage_perterb)

    X_adv = np.concatenate(tuple(X_adv_list))

    return X_adv, results, perterb_list
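
The helper above delegates the actual attack to the jsma call (note it uses xrange, so it targets Python 2 or needs six.moves). As a rough illustration of what the underlying saliency map computes, here is a minimal NumPy sketch of the single-feature scoring rule from the JSMA paper; the CleverHans implementation additionally searches over feature pairs, and the function name and toy Jacobian below are purely illustrative.

import numpy as np

def saliency_map_sketch(jacobian, target, increase=True):
    # jacobian has shape (nb_classes, nb_features), entry [j, i] = dF_j/dX_i.
    target_grad = jacobian[target]                   # dF_target/dX_i
    other_grad = jacobian.sum(axis=0) - target_grad  # sum over classes j != target
    if increase:
        # Keep only features that raise the target score and lower the others.
        mask = (target_grad > 0) & (other_grad < 0)
    else:
        mask = (target_grad < 0) & (other_grad > 0)
    return mask * np.abs(target_grad) * np.abs(other_grad)

# Toy example: 3 classes, 4 input features, target class 2.
jac = np.array([[0.1, -0.2, 0.0, 0.3],
                [-0.4, 0.1, 0.2, -0.1],
                [0.5, 0.2, -0.3, 0.1]])
print(saliency_map_sketch(jac, target=2))  # highest score = best feature to perturb
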
Example No. 2
def main(argv=None):
    """
    STL-10 tutorial for the Jacobian-based saliency map approach (JSMA),
    adapted from the CleverHans MNIST tutorial
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get STL-10 data
    X_train, Y_train, X_test, Y_test = data_stl10()
    print("Loaded STL10 test data")
    #print("Loaded CIFAR10 test data")
    #print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 96, 96, 3))
    #x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph (MNIST)
    #model = cnn_model()
    #predictions = model(x)
    #print("Defined TensorFlow model graph.")

    # Define TF model graph
    model = vgg19((96,96,3))
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train the model if a checkpoint does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.filename))
    else:
        train_params = {
            'nb_epochs': FLAGS.nb_epochs,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, predictions, X_train, Y_train,
                    args=train_params)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    # assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes-1) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

    # Initialize our array for grid visualization
    grid_shape = (FLAGS.nb_classes,
                  FLAGS.nb_classes,
                  FLAGS.img_rows,
                  FLAGS.img_cols,
                  FLAGS.nb_channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, FLAGS.source_samples):
        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[sample_ind]))
        target_classes = other_classes(FLAGS.nb_classes, current_class)

        # For the grid visualization, keep original images along the diagonal
        grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
                X_test[sample_ind:(sample_ind+1)],
                (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adv. example for target class ' + str(target))

            # This call runs the Jacobian-based saliency map approach
            adv_x, res, percent_perturb = jsma(sess, x, predictions, grads,
                                               X_test[sample_ind:
                                                      (sample_ind+1)],
                                               target, theta=1, gamma=0.1,
                                               increase=True, back='tf',
                                               clip_min=0, clip_max=1)

            # Display the original and adversarial images side-by-side
#            if FLAGS.viz_enabled:
#                if 'figure' not in vars():
#                        figure = pair_visual(
#                                np.reshape(X_test[sample_ind:(sample_ind+1)],
#                                           (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)),
#                                np.reshape(adv_x,
#                                           (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels)))
#                else:
#                    figure = pair_visual(
#                            np.reshape(X_test[sample_ind:(sample_ind+1)],
#                                       (FLAGS.img_rows, FLAGS.img_cols,FLAGS.nb_channels)),
#                            np.reshape(adv_x, (FLAGS.img_rows,
#                                       FLAGS.img_cols,FLAGS.nb_channels)), figure)

            # Add our adversarial example to our grid data
            grid_viz_data[target, current_class, :, :, :] = np.reshape(
                    adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))
            # Sanity check: an all-zero adversarial example means the attack
            # returned an empty image, so abort rather than record it.
            if np.sum(adv_x) == 0:
                print('Adversarial example is all zeros; aborting.')
                quit()
            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.2f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only
    # (average over the successful entries rather than over the whole array)
    succ_mask = results == 1
    percent_perturb_succ = (np.mean(perturbations[succ_mask])
                            if succ_mask.any() else 0.)
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.2f}'.format(percent_perturb_succ))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if FLAGS.viz_enabled:
        _ = grid_visual(grid_viz_data)
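
Example No. 2 reads its hyperparameters from FLAGS, which is defined elsewhere in the original script. A plausible set of definitions using the TF 1.x flags module is sketched below; the flag names match those the script references, but the default values (96x96x3 STL-10 inputs, checkpoint names, and so on) are assumptions to adapt to your setup.

import tensorflow as tf

flags = tf.app.flags
FLAGS = flags.FLAGS

# Defaults below are illustrative, not taken from the original script.
flags.DEFINE_string('train_dir', '/tmp/stl10_train', 'Checkpoint directory')
flags.DEFINE_string('filename', 'stl10_vgg19.ckpt', 'Checkpoint filename')
flags.DEFINE_integer('nb_epochs', 6, 'Number of training epochs')
flags.DEFINE_integer('batch_size', 128, 'Batch size for training and eval')
flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')
flags.DEFINE_integer('nb_classes', 10, 'Number of output classes')
flags.DEFINE_integer('source_samples', 10, 'Number of test samples to attack')
flags.DEFINE_integer('img_rows', 96, 'Input row dimension')
flags.DEFINE_integer('img_cols', 96, 'Input column dimension')
flags.DEFINE_integer('nb_channels', 3, 'Number of colour channels')
flags.DEFINE_boolean('viz_enabled', False, 'Show the adversarial grid')

if __name__ == '__main__':
    tf.app.run()  # dispatches to the main(argv=None) defined above
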
Example No. 3
def main(argv=None):
    """
    MNIST cleverhans tutorial for the Jacobian-based saliency map approach (JSMA)
    :return:
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'th':
        keras.backend.set_image_dim_ordering('th')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to 'tf', temporarily setting to 'th'"
        )

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 1, 28, 28))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = model_mnist()
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.filename))
    else:
        tf_model_train(sess, x, y, predictions, X_train, Y_train)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    accuracy = tf_model_eval(sess, x, y, predictions, X_test, Y_test)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes - 1) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x)

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(FLAGS.source_samples):
        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        target_classes = other_classes(FLAGS.nb_classes,
                                       int(np.argmax(Y_test[sample_ind])))

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adversarial example for target class ' +
                  str(target))

            # This call runs the Jacobian-based saliency map approach
            _, result, percentage_perterb = jsma(
                sess,
                x,
                predictions,
                grads,
                X_test[sample_ind:(sample_ind + 1)],
                target,
                theta=1,
                gamma=0.1,
                increase=True,
                back='tf',
                clip_min=0,
                clip_max=1)

            # Update the arrays for later analysis
            results[target, sample_ind] = result
            perturbations[target, sample_ind] = percentage_perterb

    # Compute the number of adversarial examples that were successfully found
    success_rate = float(np.sum(results)) / (
        (FLAGS.nb_classes - 1) * FLAGS.source_samples)
    print('Avg. rate of successful misclassifications {0}'.format(success_rate))

    # Compute the average distortion introduced by the algorithm
    percentage_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0}'.format(percentage_perturbed))

    # Close TF session
    sess.close()
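
Examples No. 2 and 3 rely on an other_classes helper to enumerate the attack targets. Assuming it simply returns every class index except the ground-truth one, which matches how it is used above, a minimal stand-in would be:

def other_classes(nb_classes, class_ind):
    # All candidate target classes except the sample's true class.
    return [i for i in range(nb_classes) if i != class_ind]

print(other_classes(10, 3))  # [0, 1, 2, 4, 5, 6, 7, 8, 9]
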
Example No. 4
def generate_images():

    print('==> Preparing data..')
    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the TensorFlow convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print(
            "INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
            "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
    sess = tf.Session(config=config)
    keras.backend.set_session(sess)

    print "==> Beginning Session"

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Load model
    print "==> loading vgg model"
    args = load_args()

    if args.model == 'vgg6':
        model = vggbn(top=True, pool=args.pool)
    elif args.model == 'vgg15':
        model = vgg15(top=True, pool=args.pool)
    elif args.model == 'generic':
        model = generic(top=True, pool=args.pool)
    elif args.model == 'resnet18':
        model = resnet.build_resnet_18(args.pool)

    predictions = model(x)

    model.load_weights(args.load)

    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args=eval_params)
    print('==> Accuracy : {}'.format(accuracy))

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
        eval_params = {'batch_size': FLAGS.batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              predictions,
                              X_test,
                              Y_test,
                              args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # CIFAR10 training parameters (unused here since pre-trained weights are loaded above)
    train_params = {
        'nb_epochs': FLAGS.nb_epochs,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate
    }

    im_base = '/im_'
    model_name = args.model + '_p' + str(args.pool)
    if args.attack == 'fgsm' or args.attack == 'FGSM':

        result_dir = os.getcwd() + '/images/fgsm/'
        print "==> creating fgsm adversarial wrapper"
        adv_x = fgsm(x, predictions, eps=0.3)

        print "==> sending to batch evaluator to finalize adversarial images"
        eval_params = {'batch_size': FLAGS.batch_size}
        X_train_adv, = batch_eval(sess, [x], [adv_x], [X_train],
                                  args=eval_params)

        i = 0
        if not os.path.exists(result_dir + model_name):
            os.makedirs(result_dir + model_name)
        print "==> saving images to {}".format(result_dir + model_name)
        for ad in X_train_adv:
            scipy.misc.imsave(
                result_dir + model_name + im_base + str(i) + '.png', ad)
            i += 1

        sess.close()
    """ JSMA """
    if args.attack == 'jsma' or args.attack == 'JSMA':

        result_dir = os.getcwd() + '/images/jsma/trial_single_adv'
        print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
              str(FLAGS.nb_classes - 1) + ' adversarial examples')

        results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

        # This array contains the fraction of perturbed features for each test set
        perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                 dtype='f')

        # Define the TF graph for the model's Jacobian
        grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

        # Initialize our array for grid visualization
        grid_shape = (FLAGS.nb_classes, FLAGS.nb_classes, FLAGS.img_rows,
                      FLAGS.img_cols, FLAGS.nb_channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')
        i_saved = 0
        n_image = 0
        # Loop over the samples we want to perturb into adversarial examples
        print "==> saving images to {}".format(result_dir + model_name)
        for sample_ind in xrange(7166, FLAGS.source_samples):  # hard-coded start index, presumably resuming an earlier run
            # We want to find an adversarial example for each possible target class
            current_class = int(np.argmax(Y_train[sample_ind]))
            target_classes = other_classes(FLAGS.nb_classes, current_class)
            # For the grid visualization, keep original images along the diagonal
            grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
                X_train[sample_ind:(sample_ind + 1)],
                (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

            # Loop over all target classes
            adversarials = []
            for idx, target in enumerate(target_classes):
                print "image {}".format(sample_ind)

                # Hold all successful adversarials for this source sample; since
                # we don't want 500k images, we uniformly sample one to save
                # once every target class has been tried.

                print('--------------------------------------')
                print('Creating adv. example for target class ' + str(target))

                # This call runs the Jacobian-based saliency map approach
                adv_x, res, percent_perturb = jsma(
                    sess,
                    x,
                    predictions,
                    grads,
                    X_train[sample_ind:(sample_ind + 1)],
                    target,
                    theta=1,
                    gamma=0.1,
                    increase=True,
                    back='tf',
                    clip_min=0,
                    clip_max=1)
                # Display the original and adversarial images side-by-side
                adversarial = np.reshape(
                    adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))
                original = np.reshape(
                    X_train[sample_ind:(sample_ind + 1)],
                    (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

                if FLAGS.viz_enabled:

                    if 'figure' not in vars():
                        figure = pair_visual(original, adversarial)
                    else:
                        figure = pair_visual(original, adversarial, figure)

                if not os.path.exists(result_dir + model_name):
                    os.makedirs(result_dir + model_name)

                if res == 1:
                    adversarials.append(adversarial)

                # Once the last target class for this sample has been tried,
                # uniformly sample one successful adversarial image to save.
                if idx == FLAGS.nb_classes - 2:
                    if adversarials:
                        # np.random.randint's upper bound is exclusive, so pass
                        # len(adversarials) to give every image an equal chance.
                        idx_uniform = np.random.randint(len(adversarials))
                        print(idx_uniform)
                        scipy.misc.imsave(
                            result_dir + model_name + im_base +
                            str(sample_ind) + '.png',
                            adversarials[idx_uniform])
                        i_saved += 1
                        print("==> images saved: {}".format(i_saved))
                    else:
                        print("No adversarials generated")

                # Add our adversarial example to our grid data
                grid_viz_data[target, current_class, :, :, :] = np.reshape(
                    adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))

                # Update the arrays for later analysis
                results[target, sample_ind] = res
                perturbations[target, sample_ind] = percent_perturb

            n_image += 1

        # Compute the number of adversarial examples that were successfully found
        nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
        succ_rate = float(np.sum(results)) / nb_targets_tried
        print(
            'Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(perturbations)
        print('Avg. rate of perturbed features {0:.2f}'.format(
            percent_perturbed))

        # Compute the average distortion introduced for successful samples only
        # (average over the successful entries rather than over the whole array)
        succ_mask = results == 1
        percent_perturb_succ = (np.mean(perturbations[succ_mask])
                                if succ_mask.any() else 0.)
        print(
            'Avg. rate of perturbed features for successful '
            'adversarial examples {0:.2f}'.format(percent_perturb_succ))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if FLAGS.viz_enabled:
            _ = grid_visual(grid_viz_data)
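
For reference, the fgsm wrapper used in the FGSM branch of Example No. 4 boils down to a single gradient-sign step. Below is a minimal NumPy sketch of that update, assuming the gradient of the loss with respect to the input batch has already been evaluated; fgsm_step and the random arrays are illustrative stand-ins, not part of the original code.

import numpy as np

def fgsm_step(x, grad_loss_wrt_x, eps=0.3, clip_min=0.0, clip_max=1.0):
    # Move every input feature by eps in the direction that increases the loss,
    # then clip back into the valid pixel range.
    return np.clip(x + eps * np.sign(grad_loss_wrt_x), clip_min, clip_max)

# Toy usage with random data standing in for images and gradients.
x = np.random.rand(2, 32, 32, 3).astype('float32')
g = np.random.randn(2, 32, 32, 3).astype('float32')
print(fgsm_step(x, g).shape)  # (2, 32, 32, 3)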