Example #1
def print_accuracies(filepath,
                     train_start=TRAIN_START,
                     train_end=TRAIN_END,
                     test_start=TEST_START,
                     test_end=TEST_END,
                     batch_size=BATCH_SIZE,
                     which_set=WHICH_SET,
                     base_eps_iter=BASE_EPS_ITER,
                     nb_iter=NB_ITER):
    """
  Load a saved model and print out its accuracy on different data distributions

  This function works by running a single attack on each example.
  This provides a reasonable estimate of the true failure rate quickly, so
  long as the model does not suffer from gradient masking.
  However, this estimate is mostly intended for development work and not
  for publication. A more accurate estimate may be obtained by running
  an attack bundler instead.

  :param filepath: path to model to evaluate
  :param train_start: index of first training set example to use
  :param train_end: index of last training set example to use
  :param test_start: index of first test set example to use
  :param test_end: index of last test set example to use
  :param batch_size: size of evaluation batches
  :param which_set: 'train' or 'test'
  :param base_eps_iter: step size if the data were in [0,1]
    (Step size will be rescaled proportional to the actual data range)
  :param nb_iter: Number of iterations of PGD to run per class
  """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(20181014)
    set_log_level(logging.INFO)
    sess = tf.Session()

    with sess.as_default():
        model = load(filepath)
    assert len(model.get_params()) > 0
    factory = model.dataset_factory
    factory.kwargs['train_start'] = train_start
    factory.kwargs['train_end'] = train_end
    factory.kwargs['test_start'] = test_start
    factory.kwargs['test_end'] = test_end
    dataset = factory()

    center = dataset.kwargs['center']
    max_val = dataset.kwargs['max_val']
    value_range = max_val * (1. + center)
    min_value = 0. - center * max_val

    if 'CIFAR' in str(factory.cls):
        base_eps = 8. / 255.
        if base_eps_iter is None:
            base_eps_iter = 2. / 255.
    elif 'MNIST' in str(factory.cls):
        base_eps = .3
        if base_eps_iter is None:
            base_eps_iter = .1
    else:
        raise NotImplementedError(str(factory.cls))

    pgd_params = {
        'eps': base_eps * value_range,
        'eps_iter': base_eps_iter * value_range,
        'nb_iter': nb_iter,
        'clip_min': min_value,
        'clip_max': max_val
    }

    x_data, y_data = dataset.get_set(which_set)

    semantic = Semantic(model, center, max_val, sess)
    pgd = ProjectedGradientDescent(model, sess=sess)

    jobs = [('clean', None, None, None), ('Semantic', semantic, None, None),
            ('pgd', pgd, pgd_params, None)]

    for job in jobs:
        name, attack, attack_params, job_batch_size = job
        if job_batch_size is None:
            job_batch_size = batch_size
        t1 = time.time()
        acc = accuracy(sess,
                       model,
                       x_data,
                       y_data,
                       batch_size=job_batch_size,
                       devices=devices,
                       attack=attack,
                       attack_params=attack_params)
        t2 = time.time()
        print("Accuracy on " + name + " examples: ", acc)
        print("Evaluation took", t2 - t1, "seconds")
Example #2
def do_train(train_start=TRAIN_START,
             train_end=60000,
             test_start=0,
             test_end=10000,
             nb_epochs=NB_EPOCHS,
             batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE,
             backprop_through_attack=False,
             nb_filters=NB_FILTERS,
             num_threads=None,
             use_ema=USE_EMA,
             ema_decay=EMA_DECAY):
    print('Parameters')
    print('-' * 79)
    for x, y in sorted(locals().items()):
        print('%-32s %s' % (x, y))
    print('-' * 79)

    if os.path.exists(FLAGS.save_path):
        print("Model " + FLAGS.save_path +
              " already exists. Refusing to overwrite.")
        quit()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    dataset = MNIST(train_start=train_start,
                    train_end=train_end,
                    test_start=test_start,
                    test_end=test_end,
                    center=True)

    # Use Image Parameters
    img_rows, img_cols, nchannels = dataset.x_train.shape[1:4]
    nb_classes = dataset.NB_CLASSES

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(x_set, y_set, is_adv=None):
        acc = accuracy(sess, model, x_set, y_set)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'clean'
        if report_text:
            print('Accuracy on %s examples: %0.4f' % (report_text, acc))
        return acc

    model = Model(filters=nb_filters)
    model.dataset_factory = dataset.get_factory()

    pgd = ProjectedGradientDescent(model=model, sess=sess)

    center = dataset.kwargs['center']
    value_range = 1. + center
    base_eps = 8. / 255.

    attack_params = {
        'eps': base_eps * value_range,
        'clip_min': -float(center),
        'clip_max': float(center),
        'eps_iter': (2. / 255.) * value_range,
        'nb_iter': 40
    }

    loss = CrossEntropy(
        model,
        attack=pgd,
        adv_coeff=1.,
        attack_params=attack_params,
    )

    print_test_period = 10
    print_train_period = 50

    def evaluate():
        global epoch
        global last_test_print
        global last_train_print
        global best_result
        global best_epoch
        with sess.as_default():
            print("Saving to ", FLAGS.save_path)
            save(FLAGS.save_path, model)
        if (epoch % print_test_period == 0
                or time.time() - last_test_print > 300):
            t1 = time.time()
            result = do_eval(dataset.x_test, dataset.y_test, False)
            t2 = time.time()
            if result >= best_result:
                if result > best_result:
                    best_epoch = epoch
                else:
                    # Keep track of ties
                    assert result == best_result
                    if not isinstance(best_epoch, list):
                        if best_epoch == -1:
                            best_epoch = []
                        else:
                            best_epoch = [best_epoch]
                    best_epoch.append(epoch)
                best_result = result
            print("Best so far: ", best_result)
            print("Best epoch: ", best_epoch)
            last_test_print = t2
            print("Test eval time: ", t2 - t1)
        if (epoch % print_train_period == 0
                or time.time() - last_train_print > 3000):
            t1 = time.time()
            print("Training set: ")
            do_eval(dataset.x_train, dataset.y_train, False)
            t2 = time.time()
            print("Train eval time: ", t2 - t1)
            last_train_print = t2
        epoch += 1

    optimizer = None

    # ema_decay is passed in as the *name* of a callable decay schedule
    # defined at module level; resolve it before handing it to train().
    ema_decay = globals()[ema_decay]
    assert callable(ema_decay)

    train(sess,
          loss,
          dataset.x_train,
          dataset.y_train,
          evaluate=evaluate,
          optimizer=optimizer,
          args=train_params,
          rng=rng,
          var_list=model.get_params(),
          use_ema=use_ema,
          ema_decay=ema_decay)
    # Make sure we always evaluate on the last epoch, so pickling bugs are more
    # obvious
    if (epoch - 1) % print_test_period != 0:
        do_eval(dataset.x_test, dataset.y_test, False)
    if (epoch - 1) % print_train_period != 0:
        print("Training set: ")
        do_eval(dataset.x_train, dataset.y_train, False)

    with sess.as_default():
        save(FLAGS.save_path, model)
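The evaluate() closure above mutates module-level state, so a driver has to initialize those globals before calling do_train(). A minimal sketch (names taken from the snippet, initial values assumed):

import time

epoch = 0
last_test_print = time.time()
last_train_print = time.time()
best_result = 0.
best_epoch = -1

if __name__ == '__main__':
    do_train()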
Example #3
def save_pgd_attacked_images(original_class,
                             target_class,
                             attack_strength,
                             nb_iter=50,
                             seed=1000):

    random.seed(seed)
    np.random.seed(seed)
    tf.set_random_seed(seed)

    eps = attack_strength
    labels_by_name = load_labels_by_name()
    target_label = labels_by_name[target_class].lucid_label

    benign_dataset_path = DataPaths.get_benign_images_datapath(original_class)
    assert benign_dataset_path.exists()

    attacked_dataset_path = DataPaths.get_attacked_images_datapath(
        original_class, target_class, attack_name='pgd', attack_strength=eps)
    assert not attacked_dataset_path.exists()
    print('Saving attacked images to %s' % attacked_dataset_path)

    img_dataset = hdf5utils.load_image_dataset_from_file(benign_dataset_path)

    output_file = h5py.File(attacked_dataset_path, 'w')
    out_dataset = hdf5utils.create_image_dataset(output_file,
                                                 dataset_name='images')
    indices_dataset = hdf5utils.create_dataset(output_file,
                                               data_shape=(1, ),
                                               dataset_name='indices')

    graph = tf.Graph()
    with graph.as_default():
        model = InceptionV1Model()
        x = model.default_input_placeholder
        y_pred = model.get_predicted_class(x)
        with tf.Session(graph=graph) as sess:
            attack = ProjectedGradientDescent(model, sess=sess)
            target_one_hot_encoded = get_one_hot_encoded_targets(target_label)

            x_adv = attack.generate(x,
                                    eps=eps,
                                    nb_iter=nb_iter,
                                    clip_min=-1,
                                    clip_max=1,
                                    eps_iter=(eps / 5),
                                    ord=2,
                                    y_target=target_one_hot_encoded)

            num_attack_success = 0
            pbar = tqdm(unit='imgs', total=len(img_dataset))
            try:
                for i, img in enumerate(img_dataset):
                    ben_img = np.array(img)
                    adv_img = sess.run(x_adv, feed_dict={x: [ben_img]})
                    attack_pred = sess.run(y_pred, feed_dict={x: adv_img})

                    adv_img = adv_img[0]
                    attack_pred = attack_pred[0]
                    assert not np.any(np.isnan(adv_img))
                    assert not np.isnan(attack_pred)

                    if attack_pred == target_label:
                        index = np.array([i])
                        num_attack_success += 1
                        hdf5utils.add_image_to_dataset(adv_img, out_dataset)
                        hdf5utils.add_item_to_dataset(index, indices_dataset)

                    pbar.set_postfix(num_attack_success=num_attack_success)
                    pbar.update()
            except tf.errors.OutOfRangeError:
                pass
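A hypothetical invocation of the function above; the class names and the attack budget are illustrative only, not taken from the original script:

save_pgd_attacked_images(original_class='tabby',
                         target_class='goldfish',
                         attack_strength=4.0,  # an L2 budget, since the attack uses ord=2
                         nb_iter=50)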
Example #4
def setUp(self):
    super(TestProjectedGradientDescent, self).setUp()
    self.attack = ProjectedGradientDescent(self.model, sess=self.sess)
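The fixture above only makes sense inside a larger test class. A minimal self-contained sketch of such a harness, using a toy logits model in place of the suite's real one (everything here beyond the setUp lines is an assumption):

import unittest
import numpy as np
import tensorflow as tf
from cleverhans.attacks import ProjectedGradientDescent
from cleverhans.model import CallableModelWrapper

class TestProjectedGradientDescent(unittest.TestCase):
    def setUp(self):
        super(TestProjectedGradientDescent, self).setUp()
        self.sess = tf.Session()
        # Toy stand-in model: a single dense layer producing 10 logits
        self.model = CallableModelWrapper(tf.keras.layers.Dense(10), 'logits')
        self.attack = ProjectedGradientDescent(self.model, sess=self.sess)

    def test_perturbation_within_eps(self):
        x = np.random.rand(4, 8).astype(np.float32)
        x_adv = self.attack.generate_np(x, eps=0.1, eps_iter=0.02, nb_iter=5,
                                        clip_min=0., clip_max=1.)
        # PGD projects onto the eps-ball, so the max-norm bound must hold
        self.assertLessEqual(np.max(np.abs(x_adv - x)), 0.1 + 1e-6)

if __name__ == '__main__':
    unittest.main()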
Example #5
def single_run_max_confidence_recipe(sess,
                                     model,
                                     x,
                                     y,
                                     nb_classes,
                                     eps,
                                     clip_min,
                                     clip_max,
                                     eps_iter,
                                     nb_iter,
                                     report_path,
                                     batch_size=BATCH_SIZE):
    """A reasonable attack bundling recipe for a max norm threat model and
  a defender that uses confidence thresholding. This recipe uses both
  uniform noise and randomly-initialized PGD targeted attacks.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ

  This version runs each attack (noise, targeted PGD for each class with
  nb_iter iterations, target PGD for each class with 25X more iterations)
  just once and then stops. See `basic_max_confidence_recipe` for a version
  that runs indefinitely.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param nb_classes: int, number of classes
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param eps_iter: float, step size for one version PGD attacks
    (will also run another version with 25X smaller step size)
  :param nb_iter: int, number of iterations for the cheaper PGD attacks
    (will also run another version with 25X more iterations)
  :param report_path: str, the path that the report will be saved to.
  :batch_size: int, the total number of examples to run simultaneously
  """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params, "noise")
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(
            ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 1 for config in attack_configs}
    goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
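Note that the recipe depends on a module-level num_devices (the number of devices the attack batches are split across). A hypothetical call on MNIST-shaped data, with illustrative values:

single_run_max_confidence_recipe(sess, model, x_test, y_test,
                                 nb_classes=10, eps=0.3,
                                 clip_min=0., clip_max=1.,
                                 eps_iter=0.1, nb_iter=40,
                                 report_path='report.joblib')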
Example #6
    def fit(self, X, y, sample_weight=None):
        if self.train_type == 'adv':
            #self.model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[])
            #Y = self.lbl_enc.transform(y.reshape(-1, 1))
            #initialize_uninitialized_global_variables(self.sess)
            #input_generator = InputGenerator(X, Y, sample_weight,
            #    attacker=self.attacker, shuffle=True, batch_size=self.batch_size,
            #    random_state=self.random_state)
            #self.model.fit_generator(
            #    input_generator,
            #    steps_per_epoch=((X.shape[0]*2 - 1) // self.batch_size) + 1,
            #    epochs=self.epochs,
            #    verbose=1,
            #)
            #######################################
            #Y = self.lbl_enc.transform(y.reshape(-1, 1))
            #train_params = {
            #    'init_all': True,
            #    'rng': self.random_state,
            #    'nb_epochs': self.epochs,
            #    'batch_size': self.batch_size,
            #    'learning_rate': self.learning_rate,
            #    'optimizer': tf.train.RMSPropOptimizer,
            #}
            #wrap = KerasModelWrapper(self.model)
            #pgd = ProjectedGradientDescent(wrap, sess=self.sess, nb_iter=20)
            #pgd_params = {'eps': self.eps}
            ##attack = pgd.generate(x, y=y, **pgd_params)
            #def attack(x):
            #    return pgd.generate(x, **pgd_params)
            #loss = CrossEntropy(wrap, smoothing=0.1, attack=attack)
            #def evaluate():
            #    #print("XDDD %f", self.sess.run(loss))
            #    print('Test accuracy on legitimate examples: %0.4f' % self.score(X, y))
            #train(self.sess, loss, X.astype(np.float32), Y.astype(np.float32),
            #        args=train_params, evaluate=evaluate)
            ######################################
            Y = self.lbl_enc.transform(y.reshape(-1, 1))
            wrap_2 = KerasModelWrapper(self.model)
            fgsm_2 = ProjectedGradientDescent(wrap_2, sess=self.sess)
            self.model(self.model.input)  # build the model's graph on its own input
            fgsm_params = {'eps': self.eps}

            # Use a loss function based on legitimate and adversarial examples
            adv_loss_2 = get_adversarial_loss(self.model, fgsm_2, fgsm_params)
            adv_acc_metric_2 = get_adversarial_acc_metric(
                self.model, fgsm_2, fgsm_params)
            self.model.compile(
                #optimizer=keras.optimizers.Adam(self.learning_rate),
                optimizer=keras.optimizers.Nadam(),
                loss=adv_loss_2,
                metrics=['accuracy', adv_acc_metric_2])
            self.model.fit(
                X,
                Y,
                batch_size=self.batch_size,
                epochs=self.epochs,
                verbose=2,
                sample_weight=sample_weight,
            )
            print((self.model.predict(X).argmax(1) == y).mean())

            self.augX, self.augy = None, None

        elif self.train_type == 'advPruning':
            y = y.astype(int) * 2 - 1
            self.augX, self.augy = find_eps_separated_set(X,
                                                          self.eps / 2,
                                                          y,
                                                          ord=self.ord)
            self.augy = (self.augy + 1) // 2

            self.model.compile(loss=self.loss,
                               optimizer=self.optimizer,
                               metrics=[])
            Y = self.lbl_enc.transform(self.augy.reshape(-1, 1))
            self.model.fit(self.augX,
                           Y,
                           batch_size=self.batch_size,
                           verbose=0,
                           epochs=self.epochs,
                           sample_weight=sample_weight)
            print("number of augX", np.shape(self.augX), len(self.augy))
        elif self.train_type is None:
            self.model.compile(loss=self.loss,
                               optimizer=self.optimizer,
                               metrics=[])
            Y = self.lbl_enc.transform(y.reshape(-1, 1))
            self.model.fit(X,
                           Y,
                           batch_size=self.batch_size,
                           verbose=0,
                           epochs=self.epochs,
                           sample_weight=sample_weight)
        else:
            raise ValueError("Not supported train type: %s", self.train_type)
Example #7
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      nb_classes=10,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: number of attack iterations
    :param model_path: path to the model file
    :param targeted: whether to run a targeted attack (vs. untargeted)
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    K.set_session(sess)

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    K.set_learning_phase(1)
    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    models = {}
    preds = {}
    for model_name in ['mlp', 'cnn', 'hrnn']:
        try:
            print('[DEBUG] Loading model.')
            models[model_name] = load_model('{}{}'.format(
                model_type, model_name))
        except (IOError, OSError):
            print('[ERROR] Adversarially trained models not found! '
                  'Train and save the strengthened models first, then re-run this script.')
            exit(1)

        preds[model_name] = models[model_name](x)

    rng = np.random.RandomState([2017, 8, 30])

    # Evaluate the accuracy of the adversarially trained MNIST models on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy_test = ''
    attacks = {}

    # Make computations graphs for the attacks
    for model_name in models.keys():
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds[model_name],
                              X_test,
                              Y_test,
                              args=eval_params)
        accuracy_test += '{} {}\n'.format(model_name, accuracy)

        # Instantiate a PGD attack object
        wrap = KerasModelWrapper(models[model_name])
        attacks['$PGD_{}$'.format(model_name[0])] = ProjectedGradientDescent(
            wrap, sess=sess)

    # Build the input and target arrays used in the attack parameters
    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][0] for i in range(10)]
    if targeted:
        one_hot = np.zeros((10, 10))
        one_hot[np.arange(10), np.arange(10)] = 1

        adv_inputs = np.array([[instance] * 10 for instance in X_test[idxs]],
                              dtype=np.float32)
        adv_inputs = adv_inputs.reshape((100, 28, 28, 1))
        adv_ys = np.array([one_hot] * 10, dtype=np.float32).reshape((100, 10))
        yname = "y_target"
    else:
        adv_inputs = X_test[idxs]
        adv_ys = None
        yname = "y"

    attack_params = {'eps': 0.3, yname: adv_ys, 'eps_iter': 0.05}

    table_header = '{}model '.format(model_type)
    accuracy_attack = ''

    for model_name in models.keys():

        accuracy_attack += '{} '.format(model_name)

        # For each model, apply all attacks
        for attack_name in attacks.keys():
            print('[DEBUG] Attacking {} using {}.'.format(
                model_name, attack_name))

            # Branch entered only once per attack, to add its name to the table header
            if attack_name not in table_header:
                table_header += '{} '.format(attack_name)

            adv = attacks[attack_name].generate_np(adv_inputs, **attack_params)
            if targeted:
                adv_accuracy = model_eval(sess,
                                          x,
                                          y,
                                          preds[model_name],
                                          adv,
                                          adv_ys,
                                          args={'batch_size': 10})
            else:
                adv_accuracy = model_eval(sess,
                                          x,
                                          y,
                                          preds[model_name],
                                          adv,
                                          Y_test[idxs],
                                          args={'batch_size': 10})

            accuracy_attack += '{} '.format(adv_accuracy * 100)

        # Move on to attack the next model
        accuracy_attack += '\n'

    print(table_header)
    print(accuracy_attack)
    print(accuracy_test)

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    return report
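For reference, the targeted setup above builds 100 attack inputs: each of the 10 per-class exemplars is paired with all 10 one-hot targets. The same arrays, built in isolation with stand-in data:

import numpy as np

one_hot = np.eye(10, dtype=np.float32)
exemplars = np.random.rand(10, 28, 28, 1).astype(np.float32)  # stand-in for X_test[idxs]
adv_inputs = np.repeat(exemplars, 10, axis=0)  # (100, 28, 28, 1); row k holds exemplar k // 10
adv_ys = np.tile(one_hot, (10, 1))             # (100, 10); row k targets class k % 10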
Example #8
def fixed_max_confidence_recipe(sess,
                                model,
                                x,
                                y,
                                nb_classes,
                                eps,
                                clip_min,
                                clip_max,
                                eps_iter,
                                nb_iter,
                                report_path,
                                batch_size=BATCH_SIZE):
    """A reasonable attack bundling recipe for a max norm threat model and
  a defender that uses confidence thresholding.

  References:
  https://openreview.net/forum?id=H1g0piA9tQ

  This version runs each attack a fixed number of times.
  It is more exhaustive than `single_run_max_confidence_recipe` but because
  it uses a fixed budget rather than running indefinitely it is more
  appropriate for making fair comparisons between two models.

  :param sess: tf.Session
  :param model: cleverhans.model.Model
  :param x: numpy array containing clean example inputs to attack
  :param y: numpy array containing true labels
  :param nb_classes: int, number of classes
  :param eps: float, maximum size of perturbation (measured by max norm)
  :param eps_iter: float, step size for one version of PGD attacks
    (will also run another version with 25X smaller step size)
  :param nb_iter: int, number of iterations for one version of PGD attacks
    (will also run another version with 25X more iterations)
  :param report_path: str, the path that the report will be saved to.
  :batch_size: int, the total number of examples to run simultaneously
  """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(
            ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 5 for config in attack_configs}
    pgd_work_goal = {config: 5 for config in pgd_attack_configs}
    # TODO: lower priority: make sure bundler won't waste time running targeted
    # attacks on examples where the target class is the true class
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
        MaxConfidence(t=0.5, new_work_goal=new_work_goal),
        MaxConfidence(t=0.75, new_work_goal=new_work_goal),
        MaxConfidence(t=0.875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.9375, new_work_goal=new_work_goal),
        MaxConfidence(t=0.96875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.984375, new_work_goal=new_work_goal),
        MaxConfidence(t=1., new_work_goal=new_work_goal)
    ]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
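The MaxConfidence thresholds above follow a halving schedule, t = 1 - 2**-k for k = 1..6, plus a final pass at t = 1:

ts = [1. - 2. ** -k for k in range(1, 7)] + [1.]
# -> [0.5, 0.75, 0.875, 0.9375, 0.96875, 0.984375, 1.0]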
Example #9
X_adv_auto = fgsm_auto.generate_np(X_test[indices_test], **fgsm_params)
X_adv_ce = fgsm_ce.generate_np(X_test[indices_test], **fgsm_params)
X_adv_rob = fgsm_rob.generate_np(X_test[indices_test], **fgsm_params)

comp_func(X_adv_stacked, X_adv_auto, X_adv_ce, X_adv_rob, indices_test,
          pred_base, pred_stacked, pred_auto, pred_ce, pred_rob)
#comp_func_transfer(X_adv_stacked, indices_test, pred_base, pred_stacked, model_stacked, model)
#comp_func_transfer(X_adv_auto, indices_test, pred_base, pred_auto, model_auto, model)
#comp_func_transfer(X_adv_ce, indices_test, pred_base, pred_ce, model_ce, model)
#comp_func_transfer(X_adv_rob, indices_test, pred_base, pred_rob, model_rob, model)

###################################
#PGD
print("\n\n")
print("PGD")
pgd_stacked = ProjectedGradientDescent(wrap_stacked, sess=sess)
pgd_auto = ProjectedGradientDescent(wrap_auto, sess=sess)
pgd_ce = ProjectedGradientDescent(wrap_ce, sess=sess)
pgd_rob = ProjectedGradientDescent(wrap_rob, sess=sess)

X_adv_stacked = pgd_stacked.generate_np(X_test[indices_test], **pgd_params)
X_adv_auto = pgd_auto.generate_np(X_test[indices_test], **pgd_params)
X_adv_ce = pgd_ce.generate_np(X_test[indices_test], **pgd_params)
X_adv_rob = pgd_rob.generate_np(X_test[indices_test], **pgd_params)

comp_func(X_adv_stacked, X_adv_auto, X_adv_ce, X_adv_rob, indices_test,
          pred_base, pred_stacked, pred_auto, pred_ce, pred_rob)
#comp_func_transfer(X_adv_stacked, indices_test, pred_base, pred_stacked, model_stacked, model)
#comp_func_transfer(X_adv_auto, indices_test, pred_base, pred_auto, model_auto, model)
#comp_func_transfer(X_adv_ce, indices_test, pred_base, pred_ce, model_ce, model)
#comp_func_transfer(X_adv_rob, indices_test, pred_base, pred_rob, model_rob, model)
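The fragment above uses pgd_params without defining it. A plausible setting for images scaled to [0, 1] (an assumption, not taken from the original script):

pgd_params = {'eps': 0.03, 'eps_iter': 0.003, 'nb_iter': 40,
              'clip_min': 0., 'clip_max': 1.}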
Example #10
def train_child(t, p, m, num=0):
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    model = TestCNN().cuda(0)
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps, dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps, angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})
    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)
    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, noise_op, mim_op]
        attacks_name = ['FGSM', 'Noise', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m),
                     total=len(t),
                     desc='Subpolicy: ',
                     leave=False)):
            sub_idx = np.where(adv_type == i)[0]
            adv_i = train_x_adv[sub_idx]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi),
                         total=len(ti),
                         desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                # Apply this operation to each selected example with probability pj
                sel = np.random.rand(len(adv_i)) < pj
                adv_j = adv_i[sel]
                for k in tqdm(range(0, len(adv_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ',
                              leave=False):
                    adv_j[k:][:BATCH_SIZE] = attacks[tj](
                        adv_j[k:][:BATCH_SIZE], (mj + 1) / MAGN_COUNT *
                        (eps[tj][1] - eps[tj][0]) + eps[tj][0])
                # Fancy indexing copies, so write the attacked examples back
                adv_i[sel] = adv_j
            train_x_adv[sub_idx] = adv_i
        return train_x_adv

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    epoch_tqdm = tqdm(range(CHILD_EPOCHS), leave=False)
    trainloader = []
    for epoch in epoch_tqdm:
        if epoch % ADVERSIAL_EVERY == 0:
            train_x_adv = attack_train(train_x)
            trainset = torch.utils.data.TensorDataset(
                torch.tensor(train_x_adv, dtype=torch.float),
                torch.tensor(train_y, dtype=torch.long))
            trainloader = torch.utils.data.DataLoader(trainset,
                                                      batch_size=BATCH_SIZE,
                                                      shuffle=True,
                                                      num_workers=4)
        batch_tqdm = tqdm(trainloader, leave=False)
        for x, y in batch_tqdm:
            model.train()
            optimizer.zero_grad()
            output = model(x.cuda(0))
            loss = criterion(output, y.cuda(0))
            loss.backward()
            optimizer.step()
            acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
            batch_tqdm.set_description(f'{loss:.3f} {acc:.3f}')
        if epoch % ADVERSIAL_EVERY == ADVERSIAL_EVERY - 1 or epoch == len(
                epoch_tqdm) - 1:
            batch_tqdm = tqdm(valloader, leave=False)
            tot_loss, tot_acc = 0, 0
            for x, y in batch_tqdm:
                model.eval()
                with torch.no_grad():
                    output = model(x.cuda(0))
                    loss = float(criterion(output, y.cuda(0)))
                    acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
                    tot_loss += loss * x.size(0)
                    tot_acc += acc
            raw_loss, raw_acc = tot_loss / len(val_x), tot_acc / len(val_x)
            epoch_tqdm.set_description(f'{raw_loss:.3f} {raw_acc:.3f}')
    val_x_adv = np.zeros_like(val_x)
    for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                  desc='PGD: ',
                  leave=False):
        val_x_adv[i:][:BATCH_SIZE] = pgd_op(val_x[i:][:BATCH_SIZE], 0.01)
    adv_valset = torch.utils.data.TensorDataset(
        torch.tensor(val_x_adv, dtype=torch.float),
        torch.tensor(val_y, dtype=torch.long))
    adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                batch_size=BATCH_SIZE,
                                                shuffle=False,
                                                num_workers=4)
    batch_tqdm = tqdm(adv_valloader, leave=False)
    tot_acc = 0
    for x, y in batch_tqdm:
        model.eval()
        with torch.no_grad():
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
    adv_acc = tot_acc / len(val_x)
    torch.save(model.state_dict(), f'runs/{num}.pt')
    return raw_acc, adv_acc
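The magnitude mapping inside attack_train linearly interpolates a subpolicy's magnitude index into each attack's epsilon range. The same computation in isolation (the constant's value is assumed):

MAGN_COUNT = 10  # assumed; the snippet only uses it as a divisor

def magnitude_to_eps(mj, eps_lo=0.03, eps_hi=0.3):
    """Map magnitude index mj in [0, MAGN_COUNT) to an epsilon in (eps_lo, eps_hi]."""
    return (mj + 1) / MAGN_COUNT * (eps_hi - eps_lo) + eps_lo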
Example #11
print(results)
print("results on target model: ")
results = metrics(model_target, X_adv, X_test, y_test, indices)
print(results)    

#####BIM
print("BIM")
bim_params = {'eps': 0.03,
              'nb_iter': 300,
              'eps_iter': 0.03 / 100,
              'ord': np.inf,
              'clip_min': 0.,
              'clip_max': 1.,
              'rand_init': False
              }
bim = ProjectedGradientDescent(wrap, sess=sess)
X_adv = np.zeros((len(indices), 32, 32, 3))
for i in range(0, len(indices), batch_attack):
    X_adv[i:i + batch_attack] = bim.generate_np(
        X_test[indices[i:i + batch_attack]], **bim_params)
print("results on source model: ")
results = metrics(model, X_adv, X_test, y_test, indices)
print(results)    
print("results on target model: ")
results = metrics(model_target, X_adv, X_test, y_test, indices)
print(results)   

#####CWL2
print("CWL2")
cwl2_params = {'binary_search_steps': 10,
               'max_iterations': 100,
               'learning_rate': 0.1,
Example #12
def eval(sess,
         model_name,
         X_train,
         Y_train,
         X_test,
         Y_test,
         cnn=False,
         rbf=False,
         fgsm=False,
         jsma=False,
         df=False,
         bim=False):
    """ Load model saved in model_name.json and model_name_weights.h5 and 
    evaluate its accuracy on legitimate test samples and adversarial samples.
    Use cnn=True if the model is CNN based.
    """

    # open text file and output accuracy results to it
    text_file = open("fmnist_results.txt", "w")

    # load saved model
    print("Load model ... ")
    '''
    json = open('models/{}.json'.format(model_name), 'r')
    model = json.read()
    json.close()
    loaded_model = model_from_json(model)
    loaded_model.load_weights("models/{}_weights.h5".format(model_name))
    '''
    if rbf:
        loaded_model = load_model("rbfmodels/{}.h5".format(model_name),
                                  custom_objects={'RBFLayer': RBFLayer})
        text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name))
    else:
        loaded_model = load_model("models/{}.h5".format(model_name))
        text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name))

    # Set placeholders
    if cnn:
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    else:
        x = tf.placeholder(tf.float32, shape=(None, 784))

    y = tf.placeholder(tf.float32, shape=(None, 10))

    predictions = loaded_model(x)

    accuracy = model_eval(sess,
                          x,
                          y,
                          predictions,
                          X_test,
                          Y_test,
                          args={"batch_size": 128})
    text_file.write('Test accuracy on legitimate test examples: {0}\n'.format(
        str(accuracy)))
    #print('Test accuracy on legitimate test examples: ' + str(accuracy))

    # Craft adversarial examples depending on the input parameters
    wrap = KerasModelWrapper(loaded_model)

    # FGSM
    if fgsm:
        fgsm = FastGradientMethod(wrap, sess=sess)
        fgsm_params = {'eps': 0.3}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on fgsm adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy))

    # JSMA
    if jsma:
        jsma = SaliencyMapMethod(wrap, sess=sess)
        jsma_params = {
            'theta': 2.,
            'gamma': 0.145,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }
        adv_x = jsma.generate(x, **jsma_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on jsma adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy))

    # DeepFool
    if df:
        df = DeepFool(wrap, sess=sess)
        df_params = {'nb_candidate': 10, 'max_iter': 50}
        adv_x = df.generate(x, **df_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on df adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on df adversarial test examples: ' + str(accuracy))

    # Basic Iterative Method (PGD without a random start)
    if bim:
        bim = ProjectedGradientDescent(wrap, sess=sess)
        bim_params = {'eps': 0.3, 'rand_init': False}
        adv_x = bim.generate(x, **bim_params)
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = loaded_model(adv_x)

        # Evaluate the accuracy of the F-MNIST model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_adv,
                              X_test,
                              Y_test,
                              args={"batch_size": 128})
        text_file.write(
            'Test accuracy on bim adversarial test examples: {0}\n'.format(
                str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))

    print('Accuracy results outputted to fmnist_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
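CleverHans also ships BasicIterativeMethod, which is exactly PGD without the random start. The bim block above could equivalently use it (a sketch, reusing wrap, x, and sess from the example):

from cleverhans.attacks import BasicIterativeMethod

bim = BasicIterativeMethod(wrap, sess=sess)
adv_x = bim.generate(x, eps=0.3, eps_iter=0.05, nb_iter=10, clip_min=0., clip_max=1.)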
Example #13
cleverhans_model = KerasModelWrapper(model)

# Choose attack
if attack_method == 'MIM':
    op = MomentumIterativeMethod(cleverhans_model, sess=sess)
    params = {
        'eps': eps,
        'nb_iter': nb_iter,
        'eps_iter': eps_iter,
        'ord': order,
        'decay_factor': decay_factor,
        'clip_max': 1.,
        'clip_min': 0
    }
elif attack_method == 'PGD' and order == np.inf:
    op = ProjectedGradientDescent(cleverhans_model, sess=sess)
    params = {
        'eps': eps,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'clip_max': 1.,
        'clip_min': 0
    }
elif attack_method == 'PGD' and order == 2:
    op = ProjectedGradientDescent(cleverhans_model, sess=sess)
    params = {
        'eps': eps,
        'eps_iter': eps_iter,
        'nb_iter': nb_iter,
        'ord': 2,
        'clip_max': 1.,