示例#1
0
class Data:
    """Class-level container for the dataset arrays and their dimensions.

    The class body runs once, when the class is defined. It first fills the
    attributes from the arrays ``unambiguous_X`` / ``unambiguous_Y`` (train)
    and ``es`` / ``ls`` (test), which must already exist in the enclosing
    scope, and then overwrites them with CIFAR-10 data when ``use_cifar``
    is truthy.
    """
    # 784 == 28 * 28 * 1, i.e. the flattened size of one image in the
    # (-1, 28, 28, 1) layout used below.
    input_dim = 784
    Nclasses = 10
    # Inputs are reshaped to (N, 28, 28, 1); labels go through
    # to_categorical with 10 classes (presumably Keras' one-hot encoder --
    # TODO confirm against the file's imports).
    X = np.reshape(unambiguous_X, (-1, 28, 28, 1))
    Y = to_categorical(unambiguous_Y, 10)
    Xtest = np.reshape(es, (-1, 28, 28, 1))
    Ytest = to_categorical(ls, 10)
    # NOTE: the assignments above are always evaluated, even when the
    # CIFAR-10 branch below replaces their results.
    if use_cifar:
        # `data`, `img_rows`, `img_cols` and `nchannels` are bound in the
        # class body, so they also become class attributes.
        data = CIFAR10()
        Nclasses = 10
        X, Y = data.get_set('train')
        Xtest, Ytest = data.get_set('test')
        # Derive the flattened input size from the actual test image shape.
        img_rows, img_cols, nchannels = Xtest.shape[1:4]
        input_dim = img_rows * img_cols * nchannels
示例#2
0
def save_images(model, attack, set_type, first_index, last_index):
    """
    Loads the dataset matching the model and delegates to ``generate_attacks``.

    Parameters
    ----------
    model: str
        The name of the model used. It must belong to ``MNIST_SETS`` or
        ``CIFAR10_SETS``.
    attack: str
        The type of used attack (either "jsma", "wjsma" or "tjsma").
    set_type: str
        The type of set used (either "train" or "test").
    first_index: int
        The index of the first image attacked.
    last_index: int
        The index of the last image attacked.

    Raises
    ------
    ValueError
        If the model belongs to neither ``MNIST_SETS`` nor ``CIFAR10_SETS``.
    """

    if model in MNIST_SETS:
        from cleverhans.dataset import MNIST

        dataset = MNIST(train_start=0, train_end=60000,
                        test_start=0, test_end=10000)
        samples, labels = dataset.get_set(set_type)
        attack_gamma = 0.155
    elif model in CIFAR10_SETS:
        from cleverhans.dataset import CIFAR10

        dataset = CIFAR10(train_start=0, train_end=50000,
                          test_start=0, test_end=10000)
        samples, labels = dataset.get_set(set_type)
        # Force the labels into a (number of samples, 10) matrix.
        nb_samples = labels.shape[0]
        labels = labels.reshape((nb_samples, 10))
        attack_gamma = 0.039
    else:
        raise ValueError("Invalid model: " + model)

    # Output location and trained model file are both derived from the names.
    destination = "attack/" + model + "/" + attack + "_" + set_type
    joblib_file = "models/joblibs/" + model + ".joblib"

    generate_attacks(
        save_path=destination,
        file_path=joblib_file,
        x_set=samples,
        y_set=labels,
        attack=attack,
        gamma=attack_gamma,
        first_index=first_index,
        last_index=last_index,
    )
示例#3
0
def model_test(file_name=FILE_NAME):
    """
    Runs ``model_testing`` on the CIFAR-10 train and test sets.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file.
    """

    dataset = CIFAR10(train_start=0, train_end=50000, test_start=0, test_end=10000)
    train_images, train_labels = dataset.get_set('train')
    test_images, test_labels = dataset.get_set('test')

    # The labels are forced into (number of samples, 10) matrices.
    train_labels = train_labels.reshape((50000, 10))
    test_labels = test_labels.reshape((10000, 10))

    model_testing(file_name, train_images, train_labels, test_images, test_labels)
示例#4
0
def model_train(file_name=FILE_NAME):
    """
    Builds the all-convolutional model and trains it over the CIFAR-10 dataset.

    The network is three groups of two (3x3 convolution, ReLU) pairs, each
    group followed by a 2x2 max pooling, then a 10-filter convolution, a
    global average pooling and a softmax.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file, forwarded to ``model_training``.
    """

    layers = []

    # Filter counts of the two convolutions in each group.
    for group_filters in ((64, 128), (128, 256), (256, 512)):
        for nb_filters in group_filters:
            layers.append(Conv2D(nb_filters, (3, 3), (1, 1), "SAME"))
            layers.append(ReLU())

        layers.append(MaxPooling2D((2, 2), (2, 2), "VALID"))

    # Classification head: one filter per class.
    layers.append(Conv2D(10, (3, 3), (1, 1), "SAME"))
    layers.append(GlobalAveragePool())
    layers.append(Softmax())

    model = MLP(layers, (None, 32, 32, 3))

    dataset = CIFAR10(train_start=0, train_end=50000, test_start=0, test_end=10000)
    train_images, train_labels = dataset.get_set('train')
    test_images, test_labels = dataset.get_set('test')

    # The labels are forced into (number of samples, 10) matrices.
    train_labels = train_labels.reshape((50000, 10))
    test_labels = test_labels.reshape((10000, 10))

    model_training(model, file_name,
                   train_images, train_labels,
                   test_images, test_labels,
                   nb_epochs=10,
                   batch_size=128,
                   learning_rate=.001,
                   label_smoothing=0.1)
def generate_adv_images(gpu,
                        attack_algo,
                        dataset,
                        source_data_dir,
                        train_start=0,
                        train_end=1000000,
                        test_start=0,
                        test_end=100000,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        learning_rate=0.001,
                        testing=False,
                        num_threads=None,
                        label_smoothing=0.1,
                        args=FLAGS):
    """
    Train (or restore) a classifier, then generate and save untargeted
    adversarial images for the whole train and test sets.

    The results are written to ``<output_dir>/<attack_algo>_untargeted_train.npz``
    and ``<output_dir>/<attack_algo>_untargeted_test.npz`` where ``output_dir``
    is ``DATASET_ADV_OUTPUT[args.dataset] + "/" + args.arch``.

    :param gpu: value assigned to the CUDA_VISIBLE_DEVICES environment variable
    :param attack_algo: key into ATTACKERS / UNTARGETED_ATTACKER_PARAM; also
                        used in the output file names
    :param dataset: one of "CIFAR10", "CIFAR100", "CIFAR100_coarse_label",
                    "MNIST", "FashionMNIST", "ImageNet", "TinyImageNet"
    :param source_data_dir: the source data directory handed to the dataset
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example (not forwarded for
                     "ImageNet" / "TinyImageNet")
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches; replaced by the attack's own
                       "batch_size" entry when UNTARGETED_ATTACKER_PARAM
                       defines one
    :param learning_rate: learning rate for training
    :param testing: if true, evaluate the restored model on the clean test set
                    (only done when a checkpoint was restored)
    :param num_threads: if truthy, the session is limited to one intra-op
                        thread (the value itself is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param args: object providing ``dataset`` and ``arch`` attributes
    :return: an AccuracyReport object
    :raises ValueError: if ``dataset`` or ``args.arch`` is not supported
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    if "batch_size" in UNTARGETED_ATTACKER_PARAM[attack_algo]:
        batch_size = UNTARGETED_ATTACKER_PARAM[attack_algo]["batch_size"]
        config.BATCH_SIZE = batch_size
    # NOTE(review): paths and channel counts are keyed by ``args.dataset``
    # while the data/model selection uses the ``dataset`` argument -- confirm
    # that callers always pass the same value for both.
    output_dir = DATASET_ADV_OUTPUT[args.dataset] + "/" + args.arch
    os.makedirs(output_dir, exist_ok=True)
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    # Set logging level to see debug information
    set_log_level(logging.DEBUG)
    # Create TF session
    config_args = {}
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    config_args["gpu_options"] = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    # Get the requested dataset
    if dataset == "CIFAR10":
        data = CIFAR10(data_dir=source_data_dir,
                       train_start=train_start,
                       train_end=train_end,
                       test_start=test_start,
                       test_end=test_end)
    elif dataset == "CIFAR100" or dataset == "CIFAR100_coarse_label":
        data = CIFAR100(data_dir=source_data_dir,
                        dataset_name=dataset,
                        train_start=train_start,
                        train_end=train_end,
                        test_start=test_start,
                        test_end=test_end)
    elif dataset == "MNIST" or dataset == "FashionMNIST":
        data = MNIST(data_dir=source_data_dir,
                     train_start=train_start,
                     train_end=train_end,
                     test_start=test_start,
                     test_end=test_end)
    elif dataset == "ImageNet":
        data = MiniImageNet(data_dir=source_data_dir,
                            train_start=train_start,
                            train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["ImageNet"],
                            arch=args.arch)
    elif dataset == "TinyImageNet":
        data = TinyImageNet(data_dir=source_data_dir,
                            train_start=train_start,
                            train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["TinyImageNet"])
    else:
        # Fail here instead of with an unbound ``data`` a few lines below.
        raise ValueError("Unsupported dataset: {}".format(dataset))

    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]
    # Define input TF placeholders; the batch dimension is fixed to batch_size
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}

    rng = np.random.RandomState([2017, 8, 30])

    def do_generate_eval(adv_x,
                         pred_adv_x,
                         x_set,
                         y_set,
                         report_key,
                         is_adv=None):
        # Runs the attack over (x_set, y_set) and records its success rate
        # on the report under ``report_key``.
        adv_images_total, adv_pred_total, gt_label_total, success_rate = untargeted_advx_image_eval(
            sess, x, y, adv_x, pred_adv_x, x_set, y_set, args=eval_params)

        setattr(report, report_key, success_rate)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('adversarial attack successful rate on %s: %0.4f' %
                  (report_text, success_rate))
        return adv_images_total, adv_pred_total, gt_label_total, success_rate  # shape = (total, H,W,C)

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Records the accuracy of ``preds`` on (x_set, y_set) on the report.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    input_shape = [
        IMG_SIZE[dataset], IMG_SIZE[dataset],
        DATASET_INCHANNELS[args.dataset]
    ]
    if args.arch == "conv4":
        model = Shallow4ConvLayersConv(
            args.arch,
            IMG_SIZE[dataset],
            CLASS_NUM[dataset],
            in_channels=DATASET_INCHANNELS[args.dataset],
            dim_hidden=64)
        model.is_training = False
    elif args.arch == "vgg16":
        model = VGG16("vgg_16", CLASS_NUM[dataset], input_shape)
        model.is_training = False
    elif args.arch == "vgg16small":
        model = VGG16Small(args.arch, CLASS_NUM[dataset], input_shape)
    elif args.arch == "resnet10":
        model = ResNet10(args.arch, CLASS_NUM[dataset], input_shape)
    elif args.arch == "resnet18":
        model = ResNet18(args.arch, CLASS_NUM[dataset], input_shape)
    elif args.arch == "resnet50":
        model = ResNet50(args.arch, CLASS_NUM[dataset], input_shape)
    elif args.arch == "resnet101":
        model = ResNet101(args.arch, CLASS_NUM[dataset], input_shape)
    else:
        # Fail here instead of with an unbound ``model`` further down.
        raise ValueError("Unsupported architecture: {}".format(args.arch))

    def evaluate():
        # Clean test accuracy, computed with the model in inference mode.
        if hasattr(model, "is_training"):
            model.is_training = False
        preds = model.get_logits(x)  # tf.tensor
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)
        if hasattr(model, "is_training"):
            model.is_training = True

    def make_saver():
        # Saver over the trainable variables plus the batch-norm moving
        # statistics (which are not trainable but must be checkpointed).
        g_list = tf.global_variables()
        bn_moving_vars = [g for g in g_list if 'moving_mean' in g.name]
        bn_moving_vars += [g for g in g_list if 'moving_variance' in g.name]
        var_list = list(tf.trainable_variables()) + bn_moving_vars
        return tf.train.Saver(var_list=var_list)

    resume = TF_CLEAN_IMAGE_MODEL_PATH[args.dataset] + "/{0}".format(args.arch)
    os.makedirs(resume, exist_ok=True)
    print("using folder {} to store model".format(resume))
    resume_files = os.listdir(resume)
    loss = CrossEntropy(model, smoothing=label_smoothing)
    has_checkpoint = any(
        os.path.isfile(resume + "/" + e) for e in resume_files)
    if not has_checkpoint:  # clean train must be done!
        if hasattr(model, "is_training"):
            model.is_training = True

        saver = make_saver()
        train(sess,
              loss,
              None,
              None,
              model,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())  # train for nb_epochs epochs
        save_path = saver.save(sess,
                               "{}/model".format(resume),
                               global_step=nb_epochs)
        print("Model saved in path: %s" % save_path)
    else:
        if len(resume_files) == 1 and resume_files[0].endswith("ckpt"):
            # A single "*ckpt" file is restored directly.
            path = resume + "/" + resume_files[0]
            saver = make_saver()
            saver.restore(sess, path)
            print("load pretrained model {}".format(path))
        else:
            # resume from the most recent checkpoint in the folder
            latest_checkpoint = tf.train.latest_checkpoint(resume)
            saver = make_saver()
            saver.restore(sess, latest_checkpoint)
            print("load pretrained model {}".format(resume))

        # Calculate the clean test accuracy of the restored model
        if testing:
            evaluate()
    if hasattr(model, "is_training"):
        model.is_training = False
    # Initialize the attack object and graph
    attacker = ATTACKERS[attack_algo](model, sess=sess)
    # Work on a copy: "y_target" below is a tensor of this call's graph and
    # must not be stored in the shared module-level parameter table.
    param_dict = dict(UNTARGETED_ATTACKER_PARAM[attack_algo])

    if attack_algo in NEED_TARGETED_Y:
        y_target = look_for_target_otherthan_gt(y, CLASS_NUM[args.dataset])
        y_target = tf.reshape(y_target, (batch_size, -1))
        param_dict["y_target"] = y_target

    adv_x = attacker.generate(x, **param_dict)  # tensor
    preds_adv = model.get_logits(adv_x)
    # generate adversarial examples for the train set, then the test set
    adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(
        adv_x, preds_adv, x_train, y_train, "clean_train_adv_eval", True)
    np.savez(output_dir + "/{0}_untargeted_train.npz".format(attack_algo),
             adv_images=adv_images_total,
             adv_pred=adv_pred_total,
             gt_label=gt_label_total,
             attack_success_rate=success_rate)

    adv_images_total, adv_pred_total, gt_label_total, success_rate = do_generate_eval(
        adv_x, preds_adv, x_test, y_test, "clean_test_adv_eval", True)
    np.savez(output_dir + "/{0}_untargeted_test.npz".format(attack_algo),
             adv_images=adv_images_total,
             adv_pred=adv_pred_total,
             gt_label=gt_label_total,
             attack_success_rate=success_rate)
    print('generate {} adversarial image done'.format(attack_algo))

    return report
示例#6
0
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1, retrain=False,
                     source_samples=SOURCE_SAMPLES,
                     attack_iterations=ATTACK_ITERATIONS,
                     targeted=TARGETED):
  """
  Evaluate a series of saved CIFAR10 models against the Carlini-Wagner L2
  attack.

  For every checkpoint number 50, 100, ..., 1000 the model stored at
  ``os.path.join(save_dir, filename + "-" + <number>)`` is loaded (or trained
  and saved when no ``.meta`` file exists), attacked, and its accuracy and
  distortion are printed.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param clean_train: currently not used by the body
  :param testing: currently not used by the body
  :param backprop_through_attack: currently not used by the body
  :param nb_filters: currently overridden; the model is always built with 64
  :param num_threads: if truthy, every session is limited to one intra-op
                      thread (the value itself is not used)
  :param label_smoothing: currently not used; the loss always uses 0.1
  :param retrain: currently not used by the body
  :param source_samples: number of test samples the attack batch is sized for
  :param attack_iterations: maximum number of attack iterations
  :param targeted: if True, build one target per class for every sample
  :return: an AccuracyReport object (no field is filled in by this function)
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Session options, applied to the session of every checkpoint below
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  # NOTE(review): this input pipeline is not consumed below (training uses
  # x_train / y_train directly). It is kept because it adds ops to the graph
  # used by the first checkpoint -- confirm before removing.
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # NOTE(review): these placeholders are replaced inside the loop; kept for
  # the same reason as the pipeline above.
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  distortionArr = []
  accuracyArr = []
  # Checkpoint numbers 50, 100, ..., 1000
  for saveFileNum in range(50, 1001, 50):
    model_path = os.path.join(save_dir, filename + "-" + str(saveFileNum))
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session; it is closed before the graph is reset so that no
    # session outlives the graph it was created for.
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")
    try:
      # Define input TF placeholder
      x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                            nchannels))
      y = tf.placeholder(tf.float32, shape=(None, nb_classes))
      nb_filters = 64

      # Define TF model graph
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
      preds = model.get_logits(x)
      loss = CrossEntropy(model, smoothing=0.1)
      print("Defined TensorFlow model graph.")

      #########################################################################
      # Training the model using TensorFlow
      #########################################################################

      train_params = {
          'nb_epochs': nb_epochs,
          'batch_size': batch_size,
          'learning_rate': learning_rate,
          'filename': os.path.split(model_path)[-1]
      }

      rng = np.random.RandomState([2017, 8, 30])

      print("Trying to load trained model from: " + model_path)
      # check if we've trained before, and if we have, use that pre-trained
      # model
      if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
        print("Load trained model")
      else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

      #########################################################################
      # Craft adversarial examples using Carlini and Wagner's approach
      #########################################################################
      nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
      print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
            ' adversarial examples')
      print("This could take some time ...")

      # Instantiate a CW attack object
      cw = CarliniWagnerL2(model, sess=sess)

      if targeted:
        # Every source sample is repeated once per class and paired with the
        # matching one-hot target.
        adv_inputs = np.array(
            [[instance] * nb_classes for
             instance in x_test[:source_samples]], dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
        cw_params_batch_size = source_samples * nb_classes
      else:
        adv_inputs = x_test

        adv_ys = None
        yname = "y"
        cw_params_batch_size = source_samples

      cw_params = {'binary_search_steps': 1,
                   'max_iterations': attack_iterations,
                   'learning_rate': CW_LEARNING_RATE,
                   'batch_size': cw_params_batch_size,
                   'initial_const': 10}

      # NOTE(review): the label entry is added only after the symbolic attack
      # has been built, so ``cw.generate`` never receives it -- confirm intent.
      adv2 = cw.generate(x, **cw_params)
      cw_params[yname] = adv_ys
      # NOTE(review): the numpy attack (cw.generate_np) is disabled, so the
      # targeted evaluation below receives ``None`` as its inputs.
      adv_x = None

      eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
      if targeted:
        accuracy = model_eval(
            sess, x, y, preds, adv_x, adv_ys, args=eval_params)
        # No distortion is measured on this path; use NaN so the report
        # below does not fail on an unbound name.
        distortion = float('nan')
      else:
        accuracy, distortion = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params, is_adv=True, ae=adv2,
                                          type=type, datasetName="CIFAR10", discretizeColor=discretizeColor)
    finally:
      sess.close()

    print('--------------------------------------')
    print("load save file: ", saveFileNum)
    print('Test accuracy on examples: %0.4f ,distortion: %0.4f' % (accuracy, distortion))

    distortionArr.append(distortion)
    accuracyArr.append(accuracy)
    # Start the next checkpoint from an empty graph
    tf.reset_default_graph()

  print("accuracy:")
  for accuracy in accuracyArr:
    print(accuracy)

  print("distortion:")
  for distortion in distortionArr:
    print(distortion)

  return report
def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS):

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    rng = np.random.RandomState()

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    nb_latent_size = 100
    # Get MNIST test data
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))
    z = tf.placeholder(tf.float32, shape=(None, nb_latent_size))
    z_t = tf.placeholder(tf.float32, shape=(None, nb_latent_size))

    #nb_filters = 64
    nb_layers = 500
    '''
  def do_eval_cls(preds, x_set, y_set, x_tar_set,report_key, is_adv = None):
    acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  def eval_cls():
    do_eval_cls(y_logits, x_test, y_test, x_test,'clean_train_clean_eval', False)
  '''
    '''
  def evaluate():
        do_eval(y_logits, x_test, y_test, 'clean_train_clean_eval', False)

  filepath_ae = "clean_model_cifar10_ae.joblib"
  filepath_cl = "classifier_cifar10.joblib"

  
# Define TF model graph
  model = ModelBasicAE('model', nb_layers, nb_latent_size)
  #cl_model = ModelCls('cl_model')
  #cl_model = ModelAllConvolutional('model1', nb_classes, nb_filters,
  #                                input_shape=[32, 32, 3])
  #preds = model.get_logits(x)
  recons = model.get_layer(x, 'RECON')
  latent1_orig = model.get_layer(x, 'LATENT')
  latent1_orig_recon = model.get_layer(recons, 'LATENT')

  loss = SquaredError(model)
  print("Defined TensorFlow model graph.")
  #y_logits = cl_model.get_logits(x)
  #loss_cls = CrossEntropy(cl_model, smoothing=label_smoothing)
  ###########################################################################
  # Training the model using TensorFlow
  ###########################################################################

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'filename': os.path.split(model_path)[-1]
  }
  
  
  train_params_cls = {
      'nb_epochs': 4,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  
  rng = np.random.RandomState([2017, 8, 30])
  # check if we've trained before, and if we have, use that pre-trained model
  #if os.path.exists(model_path + ".meta"):
   # tf_model_load(sess, model_path)
  #else:
  #eval_params_cls = {'batch_size': batch_size}
  # Evaluate the accuracy of the MNIST model on legitimate test examples
  eval_params = {'batch_size': batch_size}
  
  def do_eval(recons, x_orig, x_target, y_orig, y_target, report_key, is_adv=False, x_adv = None, recon_adv = False, lat_orig = None, lat_orig_recon = None):
    noise, d_orig, d_targ, avg_dd, d_latent = model_eval_ae(sess, x, x_t, recons, x_orig, x_target, x_adv, recon_adv, lat_orig, lat_orig_recon, args = eval_params)
    setattr(report, report_key, avg_dd)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test d1 on ', report_text,  ' examples: ', d_orig)
      print('Test d2 on ', report_text,' examples: ', d_targ)
      print('Test distance difference on %s examples: %0.4f' % (report_text, avg_dd))
      print('Noise added: ', noise)
      print("dist_latent_orig_recon on ", report_text, "examples : ", d_latent)
      print()

  def evaluate_ae():
    do_eval(recons, x_test, x_test, y_test, y_test, 'clean_train_clean_eval', False, None, None, latent1_orig, latent1_orig_recon)

  print("Training autoencoder")
  train_ae(sess, loss, x_train,x_train, evaluate = evaluate_ae, args=train_params, rng=rng, var_list=model.get_params())
  #with sess.as_default():
   # save(filepath_ae, model)
  '''
    save_dir = 'models'
    model_name = 'cifar10_AE'
    model_path_ae = os.path.join(save_dir, model_name)

    if clean_train_ae == True:
        input_img = Input(shape=(32, 32, 3))
        x = Conv2D(64, (3, 3), padding='same')(input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(16, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        encoded = MaxPooling2D((2, 2), padding='same')(x)

        x = Conv2D(16, (3, 3), padding='same')(encoded)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(3, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        decoded = Activation('sigmoid')(x)

        model = Model(input_img, decoded)
        model.compile(optimizer='adam', loss='binary_crossentropy')
        #es_cb = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto')
        #chkpt = saveDir + 'AutoEncoder_Cifar10_Deep_weights.{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5'
        #cp_cb = ModelCheckpoint(filepath = chkpt, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
        model.fit(
            x_train,
            x_train,
            batch_size=128,
            epochs=2,
            verbose=1,
            validation_data=(x_test, x_test),
            #callbacks=[es_cb, cp_cb],
            shuffle=True)
        score = model.evaluate(x_test, x_test, verbose=1)
        print(score)
        model.save(model_path_ae)
        print('Saved trained model at %s ' % model_path)

    else:
        model = load_model(model_path_ae)

    num_classes = 10
    save_dir = 'models'
    model_name = 'cifar10_CNN'
    model_path_cls = os.path.join(save_dir, model_name)

    if clean_train_cl == True:
        print("Training CNN classifier")
        cl_model = Sequential()
        cl_model.add(
            Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(32, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Conv2D(64, (3, 3), padding='same'))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(64, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Flatten())
        cl_model.add(Dense(512))
        cl_model.add(Activation('relu'))
        cl_model.add(Dropout(0.5))
        cl_model.add(Dense(num_classes))
        cl_model.add(Activation('softmax'))

        opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

        # Let's train the model using RMSprop
        cl_model.compile(loss='categorical_crossentropy',
                         optimizer=opt,
                         metrics=['accuracy'])

        cl_model.fit(x_train,
                     y_train,
                     batch_size=90,
                     epochs=4,
                     validation_data=(x_test, y_test),
                     shuffle=True)

        cl_model.save(model_path_cls)
        print('Saved trained model at %s ' % model_path)

    else:
        cl_model = load_model(model_path_cls)

        # Score trained model.
    scores = cl_model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    '''
  train(sess, loss_cls, None, None,
            dataset_train=dataset_train, dataset_size=dataset_size,
            evaluate=eval_cls, args=train_params_cls, rng=rng,
            var_list=cl_model.get_params())
  '''
    #with sess.as_default():
    # save(filepath_cl, cl_model)
    '''
  else:
    

    model = load(filepath_ae)
    cl_model = load(filepath_cl)
  '''

    #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params())
    #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params())

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(model, cl_model, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            adv_input_targets = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_test[idxs[id]])
                adv_input_targets.append(targ)
            adv_input_targets = np.array(adv_input_targets)

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), 10)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), 10)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

    cw_params = {
        'binary_search_steps': 4,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    adv = sess.run(adv)
    #print("shaep of adv: ", np.shape(adv))
    '''
  recons = model.get_layer(x, 'RECON')
  recon_orig = model.get_layer(adv_inputs, 'RECON')
  recon_adv = model.get_layer(adv, 'RECON')
  lat_orig = model.get_layer(x, 'LATENT')
  lat_orig_recon = model.get_layer(recons, 'LATENT')
  #pred_adv_recon = cl_model.get_logits(recon_adv)
  '''
    recon_orig = model.predict(adv_inputs)
    recon_adv = model.predict(adv)
    #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    #eval_params = {'batch_size': 90}

    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)

    #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
    shape = np.shape(adv_inputs)
    noise = reduce_sum(np.square(adv_inputs - adv), list(range(1, len(shape))))
    print("noise: ", noise)
    #recon_adv = sess.run(recon_adv)
    #recon_orig = sess.run(recon_orig)
    scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
    print("classifier acc_target: ", scores2[1])
    print("classifier acc_true: ", scores1[1])

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average L2 norm of the perturbation (adv - adv_inputs);
    # note that, despite its name, percent_perturbed is not a percentage.
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    #sess.close()

    # Finally, render the grids of adversarial examples and save them to disk

    if viz_enabled:
        #_ = grid_visual(grid_viz_data)
        #_ = grid_visual(grid_viz_data_1)

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Save the plot to disk, then start a second figure for the other grid
        plt.savefig('cifar10_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig2')

    #return report

    #adversarial training
    if (adv_train == True):

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        for i in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_train[idxs[id]])
                adv_input_targets_2.append(targ)
            adv_input_targets_2 = np.array(adv_input_targets_2)

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set),
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        adv_input_set = np.reshape(
            adv_input_set,
            (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] *
             np.shape(adv_input_set)[2], np.shape(adv_input_set)[3],
             np.shape(adv_input_set)[4], np.shape(adv_input_set)[5]))
        adv_input_target_set = np.reshape(adv_input_target_set,
                                          (np.shape(adv_input_target_set)[0] *
                                           np.shape(adv_input_target_set)[1],
                                           np.shape(adv_input_target_set)[2],
                                           np.shape(adv_input_target_set)[3],
                                           np.shape(adv_input_target_set)[4]))

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_name = 'cifar10_AE_adv'
        model_path_ae = os.path.join(save_dir, model_name)

        input_img = Input(shape=(32, 32, 3))
        x = Conv2D(64, (3, 3), padding='same')(input_img)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
        x = Conv2D(16, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        encoded = MaxPooling2D((2, 2), padding='same')(x)

        x = Conv2D(16, (3, 3), padding='same')(encoded)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(32, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(64, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
        x = Conv2D(3, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        decoded = Activation('sigmoid')(x)

        model2 = Model(input_img, decoded)
        model2.compile(optimizer='adam', loss='binary_crossentropy')

        model2.fit(x_train_app,
                   x_train_aim,
                   batch_size=128,
                   epochs=20,
                   verbose=1,
                   validation_data=(x_test, x_test),
                   callbacks=[es_cb, cp_cb],
                   shuffle=True)
        score = model.evaluate(x_test, x_test, verbose=1)
        print(score)
        model2.save(model_path_ae_adv)
        print('Saved adv trained model at %s ' % model_path)
        '''
    model_adv_trained = ModelBasicAE('model_adv_trained', nb_layers, nb_latent_size)
    recons_2 = model_adv_trained.get_layer(x, 'RECON')
    loss_2 = SquaredError(model_adv_trained) 
    train_ae(sess, loss_2, x_train_app, x_train_aim ,args=train_params, rng=rng, var_list=model_adv_trained.get_params())
    saver = tf.train.Saver()
    saver.save(sess, model_path)
    '''

        cw2 = CarliniWagnerAE(model_adv_trained, cl_model, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_adv = model2.predict(adv)
        recon_orig = model2.predict(adv_inputs)
        #print("shaep of adv: ", np.shape(adv))
        '''
    recon_orig = model_adv_trained.get_layer(adv_inputs, 'RECON')
    recon_adv = model_adv_trained.get_layer(adv_2, 'RECON')
    lat_orig = model_adv_trained.get_layer(x, 'LATENT')
    lat_orig_recon = model_adv_trained.get_layer(recons, 'LATENT')
    '''
        #pred_adv_recon = cl_model.get_logits(recon_adv)

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        #eval_params = {'batch_size': 90}
        if targeted:
            #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv_2, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
            #acc = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)
            #print("d1: ", d1)
            #print("d2: ", d2)
            #print("d1-d2: ", dist_diff)
            #print("Avg_dist_lat: ", avg_dist_lat)
            #print("classifier acc: ", acc)
        '''  
    recon_adv = sess.run(recon_adv)
    recon_orig = sess.run(recon_orig)
    '''
        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])

        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i,
                                            j] = adv_2[i * (nb_classes - 1) +
                                                       j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i, j] = adv_2[i *
                                                          (nb_classes - 1) + j]

            #rint(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully found

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')

            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')

                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig1_adv_trained')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig2_adv_trained')

            return report


    # Binarization / mean-filtering defense
    if (binarization_defense == True or mean_filtering == True):

        #adv = sess.run(adv)
        # print(adv[0])
        if (binarization_defense == True):
            adv[adv > 0.5] = 1.0
            adv[adv <= 0.5] = 0.0
        else:
            #radius = 2
            #adv_list = [mean(adv[i,:,:,0], disk(radius)) for i in range(0, np.shape(adv)[0])]
            #adv = np.array(adv_list)
            #adv = np.expand_dims(adv, axis = 3)
            adv = uniform_filter(adv, 2)
            #adv = median_filter(adv, 2)
        #print("after bin ")
        #print(adv[0])
        '''
    recons = model.get_layer(x, 'RECON')
    recon_orig = model.get_layer(adv_inputs, 'RECON')
    recon_adv = model.get_layer(adv, 'RECON')
    lat_orig = model.get_layer(x, 'LATENT')
    lat_orig_recon = model.get_layer(recon_orig, 'LATENT')
    '''
        recon_orig = model.predict(adv_inputs)
        recon_adv = model.predict(adv)

        #pred_adv_recon = cl_model.get_logits(recon_adv)

        #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
        eval_params = {'batch_size': 90}
        if targeted:
            #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params)
            #acc1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
            #acc2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)

            #print("d1: ", d1)
            #print("d2: ", d2)
            noise = reduce_sum(tf.square(x_orig - x_adv),
                               list(range(1, len(shape))))
            print("noise: ", noise)
            #print("classifier acc for target class: ", acc1)
            #print("classifier acc for true class: ", acc2)
        '''
    recon_adv = sess.run(recon_adv)
    recon_orig = sess.run(recon_orig)
    '''
        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.evalluate(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])
        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]
            sess.close()

            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig1_bin')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_fig2_bin')
def _cw_latent_build_autoencoder():
    """Build and compile the convolutional autoencoder for 32x32x3 images.

    The encoder is three Conv-BatchNorm-ReLU-MaxPool stages (64, 32, 16
    filters); the decoder is three Conv-BatchNorm-ReLU-UpSampling stages
    (16, 32, 64 filters) followed by a 3-filter Conv-BatchNorm-sigmoid.

    :return: a Keras ``Model`` compiled with Adam and binary cross-entropy
    """
    input_img = Input(shape=(32, 32, 3))
    x = input_img
    for filters in (64, 32, 16):
        x = Conv2D(filters, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((2, 2), padding='same')(x)
    for filters in (16, 32, 64):
        x = Conv2D(filters, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = UpSampling2D((2, 2))(x)
    x = Conv2D(3, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    decoded = Activation('sigmoid')(x)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder


def _cw_latent_first_index_per_class(y_onehot, nb_classes):
    """Return, for each class, the index of its first sample in ``y_onehot``.

    Raises ``IndexError`` if some class has no sample at all.
    """
    labels = np.argmax(y_onehot, axis=1)
    return [np.where(labels == cls)[0][0] for cls in range(nb_classes)]


def _cw_latent_source_target_pairs(x_set, y_set, idxs):
    """Pair every representative sample with every *other* representative.

    ``idxs[c]`` is the index of the representative of class ``c``. Rows are
    ordered source-major: all targets for source 0, then source 1, and so on,
    skipping the pair where source and target coincide.

    :return: ``(source_images, target_images, source_labels, target_labels)``,
             each with ``len(idxs) * (len(idxs) - 1)`` rows; the source
             images are cast to float32
    """
    src_x, tgt_x, src_y, tgt_y = [], [], [], []
    for src_pos, src_idx in enumerate(idxs):
        for tgt_pos, tgt_idx in enumerate(idxs):
            if tgt_pos == src_pos:
                continue
            src_x.append(x_set[src_idx])
            tgt_x.append(x_set[tgt_idx])
            src_y.append(y_set[src_idx])
            tgt_y.append(y_set[tgt_idx])
    return (np.array(src_x, dtype=np.float32), np.array(tgt_x),
            np.array(src_y), np.array(tgt_y))


def _cw_latent_fill_grids(recon_grid, image_grid, recon_orig, recon_adv,
                          clean_inputs, adv_images, nb_classes):
    """Fill the two ``nb_classes x nb_classes`` visualization grids in place.

    Diagonal cells ``[j, j]`` get the clean reconstruction / clean input of
    source ``j``. Off-diagonal cells ``[i, j]`` get the adversarial
    reconstruction / adversarial image stored at row
    ``i * (nb_classes - 1) + (j - 1 if j > i else j)`` of the flat arrays.
    """
    per_source = nb_classes - 1
    for j in range(nb_classes):
        for i in range(nb_classes):
            if i == j:
                recon_grid[i, j] = recon_orig[j * per_source]
                image_grid[i, j] = clean_inputs[j * per_source]
            else:
                flat = i * per_source + (j - 1 if j > i else j)
                recon_grid[i, j] = recon_adv[flat]
                image_grid[i, j] = adv_images[flat]


def _cw_latent_save_grids(recon_grid, image_grid, suffix):
    """Render both grids and save them as ``cifar10_fig1<suffix>`` and
    ``cifar10_fig2<suffix>`` (reconstructions first, then images)."""
    plt.ioff()
    for grid, stem in ((recon_grid, 'cifar10_fig1'),
                       (image_grid, 'cifar10_fig2')):
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        num_cols = grid.shape[0]
        num_rows = grid.shape[1]
        num_channels = grid.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                if num_channels == 1:
                    plt.imshow(grid[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid[xx, yy, :, :, :])

        plt.savefig(stem + suffix)


def cifar10_cw_latent(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      targeted=TARGETED,
                      num_threads=None,
                      label_smoothing=0.1,
                      nb_filters=NB_FILTERS):
    """
    Carlini-Wagner attack on a CIFAR10 autoencoder + classifier pipeline.

    Trains (or loads) a convolutional autoencoder and a CNN classifier that
    is fitted on the autoencoder's reconstructions, crafts targeted
    adversarial examples with ``CarliniWagnerAE_Lat_Keras``, reports the
    classifier's accuracy on the reconstructed adversarial images against
    both the true and the target labels, and saves grid figures. Depending on
    module-level flags it then either retrains the autoencoder on adversarial
    data and repeats the attack, or applies a binarization / mean-filter
    defense to the adversarial images and re-evaluates.

    Module-level names read by this function: ``clean_train_ae``,
    ``clean_train_cl``, ``adv_train``, ``binarization_defense``,
    ``mean_filtering`` and ``CW_LEARNING_RATE``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: must be truthy; the attack inputs are only built for
                        the grid-visualization setup
    :param nb_epochs: accepted for interface compatibility; not used here
    :param batch_size: accepted for interface compatibility; not used here
    :param source_samples: number of source images; must equal the number of
                           classes
    :param learning_rate: accepted for interface compatibility; not used here
    :param attack_iterations: ``max_iterations`` passed to the attack
    :param targeted: must be truthy; only the targeted attack is implemented
    :param num_threads: if truthy, TensorFlow intra-op parallelism is set to
                        a single thread
    :param label_smoothing: accepted for interface compatibility; not used
    :param nb_filters: accepted for interface compatibility; not used here
    :return: the ``AccuracyReport`` object when the adversarial-training
             branch runs, otherwise ``None``
    :raises ValueError: if ``targeted`` or ``viz_enabled`` is falsy
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    set_log_level(logging.DEBUG)

    # Create the single TF session used by the attack.
    # NOTE(review): the value of num_threads is ignored; any truthy value
    # pins intra-op parallelism to 1 -- confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    ###########################################################################
    # Autoencoder: train from scratch or load a saved model
    ###########################################################################
    save_dir = 'models'
    model_path_ae = os.path.join(save_dir, 'cifar10_AE')

    if clean_train_ae:
        model = _cw_latent_build_autoencoder()
        model.fit(x_train,
                  x_train,
                  batch_size=128,
                  epochs=5,
                  verbose=1,
                  validation_data=(x_test, x_test),
                  shuffle=True)
        score = model.evaluate(x_test, x_test, verbose=1)
        print(score)
        model.save(model_path_ae)
        print('Saved trained model at %s ' % model_path_ae)
    else:
        model = load_model(model_path_ae)

    # Despite the "lat" name these are the autoencoder's reconstructions
    # (the model output), not an intermediate latent code.
    x_lat_train = model.predict(x_train)
    x_lat_test = model.predict(x_test)

    ###########################################################################
    # Classifier on reconstructed images: train from scratch or load
    ###########################################################################
    model_path_cls = os.path.join(save_dir, 'cifar10_CNN_latent')

    if clean_train_cl:
        print("Training CNN AE")
        cl_model = Sequential()
        cl_model.add(
            Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:]))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(32, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Conv2D(64, (3, 3), padding='same'))
        cl_model.add(Activation('relu'))
        cl_model.add(Conv2D(64, (3, 3)))
        cl_model.add(Activation('relu'))
        cl_model.add(MaxPooling2D(pool_size=(2, 2)))
        cl_model.add(Dropout(0.25))

        cl_model.add(Flatten())
        cl_model.add(Dense(512))
        cl_model.add(Activation('relu'))
        cl_model.add(Dropout(0.5))
        cl_model.add(Dense(nb_classes))
        cl_model.add(Activation('softmax'))

        opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

        # Let's train the model using RMSprop
        cl_model.compile(loss='categorical_crossentropy',
                         optimizer=opt,
                         metrics=['accuracy'])

        # NOTE(review): training uses reconstructions but validation uses the
        # raw test images; the final evaluation below uses reconstructions.
        cl_model.fit(x_lat_train,
                     y_train,
                     batch_size=90,
                     epochs=2,
                     validation_data=(x_test, y_test),
                     shuffle=True)

        cl_model.save(model_path_cls)
        print('Saved trained model at %s ' % model_path_cls)
    else:
        cl_model = load_model(model_path_cls)

    # Score trained model.
    scores = cl_model.evaluate(x_lat_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack Object
    cw = CarliniWagnerAE_Lat_Keras(model, cl_model, sess=sess)

    # The source/target pairs below only exist for the targeted attack in the
    # grid-visualization setup; fail with a clear message instead of an
    # undefined-name error further down.
    if not (targeted and viz_enabled):
        sess.close()
        raise ValueError(
            'cifar10_cw_latent requires targeted=True and viz_enabled=True')

    assert source_samples == nb_classes
    idxs = _cw_latent_first_index_per_class(y_test, nb_classes)

    # Initialize our arrays for grid visualization
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

    (adv_inputs, adv_input_targets, adv_input_y,
     adv_target_y) = _cw_latent_source_target_pairs(x_test, y_test, idxs)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))

    cw_params = {
        'binary_search_steps': 4,
        'y_target': adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': source_samples * (nb_classes - 1),
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    adv = sess.run(adv)

    recon_orig = model.predict(adv_inputs)
    recon_adv = model.predict(adv)
    shape = np.shape(adv_inputs)
    noise = reduce_sum(np.square(adv_inputs - adv), list(range(1, len(shape))))
    print("noise: ", noise)
    scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
    print("classifier acc_target: ", scores2[1])
    print("classifier acc_true: ", scores1[1])

    _cw_latent_fill_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                          recon_adv, adv_inputs, adv, nb_classes)

    print('--------------------------------------')

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    _cw_latent_save_grids(grid_viz_data, grid_viz_data_1, '')

    ###########################################################################
    # Optional: adversarial training of a fresh autoencoder
    ###########################################################################
    if adv_train:
        print("starting adversarial training")
        nb_adv_rounds = 20
        adv_input_set = []
        adv_input_target_set = []

        for _ in range(nb_adv_rounds):
            # Reshuffle so each round picks different class representatives.
            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            train_idxs = _cw_latent_first_index_per_class(y_train, nb_classes)
            round_inputs, round_targets, _, _ = (
                _cw_latent_source_target_pairs(x_train, y_train, train_idxs))
            adv_input_set.append(round_inputs)
            adv_input_target_set.append(round_targets)

        adv_input_set = np.array(adv_input_set)
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        # Collapse (round, pair) into a single sample axis.
        adv_input_set = adv_input_set.reshape(
            (-1, img_rows, img_cols, nchannels))
        adv_input_target_set = adv_input_target_set.reshape(
            (-1, img_rows, img_cols, nchannels))

        print("generated adversarial training set")

        # NOTE(review): unlike the first attack, this result is not passed
        # through sess.run -- confirm what generate_np returns.
        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        # Inputs are clean + adversarial images; targets are clean images
        # paired with the clean sources of the adversarial ones.
        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_path_ae_adv = os.path.join(save_dir, 'cifar10_AE_adv_lat')

        model2 = _cw_latent_build_autoencoder()
        model2.fit(x_train_app,
                   x_train_aim,
                   batch_size=128,
                   epochs=20,
                   verbose=1,
                   validation_data=(x_test, x_test),
                   shuffle=True)
        score = model2.evaluate(x_test, x_test, verbose=1)
        print(score)
        model2.save(model_path_ae_adv)
        print('Saved adv trained model at ', model_path_ae_adv)

        cw2 = CarliniWagnerAE_Lat_Keras(model2, cl_model, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        # NOTE(review): this reconstructs `adv` (crafted against the original
        # autoencoder), while the grids and distortion below use `adv_2`.
        recon_adv = model2.predict(adv)
        recon_orig = model2.predict(adv_inputs)

        noise = reduce_sum(tf.square(adv_inputs - adv_2),
                           list(range(1, len(shape))))
        print("noise: ", noise)

        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])

        _cw_latent_fill_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                              recon_adv, adv_inputs, adv_2, nb_classes)

        print('--------------------------------------')

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        sess.close()

        _cw_latent_save_grids(grid_viz_data, grid_viz_data_1, '_adv_trained')

        return report

    ###########################################################################
    # Optional: binarization / mean-filter defense on the adversarial images
    ###########################################################################
    if binarization_defense or mean_filtering:
        if binarization_defense:
            # Threshold in place at 0.5.
            adv[adv > 0.5] = 1.0
            adv[adv <= 0.5] = 0.0
        else:
            adv = uniform_filter(adv, 2)

        recon_orig = model.predict(adv_inputs)
        recon_adv = model.predict(adv)

        noise = reduce_sum(tf.square(adv_inputs - adv),
                           list(range(1, len(shape))))
        print("noise: ", noise)

        scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1)
        print("classifier acc_target: ", scores2[1])
        print("classifier acc_true: ", scores1[1])

        _cw_latent_fill_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                              recon_adv, adv_inputs, adv, nb_classes)

        _cw_latent_save_grids(grid_viz_data, grid_viz_data_1, '_bin')

    # Close TF session
    sess.close()


def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        # ord: ,
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  #1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # Madry dosen't divide by 255
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))

    print(
        'clip_min : {}, clip_max : {}  >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<'
        .format(pgd_params['clip_min'], pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        """Compute, print and log the accuracy on ``x_set`` / ``y_set``.

        Without a ``predictor`` the accuracy comes from ``model_eval`` applied
        to ``preds``. With a ``predictor`` the function first calls itself
        without the predictor (so the "without correction" numbers are
        reported too) and then computes accuracy from the values the
        predictor generator sends back.

        :param preds: tensor handed to ``model_eval``; in the predictor branch
                      it is only used for the recursive call.
        :param x_set: inputs to evaluate on.
        :param y_set: labels; ``argmax(-1)`` is taken, so presumably one-hot.
        :param report_key: not read in this function, only forwarded to the
                           recursive call.
        :param is_adv: ``None`` disables printing/logging; otherwise selects
                       the 'adversarial' / 'legitimate' wording and is the
                       value the detection output is compared against.
        :param predictor: generator driven with ``send(batch)``; each reply is
                          split column-wise into predictions and detection
                          flags.
        :param x_adv: optional tensor; when given, ``x_set`` is replaced by
                      the result of evaluating it on ``x_set``.
        :return: the accuracy (with the predictor if one was given).
        """
        if predictor is None:
            # Plain evaluation of the `preds` tensor.
            acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        else:
            # Report the accuracy without the predictor first; the returned
            # value of this recursive call is discarded.
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                # Materialize the adversarial inputs and evaluate on them
                # instead of the inputs that were passed in.
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            # Each send() returns one row per sample; transposing the
            # concatenation yields the prediction row and the detection row.
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            # y_set is truncated to the number of predictions returned.
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            # if is_adv:
            # import IPython ; IPython.embed() ; exit(1)
            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                # An existing dump is never overwritten.
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                # The dict is emptied whether or not it was written.
                debug_dict.clear()
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            # report_text is only truthy when is_adv is not None, so this
            # inner check always passes here.
            if is_adv is not None:
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    # Fraction of samples whose detection flag equals is_adv.
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    # Accuracy restricted to the samples whose detection flag
                    # equals is_adv.
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())

        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            if ckpt_dir is not 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)

        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:

                def _predictor():
                    """Passthrough predictor generator.

                    Receives an input batch through ``send()`` and yields an
                    array whose last axis holds the argmax of ``preds`` and a
                    zero of the same dtype for every sample. Terminates once
                    ``None`` is received.
                    """
                    # First yield only primes the generator and waits for the
                    # first batch.
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        # Second column is all zeros.
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
            next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the PGD attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps

            def _attack_mean(x):
                # Builds the PGD-on-noisy-copies graph for one element passed
                # in by tf.map_fn. Note that the parameter `x` shadows the
                # outer placeholder of the same name inside this function.
                #
                # Replicate the input FLAGS.mean_samples times along a new
                # leading axis.
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                # Perturb every copy with uniform noise in
                # [-mean_eps, mean_eps] and keep it in the [0, 255] range.
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                # Run the PGD attack starting from the noisy copies.
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                # Constrain the result to within FLAGS.eps of the un-noised
                # copies, then to [0, 255] again.
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            for ds, y, name in ((x_train, y_train, 'train'), (x_test, y_test,
                                                              'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x:
                        ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(train_batches)
                ])
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), y)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')

    exit(0)
    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        model = ResNet()
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the atacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir is not 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)

    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
示例#10
0
def average_stat(model, set_type, attack, with_max_threshold=True):
    """
    Prints out the stats of the attack.

    Reads every file of "attack/<model>/<attack>_<set_type>/", skips the
    samples the model misclassifies in the first place, and prints the
    success rate together with the average number of changed pixels and the
    average distortion (overall and for successful attacks only).

    Parameters
    ----------
    model: str
        The joblib name. It must contain either "mnist" or "cifar10".
    set_type: str
        The type of set used (either "train" or "test")
    attack: str
        The type of attack used (either "jsma", "wjsma" or "tjsma")
    with_max_threshold: bool, optional
        Uses the max threshold as the upper limit to compute stats for unsuccessful samples if set to True.

    Raises
    ------
    ValueError
        If ``model`` contains neither "mnist" nor "cifar10".

    Notes
    -----
    A ratio whose denominator is zero (no well predicted sample, or no
    successful attack) is printed as ``nan`` instead of raising
    ``ZeroDivisionError``.
    """

    if "mnist" in model:
        image_size = 784

        from cleverhans.dataset import MNIST

        x_set, y_set = MNIST(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000).get_set(set_type)
    elif "cifar10" in model:
        image_size = 3072

        from cleverhans.dataset import CIFAR10

        x_set, y_set = CIFAR10(train_start=0,
                               train_end=50000,
                               test_start=0,
                               test_end=10000).get_set(set_type)
        y_set = y_set.reshape((y_set.shape[0], 10))
    else:
        raise ValueError(
            "Invalid folder name, it must have the name of the dataset somewhere either 'mnist' or 'cifar10'"
        )

    # Both datasets use the same iteration budget; only the image size (and
    # therefore the distortion threshold) differs.
    max_iter = 57 * 2
    max_distortion = max_iter / image_size
    max_pixel_number = int(image_size * max_distortion / 2) * 2

    def _ratio(total, count):
        # Guard against an empty selection instead of dividing by zero.
        return total / count if count else float("nan")

    # One-hot labels -> class indices.
    y_set = np.argmax(y_set, axis=1)

    average_distortion = 0
    average_distortion_successful = 0
    average_pixel_number = 0
    average_pixel_number_successful = 0

    total_samples = 0
    total_samples_successful = 0

    predicted = np.argmax(get_labels(model, x_set), axis=1)

    folder = "attack/" + model + "/" + attack + "_" + set_type + "/"

    for file_name in os.listdir(folder):
        df_values = pandas.read_csv(os.path.join(folder, file_name)).to_numpy()

        # The sample index is the third "_"-separated field of the file name,
        # with its 4-character extension stripped.
        index = int(file_name.split("_")[2][:-4])

        # Samples the model already misclassifies are not counted.
        if y_set[index] != predicted[index]:
            continue

        # The first 9 columns are read per file -- presumably one per target
        # class other than the true one (TODO confirm against the writer).
        for i in range(9):
            # Third-to-last row: changed pixel count; second-to-last row:
            # distortion.
            pixel_number = df_values[-3, i]
            distortion = df_values[-2, i]

            total_samples += 1

            if with_max_threshold:
                average_pixel_number += min(pixel_number, max_pixel_number)
                average_distortion += min(distortion, max_distortion)
            else:
                average_pixel_number += pixel_number
                average_distortion += distortion

            # An attack counts as successful when it stayed below the
            # iteration budget.
            if pixel_number < max_iter:
                total_samples_successful += 1

                average_pixel_number_successful += pixel_number
                average_distortion_successful += distortion

    print(folder)
    print("----------------------")
    print("WELL PREDICTED ORIGINAL SAMPLES:", total_samples)
    print("SUCCESS RATE (MISS CLASSIFIED):",
          _ratio(total_samples_successful, total_samples))
    print("AVERAGE NUMBER OF CHANGED PIXELS:",
          _ratio(average_pixel_number, total_samples))
    print("AVERAGE DISTORTION:", _ratio(average_distortion, total_samples))
    print("----------------------")
    print("AVERAGE SUCCESSFUL NUMBER OF CHANGED PIXELS:",
          _ratio(average_pixel_number_successful, total_samples_successful))
    print("AVERAGE SUCCESSFUL DISTORTION:",
          _ratio(average_distortion_successful, total_samples_successful))
    print("----------------------\n")
def defence_frame(train_start=0,
                  train_end=TRAIN_SIZE,
                  test_start=0,
                  test_end=TEST_SIZE,
                  nb_epochs=NB_EPOCHS,
                  batch_size=BATCH_SIZE,
                  learning_rate=LEARNING_RATE,
                  clean_train=CLEAN_TRAIN,
                  testing=False,
                  backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                  nb_filters=NB_FILTERS,
                  num_threads=None,
                  label_smoothing=0.1):
    """Train a clean model and one defence model per attack, then evaluate.

    A base model is trained on clean data. For every entry of
    ``FLAGS.attack_type`` another model is trained with the adversarial
    ``CrossEntropy`` loss for that attack. The base model and the ensemble of
    all models are then evaluated on clean data and on adversarial examples
    crafted against the base model and against the ensemble.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs used for every training run
    :param batch_size: size of training batches (evaluation uses
                       ``FLAGS.batch_size``)
    :param learning_rate: learning rate for training
    :param clean_train: not read by this function
    :param testing: not read by this function
    :param backprop_through_attack: not read by this function
    :param nb_filters: forwarded to ``get_model``
    :param num_threads: if truthy, intra-op parallelism is limited to one
                        thread (the actual value is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :raises ValueError: if ``ENSEMBLE_TYPE`` is neither 'logits' nor 'probs',
                        or ``FLAGS.dataset`` is neither 'mnist' nor 'cifar10'
    """
    # Fail before any training is done rather than with a NameError on
    # `ensemble_model` after every model has been trained.
    if ENSEMBLE_TYPE not in ('logits', 'probs'):
        raise ValueError("Invalid ENSEMBLE_TYPE: {!r} (expected 'logits' or "
                         "'probs')".format(ENSEMBLE_TYPE))

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1,
                           allow_soft_placement=True,
                           log_device_placement=True)
    else:
        config_args = dict(allow_soft_placement=True,
                           log_device_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Set parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': FLAGS.batch_size}

    def_model_list = []

    if FLAGS.dataset == 'mnist':
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
        assert Y_train.shape[1] == 10
        nb_classes = 10
        x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        y = tf.placeholder(tf.float32, shape=[None, 10])
        input_shape = [28, 28, 1]

    elif FLAGS.dataset == 'cifar10':

        data = CIFAR10(train_start=train_start,
                       train_end=train_end,
                       test_start=test_start,
                       test_end=test_end)
        X_train, Y_train = data.get_set('train')
        X_test, Y_test = data.get_set('test')

        nb_classes = Y_test.shape[1]
        assert Y_test.shape[1] == 10.

        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))
        input_shape = [32, 32, 3]

    else:
        # Previously an unknown dataset surfaced later as a NameError.
        raise ValueError("Invalid dataset: {!r} (expected 'mnist' or "
                         "'cifar10')".format(FLAGS.dataset))

    # define and train clean model to be defenced on
    model = get_model(FLAGS.dataset, FLAGS.attack_model, 'model', nb_classes,
                      nb_filters, input_shape)
    rng = np.random.RandomState([2017, 10, 30])
    loss = CrossEntropy(model, smoothing=label_smoothing)
    train(sess,
          loss,
          X_train,
          Y_train,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    # for the 1...M attack methods, create adv samples and train defence models
    for i, attack_name in enumerate(FLAGS.attack_type):
        attack_params = get_para(FLAGS.dataset, attack_name)
        model_i = get_model(FLAGS.dataset, FLAGS.attack_model,
                            'model_' + str(i), nb_classes, nb_filters,
                            input_shape)
        # IS_ONLINE: craft the adversarial examples against the model being
        # trained; otherwise against the already trained clean model.
        if IS_ONLINE:
            attack_method = get_attack(attack_name, model_i, sess)
        else:
            attack_method = get_attack(attack_name, model, sess)

        # Bind the per-iteration objects as defaults so the closure cannot
        # pick up a later iteration's attack if it is ever called after the
        # loop has moved on.
        def attack(x, _method=attack_method, _params=attack_params):
            return _method.generate(x, **_params)

        loss_i = CrossEntropy(model_i,
                              smoothing=label_smoothing,
                              attack=attack,
                              adv_coeff=1.)
        train(sess,
              loss_i,
              X_train,
              Y_train,
              args=train_params,
              rng=rng,
              var_list=model_i.get_params())

        def_model_list.append(model_i)

    # Make Ensemble model
    def ensemble_model_logits(x):
        return do_logits(x, model, def_model_list=def_model_list)

    def ensemble_model_probs(x):
        # The log of the ensemble probabilities is exposed through the
        # wrapper's 'logits' output.
        return tf.math.log(do_probs(x, model, def_model_list=def_model_list))

    if ENSEMBLE_TYPE == 'logits':
        ensemble_model = CallableModelWrapper(ensemble_model_logits, 'logits')
    else:  # 'probs' -- validated at the top of the function
        ensemble_model = CallableModelWrapper(ensemble_model_probs, 'logits')

    # Evaluate the accuracy of model on clean examples
    do_eval(sess, x, y, do_probs(x, model), X_test, Y_test,
            "origin model on clean data", eval_params)
    do_eval(sess, x, y, do_probs(x, ensemble_model), X_test, Y_test,
            "ensemble model on clean data", eval_params)

    do_eval(sess, x, y, do_logits(x, model, def_model_list=def_model_list),
            X_test, Y_test, "test ensemble logits on clean data", eval_params)
    do_eval(sess, x, y, do_probs(x, model, def_model_list=def_model_list),
            X_test, Y_test, "test ensemble probs on clean data", eval_params)

    # Evaluate the accuracy of model on adv examples
    for attack_name in FLAGS.attack_type:
        attack_params = get_para(FLAGS.dataset, attack_name)

        # generate attack to origin model
        origin_attack = get_attack(attack_name, model, sess)
        origin_adv_x = origin_attack.generate(x, **attack_params)

        do_eval(sess, x, y, do_probs(origin_adv_x, model), X_test, Y_test,
                attack_name + "-> origin model, test on origin model",
                eval_params)
        do_eval(
            sess, x, y,
            do_probs(origin_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> origin model, test on ensemble model, using probs",
            eval_params)
        do_eval(
            sess, x, y,
            do_logits(origin_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> origin model, test on ensemble model, using logits",
            eval_params)

        # generate attack to ensemble model
        ensemble_attack = get_attack(attack_name, ensemble_model, sess)
        ensemble_adv_x = ensemble_attack.generate(x, **attack_params)

        do_eval(sess, x, y, do_probs(ensemble_adv_x,
                                     ensemble_model), X_test, Y_test,
                attack_name + "-> ensemble model, test on ensemble model",
                eval_params)
        do_eval(
            sess, x, y,
            do_logits(ensemble_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> ensemble model, test on ensemble model, test logits",
            eval_params)
        do_eval(
            sess, x, y,
            do_probs(ensemble_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> ensemble model, test on ensemble model, test probs",
            eval_params)
    b = np.zeros((len(y_test), 43))
    b[np.arange(len(y_test)), y_test] = 1
    y_test = b

    if input.upper() == "TRAIN":
        return x_train.astype(np.float32), y_train.astype(np.float32)
    if input.upper() == "TEST":
        return x_test.astype(np.float32), y_test.astype(np.float32)
    else:
        return None, None


print("STEP 1: Get training data...")

# CIFAR-10 is always loaded first and serves as the default image source.
data = CIFAR10(
    train_start=train_start,
    train_end=train_end,
    test_start=test_start,
    test_end=test_end,
)
x_train, y_train = data.get_set('train')
x_test, y_test = data.get_set('test')

# With the switch on, the arrays returned by Signs() replace the CIFAR-10
# arrays loaded above.
sign_data = True
if sign_data:
    x_train, y_train = Signs("TRAIN")
    x_test, y_test = Signs("TEST")

# Image geometry and class count are taken from whichever training set is
# active at this point.
img_rows, img_cols, nchannels = x_train.shape[1:4]
nb_classes = y_train.shape[1]
my_data = []
示例#13
0
def cifar10_train_on_untargeted(train_start=0, train_end=60000, test_start=0,
                               test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                               learning_rate=LEARNING_RATE,
                               testing=True, adv_training=False,
                               backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                               num_threads=None,threat_model='white_box',
                               model_key='model_1_a',attacker_key='clean',
                               label_smoothing=0.1):
  """
  CIFAR10 cleverhans training
  Loads the Keras model registered under `model_key`, builds the attack
  registered under `attacker_key`, prints attack statistics for one test
  batch and then either adversarially trains the model (saving a checkpoint
  and a metadata entry after every training chunk) or evaluates it on the
  training set.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: total number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: initial learning rate for training
  :param testing: if true and `adv_training` is false, evaluate the model on
                  the training set (clean and adversarial)
  :param adv_training: if true, perform adversarial training in up to 10
                       chunks with a re-evaluation after each chunk
  :param backprop_through_attack: If True, backprop through adversarial
                                  example construction process during
                                  adversarial training.
  :param num_threads: if truthy, intra-op parallelism is limited to one
                      thread (the actual value is not used)
  :param threat_model: 'white_box' (full training set), 'black_box_A'
                       (`train_end` is halved) or 'black_box_B'
                       (`train_start` is set to half of `train_end`)
  :param model_key: key of the model entry in the metadata
  :param attacker_key: key of the attacker entry in the metadata
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :return: an AccuracyReport object
  :raises NotImplementedError: for an unknown `threat_model`
  :raises ValueError: for an unknown attack type in the attacker metadata
  """

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  K.set_learning_phase(0)

  ## Create TF session and set as Keras backend session
  K.set_session(sess)

  # Create a new model and train it to be robust to Attacker
  meta = read_from_meta()
  attacker_meta = meta['attacker'][attacker_key]
  model_meta = meta['model'][model_key]
  attack_type = attacker_meta['attack_type']

  if threat_model == 'black_box_A':
    print('Using training set A')
    train_end = int(train_end/2)
    assert 'black_box_A' in meta['model'][model_key]['threat_models']
    dataset_section = 'A'
  elif threat_model == 'black_box_B':
    print('Using training set B')
    train_start = int(train_end/2)
    dataset_section = 'B'
    assert 'black_box_B' in meta['model'][model_key]['threat_models']
  elif threat_model == 'white_box':
    print('Using full training set')
    dataset_section = ''
  else:
    raise NotImplementedError

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  dataset_size = data.x_train.shape[0]
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Attacker-specific parameters override the defaults.
  attack_params = {}
  attack_params.update(meta['attacker']['default']['attack_params'])
  attack_params.update(attacker_meta['attack_params'])
  for k,v in attack_params.items():
    if isinstance(v,str):
      # SECURITY NOTE(review): string values from the metadata are executed
      # with eval(); only safe while the metadata file is fully trusted.
      attack_params[k] = eval(v)
  if 'meta_key' in attacker_meta.keys() and attack_type == 'advgan':
    folderpath = meta['advgan'][attacker_meta['meta_key']]['train_params']['output_folder']
    attack_params.update({'generator_filepath':os.path.join(folderpath,'generator.hd5')})

  model_filename = model_meta['file_name']
  if 'black_box' in threat_model:
    # Every black-box threat model loads the 'cifar10B' variant of the file.
    model_filename = model_filename.replace('cifar10','cifar10B')
  model_filepath=model_meta['folder_path']+'/'+model_filename

  keras_model=tf.keras.models.load_model(
    filepath=model_filepath,
    custom_objects=custom_object())
  model = KerasModelWrapper(keras_model)

  def attack_statistics(x_true,x_adv):
    # calculate average L1,L2,Linf norms
    # as well as % of pixels modified
    L1 = tf.reduce_mean(K.sum(K.abs(x_adv-x_true),axis=(-1,-2,-3)))
    L2 = tf.reduce_mean(K.sqrt(K.sum(K.square(x_adv-x_true),axis=(-1,-2,-3))))

    Linf = tf.reduce_mean(K.max(K.abs(x_true-x_adv),axis=(-1,-2,-3)))
    # A value counts as modified when it moved by more than 1/255.
    eps = tf.constant(1/255,shape=x_true.shape.as_list()[1:])
    mod_perc = 100*tf.reduce_mean(K.cast(K.greater(K.abs(x_true-x_adv),eps),dtype='float'))
    return {'L1':L1,'L2':L2,'Linf':Linf,'%pix':mod_perc}

  def do_eval(preds, x_set, y_set, report_key, is_adv=None):
    # Store the accuracy on the report and print it when is_adv is given.
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
    setattr(report, report_key, acc)
    if is_adv is None:
      report_text = None
    elif is_adv:
      report_text = 'adversarial'
    else:
      report_text = 'legitimate'
    if report_text:
      print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

  # Float types collected from the report; shared by both report dumps so
  # that numpy accuracies are not dropped from the testing branch.
  report_float_types = (float, np.float32, np.float64)

  def report_as_dict():
    #put accuracies in dictionary for json serializability
    return {attr:str(getattr(report,attr))[:10] for attr in dir(report)
            if type(getattr(report,attr)) in report_float_types}

  #define attacker
  if attack_type == 'cwl2':
    from cleverhans.attacks import CarliniWagnerL2
    attacker = CarliniWagnerL2(model, sess=sess)
  elif attack_type == 'fgsm':
    from cleverhans.attacks import FastGradientMethod
    attacker = FastGradientMethod(model, sess=sess)
  elif attack_type == 'pgd':
    from cleverhans.attacks import MadryEtAl
    attacker = MadryEtAl(model, sess=sess)
  elif attack_type == 'advgan':
    from cleverhans.attacks.adversarial_gan import AdvGAN
    attacker = AdvGAN(model,sess=sess)
  elif attack_type is None or attack_type=='clean':
    attacker = None
  else:
    # Previously this only printed a message and then failed with a
    # NameError on the unbound `attacker`.
    raise ValueError(str(attack_type)+' is not a valid attack type')

  def attack(x):
    # Identity when no attacker is configured.
    if attacker:
      print('attack_params',attack_params)
      return attacker.generate(x,**attack_params)
    else:
      return x
  loss = CrossEntropy(model, smoothing=label_smoothing, attack=attack)
  preds = model.get_logits(x)
  adv_x = attack(x)

  if not backprop_through_attack:
    # For the fgsm attack used in this tutorial, the attack has zero
    # gradient so enabling this flag does not change the gradient.
    # For some other attacks, enabling this flag increases the cost of
    # training, but gives the defender the ability to anticipate how
    # the attacker will change their strategy in response to updates to
    # the defender's parameters.
    adv_x = tf.stop_gradient(adv_x)
  preds_adv = model.get_logits(adv_x)

  def evaluate():
    # Accuracy of adversarially trained model on legitimate test inputs
    do_eval(preds, x_test, y_test, 'adv_train_clean_eval', False)
    # Accuracy of the adversarially trained model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'adv_train_adv_eval', True)

  #print_attack info
  with sess.as_default():
    # str() keeps this working when the attack type is None.
    print('attack type: '+ str(attack_type))
    attack_stats = attack_statistics(x,adv_x)
    feed_dict={x:x_test[:batch_size],y:y_test[:batch_size]}
    attack_stats_eval = sess.run(attack_stats,feed_dict=feed_dict)
    attack_stats_eval = {k:str(v)[:10] for k,v in attack_stats_eval.items()}
    print(attack_stats_eval)

  if adv_training:
    # Train an CIFAR10 model
    reeval_breaks = 10
    train_params = {
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    nb_e = nb_epochs  # epochs still to be trained
    prev_acc = 0
    # Perform and evaluate adversarial training
    for rb in range(reeval_breaks,0,-1):
      # Spread the remaining epochs evenly over the remaining chunks.
      train_params.update({'nb_epochs': int(np.ceil(nb_e/rb))})
      # Never train more than what is left (the original chained comparison
      # `nb_e < n < 0` could not be true).
      if train_params['nb_epochs'] > nb_e:
        train_params['nb_epochs'] = nb_e
      print("Starting training {} of {}".format(nb_epochs-nb_e, nb_epochs))
      # NOTE(review): `rng` is not defined anywhere in this function; it must
      # come from an enclosing/module scope -- confirm it exists.
      train(sess, loss, None, None,
          dataset_train=dataset_train, dataset_size=dataset_size,
          evaluate=evaluate, args=train_params, rng=rng)

      nb_e-=train_params['nb_epochs']

      report_dict = report_as_dict()
      print(report_dict)
      #save to meta
      new_meta = read_from_meta()
      new_model = deepcopy(model_meta)
      new_model.update({'adv_training':True,
                        'attacker_key':attacker_key,
                        'parent_key':model_key,
                        'threat_models':[threat_model],
                        'attack_stats':attack_stats_eval,
                        'report':report_dict,
                        'train_params': {
                          'batch_size': batch_size,
                          'learning_rate': learning_rate,
                          'nb_epochs': nb_epochs-nb_e,
                        },
                        'reeval':False
                       })
      if nb_e > 0:
        # Intermediate checkpoint: the file name carries the epoch count.
        new_model.update({'training_finished':False,
          'file_name': model_meta['file_name'].replace('clean',attacker_key+'_train_epoch_'+str(new_model['train_params']['nb_epochs']))})
      else:
        new_model.update({'training_finished':True,
          'file_name': model_meta['file_name'].replace('clean',attacker_key+'_train')})

      new_model_key = get_new_key(model_key,meta)
      new_meta['model'].update({new_model_key:new_model})
      write_to_meta(new_meta)

      save_filename = new_model['file_name']
      if 'black_box' in threat_model:
        save_filename = save_filename.replace('cifar10','cifar10'+dataset_section)
      save_model(keras_model,filepath=new_model['folder_path']+'/'+save_filename)

      if report.adv_train_adv_eval >= 0.9:
        # Good enough: stop early.
        break
      elif report.adv_train_adv_eval <= 0.01:
        #increase_lr
        lr = train_params['learning_rate']
        train_params.update({'learning_rate':lr*1.5})
        print('no learning! Increasing learning rate to {}'
          .format(train_params['learning_rate']))

      elif prev_acc<=report.adv_train_adv_eval:
        #update_lr
        lr = train_params['learning_rate']
        train_params.update({'learning_rate':lr*0.8})
        print('decreasing learning rate to {}'
          .format(train_params['learning_rate']))
      prev_acc = copy(report.adv_train_adv_eval)

      if nb_e<=0:
        break

  # Calculate training errors
  elif testing:
    do_eval(preds, x_train, y_train, 'train_adv_train_clean_eval')
    do_eval(preds_adv, x_train, y_train, 'train_adv_train_adv_eval')
    report_dict = report_as_dict()
    print('report_dict')
    print(report_dict)
  return report
示例#14
0
def cifar10_eval_attacks(train_start=0,
                         train_end=60000,
                         test_start=0,
                         test_end=10000,
                         sweep_eps=SWEEP_EPS,
                         targeted=TARGETED,
                         model_key='model_1_a',
                         attacker_keys='clean',
                         eval_model_keys=None,
                         threat_model='white_box',
                         generate_examples=True):
    """
    Evaluate a saved CIFAR10 Keras model against one or more attackers.

    The model named by ``model_key`` is loaded from the meta-data and used to
    craft adversarial examples; every model in ``eval_model_keys`` is then
    scored on clean and on adversarial test data.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param sweep_eps: if true (and the attack is not 'clean'), evaluate over
                      a range of epsilons up to twice the configured ``eps``
                      instead of at the single configured value
    :param targeted: if true, run targeted attacks over every
                     (target class, original class) pair and store the
                     accuracy matrices in pickle files
    :param model_key: name of the keras model used to generate the attacks
    :param attacker_keys: name, or iterable of names, of the attackers in
                          the meta-data to evaluate
    :param eval_model_keys: list of model names to evaluate on; required for
                            'black_box', overwritten with ``[model_key]``
                            for 'white_box'
    :param threat_model: either 'white_box' or 'black_box'
    :param generate_examples: if true, materialise the adversarial test set
                              as numpy arrays (cached as pickle files) and
                              evaluate on those
    :return: a dict keyed by attacker key. Each entry holds
             'clean_model_acc' plus either 'sweep_eps' or
             'model_acc' / 'attack_stats' (and 'target_acc' when targeted);
             for 'black_box' these live under
             ``report[attacker_key]['black_box'][eval_model_key]``.
    :raises ValueError: on an unknown ``threat_model`` or attack type, or
                        when ``eval_model_keys`` is not a list for
                        'black_box'
    :raises NotImplementedError: when a GAN-based attacker has no 'meta_key'
    """

    if threat_model == 'white_box':
        eval_model_keys = [model_key]
        attacker_partition = ''
        defender_partition = ''
    elif threat_model == 'black_box':
        attacker_partition = 'A'
        defender_partition = 'B'
        if not isinstance(eval_model_keys, list):
            raise ValueError('eval_model_keys must be list for black_box')
    else:
        # Previously fell through and failed later with a NameError.
        raise ValueError('invalid threat_model: {}'.format(threat_model))
    black_box = threat_model == 'black_box'

    # TODO: add white-box info to meta-data. Sketch of the intended layout
    # (presumably "model_1_g" is the eval model and "model_1_e" the
    # surrogate model -- confirm):
    #   "model_1_g": {
    #     "advgan_b->model_1_e": {
    #       "model_acc": "saved_models/model_1_cifar10_ResNet20_v2\\pickle\\model_1_g_advgan_b_model_acc.p",
    #       "target_acc": "saved_models/model_1_cifar10_ResNet20_v2\\pickle\\model_1_g_advgan_b_target_acc.p",
    #       "attack_stats": {
    #         "L1": 127.04542236328125,
    #         "L2": 2.9744277954101563,
    #         "Linf": 0.2539639711380005,
    #         "%pix": 93.39645385742188,
    #         "num_batches": 20,
    #         "time": "97.7us"
    #       "threat_model": "black_box"

    def dump_pickle(obj, path):
        # Write obj to path, closing the file even if pickling fails.
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    def load_pickle(path):
        # NOTE: pickle.load can execute arbitrary code; these cache files
        # are expected to have been written by this function only.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session and set it as the Keras backend session
    sess = tf.Session()
    K.set_learning_phase(0)
    K.set_session(sess)

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    # NOTE(review): the resulting dataset is not used below; the call is
    # kept because it may add ops to the graph -- confirm before removing.
    data.to_tensorflow()
    x_test, y_test = data.get_set('test')
    nb_test = x_test.shape[0]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_target = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Load the model that generates the attacks
    meta = read_from_meta()
    model_meta = meta['model'][model_key]
    filename = model_meta['file_name'].replace('CIFAR10',
                                               'CIFAR10' + attacker_partition)
    keras_model = tf.keras.models.load_model(
        filepath=model_meta['folder_path'] + '/' + filename,
        custom_objects=custom_object())
    model = KerasModelWrapper(keras_model)

    standard_attackers = {
        'cwl2': cha.CarliniWagnerL2,
        'fgsm': cha.FastGradientMethod,
        'pgd': cha.MadryEtAl,
        'jsma': cha.SaliencyMapMethod,
        'stm': cha.SpatialTransformationMethod,
        'advgan': cha.AdvGAN,
        'spsa': cha.SPSA,
        'g+pgd': cha.GanInformedPGD,
        'g+spsa': cha.GanInformedSPSA,
    }

    # A bare string (including the default 'clean') is a single key;
    # list('clean') would split it into characters.
    if isinstance(attacker_keys, str):
        attacker_keys = [attacker_keys]
    else:
        attacker_keys = list(attacker_keys)

    report = dict()
    for attacker_key in attacker_keys:
        attacker_meta = meta['attacker'][attacker_key]
        attack_type = attacker_meta['attack_type']

        # Attacker-specific parameters override the defaults.
        attack_params = {}
        attack_params.update(meta['attacker']['default']['attack_params'])
        attack_params.update(attacker_meta['attack_params'])
        if 'spsa' in attacker_key:
            eval_par = {'batch_size': 1}
        else:
            eval_par = {'batch_size': attack_params['batch_size']}
        for k, v in list(attack_params.items()):
            if isinstance(v, str):
                # SECURITY: eval() runs arbitrary code taken from the
                # meta-data; only use meta files from a trusted source.
                attack_params[k] = eval(v)

        # GAN-based attackers need the path of the trained generator.
        if attack_type is not None and (attack_type == 'advgan'
                                        or 'g+' in attack_type):
            if 'meta_key' not in attacker_meta:
                raise NotImplementedError(
                    "Must provide attacker meta with existing meta_key")
            folderpath = meta['advgan'][
                attacker_meta['meta_key']]['train_params']['output_folder']
            attack_params.update({
                'generator_filepath':
                os.path.join(folderpath, 'generator.hd5'),
                'custom_objects':
                custom_object()
            })

        # Define attacker
        if attack_type in standard_attackers:
            attacker = standard_attackers[attack_type](model, sess=sess)
        elif attack_type is None or attack_type == 'clean':
            attacker = None
        else:
            # Previously only printed, leaving `attacker` unbound or stale
            # from the previous loop iteration.
            raise ValueError(
                '{} is not a valid attack type'.format(attack_type))

        pkl_folderpath = os.path.join(model_meta['folder_path'], 'pickle',
                                      attacker_key)
        os.makedirs(pkl_folderpath, exist_ok=True)

        def attack(x, attack_params=attack_params):
            # Return the adversarial tensor for x (x itself if no attacker).
            if attacker:
                return attacker.generate(x, **attack_params)
            return x

        def gen_np(sess, X, x, adv_x, Y_target=None, y_target=None):
            # Evaluate adv_x batch-wise and return the result as numpy.
            # inputs:
            #   sess (required) : tf session
            #   X (required) : numpy input data
            #   x (required) : placeholder for model input
            #   adv_x (required) : tensor for generator output
            #   Y_target (optional) : numpy array specifying the target class
            #   y_target (optional) : placeholder for the target inputs
            # outputs:
            #   numpy array of adversarial examples (X itself if there is
            #   no attacker)
            if not attacker:
                # Return the numpy data, not the placeholder, so the result
                # can be pickled and fed like any other batch.
                return X
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                nb_x = X.shape[0]
                nb_batches = int(np.ceil(float(nb_x) / _batch_size))
                assert nb_batches * _batch_size >= nb_x
                # The empty seed keeps the original dtype promotion and
                # makes the concatenation valid for empty input.
                pieces = [np.zeros((0, ) + X.shape[1:], dtype=X.dtype)]
                end = 0
                for batch in range(nb_batches):
                    start = batch * _batch_size
                    end = min(nb_x, start + _batch_size)
                    feed_dict = {x: X[start:end]}
                    if Y_target is not None:
                        feed_dict.update({y_target: Y_target[start:end]})
                    pieces.append(adv_x.eval(feed_dict=feed_dict))
                assert end >= nb_x
                return np.concatenate(pieces, axis=0)

        def load_or_generate(pickle_x_file, X, adv_x, Y_target=None):
            # Return cached adversarial examples, generating and caching
            # them first if the pickle file does not exist yet.
            if os.path.exists(pickle_x_file):
                return load_pickle(pickle_x_file)
            adv_x_np = gen_np(sess, X, x, adv_x, Y_target, y_target)
            dump_pickle(adv_x_np, pickle_x_file)
            return adv_x_np

        if targeted:
            # Replicate the test set once per target class (not shuffled).
            target_test = np.repeat(range(nb_classes), nb_test)
            x_test_shuf = np.array(np.tile(x_test, (nb_classes, 1, 1, 1)))
            y_test_shuf = np.array(np.tile(y_test, (nb_classes, 1)))
            y_target_test_shuf = tf.keras.utils.to_categorical(
                target_test, nb_classes)

            # Split the data by (target class t, original class o).
            x_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_target_test_by_t_o = [[None] * nb_classes
                                    for n in range(nb_classes)]
            # Extra last row/column hold the column/row totals.
            nb_test_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
            print(y_target_test_shuf)
            for t in range(nb_classes):
                for o in range(nb_classes):
                    if t == o:
                        continue
                    index = np.logical_and(y_target_test_shuf[:, t],
                                           y_test_shuf[:, o])
                    nb_test_by_t_o[t, o] = np.count_nonzero(index)
                    x_test_by_t_o[t][o] = x_test_shuf[index]
                    y_test_by_t_o[t][o] = y_test_shuf[index]
                    y_target_test_by_t_o[t][o] = y_target_test_shuf[index]
            # Sanity check; note this also requires original classes 1 and 2
            # to have the same number of test examples.
            np.testing.assert_array_equal(y_target_test_by_t_o[0][1],
                                          y_target_test_by_t_o[0][2],
                                          err_msg='',
                                          verbose=True)
            nb_test_by_t_o[nb_classes, :] = np.sum(nb_test_by_t_o, axis=0)
            nb_test_by_t_o[:, nb_classes] = np.sum(nb_test_by_t_o, axis=1)
            attack_params.update({'y_target': y_target})

            def model_eval_wrapper(preds,
                                   acc_target='original_class',
                                   adv_x=None,
                                   file_head=''):
                # Return the (nb_classes+1, nb_classes+1) accuracy matrix
                # indexed by [target class, original class]; the extra last
                # row/column hold the weighted averages. file_head prefixes
                # the cache files of the generated examples (passed
                # explicitly so it is never read before being assigned).
                if acc_target == 'original_class':
                    labels_by_t_o = y_test_by_t_o
                elif acc_target == 'target_class':
                    labels_by_t_o = y_target_test_by_t_o
                else:
                    raise ValueError('invalid value for accuracy_target: ' +
                                     acc_target)
                if generate_examples and adv_x is None:
                    raise ValueError(
                        'adv_x tensor must be supplied when generating '
                        'examples')
                accuracy_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
                orig_accuracy_by_t_o = np.zeros(
                    (nb_classes + 1, nb_classes + 1))
                for t in range(nb_classes + 1):
                    for o in range(nb_classes):
                        if t == o:
                            continue
                        row_scale = (nb_test_by_t_o[t, o] /
                                     nb_test_by_t_o[t, nb_classes])
                        col_scale = (nb_test_by_t_o[t, o] /
                                     nb_test_by_t_o[nb_classes, o])
                        if t < nb_classes:
                            if generate_examples:
                                pickle_x_file = os.path.join(
                                    pkl_folderpath, file_head +
                                    "x_test_targeted_{}_{}.p".format(t, o))
                                adv_x_test = load_or_generate(
                                    pickle_x_file, x_test_by_t_o[t][o],
                                    adv_x, y_target_test_by_t_o[t][o])
                                # Feed pre-computed examples straight into
                                # the adv_x tensor.
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    adv_x_test,
                                    labels_by_t_o[t][o],
                                    args=eval_par)
                                orig_accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    labels_by_t_o[t][o],
                                    args=eval_par)
                            else:
                                feed = {
                                    y_target:
                                    y_target_test_by_t_o[t][o]
                                    [:eval_par['batch_size'], :]
                                }
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    labels_by_t_o[t][o],
                                    feed=feed,
                                    args=eval_par)
                            accuracy_by_t_o[nb_classes, o] += (
                                accuracy_by_t_o[t, o] * col_scale)
                            orig_accuracy_by_t_o[nb_classes, o] += (
                                orig_accuracy_by_t_o[t, o] * col_scale)
                        accuracy_by_t_o[t, nb_classes] += (
                            accuracy_by_t_o[t, o] * row_scale)
                        orig_accuracy_by_t_o[t, nb_classes] += (
                            orig_accuracy_by_t_o[t, o] * row_scale)
                if adv_x is not None:
                    # fill diagonal with original accuracies
                    for o in range(nb_classes):
                        accuracy_by_t_o[o, o] = orig_accuracy_by_t_o[
                            nb_classes, o]
                return accuracy_by_t_o
        else:
            x_test_shuf = x_test
            y_test_shuf = y_test

        def attack_stats_eval(x, adv_x, num_batches=1):
            # Return attack statistics averaged over num_batches batches,
            # plus the per-example generation time.
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                _as_eval = dict()
                cum_time = 0.
                attack_stats = attack_statistics(x, adv_x)
                for batch in range(num_batches):
                    lo = batch * _batch_size
                    hi = (batch + 1) * _batch_size
                    feed_dict = {
                        x: x_test_shuf[lo:hi],
                        y: y_test_shuf[lo:hi]
                    }
                    if targeted:
                        feed_dict.update(
                            {y_target: y_target_test_shuf[lo:hi]})
                    _as = sess.run(attack_stats, feed_dict=feed_dict)

                    if batch == 0:
                        _as_eval = deepcopy(_as)
                    else:
                        _as_eval = {k: v + _as[k] for k, v in _as_eval.items()}

                    t_1 = time.process_time()
                    adv_x.eval(feed_dict=feed_dict)
                    t_2 = time.process_time()
                    cum_time += t_2 - t_1
            cum_time /= num_batches * _batch_size

            _as_eval = {k: v / num_batches for k, v in _as_eval.items()}
            _as_eval.update({
                'num_batches': num_batches,
                'time': metric_convert(cum_time, 's')
            })
            return _as_eval

        report.update({attacker_key: {'model_acc': {}}})

        for eval_model_key in eval_model_keys:
            # Sweep over models to evaluate on. "White Box" attacks
            # only have one eval_model_key; "Black Box" attacks may
            # have several eval_model_key "defenses".
            report_view = report[attacker_key]

            if black_box:
                # add black box eval model to report and update report head
                black_box_view = report_view.setdefault('black_box', {})
                black_box_view[eval_model_key] = {'model_acc': {}}
                report_view = black_box_view[eval_model_key]

                # load eval model trained on defense dataset
                eval_model_meta = meta['model'][eval_model_key]
                filename = eval_model_meta['file_name'].replace(
                    'CIFAR10', 'CIFAR10' + defender_partition)
                keras_model = tf.keras.models.load_model(
                    filepath=eval_model_meta['folder_path'] + '/' + filename,
                    custom_objects=custom_object())
                eval_model = KerasModelWrapper(keras_model)
            else:
                assert model_key == eval_model_key, (
                    'for white_box attacks, '
                    'generating model and eval model must be the same')
                eval_model = model

            # evaluate model on clean examples
            preds = eval_model.get_logits(x)
            model_acc = model_eval(sess,
                                   x,
                                   y,
                                   preds,
                                   x_test,
                                   y_test,
                                   args=eval_par)
            print('Test accuracy on clean examples %0.4f\n' % model_acc)
            report_view.update({'clean_model_acc': model_acc})

            if sweep_eps and attack_type not in (None, 'clean'):
                # Sweep on a copy so the configured eps / eps_iter stay
                # intact for the next eval model.
                sweep_params = dict(attack_params)
                max_eps = 2 * attack_params['eps']
                has_eps_iter = 'eps_iter' in attack_params
                if has_eps_iter:
                    max_eps_iter = 2 * attack_params['eps_iter']
                epsilons = np.linspace(1 / 255, max_eps,
                                       min(int(max_eps * 255), 16))
                sweep_e = dict()
                t1 = time.time()
                for e in epsilons:
                    scaled_e = str(int(e * 255))
                    sweep_params.update({'eps': e})
                    if has_eps_iter:
                        sweep_params.update(
                            {'eps_iter': max_eps_iter * e / max_eps})
                    # Defined up front: both branches below need it.
                    pickle_file_head = '{}_{}_{}_'.format(
                        model_key, attacker_key, e)
                    adv_x = attack(x, sweep_params)
                    attack_stats_cur = attack_stats_eval(x, adv_x, 1)
                    preds_adv = eval_model.get_probs(adv_x)
                    if targeted:
                        model_acc = model_eval_wrapper(
                            preds_adv,
                            acc_target='original_class',
                            adv_x=adv_x,
                            file_head=pickle_file_head)
                        target_acc = model_eval_wrapper(
                            preds_adv,
                            acc_target='target_class',
                            adv_x=adv_x,
                            file_head=pickle_file_head)
                        # Include the eval model for black box so results
                        # of different eval models do not overwrite each
                        # other.
                        acc_head = pickle_file_head
                        if black_box:
                            acc_head += eval_model_key + '_'
                        pickle_m_file = os.path.join(
                            pkl_folderpath, acc_head + "model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, acc_head + "target_acc.p")
                        dump_pickle(model_acc, pickle_m_file)
                        dump_pickle(target_acc, pickle_t_file)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': pickle_m_file,
                                'target_acc': pickle_t_file,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        overall_acc = model_acc[nb_classes, nb_classes]
                    else:
                        if generate_examples:
                            pickle_x_file = os.path.join(
                                pkl_folderpath,
                                pickle_file_head + "x_test_untargeted.p")
                            adv_x_test = load_or_generate(
                                pickle_x_file, x_test, adv_x)
                            model_acc = model_eval(sess,
                                                   adv_x,
                                                   y,
                                                   preds_adv,
                                                   adv_x_test,
                                                   y_test,
                                                   args=eval_par)
                        else:
                            model_acc = model_eval(sess,
                                                   x,
                                                   y,
                                                   preds_adv,
                                                   x_test,
                                                   y_test,
                                                   args=eval_par)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': model_acc,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        overall_acc = model_acc
                    print('Epsilon %.2f, accuracy on adversarial' % e,
                          'examples %0.4f\n' % overall_acc)
                    print(sweep_e[scaled_e])
                report_view.update({'sweep_eps': sweep_e})
                t2 = time.time()
            else:
                cond_eps = attack_params.get('eps', 'N/A')
                print('evaluating {}->{} examples on {} (single epsilon: {})'.
                      format(attacker_key, model_key, eval_model_key,
                             cond_eps))

                t1 = time.time()
                adv_x = attack(x, attack_params)
                preds_adv = eval_model.get_probs(adv_x)
                pickle_file_head = '{}_{}_'.format(model_key, attacker_key)
                if targeted:
                    model_acc = model_eval_wrapper(
                        preds_adv,
                        acc_target='original_class',
                        adv_x=adv_x,
                        file_head=pickle_file_head)
                    target_acc = model_eval_wrapper(
                        preds_adv,
                        acc_target='target_class',
                        adv_x=adv_x,
                        file_head=pickle_file_head)

                    acc_head = pickle_file_head
                    if black_box:
                        acc_head += eval_model_key
                    pickle_m_file = os.path.join(pkl_folderpath,
                                                 acc_head + "_model_acc.p")
                    pickle_t_file = os.path.join(pkl_folderpath,
                                                 acc_head + "_target_acc.p")
                    dump_pickle(model_acc, pickle_m_file)
                    dump_pickle(target_acc, pickle_t_file)
                    report_view.update({
                        'model_acc':
                        pickle_m_file,
                        'target_acc':
                        pickle_t_file,
                        'attack_stats':
                        attack_stats_eval(x, adv_x, 20)
                    })
                else:
                    if generate_examples:
                        pickle_x_file = os.path.join(
                            pkl_folderpath,
                            pickle_file_head + "x_test_untargeted.p")
                        adv_x_test = load_or_generate(pickle_x_file, x_test,
                                                      adv_x)
                        # evaluate on self and, if black box, all other
                        # eval models
                        model_acc = model_eval(sess,
                                               adv_x,
                                               y,
                                               preds_adv,
                                               adv_x_test,
                                               y_test,
                                               args=eval_par)
                    else:
                        model_acc = model_eval(sess,
                                               x,
                                               y,
                                               preds_adv,
                                               x_test,
                                               y_test,
                                               args=eval_par)
                    report_view.update({
                        'model_acc':
                        model_acc,
                        'attack_stats':
                        attack_stats_eval(x, adv_x, 20)
                    })
                t2 = time.time()
                if targeted:
                    print('Test accuracy on adversarial examples %0.4f\n' %
                          model_acc[nb_classes, nb_classes])
                    print('Target accuracy on adversarial examples %0.4f\n' %
                          target_acc[nb_classes, nb_classes])
                else:
                    print('Test accuracy on adversarial examples %0.4f\n' %
                          model_acc)

            print("Took", t2 - t1, "seconds")
    return report
示例#15
0
def main(argv=None):
  """
  Run the Carlini-Wagner attack / counter-attack experiment on CONFIG.DATASET.

  Steps, in order:
    1. create the output directories and seed the random number generators,
    2. load (or synthesise) the dataset and store the test split on disk,
    3. build the classifier, then restore it from a checkpoint or train it,
    4. attack the second half of the test set once,
    5. attack the untouched first half together with the once-attacked
       images a second time,
    6. compare the L2 distances travelled by both groups and plot the results.

  Intermediate arrays (logits and attacked images) are cached as ``.npy``
  files under ``CONFIG.SAVE_PATH + 'data/'`` and are loaded instead of being
  recomputed when they already exist.

  :param argv: unused
  :return: the AccuracyReport object on which the accuracies were recorded
  :raises ValueError: if CONFIG.DATASET is not 'mnist', 'cifar10', 'moon'
                      or 'dims'
  """

  from cleverhans_tutorials import check_installation
  check_installation(__file__)

  # Reject unknown datasets up front; otherwise none of the dataset branches
  # below would run and the first use of x_test would fail with a NameError.
  if CONFIG.DATASET not in ('mnist', 'cifar10', 'moon', 'dims'):
    raise ValueError('Unsupported CONFIG.DATASET: {!r}'.format(CONFIG.DATASET))

  save_path_data = CONFIG.SAVE_PATH + 'data/'
  model_path = CONFIG.SAVE_PATH + '../all/' + CONFIG.DATASET + '/'
  # Every directory is checked on its own so that a missing 'data/' folder is
  # created even when model_path itself already exists.
  for directory in (CONFIG.SAVE_PATH, save_path_data, model_path,
                    model_path + 'data/'):
    if not os.path.exists(directory):
      os.makedirs(directory)

  nb_epochs = FLAGS.nb_epochs
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  nb_filters = FLAGS.nb_filters
  # The test set is split in two halves of len_x samples each
  len_x = int(CONFIG.NUM_TEST/2)

  start = time.time()

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set seeds to improve reproducibility
  if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
    tf.set_random_seed(1234)
    np.random.seed(1234)
    rd.seed(1234)
  elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
    tf.set_random_seed(13)
    np.random.seed(1234)
    rd.seed(0)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session
  tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
  tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
  sess = tf.Session(config=tf_config)

  if CONFIG.DATASET == 'mnist':
    # Get MNIST data
    mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
  elif CONFIG.DATASET == 'cifar10':
    # Get CIFAR10 data
    data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                   test_start=0, test_end=CONFIG.NUM_TEST)
    dataset_size = data.x_train.shape[0]
    # Training uses an augmented tf.data pipeline (random flip + shift)
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')
  elif CONFIG.DATASET in ('moon', 'dims'):
    # Both synthetic datasets start from the same standardised two-moons data
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(
        X, y,
        test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST)),
        random_state=0)
    x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(
        model_path, x_train1, y_train1, x_test1, y_test1)
    if CONFIG.DATASET == 'dims':
      # 'dims' additionally passes the inputs through add_noise_and_QR
      x_train, x_test = add_noise_and_QR(x_train, x_test, CONFIG.NUM_DIMS)

  np.save(os.path.join(save_path_data, 'x_test'), x_test)
  np.save(os.path.join(save_path_data, 'y_test'), y_test)

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train a model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': 1}
  rng = np.random.RandomState([2017, 8, 30])

  # Accuracies are appended to acc_param.txt while the file is open
  with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

    def do_eval(adv_x, preds, x_set, y_set, report_key):
      """
      Evaluate `preds` on (x_set, y_set) and log the accuracy.

      :param adv_x: tensor forwarded to model_eval next to the predictions
      :param preds: prediction tensor to evaluate
      :param x_set: numpy array with the inputs
      :param y_set: numpy array with the labels
      :param report_key: name under which the accuracy is stored on `report`
                         and written to the log file; skipped when empty
      :return: the second and third values returned by model_eval
               (callers treat them as predictions and resulting inputs)
      """
      acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                          y_set, args=eval_params)
      if report_key:
        setattr(report, report_key, acc)
        print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
      return pred_np, adv_x_np

    if CONFIG.DATASET == 'mnist':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelBasicCNN('model1', nb_classes, nb_filters)
    elif CONFIG.DATASET == 'cifar10':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
    elif CONFIG.DATASET == 'moon':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelMLP('model1', nb_classes)
    elif CONFIG.DATASET == 'dims':
      # The 'dims' checkpoint lives next to the run's own data
      trained_model_path = save_path_data + 'trained_model'
      model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)

    def evaluate():
      """Callback handed to the training routine: evaluate on the test set."""
      _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

    # Reuse an existing checkpoint if there is one, otherwise train and save
    if os.path.isfile( trained_model_path + '.index' ):
      tf_model_load(sess, trained_model_path)
    else:
      if CONFIG.DATASET == 'mnist':
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'cifar10':
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
      elif CONFIG.DATASET == 'moon':
        train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                 args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'dims':
        train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                 args=train_params, rng=rng, var_list=model.get_params())
      saver = tf.train.Saver()
      saver.save(sess, trained_model_path)

    # Evaluate the accuracy on test examples
    if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
      logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
    else:
      _, _ = do_eval(x, preds, x_train, y_train, 'train')
      logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
      np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0)

    if CONFIG.DATASET == 'moon':
      # Evaluate the model on a regular grid over [0, 1] x [0, 1]
      num_grid_points = 5000
      if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
        x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
        logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
      else:
        xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points), np.linspace(0, 1, num_grid_points))
        x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
        # Placeholder labels: every grid point gets label 1
        y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
        x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
        logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
        x_mesh = np.squeeze(x_mesh)
        np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
        np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)), logits_mesh)

    # First half stays clean until the counter attack; the second half is
    # attacked once before that.
    points_x = x_test[:len_x]
    points_y = y_test[:len_x]
    points_x_bar = x_test[len_x:]
    points_y_bar = y_test[len_x:]

    # Initialize the CW attack object and graph
    cw = CarliniWagnerL2(model, sess=sess)

    # first attack
    attack_params = {
        'learning_rate': CONFIG.CW_LEARNING_RATE,
        'max_iterations': CONFIG.CW_MAX_ITERATIONS
      }

    if CONFIG.DATASET == 'moon':
      # NOTE(review): const_a_min / const_a_max are not visible in this
      # function; presumably options of the project's CarliniWagnerL2 - confirm.
      out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
      attack_params['const_a_min'] = out_a
      attack_params['const_a_max'] = 100

    adv_x = cw.generate(x, **attack_params)

    if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
      adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
      logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv = model.get_logits(adv_x)
      logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                    'test once attacked')
      np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
      np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)

    # counter attack
    attack_params['max_iterations'] = 1024

    if CONFIG.DATASET == 'moon':

      out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                np.squeeze(adv_img_1), model_path,
                                                CONFIG.SAVE_PATH)
      attack_params['learning_rate'] = out_alpha2
      attack_params['const_a_min'] = -1
      attack_params['max_iterations'] = 2048

      plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                logits_mesh)

    adv_adv_x = cw.generate(x, **attack_params)

    # Inputs of the counter attack: clean first half + once-attacked second
    # half, labelled with the given labels and the first-attack outputs.
    x_k = np.concatenate((points_x, adv_img_1), axis=0)
    y_k = np.concatenate((points_y, logits_1), axis=0)

    if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
      adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
      logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv_adv = model.get_logits(adv_adv_x)
      logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                    'test twice attacked')

      np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
      np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)

    if CONFIG.DATASET == 'moon':
      plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
      plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
      test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                 CONFIG.SAVE_PATH)

  compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                    logits_2[:len_x], CONFIG.SAVE_PATH)

  # Sum over every non-batch axis; single-channel data skips the last axis
  if x_test.shape[-1] > 1:
    num_axis=(1,2,3)
  else:
    num_axis=(1,2)

  # L2 distance moved by the counter attack: clean points (D_p) versus
  # already attacked points (D_p_p)
  D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
  D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                    axis=num_axis)))
  D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                logits_0[:len_x], logits_2[:len_x])

  # NOTE(review): this comparison assumes modify_D returns plain lists - confirm
  if D_p_mod != [] and D_p_p_mod != []:
    plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

  plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

  print('Time needed:', time.time()-start)

  return report
示例#16
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
    CleverHans Keras tutorial: FGSM attack followed by adversarial training.

    NOTE: despite the function name, the data is loaded through the
    ``CIFAR10`` dataset class.

    A first CNN is restored from the latest checkpoint in `train_dir` (when
    `load_model` is set and a checkpoint exists) or trained from scratch, and
    then evaluated on clean and FGSM inputs. A second CNN is trained with the
    FGSM attack plugged into its loss and evaluated the same way.

    :param train_start: forwarded to the dataset loader as `train_start`
    :param train_end: forwarded to the dataset loader as `train_end`
    :param test_start: forwarded to the dataset loader as `test_start`
    :param test_end: forwarded to the dataset loader as `test_end`
    :param nb_epochs: number of epochs to train each model
    :param batch_size: batch size used for training and evaluation
    :param learning_rate: learning rate for training
    :param train_dir: directory holding the checkpoints; created if missing
    :param filename: passed to the training routine as 'filename'
    :param load_model: if true, try to restore the first model from a checkpoint
    :param testing: if true, accuracies on the training set are recorded too
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    tf.keras.backend.set_learning_phase(0)

    # Collects the accuracies measured along the way; returned at the end
    report = AccuracyReport()

    # Fixed TF seed for better reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # One TF session, shared with the Keras backend
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Load the train and test splits
    dataset = CIFAR10(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)
    x_train, y_train = dataset.get_set('train')
    x_test, y_test = dataset.get_set('test')

    # Image geometry and number of classes are read off the training arrays
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Input and label placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    def build_cnn():
        # Both models of this tutorial share the same architecture
        return cnn_model(img_rows=img_rows,
                         img_cols=img_cols,
                         channels=nchannels,
                         nb_filters=64,
                         nb_classes=nb_classes)

    def accuracy_on(predictions, inputs, labels):
        # Accuracy of `predictions` when (inputs, labels) are fed to x and y
        return model_eval(sess,
                          x,
                          y,
                          predictions,
                          inputs,
                          labels,
                          args={'batch_size': batch_size})

    # First model: trained on clean data only
    model = build_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Clean test accuracy of the first model
        clean_acc = accuracy_on(preds, x_test, y_test)
        report.clean_train_clean_eval = clean_acc
        print('Test accuracy on legitimate examples: %0.4f' % clean_acc)

    # Settings shared by both training runs
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate,
                        train_dir=train_dir,
                        filename=filename)

    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    ckpt_path = ckpt.model_checkpoint_path if ckpt is not None else False
    wrap = KerasModelWrapper(model)

    if not (load_model and ckpt_path):
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)
    else:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()

    # Clean accuracy on the training set
    if testing:
        report.train_clean_train_clean_eval = accuracy_on(
            preds, x_train, y_train)

    # Build the FGSM attack graph against the first model; the adversarial
    # input is treated as a constant (no gradient flows through the attack)
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    adv_x = tf.stop_gradient(fgsm.generate(x, **fgsm_params))
    preds_adv = model(adv_x)

    # Accuracy of the first model on adversarial test examples
    adv_acc = accuracy_on(preds_adv, x_test, y_test)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)
    report.clean_train_adv_eval = adv_acc

    # Same measurement on the training set
    if testing:
        report.train_clean_train_adv_eval = accuracy_on(
            preds_adv, x_train, y_train)

    print("Repeating the process, using adversarial training")
    # Second model: same architecture, trained with the attack in the loss
    model_2 = build_cnn()
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Adversarially trained model on clean test inputs
        clean_acc_2 = accuracy_on(preds_2, x_test, y_test)
        print('Test accuracy on legitimate examples: %0.4f' % clean_acc_2)
        report.adv_train_clean_eval = clean_acc_2

        # Adversarially trained model on adversarial test inputs
        adv_acc_2 = accuracy_on(preds_2_adv, x_test, y_test)
        print('Test accuracy on adversarial examples: %0.4f' % adv_acc_2)
        report.adv_train_adv_eval = adv_acc_2

    # Adversarial training, evaluated through evaluate_2
    train(sess,
          loss_2,
          x_train,
          y_train,
          evaluate=evaluate_2,
          args=train_params,
          rng=rng)

    # Training-set accuracies of the adversarially trained model
    if testing:
        report.train_adv_train_clean_eval = accuracy_on(
            preds_2, x_train, y_train)
        report.train_adv_train_adv_eval = accuracy_on(
            preds_2_adv, x_train, y_train)

    return report
示例#17
0
def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS,
                     filename=FILENAME,
                     train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):

    # Object used to keep track of (and return) key accuracies

    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    rng = np.random.RandomState()

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    nb_latent_size = 100
    # Get MNIST test data
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    #model_vae= vae_model(x, img_rows=img_rows, img_cols=img_cols,
    #                 channels=nchannels)

    wrap_vae = ModelVAE('wrap_vae')
    recon = wrap_vae.get_layer(x, 'RECON')
    #print("recon: ",recon)
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_ae):
        os.mkdir(train_dir_ae)

    #ckpt = tf.train.get_checkpoint_state(train_dir_ae)
    #print(train_dir_ae, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    #wrap_vae = KerasModelWrapper(model_vae)
    latent_dim = 20
    intermediate_dim = 128

    #train_ae(sess, global_loss, x_train, x_train, evaluate = evaluate_ae, args = train_params, rng = rng, var_list=wrap_vae.get_params())

    if clean_train_vae == True:
        print("Training VAE")
        loss = vae_loss(wrap_vae)
        train_ae(sess,
                 loss,
                 x_train,
                 x_train,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng,
                 var_list=wrap_vae.get_params())
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_vae.ckpt")
        print("saved model")

    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if (train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_ae,
                'filename': filename
            }
            #training with the saved model as starting point
            loss = SquaredError(wrap_vae)
            train_ae(sess,
                     loss,
                     x_train,
                     x_train,
                     evaluate=evaluate_vae,
                     args=train_params,
                     rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")

            evaluate_ae()

            print("Model loaded and trained for more epochs")

    num_classes = 10
    '''
  save_dir= 'models'
  model_name = 'cifar10_CNN.h5'
  model_path_cls = os.path.join(save_dir, model_name)
  '''
    cl_model = cnn_cl_model(img_rows=img_rows,
                            img_cols=img_cols,
                            channels=nchannels,
                            nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def do_eval_cls(preds, x_set, y_set, x_tar_set, report_key, is_adv=None):
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         x_t,
                         x_set,
                         y_set,
                         x_tar_set,
                         args=eval_params_cls)

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_cl,
                         x_t,
                         x_test,
                         y_test,
                         x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        #        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 3,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_cl):
        os.mkdir(train_dir_cl)

    #ckpt = tf.train.get_checkpoint_state(train_dir_cl)
    #print(train_dir_cl, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    wrap_cl = KerasModelWrapper(cl_model)

    if clean_train_cl == True:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess,
              loss_cl,
              x_train,
              y_train,
              evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params,
              rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("saved model at ", "train_dir/model_cnn_cl.ckpt")

    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_cnn_cl_vae.ckpt")
        print("Model loaded")
        evaluate()

        # Score trained model.
    '''
  scores = cl_model.evaluate(x_test, y_test, verbose=1)
  print('Test loss:', scores[0])
  print('Test accuracy:', scores[1])
  cl_model_wrap = KerasModelWrapper(cl_model)
` '''
    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_vae, wrap_cl, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * (nb_classes - 1)
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)

            #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])

            adv_input_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes - 1):
                    targ.append(y_test[idxs[curr_num]])
                adv_input_y.append(targ)

            adv_input_y = np.array(adv_input_y)

            adv_target_y = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(y_test[idxs[id]])
                adv_target_y.append(targ)

            adv_target_y = np.array(adv_target_y)

            #print("adv_input_y: \n", adv_input_y)
            #print("adv_target_y: \n", adv_target_y)

            adv_input_targets = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_test[idxs[id]])
                adv_input_targets.append(targ)
            adv_input_targets = np.array(adv_input_targets)

            adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1),
                                             img_rows, img_cols, nchannels))
            adv_input_targets = adv_input_targets.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_y = adv_input_y.reshape(
                source_samples * (nb_classes - 1), 10)
            adv_target_y = adv_target_y.reshape(
                source_samples * (nb_classes - 1), 10)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

    adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape(
        (source_samples * nb_classes, nb_classes))
    yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    #adv = sess.run(adv)

    #print("layer names: \n", wrap_vae.get_layer_names())
    recon_orig = wrap_vae.get_layer(x, 'RECON')
    recon_orig = sess.run(recon_orig, feed_dict={x: adv_inputs})
    recon_adv = wrap_vae.get_layer(x, 'RECON')
    recon_adv = sess.run(recon_adv, feed_dict={x: adv})
    pred_adv_recon = wrap_cl.get_logits(x)
    pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

    #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1)
    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
    shape = np.shape(adv_inputs)
    noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
    noise = pow(noise, 0.5)
    d1 = np.sum(np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
    d2 = np.sum(
        np.square(recon_adv - adv_input_targets)) / (np.shape(adv_inputs)[0])
    acc_1 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_target_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    acc_2 = (sum(
        np.argmax(pred_adv_recon, axis=-1) == np.argmax(adv_input_y, axis=-1))
             ) / (np.shape(adv_target_y)[0])
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)

    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        #rint(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
    # Finally, block & display a grid of all the adversarial examples

    if viz_enabled:

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2')

    #return report

    #adversarial training
    if (adv_train == True):

        print("starting adversarial training")
        #sess1 = tf.Session()
        adv_input_set = []
        adv_input_target_set = []

        for i in range(20):

            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]

            idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array([[instance] * (nb_classes - 1)
                                     for instance in x_train[idxs]],
                                    dtype=np.float32)
            adv_input_targets_2 = []
            for curr_num in range(nb_classes):
                targ = []
                for id in range(nb_classes):
                    if (id != curr_num):
                        targ.append(x_train[idxs[id]])
                adv_input_targets_2.append(targ)
            adv_input_targets_2 = np.array(adv_input_targets_2)

            adv_inputs_2 = adv_inputs_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))
            adv_input_targets_2 = adv_input_targets_2.reshape(
                (source_samples * (nb_classes - 1), img_rows, img_cols,
                 nchannels))

            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        adv_input_set = np.array(adv_input_set),
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        adv_input_set = np.reshape(
            adv_input_set,
            (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] *
             np.shape(adv_input_set)[2], np.shape(adv_input_set)[3],
             np.shape(adv_input_set)[4], np.shape(adv_input_set)[5]))
        adv_input_target_set = np.reshape(adv_input_target_set,
                                          (np.shape(adv_input_target_set)[0] *
                                           np.shape(adv_input_target_set)[1],
                                           np.shape(adv_input_target_set)[2],
                                           np.shape(adv_input_target_set)[3],
                                           np.shape(adv_input_target_set)[4]))

        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        #model_name = 'cifar10_AE_adv.h5'
        #model_path_ae = os.path.join(save_dir, model_name)

        model_ae_adv = ae_model(x,
                                img_rows=img_rows,
                                img_cols=img_cols,
                                channels=nchannels)
        recon = model_ae_adv(x)
        wrap_vae_adv = KerasModelWrapper(model_ae_adv)
        #print("recon: ",recon)
        #print("Defined TensorFlow model graph.")

        print("Training Adversarial AE")
        loss = SquaredError(wrap_vae_adv)
        train_ae(sess,
                 loss_2,
                 x_train_app,
                 x_train_aim,
                 evaluate=evaluate_ae,
                 args=train_params,
                 rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        cw2 = CarliniWagnerAE(wrap_vae_adv, wrap_cl, sess=sess)

        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_adv = wrap_vae_adv.get_layer(x, 'RECON')
        recon_orig = wrap_vae_adv.get_layer(x, 'RECON')
        recon_adv = sess.run(recon_adv, {x: adv_2})
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        if targeted:

            noise = reduce_sum(tf.square(adv_inputs - adv_2),
                               list(range(1, len(shape))))
            print("noise: ", noise)
        pred_adv_recon = cl_model.get_layer(recon_adv)
        #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
        #scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose = 1)
        #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
        #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i,
                                            j] = adv_2[i * (nb_classes - 1) +
                                                       j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i, j] = adv_2[i *
                                                          (nb_classes - 1) + j]

            #rint(grid_viz_data.shape)

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully found

        # Compute the average distortion introduced by the algorithm
        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print(
            'Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

        # Close TF session
        #sess.close()

        # Finally, block & display a grid of all the adversarial examples
        if viz_enabled:
            #_ = grid_visual(grid_viz_data)
            #_ = grid_visual(grid_viz_data_1)
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')

            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')

                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig1_vae_adv_trained')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols,
                                       (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

            # Draw the plot and return
            plt.savefig('cifar10_fig2_vae_adv_trained')

            #return report


#binarization defense
#if(binarization_defense == True or mean_filtering==True):
    if (binarization_defense == True):

        print("BINARIZATION")
        print("---------------------------")
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        #pred_adv = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        #pred_adv = sess.run(pred_adv, {x: recon_adv})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}
        if targeted:

            noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
            noise = pow(noise, 0.5)
            d1 = np.sum(
                np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
            d2 = np.sum(np.square(recon_adv - adv_input_targets)) / (
                np.shape(adv_inputs)[0])
            acc_1 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
            acc_2 = (sum(
                np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                    adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
            print("noise: ", noise)
            print("d1: ", d1)
            print("d2: ", d2)
            print("classifier acc_target: ", acc_1)
            print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_bin')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_bin')

    if (mean_filtering == True):

        print("MEAN FILTERING")
        print("---------------------------")
        adv = uniform_filter(adv, 2)

        recon_orig = wrap_vae.get_layer(x, 'RECON')
        recon_adv = wrap_vae.get_layer(x, 'RECON')
        pred_adv_recon = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})

        eval_params = {'batch_size': 90}

        noise = np.sum(np.square(adv - adv_inputs)) / (np.shape(adv)[0])
        noise = pow(noise, 0.5)
        d1 = np.sum(
            np.square(recon_adv - adv_inputs)) / (np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv -
                              adv_input_targets)) / (np.shape(adv_inputs)[0])
        acc_1 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_target_y, axis=-1))) / (np.shape(adv_target_y)[0])
        acc_2 = (sum(
            np.argmax(pred_adv_recon, axis=-1) == np.argmax(
                adv_input_y, axis=-1))) / (np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)

        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if (i == j):
                        grid_viz_data[i, j] = recon_orig[curr_class * 9]
                        grid_viz_data_1[i, j] = adv_inputs[curr_class * 9]
                        curr_class = curr_class + 1
                    else:
                        if (j > i):
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j - 1]
                            grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) +
                                                        j - 1]
                        else:
                            grid_viz_data[i,
                                          j] = recon_adv[i * (nb_classes - 1) +
                                                         j]
                            grid_viz_data_1[i,
                                            j] = adv[i * (nb_classes - 1) + j]

        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')

        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig1_mean')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (xx + 1) + (yy * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                else:
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])

        # Draw the plot and return
        plt.savefig('cifar10_vae_fig2_mean')
示例#18
0
文件: evaluate.py 项目: ATPGN/ATPGN
def evaluate(dataset='CIFAR100'):
    """Evaluate saved defense models under the Carlini-Wagner L2 attack.

    For every defense in ``Naive``, ``Goodfellow``, ``Madry`` and ``PGN`` the
    function looks for ``save/<dataset>/<defense>/<run>/model.joblib``.  Each
    model found is scored on the clean test set and attacked with
    ``CarliniWagnerL2``; an attack counts as successful when the prediction on
    the adversarial example differs from the label.  The clean accuracy and
    the mean L2 norm of the successful perturbations are recorded per defense.

    Results are cached in a ``cash_result`` file next to each model
    (``acc,l2mean,ctime``).  A cache entry is reused only while the stored
    creation time still matches the model file; an unreadable cache file is
    ignored and the model is re-evaluated.

    Finally, accuracy is plotted against mean L2 for each defense (sorted by
    mean L2) on the current matplotlib figure, which is saved to
    ``evaluate/<dataset>/robustness-curve.png``.

    Args:
        dataset: Either ``'CIFAR100'`` or ``'CIFAR10'``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
    """
    # Fail early with a clear message: without this an unknown dataset leaves
    # x / y / x_test / y_test unbound and only blows up much later.
    if dataset not in ('CIFAR100', 'CIFAR10'):
        raise ValueError(
            "Unsupported dataset %r: expected 'CIFAR100' or 'CIFAR10'" %
            (dataset, ))

    batch_size = 128
    test_num = 10000
    defense_list = ['Naive', 'Goodfellow', 'Madry', 'PGN']

    # Collect every saved model.  Paths are built with '/' on purpose: the
    # defense name is later recovered with model_path.split('/')[2].
    model_path_list = []
    for defense in defense_list:
        defense_dir = 'save/%s/%s' % (dataset, defense)
        if not os.path.isdir(defense_dir):
            # A defense that was never trained is simply absent from the plot.
            continue
        for run_name in os.listdir(defense_dir):
            candidate = '%s/%s/model.joblib' % (defense_dir, run_name)
            if os.path.exists(candidate):
                model_path_list.append(candidate)

    if dataset == 'CIFAR100':
        data = CIFAR100(test_start=0, test_end=test_num)
        nb_classes = 100
    else:
        data = CIFAR10(test_start=0, test_end=test_num)
        nb_classes = 10
    x_test, y_test = data.get_set('test')
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    sess = tf.Session()

    cw_params = {
        'batch_size': 128,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iterations': 100,
        'y': y
    }

    eval_params = {'batch_size': batch_size}

    def do_eval(preds, x_set, y_set, report_text):
        """Print and return the accuracy of ``preds`` on (x_set, y_set)."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        print('Test accuracy on %s: %0.4f' % (report_text, acc))
        return acc

    def get_adv_x_numpy(adv_x, attack_success_index, x_set, y_set):
        """Run the attack graph over x_set in fixed-size batches.

        Every feed has exactly ``batch_size`` rows; the tail of the last
        batch is padding left over from earlier batches and is sliced off
        before the per-batch outputs are concatenated.
        """
        result = []
        result_index = []
        nb_batches = int(math.ceil(float(len(x_set)) / batch_size))
        X_cur = np.zeros((batch_size, ) + x_set.shape[1:], dtype=x_set.dtype)
        Y_cur = np.zeros((batch_size, ) + y_set.shape[1:], dtype=y_set.dtype)
        for batch in range(nb_batches):
            start = batch * batch_size
            end = min(len(x_set), start + batch_size)
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = x_set[start:end]
            Y_cur[:cur_batch_size] = y_set[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            adv_x_numpy, success_index = sess.run(
                [adv_x, attack_success_index], feed_dict=feed_dict)
            result.append(adv_x_numpy[:cur_batch_size])
            result_index.append(success_index[:cur_batch_size])
        return np.concatenate(result, axis=0), np.concatenate(result_index,
                                                              axis=0)

    print(model_path_list)
    acc_dict = {}
    l2mean_dict = {}
    try:
        for model_path in model_path_list:
            defense = model_path.split('/')[2]
            acc_dict.setdefault(defense, [])
            l2mean_dict.setdefault(defense, [])

            cache_path = os.path.join(os.path.dirname(model_path),
                                      'cash_result')

            if os.path.exists(cache_path):
                with open(cache_path, 'r') as f:
                    cash_result_str = f.read()
                try:
                    acc, l2mean, model_create_time = cash_result_str.split(
                        ",")
                    cache_is_fresh = (int(model_create_time) == int(
                        os.path.getctime(model_path)))
                    cached_acc = float(acc)
                    cached_l2mean = float(l2mean)
                except ValueError:
                    # Empty / truncated / hand-edited cache file: fall
                    # through and recompute instead of aborting the run.
                    cache_is_fresh = False

                if cache_is_fresh:
                    acc_dict[defense].append(cached_acc)
                    l2mean_dict[defense].append(cached_l2mean)
                    print(model_path, acc, l2mean)
                    continue

            with sess.as_default():
                model = load(model_path)

            attack_model = CarliniWagnerL2(model, sess=sess)
            attack_params = cw_params

            preds = model.get_logits(x)
            acc = do_eval(preds, x_test[:test_num], y_test[:test_num],
                          'DEFENSE : %s' % defense)
            adv_x = attack_model.generate(x, **attack_params)
            preds_adv = model.get_logits(adv_x)
            # The attack succeeded wherever the adversarial prediction no
            # longer matches the label.
            attack_success_index = tf.math.not_equal(
                tf.argmax(preds_adv, axis=-1), tf.argmax(y, axis=-1))
            adv_x_numpy, success_index = get_adv_x_numpy(
                adv_x, attack_success_index, x_test[:test_num],
                y_test[:test_num])
            print('C&W attack success_rate = %f' % np.mean(success_index))

            # Mean L2 norm over successful adversarial examples only.
            # NOTE: this is nan when no attack succeeded (mean of an empty
            # array).
            l2mean = np.mean(
                np.sqrt(
                    np.sum(np.power(
                        adv_x_numpy[success_index] -
                        x_test[:test_num][success_index], 2),
                           axis=(1, 2, 3))))

            acc_dict[defense].append(acc)
            l2mean_dict[defense].append(l2mean)
            print(model_path, acc, l2mean)
            with open(cache_path, 'w') as f:
                f.write('%.4f,%.4f,%d' %
                        (acc, l2mean, os.path.getctime(model_path)))
    finally:
        # The session is local to this call; release it even on failure.
        sess.close()

    for defense in defense_list:
        if defense not in l2mean_dict:
            continue

        # Sort each defense's points by mean L2 so the line is drawn
        # left-to-right.
        l2mean_dict[defense] = np.array(l2mean_dict[defense])
        acc_dict[defense] = np.array(acc_dict[defense])
        arg_l2mean_dict = np.argsort(l2mean_dict[defense])
        l2mean_dict[defense] = l2mean_dict[defense][arg_l2mean_dict]
        acc_dict[defense] = acc_dict[defense][arg_l2mean_dict]
        plt.plot(l2mean_dict[defense], acc_dict[defense], '-o', label=defense)
    plt.legend()
    plt.xlabel('$\\rho_{cw}$')
    plt.ylabel('benign accuracy')
    plt.title("RESULT FOR %s" % dataset)

    fig_save_dir = 'evaluate/%s' % dataset
    if not os.path.exists(fig_save_dir):
        os.makedirs(fig_save_dir)
    plt.savefig('%s/robustness-curve.png' % fig_save_dir)
def mnist_ae(train_start=0, train_end=60000, test_start=0,
             test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE,
             clean_train=CLEAN_TRAIN,
             testing=False,
             backprop_through_attack=BACKPROP_THROUGH_ATTACK,
             num_threads=None,
             label_smoothing=0.1):
  """Attack a VAE with targeted FGSM and evaluate simple defenses.

  Despite the name, the data loaded here is CIFAR10. The function:

  1. trains or restores a VAE (``ModelVAE('wrap_ae')``),
  2. trains or restores a CNN classifier (``cnn_cl_model``),
  3. crafts FGSM examples that push each source image's reconstruction
     towards a target image of another class, and prints distortion,
     reconstruction distances and classifier accuracy on the
     reconstructions,
  4. optionally repeats the measurement after adversarial training of a
     second VAE, after binarizing the adversarial images, and after mean
     filtering them.

  Besides its parameters the function reads these names from the
  enclosing module: ``clean_train_vae``, ``clean_train_cl``,
  ``train_further``, ``targeted``, ``viz_enabled``,
  ``adversarial_training``, ``binarization`` and ``mean_filtering``.
  The attack inputs are only built when both ``targeted`` and
  ``viz_enabled`` are truthy; with any other combination the attack stage
  fails with ``NameError`` on ``adv_inputs``.

  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs for the initial VAE training
  :param batch_size: size of training/evaluation batches
  :param learning_rate: value stored in the ``train_params`` dictionaries
                        (the optimizers below are built with their own
                        fixed learning rates)
  :param clean_train: not referenced in the body
  :param testing: not referenced in the body
  :param backprop_through_attack: not referenced in the body
  :param num_threads: if truthy, the session is created with
                      ``intra_op_parallelism_threads=1``
  :param label_smoothing: float, amount of label smoothing for the
                          classifier's cross entropy
  :return: None (the ``AccuracyReport`` is filled in but not returned)
  """
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  source_samples = 10

  set_log_level(logging.DEBUG)

  # Create the one and only TF session. An earlier revision also created a
  # default-configured session first and then dropped it without closing.
  # NOTE(review): the value of num_threads is ignored; any truthy value
  # pins intra-op parallelism to a single thread.
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))
  print("Created TensorFlow session.")

  # Get CIFAR10 data
  data = CIFAR10(train_start=train_start, train_end=train_end,
                 test_start=test_start, test_end=test_end)
  # This augmented pipeline is not consumed below. It is kept because it
  # adds ops to the graph before the models are built.
  # NOTE(review): confirm whether seeded initialisation depends on that
  # before deleting it.
  dataset_train = data.to_tensorflow()[0]
  dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]
  print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

  # Define input TF placeholders: x is the source image, x_t the attack
  # target image. y_t is unused but kept so the graph contents do not change.
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))
  y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

  wrap_ae = ModelVAE('wrap_ae')
  recon = wrap_ae.get_layer(x, 'RECON')
  print("Defined TensorFlow model graph.")

  def evaluate_ae():
    # Reports the second value returned by model_eval_ae for the training
    # set reconstructed by `wrap_ae`.
    eval_params = {'batch_size': 128}
    _, d1, _, _, _ = model_eval_ae(sess, x, x_t, recon, x_train, x_train,
                                   args=eval_params)
    print("reconstruction distance: ", d1)

  #########################################################################
  # Stage 1: VAE
  #########################################################################
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
  }
  rng = np.random.RandomState([2017, 8, 30])

  if clean_train_vae == True:
    print("Training VAE")
    loss = vae_loss(wrap_ae)

    train_ae(sess, loss, x_train, x_train,
             tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5),
             evaluate=evaluate_ae,
             args=train_params, rng=rng, var_list=wrap_ae.get_params())

    saver = tf.train.Saver()
    saver.save(sess, "train_dir/model_vae_fgsm.ckpt")
    print("saved model")
  else:
    print("Loading VAE")
    saver = tf.train.Saver()
    # NOTE(review): restores model_vae.ckpt while the training branches
    # write model_vae_fgsm.ckpt; confirm this is intended.
    saver.restore(sess, "train_dir/model_vae.ckpt")
    evaluate_ae()
    if train_further:
      train_params = {
          'nb_epochs': 10,
          'batch_size': batch_size,
          'learning_rate': 0.0002,
      }
      # training with the saved model as starting point
      loss = SquaredError(wrap_ae)
      train_ae(sess, loss, x_train, x_train,
               optimizer=tf.train.AdamOptimizer(learning_rate=0.0002,
                                                beta1=0.5),
               evaluate=evaluate_ae,
               args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_vae_fgsm.ckpt")

      evaluate_ae()

      print("Model loaded and trained for more epochs")

  #########################################################################
  # Stage 2: CNN classifier
  #########################################################################
  cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                          channels=nchannels, nb_filters=64,
                          nb_classes=nb_classes)
  preds_cl = cl_model(x)

  def evaluate():
    # Evaluate the accuracy of the classifier on legitimate test examples
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, x, y, preds_cl, x_t, x_test, y_test, x_test,
                     args=eval_params)
    report.clean_train_clean_eval = acc
    print('Test accuracy on legitimate examples: %0.4f' % acc)

  # Re-seeded on purpose: the classifier stage starts from the same RNG
  # state regardless of how much the VAE stage consumed.
  rng = np.random.RandomState([2017, 8, 30])

  wrap_cl = KerasModelWrapper(cl_model)
  cl_ckpt_path = "train_dir/model_cnn_cl.ckpt"

  if clean_train_cl == True:
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }
    print("Training CNN Classifier")
    loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
    train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
          optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                              decay=1e-6),
          args=train_params, rng=rng)
    saver = tf.train.Saver()
    saver.save(sess, cl_ckpt_path)
    # Report the path that was actually written.
    print("saved model at ", cl_ckpt_path)
  else:
    print("Loading CNN Classifier")
    saver = tf.train.Saver()
    saver.restore(sess, cl_ckpt_path)
    evaluate()
    if train_further:
      # NOTE(review): train_dir_cl and filename are not defined in this
      # function; they must exist at module level for this branch to run.
      train_params = {
          'nb_epochs': 10,
          'batch_size': batch_size,
          'learning_rate': learning_rate,
          'train_dir': train_dir_cl,
          'filename': filename
      }
      loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
      train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
            optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                decay=1e-6),
            args=train_params, rng=rng)
      saver = tf.train.Saver()
      saver.save(sess, "train_dir/model_cl_fgsm.ckpt")
      print("Model loaded and trained further")
      evaluate()

  #########################################################################
  # Stage 3: craft adversarial examples with FGSM against the VAE
  #########################################################################
  nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
  print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
        ' adversarial examples')
  print("This could take some time ...")

  if viz_enabled:
    assert source_samples == nb_classes
    # First test-set index of every class.
    idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)]

  if targeted and viz_enabled:
    per_source = nb_classes - 1

    # Two image grids: reconstructions, and the inputs that produced them.
    grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

    class_x = x_test[idxs]
    class_y = y_test[idxs]
    other_classes = [(src, tgt)
                     for src in range(nb_classes)
                     for tgt in range(nb_classes) if tgt != src]

    # Row k = src * per_source + m pairs source class `src` with its m-th
    # target class, where the targets are all classes except `src` in
    # ascending order.
    adv_inputs = np.repeat(class_x, per_source, axis=0).astype(np.float32)
    adv_input_y = np.repeat(class_y, per_source, axis=0)
    adv_target_y = np.array([class_y[tgt] for _, tgt in other_classes])
    adv_input_targets = np.array([class_x[tgt] for _, tgt in other_classes])

  fgsm_params = {
      'eps': 0.3,
      'clip_min': 0.,
      'clip_max': 1.
  }

  fgsm = FastGradientMethodAe(wrap_ae, sess=sess)
  adv = fgsm.generate(x, x_t, **fgsm_params)
  adv = sess.run(adv, {x: adv_inputs, x_t: adv_input_targets})

  recon_orig, recon_adv, pred_adv_recon = _ae_reconstruct_and_classify(
      sess, x, wrap_ae, wrap_cl, adv_inputs, adv)
  _ae_report_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                            pred_adv_recon, adv_target_y, adv_input_y)

  if viz_enabled and targeted:
    _ae_fill_attack_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                          recon_adv, adv_inputs, adv, nb_classes)

  print('--------------------------------------')
  _ae_print_mean_l2(adv, adv_inputs)

  if viz_enabled:
    _ae_save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1')
    _ae_save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2')

  #########################################################################
  # Stage 4 (optional): adversarial training of a second VAE
  #########################################################################
  if adversarial_training:

    print("starting adversarial training")

    x_train_target = x_train.copy()
    y_train_target = y_train.copy()

    print("len of x_train_target and x_train: ",
          len(x_train_target), len(x_train))
    # For every training image draw a random target whose class differs.
    for ind in range(len(x_train)):
      r_ind = -1
      while np.argmax(y_train_target[ind]) == np.argmax(y_train[ind]):
        r_ind = rng.randint(0, len(x_train))
        y_train_target[ind] = y_train[r_ind]
      if r_ind > -1:
        x_train_target[ind] = x_train[r_ind]

    wrap_ae2 = ModelVAE('wrap_ae2')
    fgsm2 = FastGradientMethodAe(wrap_ae2, sess=sess)
    # NOTE(review): the training-set adversarial examples are crafted with
    # `fgsm` (the first VAE), not `fgsm2`; left as found.
    adv2 = fgsm.generate(x, x_t, **fgsm_params)

    adv_set = sess.run(adv2, {x: x_train, x_t: x_train_target})
    # Inputs are clean + adversarial; reconstruction targets are the clean
    # image in both halves.
    x_train_aim = np.append(x_train, x_train, axis=0)
    x_train_app = np.append(x_train, adv_set, axis=0)
    loss2 = vae_loss(wrap_ae2)
    train_params = {
        'nb_epochs': 5,
        'batch_size': batch_size,
        'learning_rate': learning_rate}

    train_ae(sess, loss2, x_train_app, x_train_aim,
             tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5),
             args=train_params, rng=rng, var_list=wrap_ae2.get_params())

    # NOTE(review): evaluate_ae() measures `wrap_ae`, not the freshly
    # trained `wrap_ae2`; left as found.
    evaluate_ae()

    adv3 = fgsm2.generate(x, x_t, **fgsm_params)
    adv3 = sess.run(adv3, {x: adv_inputs, x_t: adv_input_targets})

    recon_orig2, recon_adv2, pred_adv_recon2 = _ae_reconstruct_and_classify(
        sess, x, wrap_ae2, wrap_cl, adv_inputs, adv3)
    _ae_report_attack_metrics(adv3, adv_inputs, adv_input_targets,
                              recon_adv2, pred_adv_recon2, adv_target_y,
                              adv_input_y)

    if viz_enabled and targeted:
      _ae_fill_attack_grids(grid_viz_data, grid_viz_data_1, recon_orig2,
                            recon_adv2, adv_inputs, adv3, nb_classes)

    print('--------------------------------------')
    # Distortion of the examples crafted against the retrained VAE. This
    # previously reused `adv` and so repeated the stage-3 number.
    _ae_print_mean_l2(adv3, adv_inputs)

    if viz_enabled:
      _ae_save_grid_figure(grid_viz_data, 'cifar10_vae_fgsm_adv_fig1')
      _ae_save_grid_figure(grid_viz_data_1, 'cifar10_vae_fgsm_adv_fig2')

  #########################################################################
  # Stage 5 (optional): binarization defense
  #########################################################################
  if binarization:

    print("----------------")
    print("BINARIZATION")

    # Threshold the stage-3 adversarial images in place.
    adv[adv > 0.5] = 1.0
    adv[adv <= 0.5] = 0.0

    recon_orig, recon_adv, pred_adv_recon = _ae_reconstruct_and_classify(
        sess, x, wrap_ae, wrap_cl, adv_inputs, adv)

    if targeted:
      _ae_report_attack_metrics(adv, adv_inputs, adv_input_targets,
                                recon_adv, pred_adv_recon, adv_target_y,
                                adv_input_y)

    # The grids only exist when visualization is enabled, so plotting is
    # guarded together with filling them.
    if viz_enabled:
      _ae_fill_attack_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
      _ae_save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1_bin')
      _ae_save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2_bin')

  #########################################################################
  # Stage 6 (optional): mean-filter defense
  #########################################################################
  if mean_filtering == True:

    print("----------------")
    print("MEAN FILTERING")

    # Operates on `adv` as left by the previous stage (already binarized
    # when binarization is enabled).
    # NOTE(review): a scalar size of 2 is applied to every axis of `adv`.
    # If this is scipy.ndimage.uniform_filter and `adv` is laid out as
    # (batch, rows, cols, channels), neighbouring examples and channels
    # are averaged together as well - confirm this is intended.
    adv = uniform_filter(adv, 2)

    recon_orig, recon_adv, pred_adv_recon = _ae_reconstruct_and_classify(
        sess, x, wrap_ae, wrap_cl, adv_inputs, adv)

    _ae_report_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                              pred_adv_recon, adv_target_y, adv_input_y)

    if viz_enabled:
      _ae_fill_attack_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
      _ae_save_grid_figure(grid_viz_data, 'cifar10_fgsm_vae_fig1_mean')
      _ae_save_grid_figure(grid_viz_data_1, 'cifar10_fgsm_vae_fig2_mean')


def _ae_reconstruct_and_classify(sess, x, wrap_ae, wrap_cl, clean_inputs,
                                 adv_examples):
  """Reconstruct clean and adversarial inputs and classify the latter.

  Returns ``(recon_orig, recon_adv, logits)`` where ``logits`` are the
  classifier outputs for the reconstructions of ``adv_examples``.
  """
  # Three separate graph fetches are created on every call, in this order,
  # exactly as the inlined code did.
  recon_orig_op = wrap_ae.get_layer(x, 'RECON')
  recon_adv_op = wrap_ae.get_layer(x, 'RECON')
  logits_op = wrap_cl.get_logits(x)

  recon_orig = sess.run(recon_orig_op, feed_dict={x: clean_inputs})
  recon_adv = sess.run(recon_adv_op, feed_dict={x: adv_examples})
  logits = sess.run(logits_op, feed_dict={x: recon_adv})
  return recon_orig, recon_adv, logits


def _ae_report_attack_metrics(adv_examples, clean_inputs, target_inputs,
                              recon_adv, logits, target_labels, true_labels):
  """Print distortion, reconstruction distances and classifier accuracy."""
  nb_examples = np.shape(clean_inputs)[0]
  nb_labels = np.shape(target_labels)[0]

  # Root of the mean (over examples) squared L2 perturbation.
  noise = np.sum(np.square(adv_examples - clean_inputs)) / (
      np.shape(adv_examples)[0])
  noise = pow(noise, 0.5)
  # Mean squared distance of the adversarial reconstruction to the source
  # image (d1) and to the attack target image (d2).
  d1 = np.sum(np.square(recon_adv - clean_inputs)) / nb_examples
  d2 = np.sum(np.square(recon_adv - target_inputs)) / nb_examples

  predicted = np.argmax(logits, axis=-1)
  acc_1 = np.sum(predicted == np.argmax(target_labels, axis=-1)) / nb_labels
  acc_2 = np.sum(predicted == np.argmax(true_labels, axis=-1)) / nb_labels

  print("noise: ", noise)
  print("d1: ", d1)
  print("d2: ", d2)
  print("classifier acc_target: ", acc_1)
  print("classifier acc_true: ", acc_2)


def _ae_print_mean_l2(adv_examples, clean_inputs):
  """Print the mean per-example L2 norm of the perturbation."""
  mean_l2 = np.mean(np.sum((adv_examples - clean_inputs) ** 2,
                           axis=(1, 2, 3)) ** .5)
  print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2))


def _ae_fill_attack_grids(grid_recon, grid_inputs, recon_orig, recon_adv,
                          clean_inputs, adv_examples, nb_classes):
  """Fill the two nb_classes x nb_classes visualization grids in place.

  Diagonal cells show the clean image of a class (and its reconstruction);
  off-diagonal cells show adversarial images (and their reconstructions).
  The flat arrays hold ``nb_classes - 1`` rows per source class.
  """
  per_source = nb_classes - 1
  for j in range(nb_classes):
    for i in range(nb_classes):
      if i == j:
        grid_recon[i, j] = recon_orig[j * per_source]
        grid_inputs[i, j] = clean_inputs[j * per_source]
      else:
        # The flat layout skips the diagonal, so columns to the right of
        # it are shifted down by one.
        flat = i * per_source + (j - 1 if j > i else j)
        grid_recon[i, j] = recon_adv[flat]
        grid_inputs[i, j] = adv_examples[flat]


def _ae_save_grid_figure(grid, filename):
  """Render a 5-D image grid as a matrix of subplots and save it."""
  plt.ioff()
  figure = plt.figure()
  figure.canvas.set_window_title('Cleverhans: Grid Visualization')

  num_cols = grid.shape[0]
  num_rows = grid.shape[1]
  num_channels = grid.shape[4]
  for yy in range(num_rows):
    for xx in range(num_cols):
      figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
      plt.axis('off')

      # Single-channel images are drawn from a 2-D slice.
      if num_channels == 1:
        plt.imshow(grid[xx, yy, :, :, 0])
      else:
        plt.imshow(grid[xx, yy, :, :, :])

  plt.savefig(filename)
示例#20
0
def init_defense(sess, x, preds, batch_size, multi_noise=False):
    """Create and prime the ``tf_robustify`` statistics predictor.

    Loads the CIFAR10 training set, rescales it in place by 255, locates
    the ``MatMul`` op that produces ``preds`` and hands its two inputs
    (used as latent tensor and weights) to
    ``tf_robustify.collect_statistics`` together with the first 1000
    training examples.

    :param sess: session forwarded to ``collect_statistics``
    :param x: input tensor forwarded to ``collect_statistics``
    :param preds: logits tensor; its op chain is walked back through the
                  first input of each op until a ``MatMul`` is reached
    :param batch_size: batch size for the (unused) training pipeline and
                       for ``collect_statistics``
    :param multi_noise: if True, detect with nine noise settings (each
                        magnitude scaled by 255) instead of ``'n30.0'``
    :return: the object returned by ``collect_statistics``, after one
             ``next()`` call on it
    """
    cifar = CIFAR10()

    # The augmented pipeline is assembled as before although nothing below
    # reads it.
    train_size = cifar.x_train.shape[0]
    train_pipeline = cifar.to_tensorflow()[0]
    train_pipeline = train_pipeline.map(
        lambda img, label: (random_shift(random_horizontal_flip(img)), label),
        4)
    train_pipeline = train_pipeline.batch(batch_size)
    train_pipeline = train_pipeline.prefetch(16)

    x_train, y_train = cifar.get_set('train')
    x_train *= 255  # in-place rescale, matching clip_max=255 below

    nb_classes = y_train.shape[1]
    n_collect = 1000

    if not multi_noise:
        noise_eps_detect = 'n30.0'
    else:
        # Each entry is a one-letter tag followed by a magnitude; keep the
        # tag and multiply the magnitude by 255.
        raw_specs = 'n0.003,s0.003,u0.003,n0.005,s0.005,u0.005,s0.008,n0.008,u0.008'.split(
            ',')
        noise_eps_detect = [
            spec[0] + str(float(spec[1:]) * 255) for spec in raw_specs
        ]

    # these attack parameters are just for initializing the defense
    pgd_eps = 8.0
    pgd_step = pgd_eps / 5

    # Walk back from the logits to the MatMul that produced them.
    matmul_op = preds.op
    while matmul_op.type != 'MatMul':
        matmul_op = matmul_op.inputs[0].op
    latent_x_tensor, weights = matmul_op.inputs

    stats_kwargs = dict(
        logits_tensor=preds,
        latent_x_tensor=latent_x_tensor,
        weights=weights,
        nb_classes=nb_classes,
        p_ratio_cutoff=.999,
        noise_eps='n30.0',
        noise_eps_detect=noise_eps_detect,
        pgd_eps=pgd_eps,
        pgd_lr=pgd_step / pgd_eps,
        pgd_iters=10,
        save_alignments_dir=None,
        load_alignments_dir=None,
        clip_min=0,
        clip_max=255,
        batch_size=batch_size,
        num_noise_samples=256,
        debug_dict=None,
        debug=False,
        targeted=False,
        pgd_train=None,
        fit_classifier=True,
        clip_alignments=True,
        just_detect=True,
    )
    predictor = tf_robustify.collect_statistics(
        x_train[:n_collect], y_train[:n_collect], x, sess, **stats_kwargs)

    # Advance once before handing the predictor to the caller.
    next(predictor)
    return predictor
示例#21
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     model_path=MODEL_PATH,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """Continue training a wide-ResNet on CIFAR10 and save it.

    When ``clean_train`` is truthy, a ``make_wresnet`` model is built,
    weights are loaded with ``tf_model_load`` from a fixed path, the model
    is trained on the augmented CIFAR10 pipeline and the session is saved
    to ``model_path``. Otherwise nothing is trained and an untouched
    report is returned.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param model_path: checkpoint path passed to ``Saver.save``; its last
                       path component is also passed to ``train`` as
                       ``'filename'``
    :param learning_rate: stored in the training arguments (the optimizer
                          itself is built with a fixed rate)
    :param clean_train: run the training described above
    :param testing: if true, also record accuracy on the training set
    :param backprop_through_attack: not referenced in the body
    :param nb_filters: not referenced in the body
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: float, amount of label smoothing for cross
                            entropy
    :return: an AccuracyReport object
    """
    # Collects the accuracies measured below and is handed back to the caller.
    report = AccuracyReport()

    # Fixed graph-level seed, debug-level logging.
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session: single intra-op thread whenever num_threads is truthy.
    session_options = (dict(intra_op_parallelism_threads=1)
                       if num_threads else {})
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # CIFAR10 arrays plus an augmented, batched, prefetching tf.data pipeline.
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda img, label: (random_shift(random_horizontal_flip(img)), label),
        4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Image geometry and class count are read off the test split.
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Input and label placeholders.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Arguments shared by the training and evaluation helpers.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.basename(model_path)
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Store the accuracy on the report; print it only when the caller
        # states whether the examples are adversarial.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        report_text = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if not clean_train:
        return report

    print('start')
    model = make_wresnet(scope='model1')
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)
    tf_model_load(
        sess,
        '/nfs/nas4/data-hanwei/data-hanwei/DATA/models/wresnet/cifar1')

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.0008,
                                           momentum=0)
    train(sess,
          x,
          y,
          model,
          None,
          None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params(),
          optimizer=optimizer)

    # Persist the trained session.
    saver = tf.train.Saver()
    saver.save(sess, model_path)

    # Optionally record the training-set accuracy as well.
    if testing:
        do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
示例#22
0
def train_deflecting(dataset_name=DATASET, train_start=0, train_end=TRAIN_END, test_start=0,
                     test_end=TEST_END, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     num_capsules_output=NUM_CAPSULES_OUTPUT,
                     output_atoms = OUTPUT_ATOMS,
                     num_routing = NUM_ROUTING,
                     learning_rate=LEARNING_RATE,
                     nb_filters=NB_FILTERS, num_threads=None):
  """
  Cleverhans-style tutorial that trains a deflecting model based on CapsLayer
  (CapsNetRecons trained with MarginCycLoss) on SVHN or CIFAR10.

  :param dataset_name: which dataset to train on, "SVHN" or "CIFAR10"
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param num_capsules_output: number of output capsules
  :param output_atoms: size of each capsule vector
  :param num_routing: number of routings in capsule layer
  :param learning_rate: learning rate for training
  :param nb_filters: forwarded to the CapsNetRecons constructor
  :param num_threads: if truthy, the session is limited to a single
                      intra-op thread (the numeric value is not used)
  :raises ValueError: if dataset_name is neither "SVHN" nor "CIFAR10"
  :return: None; test accuracy is printed by the evaluation callback
  """

  # Fail fast on an unsupported dataset. Without this check the code below
  # would fall through both branches and die with a NameError on `data`,
  # after a session had already been created.
  if dataset_name not in ("SVHN", "CIFAR10"):
    raise ValueError("Unsupported dataset_name: %r (expected 'SVHN' or "
                     "'CIFAR10')" % (dataset_name,))

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  # Create TF session
  # NOTE(review): any truthy num_threads pins intra-op parallelism to 1;
  # the requested thread count itself is ignored -- confirm this is intended.
  if num_threads:
    config_args = dict(intra_op_parallelism_threads=1)
  else:
    config_args = {}
  sess = tf.Session(config=tf.ConfigProto(**config_args))

  # Load the requested dataset
  if dataset_name == "SVHN":
    data = SVHN(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  else:  # "CIFAR10" (validated above)
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
  dataset_size = data.x_train.shape[0]
  dataset_train = data.to_tensorflow()[0]

  # Data augmentation: random shifts for both datasets, plus horizontal
  # flips for CIFAR10 only.
  if dataset_name == "SVHN":
    dataset_train = dataset_train.map(lambda x, y: (random_shift((x)), y), 4)
  else:
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)

  dataset_train = dataset_train.batch(batch_size)
  dataset_train = dataset_train.prefetch(16)
  x_train, y_train = data.get_set('train')
  x_test, y_test = data.get_set('test')

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_test.shape[1:4]
  nb_classes = y_test.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  eval_params = {'batch_size': batch_size}

  rng = np.random.RandomState([2017, 8, 30])

  model = CapsNetRecons(dataset_name, nb_classes, nb_filters,
                        input_shape=[batch_size, img_rows, img_cols, nchannels],
                        num_capsules_output=num_capsules_output,
                        output_atoms=output_atoms,
                        num_routing=num_routing)
  # Trainable variables are collected from the scope named after the dataset.
  var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=dataset_name)

  preds = model.get_logits(x)
  loss = MarginCycLoss(model)

  def evaluate():
    """Evaluate on the clean test set, print and return the accuracy."""
    acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    print('Test accuracy on %s examples: %0.4f' % ("clean", acc))
    return acc

  train(sess, loss, None, None,
        dataset_train=dataset_train, dataset_size=dataset_size,
        evaluate=evaluate, args=train_params, rng=rng,
        var_list=var_lists)
def cifar10_tutorial_bim(train_start=0,
                         train_end=60000,
                         test_start=0,
                         test_end=10000,
                         viz_enabled=VIZ_ENABLED,
                         nb_epochs=NB_EPOCHS,
                         batch_size=BATCH_SIZE,
                         source_samples=SOURCE_SAMPLES,
                         learning_rate=LEARNING_RATE,
                         attack_iterations=ATTACK_ITERATIONS,
                         model_path=MODEL_PATH,
                         targeted=TARGETED,
                         noise_output=NOISE_OUTPUT):
    """
    CIFAR10 tutorial for the Basic Iterative Method (BIM) attack.

    Trains (or loads from ``model_path``) a ModelAllConvolutional classifier,
    evaluates it on the clean test set, crafts BIM adversarial examples and
    reports their success rate and average L2 distortion.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) save a grid image of adversarial examples;
                        requires source_samples == number of classes
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: currently unused; the number of BIM iterations
                              is fixed by ``nb_iter`` in the attack parameters
                              below (NOTE(review): confirm whether it should
                              drive ``nb_iter``)
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :param noise_output: if True, the visualisation shows the perturbation
                         (adversarial minus original) instead of the
                         adversarial image
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 data
    cifar10 = CIFAR10(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)
    x_train, y_train = cifar10.get_set('train')
    x_test, y_test = cifar10.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Basic Iterative Method's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a BIM attack object
    bim = BasicIterativeMethod(model, sess=sess)

    # Pick the source images (and their true labels). For visualisation we
    # take the first test example of every class; otherwise the first
    # `source_samples` test examples.
    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        sources = x_test[idxs]
        source_labels = y_test[idxs]
    else:
        sources = x_test[:source_samples]
        source_labels = y_test[:source_samples]

    if targeted:
        # Every source image is repeated once per target class, and paired
        # with the one-hot encoding of that target class.
        adv_inputs = np.repeat(sources, nb_classes, axis=0).astype(np.float32)
        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                         (source_samples, 1))
    else:
        adv_inputs = sources
        adv_ys = None

    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    if targeted:
        # Hand the target labels to the attack; without them the attack runs
        # untargeted even though it is evaluated against adv_ys below.
        bim_params['y_target'] = adv_ys

    adv = bim.generate_np(adv_inputs, **bim_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        # Success = the model predicts the requested target class.
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        # Success = the model no longer predicts the true class.
        acc_on_adv = model_eval(sess,
                                x,
                                y,
                                preds,
                                adv,
                                source_labels,
                                args=eval_params)
        adv_accuracy = 1 - acc_on_adv

    if viz_enabled:
        # One image per source class. In targeted mode `adv` holds nb_classes
        # consecutive entries per source image (we show the first one); in
        # untargeted mode it holds exactly one entry per source image.
        grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')
        stride = nb_classes if targeted else 1
        for i in range(nb_classes):
            idx = i * stride
            if noise_output:
                image = adv[idx] - adv_inputs[idx]
            else:
                image = adv[idx]
            grid_viz_data[i, 0] = image

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    avg_l2 = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(avg_l2))

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
        Modified version of cleverhans.plot.pyplot: draw ``data`` (indexed as
        [column, row, height, width, channel]) as a grid of images, save the
        figure to ``path`` and return it.
        """
        figure = plt.figure()

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for row in range(num_rows):
            for col in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (col + 1) + (row * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[col, row, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[col, row, :, :, :])

        # Draw the plot and return
        plt.savefig(path)
        return figure

    # Finally, save a grid of the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/bim_cifar10_noise.png"
        else:
            image_name = "output/bim_cifar10.png"
        _ = save_visual(grid_viz_data, image_name)

    return report
示例#24
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     retrain=False):
    """
    CIFAR10 cleverhans tutorial evaluated against a colour-shift attack.

    For each checkpoint number, a trained model is loaded if its ``.meta``
    file exists, otherwise the model is trained with ``train_with_noise``.
    The test set is then passed through ``color_shift_attack`` and the
    resulting accuracy and distortion are printed.

    Besides its arguments, the function reads the names ``save_dir``,
    ``filename``, ``save``, ``discretizeColor``, ``num_trials`` and ``type``
    from the enclosing module scope.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if false, nothing is trained or evaluated and the
                        empty report is returned
    :param testing: not used in this function
    :param backprop_through_attack: not used in this function
    :param nb_filters: forwarded to the ModelAllConvolutional constructor
    :param num_threads: if truthy, the session uses one intra-op thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param retrain: forwarded to train_with_noise on the first training call;
                    reset to False afterwards
    :return: an AccuracyReport object
    """

    # Accumulates the key accuracies and is handed back to the caller
    report = AccuracyReport()

    # Fixed graph-level seed for reproducibility, verbose logging
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session, optionally restricted to a single intra-op thread
    config_args = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # CIFAR10 data plus an augmented (shift + horizontal flip) tf.data
    # pipeline. The pipeline is built here but the training call below feeds
    # the numpy arrays instead.
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = (
        data.to_tensorflow()[0]
        .map(lambda img, label:
             (random_shift(random_horizontal_flip(img)), label), 4)
        .batch(batch_size)
        .prefetch(16))
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # (Disabled experiments at this point in the original: restricting the
    # test set to a slice, adjusting hue/saturation of x_test, and converting
    # x_train with convert_uniimage.)

    # Image geometry and class count are taken from the test split
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Input / label placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training / evaluation configuration
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    # Kept for the (currently disabled) gradient-based attack experiments
    fgsm_params = {'eps': 8 / 255, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                ae=None,
                type=None,
                datasetName=None,
                discretizeColor=1):
        """Run model_eval, store the accuracy on the report under
        ``report_key`` and return ``(accuracy, distortion)``."""
        accuracy, distortion = model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          x_set,
                                          y_set,
                                          args=eval_params,
                                          is_adv=is_adv,
                                          ae=ae,
                                          type=type,
                                          datasetName=datasetName,
                                          discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        # Only print when the caller said which kind of examples these are
        if is_adv is not None:
            report_text = 'adversarial' if is_adv else 'legitimate'
            print('Test accuracy on %s examples: %0.4f' %
                  (report_text, accuracy))

        return accuracy, distortion

    if not clean_train:
        return report

    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
        """Evaluation callback used during training: clean test accuracy."""
        do_eval(preds,
                x_test,
                y_test,
                'clean_train_clean_eval',
                False,
                type=type,
                datasetName="CIFAR10",
                discretizeColor=discretizeColor)

    # Checkpoint numbers to visit: multiples of `increment` up to 1000
    checkpoint_nums = []
    current, increment = 0, 1000
    while current < 1000:
        current += increment
        checkpoint_nums.append(current)

    accuracies = []
    distortions = []
    for checkpoint_num in checkpoint_nums:
        checkpoint_name = filename + "-" + str(checkpoint_num)
        model_path = os.path.join(save_dir, checkpoint_name)

        print("Trying to load trained model from: " + model_path)
        if os.path.exists(model_path + ".meta"):
            tf_model_load(sess, model_path)
            print("Load trained model")
        else:
            train_with_noise(sess,
                             loss,
                             x_train,
                             y_train,
                             evaluate=evaluate,
                             args=train_params,
                             rng=rng,
                             var_list=model.get_params(),
                             save=save,
                             type=type,
                             datasetName="CIFAR10",
                             retrain=retrain,
                             discretizeColor=discretizeColor)
            # Only the first training call may be a retrain
            retrain = False

        # Generate semantic (colour-shift) adversarial examples from copies
        # of the test data, then score the model on them.
        adv_x, adv_y = color_shift_attack(sess,
                                          x,
                                          y,
                                          np.copy(x_test),
                                          np.copy(y_test),
                                          preds,
                                          args=eval_params,
                                          num_trials=num_trials)
        accuracy, distortion = do_eval(preds,
                                       np.copy(adv_x),
                                       np.copy(adv_y),
                                       'clean_train_clean_eval',
                                       False,
                                       type=type,
                                       datasetName="CIFAR10",
                                       discretizeColor=discretizeColor)

        # (Disabled experiments at this point in the original: evaluating on
        # the unmodified test set, and FGSM / BIM / MIM attacks.)

        distortions.append(distortion)
        accuracies.append(accuracy)
        print(accuracy)
        print(distortion)

    # Summary over all visited checkpoints
    print("accuracy:")
    for value in accuracies:
        print(value)

    print("distortion:")
    for value in distortions:
        print(value)

    return report
示例#25
0
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     adversarial_training=ADVERSARIAL_TRAINING):
    """
    CIFAR10 cleverhans tutorial using the Basic Iterative Method (BIM).

    Optionally trains a clean model and evaluates it against BIM for 20
    increasing step sizes, then optionally trains a second model with BIM
    adversarial training and evaluates it on clean and adversarial inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: forwarded to the ModelAllConvolutional constructor
    :param num_threads: if truthy, the session uses one intra-op thread
                        (the numeric value is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: True means using adversarial training
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        # allow ops to fall back to another device when placement fails
        config_args = dict(allow_soft_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack configuration for the CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    bim_params = {
        'eps': 0.5,
        'clip_min': 0.,
        'eps_iter': 0.002,
        'nb_iter': 10,
        'clip_max': 1.,
        'ord': np.inf
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate ``preds`` on (x_set, y_set), store the accuracy on the
        report under ``report_key`` and print it when ``is_adv`` is given."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelAllConvolutional('model1',
                                      nb_classes,
                                      nb_filters,
                                      input_shape=[32, 32, 3])

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            """Evaluation callback: clean test accuracy of the clean model."""
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        # The evaluate callback is handed to train() so evaluation can happen
        # while training.
        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Sweep the BIM step size: 20 evaluations, increasing eps_iter by
        # 0.002 each time. The sweep works on a copy so that the attack
        # configuration used for adversarial training below does not depend
        # on whether this clean-training phase ran.
        sweep_params = dict(bim_params)
        for _ in range(20):
            bim = BasicIterativeMethod(model, sess=sess)
            adv_x = bim.generate(x, **sweep_params)
            preds_adv = model.get_logits(adv_x)
            # Evaluate the accuracy of the CIFAR10 model on adversarial
            # examples
            print("eps:%0.2f" %
                  (sweep_params["eps_iter"] * sweep_params['nb_iter']))
            do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            sweep_params["eps_iter"] = sweep_params["eps_iter"] + 0.002

        # Calculate training error (against the last attack of the sweep)
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    if not adversarial_training:
        return report

    print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to BasicIterativeMethod
    model2 = ModelAllConvolutional('model2',
                                   nb_classes,
                                   nb_filters,
                                   input_shape=[32, 32, 3])
    bim2 = BasicIterativeMethod(model2, sess=sess)

    def attack(x):
        """Build the BIM adversarial-example graph for input tensor ``x``."""
        return bim2.generate(x, **bim_params)

    # add attack to loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # Treat the adversarial examples as constants so that gradients do
        # not flow through their construction. Backpropagating through the
        # attack increases the cost of training, but gives the defender the
        # ability to anticipate how the attacker will change their strategy
        # in response to updates to the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        """Evaluation callback for the adversarially trained model."""
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          None,
          None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
示例#26
0
def cifar10_tutorial_jsma(train_start=0,
                          train_end=60000,
                          test_start=0,
                          test_end=10000,
                          viz_enabled=VIZ_ENABLED,
                          nb_epochs=NB_EPOCHS,
                          batch_size=BATCH_SIZE,
                          source_samples=SOURCE_SAMPLES,
                          learning_rate=LEARNING_RATE,
                          model_path=MODEL_PATH,
                          noise_output=NOISE_OUTPUT):
    """
    CIFAR10 tutorial for the Jacobian-based saliency map approach (JSMA).

    Trains a ModelAllConvolutional classifier, measures its accuracy on the
    clean test set, then runs a targeted JSMA attack on the first
    ``source_samples`` test images, once for every class returned by
    ``other_classes``. Success rate and perturbation statistics are printed,
    and a grid image is optionally written to disk.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) save a grid image of the crafted examples
                        under ``output/``
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: only its final path component is used, as the
                       ``filename`` entry of the training arguments
    :param noise_output: (boolean) if true, the grid shows the perturbation
                         (adversarial minus original) instead of the
                         adversarial image itself
    :return: an AccuracyReport object with ``clean_train_clean_eval`` and
             ``clean_train_adv_eval`` filled in
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    # Everything that needs the session lives in the try block so that the
    # session is released even when training or the attack raises.
    try:
        set_log_level(logging.DEBUG)

        # Get CIFAR10 data
        cifar10 = CIFAR10(train_start=train_start,
                          train_end=train_end,
                          test_start=test_start,
                          test_end=test_end)
        x_train, y_train = cifar10.get_set('train')
        x_test, y_test = cifar10.get_set('test')

        # Obtain Image Parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]

        # Define input TF placeholder
        x = tf.placeholder(tf.float32,
                           shape=(None, img_rows, img_cols, nchannels))
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))

        nb_filters = 64
        # Define TF model graph
        model = ModelAllConvolutional('model1',
                                      nb_classes,
                                      nb_filters,
                                      input_shape=[32, 32, 3])
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)
        print("Defined TensorFlow model graph.")

        #######################################################################
        # Training the model using TensorFlow
        #######################################################################

        # Train a CIFAR10 model
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'filename': os.path.split(model_path)[-1]
        }
        sess.run(tf.global_variables_initializer())
        rng = np.random.RandomState([2017, 8, 30])
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)

        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, x_test, y_test,
                              args=eval_params)
        assert x_test.shape[0] == test_end - test_start, x_test.shape
        print('Test accuracy on legitimate test examples: {0}'.format(
            accuracy))
        report.clean_train_clean_eval = accuracy

        #######################################################################
        # Craft adversarial examples using the Jacobian-based saliency map
        # approach
        #######################################################################
        print('Crafting ' + str(source_samples) + ' * ' +
              str(nb_classes - 1) + ' adversarial examples')

        # Keep track of success (adversarial example classified in target)
        results = np.zeros((nb_classes, source_samples), dtype='i')

        # Rate of perturbed features for each test set example and target
        # class
        perturbations = np.zeros((nb_classes, source_samples), dtype='f')

        # Initialize our array for grid visualization: one image per class
        grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

        # Instantiate a SaliencyMapMethod attack object
        jsma = SaliencyMapMethod(model, sess=sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        # Per source class, the most recently attacked sample and the last
        # adversarial example crafted from it (later writes overwrite earlier
        # ones for the same class).
        adv_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                           dtype='f')
        sample_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                              dtype='f')

        # Loop over the samples we want to perturb into adversarial examples
        for sample_ind in xrange(0, source_samples):
            print('--------------------------------------')
            print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
            sample = x_test[sample_ind:(sample_ind + 1)]

            # We want to find an adversarial example for each possible target
            # class (i.e. all classes that differ from the label given in the
            # dataset)
            current_class = int(np.argmax(y_test[sample_ind]))
            target_classes = other_classes(nb_classes, current_class)

            # Loop over all target classes
            for target in target_classes:
                print('Generating adv. example for target class %i' % target)
                # This call runs the Jacobian-based saliency map approach
                one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
                one_hot_target[0, target] = 1
                jsma_params['y_target'] = one_hot_target
                adv_x = jsma.generate_np(sample, **jsma_params)
                adv_all[current_class] = adv_x
                sample_all[current_class] = sample

                # Check if success was achieved
                res = int(model_argmax(sess, x, preds, adv_x) == target)

                # Compute the fraction of modified features
                adv_x_reshape = adv_x.reshape(-1)
                test_in_reshape = x_test[sample_ind].reshape(-1)
                nb_changed = np.count_nonzero(
                    adv_x_reshape != test_in_reshape)
                percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

                # Update the arrays for later analysis
                results[target, sample_ind] = res
                perturbations[target, sample_ind] = percent_perturb

        print('--------------------------------------')

        # Compute the number of adversarial examples that were successfully
        # found. Each sample has one slot (its own class) that is never
        # attacked, so rates are normalised by the number of attacks actually
        # attempted rather than by the size of the bookkeeping arrays.
        nb_targets_tried = (nb_classes - 1) * source_samples
        if nb_targets_tried > 0:
            succ_rate = float(np.sum(results)) / nb_targets_tried
            percent_perturbed = (float(np.sum(perturbations)) /
                                 nb_targets_tried)
        else:
            # Nothing was attacked; avoid dividing by zero
            succ_rate = 0.
            percent_perturbed = 0.
        print('Avg. rate of successful adv. examples {0:.4f}'.format(
            succ_rate))
        report.clean_train_adv_eval = 1. - succ_rate

        # Average distortion introduced by the algorithm
        print('Avg. rate of perturbed features {0:.4f}'.format(
            percent_perturbed))

        # Average distortion over successful attacks only: select the
        # successful entries instead of zeroing the failures and averaging
        # over everything, which would understate the value.
        success_mask = results == 1
        if np.any(success_mask):
            percent_perturb_succ = float(np.mean(perturbations[success_mask]))
        else:
            percent_perturb_succ = 0.
        print('Avg. rate of perturbed features for successful '
              'adversarial examples {0:.4f}'.format(percent_perturb_succ))

        # Average L2 norm of the stored perturbations.
        # NOTE(review): rows for source classes that never occurred among the
        # attacked samples stay zero and are included in this mean.
        l2_norm = np.mean(
            np.sum((adv_all - sample_all)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

        # Fill the visualization grid with either the noise or the
        # adversarial image of each source class
        for i in range(nb_classes):
            if noise_output:
                image = adv_all[i] - sample_all[i]
            else:
                image = adv_all[i]
            grid_viz_data[i, 0] = image
    finally:
        # Close TF session
        sess.close()

    def save_visual(data, path):
        """
        Modified version of cleverhans.plot.pyplot: draw ``data`` (indexed
        as [column, row, height, width, channel]) as a grid of images and
        save the figure to ``path``.
        """
        import matplotlib.pyplot as plt

        # savefig does not create missing directories
        out_dir = os.path.dirname(path)
        if out_dir and not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        figure = plt.figure()

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for row in range(num_rows):
            for col in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (col + 1) + (row * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[col, row, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[col, row, :, :, :])

        # Write the plot to disk, then release the figure
        plt.savefig(path)
        plt.close(figure)

    # Finally, save a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/jsma_cifar10_noise.png"
        else:
            image_name = "output/jsma_cifar10.png"
        save_visual(grid_viz_data, image_name)

    return report
示例#27
0
def cifar10_tutorial(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    clean_train=CLEAN_TRAIN,
    testing=False,
    backprop_through_attack=BACKPROP_THROUGH_ATTACK,
    nb_filters=NB_FILTERS,
    num_threads=None,
    label_smoothing=0.1,
):
    """
    CIFAR10 cleverhans tutorial: clean training followed by FGSM
    adversarial training.

    Optionally trains a first model on clean data and evaluates it on clean
    and FGSM inputs, then always trains a second model whose loss is built
    with the FGSM attack and evaluates it the same way.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: third positional argument handed to
                       ModelAllConvolutional for both models
    :param num_threads: any truthy value limits the session to a single
                        intra-op thread; the value itself is not forwarded
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Accumulates every accuracy measured below and is handed back at the end
    report = AccuracyReport()

    # Fixed TF seed for reproducibility, debug-level logging for visibility
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session setup; see the num_threads note in the docstring
    session_kwargs = {'intra_op_parallelism_threads': 1} if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_kwargs))

    # Load CIFAR10 and build the augmented, batched training pipeline
    cifar = CIFAR10(train_start=train_start,
                    train_end=train_end,
                    test_start=test_start,
                    test_end=test_end)
    dataset_size = cifar.x_train.shape[0]
    dataset_train = (
        cifar.to_tensorflow()[0]
        .map(lambda image, label:
             (random_shift(random_horizontal_flip(image)), label), 4)
        .batch(batch_size)
        .prefetch(16)
    )
    x_train, y_train = cifar.get_set('train')
    x_test, y_test = cifar.get_set('test')

    # Image geometry and class count are read off the test split
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Graph inputs
    images_ph = tf.placeholder(
        tf.float32, shape=(None, img_rows, img_cols, nchannels))
    labels_ph = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Hyper-parameters shared by both training runs
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    fgsm_params = dict(eps=0.3, clip_min=0.0, clip_max=1.0)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Score preds on (x_set, y_set), store the result on the report
        under report_key, and print it unless is_adv is None."""
        accuracy = model_eval(sess, images_ph, labels_ph, preds, x_set, y_set,
                              args=eval_params)
        setattr(report, report_key, accuracy)
        if is_adv is None:
            return
        kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (kind, accuracy))

    if clean_train:
        # Baseline CIFAR10 model trained on clean examples only
        model = ModelAllConvolutional('model1',
                                      nb_classes,
                                      nb_filters,
                                      input_shape=[32, 32, 3])
        preds = model.get_logits(images_ph)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=lambda: do_eval(preds, x_test, y_test,
                                       'clean_train_clean_eval', False),
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Training-set accuracy, recorded only for unit tests
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Build the Fast Gradient Sign Method (FGSM) attack graph on top of
        # the baseline model
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(images_ph, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Accuracy of the baseline CIFAR10 model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Training-set accuracy under attack, recorded only for unit tests
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Second model, trained to be robust to FastGradientMethod
    model2 = ModelAllConvolutional('model2',
                                   nb_classes,
                                   nb_filters,
                                   input_shape=[32, 32, 3])
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        """Return the FGSM perturbation of x computed against model2."""
        return fgsm2.generate(x, **fgsm_params)

    # The attack is handed to the loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(images_ph)
    adv_x2 = attack(images_ph)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        """Report test accuracy of the adversarially trained model, first on
        legitimate inputs and then on adversarial ones."""
        for logits, key, adversarial in (
                (preds2, 'adv_train_clean_eval', False),
                (preds2_adv, 'adv_train_adv_eval', True)):
            do_eval(logits, x_test, y_test, key, adversarial)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          None,
          None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=model2.get_params())

    # Training-set accuracies, recorded only for unit tests
    if testing:
        for logits, key in ((preds2, 'train_adv_train_clean_eval'),
                            (preds2_adv, 'train_adv_train_adv_eval')):
            do_eval(logits, x_train, y_train, key)

    return report
示例#28
0
def cifar10_tutorial(train_start=0,
                     train_end=50000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
    CIFAR10 cleverhans tutorial: FGSM attack followed by anisotropic
    diffusion filtering.

    Builds a ModelAllConvolutional classifier, optionally trains it on clean
    data and/or restores it from a checkpoint, evaluates it on FGSM
    adversarial examples, then filters those adversarial examples with
    ``filter.anisotropic_diffusion`` and prints the accuracy on the filtered
    images.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, train the model on clean examples before
                        attacking it
    :param testing: if true, load ``./checkpoint_dir/clean_model_100.ckpt``
                    into the session (after any training) and record the
                    clean test accuracy
    :param backprop_through_attack: not used by this variant; kept so the
                                    signature matches the other tutorials
    :param nb_filters: third positional argument handed to
                       ModelAllConvolutional
    :param num_threads: any truthy value limits the session to a single
                        intra-op thread; the value itself is not forwarded
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Hyper-parameters for training, evaluation and the FGSM attack
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.13, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Score preds on (x_set, y_set), store the result on the report
        under report_key, and print it unless is_adv is None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    # NOTE(review): the model is only given weights by the training and/or
    # checkpoint steps below; with clean_train and testing both false neither
    # runs -- confirm callers never rely on that combination.
    model = ModelAllConvolutional('model1',
                                  nb_classes,
                                  nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)

    if clean_train:
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

    # Load the saved model and compute its accuracy on the clean test set
    if testing:
        tf_model_load(sess, file_path="./checkpoint_dir/clean_model_100.ckpt")
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and
    # graph
    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv = model.get_logits(adv_x)

    # Evaluate the accuracy of the CIFAR10 model on adversarial examples
    do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

    # Generate the adversarial test set in chunks of 1000 images. The chunk
    # boundaries are derived from the actual test-set size so that every
    # image is covered (and no empty batch is fed) whatever test_start and
    # test_end are.
    nb_test = x_test.shape[0]
    adv_chunk = 1000
    x_test_adv = np.zeros(shape=x_test.shape)
    for start in range(0, nb_test, adv_chunk):
        stop = start + adv_chunk
        x_test_adv[start:stop] = adv_x.eval(
            session=sess, feed_dict={x: x_test[start:stop]})

    # Apply anisotropic diffusion to each adversarial sample
    x_test_filtered = np.zeros(shape=x_test_adv.shape)
    for i, adv_image in enumerate(x_test_adv):
        x_test_filtered[i] = filter.anisotropic_diffusion(adv_image)

    # Accuracy of the model on the filtered adversarial samples
    acc = model_eval(sess,
                     x,
                     y,
                     preds,
                     x_test_filtered,
                     y_test,
                     args=eval_params)
    print("acc after anisotropic diffusion is {}".format(acc))

    return report