class AdverseCNN:
    """Wrapper that trains a ``ModelBasicCNN`` against FGSM adversarial examples.

    The methods rely on names that are not defined in this class and must be
    available at module level: ``sess``, ``x``, ``train``, ``do_eval``,
    ``MNIST``, ``ModelBasicCNN``, ``FastGradientMethod`` and ``CrossEntropy``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        """Record the hyper-parameters; data, model and tensors start as ``None``.

        :param nb_epochs: stored under ``train_params['nb_epochs']``
        :param batch_size: stored in both ``train_params`` and ``eval_params``
        :param learning_rate: stored under ``train_params['learning_rate']``
        :param eps: stored under ``fgsm_params['eps']``
        :param clip_min: stored under ``fgsm_params['clip_min']``
        :param clip_max: stored under ``fgsm_params['clip_max']``
        """
        self.train_params = dict(nb_epochs=nb_epochs,
                                 batch_size=batch_size,
                                 learning_rate=learning_rate)
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)

        # Fixed seed so the generator handed to ``train`` is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])

        # Data splits and their dimensions, filled in by ``get_data``.
        self.x_train = self.y_train = None
        self.x_test = self.y_test = None
        self.img_rows = self.img_cols = self.nchannels = None
        self.nb_classes = None

        # Model and graph tensors, filled in by ``adverse_train``.
        self.model = None
        self.preds = None
        self.loss = None
        self.preds_adv = None

    def get_data(self, train_start, train_end, test_start, test_end):
        """Load the MNIST train/test splits and derive image and label sizes."""
        dataset = MNIST(train_start=train_start, train_end=train_end,
                        test_start=test_start, test_end=test_end)
        train_x, train_y = dataset.get_set('train')
        test_x, test_y = dataset.get_set('test')

        self.x_train, self.y_train = train_x, train_y
        self.x_test, self.y_test = test_x, test_y
        self.img_rows, self.img_cols, self.nchannels = train_x.shape[1:4]
        self.nb_classes = train_y.shape[1]

    def adverse_train(self, nb_filters, label_smoothing):
        """Build model 'model2', its FGSM attack and loss, then run ``train``."""
        net = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        self.model = net
        fgsm = FastGradientMethod(net, sess=sess)

        def fgsm_attack(x):
            # Parameters are read from self at call time, as in the original closure.
            return fgsm.generate(x, **self.fgsm_params)

        # Clean logits first, then the loss, then the adversarial branch.
        self.preds = net.get_logits(x)
        self.loss = CrossEntropy(net, smoothing=label_smoothing, attack=fgsm_attack)
        self.preds_adv = net.get_logits(fgsm_attack(x))

        train(
            sess,
            self.loss,
            self.x_train,
            self.y_train,
            evaluate=self.evaluate,
            args=self.train_params,
            rng=self.range,
            var_list=net.get_params(),
        )

    def evaluate(self):
        """Evaluate clean and adversarial logits on the test split."""
        runs = (
            (self.preds, 'adv_train_clean_eval', False),
            (self.preds_adv, 'adv_train_adv_eval', True),
        )
        for logits, report_key, is_adv in runs:
            do_eval(logits, self.eval_params, self.x_test, self.y_test, report_key, is_adv)

    def test(self):
        """Evaluate clean and adversarial logits on the training split."""
        runs = (
            (self.preds, 'train_adv_train_clean_eval'),
            (self.preds_adv, 'train_adv_train_adv_eval'),
        )
        for logits, report_key in runs:
            do_eval(logits, self.eval_params, self.x_train, self.y_train, report_key)
Exemplo n.º 2
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    Train a clean MNIST CNN and write FGSM adversarial images to disk.

    When ``clean_train`` is true the function trains ``ModelBasicCNN`` on the
    clean training set, then generates one FGSM adversarial image per test
    and training example and saves it as a PNG under
    ``images/fgsm_adv/<split>/<label>_<uuid4>.png``. When ``clean_train`` is
    false nothing is trained or written and an empty report is returned.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, run the training and image export
    :param testing: if true, also record the training-set accuracy under
                    ``train_clean_train_clean_eval``
    :param backprop_through_attack: accepted for signature compatibility;
                                    not used by this variant
    :param nb_filters: filter count passed to ``ModelBasicCNN``
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1`` (the numeric value
                        itself is not used); otherwise soft placement is
                        enabled
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {'allow_soft_placement': True}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    mnist = MNIST(train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Store the accuracy of ``preds`` in the report; print it if labelled."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    def save_adversarial_images(attack, x_set, y_set, split_dir, tag):
        """Write one adversarial PNG per example of ``x_set`` into ``split_dir``."""
        for index, (sample, one_hot) in enumerate(zip(x_set, y_set)):
            print(tag + ' ' + str(index))
            label = np.argmax(one_hot)
            # The attack expects a batch, so wrap the single image in one.
            batch = sample.reshape((1, img_rows, img_cols, nchannels))
            adv = attack.generate_np(batch, **fgsm_params)
            # Scale [0, 1] pixels to bytes; a 2-D array is required for mode 'P'.
            raw_data = (adv.reshape((img_rows, img_cols)) * 255).astype('uint8')
            im = Image.fromarray(raw_data, mode='P')
            file_name = str(label) + '_' + str(uuid.uuid4()) + '.png'
            im.save(os.path.join(split_dir, file_name))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the Fast Gradient Sign Method (FGSM) attack object.
        # Only generate_np is used below, so no symbolic attack tensors are built.
        fgsm = FastGradientMethod(model, sess=sess)

        # Create every output directory up front; exist_ok avoids the
        # check-then-create race of separate exists()/mkdir() calls.
        out_dir = os.path.join('images', 'fgsm_adv')
        split_dirs = {}
        for tag in ('train', 'test'):
            split_dirs[tag] = os.path.join(out_dir, tag)
            os.makedirs(split_dirs[tag], exist_ok=True)

        # Generate fgsm adversarial examples and save to disk (test set first)
        save_adversarial_images(fgsm, x_test, y_test, split_dirs['test'], 'test')
        save_adversarial_images(fgsm, x_train, y_train, split_dirs['train'], 'train')

    return report
Exemplo n.º 3
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial: clean training followed by adversarial training.

    Optionally trains 'model1' on clean data and measures it against FGSM
    examples, then always trains 'model2' with an FGSM attack plugged into the
    loss and measures it on clean and adversarial inputs.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count passed to ``ModelBasicCNN``
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Accuracies are attached to this object as attributes and returned.
    accuracy_report = AccuracyReport()

    # Fixed graph-level seed and verbose logging.
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Session: single intra-op thread when num_threads is truthy, defaults otherwise.
    session_options = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # Data splits.
    dataset = MNIST(train_start=train_start,
                    train_end=train_end,
                    test_start=test_start,
                    test_end=test_end)
    x_train, y_train = dataset.get_set('train')
    x_test, y_test = dataset.get_set('test')

    # Image and label dimensions come from the loaded arrays.
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Graph inputs.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Shared settings for training, evaluation and the attack.
    train_params = dict(nb_epochs=nb_epochs,
                        batch_size=batch_size,
                        learning_rate=learning_rate)
    eval_params = dict(batch_size=batch_size)
    fgsm_params = dict(eps=0.3, clip_min=0., clip_max=1.)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Record the accuracy under ``report_key``; print it unless is_adv is None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(accuracy_report, report_key, acc)
        if is_adv is None:
            return
        example_kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (example_kind, acc))

    if clean_train:
        clean_model = ModelBasicCNN('model1', nb_classes, nb_filters)
        clean_logits = clean_model.get_logits(x)
        clean_loss = CrossEntropy(clean_model, smoothing=label_smoothing)

        def evaluate():
            do_eval(clean_logits, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, clean_loss, x_train, y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng,
              var_list=clean_model.get_params())

        # Training-set accuracy of the clean model.
        if testing:
            do_eval(clean_logits, x_train, y_train, 'train_clean_train_clean_eval')

        # FGSM attack object and graph for the clean model.
        clean_fgsm = FastGradientMethod(clean_model, sess=sess)
        clean_adv_inputs = clean_fgsm.generate(x, **fgsm_params)
        clean_adv_logits = clean_model.get_logits(clean_adv_inputs)

        # Accuracy of the clean model on adversarial test examples.
        do_eval(clean_adv_logits, x_test, y_test, 'clean_train_adv_eval', True)

        # Same measurement on the training set.
        if testing:
            do_eval(clean_adv_logits, x_train, y_train, 'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Second model, trained to be robust to FastGradientMethod.
    robust_model = ModelBasicCNN('model2', nb_classes, nb_filters)
    robust_fgsm = FastGradientMethod(robust_model, sess=sess)

    def attack(x):
        return robust_fgsm.generate(x, **fgsm_params)

    robust_loss = CrossEntropy(robust_model, smoothing=label_smoothing, attack=attack)
    robust_logits = robust_model.get_logits(x)
    robust_adv_inputs = attack(x)

    if not backprop_through_attack:
        # The FGSM attack used here has zero gradient, so this flag does not
        # change the gradient. For some other attacks, backpropagating through
        # the attack costs more but lets the defender anticipate how the
        # attacker reacts to updates of the defender's parameters.
        robust_adv_inputs = tf.stop_gradient(robust_adv_inputs)
    robust_adv_logits = robust_model.get_logits(robust_adv_inputs)

    def evaluate2():
        # Adversarially trained model on legitimate test inputs ...
        do_eval(robust_logits, x_test, y_test, 'adv_train_clean_eval', False)
        # ... and on adversarial examples.
        do_eval(robust_adv_logits, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training.
    train(sess, robust_loss, x_train, y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng,
          var_list=robust_model.get_params())

    # Training-set accuracies of the adversarially trained model.
    if testing:
        do_eval(robust_logits, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(robust_adv_logits, x_train, y_train, 'train_adv_train_adv_eval')

    return accuracy_report
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    MNIST cleverhans tutorial, checkpoint-sweep variant.

    When ``clean_train`` is true, iterates over checkpoint numbers
    50, 100, ..., 1000. For each one it loads the saved model if its
    ``.meta`` file exists, otherwise trains with ``train_with_noise``, then
    evaluates the clean logits on the test set and collects the returned
    accuracy and distortion. Both lists are printed at the end.

    NOTE(review): the body reads ``file``, ``save_dir``, ``filename``,
    ``eps``, ``type``, ``discretizeColor``, ``save`` and ``retrain``, none of
    which are parameters or locals here. They are presumably module-level
    globals; confirm. If no global ``type`` exists, the builtin ``type`` is
    what gets passed along.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if true, run the checkpoint sweep; otherwise the
                        function only builds the session, data and
                        placeholders and returns an empty report
    :param testing: not used by this variant
    :param backprop_through_attack: not used by this variant
    :param nb_filters: filter count passed to ``ModelBasicCNN``
    :param num_threads: if truthy, the session is created with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    # Note: the numeric value of num_threads is not used, only its truthiness.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST data
    # NOTE(review): `file` is not defined in this function — presumably a
    # module-level dataset path; confirm.
    mnist = MNIST(path=file,
                  train_start=train_start,
                  train_end=train_end,
                  test_start=test_start,
                  test_end=test_end)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')

    # x_train = x_train[0:1].reshape(784)
    # k = np.unique(x_train.reshape(-1, 784))
    # k = list(set(x_train.reshape(784)))
    # nk = [k.index(x_train[x]) for x in len(x_train)]
    # print(k, np.shape(k), nk)

    ###############################
    # Transform image to uniimage #
    ###############################
    # x_train = convert_uniimage(x_train)
    # x_test = transform_4_in_1(x_test)
    # trans_x_text = np.copy(x_test)
    # x_test = convert_uniimage(x_test)
    # uni_x_test = np.copy(x_test)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train an MNIST model
    # NOTE(review): `save_dir`, `filename` and `eps` come from outside this
    # function — presumably module-level settings; confirm.
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    # Evaluate `preds` on (x_set, y_set), store the accuracy on the report
    # under `report_key`, print it when `is_adv` is True/False, and return the
    # (accuracy, distortion) pair produced by model_eval. The remaining
    # keyword arguments are forwarded to model_eval unchanged.
    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                ae=None,
                type=None,
                datasetName=None,
                discretizeColor=1):
        accuracy, distortion = model_eval(sess,
                                          x,
                                          y,
                                          preds,
                                          x_set,
                                          y_set,
                                          args=eval_params,
                                          is_adv=is_adv,
                                          ae=ae,
                                          type=type,
                                          datasetName=datasetName,
                                          discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'

        if report_text:
            print('Test accuracy on %s examples: %0.4f' %
                  (report_text, accuracy))

        return accuracy, distortion

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        # Callback handed to train_with_noise; evaluates on the test set.
        # `type` and `discretizeColor` here are NOT do_eval's parameters: they
        # resolve in the enclosing/module scope (see the docstring note).
        def evaluate():
            do_eval(preds,
                    x_test,
                    y_test,
                    'clean_train_clean_eval',
                    False,
                    type=type,
                    datasetName="MNIST",
                    discretizeColor=discretizeColor)

        saveFileNumArr = []
        # saveFileNumArr = [50, 500, 1000]

        # Build the checkpoint numbers 50, 100, ..., 1000.
        count = 0
        appendNum = 50
        while count < 1000:
            count = count + appendNum
            saveFileNumArr.append(count)

        distortionArr = []
        accuracyArr = []
        for i in range(len(saveFileNumArr)):
            saveFileNum = saveFileNumArr[i]
            model_path = os.path.join(save_dir,
                                      filename + "-" + str(saveFileNum))
            print("Trying to load trained model from: " + model_path)
            # A checkpoint counts as present when its ".meta" file exists.
            if os.path.exists(model_path + ".meta"):
                tf_model_load(sess, model_path)
                print("Load trained model")
            else:
                # train(sess, loss, x_train, y_train, evaluate=evaluate,
                #       args=train_params, rng=rng, var_list=model.get_params())
                # NOTE(review): `save` and `retrain` are not defined in this
                # function — presumably module-level flags; confirm.
                train_with_noise(sess,
                                 loss,
                                 x_train,
                                 y_train,
                                 evaluate=evaluate,
                                 args=train_params,
                                 rng=rng,
                                 var_list=model.get_params(),
                                 save=save,
                                 type=type,
                                 datasetName="MNIST",
                                 retrain=retrain,
                                 discretizeColor=discretizeColor)

            # Calculate training error
            # NOTE(review): despite the comment above and the report key, this
            # call evaluates on x_test/y_test, not the training set — confirm
            # which one is intended.
            accuracy, distortion = do_eval(preds,
                                           x_test,
                                           y_test,
                                           'train_clean_train_clean_eval',
                                           False,
                                           type=type,
                                           datasetName="MNIST",
                                           discretizeColor=discretizeColor)

            # Initialize the Fast Gradient Sign Method (FGSM) attack object and
            # graph
            # Note: these objects are rebuilt on every loop iteration, and
            # preds_adv is not consumed by any active code below (the
            # adversarial evaluation is commented out).
            fgsm = FastGradientMethod(model, sess=sess)
            # fgsm = BasicIterativeMethod(model, sess=sess)
            # fgsm = MomentumIterativeMethod(model, sess=sess)
            # fgsm_params = {
            #   'clip_min': 0.,
            #   'clip_max': 1.,
            #   'verbose': False
            # }
            # fgsm = HopSkipJumpAttack(model, sess=sess)
            adv_x = fgsm.generate(x, **fgsm_params)
            # adv_x = fgsm.generate_np(x, **fgsm_params)
            # adv = sess.run(adv_x, feed_dict={x: x_test})
            preds_adv = model.get_logits(adv_x)
            # print(sess.run(preds_adv, feed_dict={x: x_test}))

            #############################
            # Create adversarial images #
            #############################
            # We have to produce adversarial image 1 by 1 by using HopSkipJumpAttack
            # adv_test = []
            # for i in range(len(x_test)):
            #   tmp_adv_test = sess.run(adv_x, feed_dict={x: [x_test[i]]})
            #   adv_test.append(tmp_adv_test[0])
            #   if (i+1) % 100 == 0:
            #     print((i+1),"/",len(x_test), " adversarial images")
            #
            # adv_test = np.array(adv_test)
            # print(np.shape(adv_test))

            # Evaluate the accuracy of the MNIST model on adversarial examples
            # do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            # accuracy, distortion = do_eval(preds, x_test, y_test, 'clean_train_adv_eval', True, ae=adv_x,
            #                                type=type, datasetName="MNIST", discretizeColor=discretizeColor)
            # do_eval(preds, adv_test, y_test, 'clean_train_adv_eval', True)

            # With the adversarial evaluation disabled, the values recorded
            # here are the ones from the clean evaluation above.
            distortionArr.append(distortion)
            accuracyArr.append(accuracy)
            # print(str(accuracy))
            # print(str(distortion))

        # Print one value per checkpoint, in checkpoint order.
        print("accuracy:")
        for accuracy in accuracyArr:
            print(accuracy)

        print("distortion:")
        for distortion in distortionArr:
            print(distortion)

    return report
Exemplo n.º 5
0
class CleanCNN:
    """Wrapper that trains a ``ModelBasicCNN`` on clean data and attacks it with FGSM.

    The methods rely on names that are not defined in this class and must be
    available at module level: ``sess``, ``x``, ``train``, ``do_eval``,
    ``extract_training_samples``, ``extract_test_samples``, ``oneHotEncodeY``,
    ``ModelBasicCNN``, ``FastGradientMethod`` and ``CrossEntropy``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        """Record the hyper-parameters; data, model and tensors start as ``None``.

        :param nb_epochs: stored under ``train_params['nb_epochs']``
        :param batch_size: stored in both ``train_params`` and ``eval_params``
        :param learning_rate: stored under ``train_params['learning_rate']``
        :param eps: stored under ``fgsm_params['eps']``
        :param clip_min: stored under ``fgsm_params['clip_min']``
        :param clip_max: stored under ``fgsm_params['clip_max']``
        """
        self.train_params = dict(nb_epochs=nb_epochs,
                                 batch_size=batch_size,
                                 learning_rate=learning_rate)
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)

        # Fixed seed so the generator handed to ``train`` is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])

        # Data splits and their dimensions, filled in by ``get_data``.
        self.x_train = self.y_train = None
        self.x_test = self.y_test = None
        self.img_rows = self.img_cols = self.nchannels = None
        self.nb_classes = None

        # Model and graph tensors, filled in by ``train``.
        self.model = None
        self.preds = None
        self.loss = None

    def get_data(self):
        """Load the 'byclass' samples, one-hot encode labels and scale images.

        Labels are encoded with 62 classes; images are cast to float32,
        divided by 255 and reshaped to ``(n, 28, 28, 1)``.
        """
        train_x, train_y = extract_training_samples('byclass')
        test_x, test_y = extract_test_samples('byclass')

        # One-hot labels (test first, as before), stored as float32.
        test_y = oneHotEncodeY(test_y, 62).astype('float32')
        train_y = oneHotEncodeY(train_y, 62).astype('float32')

        # float32 pixels divided by 255, with an explicit single channel axis.
        train_x = train_x.astype('float32') / 255.
        test_x = test_x.astype('float32') / 255.
        train_x = train_x.reshape((train_x.shape[0], 28, 28, 1))
        test_x = test_x.reshape((test_x.shape[0], 28, 28, 1))

        self.x_train, self.y_train = train_x, train_y
        self.x_test, self.y_test = test_x, test_y
        self.img_rows, self.img_cols, self.nchannels = train_x.shape[1:4]
        self.nb_classes = 62

    def train(self, nb_filters, label_smoothing):
        """Build model 'model1' with a cross-entropy loss and run ``train``."""
        net = ModelBasicCNN('model1', self.nb_classes, nb_filters)
        self.model = net
        self.preds = net.get_logits(x)
        self.loss = CrossEntropy(net, smoothing=label_smoothing)

        # Inside a method body the bare name ``train`` is the module-level
        # function, not this method.
        train(
            sess,
            self.loss,
            self.x_train,
            self.y_train,
            evaluate=self.evaluate,
            args=self.train_params,
            rng=self.range,
            var_list=net.get_params(),
        )

    def evaluate(self):
        """Evaluate the clean logits on the test split."""
        do_eval(self.preds, self.eval_params, self.x_test, self.y_test,
                'clean_train_clean_eval', False)

    def test(self):
        """Evaluate the clean logits on the training split."""
        do_eval(self.preds, self.eval_params, self.x_train, self.y_train,
                'train_clean_train_clean_eval')

    def adverserial_testing(self):
        """Attack the trained model with FGSM and evaluate on both splits."""
        fgsm = FastGradientMethod(self.model, sess=sess)
        adv_logits = self.model.get_logits(fgsm.generate(x, **self.fgsm_params))

        # Test split is reported as adversarial; training split is recorded only.
        do_eval(adv_logits, self.eval_params, self.x_test, self.y_test,
                'clean_train_adv_eval', True)
        do_eval(adv_logits, self.eval_params, self.x_train, self.y_train,
                'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')
Exemplo n.º 6
0
class AdverseCNN:
    """Wrapper that trains a ``ModelBasicCNN`` against FGSM adversarial examples.

    The methods rely on names that are not defined in this class and must be
    available at module level: ``sess``, ``x``, ``train``, ``do_eval``,
    ``extract_training_samples``, ``extract_test_samples``, ``oneHotEncodeY``,
    ``ModelBasicCNN``, ``FastGradientMethod`` and ``CrossEntropy``.
    """

    def __init__(self, nb_epochs, batch_size, learning_rate, eps=0.3, clip_min=0, clip_max=1):
        """Record the hyper-parameters; data, model and tensors start as ``None``.

        :param nb_epochs: stored under ``train_params['nb_epochs']``
        :param batch_size: stored in both ``train_params`` and ``eval_params``
        :param learning_rate: stored under ``train_params['learning_rate']``
        :param eps: stored under ``fgsm_params['eps']``
        :param clip_min: stored under ``fgsm_params['clip_min']``
        :param clip_max: stored under ``fgsm_params['clip_max']``
        """
        self.train_params = dict(nb_epochs=nb_epochs,
                                 batch_size=batch_size,
                                 learning_rate=learning_rate)
        self.eval_params = dict(batch_size=batch_size)
        self.fgsm_params = dict(eps=eps, clip_min=clip_min, clip_max=clip_max)

        # Fixed seed so the generator handed to ``train`` is reproducible.
        self.range = np.random.RandomState([2019, 11, 25])

        # Data splits and their dimensions, filled in by ``get_data``.
        self.x_train = self.y_train = None
        self.x_test = self.y_test = None
        self.img_rows = self.img_cols = self.nchannels = None
        self.nb_classes = None

        # Model and graph tensors, filled in by ``adverse_train``.
        self.model = None
        self.preds = None
        self.loss = None
        self.preds_adv = None

    def get_data(self):
        """Load the 'byclass' samples, one-hot encode labels and scale images.

        Labels are encoded with 62 classes; images are cast to float32,
        divided by 255 and reshaped to ``(n, 28, 28, 1)``. Diagnostic prints
        are emitted along the way.
        """
        train_x, train_y = extract_training_samples('byclass')
        test_x, test_y = extract_test_samples('byclass')

        # One-hot labels (test first, as before), stored as float32.
        test_y = oneHotEncodeY(test_y, 62).astype('float32')
        train_y = oneHotEncodeY(train_y, 62).astype('float32')
        train_x = train_x.astype('float32')
        test_x = test_x.astype('float32')

        # Diagnostics are printed before scaling and reshaping.
        print(np.amax(train_y))
        print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

        # Divide pixels by 255 and add an explicit single channel axis.
        train_x = (train_x / 255.).reshape((train_x.shape[0], 28, 28, 1))
        test_x = (test_x / 255.).reshape((test_x.shape[0], 28, 28, 1))

        self.x_train, self.y_train = train_x, train_y
        self.x_test, self.y_test = test_x, test_y
        self.img_rows, self.img_cols, self.nchannels = train_x.shape[1:4]

        print("//////////////////////////////")
        self.nb_classes = 62

    def adverse_train(self, nb_filters, label_smoothing):
        """Build model 'model2', its FGSM attack and loss, then run ``train``."""
        net = ModelBasicCNN('model2', self.nb_classes, nb_filters)
        self.model = net
        fgsm = FastGradientMethod(net, sess=sess)

        def fgsm_attack(x):
            # Parameters are read from self at call time, as in the original closure.
            return fgsm.generate(x, **self.fgsm_params)

        # Clean logits first, then the loss, then the adversarial branch.
        self.preds = net.get_logits(x)
        self.loss = CrossEntropy(net, smoothing=label_smoothing, attack=fgsm_attack)
        self.preds_adv = net.get_logits(fgsm_attack(x))

        train(
            sess,
            self.loss,
            self.x_train,
            self.y_train,
            evaluate=self.evaluate,
            args=self.train_params,
            rng=self.range,
            var_list=net.get_params(),
        )

    def evaluate(self):
        """Evaluate clean and adversarial logits on the test split."""
        runs = (
            (self.preds, 'adv_train_clean_eval', False),
            (self.preds_adv, 'adv_train_adv_eval', True),
        )
        for logits, report_key, is_adv in runs:
            do_eval(logits, self.eval_params, self.x_test, self.y_test, report_key, is_adv)

    def test(self):
        """Evaluate clean and adversarial logits on the training split."""
        runs = (
            (self.preds, 'train_adv_train_clean_eval'),
            (self.preds_adv, 'train_adv_train_adv_eval'),
        )
        for logits, report_key in runs:
            do_eval(logits, self.eval_params, self.x_train, self.y_train, report_key)
Exemplo n.º 7
0
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   clean_train=CLEAN_TRAIN,
                   testing=False,
                   backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                   nb_filters=NB_FILTERS,
                   num_threads=None,
                   label_smoothing=0.1):
    """
    Train a basic CNN on the data sets named by ``FLAGS`` and report accuracy.

    The data is loaded through ``get_train(FLAGS.train1)`` and
    ``get_test(FLAGS.test1)``; when ``FLAGS.train2`` is set a second data set
    is mixed in through ``zip_trains`` / ``zip_tests`` with a factor of 0.5.

    :param train_start: accepted for interface compatibility; not used here
    :param train_end: accepted for interface compatibility; not used here
    :param test_start: accepted for interface compatibility; not used here
    :param test_end: accepted for interface compatibility; not used here
    :param nb_epochs: number of epochs, forwarded to ``train``
    :param batch_size: batch size used for both training and evaluation
    :param learning_rate: learning rate, forwarded to ``train``
    :param clean_train: when false, nothing is trained and the empty report
                        is returned
    :param testing: if true, also record the accuracy on the training set
    :param backprop_through_attack: accepted for interface compatibility;
                                    not used here
    :param nb_filters: forwarded to ``ModelBasicCNN``
    :param num_threads: when truthy, the session is configured with
                        ``intra_op_parallelism_threads=1``
    :param label_smoothing: float, smoothing passed to ``CrossEntropy``
    :return: an AccuracyReport object
    """

    # Accuracies are attached to this object by do_eval and returned at the end.
    report = AccuracyReport()

    # Fixed TF seed for reproducibility, verbose logging for debugging.
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    session_options = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    x_train, y_train = get_train(FLAGS.train1)
    x_test, y_test = get_test(FLAGS.test1)
    if FLAGS.train2:
        # NOTE(review): get_train is unpacked into two values above but into
        # four values here -- confirm which arity get_train really returns.
        x_train2, y_train2, x_test2, y_test2 = get_train(FLAGS.train2)
        x_train, y_train = zip_trains(x_train, y_train, x_train2, y_train2,
                                      0.5)
        x_test, y_test = zip_tests(x_test, y_test, x_test2, y_test2, 0.5)

    # Input geometry and class count are derived from the loaded arrays.
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Placeholders for a batch of inputs and the matching label rows.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    print(x)
    print(y)

    train_params = dict(
        nb_epochs=nb_epochs,
        batch_size=batch_size,
        learning_rate=learning_rate,
    )
    eval_params = dict(batch_size=batch_size)
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Store the accuracy under report_key; print it only when the caller
        # says whether the examples are adversarial or legitimate.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        example_kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (example_kind, acc))

    if not clean_train:
        return report

    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    def evaluate():
        # Passed to train() as its evaluation callback.
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    train(sess, loss, x_train, y_train,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    if testing:
        # Training-set accuracy, recorded silently (no is_adv given).
        do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
# Example no. 8
# 0
def main(argv=None):
  """Run the attack / counter-attack experiment for ``CONFIG.DATASET``.

  The function
    1. creates the output directories and seeds the random generators,
    2. loads one of the data sets 'mnist', 'cifar10', 'moon' or 'dims',
    3. restores the classifier from ``trained_model_path`` or trains and
       saves it,
    4. evaluates the clean model, then runs a Carlini-Wagner L2 attack on
       the second half of the test set and a second attack on the first
       (clean) half together with the once-attacked images,
    5. computes L2 distances between the inputs of the second attack and
       its outputs and hands them to the plotting / statistics helpers.

  Intermediate arrays are cached as ``.npy`` files; when a cache file exists
  it is loaded instead of being recomputed.

  :param argv: not used in the body.
  :return: the ``AccuracyReport`` that ``do_eval`` fills in.
  :raises ValueError: if ``CONFIG.DATASET`` is not one of the supported names.
  """

  from cleverhans_tutorials import check_installation
  check_installation(__file__)

  save_path_data = CONFIG.SAVE_PATH + 'data/'
  model_path = CONFIG.SAVE_PATH + '../all/' +  CONFIG.DATASET + '/'
  # Every directory is checked on its own: previously the model 'data/'
  # directory was only created together with model_path, so an already
  # existing model_path without 'data/' made the later saves fail.
  for directory in (CONFIG.SAVE_PATH, save_path_data, model_path,
                    model_path + 'data/'):
    if not os.path.exists( directory ):
      os.makedirs( directory )

  nb_epochs = FLAGS.nb_epochs
  batch_size = FLAGS.batch_size
  learning_rate = FLAGS.learning_rate
  nb_filters = FLAGS.nb_filters
  # Split index of the test set: the first half stays clean, the second half
  # is the input of the first attack.
  len_x = int(CONFIG.NUM_TEST/2)

  start = time.time()

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set seeds to improve reproducibility
  if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
    tf.set_random_seed(1234)
    np.random.seed(1234)
    rd.seed(1234)
  elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
    tf.set_random_seed(13)
    np.random.seed(1234)
    rd.seed(0)

  # Set logging level to see debug information
  set_log_level(logging.DEBUG)

  # Create TF session; soft placement and device logging are enabled and the
  # process is limited to a fraction (0.2) of the GPU memory.
  tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
  tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
  sess = tf.Session(config=tf_config)

  if CONFIG.DATASET == 'mnist':
    # Get MNIST data
    mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    x_train, y_train = mnist.get_set('train')
    x_test, y_test = mnist.get_set('test')
  elif CONFIG.DATASET == 'cifar10':
    # Get CIFAR10 data
    data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                  test_start=0, test_end=CONFIG.NUM_TEST)
    dataset_size = data.x_train.shape[0]
    # Training runs from a tf.data pipeline with random flip + shift
    # augmentation; the numpy arrays below are still needed for evaluation.
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
      lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')
  elif CONFIG.DATASET == 'moon':
    # Create a two moon example
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                   y_train1, x_test1,
                                                                   y_test1)
  elif CONFIG.DATASET == 'dims':
    # Same two-moon data as above, then passed through add_noise_and_QR with
    # CONFIG.NUM_DIMS -- presumably lifting it to NUM_DIMS dimensions;
    # confirm against add_noise_and_QR.
    X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                      random_state=0)
    X = StandardScaler().fit_transform(X)
    x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                            test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                            +CONFIG.NUM_TEST)), random_state=0)
    x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                     y_train1,x_test1,
                                                                     y_test1)
    x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)
  else:
    # Fail with a clear message; without this guard an unknown name only
    # surfaced further down as a NameError on x_test.
    raise ValueError('Unsupported CONFIG.DATASET: %r' % (CONFIG.DATASET,))

  np.save(os.path.join(save_path_data, 'x_test'), x_test)
  np.save(os.path.join(save_path_data, 'y_test'), y_test)

  # Use Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))

  # Train an model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate
  }
  # Evaluation (and therefore attack generation) runs one example at a time.
  eval_params = {'batch_size': 1}
  rng = np.random.RandomState([2017, 8, 30])

  # Accuracies are appended to acc_param.txt for the whole experiment.
  with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

    def do_eval(adv_x, preds, x_set, y_set, report_key):
      # Evaluate `preds` on (x_set, y_set), record the accuracy on the report
      # and in the log file, and hand back the last two values returned by
      # model_eval (named here as predictions and evaluated inputs).
      acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                          y_set, args=eval_params)
      setattr(report, report_key, acc)
      if report_key:
        print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
      return pred_np, adv_x_np

    # Pick the architecture and checkpoint location for the data set. Only
    # 'dims' keeps its checkpoint in the run-specific data directory.
    if CONFIG.DATASET == 'mnist':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelBasicCNN('model1', nb_classes, nb_filters)
    elif CONFIG.DATASET == 'cifar10':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                    input_shape=[32, 32, 3])
    elif CONFIG.DATASET == 'moon':
      trained_model_path = model_path + 'data/trained_model'
      model = ModelMLP('model1', nb_classes)
    elif CONFIG.DATASET == 'dims':
      trained_model_path = save_path_data + 'trained_model'
      model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)

    def evaluate():
      # Evaluation callback handed to the training routines.
      _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

    # Reuse a saved checkpoint when its '.index' file exists, otherwise
    # train from scratch and save.
    if os.path.isfile( trained_model_path + '.index' ):
      tf_model_load(sess, trained_model_path)
    else:
      if CONFIG.DATASET == 'mnist':
        train(sess, loss, x_train, y_train, evaluate=evaluate,
              args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'cifar10':
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
      elif CONFIG.DATASET == 'moon':
        train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      elif CONFIG.DATASET == 'dims':
        train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                args=train_params, rng=rng, var_list=model.get_params())
      saver = tf.train.Saver()
      saver.save(sess, trained_model_path)

    # Evaluate the accuracy on test examples
    if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
      logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
    else:
      _, _ = do_eval(x, preds, x_train, y_train, 'train')
      logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
      np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0)

    if CONFIG.DATASET == 'moon':
      # Model outputs on a regular grid over [0, 1] x [0, 1]; used below by
      # the polytope / epsilon-ball helpers and the plots.
      num_grid_points = 5000
      if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
        x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
        logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
      else:
        xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points), np.linspace(0, 1, num_grid_points))
        x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
        # Dummy labels: do_eval needs a label array of matching length.
        y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
        x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
        logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
        x_mesh = np.squeeze(x_mesh)
        np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
        np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)), logits_mesh)

    # First half of the test set (kept clean) and second half (to be attacked).
    points_x = x_test[:len_x]
    points_y = y_test[:len_x]
    points_x_bar = x_test[len_x:]
    points_y_bar = y_test[len_x:]

    # Initialize the CW attack object and graph
    cw = CarliniWagnerL2(model, sess=sess)

    # first attack
    attack_params = {
        'learning_rate': CONFIG.CW_LEARNING_RATE,
        'max_iterations': CONFIG.CW_MAX_ITERATIONS
      }

    if CONFIG.DATASET == 'moon':

      out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
      attack_params['const_a_min'] = out_a
      attack_params['const_a_max'] = 100

    adv_x = cw.generate(x, **attack_params)

    if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
      adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
      logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
    else:
      #Evaluate the accuracy on adversarial examples
      preds_adv = model.get_logits(adv_x)
      logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                    'test once attacked')
      np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
      np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)

    # counter attack
    attack_params['max_iterations'] = 1024

    if CONFIG.DATASET == 'moon':

      out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                np.squeeze(adv_img_1), model_path,
                                                CONFIG.SAVE_PATH)
      attack_params['learning_rate'] = out_alpha2
      attack_params['const_a_min'] = -1
      attack_params['max_iterations'] = 2048

      plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                logits_mesh)

    adv_adv_x = cw.generate(x, **attack_params)

    # Input of the second attack: the clean first half followed by the
    # once-attacked second half. The targets mix the true labels of the clean
    # half with the first-attack outputs (logits_1) of the attacked half.
    x_k = np.concatenate((points_x, adv_img_1), axis=0)
    y_k = np.concatenate((points_y, logits_1), axis=0)

    if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
      adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
      logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
    else:
      # Evaluate the accuracy on adversarial examples
      preds_adv_adv = model.get_logits(adv_adv_x)
      logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                    'test twice attacked')

      np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
      np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)

    if CONFIG.DATASET == 'moon':
      plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
      plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
      test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                 CONFIG.SAVE_PATH)

  # The accuracy log is closed from here on; the arrays computed inside the
  # `with` block remain in scope.
  compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                    logits_2[:len_x], CONFIG.SAVE_PATH)

  # Sum over every non-batch axis; single-channel inputs are reduced over
  # axes 1 and 2 only and the remaining unit axis is squeezed away below.
  if x_test.shape[-1] > 1:
    num_axis=(1,2,3)
  else:
    num_axis=(1,2)

  # D_p: L2 distance between each clean point and its attacked version.
  # D_p_p: L2 distance between each once-attacked point and its
  # twice-attacked version.
  D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
  D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                    axis=num_axis)))
  D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                logits_0[:len_x], logits_2[:len_x])

  # NOTE(review): comparing against [] is only a reliable emptiness check if
  # modify_D returns plain lists -- confirm its return type.
  if D_p_mod != [] and D_p_p_mod != []:
    plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
    _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

  plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

  print('Time needed:', time.time()-start)

  return report