def load_model_and_dataset(dataset):
    """Build the classifier and dataset objects for the requested benchmark.

    For 'mnist' and 'cifar10' this also restores the NiN checkpoint from
    disk (relies on a module-level TensorFlow session `sess`).

    Args:
        dataset: one of 'mnist', 'mnist2' or 'cifar10'.

    Returns:
        (model, data) pair.

    Raises:
        ValueError: for any unrecognized dataset name.
    """
    if dataset == 'mnist':
        import mnist_NiN_bn
        net = mnist_NiN_bn.NiN_Model()
        ckpt_dir = '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/mnist_NiN/'
        tf.train.Saver().restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        benchmark = MNIST()
    elif dataset == "mnist2":
        import mnist_model
        net = mnist_model.MNISTModel()
        benchmark = MNIST()
    elif dataset == 'cifar10':
        import cifar_NiN_bn
        net = cifar_NiN_bn.NiN_Model()
        ckpt_dir = '/home/bull/home/zmn/insight/sparse-imperceivable-attacks-master/models/cifar_NiN/'
        tf.train.Saver().restore(sess, tf.train.latest_checkpoint(ckpt_dir))
        benchmark = CIFAR()
    else:
        raise ValueError('unknown dataset')

    return net, benchmark
def main():
    """Run a zeroth-order adversarial attack on MNIST and save the results.

    Loads the MNIST classifier, optimizes a shared perturbation `delImgAT`
    with the configured zeroth-order optimizer (ZOSVRG or ZOSGD), then saves
    every adversarial image plus the perturbation itself under
    MGR.parSet['save_path'].

    Raises:
        ValueError: if MGR.parSet['optimizer'] is not a known optimizer.
    """
    data, model = MNIST(), MNISTModel(restore="models/mnist", use_log=True)
    origImgs, origLabels, origImgID = util.generate_attack_data_set(data, model, MGR)

    # The perturbation is optimized in arctanh space; start from zero.
    delImgAT_Init = np.zeros(origImgs[0].shape)
    objfunc = ObjectiveFunc.OBJFUNC(MGR, model, origImgs, origLabels)

    MGR.Add_Parameter('eta', MGR.parSet['alpha']/origImgs[0].size)
    MGR.Log_MetaData()

    if(MGR.parSet['optimizer'] == 'ZOSVRG'):
        delImgAT = svrg.ZOSVRG(delImgAT_Init, MGR, objfunc)
    elif(MGR.parSet['optimizer'] == 'ZOSGD'):
        delImgAT = sgd.ZOSGD(delImgAT_Init, MGR, objfunc)
    else:
        # Fail fast: the original code only printed here and then crashed
        # below with a NameError on the undefined `delImgAT`.
        raise ValueError('Please specify a valid optimizer')

    for idx_ImgID in range(MGR.parSet['nFunc']):
        currentID = origImgID[idx_ImgID]
        orig_prob = model.model.predict(np.expand_dims(origImgs[idx_ImgID], axis=0))
        # Map back from arctanh space; *1.9999999 avoids arctanh(+-1) = inf.
        advImg = np.tanh(np.arctanh(origImgs[idx_ImgID]*1.9999999)+delImgAT)/2.0
        adv_prob  = model.model.predict(np.expand_dims(advImg, axis=0))

        suffix = "id{}_Orig{}_Adv{}".format(currentID, np.argmax(orig_prob), np.argmax(adv_prob))
        util.save_img(advImg, "{}/Adv_{}.png".format(MGR.parSet['save_path'], suffix))
    util.save_img(np.tanh(delImgAT)/2.0, "{}/Delta.png".format(MGR.parSet['save_path']))

    sys.stdout.flush()
    MGR.logHandler.close()
示例#3
0
def main(_):
    """Craft FGSM adversarial examples for the train and test sets, save
    them as .npy files, and report clean vs. adversarial accuracy.

    Expects FLAGS.dataset to be 'MNIST' or 'Cifar'; `x`, `fgsm` and
    `attack` are defined elsewhere in this module.
    """
    with tf.Session() as sess:
        K.set_session(sess)
        if FLAGS.dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo)
            data, model = CIFAR(), CIFARModel("models/cifar", sess)

        def _model_fn(x, logits=False):
            # Adapter exposing the model's probabilities (and optionally
            # logits) in the signature the fgsm helper expects.
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        if FLAGS.dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        elif FLAGS.dataset == 'Cifar':  # BUG FIX: was FLAGS.datset (typo)
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        np.save('adversarial_outputs/fgsm_train_' + FLAGS.dataset.lower() + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + FLAGS.dataset.lower() + '.npy', X_adv_test)
        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)

        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def compare_baseline():
    """Attack the first 100 MNIST test images with an untargeted
    Carlini L2 attack and print the mean L2 distortion."""
    data = MNIST()
    model = MNISTModel("models/mnist")
    sess = K.get_session()

    cw = CarliniL2(sess, model, batch_size=100, max_iterations=3000,
                   binary_search_steps=4, targeted=False,
                   initial_const=10)

    N = 100
    adv = cw.attack(data.test_data[:N], data.test_labels[:N])
    per_image_l2 = np.sum((adv - data.test_data[:N])**2, axis=(1, 2, 3))**.5
    print('dist', np.mean(per_image_l2))
示例#5
0
def reconstruct(n_samples):
	"""Pick one random MNIST test digit (index drawn from the first
	`n_samples`), run it through the autoencoder, and display the
	original next to its reconstruction."""
	enc = load_model('{}_encoder.h5'.format(desc))
	dec = load_model('{}_decoder.h5'.format(desc))
	mnist = MNIST()
	idx = np.random.choice(np.arange(n_samples))
	sample = mnist.test_data[idx].reshape(1, 784)
	code = enc.predict(sample)
	recon = dec.predict(code)
	draw([{"title": "Original", "image": sample},
	      {"title": "Reconstruction", "image": recon}])
示例#6
0
def main(args):
    """Train the baseline classifier(s) for the selected dataset.

    args['dataset'] may be 'mnist', 'cifar' or 'all'; trained models are
    written under models/.
    """
    if not os.path.isdir('models'):
        os.makedirs('models')

    target = args['dataset']
    if target in ("mnist", "all"):
        train(MNIST(), "models/mnist",
              [32, 32, 64, 64, 200, 200], num_epochs=50)
    if target in ('cifar', 'all'):
        train(CIFAR(), "models/cifar",
              [64, 64, 128, 128, 256, 256], num_epochs=50)
示例#7
0
def cw_attack(file_name, norm, sess, num_image=10, cifar = False, tinyimagenet = False):
    """Run a Carlini-Wagner-family attack and report the mean distortion.

    Args:
        file_name: path of the Keras model to load.
        norm: '1' (EAD L1), '2' (Carlini L2) or 'i' (Carlini Linf).
        sess: TensorFlow session used by the attack.
        num_image: number of images to attack.
        cifar / tinyimagenet: dataset selectors (MNIST when both are False).

    Returns:
        (average distortion upper bound, average attack time per image).

    Raises:
        ValueError: if `norm` is not one of '1', '2', 'i'.
    """
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)
    if norm == '1':
        attack = EADL1
        norm_fn = lambda x: np.sum(np.abs(x),axis=(1,2,3))
    elif norm == '2':
        attack = CarliniL2
        norm_fn = lambda x: np.sum(x**2,axis=(1,2,3))
    elif norm == 'i':
        attack = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x),axis=(1,2,3))
    else:
        # Fail fast: an unknown norm previously fell through and crashed
        # later with a NameError on `attack`/`norm_fn`.
        raise ValueError("norm must be one of '1', '2', 'i'")

    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()
    model = load_model(file_name, custom_objects={'fn':loss,'tf':tf, 'ResidualStart' : ResidualStart, 'ResidualStart2' : ResidualStart2})
    inputs, targets, true_labels, true_ids, img_info = generate_data(data, samples=num_image, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.predict, start=0)
    # The attack classes expect an object exposing `predict`; the Keras
    # model is callable, so it doubles as its own predictor here.
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1

    start_time = timer.time()
    attack = attack(sess, model, max_iterations = 1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input-inputs))
    return UB, (timer.time()-start_time)/len(inputs)
def main(args):
    """Train a compression/decompression autoencoder (CODEC) on the
    selected dataset.

    args['dataset'] selects mnist / cifar10 / fe / imagenet; for mnist an
    optional args['train_data_source'] prefix overrides the validation
    split with externally generated data.
    """
    # load data
    print("Loading data", args["dataset"])
    if args["dataset"] == "mnist":
        data = MNIST()
        if args["train_data_source"]:
            print("Using data from {}".format(args["train_data_source"]))
            img = np.load("{}_data.npy".format(args["train_data_source"]))
            # BUG FIX: labels were previously loaded from "{}_data.npy"
            # (copy-paste slip), which stored the images as labels.
            labels = np.load("{}_labels.npy".format(args["train_data_source"]))
            data.validation_data = img
            data.validation_labels = labels

    elif args["dataset"] == "cifar10":
        data = CIFAR()
    elif args["dataset"] == "fe":
        data = FACIAL()
    elif args["dataset"] == "imagenet":
        # use ImageDataGenerate provided by Keras
        data = ImageNetDataGen(args["imagenet_train_dir"],
                               args["imagenet_validation_dir"],
                               data_augmentation=False)

    print("Done...")

    if args["dataset"] == "imagenet":
        data_shape = (None, 299, 299, 3)
        resize = 256
    else:
        data_shape = data.train_data.shape
        resize = None

    print("Start training autoencoder")
    codec = CODEC(img_size=data_shape[1],
                  num_channels=data_shape[3],
                  compress_mode=args["compress_mode"],
                  resize=resize)
    train_autoencoder(data,
                      codec,
                      batch_size=args["batch_size"],
                      epochs=args["epochs"],
                      saveFilePrefix=args["save_prefix"],
                      train_imagenet=(args["dataset"] == "imagenet"))
示例#9
0
def plot(n_samples):
	"""Scatter-plot the latent space of the first `n_samples` MNIST test
	digits, coloured by class label.  When the latent dimensionality
	exceeds 2, t-SNE reduces it to 2-D first."""
	encoder = load_model('{}_encoder.h5'.format(desc))
	data = MNIST()
	x_test = data.test_data
	y_test = data.test_labels
	x = x_test[:n_samples].reshape(n_samples, 784)
	y = y_test[:n_samples]
	#normalize = colors.Normalize(0., 255.)
	#x = normalize(x)
	latent = encoder.predict(x)
	if FLAGS.latent_dim > 2:
		tsne = TSNE()
		print("\nFitting t-SNE, this will take awhile...")
		latent = tsne.fit_transform(latent)
	fig, ax = plt.subplots()
	for label in np.arange(10):
		# BUG FIX: mask with the sliced labels `y` (length n_samples, and
		# previously computed but unused) — masking with the full-length
		# y_test did not match the length of `latent`.
		ax.scatter(latent[(y == label), 0], latent[(y == label), 1], label=label, s=3)
	ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
	ax.set_aspect('equal')
	ax.set_title("Latent Space")
	plt.show(block=False)
	raw_input("Press Enter to Exit")
示例#10
0
def convert(file_name, new_name, cifar=False):
    """Convert a trained CNN into an equivalent MLP, save it, and report
    its validation accuracy.

    Args:
        file_name: path of the source CNN; its name is expected to encode
            nlayer, filters and kernel size as the last three '_' fields.
        new_name: path under which the converted MLP is saved.
        cifar: use the CIFAR (32, 32, 3) input shape instead of MNIST.

    Returns:
        Validation accuracy of the converted model.
    """
    if not cifar:
        eq_weights, new_params = get_weights(file_name)
        data = MNIST()
    else:
        eq_weights, new_params = get_weights(file_name, inp_shape=(32, 32, 3))
        data = CIFAR()
    model = Sequential()
    model.add(Flatten(input_shape=data.train_data.shape[1:]))
    for param in new_params:
        model.add(Dense(param))
        # Lambda (rather than an Activation layer) keeps the layer indices
        # aligned with the flattened weight list applied below.
        model.add(Lambda(lambda x: tf.nn.relu(x)))
    model.add(Dense(10))

    for i in range(len(eq_weights)):
        try:
            print(eq_weights[i][0].shape)
        except Exception:
            # Best-effort debug print only; some weight entries may have
            # no .shape. Narrowed from a bare `except` so that
            # KeyboardInterrupt/SystemExit still propagate.
            pass
        model.layers[i].set_weights(eq_weights[i])

    sgd = SGD(lr=0.01, decay=1e-5, momentum=0.9, nesterov=True)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    model.save(new_name)
    acc = model.evaluate(data.validation_data, data.validation_labels)[1]
    printlog("Converting CNN to MLP")
    nlayer = file_name.split('_')[-3][0]
    filters = file_name.split('_')[-2]
    kernel_size = file_name.split('_')[-1]
    printlog(
        "model name = {0}, numlayer = {1}, filters = {2}, kernel size = {3}".
        format(file_name, nlayer, filters, kernel_size))
    printlog("Model accuracy: {:.3f}".format(acc))
    printlog("-----------------------------------")
    return acc
示例#11
0
def main(args):
    """Generate a contrastive (AEADEN) explanation for one MNIST image.

    Loads the autoencoder and classifier, attacks the image selected by
    args['img_id'], and writes the original, adversarial and delta images
    under Results/.
    """
    with tf.Session() as sess:
        random.seed(121)
        np.random.seed(1211)

        img_id = args['img_id']
        max_iter = args['maxiter']
        bsearch_steps = args['binary_steps']
        init_const = args['init_const']
        mode = args['mode']
        kappa = args['kappa']
        beta = args['beta']
        gamma = args['gamma']

        AE_model = util.load_AE("mnist_AE_1")
        data, model = MNIST(), MNISTModel("models/mnist", sess, False)

        orig_prob, orig_class, orig_prob_str = util.model_prediction(
            model, np.expand_dims(data.test_data[img_id], axis=0))
        # The "target" is the model's own prediction for this image.
        target_label = orig_class
        print("Image:{}, infer label:{}".format(img_id, target_label))
        orig_img, target = util.generate_data(data, img_id, target_label)

        attack = AEADEN(sess,
                        model,
                        mode=mode,
                        AE=AE_model,
                        batch_size=1,
                        kappa=kappa,
                        init_learning_rate=1e-2,
                        binary_search_steps=bsearch_steps,
                        max_iterations=max_iter,
                        initial_const=init_const,
                        beta=beta,
                        gamma=gamma)

        adv_img = attack.attack(orig_img, target)

        adv_prob, adv_class, adv_prob_str = util.model_prediction(model, adv_img)
        delta_prob, delta_class, delta_prob_str = util.model_prediction(
            model, orig_img - adv_img)

        print(
            "[INFO]id:{}, kappa:{}, Orig class:{}, Adv class:{}, Delta class: {}, Orig prob:{}, Adv prob:{}, Delta prob:{}".format(
                img_id, kappa, orig_class, adv_class, delta_class,
                orig_prob_str, adv_prob_str, delta_prob_str))

        suffix = "id{}_kappa{}_Orig{}_Adv{}_Delta{}".format(
            img_id, kappa, orig_class, adv_class, delta_class)
        save_dir = "{}_ID{}_Gamma_{}".format(mode, img_id, gamma)
        os.system("mkdir -p Results/{}".format(save_dir))
        util.save_img(
            orig_img,
            "Results/{}/Orig_original{}.png".format(save_dir, orig_class))
        util.save_img(adv_img,
                      "Results/{}/Adv_{}.png".format(save_dir, suffix))
        util.save_img(
            np.absolute(orig_img - adv_img) - 0.5,
            "Results/{}/Delta_{}.png".format(save_dir, suffix))

        sys.stdout.flush()
示例#12
0
    print("training")
    print('=================')
    # run training with given dataset, and print progress
    history = model.fit(inputs,
                        labels,
                        batch_size=batch_size,
                        validation_data=(inputs, labels),
                        epochs=num_epochs,
                        shuffle=True)

    # # save model to a file
    # if file_name != None:
    #     model.save(file_name)
    print('=================')
    print('finished training')
    print('==================')
    return {'model': nlayer_model, 'history': None}


# Ensure the checkpoint directory exists before any training run.
if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == '__main__':
    # Sanity-print the per-sample input shape, then train a small 5-layer
    # ReLU MLP on MNIST (four hidden layers of 20 units each).
    print(MNIST().train_data.shape[1:])
    train(MNIST(),
          file_name="models/mnist_5layer_relu",
          params=[20, 20, 20, 20],
          num_epochs=50,
          lr=0.02,
          decay=1e-4)
示例#13
0
def main(args):
    """Batch zeroth-order (ZO) black-box attack driver.

    Attacks args["image_number"] correctly-classified test images with the
    optimizer named by args['mode'] (ZOSGD, ZOsignSGD, ZOSCD, ZOAdaMM,
    ZOSMD, ZOPSGD or ZONES), then saves per-image convergence statistics
    as .npz files under retperimage2/ and prints summary statistics.

    Raises:
        ValueError: if args["dataset"] is not mnist / cifar10 / imagenet.
    """
    with tf.Session() as sess:

        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        # Draw 3x as many candidate ids as needed; misclassified images
        # are skipped below.
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)
        #image_id_set = np.random.randint(1, 1000, args["image_number"] )
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR pixel value range is [-0.5,0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            # Fail fast: previously this printed and fell through, crashing
            # later with a NameError on `data`/`model`.
            raise ValueError('Please specify a valid dataset')

        succ_count, ii, iii = 0, 0, 0
        final_distortion_count,first_iteration_count, first_distortion_count = [], [], []
        while iii < args["image_number"]:
            ii = ii + 1
            image_id = image_id_set[ii]

            # if image_id!= 836: continue # for test only

            orig_prob, orig_class, orig_prob_str = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id],
                                      axis=0))  ## orig_class: predicted label;

            if arg_targeted_attack:  ### target attack
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class

            orig_img, target = util.generate_data(data, image_id, target_label)
            # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

            true_label_list = np.argmax(data.test_labels, axis=1)
            true_label = true_label_list[image_id]

            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print(
                    "True Label is different from the original prediction, pass!"
                )
                continue
            else:
                iii = iii + 1

            print('\n', iii, '/', args["image_number"])

            ##  parameter
            d = orig_img.size  # feature dim
            print("dimension = ", d)

            # mu=1/d**2  # smoothing parameter
            q = arg_q + 0
            I = arg_max_iter + 0
            kappa = arg_kappa + 0
            const = arg_init_const + 0

            ## flatten image to vec
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  ### initialized adv. perturbation
            #delta_adv = np.random.uniform(-16/255,16/255,(1,d))

            ## w adv image initialization
            if args["constraint"] == 'uncons':
                # * 0.999999 to avoid +-0.5 return +-infinity
                w_ori_img_vec = np.arctanh(
                    2 * (orig_img_vec) * 0.999999
                )  # in real value, note that orig_img_vec in [-0.5, 0.5]
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) *
                    0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            # initialize the best solution & best loss
            best_adv_img = []  # successful adv image in [-0.5, 0.5]
            best_delta = []  # best perturbation
            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(I)  ## I: max iters
            l2s_loss_all = np.zeros(I)
            attack_flag = False
            first_flag = True  ## record first successful attack

            # parameter setting for ZO gradient estimation
            mu = args["mu"]  ### smoothing parameter

            ## learning rate
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                ## parameter initialization for AdaMM
                v_init = 1e-7  #0.00001
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))

                m = np.zeros((1, d))
                # momentum parameter for first and second order moment
                beta_1 = 0.9
                beta_2 = 0.9  # only used by AMSGrad
                print(beta_1, beta_2)

            #for i in tqdm(range(I)):
            for i in range(I):

                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                ## Total loss evaluation
                if args["constraint"] == 'uncons':
                    total_loss[i], l2s_loss_all[
                        i] = function_evaluation_uncons(
                            w_img_vec, kappa, target_label, const, model,
                            orig_img, arg_targeted_attack)

                else:
                    total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                        w_img_vec, kappa, target_label, const, model, orig_img,
                        arg_targeted_attack)

                ## gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    grad_est = grad_coord_estimation(mu, q, w_img_vec, d,
                                                     kappa, target_label,
                                                     const, model, orig_img,
                                                     arg_targeted_attack,
                                                     args["constraint"])
                elif arg_mode == "ZONES":
                    grad_est = gradient_estimation_NES(mu, q, w_img_vec, d,
                                                       kappa, target_label,
                                                       const, model, orig_img,
                                                       arg_targeted_attack,
                                                       args["constraint"])
                else:
                    grad_est = gradient_estimation_v2(mu, q, w_img_vec, d,
                                                      kappa, target_label,
                                                      const, model, orig_img,
                                                      arg_targeted_attack,
                                                      args["constraint"])

                ## ZO-Attack, unconstrained optimization formulation
                if arg_mode == "ZOSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if arg_mode == "ZOSCD":
                    delta_adv = delta_adv - base_lr * grad_est
                if arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                    v_hat = np.maximum(v_hat, v)
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        tmp = delta_adv.copy()
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(tmp, orig_img_vec, V_temp,
                                                   -0.5, 0.5)
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    if args["constraint"] == 'cons':
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                    if args["constraint"] == 'cons':
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                if arg_mode == "ZONES":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                ### adv. example update
                w_img_vec = w_ori_img_vec + delta_adv

                ## convert back to adv_img in [-0.5 , 0.5]
                if args["constraint"] == 'uncons':
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
                else:
                    adv_img_vec = w_img_vec.copy()

                adv_img = np.resize(adv_img_vec, orig_img.shape)

                ## update the best solution in the iterations
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % (i + 1, image_id, args["lr"],
                                   int(args["decay_lr"]), arg_mode,
                                   args["constraint"], total_loss[i],
                                   l2s_loss_all[i], true_label,
                                   np.argmax(attack_prob)))

                if arg_save_iteration:
                    os.system("mkdir Examples")
                    if (np.logical_or(
                            true_label != np.argmax(attack_prob),
                            np.remainder(i + 1,
                                         10) == 0)):  ## every 10 iterations
                        suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                            image_id, arg_mode, true_label,
                            np.argmax(attack_prob), i + 1)
                        # util.save_img(adv_img, "Examples/{}.png".format(suffix))

                if arg_targeted_attack:
                    if (np.log(target_prob + 1e-10) -
                            np.log(other_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1
                else:
                    if (np.log(other_prob + 1e-10) -
                            np.log(target_prob + 1e-10) >=
                            kappa):  # check attack confidence
                        if (distortion(adv_img, orig_img) <
                                best_distortion):  # check distortion
                            best_adv_img = adv_img
                            best_distortion = distortion(adv_img, orig_img)
                            best_delta = adv_img - orig_img
                            best_iteration = i + 1
                            adv_class = np.argmax(attack_prob)
                            attack_flag = True
                            ## Record first attack
                            if (first_flag):
                                first_flag = False
                                first_adv_img = adv_img
                                first_distortion = distortion(
                                    adv_img, orig_img)
                                first_delta = adv_img - orig_img
                                first_class = adv_class
                                first_iteration = i + 1

            if (attack_flag):
                ## save data
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteation=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                ## print
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
            else:
                ## save data
                suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                    image_id, arg_mode, args["constraint"], args["lr"],
                    int(args["decay_lr"]), args["exp_code"])
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")

            sys.stdout.flush()
    print('succ rate:', succ_count / args["image_number"])
    print('average first success l2', np.mean(first_distortion_count))
    print('average first itrs', np.mean(first_iteration_count))
    print('average l2:', np.mean(final_distortion_count), ' best l2:',
          np.min(final_distortion_count), ' worst l2:',
          np.max(final_distortion_count))
示例#14
0
## contained in the LICENCE file in this directory.

from setup_mnist import MNIST
from mn_utils import prepare_data
from worker import AEDetector, SimpleReformer, IdReformer, AttackData, Classifier, Operator, Evaluator
import mn_utils as utils


# Build the MagNet-style defense: two autoencoder detectors plus a reformer
# that projects inputs back onto the data manifold before classification.
detector_I = AEDetector("./defensive_models/MNIST_I", p=2)
detector_II = AEDetector("./defensive_models/MNIST_II", p=1)
reformer = SimpleReformer("./defensive_models/MNIST_I")

# IdReformer is a no-op baseline; the classifier is the protected model.
id_reformer = IdReformer()
classifier = Classifier("./models/example_classifier")

detector_dict = {"I": detector_I, "II": detector_II}

operator = Operator(MNIST(), classifier, detector_dict, reformer)

# Evaluate against a pre-computed Carlini L2 attack on a fixed index subset.
idx = utils.load_obj("example_idx")
_, _, Y = prepare_data(MNIST(), idx)
f = "example_carlini_0.0"
testAttack = AttackData(f, Y, "Carlini L2 0.0")

evaluator = Evaluator(operator, testAttack)
evaluator.plot_various_confidences("defense_performance",
                                   drop_rate={"I": 0.001, "II": 0.001})

示例#15
0
    # Run training on the provided dataset, reporting per-epoch progress
    # against the held-out validation split.
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    # Persist the trained weights only when a destination was supplied.
    if file_name != None:
        model.save(file_name)

    return model


if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dataset', type=str, default="mnist")
    dataset = vars(parser.parse_args())["dataset"]
    # Constructing a dataset object triggers its download/unpack side effects;
    # the instance itself is not needed afterwards.
    if "mnist" in dataset:
        MNIST()
    if "cifar" in dataset:
        CIFAR()
    #train(MNIST(), file_name="models/mnist_2layer", params=[1024], num_epochs=1, lr=0.1, decay=1e-3)
    #train(CIFAR(), file_name="models/cifar_2layer", params=[1024], num_epochs=1, lr=0.2, decay=1e-3)
示例#16
0
            r = np.random.random_integers(0,9)
        l[i,r] = 1
    return l

def attack(data, name):
    """Run a targeted Carlini-Wagner L2 attack on the first 100 test images
    of `data` against the saved model `models/<name>`, save the adversarial
    batch to /tmp/<name>, and print the mean L2 distortion."""
    session = K.get_session()
    model = load_model("models/"+name, custom_objects={'fn': fn})
    is_mnist = "mnist" in name

    class Wrap:
        # Static metadata CarliniL2 expects to find on its model object.
        image_size = 28 if is_mnist else 32
        num_labels = 10
        num_channels = 1 if is_mnist else 3

        def predict(self, x):
            return model(x)

    cw = CarliniL2(session, Wrap(), batch_size=100,
                   max_iterations=10000, binary_search_steps=5,
                   initial_const=1, targeted=True)
    originals = data.test_data[:100]
    adv = cw.attack(originals, get_labs(data.test_labels[:100]))
    np.save("/tmp/"+name, adv)
    # Mean per-image L2 distortion over the attacked batch.
    print(np.mean(np.sum((adv-originals)**2,axis=(1,2,3))**.5))
    
# Attack each defended/undefended variant; a fresh dataset object is built
# per call, exactly as the original back-to-back invocations did.
for variant in ("mnist", "mnist_brelu", "mnist_gaussian",
                "mnist_gaussian_brelu"):
    attack(MNIST(), variant)

for variant in ("cifar", "cifar_brelu", "cifar_gaussian",
                "cifar_gaussian_brelu"):
    attack(CIFAR(), variant)

def expandImage(image_data):
    """Map image data from the [-0.5, 0.5] range used by the models back to
    0-255 pixel intensities; returns a new array, input is untouched."""
    return (np.array(image_data) + 0.5) * 255


# In[4]:

if __name__ == "__main__":
    # Let TF grow GPU memory on demand instead of reserving it all upfront.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        modelPath = '%smodels/mnist' % (nn_robust_attack_root)
        data, model = MNIST(), MNISTModel(modelPath, sess)

        # Untargeted Carlini-Wagner L-infinity attack.
        attack = CarliniLi(sess, model, max_iterations=1000, targeted=False)

        inputs, targets = generate_data(data,
                                        samples=1000,
                                        targeted=False,
                                        start=5500,
                                        inception=False)

        # Detection-statistics counters; the code that updates and reports
        # them lies outside this excerpt.
        original_classified_wrong_number = 0  #number of benign samples that are misclassified
        disturbed_failure_number = 0  #number of samples that failed to craft corresponding adversarial samples
        test_number = 0  #number of adversarial samples that we generate
        TTP = 0
        TP = 0
        FN = 0
示例#18
0
                        help='number of epochs')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help='overwrite output file')
    args = parser.parse_args()
    print(args)
    # The input layer counts as one extra layer on top of the hidden ones.
    nlayers = len(args.layer_parameters) + 1
    if not args.modelfile:
        # Derive a descriptive default path from the architecture settings.
        file_name = args.modelpath + "/" + args.model + "_" + str(
            nlayers
        ) + "layer_" + args.activation + "_" + args.layer_parameters[0]
    else:
        file_name = args.modelfile
    print("Model will be saved to", file_name)
    # Refuse to clobber an existing model unless --overwrite was given.
    if os.path.isfile(file_name) and not args.overwrite:
        raise RuntimeError("model {} exists.".format(file_name))
    if args.model == "mnist":
        data = MNIST()
    elif args.model == "cifar":
        data = CIFAR()
    else:
        # Previously an unknown model name fell through and crashed later
        # with a NameError on `data`; fail fast with a clear message instead.
        raise ValueError("unknown model: {}".format(args.model))
    train(data,
          file_name=file_name,
          params=args.layer_parameters,
          num_epochs=args.epochs,
          lr=args.lr,
          decay=args.wd,
          activation=args.activation,
          activation_param=args.leaky_slope,
          grad_reg=args.gradreg,
          dropout_rate=args.dropout)
示例#19
0
    
    # Train the teacher network at the given softmax temperature.
    teacher = train(data, file_name+"_teacher", params, num_epochs, batch_size, train_temp,
                    init=file_name+"_init")

    # Re-label the training set with the teacher's soft labels at temperature
    # train_temp — the core step of defensive distillation.
    predicted = teacher.predict(data.train_data)
    y = tf.nn.softmax(predicted/train_temp)
    print(y)
    # NOTE(review): this assigns a TF tensor (not a numpy array) as the
    # training labels; presumably train() can consume it — confirm.
    data.train_labels = y

    # Train the student model on the soft labels at the same temperature.
    student = train(data, file_name, params, num_epochs, batch_size, train_temp,
                    init=file_name+"_init")

    # At deployment the student predicts at temperature 1.
    predicted = student.predict(data.train_data)

    print(predicted)
    
if not os.path.isdir('models'):
    os.makedirs('models')

# Layer widths for the two baseline architectures.
CIFAR_PARAMS = [64, 64, 128, 128, 256, 256]
MNIST_PARAMS = [32, 32, 64, 64, 200, 200]

# Baseline models first, then their distilled (temperature-100) counterparts.
train(CIFAR(), "models/cifar", CIFAR_PARAMS, num_epochs=50)
train(MNIST(), "models/mnist", MNIST_PARAMS, num_epochs=50)

train_distillation(MNIST(), "models/mnist-distilled-100", MNIST_PARAMS,
                   num_epochs=50, train_temp=100)
train_distillation(CIFAR(), "models/cifar-distilled-100", CIFAR_PARAMS,
                   num_epochs=50, train_temp=100)
示例#20
0
    # Fit on the training split, validating against the held-out split.
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              epochs=num_epochs,
              shuffle=True)

    # Persist the trained weights only when a destination was supplied.
    if file_name != None:
        model.save(file_name)

    return model


if not os.path.isdir('models'):
    os.makedirs('models')

if __name__ == "__main__":
    # Train the same 2-layer architecture on both datasets; only the dataset,
    # output path and learning rate differ. Datasets are constructed lazily
    # inside the loop so each is loaded immediately before its training run.
    for dataset_cls, out_path, rate in ((MNIST, "models/mnist_2layer", 0.1),
                                        (CIFAR, "models/cifar_2layer", 0.2)):
        train(dataset_cls(),
              file_name=out_path,
              params=[1024],
              num_epochs=50,
              lr=rate,
              decay=1e-3)
示例#21
0
              num_epochs=100,
              batch_size=256,
              if_save=True):
        self.model.compile(loss='mean_squared_error',
                           metrics=['mean_squared_error'],
                           optimizer='adam')

        noise = self.v_noise * np.random.normal(size=np.shape(data.train_data))
        noisy_train_data = data.train_data + noise
        noisy_train_data = np.clip(noisy_train_data, -0.5, 0.5)

        self.model.fit(noisy_train_data,
                       data.train_data,
                       batch_size=batch_size,
                       validation_data=(data.validation_data,
                                        data.validation_data),
                       epochs=num_epochs,
                       shuffle=True)

        if if_save:
            self.model.save(os.path.join(self.model_dir, archive_name))

    def load(self, archive_name, model_dir=None):
        """Restore model weights from `archive_name`, resolved against
        `model_dir` (defaults to this instance's model directory)."""
        target_dir = self.model_dir if model_dir is None else model_dir
        self.model.load_weights(os.path.join(target_dir, archive_name))


if __name__ == '__main__':
    # Train the denoising autoencoder on MNIST and save it under "mnist".
    DAE().train(MNIST(), "mnist")
示例#22
0
def main(args):
    """Drive a black-box attack (ZOO / ZOO-AE / AutoZOOM-BiLIN / AutoZOOM-AE)
    over a set of test images and save original, adversarial and difference
    images (plus .npy dumps) under
    args["save_path"]/<dataset>/<attack_method>/<attack_type>.

    `args` is a dict of already-parsed command-line options.
    """
    with tf.Session() as sess:
        print("Loading data and classification model: {}".format(
            args["dataset"]))
        # NOTE(review): an unrecognised dataset name leaves `data`/`model`
        # unbound and crashes below — consider an explicit error.
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar",
                                              sess,
                                              use_softmax=True)
        elif args['dataset'] == "imagenet":
            # data, model = ImageNet(data_path=args["imagenet_dir"], targetFile=args["attack_single_img"]), InceptionModel(sess, use_softmax=True)
            data, model = ImageNetDataNP(), InceptionModel(sess,
                                                           use_softmax=True)
        # elif args['dataset'] == "imagenet_np":

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        if args["attack_single_img"]:
            # manually setup attack set
            # attacking only one image with random attack]
            # NOTE(review): this branch defines orig_img / target_labels, but
            # the attack loop below iterates all_orig_img* (and `shift_index`
            # is only set in the else branch), so single-image runs appear to
            # hit a NameError — confirm against the full original script.
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])

            if args["attack_type"] == "targeted":
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # generate attack set
            # ImageNet labels are shifted by one relative to the model output.
            if args["dataset"] == "imagenet" or args[
                    "dataset"] == "imagenet_np":
                shift_index = True
            else:
                shift_index = False

        if args["random_target"] and (args["dataset"] == "imagenet"
                                      or args["dataset"] == "imagenet_np"):
            # find all possible class
            all_class = np.unique(np.argmax(data.test_labels, 1))
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                random_target_class=all_class,
                shift_index=shift_index)
        elif args["random_target"]:
            # random target on all possible classes
            class_num = data.test_labels.shape[1]
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                random_target_class=list(range(class_num)),
                shift_index=shift_index)
        else:
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                shift_index=shift_index)

            # check attack data
        # for i in range(len(orig_img_id)):
        #     tar_lab = np.argmax(target_labels[i])
        #     orig_lab = np.argmax(orig_labels[i])
        #     print("{}:, target label:{}, orig_label:{}, orig_img_id:{}".format(i, tar_lab, orig_lab, orig_img_id[i]))

        # attack related settings
        # Without an autoencoder the attack operates at the native image size.
        if args["attack_method"] == "zoo" or args[
                "attack_method"] == "autozoom_bilin":
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if args["attack_method"] == "zoo" or args["attack_method"] == "zoo_ae":
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))

        else:
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        # AE-based attacks search in the decoder's latent space, so the attack
        # image size is dictated by the decoder's input shape.
        if args["attack_method"] == "zoo_ae" or args[
                "attack_method"] == "autozoom_ae":
            #_, decoder = util.load_codec(args["codec_prefix"])
            if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # setup attack
        if args["attack_method"] == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif args["attack_method"] == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif args["attack_method"] == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif args["attack_method"] == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], args["dataset"],
                                   args["attack_method"], args["attack_type"])

        os.system("mkdir -p {}".format(save_prefix))

        # Running success / distortion totals across all attacked images.
        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            # print information
            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))
            if args["attack_method"] == "zoo_ae" or args[
                    "attack_method"] == "autozoom_ae":
                #print ae info
                # Report the autoencoder's reconstruction MSE on this image.
                if args["dataset"] == "mnist" or args["dataset"] == "cifar10":
                    temp_img = all_orig_img[i:i + 1]
                else:
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            timestart = time.time()
            adv_img = blackbox_attack.attack(orig_img, target)
            timeend = time.time()

            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            # Success criterion depends on targeted vs. untargeted mode.
            success = False
            if args["attack_type"] == "targeted":
                if adv_class == target_class:
                    success = True
            else:
                if adv_class != true_class:
                    success = True

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # save images
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            # original image
            save_name = os.path.join(save_prefix, "Orig_{}.png".format(suffix))
            util.save_img(orig_img, save_name)
            save_name = os.path.join(save_prefix, "Orig_{}.npy".format(suffix))
            np.save(save_name, orig_img)

            # adv image
            save_name = os.path.join(save_prefix, "Adv_{}.png".format(suffix))
            util.save_img(adv_img, save_name)
            save_name = os.path.join(save_prefix, "Adv_{}.npy".format(suffix))
            np.save(save_name, adv_img)

            # diff image
            save_name = os.path.join(save_prefix, "Diff_{}.png".format(suffix))
            util.save_img((adv_img - orig_img) / 2, save_name)
            save_name = os.path.join(save_prefix, "Diff_{}.npy".format(suffix))
            np.save(save_name, adv_img - orig_img)
示例#23
0
              epochs=num_epochs,
              shuffle=True)
    

    # Persist the trained model only when a destination path was supplied.
    if file_name != None:
        model.save(file_name)

    return {'model':model, 'history':history}

if not os.path.isdir('models'):
    os.makedirs('models')


if __name__ == '__main__':
    # Sweep depth (2-5 residual layers) for each activation, preserving the
    # original run order (relu, then sigmoid, then tanh) and the
    # "models/mnist_resnet_<n><suffix>" naming convention.
    for act, suffix in ((tf.nn.relu, ""),
                        (tf.sigmoid, "_sigmoid"),
                        (tf.tanh, "_tanh")):
        for depth in (2, 3, 4, 5):
            train(MNIST(),
                  file_name="models/mnist_resnet_{}{}".format(depth, suffix),
                  nlayer=depth,
                  activation=act)

    # Single extra configuration with the arctangent activation.
    train(MNIST(), file_name="models/mnist_resnet_2_atan", nlayer=2, activation=tf.atan)
示例#24
0
def main(args):
    """Generate adversarial examples with the selected attack (CW-L2, EAD-L1,
    EAD-EN, FGM or iterated FGM variants) against the selected dataset/model,
    then report best-/average-/worst-case success rates and L1/L2/Linf
    distortions per batch; optionally saves example images when args['show'].
    """
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # Optional model overrides: adversarially-trained or distilled variants.
        # NOTE(review): the 'adversarial' override always loads an MNIST model
        # regardless of args['dataset'] — confirm this is intended.
        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']),
                               sess)

        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            inception=inception,
            handpick=handpick,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        if (args['attack'] == 'L2'):
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'L1'):
            attack = EADL1(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'EN'):
            attack = EADEN(sess,
                           model,
                           batch_size=args['batch_size'],
                           max_iterations=args['maxiter'],
                           confidence=args['conf'],
                           binary_search_steps=args['binary_steps'],
                           beta=args['beta'],
                           abort_early=args['abort_early'])
            adv = attack.attack(inputs, targets)
        """If untargeted, pass labels instead of targets"""
        if (args['attack'] == 'FGSM'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML1'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'FGML2'):
            attack = FGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        if (args['attack'] == 'IFGSM'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=np.inf,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML1'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=1,
                         inception=inception)
            adv = attack.attack(inputs, targets)
        if (args['attack'] == 'IFGML2'):
            attack = IGM(sess,
                         model,
                         batch_size=args['batch_size'],
                         ord=2,
                         inception=inception)
            adv = attack.attack(inputs, targets)

        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        # In train mode only dump the adversarial examples and labels.
        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        # Per-batch statistics: best case (easiest target in the batch),
        # average case (random batch member), worst case (hardest target).
        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # NOTE(review): with nonzero confidence the evaluation model is
        # switched to the distilled MNIST model — confirm this is intended
        # for non-MNIST datasets too.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        if (args['show']):
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))

        # Each batch corresponds to one source image attacked toward
        # batch_size different targets.
        for i in range(0, len(inputs), args['batch_size']):

            pred = []
            for j in range(i, i + args['batch_size']):
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))

            # Best case: minimum distortion among successful targets.
            dist_l1 = 1e10
            dist_l2 = 1e10
            dist_linf = 1e10
            dist_l1_index = 1e10
            dist_l2_index = 1e10
            dist_linf_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)):
                    if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_best_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_best_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_best_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: a uniformly random member of the batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r,
                          1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                r_average.append(1)
                d_average_l2.append(
                    np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                d_average_l1.append(
                    np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                d_average_linf.append(
                    np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

            else:
                r_average.append(0)

            # Worst case: counts only if every target in the batch succeeded;
            # records the maximum distortion over the batch.
            dist_l1 = 0
            dist_l1_index = 1e10
            dist_linf = 0
            dist_linf_index = 1e10
            dist_l2 = 0
            dist_l2_index = 1e10
            for k, j in enumerate(range(i, i + args['batch_size'])):
                if (np.argmax(pred[k], 1) != np.argmax(targets[j:j + 1], 1)):
                    r_worst.append(0)
                    dist_l1_index = 1e10
                    dist_l2_index = 1e10
                    dist_linf_index = 1e10
                    break
                else:
                    if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                        dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                        dist_l1_index = j
                    if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                        dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                        dist_linf_index = j
                    if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                        dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                        dist_l2_index = j
            if (dist_l1_index != 1e10):
                d_worst_l2.append((np.sum(
                    (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                d_worst_l1.append(
                    np.sum(np.abs(adv[dist_l1_index] - inputs[dist_l1_index])))
                d_worst_linf.append(
                    np.amax(
                        np.abs(adv[dist_linf_index] -
                               inputs[dist_linf_index])))
                r_worst.append(1)

            # Optionally dump original/adversarial image pairs for inspection.
            if (args['show']):
                for j in range(i, i + args['batch_size']):
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[i], target_id, label_id, prev_id,
                        adv_id, adv_id == target_id,
                        np.sum(np.abs(adv[j] - inputs[j])),
                        np.sum((adv[j] - inputs[j])**2)**.5,
                        np.amax(np.abs(adv[j] - inputs[j])))

                    show(
                        inputs[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/original_{}.png".format(suffix))
                    show(
                        adv[j:j + 1],
                        str(args['save']) + "/" + str(args['dataset']) + "/" +
                        str(args['attack']) +
                        "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
示例#25
0
    # Optionally warm-start from previously saved weights.
    if init != None:
        model.load_weights(init)

    # Temperature-scaled softmax cross-entropy (used by defensive
    # distillation): logits are divided by train_temp before the softmax.
    def fn(correct, predicted):
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted /
                                                       train_temp)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)

    model.compile(loss=fn, optimizer=sgd, metrics=['accuracy'])

    # NOTE(review): `nb_epoch` is the legacy Keras 1.x spelling of `epochs`;
    # confirm the pinned Keras version still accepts it.
    model.fit(data.train_data,
              data.train_labels,
              batch_size=batch_size,
              validation_data=(data.validation_data, data.validation_labels),
              nb_epoch=num_epochs,
              shuffle=True)

    # Persist the trained weights only when a destination was supplied.
    if file_name != None:
        model.save(file_name)

    return model


# Ensure the model output directory exists before training.
os.makedirs('models', exist_ok=True)

#train(CIFAR(), "models/cifar", [64, 64, 128, 128, 256, 256], num_epochs=50)
# Quick single-epoch baseline MNIST training run.
train(MNIST(), "models/mnist", [32, 32, 64, 64, 200, 200], num_epochs=1)
def main(args):
    """Attack a dataset with the configured method and report distortion stats.

    Builds the model for ``args['dataset']``, generates (or reloads from
    .npy) a batch of adversarial examples with the attack named by
    ``args['attack']``, then for each source image measures best-, average-
    and worst-case L1/L2/Linf distortion over its candidate targets and
    prints the means.  Optionally saves the adversarial batch as .npy files
    and individual PNG images.

    args: dict of parsed command-line options.
    """
    with tf.Session() as sess:
        # ---- dataset / model selection ---------------------------------
        if (args['dataset'] == 'mnist'):
            data = MNIST()
            inception = False
            if (args['adversarial'] != "none"):
                # adversarially trained checkpoint variant
                model = MNISTModel(
                    "models/mnist_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                # distilled checkpoint; temperature is part of the file name
                model = MNISTModel(
                    "models/mnist-distilled-" + str(args['temp']), sess)
            else:
                model = MNISTModel("models/mnist", sess)
        if (args['dataset'] == "cifar"):
            data = CIFAR()
            inception = False
            if (args['adversarial'] != "none"):
                model = CIFARModel(
                    "models/cifar_cw" + str(args['adversarial']), sess)
            elif (args['temp']):
                model = CIFARModel(
                    "models/cifar-distilled-" + str(args['temp']), sess)
            else:
                model = CIFARModel("models/cifar", sess)
        if (args['dataset'] == "imagenet"):
            # 2*numimg seed images are requested, presumably so that enough
            # correctly-classified ones remain — TODO confirm in ImageNet()
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])
        timestart = time.time()
        # ---- obtain adversarial examples: reload from disk or run attack
        if (args['restore_np']):
            if (args['train']):
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy')
            else:
                adv = np.load(
                    str(args['dataset']) + '_' + str(args['attack']) + '.npy')
        else:
            if (args['attack'] == 'L2'):
                attack = CarliniL2(sess,
                                   model,
                                   batch_size=args['batch_size'],
                                   max_iterations=args['maxiter'],
                                   confidence=args['conf'],
                                   initial_const=args['init_const'],
                                   binary_search_steps=args['binary_steps'],
                                   targeted=not args['untargeted'],
                                   beta=args['beta'],
                                   abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'L1'):
                attack = EADL1(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'EN'):
                attack = EADEN(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               beta=args['beta'],
                               abort_early=args['abort_early'])
                adv = attack.attack(inputs, targets)
            """If untargeted, pass labels instead of targets"""
            # One-shot and iterative gradient attacks, per norm (inf/1/2).
            if (args['attack'] == 'FGSM'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=np.inf,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML1'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=1,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'FGML2'):
                attack = FGM(sess,
                             model,
                             batch_size=args['batch_size'],
                             ord=2,
                             eps=args['eps'],
                             inception=inception)
                adv = attack.attack(inputs, targets)

            if (args['attack'] == 'IFGSM'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=np.inf,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML1'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=1,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)
            if (args['attack'] == 'IFGML2'):
                attack = IFGM(sess,
                              model,
                              batch_size=args['batch_size'],
                              ord=2,
                              eps=args['eps'],
                              inception=inception)
                adv = attack.attack(inputs, targets)

        timeend = time.time()

        # Each source image has num_targets candidate targets (1 if
        # untargeted), so inputs holds num_targets rows per instance.
        if args['untargeted']:
            num_targets = 1
        else:
            num_targets = args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if (args['save_np']):
            if (args['train']):
                np.save(str(args['dataset']) + '_labels_train.npy', labels)
                np.save(
                    str(args['dataset']) + '_' + str(args['attack']) +
                    '_train.npy', adv)
            else:
                # NOTE(review): parenthesisation differs from the load path
                # above — '.npy' is concatenated inside str(); equivalent
                # only because args['attack'] is already a string.
                np.save(
                    str(args['dataset']) + '_' + str(args['attack'] + '.npy'),
                    adv)

        # NOTE(review): these trailing-underscore accumulators are
        # initialised but never appended to below — apparently leftover
        # scaffolding; confirm before removing.
        r_best_ = []
        d_best_l1_ = []
        d_best_l2_ = []
        d_best_linf_ = []
        r_average_ = []
        d_average_l1_ = []
        d_average_l2_ = []
        d_average_linf_ = []
        r_worst_ = []
        d_worst_l1_ = []
        d_worst_l2_ = []
        d_worst_linf_ = []

        #Transferability Tests
        # model_[0] is the source model the attack ran on; optionally append
        # a second (target) model to measure transfer of the same examples.
        model_ = []
        model_.append(model)
        if (args['targetmodel'] != "same"):
            if (args['targetmodel'] == "dd_100"):
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        if (args['show']):
            # Output directory: <save>/<dataset>/<attack>/
            if not os.path.exists(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack'])):
                os.makedirs(
                    str(args['save']) + "/" + str(args['dataset']) + "/" +
                    str(args['attack']))
        # Evaluate the adversarial batch against every model in model_.
        for m, model in enumerate(model_):
            r_best = []
            d_best_l1 = []
            d_best_l2 = []
            d_best_linf = []
            r_average = []
            d_average_l1 = []
            d_average_l2 = []
            d_average_linf = []
            r_worst = []
            d_worst_l1 = []
            d_worst_l2 = []
            d_worst_linf = []
            # Walk the batch one source instance (num_targets rows) at a time.
            for i in range(0, len(inputs), num_targets):
                pred = []
                for j in range(i, i + num_targets):
                    if inception:
                        pred.append(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)))
                    else:
                        pred.append(model.model.predict(adv[j:j + 1]))

                # Best case: among this instance's SUCCESSFUL targets, the
                # smallest distortion per norm.  1e10 is a "not found"
                # sentinel for both distances and indices.
                dist_l1 = 1e10
                dist_l1_index = 1e10
                dist_linf = 1e10
                dist_linf_index = 1e10
                dist_l2 = 1e10
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    success = False
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            success = True
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            success = True
                    if (success):
                        if (np.sum(np.abs(adv[j] - inputs[j])) < dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) < dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) < dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    # At least one target succeeded: record its distortions.
                    d_best_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_best_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_best_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_best.append(1)
                else:
                    r_best.append(0)

                # Average case: a single uniformly random target per instance.
                rand_int = np.random.randint(i, i + num_targets)
                if inception:
                    pred_r = np.reshape(
                        model.model.predict(adv[rand_int:rand_int + 1]),
                        (data.test_labels[0:1].shape))
                else:
                    pred_r = model.model.predict(adv[rand_int:rand_int + 1])
                success_average = False
                if (args['untargeted']):
                    if (np.argmax(pred_r, 1) != np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                else:
                    if (np.argmax(pred_r, 1) == np.argmax(
                            targets[rand_int:rand_int + 1], 1)):
                        success_average = True
                if success_average:
                    r_average.append(1)
                    d_average_l2.append(
                        np.sum((adv[rand_int] - inputs[rand_int])**2)**.5)
                    d_average_l1.append(
                        np.sum(np.abs(adv[rand_int] - inputs[rand_int])))
                    d_average_linf.append(
                        np.amax(np.abs(adv[rand_int] - inputs[rand_int])))

                else:
                    r_average.append(0)

                # Worst case: ALL targets must succeed; track the LARGEST
                # distortion per norm.  A single failure records 0 and
                # aborts the scan (indices reset to the 1e10 sentinel).
                dist_l1 = 0
                dist_l1_index = 1e10
                dist_linf = 0
                dist_linf_index = 1e10
                dist_l2 = 0
                dist_l2_index = 1e10
                for k, j in enumerate(range(i, i + num_targets)):
                    failure = True
                    if (args['untargeted']):
                        if (np.argmax(pred[k], 1) != np.argmax(
                                targets[j:j + 1], 1)):
                            failure = False
                    else:
                        if (np.argmax(pred[k],
                                      1) == np.argmax(targets[j:j + 1], 1)):
                            failure = False
                    if failure:
                        r_worst.append(0)
                        dist_l1_index = 1e10
                        dist_l2_index = 1e10
                        dist_linf_index = 1e10
                        break
                    else:
                        if (np.sum(np.abs(adv[j] - inputs[j])) > dist_l1):
                            dist_l1 = np.sum(np.abs(adv[j] - inputs[j]))
                            dist_l1_index = j
                        if (np.amax(np.abs(adv[j] - inputs[j])) > dist_linf):
                            dist_linf = np.amax(np.abs(adv[j] - inputs[j]))
                            dist_linf_index = j
                        if ((np.sum((adv[j] - inputs[j])**2)**.5) > dist_l2):
                            dist_l2 = (np.sum((adv[j] - inputs[j])**2)**.5)
                            dist_l2_index = j
                if (dist_l1_index != 1e10):
                    d_worst_l2.append((np.sum(
                        (adv[dist_l2_index] - inputs[dist_l2_index])**2)**.5))
                    d_worst_l1.append(
                        np.sum(
                            np.abs(adv[dist_l1_index] -
                                   inputs[dist_l1_index])))
                    d_worst_linf.append(
                        np.amax(
                            np.abs(adv[dist_linf_index] -
                                   inputs[dist_linf_index])))
                    r_worst.append(1)

                # Dump PNGs only once, for the last model in model_.
                if (args['show'] and m == (num_models - 1)):
                    for j in range(i, i + num_targets):
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id,
                            np.sum(np.abs(adv[j] - inputs[j])),
                            np.sum((adv[j] - inputs[j])**2)**.5,
                            np.amax(np.abs(adv[j] - inputs[j])))

                        show(
                            inputs[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/original_{}.png".format(suffix))
                        show(
                            adv[j:j + 1],
                            str(args['save']) + "/" + str(args['dataset']) +
                            "/" + str(args['attack']) +
                            "/adversarial_{}.png".format(suffix))
            # Label printed stats: "Src_" for source model(s), "Tgt_" for the
            # final (transfer-target) model.
            if (m != (num_models - 1)):
                lbl = "Src_"
                if (num_models > 2):
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"
            if (num_targets > 1):
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                # Single target: best/average/worst coincide, print one set.
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
示例#27
0
## Copyright (C) 2016, Nicholas Carlini <*****@*****.**>.
##
## This program is licenced under the BSD 2-Clause licence,
## contained in the LICENCE file in this directory.

from setup_cifar import CIFAR, CIFARModel
from setup_mnist import MNIST, MNISTModel
from setup_inception import ImageNet, InceptionModel

import tensorflow as tf
import numpy as np

BATCH_SIZE = 1

with tf.Session() as sess:
    data, model = MNIST(), MNISTModel("models/mnist", sess)
    data, model = CIFAR(), CIFARModel("models/cifar", sess)
    data, model = ImageNet(), InceptionModel(sess)

    x = tf.placeholder(
        tf.float32,
        (None, model.image_size, model.image_size, model.num_channels))
    y = model.predict(x)

    r = []
    for i in range(0, len(data.test_data), BATCH_SIZE):
        pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]})
        #print(pred)
        #print('real',data.test_labels[i],'pred',np.argmax(pred))
        r.append(
            np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
示例#28
0
def main(args):
    """Attack images through a temperature-encoding defence and report stats.

    For each correctly-classified test image of the chosen dataset, runs the
    white-box (CarliniL2) or black-box (BlackBoxL2) attack, re-encodes the
    adversarial image channel-wise with ``encoder(level=args['level'])``,
    re-classifies the encoded result, and saves original / adversarial /
    diff PNGs plus per-image statistics.

    args: dict of parsed command-line options.
    """
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"
        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
            # data, model =  MNIST(), MNISTModel("models/mnist-distilled-100", sess, use_log)
        elif args['dataset'] == "cifar10":
            #data, model = CIFAR(), CIFARModel("models/cifar", sess, use_log)
            # data, model = CIFAR(), CIFARModel("models/cifar-distilled-100", sess, use_log)
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess,
                                              use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        print('Done...')
        # numimg == 0 means "use every remaining test image from firstimg on".
        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')
        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        # Seed both RNGs so image/target selection is reproducible.
        random.seed(args['seed'])
        np.random.seed(args['seed'])
        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')
        #print('all_inputs : ', all_inputs.shape)
        #print('encoding_all : ',encoding_all.shape)
        os.system("mkdir -p {}/{}".format(args['save'], args['dataset']))
        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            # Slice keeps the leading batch dimension (shape (1, ...)).
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            #print('encoding_inputs shape: ', encoding_inputs)
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)
            # test if the image is correctly classified
            original_predict = model.model.predict(encoding_inputs)
            original_predict = np.squeeze(original_predict)
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):",
                  original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            # NOTE(review): np.argmax(labels, 1) is a length-1 array, so this
            # adds a one-element boolean array to the counter via NumPy
            # broadcasting — confirm the intended scalar accumulation.
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if type(const) is list:
                const = const[0]
            if len(adv.shape) == 3:
                # Re-add the batch dimension if the attack returned a single image.
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            ##### llj
            # Temperature-encode the adversarial image channel by channel:
            # NHWC -> NCHW, encode each channel, concatenate, back to NHWC.
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            channel0, channel1, channel2 = encode_adv[:,
                                                      0, :, :], encode_adv[:,
                                                                           1, :, :], encode_adv[:,
                                                                                                2, :, :]
            channel0, channel1, channel2 = temp_encoder.tempencoding(
                channel0), temp_encoder.tempencoding(
                    channel1), temp_encoder.tempencoding(channel2)
            encode_adv = np.concatenate([channel0, channel1, channel2], axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            #### llj
            # Classify the encoded adversarial image.
            adversarial_predict = model.model.predict(encode_adv)
            adversarial_predict = np.squeeze(adversarial_predict)
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])

            # NOTE(review): same one-element-array accumulation as above.
            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            success = False
            if args['untargeted']:
                if adversarial_class[-1] != original_class[-1]:
                    success = True
            else:
                if adversarial_class[-1] == np.argmax(targets):
                    success = True
            # Attacks with L2 distortion above 20.0 are counted as failures.
            if l2_distortion > 20.0:
                success = False
            if success:
                total_success += 1
                l2_total += l2_distortion
            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'],
                                                  args['dataset'], img_no,
                                                  suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        # Accuracies are over ALL selected ids, including skipped
        # (misclassified) ones, since the divisor is all_true_ids.size.
        print(' origin accuracy : ',
              100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
示例#29
0
def main(args):
    """Run a zeroth-order (ZO) black-box adversarial attack on one image class.

    Supported ZO optimizers (``args['mode']``): ZOSGD, ZOsignSGD, ZOSCD,
    ZOAdaMM, ZOSMD, ZOPSGD, ZONES.  The attack perturbs a mini-batch of
    correctly-classified test images of class ``args['class_id']`` and saves
    per-run statistics to an ``.npz`` file.

    Args:
        args: dict of attack hyper-parameters (see key reads below).

    Raises:
        ValueError: if the dataset name is unknown or there are not enough
            correctly-classified images to fill the mini-batch.
    """
    with tf.Session() as sess:

        # Fix all RNG seeds for reproducibility.
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  ### input image (natural example)
        target_id = args[
            'target_id']  ### target images id (adv example) if target attack
        arg_max_iter = args['maxiter']  ### max number of iterations
        arg_init_const = args[
            'init_const']  ### regularization prior to attack loss
        arg_kappa = args['kappa']  ### attack confidence level
        arg_q = args['q']  ### number of random direction vectors
        arg_mode = args['mode']  ### algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        idx_lr = args["lr_idx"]

        ## load classifier; for MNIST and CIFAR the pixel value range is [-0.5, 0.5]
        if (arg_Dataset == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif (arg_Dataset == 'cifar10'):
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif (arg_Dataset == 'imagenet'):
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
            #model = InceptionModel(sess, True)
        else:
            # Fail fast: the original only printed a warning here and then
            # crashed later with a NameError on `data`/`model`.
            raise ValueError('Please specify a valid dataset')

        #orig_img = np.load('ori_img_backup.npy')
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]
        #np.save('ori_img_backup',orig_img)

        #true_label = data.test_labels[np.where(np.argmax(data.test_labels,1) == class_id)]
        _, orig_class = util.model_prediction_u(
            model, orig_img[:30]
        )  # take 30 or less images to make sure arg_bsz number of them are valid

        # filter out the images which are misclassified already
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            # NOTE: the original used `assert 'no enough valid inputs'`, which
            # is a no-op (a non-empty string is always truthy). Raise instead.
            raise ValueError('no enough valid inputs')

        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)
        #true_label = np.zeros((arg_bsz, 1001))
        #true_label[np.arange(arg_bsz), class_id] = 1
        true_label = class_id

        if arg_targeted_attack:  ### target attack
            #target_label = np.zeros((arg_bsz, 1001))
            #target_label[np.arange(arg_bsz), target_id] = 1
            target_label = target_id
        else:
            target_label = true_label

        #orig_img, target = util.generate_data(data, class_id, target_label)
        # shape of orig_img is (1,28,28,1) in [-0.5, 0.5]

        ##  parameter
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size  # feature dim
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        # mu=1/d**2  # smoothing parameter
        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        ## flatten image to vec
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        ## w adv image initialization
        if args["constraint"] == 'uncons':
            # * 0.999999 to avoid +-0.5 return +-infinity
            w_ori_img_vec = np.arctanh(
                2 * (orig_img_vec) * 0.999999
            )  # in real value, note that orig_img_vec in [-0.5, 0.5]
            w_img_vec = w_ori_img_vec.copy()
        else:
            w_ori_img_vec = orig_img_vec.copy()
            w_img_vec = w_ori_img_vec.copy()

        delta_adv = np.zeros((1, d))  ### initialized adv. perturbation

        # initialize the best solution & best loss
        best_adv_img = []  # successful adv image in [-0.5, 0.5]
        best_delta = []  # best perturbation
        best_distortion = (0.5 * d)**2  # threshold for best perturbation
        total_loss = np.zeros(I)  ## I: max iters
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  ## record first successful attack

        # parameter setting for ZO gradient estimation
        mu = args["mu"]  ### smoothing parameter

        ## learning rate
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            ## parameter initialization for AdaMM
            v_init = 1e-7  #0.00001
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))

            m = np.zeros((1, d))
            # momentum parameter for first and second order moment
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        #for i in tqdm(range(I)):
        for i in range(I):

            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            ## Total loss evaluation
            if args["constraint"] == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            ## gradient estimation w.r.t. w_img_vec
            if arg_mode == "ZOSCD":
                grad_est = grad_coord_estimation(mu, q, w_img_vec, d, kappa,
                                                 target_label, const, model,
                                                 orig_img, arg_targeted_attack,
                                                 args["constraint"])
            elif arg_mode == "ZONES":
                grad_est = gradient_estimation_NES(mu, q, w_img_vec, d, kappa,
                                                   target_label, const, model,
                                                   orig_img,
                                                   arg_targeted_attack,
                                                   args["constraint"])
            else:
                grad_est = gradient_estimation_v2(mu, q, w_img_vec, d, kappa,
                                                  target_label, const, model,
                                                  orig_img,
                                                  arg_targeted_attack,
                                                  args["constraint"])

            ## ZO-Attack, unconstrained optimization formulation
            if arg_mode == "ZOSGD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            if arg_mode == "ZOSCD":
                delta_adv = delta_adv - base_lr * grad_est
            if arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)  ### vt
                v_hat = np.maximum(v_hat, v)
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if args["constraint"] == 'cons':
                    tmp = delta_adv.copy()
                    # project back into the valid pixel box [-0.5, 0.5],
                    # weighted by the AMSGrad second-moment estimate
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(tmp, orig_img_vec, V_temp, -0.5,
                                               0.5)
            if arg_mode == "ZOSMD":
                delta_adv = delta_adv - 0.5 * base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZOPSGD":
                delta_adv = delta_adv - base_lr * grad_est
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)
            if arg_mode == "ZONES":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if args["constraint"] == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec, V_temp,
                                               -0.5, 0.5)

            ### adv. example update
            w_img_vec = w_ori_img_vec + delta_adv

            ## convert back to adv_img in [-0.5 , 0.5]
            if args["constraint"] == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999  #
            else:
                adv_img_vec = w_img_vec.copy()

            adv_img = np.resize(adv_img_vec, orig_img.shape)

            ## update the best solution in the iterations
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            # periodic checkpoint of the current perturbation
            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM": print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' +
                    str(np.argmax(attack_prob, 1)) + arg_mode +
                    str(args["lr"]), delta_adv)

            if args["print_iteration"]:
                if np.remainder(i + 1, 20) == 0:
                    if (true_label != np.argmax(attack_prob, 1)).all():
                        print(
                            "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1)))
                    else:
                        sr = np.sum(
                            true_label != np.argmax(attack_prob, 1)) / arg_bsz
                        print(
                            "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                            % (i + 1, class_id, args["lr"],
                               int(args["decay_lr"]), arg_mode,
                               args["constraint"], total_loss[i],
                               l2s_loss_all[i], true_label,
                               np.argmax(attack_prob, 1), sr))

            if arg_save_iteration:
                os.system("mkdir Examples")
                if (np.logical_or(
                        true_label != np.argmax(attack_prob, 1),
                        np.remainder(i + 1, 10) == 0)):  ## every 10 iterations
                    suffix = "id_{}_Mode_{}_True_{}_Pred_{}_Ite_{}".format(
                        class_id, arg_mode, true_label,
                        np.argmax(attack_prob, 1), i + 1)
                    # util.save_img(adv_img, "Examples/{}.png".format(suffix))

            if arg_targeted_attack:
                if ((np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False  ### once gets into this, it will no longer record the next sucessful attack
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1
            else:
                if ((np.log(other_prob + 1e-10) - np.log(target_prob + 1e-10))
                        >= kappa).all():  # check attack confidence
                    if (distortion(adv_img, orig_img) <
                            best_distortion):  # check distortion
                        best_adv_img = adv_img
                        best_distortion = distortion(adv_img, orig_img)
                        best_iteration = i + 1
                        adv_class = np.argmax(attack_prob, 1)
                        attack_flag = True
                        ## Record first attack
                        if (first_flag):
                            first_flag = False
                            first_adv_img = adv_img
                            first_distortion = distortion(adv_img, orig_img)
                            first_class = adv_class
                            first_iteration = i + 1

        if (attack_flag):
            ## save data for a successful attack (includes first-attack stats)
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     first_distortion=first_distortion,
                     first_iteration=first_iteration,
                     best_iteation=best_iteration,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            ## print
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
        else:
            ## save data for a failed attack (no first/best iteration fields)
            suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
                class_id, arg_mode, args["constraint"], str(args["lr"]),
                int(args["decay_lr"]), args["exp_code"], args["init_const"])
            np.savez("{}".format(suffix0),
                     id=class_id,
                     mode=arg_mode,
                     loss=total_loss,
                     perturbation=l2s_loss_all,
                     best_distortion=best_distortion,
                     learn_rate=args["lr"],
                     decay_lr=args["decay_lr"],
                     attack_flag=attack_flag)
            print("Attack Fails")

        sys.stdout.flush()
# ----- Example #30 -----
def run(args, restrict=True):
    """Evaluate an SPSA attack against a ``DefendedModel`` and log accuracy.

    Args:
        args: sequence of (dataset, epsilon, mode, K) — dataset is "MNIST" or
            "CIFAR", epsilon the attack budget, mode the representation tag,
            K the defense parameter.
        restrict: when True, pin this subprocess to a single GPU derived from
            the multiprocessing worker index.

    Raises:
        ValueError: if the dataset name is unknown.

    Writes natural/adversarial accuracy to ``<dataset>/<eps>_<mode>_<K>.txt``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # Worker names look like "Process-3" -> GPU index 2.
        # (np.int was removed in NumPy 1.24; use the builtin, and avoid
        # shadowing the builtin `id`.)
        proc_id = int(multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(proc_id - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])

    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))

    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds
    np.random.seed(1)
    tf.set_random_seed(
        1
    )  #Having this before keras.clear_session() causes it to hang for some reason

    # Load Model/Data and setup SPSA placeholders
    N = 50  # number of test images to attack
    if dataset == "MNIST":
        # Base Model
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        # SPSA attacks one image at a time
        shape_spsa = (1, 28, 28, 1)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    elif dataset == "CIFAR":
        # Base Model
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        # SPSA attacks one image at a time
        shape_spsa = (1, 32, 32, 3)
        x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    else:
        # Fail fast: without this an unknown dataset produced a confusing
        # NameError on `shape_spsa` further down.
        raise ValueError("Unknown dataset: " + dataset)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))

    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Labels: -1 for real representations, +1 for adversarial ones
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack") as scope:
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack; `with` guarantees the log file is closed even if an
    # attack iteration raises.
    with open(fname + ".txt", "w") as f:
        sample = np.random.choice(data.test_data.shape[0], N, replace=False)
        x_sample = data.test_data[sample]
        y_sample = np.argmax(data.test_labels[sample], axis=1)

        logits_nat = sess.run(defended_logits, {x: x_sample})
        f.write("Accuracy on Natural Images: " +
                str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) + "\n")

        pred_adv = -1.0 * np.ones((N))
        for i in range(N):
            x_real = x_sample[i].reshape(shape_spsa)
            x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
            pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

        f.write("Accuracy on Adversarial Images: " +
                str(np.mean(pred_adv == y_sample)))