def mr(datasets, model, samples_path):
    """
    Compute the misclassification ratio of the adversarial samples stored
    under ``samples_path``.

    The ratio is the number of sample files divided by the number of test
    inputs the model classifies correctly. When ``samples_path`` contains a
    name from the module-level ``ta`` list the denominator is additionally
    multiplied by ``nb_classes - 1``.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param model: model name, forwarded to model_load
    :param samples_path: directory read by get_data_file; matched by
        substring against the module-level ``ua`` and ``ta`` lists
        (presumably untargeted / targeted attack names -- confirm)
    :return: the ratio as a float, or None when ``samples_path`` matches no
        entry of ``ua`` or ``ta``
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model)

    batch_size = 256
    batch_preds = []
    try:
        n_batches = int(np.ceil(1.0 * X_test.shape[0] / batch_size))
        for i in range(n_batches):
            start = i * batch_size
            end = min(len(X_test), start + batch_size)
            # atleast_1d: a final batch holding a single sample may come back
            # as a scalar, which np.concatenate would reject.
            batch_preds.append(
                np.atleast_1d(
                    model_argmax(sess, x, preds, X_test[start:end],
                                 feed=feed_dict)))
    finally:
        # The session is only needed for the predictions above.
        sess.close()

    preds_test = np.concatenate(batch_preds) if batch_preds else np.asarray([])

    # Keep only the inputs the model gets right: the ratio is defined over
    # correctly classified originals.
    inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    X_test = X_test[inds_correct]

    [image_list, image_files, real_labels,
     predicted_labels] = get_data_file(samples_path)

    if any(a in samples_path for a in ua):
        denominator = len(X_test)
    elif any(a in samples_path for a in ta):
        denominator = len(X_test) * (nb_classes - 1)
    else:
        return None

    # float(): keep a true division even where "/" is integer division.
    result = float(len(image_files)) / denominator
    print('misclassification ratio is %.4f' % (result))
    return result
def model_prediction(sess, x, predictions, samples, feed=None, batch_size=128, datasets='mnist'):
    """
    Evaluate ``predictions`` on ``samples`` in batches and collect the rows.

    :param sess: session used to run ``predictions``
    :param x: input placeholder fed with each batch of ``samples``
    :param predictions: tensor to evaluate
    :param samples: array indexed along its first axis
    :param feed: optional dict of extra feeds merged into every batch's
        feed dict
    :param batch_size: number of samples per ``sess.run`` call
    :param datasets: dataset name; get_shape(datasets) supplies the number
        of classes, i.e. the width of the result
    :return: float32 array of shape (samples.shape[0], nb_classes)
    """
    input_shape, nb_classes = get_shape(datasets)
    nb_samples = samples.shape[0]
    nb_batches = int(math.ceil(float(nb_samples) / batch_size))
    pros_all = np.zeros(shape=(nb_samples, nb_classes), dtype='float32')

    for batch in range(nb_batches):
        start = batch * batch_size
        end = min(start + batch_size, nb_samples)
        feed_dict = {x: samples[start:end]}
        if feed is not None:
            # Honour caller-supplied feeds instead of silently dropping them.
            feed_dict.update(feed)
        pros_all[start:end] = sess.run(predictions, feed_dict)

    return pros_all
def __init__(self, restore=None, session=None, use_softmax=False, use_brelu=False, activation="relu", de=False, attack='fgsm', epoch=49):
    """
    Build the MNIST graph for the architecture named ``restore`` and load
    its checkpoint into ``session``.

    :param restore: key into model_dict; also part of the checkpoint path
    :param session: TensorFlow session the checkpoint is restored into
    :param use_softmax: accepted but not read here
    :param use_brelu: when true, ``activation`` is replaced by a ReLU
        bounded at 1 (only reported in the log line below)
    :param activation: activation reported in the log line
    :param de: selects the '../../de_models/<attack>/' checkpoint tree
        instead of '/mnt/dyz/models/'
    :param attack: sub-directory used when ``de`` is set
    :param epoch: checkpoint epoch directory
    """
    if use_brelu:
        def bounded_relu(tensor):
            return K.relu(tensor, max_value=1)

        activation = bounded_relu
    print("inside MNISTModel: activation = {}".format(activation))

    self.num_channels, self.image_size, self.num_labels = 1, 28, 10

    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    # Label placeholder: not used below, but still created as part of the graph.
    y = tf.placeholder(tf.float32, shape=(None, 10))

    input_shape, nb_classes = get_shape('mnist')
    model = model_dict[restore](input_shape, nb_classes, False)
    preds = model(x)

    # Both checkpoint trees share the same tail; only the root differs.
    checkpoint_tail = ('mnist_' + restore + '/' + str(epoch) + '/' + restore
                       + '.model')
    if de == True:
        model_path = '../../de_models/' + attack + '/' + checkpoint_tail
    else:
        model_path = '/mnt/dyz/models/' + checkpoint_tail

    tf.train.Saver().restore(session, model_path)
    print("load model successfully")

    self.model = model
def mr(datasets, model_name, attack, va, epoch=49):
    """
    Compute the misclassification ratio of stored adversarial samples.

    The ratio is the number of samples found under
    '../adv_result/<datasets>/<attack>/<model_name>/<va>' (for every other
    attack than 'fgsm': summed over the '<va>_<target>' directories of all
    target classes) divided by the number of test inputs the model
    classifies correctly.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param model_name: model name, forwarded to model_load
    :param attack: attack name; 'fgsm' reads a single directory
    :param va: attack parameter value used in the directory name
    :param epoch: checkpoint epoch forwarded to model_load
    :return: the ratio as a float
    :raises ZeroDivisionError: when no test input is classified correctly
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sample = X_test
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)
    try:
        probabilities = model_prediction(sess, x, preds, sample,
                                         feed=feed_dict, datasets=datasets)
    finally:
        sess.close()

    # Always reduce along axis 1 so that a single-sample test set still
    # yields an indexable array.
    current_class = np.argmax(probabilities, axis=1)

    # Only correctly classified inputs count towards the denominator.
    acc_pre_index = [
        i for i in range(sample.shape[0])
        if current_class[i] == np.argmax(Y_test[i])
    ]
    print(len(acc_pre_index))

    base_path = ('../adv_result/' + datasets + '/' + attack + '/' +
                 model_name + '/' + str(va))
    if attack == 'fgsm':
        sample_dirs = [base_path]
    else:
        sample_dirs = [base_path + '_' + str(tar)
                       for tar in range(0, nb_classes)]

    total = 0
    for samples_path in sample_dirs:
        [image_list, image_files, real_labels,
         predicted_labels] = get_data_file(samples_path)
        total += len(image_list)

    # float(): keep a true division even where "/" is integer division.
    return float(total) / len(acc_pre_index)
def sub_model_load(sess, datasets, submodel_name, target_model, epoch='9'):
    """
    Build the substitute model graph and restore its checkpoint into
    ``sess``. Only useful for the black-box attack.

    :param sess: session the checkpoint is restored into
    :param datasets: dataset name; get_shape(datasets) supplies the shapes
    :param submodel_name: key into model_dict for the substitute
    :param target_model: name of the attacked model (part of the path)
    :param epoch: checkpoint epoch directory; ints and strings both work
    :return: (model_sub, preds_sub)
    """
    # NOTE(review): the input placeholder created here is not returned, so
    # callers cannot feed ``preds_sub`` through it -- confirm intended usage.
    input_shape, nb_classes = get_shape(datasets)
    x = tf.placeholder(tf.float32, shape=input_shape)
    # Label placeholder: unused here, kept so the same graph is built.
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    model_sub = model_dict[submodel_name](input_shape, nb_classes, False)
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # str(epoch): accept an int epoch as well as the string default.
    model_path = (path.model_path + datasets + "_" + submodel_name + "_" +
                  target_model + '/' + str(epoch) + "/substitute.model")
    saver = tf.train.Saver()
    saver.restore(sess, model_path)
    return model_sub, preds_sub
def model_load(datasets, model_name, de=False, epoch=9, attack='fgsm', mu=False, mu_var='gf'):
    """
    Open a session, build the graph of ``model_name`` and restore its
    checkpoint.

    The checkpoint is read from one of three trees:
    ``path.mu_model_path`` when ``mu`` is set, ``path.de_model_path`` when
    ``de`` is set, and ``path.model_path`` otherwise.

    :param datasets: dataset name; get_shape(datasets) supplies the shapes
    :param model_name: key into model_dict
    :param de: load from the ``de_model_path`` tree (sub-directory
        ``attack``)
    :param epoch: checkpoint epoch directory (ignored when ``mu`` is set)
    :param attack: sub-directory used when ``de`` is set
    :param mu: load from the ``mu_model_path`` tree
    :param mu_var: sub-directory used when ``mu`` is set
    :return: (sess, preds, x, y, model, feed_dict); ``feed_dict`` is always
        None
    """
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.Session(config=session_config)
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    input_shape, nb_classes = get_shape(datasets)
    x = tf.placeholder(tf.float32, shape=input_shape)
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    model = model_dict[model_name](input_shape, nb_classes, False)
    preds = model(x)
    print("Defined TensorFlow model graph.")

    model_tag = datasets + '_' + model_name
    if mu == False:
        if True == de:
            root = path.de_model_path + attack + '/'
        else:
            root = path.model_path
        model_path = (root + model_tag + '/' + str(epoch) + '/' + model_name
                      + '.model')
    else:
        model_path = (path.mu_model_path + mu_var + '/' + model_tag + '/0/' +
                      model_tag + '.model')

    tf.train.Saver().restore(sess, model_path)

    feed_dict = None
    return sess, preds, x, y, model, feed_dict
def ns(datasets, model_name, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    """
    Mutate a model by switching neurons within each layer and save the
    mutant when it keeps enough accuracy.

    For every Conv2D and Linear layer a random subset of output neurons is
    chosen and their weights and biases are permuted among themselves. When
    a Conv2D layer is directly followed by a BN layer, that layer's gamma,
    beta, moving mean and moving variance are permuted the same way.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param model_name: model name, forwarded to model_load
    :param ration: fraction of each layer's neurons to switch
    :param threshold: the mutant is saved only if its accuracy is at least
        ``threshold`` times the original accuracy
    :param batch_size: evaluation batch size
    :param epoch: checkpoint epoch forwarded to model_load
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)
    try:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params, feed=feed_dict)
        print('Test accuracy on legitimate test examples for original model: {0}'.
              format(accuracy))

        nb_layers = len(model.layers)
        for i in range(nb_layers):
            layer = model.layers[i]
            layer_type = layer.__class__.__name__
            if "Conv2D" in layer_type:
                mutated, shuffled = _ns_pick_neurons(layer.output_channels,
                                                     ration)
                # Move the output-channel axis to the front so that neurons
                # can be addressed by their first index.
                _ns_switch(sess, layer.kernels, mutated, shuffled,
                           [3, 0, 1, 2], [1, 2, 3, 0])
                _ns_switch(sess, layer.b, mutated, shuffled)
                # Guard the look-ahead: a Conv2D layer may be the last one.
                if (i + 1 < nb_layers and
                        "BN" in model.layers[i + 1].__class__.__name__):
                    bn = model.layers[i + 1]
                    for variable in (bn.gamma, bn.beta, bn.moving_mean,
                                     bn.moving_variance):
                        _ns_switch(sess, variable, mutated, shuffled)
            elif "Linear" in layer_type:
                mutated, shuffled = _ns_pick_neurons(layer.num_hid, ration)
                _ns_switch(sess, layer.W, mutated, shuffled, [1, 0], [1, 0])
                _ns_switch(sess, layer.b, mutated, shuffled)

        mutated_accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                      args=eval_params, feed=feed_dict)
        print('Test accuracy on legitimate test examples for mutated model: {0}'.
              format(mutated_accuracy))

        if mutated_accuracy >= threshold * accuracy:
            train_dir = os.path.join(path.mu_model_path, 'ns',
                                     datasets + '_' + model_name, '0')
            if not os.path.exists(train_dir):
                os.makedirs(train_dir)
            save_path = os.path.join(train_dir,
                                     datasets + '_' + model_name + '.model')
            saver = tf.train.Saver()
            saver.save(sess, save_path)
    finally:
        # Release the session even when evaluation or mutation fails.
        sess.close()


def _ns_pick_neurons(num_neurons, ration):
    """Draw the neurons to switch and a shuffled copy giving their sources."""
    shuffle_num = num_neurons * ration
    if shuffle_num > 1.0:
        shuffle_num = (math.floor(shuffle_num)
                       if shuffle_num > 2.0 else math.ceil(shuffle_num))
    mutated = np.random.choice(num_neurons, int(shuffle_num), replace=False)
    shuffled = copy.copy(mutated)
    np.random.shuffle(shuffled)
    return mutated, shuffled


def _ns_switch(sess, variable, mutated, shuffled, to_front=None, to_back=None):
    """Permute the selected neurons of ``variable`` and write it back."""
    values = sess.run(variable)
    if to_front is not None:
        values = values.transpose(to_front)
    values[mutated] = values[shuffled]
    if to_back is not None:
        values = values.transpose(to_back)
    sess.run(tf.assign(variable, values))
def jsma(datasets, sample_path, model_name, target, store_path='../mt_result/integration/jsma/mnist'):
    """
    Craft one targeted adversarial example with the Jacobian-based saliency
    map approach (JSMA) and store it as a PNG when the attack succeeds.

    :param datasets: 'mnist', 'cifar10' or 'svhn'
    :param sample_path: path of the image to attack
    :param model_name: model name, forwarded to model_load
    :param target: index of the class the adversarial example should be
        classified as
    :param store_path: directory the adversarial image is written to;
        created when missing
    :return: a message string when ``target`` equals the current class or
        is out of range, otherwise None
    :raises ValueError: when ``datasets`` is not a supported dataset
    """
    image_shapes = {
        'mnist': (28, 28, 1),
        'cifar10': (32, 32, 3),
        'svhn': (32, 32, 3),
    }
    if datasets not in image_shapes:
        raise ValueError('Unsupported dataset: %r' % (datasets,))

    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name)
    try:
        sample = np.asarray([
            np.asarray(imread(sample_path)).reshape(*image_shapes[datasets])
        ]).astype('float32')
        sample = preprocess_image_1(sample)

        input_shape, nb_classes = get_shape(datasets)
        current_class = model_argmax(sess, x, preds, sample, feed=feed_dict)

        if not os.path.exists(store_path):
            os.makedirs(store_path)

        if target == current_class:
            return 'The target is equal to its original class'
        elif target >= nb_classes or target < 0:
            return 'The target is out of range'

        print('Start generating adv. example for target class %i' % target)

        # Instantiate a SaliencyMapMethod attack object
        attack = SaliencyMapMethod(model, back='tf', sess=sess)
        one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
        one_hot_target[0, target] = 1
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': one_hot_target
        }

        # This call runs the Jacobian-based saliency map approach
        adv_x = attack.generate_np(sample, **jsma_params)

        # Predicted class of the generated adversary
        new_class_label = model_argmax(sess, x, preds, adv_x, feed=feed_dict)
        res = int(new_class_label == target)
    finally:
        # Close the session on every exit path, including the early returns.
        sess.close()

    if res == 1:
        adv_img_deprocessed = deprocess_image_1(adv_x)
        # Base name of the input file, without its extension.
        i = sample_path.split('/')[-1].split('.')[-2]
        adv_path = store_path + '/adv_' + str(
            time.time() * 1000) + '_' + i + '_' + str(
                current_class) + '_' + str(new_class_label) + '_.png'
        imsave(adv_path, adv_img_deprocessed)
        print('$$$adv_img{' + adv_path + '}')

    print('$$$ori_img{' + sample_path + '}')
from nmutant_util.utils_imgproc import deprocess_image_1, preprocess_image_1, deprocess_image_1 datasets = 'cifar10' num = 10000 train_start = 0 train_end = 50000 test_start = 0 test_end = 10000 preprocess_image = preprocess_image_1 X_train, Y_train, X_test, Y_test = data_cifar10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end, preprocess=preprocess_image) input_shape, nb_classes = get_shape(datasets) sample = X_test[0:num] models3 = ['lenet1', 'lenet4', 'lenet5'] models2 = ['vgg11', 'vgg13', 'vgg16', 'vgg19'] models1 = [ 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'googlenet12', 'googlenet16', 'googlenet22' ] models = [ 'vgg11', 'vgg13', 'vgg16', 'vgg19', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152', 'googlenet12', 'googlenet16', 'googlenet22' ] #models=['lenet1', 'lenet4'] step_sizes = ['0.01', '0.02', '0.03'] '''
def nai(datasets, model_name, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    """
    Mutate a model by inverting the activation of randomly chosen neurons
    and save the mutant when it keeps enough accuracy.

    Neurons are drawn across all Conv2D and Linear layers. For each chosen
    neuron the sign of its weights and bias is flipped; when a Conv2D layer
    is directly followed by a BN layer, that layer's beta and moving mean
    are flipped for the same neurons.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param model_name: model name, forwarded to model_load
    :param ration: fraction of all neurons to invert
    :param threshold: the mutant is saved only if its accuracy is at least
        ``threshold`` times the original accuracy
    :param batch_size: evaluation batch size
    :param epoch: checkpoint epoch forwarded to model_load
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)
    try:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params, feed=feed_dict)
        print('Test accuracy on legitimate test examples for original model: {0}'.
              format(accuracy))

        unique_neurons = 0
        for layer in model.layers:
            layer_type = layer.__class__.__name__
            if "Conv2D" in layer_type:
                unique_neurons += layer.output_channels
            elif "Linear" in layer_type:
                unique_neurons += layer.num_hid
        # BN is handled together with the Conv2D layer it follows.
        indices = np.random.choice(unique_neurons,
                                   int(unique_neurons * ration),
                                   replace=False)

        nb_layers = len(model.layers)
        neurons_count = 0
        for i in range(nb_layers):
            layer = model.layers[i]
            layer_type = layer.__class__.__name__
            if "Conv2D" in layer_type:
                unique_neurons_layer = layer.output_channels
                weights = layer.kernels
                # Guard the look-ahead: a Conv2D layer may be the last one.
                has_bn = (i + 1 < nb_layers and
                          "BN" in model.layers[i + 1].__class__.__name__)
            elif "Linear" in layer_type:
                unique_neurons_layer = layer.num_hid
                weights = layer.W
                has_bn = False
            else:
                continue

            # Global neuron ids falling into this layer, made layer-local.
            in_layer = ((indices >= neurons_count) &
                        (indices < neurons_count + unique_neurons_layer))
            mutated_neurons = indices[in_layer] - neurons_count
            if mutated_neurons.size:
                # +1 keeps a neuron, -1 inverts it; the vector broadcasts
                # over the last (output) axis of the weights.
                signs = np.ones(unique_neurons_layer)
                signs[mutated_neurons] = -1.0
                _nai_flip(sess, weights, signs)
                _nai_flip(sess, layer.b, signs)
                if has_bn:
                    bn = model.layers[i + 1]
                    _nai_flip(sess, bn.beta, signs)
                    _nai_flip(sess, bn.moving_mean, signs)
            neurons_count += unique_neurons_layer

        mutated_accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                      args=eval_params, feed=feed_dict)
        print('Test accuracy on legitimate test examples for mutated model: {0}'.
              format(mutated_accuracy))

        if mutated_accuracy >= threshold * accuracy:
            train_dir = os.path.join(path.mu_model_path, 'nai',
                                     datasets + '_' + model_name, '0')
            if not os.path.exists(train_dir):
                os.makedirs(train_dir)
            save_path = os.path.join(train_dir,
                                     datasets + '_' + model_name + '.model')
            saver = tf.train.Saver()
            saver.save(sess, save_path)
    finally:
        # Release the session even when evaluation or mutation fails.
        sess.close()


def _nai_flip(sess, variable, signs):
    """Multiply ``variable`` by ``signs`` along its last axis and store it."""
    sess.run(tf.assign(variable, sess.run(variable) * signs))
def gf(datasets, model_name, ration=0.1, threshold=0.9, batch_size=256, epoch=9):
    """
    Mutate a model by Gaussian fuzzing of randomly chosen weights and save
    the mutant when it keeps enough accuracy.

    Weights are drawn across the kernels of Conv2D layers, the gamma of BN
    layers and the weight matrix of Linear layers. Each chosen weight is
    replaced by a sample from a normal distribution with the mean and
    standard deviation of the variable it belongs to.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param model_name: model name, forwarded to model_load
    :param ration: fraction of all weights to fuzz
    :param threshold: the mutant is saved only if its accuracy is at least
        ``threshold`` times the original accuracy
    :param batch_size: evaluation batch size
    :param epoch: checkpoint epoch forwarded to model_load
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                          args=eval_params, feed=feed_dict)
    print('Test accuracy on legitimate test examples for original model: {0}'.
          format(accuracy))

    # First pass: count every weight that is eligible for fuzzing.
    num_weights = 0
    for layer in model.layers:
        target = _gf_target(layer)
        if target is not None:
            variable, rank = target
            num_weights += _gf_size(variable.shape, rank)

    indices = np.random.choice(num_weights, int(num_weights * ration),
                               replace=False)

    # Second pass: fuzz the drawn weights layer by layer.
    weights_count = 0
    for layer in model.layers:
        target = _gf_target(layer)
        if target is None:
            continue
        variable, rank = target
        shape = variable.shape
        num_weights_layer = _gf_size(shape, rank)
        mutated_indices = set(indices) & set(
            np.arange(weights_count, weights_count + num_weights_layer))
        if mutated_indices:
            mutated_indices = np.array(list(mutated_indices)) - weights_count
            current_weights = sess.run(variable).reshape(-1)
            avg_weights = np.mean(current_weights)
            std_weights = np.std(current_weights)
            current_weights[mutated_indices] = np.random.normal(
                avg_weights, std_weights, mutated_indices.size)
            sess.run(tf.assign(variable, current_weights.reshape(shape)))
        weights_count += num_weights_layer

    mutated_accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params, feed=feed_dict)
    print('Test accuracy on legitimate test examples for mutated model: {0}'.
          format(mutated_accuracy))

    if mutated_accuracy >= threshold * accuracy:
        train_dir = os.path.join(path.mu_model_path, 'gf',
                                 datasets + '_' + model_name, '0')
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)
        save_path = os.path.join(train_dir,
                                 datasets + '_' + model_name + '.model')
        saver = tf.train.Saver()
        saver.save(sess, save_path)
    sess.close()


def _gf_target(layer):
    """Return (variable, rank) of the weights to fuzz, or None to skip."""
    layer_type = layer.__class__.__name__
    if "Conv2D" in layer_type:
        return layer.kernels, 4
    if "BN" in layer_type:
        return layer.gamma, 1
    if "Linear" in layer_type:
        return layer.W, 2
    return None


def _gf_size(shape, rank):
    """Multiply the first ``rank`` dimensions of ``shape`` into an int."""
    size = shape[0]
    for axis in range(1, rank):
        size = size * shape[axis]
    return int(size)
def model_training(datasets, model_name, samples_path=None, nb_epochs=6, batch_size=256, learning_rate=0.001, attack=None, mu=False, mu_var='gf'):
    """
    Train ``model_name`` on ``datasets`` and report its test accuracy.

    When ``samples_path`` is given, the samples found there are appended to
    the training set, labelled with their real labels, and the model is
    saved under the ``de_model_path`` tree (``mude_model_path`` when ``mu``
    is set). Otherwise the model is trained on the plain training set and
    saved under ``path.model_path``.

    :param datasets: dataset name, forwarded to get_data / get_shape
    :param model_name: key into model_dict
    :param samples_path: optional directory of extra training samples read
        by get_data_file
    :param nb_epochs: number of training epochs
    :param batch_size: training and evaluation batch size
    :param learning_rate: learning rate passed to model_train
    :param attack: attack name used in the save path; required when
        ``samples_path`` is given
    :param mu: save under the ``mude_model_path`` tree
    :param mu_var: sub-directory used when ``mu`` is set
    :raises ValueError: when ``samples_path`` is given without ``attack``
    """
    if samples_path is not None and attack is None:
        raise ValueError('attack must be given together with samples_path')

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    try:
        X_train, Y_train, X_test, Y_test = get_data(datasets)
        input_shape, nb_classes = get_shape(datasets)

        if samples_path is not None:
            [image_list, image_files, real_labels,
             predicted_labels] = get_data_file(samples_path)
            samples_adv = np.asarray(image_list)
            # One-hot encode with the dataset's class count rather than a
            # hard-coded 10.
            label_ids = [int(label) for label in real_labels]
            labels_adv = np.zeros((len(label_ids), nb_classes))
            labels_adv[np.arange(len(label_ids)), label_ids] = 1
            samples = np.concatenate((X_train, samples_adv))
            labels = np.concatenate((Y_train, labels_adv))
            if mu == True:
                model_path = (path.mude_model_path + mu_var + '/' + attack +
                              '/' + datasets + "_" + model_name)
            else:
                model_path = (path.de_model_path + attack + '/' + datasets +
                              "_" + model_name)
        else:
            samples = X_train
            labels = Y_train
            model_path = path.model_path + datasets + "_" + model_name

        # Define input TF placeholder
        x = tf.placeholder(tf.float32, shape=input_shape)
        y = tf.placeholder(tf.float32, shape=(None, nb_classes))
        feed_dict = None

        model = model_dict[model_name](input_shape, nb_classes)
        preds = model(x)
        print("Defined TensorFlow model graph.")

        # Training the model using TensorFlow
        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': model_path,
            'filename': model_name + '.model'
        }
        sess.run(tf.global_variables_initializer())
        rng = np.random.RandomState([2017, 8, 30])
        model_train(sess, x, y, preds, samples, labels, args=train_params,
                    rng=rng, save=True)

        # Evaluate the accuracy of the model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params, feed=feed_dict)
        print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    finally:
        # Close TF session, also when training fails
        sess.close()

    print('Finish model training.')
def submodel_training(datasets, submodel_name, target_model, batch_size=256, learning_rate=0.001, data_aug=6, lmbda=0.1, nb_epochs=10, holdout=150):
    """
    Train a substitute model against a black-box target model using
    Jacobian-based dataset augmentation.

    The substitute starts from the first ``holdout`` test inputs. In every
    round it is trained, the training set is doubled by
    jacobian_augmentation, and the new points are labelled with the
    target model's arg-max prediction.

    :param datasets: dataset name, forwarded to get_data / get_shape /
        model_load
    :param submodel_name: key into model_dict for the substitute
    :param target_model: name of the black-box model loaded by model_load
    :param batch_size: training and evaluation batch size
    :param learning_rate: learning rate passed to model_train
    :param data_aug: number of train/augment rounds
    :param lmbda: augmentation step size
    :param nb_epochs: training epochs per round
    :param holdout: number of test inputs initially given to the adversary
    """
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    model_path = (path.model_path + datasets + "_" + submodel_name + "_" +
                  target_model)

    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)

    # model_load opens the session used for everything below; no second
    # session is created here, so none is left open.
    sess, preds, x, y, model, feed_dict = model_load(datasets, target_model)
    try:
        rng = np.random.RandomState([2017, 8, 30])

        # Initialize substitute training set reserved for adversary
        X_sub = X_test[:holdout]
        Y_sub = np.argmax(Y_test[:holdout], axis=1)

        model_sub = model_dict[submodel_name](input_shape, nb_classes)
        preds_sub = model_sub(x)
        print("Defined TensorFlow model graph for the substitute.")

        # Define the Jacobian symbolically using TensorFlow
        grads = jacobian_graph(preds_sub, x, nb_classes)

        train_params = {
            'nb_epochs': nb_epochs,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': model_path,
            'filename': 'substitute.model'
        }
        eval_params = {'batch_size': batch_size}

        # Train the substitute and augment dataset alternatively
        for rho in range(data_aug):
            print("Substitute training epoch #" + str(rho))
            with TemporaryLogLevel(logging.WARNING, "nmutant_util.utils.tf"):
                model_train(sess, x, y, preds_sub, X_sub,
                            to_categorical(Y_sub, nb_classes),
                            init_all=False, args=train_params, rng=rng,
                            save=True)

            # If we are not at last substitute training iteration, augment
            # dataset
            if rho < data_aug - 1:
                print("Augmenting substitute training data.")
                # Step direction: -1 during the first three rounds, +1 after.
                lmbda_coef = 1 if rho >= 3 else -1
                X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads,
                                              lmbda_coef * lmbda)

                print("Labeling substitute training data.")
                # Label the newly generated synthetic points using the
                # black-box
                Y_sub = np.hstack([Y_sub, Y_sub])
                half = len(X_sub) // 2
                X_sub_prev = X_sub[half:]
                bbox_val = batch_eval(sess, [x], [preds], [X_sub_prev],
                                      args=eval_params, feed=feed_dict)[0]
                # Note here that we take the argmax because the adversary
                # only has access to the label (not the probabilities) output
                # by the black-box model
                Y_sub[half:] = np.argmax(bbox_val, axis=1)
    finally:
        # Close TF session, also when training fails
        sess.close()

    print('Finish model training.')
def choose_mu(attack='fgsm', datasets='mnist', total_num=10000, model_name='lenet1', mu_var='gf'):
    """
    Select adversarial samples that also fool a mutated model and split
    them into test and train directories.

    The mutated model (``mu_var``) is loaded, and for every sample
    directory of the given attack a sample is kept when its originating
    test input is classified correctly by the mutated model and the
    mutated model predicts the label stored with the sample. Kept samples
    are saved with ``np.save`` under
    '../adv_result/mu_<datasets>/<mu_var>/<attack>/<model_name>/test_data'
    or '.../train_data', depending on the index parsed from the file name.

    Nothing is selected for a dataset other than 'cifar10' / 'mnist' or an
    attack other than 'fgsm' / 'cw' / 'jsma'.

    :param attack: 'fgsm', 'cw' or 'jsma'
    :param datasets: 'cifar10' or 'mnist'
    :param total_num: accepted but not read here
    :param model_name: model name, forwarded to model_load
    :param mu_var: mutation variant, forwarded to model_load
    """
    # Per dataset: the test share is 1/divisor of each index range; the
    # lists are the attack parameter values naming the sample directories.
    split_settings = {
        'cifar10': {
            'divisor': 6,
            'fgsm': [0.01, 0.02, 0.03],
            'cw': [0.1, 0.2, 0.3],
            'jsma': [0.09, 0.1, 0.11],
        },
        'mnist': {
            'divisor': 7,
            'fgsm': [0.2, 0.3, 0.4],
            'cw': [9, 10, 11],
            'jsma': [0.09, 0.1, 0.11],
        },
    }

    tf.reset_default_graph()
    tf.set_random_seed(1234)

    X_train, Y_train, X_test, Y_test = get_data(datasets)
    # model_load opens the only session needed; no second one is created.
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     de=False, epoch=9,
                                                     attack='fgsm', mu=True,
                                                     mu_var=mu_var)
    try:
        pre = model_prediction(sess, x, preds, X_test, feed=feed_dict,
                               datasets=datasets)
        # Set: membership is tested once per sample file below.
        acc_pre_index = set(
            i for i in range(0, pre.shape[0])
            if np.argmax(pre[i]) == np.argmax(Y_test[i]))

        train_path = ('../adv_result/' + datasets + '/' + attack + '/' +
                      model_name)
        store_root = ('../adv_result/mu_' + datasets + '/' + mu_var + '/' +
                      attack + '/' + model_name)
        store_path_train = store_root + '/train_data'
        store_path_test = store_root + '/test_data'
        if not os.path.isdir(store_path_train):
            os.makedirs(store_path_train)
        if not os.path.isdir(store_path_test):
            os.makedirs(store_path_test)

        settings = split_settings.get(datasets)
        if settings is None or attack not in ('fgsm', 'cw', 'jsma'):
            return
        divisor = settings['divisor']

        if attack == 'fgsm':
            nb_test = X_test.shape[0]
            cut = int(math.ceil(nb_test / divisor))
            for value in settings['fgsm']:
                _choose_mu_store(sess, x, preds, feed_dict, datasets,
                                 train_path + '/' + str(value), str(value),
                                 acc_pre_index, range(0, cut),
                                 range(cut, nb_test), store_path_test,
                                 store_path_train)
        else:
            # Targeted attacks: one directory per (value, target), each
            # target owning a block of 1000 indices.
            per_target = int(math.ceil(1000 / divisor))
            for t in range(10):
                first = 1000 * t
                for value in settings[attack]:
                    _choose_mu_store(
                        sess, x, preds, feed_dict, datasets,
                        train_path + '/' + str(value) + '_' + str(t),
                        str(value), acc_pre_index,
                        range(first, first + per_target),
                        range(first + per_target, 1000 * (t + 1)),
                        store_path_test, store_path_train)
    finally:
        sess.close()


def _choose_mu_store(sess, x, preds, feed_dict, datasets, samples_path, prefix,
                     acc_pre_index, test_range, train_range, store_path_test,
                     store_path_train):
    """Save the kept samples of one directory into the test/train stores."""
    [image_list, image_files, real_labels,
     predicted_labels] = get_data_file(samples_path)
    samples_adv = np.asarray(image_list)
    result = model_prediction(sess, x, preds, samples_adv, feed=feed_dict,
                              datasets=datasets)

    first_position = {}  # index string -> first file carrying that index
    kept = set()  # index strings of the samples that pass both checks
    for position, file_name in enumerate(image_files):
        key = file_name.split('_')[0]
        first_position.setdefault(key, position)
        if (int(key) in acc_pre_index) and (
                predicted_labels[position] == np.argmax(result[position])):
            kept.add(key)

    for index_range, store_path in ((test_range, store_path_test),
                                    (train_range, store_path_train)):
        for i in index_range:
            key = str(i)
            if key in kept:
                position = first_position[key]
                image_files[position] = prefix + '_' + image_files[position]
                np.save(store_path + '/' + image_files[position],
                        image_list[position])
def cos(datasets, model, de_model, attack='fgsm', epoch=49, de_epoch=49):
    """
    Average Jensen-Shannon divergence between the outputs of a model and the
    outputs of its defense-enhanced counterpart on the test set.

    Only test samples that BOTH models classify correctly contribute to the
    average.

    :param datasets: dataset name, passed to get_data / model_load
    :param model: name of the original model to load
    :param de_model: name of the defense-enhanced model to load
    :param attack: forwarded to model_load when loading the defense model
    :param epoch: checkpoint epoch of the original model
    :param de_epoch: checkpoint epoch of the defense-enhanced model
    :return: mean JS divergence over the samples both models get right
    :raises ZeroDivisionError: if no test sample is classified correctly by
        both models
    """
    tf.reset_default_graph()

    print("load defense model.")
    sess, preds, x, y, model, feed_dict = model_load(datasets, model,
                                                     epoch=epoch)
    try:
        X_train, Y_train, X_test, Y_test = get_data(datasets)
        result_nor = model_prediction(sess, x, preds, X_test,
                                      feed=feed_dict, datasets=datasets)
    finally:
        # The first session must be closed before the graph is reset:
        # previously it was simply overwritten (leaked), and resetting the
        # default graph under an active session is undefined behaviour.
        sess.close()

    tf.reset_default_graph()
    sess, preds_de, x, y, model_de, feed_dict = model_load(datasets,
                                                           de_model,
                                                           de=True,
                                                           attack=attack,
                                                           epoch=de_epoch)
    try:
        result_de = model_prediction(sess, x, preds_de, X_test,
                                     feed=feed_dict, datasets=datasets)
    finally:
        sess.close()

    num = 0
    js = 0.
    for label_vec, p, q in zip(Y_test, result_nor, result_de):
        label = np.argmax(label_vec)
        if label == np.argmax(p) and label == np.argmax(q):
            num += 1
            # JS(p, q) = 0.5 * KL(p || m) + 0.5 * KL(q || m), m = (p + q) / 2
            m = (p + q) / 2
            js = js + 0.5 * scipy.stats.entropy(
                p, m) + 0.5 * scipy.stats.entropy(q, m)

    result = js / num
    print("JS divergence: ", result)
    return result
def ws(datasets, model_name, ration=0.1, threshold=0.9, batch_size=256,
       epoch=9):
    """
    Weight-shuffling mutation of a trained model.

    A fraction ``ration`` of the neurons of all Conv2D / Linear layers is
    drawn at random (without replacement). For every drawn neuron the weights
    selected by its index along the last axis of ``layer.kernels`` (Conv2D)
    or of ``layer.W`` (Linear) are randomly permuted and written back.
    The mutated model is saved under
    ``path.mu_model_path/ws/<datasets>_<model_name>/0`` only when its test
    accuracy is at least ``threshold`` times the original accuracy.

    :param datasets: dataset name
    :param model_name: name of the model to load and mutate
    :param ration: fraction of neurons to mutate
    :param threshold: minimum ratio mutated accuracy / original accuracy
        required for the mutant to be saved
    :param batch_size: batch size handed to model_eval
    :param epoch: checkpoint epoch to load
    :return: None
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)
    eval_params = {'batch_size': batch_size}

    def _evaluate():
        # Test-set accuracy of whatever weights the session currently holds.
        return model_eval(sess, x, y, preds, X_test, Y_test,
                          args=eval_params, feed=feed_dict)

    def _shuffle_neurons(variable, neuron_first, restore, picked):
        # Bring the neuron axis to the front, permute the weights of every
        # picked neuron, then write the tensor back in its original layout.
        weights = sess.run(variable).transpose(neuron_first)
        for neuron in picked:
            flat = weights[neuron].reshape(-1)
            order = np.arange(len(flat))
            np.random.shuffle(order)
            weights[neuron] = flat[order].reshape(weights[neuron].shape)
        sess.run(tf.assign(variable, weights.transpose(restore)))

    accuracy = _evaluate()
    print('Test accuracy on legitimate test examples for original model: {0}'.
          format(accuracy))

    # Total number of mutable neurons; other layer types are not counted.
    total_neurons = 0
    for layer in model.layers:
        layer_kind = layer.__class__.__name__
        if "Conv2D" in layer_kind:
            total_neurons += layer.output_channels
        elif "Linear" in layer_kind:
            total_neurons += layer.num_hid

    indices = np.random.choice(total_neurons, int(total_neurons * ration),
                               replace=False)

    # `offset` is the global index of the first neuron of the current layer.
    offset = 0
    for layer in model.layers:
        layer_kind = layer.__class__.__name__
        if "Conv2D" in layer_kind:
            width = layer.output_channels
            hits = set(indices) & set(np.arange(offset, offset + width))
            if hits:
                _shuffle_neurons(layer.kernels, [3, 0, 1, 2], [1, 2, 3, 0],
                                 np.array(list(hits)) - offset)
            offset += width
        elif "Linear" in layer_kind:
            width = layer.num_hid
            hits = set(indices) & set(np.arange(offset, offset + width))
            if hits:
                _shuffle_neurons(layer.W, [1, 0], [1, 0],
                                 np.array(list(hits)) - offset)
            offset += width

    mutated_accuracy = _evaluate()
    print('Test accuracy on legitimate test examples for mutated model: {0}'.
          format(mutated_accuracy))

    if mutated_accuracy >= threshold * accuracy:
        tag = datasets + '_' + model_name
        train_dir = os.path.join(path.mu_model_path, 'ws', tag, '0')
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)
        tf.train.Saver().save(sess, os.path.join(train_dir, tag + '.model'))
    sess.close()
def cw(datasets, sample, model_name, target, store_path, ini_con=10,
       start=0, end=10000, batch_size=32, epoch=9, mu=False, mu_var='gf',
       de=False, attack='fgsm'):
    """
    Carlini and Wagner's (L2) targeted attack.

    Takes ``sample[start:end]``, keeps the inputs the loaded model classifies
    correctly (ground truth is read from ``Y_test[start:end]`` at the same
    positions) and, for every kept input whose class is not already
    ``target``, crafts one adversarial example. An example counts as a success
    when the model predicts ``target`` for it; successes are written with
    ``np.save`` to ``store_path`` under the name
    ``<start + index>_<time in ms>_<original class>_<adversarial class>``.

    :param datasets: dataset name
    :param sample: inputs to attack
    :param model_name: name of the model to load
    :param target: the class the adversarial examples should be assigned to
    :param store_path: output directory (created if missing)
    :param ini_con: value of the attack's ``initial_const`` parameter
    :param start: first index of the slice of ``sample`` to attack
    :param end: end index (exclusive) of the slice of ``sample`` to attack
    :param batch_size: size of the chunks the kept samples are walked in
        (each sample is still attacked individually)
    :param epoch: checkpoint epoch to load
    :param mu: forwarded to model_load
    :param mu_var: forwarded to model_load
    :param de: forwarded to model_load
    :param attack: forwarded to model_load
    :return: (number of successful adversarial examples,
              number of correctly classified inputs)
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch, mu=mu,
                                                     mu_var=mu_var, de=de,
                                                     attack=attack)
    print('load successfule')

    try:
        input_shape, nb_classes = get_shape(datasets)
        image_shape = (input_shape[1], input_shape[2], input_shape[3])

        sample = sample[start:end]
        # NOTE(review): model_prediction is called without `datasets=`, so it
        # falls back to its own default -- confirm this is intended for
        # datasets other than the default one.
        probabilities = model_prediction(sess, x, preds, sample,
                                         feed=feed_dict)
        current_class = [np.argmax(row) for row in probabilities]

        if not os.path.exists(store_path):
            os.makedirs(store_path)

        # Only correctly classified inputs are attacked.
        Y_test = Y_test[start:end]
        acc_pre_index = [
            i for i in range(sample.shape[0])
            if current_class[i] == np.argmax(Y_test[i])
        ]
        print('current_class', current_class)
        print('Start generating adv. example for target class %i' % target)

        sample_acc = np.zeros(shape=(len(acc_pre_index),) + image_shape,
                              dtype='float')
        current_class_acc = np.zeros(shape=(len(acc_pre_index)), dtype=int)
        for row, idx in enumerate(acc_pre_index):
            sample_acc[row] = sample[idx]
            current_class_acc[row] = current_class[idx]
        print('current_class_acc', current_class_acc)

        # Named `attacker` so the local does not shadow this function.
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)

        one_hot = np.zeros((1, nb_classes), dtype=np.float32)
        one_hot[0, target] = 1

        # The parameters were identical for every supported dataset but were
        # only bound for 'mnist' and 'cifar10'; any other dataset crashed
        # with an unbound local. They are now defined unconditionally.
        cw_params = {
            'binary_search_steps': 1,
            'y_target': one_hot,
            'max_iterations': 1000,
            'learning_rate': 0.1,
            'batch_size': 1,
            'initial_const': ini_con,
        }

        suc = 0
        nb_batches = int(math.ceil(float(sample_acc.shape[0]) / batch_size))
        for batch in range(nb_batches):
            start_batch = batch * batch_size
            end_batch = min((batch + 1) * batch_size, sample_acc.shape[0])
            for j in range(start_batch, end_batch):
                if current_class_acc[j] == target:
                    # Already the target class: nothing to craft.
                    continue
                adv_input = sample_acc[j].reshape((1,) + image_shape)
                adv = attacker.generate_np(adv_input, **cw_params)
                new_class_labels = model_argmax(sess, x, preds, adv,
                                                feed=feed_dict)
                if int(new_class_labels == target) != 1:
                    continue
                # Drop the leading batch axis before saving.
                adv = adv.reshape(adv.shape[1], adv.shape[2], adv.shape[3])
                suc += 1
                out_path = store_path + '/' + '_'.join([
                    str(start + acc_pre_index[j]),
                    str(time.time() * 1000),
                    str(current_class_acc[j]),
                    str(new_class_labels),
                ])
                np.save(out_path, adv)
    finally:
        # Release the session even when the attack raises.
        sess.close()

    return suc, len(acc_pre_index)
def jsma(datasets, sample, model_name, target, store_path, gamma=0.1,
         start=0, end=10000, batch_size=32, epoch=9, mu=False, mu_var='gf',
         de=False, attack='fgsm'):
    """
    Targeted Jacobian-based saliency map attack (JSMA).

    Works on ``sample[start:end]``: inputs the loaded model classifies
    correctly (labels come from ``Y_test[start:end]``) and whose class differs
    from ``target`` are attacked one by one. Every adversarial example that
    the model assigns to ``target`` is stored with ``np.save`` in
    ``store_path`` as
    ``<start + index>_<time in ms>_<original class>_<adversarial class>``.

    :param datasets: dataset name
    :param sample: inputs to attack
    :param model_name: name of the model to load
    :param target: the class the adversarial examples should be assigned to
    :param store_path: output directory (created if missing)
    :param gamma: value of the attack's ``gamma`` parameter
    :param start: first index of the slice of ``sample`` to attack
    :param end: end index (exclusive) of the slice of ``sample`` to attack
    :param batch_size: size of the chunks the kept samples are walked in
    :param epoch: checkpoint epoch to load
    :param mu: forwarded to model_load
    :param mu_var: forwarded to model_load
    :param de: forwarded to model_load
    :param attack: forwarded to model_load
    :return: (number of successful adversarial examples,
              number of correctly classified inputs)
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch, mu=mu,
                                                     mu_var=mu_var, de=de,
                                                     attack=attack)

    input_shape, nb_classes = get_shape(datasets)
    image_shape = (input_shape[1], input_shape[2], input_shape[3])

    sample = sample[start:end]
    probabilities = model_prediction(sess, x, preds, sample, feed=feed_dict)
    current_class = [np.argmax(row) for row in probabilities]

    if not os.path.exists(store_path):
        os.makedirs(store_path)

    # Keep only the inputs whose prediction matches the ground truth.
    Y_test = Y_test[start:end]
    acc_pre_index = [
        idx for idx in range(sample.shape[0])
        if current_class[idx] == np.argmax(Y_test[idx])
    ]
    print('Start generating adv. example for target class %i' % target)

    kept = len(acc_pre_index)
    sample_acc = np.zeros(shape=(kept,) + image_shape, dtype='float')
    current_class_acc = np.zeros(shape=(kept), dtype=int)
    for row, idx in enumerate(acc_pre_index):
        sample_acc[row] = sample[idx]
        current_class_acc[row] = current_class[idx]

    attacker = SaliencyMapMethod(model, back='tf', sess=sess)

    # One-hot encoding of the requested target class.
    one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
    one_hot_target[0, target] = 1
    jsma_params = {
        'theta': 1.,
        'gamma': gamma,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': one_hot_target
    }

    suc = 0
    nb_batches = int(math.ceil(float(sample_acc.shape[0]) / batch_size))
    for batch in range(nb_batches):
        start_batch = batch * batch_size
        end_batch = min((batch + 1) * batch_size, sample_acc.shape[0])
        for j in range(start_batch, end_batch):
            if current_class_acc[j] == target:
                continue
            adv_input = sample_acc[j].reshape((1,) + image_shape)
            adv = attacker.generate_np(adv_input, **jsma_params)
            new_class_labels = model_argmax(sess, x, preds, adv,
                                            feed=feed_dict)
            if int(new_class_labels == target) != 1:
                continue
            # Strip the leading batch axis before writing the example.
            adv = adv.reshape(adv.shape[1], adv.shape[2], adv.shape[3])
            suc += 1
            path = store_path + '/' + '_'.join([
                str(start + acc_pre_index[j]),
                str(time.time() * 1000),
                str(current_class_acc[j]),
                str(new_class_labels),
            ])
            np.save(path, adv)

    # Close TF session
    sess.close()
    return suc, len(acc_pre_index)
def blackbox(datasets, sample, model_name, submodel_name, store_path, step_size=0.3, batch_size=256):
    """
    Black-box attack from arxiv.org/abs/1602.02697: adversarial examples are
    crafted with FGSM on a substitute model and checked against the target
    (black-box) model.

    Inputs that the target model classifies correctly are perturbed in
    batches; every adversarial example whose predicted class differs from the
    original prediction is counted and written to ``store_path`` as a
    ``.png`` file named
    ``<index>_<time in ms>_<original class>_<adversarial class>.png``.

    :param datasets: dataset name
    :param sample: inputs to attack
    :param model_name: name of the target (black-box) model to load
    :param submodel_name: name of the substitute model to load
    :param store_path: output directory (created if missing)
    :param step_size: FGSM ``eps``
    :param batch_size: number of inputs perturbed per session run
    :return: (number of successful adversarial examples,
              number of correctly classified inputs)
    """
    # Simulate the black-box model locally
    # You could replace this by a remote labeling API for instance
    print("Preparing the black-box model.")
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    # NOTE(review): bbox_preds is never used below; predictions are obtained
    # through fresh model(x) calls instead.
    sess, bbox_preds, x, y, model, feed_dict = model_load(datasets, model_name)
    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Preparing the substitute model.")
    model_sub, preds_sub = sub_model_load(sess, datasets, submodel_name,
                                          model_name)
    ###########################################################################
    # Craft adversarial examples using the Blackbox approach
    ###########################################################################
    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    # The bare string below is disabled image-loading code kept verbatim.
    ''' if 'mnist' == datasets: sample = np.asarray([np.asarray(imread(sample_path)).reshape(28,28,1)]).astype('float32') sample = preprocess_image_1(sample) elif 'cifar10' == datasets: sample = np.asarray([np.asarray(imread(sample_path)).reshape(32,32,3)]).astype('float32') sample = preprocess_image_1(sample) elif 'svhn' == datasets: sample = np.asarray([np.asarray(imread(sample_path)).reshape(32,32,3)]).astype('float32') sample = preprocess_image_1(sample) '''
    probabilities = model_prediction(sess, x, model(x), sample, feed=feed_dict)
    # NOTE(review): in the single-sample branch current_class is a scalar,
    # yet it is indexed as current_class[i] right below -- this path looks
    # like it would fail; confirm.
    if sample.shape[0] == 1:
        current_class = np.argmax(probabilities)
    else:
        current_class = np.argmax(probabilities, axis=1)
    if not os.path.exists(store_path):
        os.makedirs(store_path)
    # only for correct:
    # Ground truth is read from Y_test at the same positions as `sample`.
    acc_pre_index = []
    for i in range(0, sample.shape[0]):
        if current_class[i] == np.argmax(Y_test[i]):
            acc_pre_index.append(i)
    # Gather the correctly classified inputs together with their predicted
    # probabilities and classes.
    sample_acc = np.zeros(shape=(len(acc_pre_index), input_shape[1],
                                 input_shape[2], input_shape[3]),
                          dtype='float32')
    probabilities_acc = np.zeros(shape=(len(acc_pre_index), nb_classes),
                                 dtype='float32')
    current_class_acc = np.zeros(shape=(len(acc_pre_index)), dtype=int)
    for i in range(0, len(acc_pre_index)):
        sample_acc[i] = sample[acc_pre_index[i]]
        probabilities_acc[i] = probabilities[acc_pre_index[i]]
        current_class_acc[i] = current_class[acc_pre_index[i]]
    # NOTE(review): the three dictionaries are identical, and fgsm_par stays
    # unbound for any dataset other than these three.
    if datasets == 'mnist':
        fgsm_par = {
            'eps': step_size,
            'ord': np.inf,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif 'cifar10' == datasets:
        fgsm_par = {
            'eps': step_size,
            'ord': np.inf,
            'clip_min': 0.,
            'clip_max': 1.
        }
    elif 'svhn' == datasets:
        fgsm_par = {
            'eps': step_size,
            'ord': np.inf,
            'clip_min': 0.,
            'clip_max': 1.
        }
    fgsm = FastGradientMethod(model_sub, sess=sess)
    # Craft adversarial examples using the substitute
    x_adv_sub = fgsm.generate(x, **fgsm_par)
    nb_batches = int(math.ceil(float(sample_acc.shape[0]) / batch_size))
    suc = 0
    for batch in range(nb_batches):
        #start, end = batch_indices(batch, sample_acc.shape[0], batch_size)
        print(batch)
        start = batch * batch_size
        end = (batch + 1) * batch_size
        # Clamp the last batch to the number of kept samples.
        if end > sample_acc.shape[0]:
            end = sample_acc.shape[0]
        adv = sess.run(x_adv_sub,
                       feed_dict={
                           x: sample_acc[start:end],
                           y: probabilities_acc[start:end]
                       })
        adv_img_deprocessed = deprocess_image_1(adv)
        # Check if success was achieved
        #probabilities = model_prediction(sess, x, preds, sample, feed=feed_dict)
        # NOTE(review): model(x) is invoked again on every batch.
        new_class_label = model_argmax(
            sess, x, model(x), adv,
            feed=feed_dict)  # Predicted class of the generated adversary
        for i in range(0, len(new_class_label)):
            # j is the position inside the kept (correctly classified) set.
            j = batch * batch_size + i
            if new_class_label[i] != current_class_acc[j]:
                suc += 1
                path = store_path + '/' + str(j) + '_' + str(
                    time.time() * 1000) + '_' + str(
                        current_class_acc[j]) + '_' + str(
                            new_class_label[i]) + '.png'
                imsave(path, adv_img_deprocessed[i])
    # Close TF session
    sess.close()
    return suc, len(acc_pre_index)
# NOTE(review): the triple-quote delimiter below has no visible opening
# partner in this block -- presumably it closes a string that was meant to
# disable this function; confirm before touching it.
'''
def bim(datasets, sample, model_name, store_path, step_size='0.3',
        batch_size=256, epoch=9):
    """
    Basic Iterative Method (BIM) untargeted attack.

    Inputs of ``sample`` that the loaded model classifies correctly (ground
    truth is read from ``Y_test`` at the same positions) are perturbed in
    batches. Every adversarial example whose predicted class differs from the
    original prediction is counted and stored with ``np.save`` in
    ``store_path`` as
    ``<sample index>_<time in ms>_<original class>_<adversarial class>``.

    :param datasets: dataset name
    :param sample: inputs to attack
    :param model_name: name of the model to load
    :param store_path: output directory (created if missing)
    :param step_size: total perturbation budget ``eps`` (string or number,
        converted with ``float``); the per-iteration step is ``eps / 6``
    :param batch_size: number of inputs perturbed per session run
    :param epoch: checkpoint epoch to load
    :return: (number of successful adversarial examples,
              number of correctly classified inputs)
    """
    tf.reset_default_graph()
    X_train, Y_train, X_test, Y_test = get_data(datasets)
    input_shape, nb_classes = get_shape(datasets)
    print(epoch)
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_name,
                                                     epoch=epoch)

    try:
        # NOTE(review): model_prediction is called without `datasets=`, so it
        # falls back to its own default -- confirm this is intended for
        # datasets other than the default one.
        probabilities = model_prediction(sess, x, preds, sample,
                                         feed=feed_dict)
        # Always reduce along the class axis. The former single-sample
        # special case produced a scalar that was then indexed with [i]
        # below, which fails for a one-element input.
        current_class = np.argmax(probabilities, axis=1)

        if not os.path.exists(store_path):
            os.makedirs(store_path)

        # Only correctly classified inputs are attacked.
        acc_pre_index = [
            i for i in range(sample.shape[0])
            if current_class[i] == np.argmax(Y_test[i])
        ]

        kept = len(acc_pre_index)
        sample_acc = np.zeros(shape=(kept, input_shape[1], input_shape[2],
                                     input_shape[3]),
                              dtype='float32')
        probabilities_acc = np.zeros(shape=(kept, nb_classes),
                                     dtype='float32')
        current_class_acc = np.zeros(shape=(kept), dtype=int)
        for row, idx in enumerate(acc_pre_index):
            sample_acc[row] = sample[idx]
            probabilities_acc[row] = probabilities[idx]
            current_class_acc[row] = current_class[idx]

        print('Start generating adv. example')

        # The parameters were identical for all three supported datasets but
        # unbound for any other one; define them once, unconditionally.
        eps = float(step_size)
        bim_params = {
            'eps': eps,
            'eps_iter': eps / 6,
            'clip_min': 0.,
            'clip_max': 1.
        }

        # Named `attacker` so the local does not shadow this function.
        attacker = BasicIterativeMethod(model, sess=sess)
        adv_x = attacker.generate(x, **bim_params)

        nb_batches = int(math.ceil(float(sample_acc.shape[0]) / batch_size))
        suc = 0
        for batch in range(nb_batches):
            print(batch)
            start = batch * batch_size
            end = min((batch + 1) * batch_size, sample_acc.shape[0])
            adv = sess.run(adv_x,
                           feed_dict={
                               x: sample_acc[start:end],
                               y: probabilities_acc[start:end]
                           })
            # atleast_1d keeps the loop below valid should model_argmax hand
            # back a bare scalar for a one-element batch.
            new_class_label = np.atleast_1d(
                model_argmax(sess, x, preds, adv, feed=feed_dict))
            for i, new_label in enumerate(new_class_label):
                # j is the position inside the kept (correct) set.
                j = start + i
                if new_label != current_class_acc[j]:
                    suc += 1
                    out_path = store_path + '/' + '_'.join([
                        str(acc_pre_index[j]),
                        str(time.time() * 1000),
                        str(current_class_acc[j]),
                        str(new_label),
                    ])
                    np.save(out_path, adv[i])
    finally:
        # Release the session even when the attack raises.
        sess.close()

    return suc, len(acc_pre_index)
def ccv(datasets='mnist', model='lenet1', de_model='lenet1', attack='fgsm',
        epoch=49, de_epoch=49):
    """
    Average absolute change in true-class confidence between a model and its
    defense-enhanced counterpart on the test set.

    Only test samples that BOTH models classify correctly contribute to the
    average.

    :param datasets: dataset name, passed to get_data / model_load
    :param model: name of the original model to load
    :param de_model: name of the defense-enhanced model to load
    :param attack: forwarded to model_load when loading the defense model
    :param epoch: checkpoint epoch of the original model
    :param de_epoch: checkpoint epoch of the defense-enhanced model
    :return: mean absolute confidence difference on the true class over the
        samples both models get right
    :raises ZeroDivisionError: if no test sample is classified correctly by
        both models
    """
    tf.reset_default_graph()

    print("load defense model.")
    sess, preds, x, y, model, feed_dict = model_load(datasets, model,
                                                     epoch=epoch)
    try:
        X_train, Y_train, X_test, Y_test = get_data(datasets)
        result_nor = model_prediction(sess, x, preds, X_test,
                                      feed=feed_dict, datasets=datasets)
    finally:
        # The first session must be closed before the graph is reset:
        # previously it was simply overwritten (leaked), and resetting the
        # default graph under an active session is undefined behaviour.
        sess.close()

    tf.reset_default_graph()
    sess, preds_de, x, y, model_de, feed_dict = model_load(datasets,
                                                           de_model,
                                                           True,
                                                           attack=attack,
                                                           epoch=de_epoch)
    try:
        result_de = model_prediction(sess, x, preds_de, X_test,
                                     feed=feed_dict, datasets=datasets)
    finally:
        sess.close()

    result = 0
    num = 0
    for label_vec, p_nor, p_de in zip(Y_test, result_nor, result_de):
        label = np.argmax(label_vec)
        if label == np.argmax(p_nor) and label == np.argmax(p_de):
            num += 1
            result += abs(p_nor[label] - p_de[label])

    mean_change = result / num
    print(mean_change)
    return mean_change