optimizer = tf.train.AdamOptimizer() env.train_op = optimizer.minimize(env.loss) env.saver = tf.train.Saver() with tf.variable_scope('model', reuse=True): env.adv_eps = tf.placeholder(tf.float32, (), name='adv_eps') env.adv_epochs = tf.placeholder(tf.int32, (), name='adv_epochs') env.adv_y = tf.placeholder(tf.int32, (), name='adv_y') env.x_fgsm = fgm(model, env.x, epochs=env.adv_epochs, eps=env.adv_eps) env.x_deepfool = deepfool(model, env.x, epochs=env.adv_epochs, batch=True) env.x_jsma = jsma(model, env.x, env.adv_y, eps=env.adv_eps, epochs=env.adv_epochs) print('\nInitializing graph') sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) def evaluate(sess, env, X_data, y_data, batch_size=128): """ Evaluate TF model by running env.loss and env.acc. """ print('\nEvaluating')
logits=logits) env.loss = tf.reduce_mean(xent, name='loss') with tf.variable_scope('train_op'): optimizer = tf.train.AdamOptimizer() env.train_op = optimizer.minimize(env.loss) env.saver = tf.train.Saver() with tf.variable_scope('model', reuse=True): env.target = tf.placeholder(tf.int32, (), name='target') env.adv_epochs = tf.placeholder_with_default(20, shape=(), name='epochs') env.adv_eps = tf.placeholder_with_default(0.2, shape=(), name='eps') env.x_jsma = jsma(model, env.x, env.target, eps=env.adv_eps, epochs=env.adv_epochs) print('\nInitializing graph') sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) def evaluate(sess, env, X_data, y_data, batch_size=128): """ Evaluate TF model by running env.loss and env.acc. """ print('\nEvaluating')
false_negative = 0 print('---------------------') print('ATTACKING BLACK_BOX CLASSIFIER...') adversarial_examples = [] clean_examples = [] aya = 0 for xmal in x_mal_train[0:x_malware.shape[0]]: # for xmal in x_mal_train[0:20]: if target_model.model.predict(xmal.reshape(1, -1)) == 0: false_negative = false_negative + 1 # print('this is a flase negative') else: xmal = torch.from_numpy(xmal).float().cuda() result = attacks.jsma(target_model, sarogate_model, xmal.unsqueeze(0), 0, max_distortion=0.027) distrotion = torch.sum(result - xmal) if target_model.model.predict( result.cpu().detach().numpy()) == 1: # print('failiure') # print('====================================================================') failiure = failiure + 1 else: # print('====================================================================') # # print(distrotion) # print('====================================================================')
logits=logits) env.loss = tf.reduce_mean(xent, name='loss') with tf.variable_scope('train_op'): optimizer = tf.train.AdamOptimizer() env.train_op = optimizer.minimize(env.loss) env.saver = tf.train.Saver() with tf.variable_scope('model', reuse=True): env.target = tf.placeholder(tf.int32, (), name='target') env.adv_epochs = tf.placeholder_with_default(20, shape=(), name='epochs') env.adv_eps = tf.placeholder_with_default(0.2, shape=(), name='eps') env.x_jsma = jsma(model, env.x, env.target, eps=env.adv_eps, epochs=env.adv_epochs, score_fn=lambda t, o: t - o) print('\nInitializing graph') sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) def evaluate(sess, env, X_data, y_data, batch_size=128): """ Evaluate TF model by running env.loss and env.acc. """ print('\nEvaluating')
def main(argv=None):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA).

    Loads pre-trained weights into the MNIST CNN (the weight file is chosen
    by ``FLAGS.round``), then, for each of ``FLAGS.source_samples`` test
    images starting at ``FLAGS.starting_index``, runs JSMA towards every
    class other than the image's own label.  The original and adversarial
    images are written under ``pic/``; per-attack distances and the final
    summary are appended to ``statistics/JAMA_dataCollection_<round>.txt``.

    The statistics file and the TensorFlow session are released even when
    an exception interrupts the run.

    :param argv: unused.
    :return: None
    """
    os.environ['KERAS_BACKEND'] = 'tensorflow'

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    fileName = "statistics/JAMA_dataCollection_%s.txt" % (FLAGS.round)

    # ``with`` guarantees the statistics file is flushed and closed even if
    # model loading or the attack raises part-way through.
    with open(fileName, 'a') as fileHandler:
        #######################################################################
        # Define the dataset and model
        #######################################################################

        # Image dimensions ordering should follow the TensorFlow convention
        if K.image_dim_ordering() != 'tf':
            K.set_image_dim_ordering('tf')
            print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
                  "to 'th', temporarily setting to 'tf'")

        # Create TF session and set as Keras backend session
        sess = tf.Session()
        try:
            K.set_session(sess)
            print("Created TensorFlow session and set Keras backend.")

            # Get MNIST data (only the test split is used below; training is
            # skipped because pre-trained weights are loaded instead)
            X_train, Y_train, X_test, Y_test = data_mnist()
            print("Loaded MNIST test data.")

            # Define input TF placeholders (``y`` is only needed by the
            # training/evaluation path, which is not run here)
            x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
            y = tf.placeholder(tf.float32, shape=(None, 10))

            # Define TF model graph
            model = cnn_model()

            # Pick the weight file for this round and load it
            if FLAGS.round == 1:
                weight_fn = 'tf-kernels-tf-dim-ordering/mnist.h5'
            else:
                weight_fn = 'tf-kernels-tf-dim-ordering/mnist_retrained_pixelSets_5526_20_L1_0.03.h5'
            model.load_weights(weight_fn)  # tf-kernels-tf-dim
            convert_all_kernels_in_model(model)  # th-kernels-tf-dim

            # If there is only 1 dense layer, it is not given the special
            # "first dense" treatment below
            count_dense = sum(1 for layer in model.layers
                              if layer.__class__.__name__ == "Dense")
            first_dense = count_dense != 1

            print("Nb layers : ", len(model.layers))

            conv_layer_names = ('Convolution1D', 'Convolution2D',
                                'Convolution3D', 'AtrousConvolution2D',
                                'Deconvolution2D')
            for index, tf_layer in enumerate(model.layers):
                layer_type = tf_layer.__class__.__name__
                if layer_type in conv_layer_names:
                    weights = tf_layer.get_weights()  # th-kernels-tf-dim
                    model.layers[index].set_weights(weights)  # th-kernels-tf-dim

                    # preserve last number of convolutions to use with
                    # dense layers
                    nb_last_conv = tf_layer.nb_filter
                    print("Converted layer %d : %s" % (index + 1, tf_layer.name))
                elif layer_type == "Dense" and first_dense:
                    # NOTE(review): despite the log message below, the
                    # weights are written back unchanged - no row shuffle is
                    # applied; the two "magic" numbers are only printed.
                    weights = tf_layer.get_weights()
                    nb_rows_dense_layer = weights[0].shape[0] // nb_last_conv

                    print("Magic Number 1 : ", nb_last_conv)
                    print("Magic nunber 2 : ", nb_rows_dense_layer)

                    model.layers[index].set_weights(weights)

                    first_dense = False
                    print("Shuffled Dense Weights layer and saved %d : %s" % (index + 1, tf_layer.name))
                else:
                    model.layers[index].set_weights(tf_layer.get_weights())
                    print("Saved layer %d : %s" % (index + 1, tf_layer.name))

            predictions = model(x)
            print("Defined TensorFlow model graph.")

            ###################################################################
            # Craft adversarial examples using the Jacobian-based saliency
            # map approach
            ###################################################################
            print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
                  str(FLAGS.nb_classes-1) + ' adversarial examples')

            # This array indicates whether an adversarial example was found
            # for each test set sample and target class
            results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                               dtype='i')

            # This array contains the fraction of perturbed features for each
            # test set sample and target class
            perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                                     dtype='f')

            # Define the TF graph for the model's Jacobian
            grads = jacobian_graph(predictions, x, FLAGS.nb_classes)

            # Per-sample statistics, keyed by sample_ind: smallest Euclidean
            # distance, smallest L1 distance, and the success indicator
            eud = {}
            l1d = {}
            succ = {}

            # Loop over the samples we want to perturb into adversarial
            # examples
            for sample_ind in range(0, FLAGS.source_samples):
                image_id = FLAGS.starting_index + sample_ind
                # Keep the leading batch axis: jsma is given a 1-image slice
                sample = X_test[image_id:image_id + 1]

                # We want to find an adversarial example for each possible
                # target class (i.e. all classes that differ from the label
                # given in the dataset)
                current_class = int(np.argmax(Y_test[image_id]))
                target_classes = other_classes(FLAGS.nb_classes, current_class)

                print('working with image id: %s\n' % (image_id))
                filename = "pic/%s_jsma.jpg" % (image_id)
                testImage = np.squeeze(sample[0])
                save(0, testImage, filename)

                # initialise data collection; 1000.0 acts as the starting
                # value for the running minima below
                eud[sample_ind] = 1000.0
                l1d[sample_ind] = 1000.0
                succ[sample_ind] = 0

                # Loop over all target classes
                for target in target_classes:
                    print('--------------------------------------')
                    print('Creating adv. example for target class ' + str(target))

                    # This call runs the Jacobian-based saliency map approach
                    adv_x, res, percent_perturb = jsma(
                        sess, x, predictions, grads, sample, target,
                        theta=FLAGS.thetaValue, gamma=0.05, increase=True,
                        back='tf', clip_min=0, clip_max=1)

                    filename = "pic/%s_jsma_%s_%s.jpg" % (image_id, FLAGS.thetaValue, target)
                    testImage1 = np.squeeze(adv_x[0])
                    fileHandler.write("\nimage id: %s\n" % (image_id))
                    fileHandler.write("theta value: %s\n" % (FLAGS.thetaValue))
                    fileHandler.write("target: %s\n" % (target))
                    fileHandler.write("euclidean distance: %s\n" % (euclideanDistance(testImage1, testImage)))
                    fileHandler.write("L1 distance: %s\n" % (l1Distance(testImage1, testImage)))
                    save(0, testImage1, filename)

                    # Update the arrays for later analysis
                    results[target, sample_ind] = res
                    perturbations[target, sample_ind] = percent_perturb

                    # collect data: running minimum distance over targets.
                    # NOTE(review): the minimum is taken over every target,
                    # whether or not that attack succeeded (res) - confirm
                    # this is intended.
                    temp_x = X_test[image_id]
                    adv_x = adv_x[0]
                    temp_eud = euclideanDistance(temp_x, adv_x)
                    if eud[sample_ind] > temp_eud:
                        eud[sample_ind] = temp_eud
                    temp_l1d = l1Distance(temp_x, adv_x)
                    if l1d[sample_ind] > temp_l1d:
                        l1d[sample_ind] = temp_l1d
                    # Keep overwriting until a non-zero result is recorded
                    if succ[sample_ind] == 0:
                        succ[sample_ind] = res

            # Compute the number of adversarial examples that were
            # successfuly found
            nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
            succ_rate = float(np.sum(results)) / nb_targets_tried
            print('Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(perturbations)
            print('Avg. rate of perturbed features {0:.2f}'.format(percent_perturbed))

            # Average distortion with unsuccessful entries zeroed out; the
            # mean is still taken over all entries of the array
            percent_perturb_succ = np.mean(perturbations * (results == 1))
            print('Avg. rate of perturbed features for successful '
                  'adversarial examples {0:.2f}'.format(percent_perturb_succ))

            # Scale each sample's distances by its success indicator before
            # averaging, so samples whose indicator stayed 0 contribute 0
            for e in eud:
                eud[e] = eud[e] * succ[e]
            for e in l1d:
                l1d[e] = l1d[e] * succ[e]

            # Compute each summary figure once; it is both printed and logged
            avg_eud = sum(eud.values()) / float(len(eud))
            avg_l1d = sum(l1d.values()) / float(len(l1d))
            avg_succ = sum(succ.values()) / float(len(succ))

            print("Average Euclidean distance is %s" % (avg_eud))
            print("Average L1 distance is %s" % (avg_l1d))
            print("Success rate is %s" % (avg_succ))

            fileHandler.write("Average Euclidean distance is %s\n" % (avg_eud))
            fileHandler.write("Average L1 distance is %s\n" % (avg_l1d))
            fileHandler.write("Success rate is %s\n" % (avg_succ))
        finally:
            # Close TF session, even if the run above failed
            sess.close()