def test():
    """Restore the trained RDPCNN model and evaluate it on clean and adversarial inputs.

    A fresh default graph is created, the ``model_cifar10.RDPCNN`` model is
    built on top of image/label placeholders, and the checkpoint named by
    ``FLAGS.CNN_CKPT_RESTORE_NAME`` is loaded.  Four cleverhans attack tensors
    (``fgsm``, ``ifgsm``, ``mim``, ``madry``) are then built against the
    model's probability output with budget ``FLAGS.ATTACK_SIZE`` and handed,
    together with the placeholders, to ``test_info`` and ``robust_info``.

    Returns:
        None.  Results are produced by ``test_info`` / ``robust_info``.
    """
    tf.reset_default_graph()
    graph = tf.get_default_graph()

    with graph.as_default():
        # Placeholder nodes.
        images_holder = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # Model used for crafting adversarial examples.
        model = model_cifar10.RDPCNN(images_holder, label_holder,
                                     FLAGS.INPUT_SIGMA, is_training)
        # The results are not used below; the two accesses are kept because
        # they presumably build graph ops -- TODO confirm before removing.
        _model_loss = model.loss()
        _model_acc = model.cnn_accuracy

        def inference(x):
            """Return the logits of the wrapped CNN for input tensor `x`."""
            return model.cnn.prediction(x)[0]

        def inference_prob(x):
            """Return the class probabilities of the wrapped CNN for `x`."""
            return model.cnn.prediction(x)[1]

        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True

        with tf.Session(config=session_config, graph=graph) as sess:
            sess.run(tf.global_variables_initializer())
            # Overwrite the freshly initialised variables with the checkpoint.
            model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

            # cleverhans model wrappers (the logits wrapper is not used by
            # the attacks below, it is only constructed).
            ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                                   output_layer='logits')
            ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                                  output_layer='probs')

            attack_size = FLAGS.ATTACK_SIZE

            # Single-step FGSM.
            x_advs = {
                "fgsm": FastGradientMethod(
                    model=ch_model_probs, sess=sess).generate(
                        x=images_holder, eps=attack_size,
                        clip_min=0.0, clip_max=1.0),
            }

            # Iterative attacks share eps / eps_iter / nb_iter; only the
            # momentum method takes an extra decay factor.
            iterative_attacks = (
                ("ifgsm", BasicIterativeMethod, {}),
                ("mim", MomentumIterativeMethod, {"decay_factor": 1.0}),
                ("madry", MadryEtAl, {}),
            )
            for key, attack_cls, extra_kwargs in iterative_attacks:
                attack = attack_cls(model=ch_model_probs, sess=sess)
                x_advs[key] = attack.generate(x=images_holder,
                                              eps=attack_size,
                                              eps_iter=attack_size / 10,
                                              nb_iter=10,
                                              clip_min=0.0,
                                              clip_max=1.0,
                                              **extra_kwargs)

            graph_dict = {
                "images_holder": images_holder,
                "label_holder": label_holder,
                "is_training": is_training,
                "x_advs": x_advs,
            }

            print("\nTest")
            # A local run is limited to two batches.
            total_test_batch = 2 if FLAGS.local else None

            # NOTE(review): allow_pickle=True unpickles arbitrary objects;
            # FLAGS.DP_INFO_NPY must come from a trusted source.
            dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
            test_info(sess, model, True, graph_dict, dp_info,
                      FLAGS.TEST_LOG_FILENAME, total_batch=total_test_batch)
            robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
def train(fgsm_eps, _dp_epsilon, _attack_norm_bound, log_filename, ratio):
    """Train a noise-injected CNN on the module-level `mnist` data and log its accuracy.

    The function builds a two-convolution network on 784-dimensional inputs,
    trains it with clipped, noised gradients while a Gaussian moments
    accountant tracks the privacy spending, and stops as soon as the spent
    delta for the target epsilon reaches 1e-5 (or after `num_steps` steps).
    It then measures benign accuracy and, for every enabled cleverhans
    attack, adversarial accuracy obtained by voting over repeated noise
    draws.  One summary line and the elapsed time are written to the log.

    Args:
        fgsm_eps: `eps` budget given to every attack; also the threshold a
            sample's robustness size must reach to count as robust.
        _dp_epsilon: epsilon used to size the noise injected after the first
            convolution and forwarded to the robustness computation.
        _attack_norm_bound: multiplier applied to the injected noise; also a
            factor of `dp_mult`, which rescales the robustness size.
        log_filename: prefix of the log path; the hyper-parameters and
            ``.txt`` are appended to it.
        ratio: sets the accountant's target epsilon to ``0.2 + 0.2 * ratio``.

    Returns:
        None.
    """
    target_eps = [0.2 + 0.2 * ratio]
    dp_epsilon = _dp_epsilon
    attack_norm_bound = _attack_norm_bound
    _log_filename = (log_filename + str(target_eps[0]) + '_fgsm_' +
                     str(fgsm_eps) + '_dpeps_' + str(dp_epsilon) +
                     '_attack_norm_bound_' + str(_attack_norm_bound) + '.txt')

    clip_bound = 0.001  # norm bound applied to the weight gradients
    batch_size = 125  # batch size L
    num_steps = 1800000  # upper bound on the number of training steps
    sigma = 5  # noise multiplier handed to the privacy accountant
    delta = 1e-5  # training stops once the spent delta reaches this value
    D = 50000  # dataset size handed to the privacy accountant

    # NOTE(review): this session is never closed; repeated calls keep adding
    # sessions and variables to the default graph.
    sess = tf.InteractiveSession()

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    W_fc1 = weight_variable([7 * 7 * 64, 25])
    b_fc1 = bias_variable([25])
    W_fc2 = weight_variable([25, 10])
    b_fc2 = bias_variable([10])

    def inference(x, dp_mult):
        """Return (logits, first-conv activations); `dp_mult` is added before the first ReLU."""
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        h_conv1 = tf.nn.relu((conv2d(x_image, W_conv1) + b_conv1) + dp_mult)
        h_pool1 = max_pool_2x2(h_conv1)
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        return y_conv, h_conv1

    def inference_prob(x):
        """Return softmax probabilities of the noise-free network (used by the attacks)."""
        logits, _ = inference(x, 0)
        return tf.nn.softmax(logits)

    # Largest singular value of the flattened first-layer kernel.
    shape = W_conv1.get_shape().as_list()
    w = tf.transpose(tf.reshape(W_conv1, [-1, shape[-1]]))
    sensitivityW = tf.reduce_max(tf.svd(w, compute_uv=False))

    dp_delta = 0.05
    dp_mult = attack_norm_bound * math.sqrt(
        2 * math.log(1.25 / dp_delta)) / dp_epsilon
    noise = tf.placeholder(tf.float32, [None, 28, 28, 32])

    y_conv, h_conv1 = inference(x, attack_norm_bound * noise)
    softmax_y = tf.nn.softmax(y_conv)

    priv_accountant = accountant.GaussianMomentsAccountant(D)
    privacy_accum_op = priv_accountant.accumulate_privacy_spending(
        [None, None], sigma, batch_size)

    sess.run(tf.global_variables_initializer())

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))

    # Noise redistribution: per-position weights derived from the sign of the
    # loss gradient w.r.t. the first-conv activations, averaged over the batch.
    grad, = tf.gradients(cross_entropy, h_conv1)
    normalized_grad = tf.stop_gradient(tf.sign(grad))
    normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))
    sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False)
    normalized_grad_r = 256 * 32 * normalized_grad_r / sum_r
    print(normalized_grad_r)
    shape_grad = normalized_grad_r.get_shape().as_list()
    grad_mat = tf.transpose(
        tf.reshape(normalized_grad_r, [-1, shape_grad[-1]]))
    sensitivity_2 = tf.reduce_max(tf.svd(grad_mat, compute_uv=False))

    opt = GradientDescentOptimizer(learning_rate=1e-1)

    # (variable, clip its gradient?) -- only weight gradients are clipped,
    # bias gradients are left unclipped.  The three passes below keep the
    # original op-creation order: all gradients, then clips, then noise.
    params = [(W_conv1, True), (b_conv1, False),
              (W_conv2, True), (b_conv2, False),
              (W_fc1, True), (b_fc1, False),
              (W_fc2, True), (b_fc2, False)]
    grads = [tf.gradients(cross_entropy, var)[0] for var, _ in params]
    grads = [
        tf.clip_by_norm(gradient, clip_bound) if do_clip else gradient
        for gradient, (_, do_clip) in zip(grads, params)
    ]

    sensitivity = clip_bound  # adjacency matrix with one more tuple
    # NOTE(review): `stddev` is a standard deviation, but the squared value
    # (sigma * sensitivity)**2 is passed while the accountant above is given
    # `sigma` as the noise multiplier -- confirm whether the square is
    # intended.  Left unchanged so that results stay comparable.
    grad_noise_stddev = (sigma * sensitivity)**2
    grads = [
        gradient + tf.random_normal(shape=tf.shape(gradient),
                                    mean=0.0,
                                    stddev=grad_noise_stddev,
                                    dtype=tf.float32) for gradient in grads
    ]
    train_step = opt.apply_gradients(
        [(gradient, var) for gradient, (var, _) in zip(grads, params)])

    #====================== attack =========================
    attack_switch = {
        'fgsm': True,
        'ifgsm': True,
        'deepfool': False,
        'mim': True,
        'spsa': False,
        'cwl2': False,
        'madry': True,
        'stm': False
    }

    # cleverhans wrapper around the probability output of the model
    ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                          output_layer='probs')

    # (switch key, name printed, attack class, extra generate() arguments)
    iterative_kwargs = {'eps_iter': fgsm_eps / 10, 'nb_iter': 10}
    attack_specs = [
        ('fgsm', 'FastGradientMethod', FastGradientMethod, {}),
        ('ifgsm', 'BasicIterativeMethod', BasicIterativeMethod,
         iterative_kwargs),
        ('mim', 'MomentumIterativeMethod', MomentumIterativeMethod,
         dict(iterative_kwargs, decay_factor=1.0)),
        ('madry', 'MadryEtAl', MadryEtAl, iterative_kwargs),
    ]
    attack_tensor_dict = {}
    for atk, attack_name, attack_cls, extra_kwargs in attack_specs:
        if not attack_switch[atk]:
            continue
        print('creating attack tensor of ' + attack_name)
        attack_obj = attack_cls(model=ch_model_probs, sess=sess)
        attack_tensor_dict[atk] = attack_obj.generate(x=x,
                                                      eps=fgsm_eps,
                                                      clip_min=0.0,
                                                      clip_max=1.0,
                                                      **extra_kwargs)
    #====================== attack =========================

    # Define the correct prediction and accuracy
    correct_prediction_x = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32))

    def draw_noise(sigma_value):
        """Draw one float32 noise tensor of shape [1, 28, 28, 32].

        NOTE(review): numpy's `scale` is a standard deviation, yet
        sigma_value**2 is passed -- confirm whether the square is intended.
        """
        flat = np.random.normal(0.0, sigma_value**2,
                                28 * 28 * 32).astype(np.float32)
        return np.reshape(flat, [-1, 28, 28, 32])

    s = math.log(sqrt(2.0 / math.pi) * 1e+5)
    sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (
        2.0 * dp_epsilon)
    print(sigmaEGM)
    noise_e = draw_noise(sigmaEGM)
    zero_noise = noise_e * 0  # noise-free feed used during the warm-up

    start_time = time.time()
    test_images = mnist.test.images
    test_labels = mnist.test.labels
    test_size = len(test_images)

    # The file is created up front (as before) so that a bad path fails
    # before training; `with` guarantees it is closed even on an exception.
    with open(_log_filename, 'w') as logfile:
        print("Computing The Noise Redistribution Vector")
        for _ in range(4000):
            batch = mnist.train.next_batch(batch_size)
            sess.run([train_step],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         keep_prob: 0.5,
                         noise: zero_noise
                     })

        batch = mnist.train.next_batch(batch_size * 10)
        grad_redis, _sensitivity_2 = sess.run(
            [normalized_grad_r, sensitivity_2],
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 1.0,
                noise: zero_noise
            })
        _sensitivityW = sess.run(sensitivityW)
        Delta_redis = _sensitivityW / sqrt(_sensitivity_2)
        sigmaHGM = sqrt(2.0) * Delta_redis * (
            sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        noise_h = draw_noise(sigmaHGM) * grad_redis
        base_noise = (noise_e + noise_h) / 2  # fixed noise for train/test

        # Discard the warm-up weights and start training from scratch.
        sess.run(tf.global_variables_initializer())
        print("Training")
        for i in range(num_steps):
            batch = mnist.train.next_batch(batch_size)
            sess.run([train_step],
                     feed_dict={
                         x: batch[0],
                         y_: batch[1],
                         keep_prob: 0.5,
                         noise: base_noise
                     })
            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            if i % 1000 == 0:
                print(i, spent_eps_deltas)
            # Stop as soon as the privacy budget is exhausted.
            if any(_delta >= delta for _eps, _delta in spent_eps_deltas):
                break

        print("Testing")
        benign_acc = accuracy_x.eval(
            feed_dict={
                x: test_images,
                y_: test_labels,
                keep_prob: 1.0,
                noise: base_noise
            })

        ### PixelDP Robustness ###
        # Disabled attacks keep the placeholder value -1.
        adv_acc_dict = {atk: -1 for atk in attack_switch}
        robust_adv_acc_dict = {atk: -1 for atk in attack_switch}
        robust_adv_utility_dict = {atk: -1 for atk in attack_switch}

        def format_attack(atk):
            """Return the per-attack result fragment used on stdout and in the log."""
            return " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                robust_adv_utility_dict[atk],
                robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])

        true_classes = np.argmax(test_labels, axis=1)
        sample_idx = np.arange(test_size)

        for atk in attack_switch:
            if not attack_switch[atk]:
                continue
            adv_images = sess.run(attack_tensor_dict[atk],
                                  feed_dict={
                                      x: test_images,
                                      y_: test_labels,
                                      keep_prob: 1.0
                                  })

            ### Robustness ###
            # votes[j, c] counts how often sample j was classified as c.
            votes = np.zeros([test_size, 10])
            predicted = np.argmax(softmax_y.eval(feed_dict={
                x: adv_images,
                keep_prob: 1.0,
                noise: base_noise
            }),
                                  axis=1)
            for n_draws in range(0, 2000):
                if n_draws % 1000 == 0:
                    print(n_draws)
                extra_e = draw_noise(sigmaEGM)
                extra_h = draw_noise(sigmaHGM) * grad_redis
                # Record the previous prediction, then predict again with
                # freshly drawn extra noise.  As in the original, the result
                # of the last evaluation is never counted.
                votes[sample_idx, predicted] += 1
                predicted = np.argmax(softmax_y.eval(
                    feed_dict={
                        x: adv_images,
                        keep_prob: 1.0,
                        noise: base_noise + (extra_e + extra_h) / 4
                    }),
                                      axis=1)

            is_correct = true_classes == np.argmax(votes, axis=1)
            is_robust = np.array([
                robustnessGGaussian.robustness_size_argmax(
                    counts=counts,
                    eta=0.05,
                    dp_attack_size=fgsm_eps,
                    dp_epsilon=dp_epsilon,
                    dp_delta=1e-5,
                    dp_mechanism='gaussian') / dp_mult >= fgsm_eps
                for counts in votes
            ],
                                 dtype=bool)

            adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_size
            # NOTE(review): yields nan when no sample is certified robust.
            robust_adv_acc_dict[atk] = np.sum(
                np.logical_and(is_robust,
                               is_correct)) * 1.0 / np.sum(is_robust)
            robust_adv_utility_dict[atk] = np.sum(is_robust) * 1.0 / test_size
            print(format_attack(atk))

        ##############################
        log_str = "step: {}\t target_epsilon: {}\t dp_epsilon: {:.1f}\t attack_norm_bound: {:.1f}\t benign_acc: {:.4f}\t".format(
            i, target_eps, dp_epsilon, attack_norm_bound, benign_acc)
        log_str += "".join(
            format_attack(atk) for atk in attack_switch if attack_switch[atk])
        print(log_str)
        logfile.write(log_str + '\n')
        ##############################
        duration = time.time() - start_time
        logfile.write(str(duration) + '\n')
def train(alpha, eps2_ratio, gen_ratio, fgsm_eps, LR, logfile): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , eps2_ratio \t %d , gen_ratio \t %d \n"%(fgsm_eps, LR, alpha, eps2_ratio, gen_ratio)) ############################# ##Hyper-parameter Setting#### ############################# hk = 256; #number of hidden units at the last layer Delta2 = (14*14+2)*25; #global sensitivity for the first hidden layer Delta3_adv = 2*hk #10*(hk + 1/4 * hk**2) #10*(hk) #global sensitivity for the output layer Delta3_benign = 2*hk #10*(hk); #global sensitivity for the output layer D = 50000; #size of the dataset L = 2499; #batch size image_size = 28; padding = 4; #numHidUnits = 14*14*32 + 7*7*64 + M + 10; #number of hidden units #gen_ratio = 1 epsilon1 = 0.0; #0.175; #epsilon for dpLRP epsilon2 = 0.1*(1 + gen_ratio); #epsilon for the first hidden layer epsilon3 = 0.1*(1); #epsilon for the last hidden layer total_eps = epsilon1 + epsilon2 + epsilon3 print(total_eps) uncert = 0.1; #uncertainty modeling at the output layer infl = 1; #inflation rate in the privacy budget redistribution R_lowerbound = 1e-5; #lower bound of the LRP c = [0, 40, 50, 200] #norm bounds epochs = 200; #number of epochs preT_epochs = 50; #number of epochs T = int(D/L*epochs + 1); #number of steps T pre_T = int(D/L*preT_epochs + 1); step_for_epoch = int(D/L); #number of steps for one epoch broken_ratio = 1 #alpha = 9.0 # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] #eps2_ratio = 10; # [1/10, 1/8, 1/6, 1/4, 1/2, 1, 2, 4, 6, 8, 10] #eps_benign = 1/(1+eps2_ratio)*(2*epsilon2) #eps_adv = eps2_ratio/(1+eps2_ratio)*(2*epsilon2) #fgsm_eps = 0.1 rand_alpha = 0.05 ##Robustness## robustness_T = (fgsm_eps*18*18*L*epsilon2)/Delta2; #### LRPfile = os.getcwd() + '/Relevance_R_0_075.txt'; ############################# mnist = input_data.read_data_sets("MNIST_data/", one_hot = True); ############################# ##Construct the Model######## ############################# #Step 4: Randomly initiate the noise, Compute 1/|L| * Delta3 
for the output layer# #Compute the 1/|L| * Delta3 for the last hidden layer# """eps3_ratio = Delta3_adv/Delta3_benign; eps3_benign = 1/(1+eps3_ratio)*(epsilon3) eps3_adv = eps3_ratio/(1+eps3_ratio)*(epsilon3)""" loc, scale3_benign, scale3_adv = 0., Delta3_benign/(epsilon3*L), Delta3_adv/(epsilon3*L); ### #End Step 4# # Parameters Declarification W_conv1 = weight_variable('W_conv1', [5, 5, 1, 32], collect=[AECODER_VARIABLES]); b_conv1 = bias_variable('b_conv1', [32], collect=[AECODER_VARIABLES]); shape = W_conv1.get_shape().as_list() w_t = tf.reshape(W_conv1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2*(14*14 + 2)*25/(L*sensitivity) dp_epsilon=1.0 #0.1 delta_r = fgsm_eps*(image_size**2); #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon) W_conv2 = weight_variable('W_conv2', [5, 5, 32, 64], collect=[CONV_VARIABLES]); b_conv2 = bias_variable('b_conv2', [64], collect=[CONV_VARIABLES]); W_fc1 = weight_variable('W_fc1', [4 * 4 * 64, hk], collect=[CONV_VARIABLES]); b_fc1 = bias_variable('b_fc1', [hk], collect=[CONV_VARIABLES]); W_fc2 = weight_variable('W_fc2', [hk, 10], collect=[CONV_VARIABLES]); b_fc2 = bias_variable('b_fc2', [10], collect=[CONV_VARIABLES]); """scale2 = tf.Variable(tf.ones([hk])) beta2 = tf.Variable(tf.zeros([hk])) tf.add_to_collections([CONV_VARIABLES], scale2) tf.add_to_collections([CONV_VARIABLES], beta2)""" params = [W_conv1, b_conv1, W_conv2, b_conv2, W_fc1, b_fc1, W_fc2, b_fc2] ### #Step 5: Create the model# noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 1]); keep_prob = tf.placeholder(tf.float32); x = tf.placeholder(tf.float32, [None, image_size*image_size]); x_image = tf.reshape(x, [-1,image_size,image_size,1]); #perturbFMx = 
np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx = np.reshape(perturbFMx, [-1, 28, 28, 1]); # pretrain ### #Enc_Layer1 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) #pretrain = Enc_Layer1.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, epsilon = 2*epsilon2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise) ########### adv_x = tf.placeholder(tf.float32, [None, image_size*image_size]); adv_image = tf.reshape(adv_x, [-1,image_size,image_size,1]); #perturbFMx_adv = np.random.laplace(0.0, Delta2/(2*epsilon2*L), 28*28) #perturbFMx_adv = np.reshape(perturbFMx_adv, [-1, 28, 28, 1]); # pretrain adv ### #perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*32) #perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]); FM_h = tf.placeholder(tf.float32, [None, 14, 14, 32]); Enc_Layer2 = EncLayer(inpt=adv_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2(xShape = tf.shape(adv_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = adv_noise, perturbFM_h = FM_h) Enc_Layer3 = EncLayer(inpt=x_image, n_filter_in = 1, n_filter_out = 32, filter_size = 5, W=W_conv1, b=b_conv1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2(xShape = tf.shape(x_image)[0], Delta = Delta2, batch_size = L, learning_rate= LR, W = W_conv1, b = b_conv1, perturbFMx = noise, perturbFM_h = FM_h) ########### x_image += noise; x_image = tf.clip_by_value(x_image, -10, 10) #Clip the values of each input feature. adv_image += adv_noise; adv_image = tf.clip_by_value(adv_image, -10, 10) #Clip the values of each input feature. 
#perturbFM = np.random.laplace(0.0, scale3_benign, hk) #perturbFM = np.reshape(perturbFM, [hk]); perturbFM = np.random.laplace(0.0, scale3_benign, hk * 10) perturbFM = np.reshape(perturbFM, [hk, 10]); y_conv = inference(x_image, perturbFM, hk, FM_h, params); softmax_y_conv = tf.nn.softmax(y_conv) #robust_mask = inference_robust_mask(y_conv, Delta2, L, epsilon2, robustness_T) #perturbFM = np.random.laplace(0.0, scale3_adv, hk) #perturbFM = np.reshape(perturbFM, [hk]); y_adv_conv = inference(adv_image, perturbFM, hk, FM_h, params); #adv_robust_mask = inference_robust_mask(y_adv_conv, Delta2, L, epsilon2, robustness_T) # test model perturbFM_test = np.random.laplace(0.0, 0, hk) perturbFM_test = np.reshape(perturbFM_test, [hk]); x_test = tf.reshape(x, [-1,image_size,image_size,1]); y_test = inference(x_test, perturbFM_test, hk, FM_h, params); #test_robust_mask = inference_robust_mask(y_test, Delta2, L, epsilon2, robustness_T) #Define a place holder for the output label# y_ = tf.placeholder(tf.float32, [None, 10]); adv_y_ = tf.placeholder(tf.float32, [None, 10]); #End Step 5# ############################# ############################# ##Define loss and Optimizer## ############################# ''' Computes differentially private sigmoid cross entropy given `logits`. Measures the probability error in discrete classification tasks in which each class is independent and not mutually exclusive. For brevity, let `x = logits`, `z = labels`. 
The logistic loss is z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x)) = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x))) = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)) = (1 - z) * x + log(1 + exp(-x)) = x - x * z + log(1 + exp(-x)) For x < 0, to avoid overflow in exp(-x), we reformulate the above x - x * z + log(1 + exp(-x)) = log(exp(x)) - x * z + log(1 + exp(-x)) = - x * z + log(1 + exp(x)) Hence, to ensure stability and avoid overflow, the implementation uses this equivalent formulation max(x, 0) - x * z + log(1 + exp(-abs(x))) `logits` and `labels` must have the same type and shape. Let denote neg_abs_logits = -abs(y_conv) = -abs(h_fc1 * W_fc2). By Applying Taylor Expansion, we have: Taylor = max(y_conv, 0) - y_conv * y_ + log(1 + exp(-abs(y_conv))); = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) = max(h_fc1 * W_fc2, 0) - (y_ * h_fc1) * W_fc2 + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) = F1 + F2 where: F1 = max(h_fc1 * W_fc2, 0) + (math.log(2.0) + 0.5*(-abs(h_fc1 * W_fc2)) + 1.0/8.0*(-abs(h_fc1 * W_fc2))**2) and F2 = - (y_ * h_fc1) * W_fc2 To ensure that Taylor is differentially private, we need to perturb all the coefficients, including the term y_ * h_fc1 * W_fc2. Note that h_fc1 is differentially private, since its computation on top of the DP Affine transformation does not access the original data. Therefore, F1 should be differentially private. 
We need to preserve DP in F2, which reads the groundtruth label y_, as follows: By applying Funtional Mechanism, we perturb (y_ * h_fc1) * W_fc2 as ((y_ * h_fc1) + perturbFM) * W_fc2 = (y_ * h_fc1)*W_fc2 + (perturbFM * W_fc2): perturbFM = np.random.laplace(0.0, scale3, hk * 10) perturbFM = np.reshape(perturbFM/L, [hk, 10]); where scale3 = Delta3/(epsilon3) = 2*hk/(epsilon3); To allow computing gradients at zero, we define custom versions of max and abs functions [Tensorflow]. Source: https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/python/ops/nn_impl.py @ TensorFlow ''' ### Taylor for benign x zeros = array_ops.zeros_like(y_conv, dtype=y_conv.dtype) cond = (y_conv >= zeros) relu_logits = array_ops.where(cond, y_conv, zeros) neg_abs_logits = array_ops.where(cond, -y_conv, y_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_benign = math_ops.add(relu_logits - y_conv * y_, math.log(2.0) + 0.5*neg_abs_logits + 1.0/8.0*neg_abs_logits**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_benign = tf.abs(y_conv - y_) ### Taylor for adv_x zeros_adv = array_ops.zeros_like(y_adv_conv, dtype=y_conv.dtype) cond_adv = (y_adv_conv >= zeros_adv) relu_logits_adv = array_ops.where(cond_adv, y_adv_conv, zeros_adv) neg_abs_logits_adv = array_ops.where(cond_adv, -y_adv_conv, y_adv_conv) #Taylor = math_ops.add(relu_logits - y_conv * y_, math_ops.log1p(math_ops.exp(neg_abs_logits))) Taylor_adv = math_ops.add(relu_logits_adv - y_adv_conv * adv_y_, math.log(2.0) + 0.5*neg_abs_logits_adv + 1.0/8.0*neg_abs_logits_adv**2) - tf.reduce_sum(perturbFM*W_fc2) #Taylor_adv = tf.abs(y_adv_conv - adv_y_) ### Adversarial training loss adv_loss = (1/(L + L*alpha))*(Taylor_benign + alpha * Taylor_adv) '''Some time, using learning rate decay can help to stablize training process. 
However, use this carefully, since it may affect the convergent speed.''' global_step = tf.Variable(0, trainable=False) pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) #print(pretrain_var_list) #print(train_var_list) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): pretrain_step = tf.train.AdamOptimizer(LR).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list); train_step = tf.train.AdamOptimizer(LR).minimize(adv_loss, global_step=global_step, var_list=train_var_list); sess = tf.InteractiveSession(); # Define the correct prediction and accuracy # This needs to be changed to "Robust Prediction" correct_prediction_x = tf.equal(tf.argmax(y_test,1), tf.argmax(y_,1)); accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32)); ############# # use these to get predictions wrt to robust conditions """robust_correct_prediction_x = tf.multiply(test_robust_mask, tf.cast(correct_prediction_x, tf.float32)) accuracy_x_robust = tf.reduce_sum(robust_correct_prediction_x) / tf.reduce_sum(test_robust_mask) #certified_utility = 2/(1/accuracy_x_robust + 1/(tf.reduce_sum(test_robust_mask)/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32)))) certified_utility = (1.0*tf.reduce_sum(test_robust_mask))/(1.0*tf.cast(tf.size(test_robust_mask), tf.float32))""" ############# # craft adversarial samples from x for training dynamic_eps = tf.placeholder(tf.float32); emsemble_L = int(L/3) softmax_y = tf.nn.softmax(y_test) #c_x_adv = fgsm(x, softmax_y, eps=fgsm_eps, clip_min=0.0, clip_max=1.0) c_x_adv = fgsm(x, softmax_y, eps=(dynamic_eps)/10, clip_min=-1.0, clip_max=1.0) # for I-FGSM x_adv = tf.reshape(c_x_adv, [emsemble_L,image_size*image_size]); #====================== attack ========================= #attack_switch = {'randfgsm':True, 'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 
'stm':True} #attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':True, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':True} attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} #other possible attacks: # ElasticNetMethod # FastFeatureAdversaries # LBFGS # SaliencyMapMethod # VirtualAdversarialMethod # y_test = logits (before softmax) # softmax_y_test = preds (probs, after softmax) softmax_y_test = tf.nn.softmax(y_test) # create saver saver = tf.train.Saver(tf.all_variables()) sess.run(W_conv1.initializer) _gamma = sess.run(gamma) _gamma_x = Delta2/L epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x) print(epsilon2_update/_gamma + epsilon2_update/_gamma_x) print(epsilon2_update) _sensitivityW = sess.run(sensitivity) delta_h = _sensitivityW*(14**2) dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon) ############################# iterativeStep = 100 # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(os.getcwd() + './tmp/train') if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path); saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') start_time = time.time(); # adv pretrain model (Auto encoder layer) cost = tf.reduce_sum(Enc_Layer2.cost); logfile.write("pretrain: \n") # define cleverhans abstract models for using cleverhans attacks ch_model_logits = CustomCallableModelWrapper(callable_fn=inference_test_input, output_layer='logits', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', hk=hk, params=params, image_size=image_size, adv_noise = adv_noise) # rand+fgsm # if attack_switch['randfgsm']: # randfgsm_obj = 
FastGradientMethod(model=ch_model_probs, sess=sess) # x_randfgsm_t = (fgsm_eps - rand_alpha) * randfgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0) # x_rand_t = rand_alpha * tf.sign(tf.random_normal(shape=tf.shape(x), mean=0.0, stddev=1.0)) # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]); attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # Deepfool if attack_switch['deepfool']: print('creating attack tensor of DeepFool') deepfool_obj = DeepFool(model=ch_model_logits, sess=sess) #x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_deepfool = deepfool_obj.generate(x=x, nb_candidate=10, overshoot=0.02, max_iter=50, nb_classes=10, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['deepfool'] = x_adv_test_deepfool # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj 
= MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=mu_alpha/iterativeStep, nb_iter=iterativeStep, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = x_adv_test_mim # SPSA # note here the epsilon is the infinity norm instead of precent of perturb # Maybe exclude this method first, since it seems to have some constrain about the data value range if attack_switch['spsa']: print('creating attack tensor of SPSA') spsa_obj = SPSA(model=ch_model_logits, sess=sess) #x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1, ord=2) x_adv_test_spsa = spsa_obj.generate(x=x, epsilon=fgsm_eps, num_steps=10, is_targeted=False, early_stop_loss_threshold=None, learning_rate=0.01, delta=0.01,spsa_samples=1000, spsa_iters=1) attack_tensor_dict['spsa'] = x_adv_test_spsa # CarliniWagnerL2 # confidence=0 is fron their paper # it is said to be slow, maybe exclude first if attack_switch['cwl2']: print('creating attack tensor of CarliniWagnerL2') cwl2_obj = CarliniWagnerL2(model=ch_model_logits, sess=sess) #x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_cwl2 = cwl2_obj.generate(x=x, confidence=0, batch_size=1000, learning_rate=0.005, binary_search_steps=5, max_iterations=500, abort_early=True, initial_const=0.01, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['cwl2'] = x_adv_test_cwl2 # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') 
madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/iterativeStep, nb_iter=iterativeStep, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry # SpatialTransformationMethod # the params are pretty different from on the paper # so I use default # exclude since there's bug if attack_switch['stm']: print('creating attack tensor of SpatialTransformationMethod') stm_obj = SpatialTransformationMethod(model=ch_model_probs, sess=sess) #x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6, ord=2) x_adv_test_stm = stm_obj.generate(x=x, batch_size=1000, n_samples=None, dx_min=-0.1, dx_max=0.1, n_dxs=2, dy_min=-0.1, dy_max=0.1, n_dys=2, angle_min=-30, angle_max=30, n_angles=6) attack_tensor_dict['stm'] = x_adv_test_stm #====================== attack ========================= sess.run(tf.initialize_all_variables()); ##perturb h for training perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 32]); ##perturb h for testing perturbFM_h_test = np.random.laplace(0.0, 0, 14*14*32) perturbFM_h_test = np.reshape(perturbFM_h_test, [-1, 14, 14, 32]); '''for i in range(_global_step, _global_step + pre_T): d_eps = random.random(); batch = mnist.train.next_batch(L); #Get a random batch. 
adv_images = sess.run(x_adv, feed_dict = {x:batch[0], y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps}) """batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1]}) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0)""" batch_2 = mnist.train.next_batch(L); pretrain_step.run(feed_dict={adv_x: np.append(adv_images, batch_2[0], axis = 0), adv_noise: AdvLnoise, FM_h: perturbFM_h}); if i % int(5*step_for_epoch) == 0: cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) logfile.write("step \t %d \t %g \n"%(i, cost_value)) print(cost_value) pre_train_finish_time = time.time() print('pre_train finished in: ' + parse_time(pre_train_finish_time - start_time))''' # train and test model with adv samples max_benign_acc = -1; max_robust_benign_acc = -1 #max_adv_acc = -1; test_size = len(mnist.test.images) AdvLnoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); AdvLnoise_test = generateIdLMNoise(image_size, 0, epsilon2_update, test_size); Lnoise_empty = generateIdLMNoise(image_size, 0, epsilon2_update, L); BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); last_eval_time = -1 accum_time = 0 accum_epoch = 0 max_adv_acc_dict = {} max_robust_adv_acc_dict = {} #max_robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in max_adv_acc_dict: max_adv_acc_dict[atk] = -1 max_robust_adv_acc_dict[atk] = -1 for i in range(_global_step, _global_step + T): # this batch is for generating adv samples batch = mnist.train.next_batch(emsemble_L); #Get a random 
batch. y_adv_batch = batch[1] #The number of epochs we print out the result. Print out the result every 5 epochs. if i % int(10*step_for_epoch) == 0 and i > int(10*step_for_epoch): cost_value = sess.run(cost, feed_dict={adv_x:mnist.test.images, adv_noise: AdvLnoise_test, FM_h: perturbFM_h_test})/(test_size*32) print(cost_value) if last_eval_time < 0: last_eval_time = time.time() #===================benign samples===================== predictions_form_argmax = np.zeros([test_size, 10]) #test_bach = mnist.test.next_batch(test_size) softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1): _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = softmax_y_conv.eval(feed_dict={x: mnist.test.images, noise: (BenignLNoise + _BenignLNoise/2), FM_h: (perturbFM_h + _perturbFM_h/2)}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) acc = np.sum(is_correct)*1.0/test_size robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_utility = np.sum(is_robust)*1.0/test_size max_benign_acc = max(max_benign_acc, acc) max_robust_benign_acc = max(max_robust_benign_acc, robust_acc*robust_utility) 
log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(i, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility) #===================adv samples===================== #log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(i, total_eps) """adv_images_dict = {} for atk in attack_switch.keys(): if attack_switch[atk]: adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_:mnist.test.labels}) print("Done with the generating of Adversarial samples")""" #===================adv samples===================== adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} for atk in attack_switch.keys(): if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict = {x:mnist.test.images, y_: mnist.test.labels, adv_noise: AdvLnoise_test, mu_alpha:[fgsm_eps]}) ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_size, 10]) softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 2000): if n_draws % 1000 == 0: print(n_draws) _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L); _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*32) _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 32]); for j in range(test_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1; softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (perturbFM_h + _perturbFM_h/2)}) * softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (BenignLNoise + _BenignLNoise/2), FM_h: perturbFM_h}) #softmax_predictions = softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: BenignLNoise, FM_h: (_perturbFM_h)}) * 
softmax_y_conv.eval(feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax; is_correct = [] is_robust = [] for j in range(test_size): is_correct.append(np.argmax(mnist.test.labels[j]) == np.argmax(final_predictions[j])) robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / (dp_mult) is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_size robust_adv_acc_dict[atk] = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum(is_robust)*1.0/test_size ############################## for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) max_adv_acc_dict[atk] = max(max_adv_acc_dict[atk], adv_acc_dict[atk]) max_robust_adv_acc_dict[atk] = max(max_robust_adv_acc_dict[atk], robust_adv_acc_dict[atk]*robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n') # logfile.write("step \t %d \t %g \t %g \n"%(i, benign_acc, adv_acc)) # print("step \t %d \t %g \t %g"%(i, benign_acc, adv_acc)); # estimate end time """if i > 0 and i % int(10*step_for_epoch) == 0: current_time_interval = time.time() - last_eval_time last_eval_time = time.time() print('during last eval interval, {} epoch takes {}'.format(10, parse_time(current_time_interval))) accum_time += current_time_interval accum_epoch += 10 estimate_time = ((_global_step + T - i) / step_for_epoch) * (accum_time / accum_epoch) print('estimate finish in: {}'.format(parse_time(estimate_time)))""" #print("step \t %d \t adversarial test accuracy \t %g"%(i, 
accuracy_x.eval(feed_dict={x: adv_images, y_: mnist.test.labels, noise: Lnoise_empty}))); """checkpoint_path = os.path.join(os.getcwd() + '/tmp/train', 'model.ckpt') saver.save(sess, checkpoint_path, global_step=i);""" d_eps = random.random(); y_adv = batch[1] adv_images = sess.run(attack_tensor_dict['ifgsm'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) """for iter in range(0, 9): adv_images = sess.run(x_adv, feed_dict = {x:adv_images, y_:batch[1], FM_h: perturbFM_h_test, dynamic_eps: d_eps})""" batch = mnist.train.next_batch(emsemble_L) adv_images_mim = sess.run(attack_tensor_dict['mim'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) batch = mnist.train.next_batch(emsemble_L) adv_images_madry = sess.run(attack_tensor_dict['madry'], feed_dict = {x:batch[0], y_: batch[1], adv_noise: AdvLnoise, mu_alpha:[d_eps]}) y_adv = np.append(y_adv, batch[1], axis = 0) train_images = np.append(np.append(adv_images, adv_images_mim, axis = 0),adv_images_madry, axis = 0) batch = mnist.train.next_batch(L); #Get a random batch. # train with benign and adv samples pretrain_step.run(feed_dict={adv_x: train_images, x: batch[0], adv_noise: AdvLnoise_test, noise: BenignLNoise, FM_h: perturbFM_h}); train_step.run(feed_dict={x: batch[0], adv_x: train_images, y_: batch[1], adv_y_: y_adv, noise: BenignLNoise, adv_noise: AdvLnoise_test, FM_h: perturbFM_h}); duration = time.time() - start_time; # print(parse_time(duration)); #print running time duration# max_acc_string = "max acc: benign: \t{:.4f} {:.4f}".format(max_benign_acc, max_robust_benign_acc) for atk in attack_switch.keys(): if attack_switch[atk]: max_acc_string += " {}: \t{:.4f} {:.4f}".format(atk, max_adv_acc_dict[atk], max_robust_adv_acc_dict[atk]) logfile.write(max_acc_string + '\n') logfile.write(str(duration) + '\n')
# Pull one batch of (filenames, images) from the iterator.
filenames, images = next(image_iterator)

# Map "<ImageId>.png" -> TargetClass for every row of the metadata table.
all_images_target_class = {
    image_metadata["ImageId"][row] + ".png": image_metadata["TargetClass"][row]
    for row in image_metadata.index
}

# Build the attack graph in a fresh tf.Graph.
with tf.Graph().as_default():
    x_input = tf.placeholder(tf.float32, shape=batch_shape)
    model = InceptionModel(num_classes)

    # Attack objects wrapping the same model; within this span only
    # `momentum` is used to generate adversarial examples.
    carlini = CarliniWagnerL2(model)
    jacobian = SaliencyMapMethod(model)
    fgsm = FastGradientMethod(model)
    momentum = MomentumIterativeMethod(model)

    # Adversarial images are clipped to the [-1, 1] range.
    x_adv = momentum.generate(x_input,
                              eps=eps,
                              clip_min=-1.0,
                              clip_max=1.0)

    # The saver is created after `generate`, as in the original ordering,
    # and is handed to the session through a Scaffold together with
    # `checkpoint_path`.
    saver = tf.train.Saver(slim.get_model_variables())
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=tf.train.Scaffold(saver=saver),
        checkpoint_filename_with_path=checkpoint_path,
        master=tensorflow_master,
    )

    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
        nontargeted_images = sess.run(x_adv, feed_dict={x_input: images})

print("The original image is on the left, and the nontargeted adversarial "
      "image is on the right. They look very similar, don't they? "
      "It's very clear both are gondolas")
# Show image #1 of the batch next to its adversarial counterpart
# (concatenated along axis 1).
show_image(np.concatenate((images[1], nontargeted_images[1]), axis=1))
def test(cifar10_data, checkpoint_path, epochs, L, learning_rate, scale3,
         Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps,
         parameter_dict, testing_step):
    """Evaluate a restored CIFAR-10 checkpoint on adversarial test batches.

    Rebuilds the model parameters and the benign inference graph, builds one
    attack tensor per entry of ``attacks`` (ifgsm, mim, madry), restores the
    variables from ``checkpoint_path`` and then, in two passes over the
    attacks, draws a test batch, generates adversarial images, accumulates
    ``n_draw`` noisy argmax votes per sample and prints, for every attack:
    accuracy, accuracy among certified-robust samples, fraction of
    certified-robust samples, and the product of the last two.

    Args:
        cifar10_data: dataset object; only ``cifar10_data.test.next_batch(n)``
            is called, and its result is indexed as ``[0]`` (images) and
            ``[1]`` (label matrix whose row-wise argmax is the class index).
        checkpoint_path: checkpoint passed to ``saver.restore``.
        L: batch-size term used in the Laplace noise scale.
        Delta2: sensitivity term used in the Laplace noise scale.
        fgsm_eps: attack size fed to the attacks and used as the robustness
            threshold.
        parameter_dict: must provide ``'epsilon2_update'``, ``'dp_mult'``,
            ``'perturbFM_h'``, ``'Noise'`` and ``'Noise_test'``.
        testing_step: number of iterations (``nb_iter``) of each attack.
        epochs, learning_rate, scale3, epsilon2, eps2_ratio, alpha,
        perturbFM, total_eps: not used by this function; kept so existing
            callers keep working.

    Returns:
        None. Results are printed; the default graph is reset on success.
    """
    with tf.Graph().as_default():
        # Not used below, but it belongs to the variable list handed to the
        # Saver, so it is kept to leave the restore unchanged.
        global_step = tf.Variable(0, trainable=False)
        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameter declarations
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1',
                shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])

            # NOTE(review): sensitivity/gamma are not read anywhere in this
            # function; the ops are kept so graph construction is unchanged.
            shape = kernel1.get_shape().as_list()
            w_t = tf.reshape(kernel1, [-1, shape[-1]])
            w = tf.transpose(w_t)
            sing_vals = tf.svd(w, compute_uv=False)
            sensitivity = tf.reduce_max(sing_vals)
            gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2',
                shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3',
                shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4',
                shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04,
                wd=0.004,
                collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        scopes = [
            scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5
        ]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32,
                                   [None, image_size, image_size, 3])
        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        y = tf.placeholder(tf.float32, [None, 10])

        # benign conv output
        bi = 0
        x_image = x + noise
        y_conv = inference(x_image,
                           FM_h,
                           params,
                           scopes,
                           training=True,
                           bn_index=bi)
        softmax_y_conv = tf.nn.softmax(y_conv)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # The session is closed in `finally` so its resources are released
        # even when graph building, restoring or evaluation raises.
        try:
            dp_epsilon = 1.0
            epsilon2_update = parameter_dict['epsilon2_update']
            dp_mult = parameter_dict['dp_mult']

            # ============== attacks ================
            ch_model_probs = CustomCallableModelWrapper(
                callable_fn=inference_test_input_probs,
                output_layer='probs',
                params=params,
                scopes=scopes,
                image_size=image_size,
                adv_noise=adv_noise)
            attack_tensor_dict = {}

            # attack size, fed at run time
            mu_alpha = tf.placeholder(tf.float32, [1])

            # build each attack
            for atk in attacks:
                print('building attack {} tensors'.format(atk))
                if atk == 'ifgsm':
                    ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                     sess=sess)
                    attack_tensor_dict[atk] = ifgsm_obj.generate(
                        x=x,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / testing_step,
                        nb_iter=testing_step,
                        clip_min=-1.0,
                        clip_max=1.0)
                elif atk == 'mim':
                    mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                      sess=sess)
                    attack_tensor_dict[atk] = mim_obj.generate(
                        x=x,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / testing_step,
                        nb_iter=testing_step,
                        decay_factor=1.0,
                        clip_min=-1.0,
                        clip_max=1.0)
                elif atk == 'madry':
                    madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                    attack_tensor_dict[atk] = madry_obj.generate(
                        x=x,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / testing_step,
                        nb_iter=testing_step,
                        clip_min=-1.0,
                        clip_max=1.0)

            # Create a saver and load checkpoint.
            # tf.global_variables() replaces the deprecated
            # tf.all_variables() alias.
            saver = tf.train.Saver(var_list=tf.global_variables(),
                                   max_to_keep=1000)
            saver.restore(sess, checkpoint_path)

            # load some fixed noise
            perturbFM_h = parameter_dict['perturbFM_h']
            Noise = parameter_dict['Noise']
            Noise_test = parameter_dict['Noise_test']

            # test on testing dataset
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            test_batch_size = 5000
            n_draw = 1000
            begin_time = time.time()
            print('on testing set')
            print('test_batch_size: {}'.format(test_batch_size))
            print('testing iteration: {}'.format(testing_step))
            print('testing n_draw: {}'.format(n_draw))

            sample_idx = np.arange(test_batch_size)

            # Two passes; every (pass, attack) pair draws its own test batch.
            # NOTE(review): the per-attack dict entries are overwritten by
            # the second pass, so results are reported after each pass.
            for _ in [0, 1]:
                for atk in attacks:
                    print(atk)

                    # generate test samples
                    test_batch = cifar10_data.test.next_batch(test_batch_size)
                    adv_images = sess.run(attack_tensor_dict[atk],
                                          feed_dict={
                                              x: test_batch[0],
                                              adv_noise: Noise_test,
                                              mu_alpha: [fgsm_eps]
                                          })
                    print("Done adversarial examples")

                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([test_batch_size, 10])
                    softmax_predictions = sess.run(softmax_y_conv,
                                                   feed_dict={
                                                       x: adv_images,
                                                       noise: Noise,
                                                       FM_h: perturbFM_h
                                                   })
                    argmax_predictions = np.argmax(softmax_predictions,
                                                   axis=1)
                    argmax_labels = np.argmax(test_batch[1], axis=1)
                    print('labels')
                    print(argmax_labels[0:100])
                    print('init predictions')
                    print(argmax_predictions[0:100])

                    for draw in range(n_draw):
                        _BenignLNoise = generateIdLMNoise(
                            image_size, Delta2, epsilon2_update, L)
                        _perturbFM_h = np.random.laplace(
                            0.0, 2 * Delta2 / (epsilon2_update * L),
                            14 * 14 * 128)
                        _perturbFM_h = np.reshape(_perturbFM_h,
                                                  [-1, 14, 14, 128])

                        # progress report half-way through the draws
                        if draw == n_draw // 2:
                            print("n_draws = {}/{}".format(draw, n_draw))
                            print('time passed: {}s'.format(time.time() -
                                                            begin_time))

                        # One vote per sample for the previous prediction.
                        # Row indices are unique, so a fancy-indexed += is
                        # equivalent to the per-sample loop it replaces.
                        predictions_form_argmax[sample_idx,
                                                argmax_predictions] += 1

                        # Next prediction: product of the softmax under
                        # perturbed input noise and under perturbed
                        # feature-map noise.
                        softmax_predictions = sess.run(
                            softmax_y_conv,
                            feed_dict={
                                x: adv_images,
                                noise: (_BenignLNoise / 10 + Noise),
                                FM_h: perturbFM_h
                            }) * sess.run(
                                softmax_y_conv,
                                feed_dict={
                                    x: adv_images,
                                    noise: Noise,
                                    FM_h: (_perturbFM_h / 10 + perturbFM_h)
                                })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)

                    final_predictions = predictions_form_argmax
                    print('final predictions')
                    print(np.argmax(final_predictions, axis=1)[0:100])

                    is_correct = (argmax_labels == np.argmax(
                        final_predictions, axis=1))
                    is_robust = np.array([
                        robustness.robustness_size_argmax(
                            counts=predictions_form_argmax[j],
                            eta=0.05,
                            dp_attack_size=fgsm_eps,
                            dp_epsilon=dp_epsilon,
                            dp_delta=0.05,
                            dp_mechanism='laplace') / dp_mult >= fgsm_eps
                        for j in range(test_batch_size)
                    ], dtype=bool)

                    n_robust = np.sum(is_robust)
                    adv_acc_dict[atk] = (np.sum(is_correct) * 1.0 /
                                         test_batch_size)
                    # Report 0.0 instead of NaN when no sample is certified.
                    if n_robust > 0:
                        robust_adv_acc_dict[atk] = (
                            np.sum(np.logical_and(is_robust, is_correct)) *
                            1.0 / n_robust)
                    else:
                        robust_adv_acc_dict[atk] = 0.0
                    robust_adv_utility_dict[atk] = (n_robust * 1.0 /
                                                    test_batch_size)

                ##############################
                log_str = 'testing, eps: {}; steps: {};'.format(
                    fgsm_eps, testing_step)
                for atk in attacks:
                    log_str += "\n{}: {:.4f} {:.4f} {:.4f} {:.4f} ".format(
                        atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                        robust_adv_utility_dict[atk],
                        robust_adv_acc_dict[atk] *
                        robust_adv_utility_dict[atk])
                print(log_str, flush=True)
        finally:
            sess.close()

    # Must run outside the `with` block: resetting while a graph is still
    # on the default-graph stack is rejected by TensorFlow.
    tf.reset_default_graph()
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile, parameter_dict): logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" % (fgsm_eps, learning_rate, alpha, total_eps)) """Train CIFAR-10 for a number of steps.""" # make sure variables are placed on cpu # TODO: for AWS version, check if put variables on GPU will be better with tf.Graph().as_default(), tf.device('/cpu:0'): global_step = tf.Variable(0, trainable=False) attacks = ['ifgsm', 'mim', 'madry'] # manually create all scopes with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope: scope_conv1 = scope with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope: scope_conv2 = scope with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope: scope_conv3 = scope with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope: scope_local4 = scope with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope: scope_local5 = scope # Parameters Declarification #with tf.variable_scope('conv1') as scope: # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])): with tf.variable_scope(scope_conv1) as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[4, 4, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[AECODER_VARIABLES]) biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0), collect=[AECODER_VARIABLES]) # shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivity = tf.reduce_max(sing_vals) gamma = 2 * Delta2 / (L * sensitivity) with tf.variable_scope(scope_conv2) as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with 
tf.variable_scope(scope_conv3) as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with tf.variable_scope(scope_local4) as scope: kernel4 = _variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004, collect=[CONV_VARIABLES]) biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) with tf.variable_scope(scope_local5) as scope: kernel5 = _variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES]) biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1), collect=[CONV_VARIABLES]) # group these for use as parameters params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5 ] scopes = [ scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5 ] # placeholders for input values FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128]) # one time noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) # one time adv_noise = tf.placeholder( tf.float32, [None, image_size, image_size, 3]) # one time x_sb = tf.placeholder(tf.float32, [None, image_size, image_size, 3 ]) # input is the bunch of n_batchs x_list = tf.split(x_sb, N_GPUS, axis=0) # split it into each batch adv_x_sb = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) adv_x_list = tf.split(adv_x_sb, N_GPUS, axis=0) x_test = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) y_sb = tf.placeholder(tf.float32, [None, 10]) # input is the bunch of n_batchs y_list = tf.split(y_sb, N_GPUS, axis=0) # split it into each batch adv_y_sb = tf.placeholder(tf.float32, [None, 10]) # input is the bunch of n_batchs # adv_y_list = 
tf.split(adv_y_sb, N_GPUS, axis=0) # split it into each batch y_test = tf.placeholder(tf.float32, [None, 10]) # re-arrange the input samples _split_adv_y_sb = tf.split(adv_y_sb, N_AUX_GPUS, axis=0) reorder_adv_y_sb = [] for i in range(N_GPUS): reorder_adv_y_sb.append( tf.concat([ _split_adv_y_sb[i + N_GPUS * atk_index] for atk_index in range(len(attacks)) ], axis=0)) tower_pretrain_grads = [] tower_train_grads = [] all_train_loss = [] pretrain_opt = tf.train.AdamOptimizer(learning_rate) train_opt = tf.train.GradientDescentOptimizer(learning_rate) # batch index bi = 0 for gpu in GPU_IDX: # putting ops on each tower (GPU) with tf.device('/gpu:{}'.format(gpu)): print('Train inference GPU placement') print('/gpu:{}'.format(gpu)) # Auto-Encoder # # pretrain_adv and pretrain_benign are cost tensor of the encoding layer with tf.variable_scope(scope_conv1) as scope: Enc_Layer2 = EncLayer(inpt=adv_x_list[bi], n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_adv = Enc_Layer2.get_train_ops2( xShape=tf.shape(adv_x_list[bi])[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h, bn_index=bi) Enc_Layer3 = EncLayer(inpt=x_list[bi], n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu) pretrain_benign = Enc_Layer3.get_train_ops2( xShape=tf.shape(x_list[bi])[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h, bn_index=bi) pretrain_cost = pretrain_adv + pretrain_benign # this cost is not used # cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost)/2.0); # benign conv output x_image = x_list[bi] + noise y_conv = inference(x_image, FM_h, params, scopes, training=True, bn_index=bi) # softmax_y_conv = tf.nn.softmax(y_conv) # adv conv output adv_x_image = adv_x_list[bi] + adv_noise y_adv_conv = inference(adv_x_image, FM_h, 
params, scopes, training=True, bn_index=bi) # Calculate loss. Apply Taylor Expansion for the output layer perturbW = perturbFM * params[8] train_loss = cifar10.TaylorExp(y_conv, y_list[bi], y_adv_conv, reorder_adv_y_sb[bi], L, alpha, perturbW) all_train_loss.append(train_loss) # list of variables to train pretrain_var_list = tf.get_collection(AECODER_VARIABLES) train_var_list = tf.get_collection(CONV_VARIABLES) # compute tower gradients pretrain_grads = pretrain_opt.compute_gradients( pretrain_cost, var_list=pretrain_var_list) train_grads = train_opt.compute_gradients( train_loss, var_list=train_var_list) # get_pretrain_grads(pretrain_cost, global_step, learning_rate, pretrain_var_list) # train_grads = get_train_grads(train_loss, global_step, learning_rate, train_var_list) # note this list contains grads and variables tower_pretrain_grads.append(pretrain_grads) tower_train_grads.append(train_grads) # batch index bi += 1 # average the gradient from each tower pretrain_var_dict = {} all_pretrain_grads = {} avg_pretrain_grads = [] for var in tf.get_collection(AECODER_VARIABLES): if var.name not in all_pretrain_grads: all_pretrain_grads[var.name] = [] pretrain_var_dict[var.name] = var for tower in tower_pretrain_grads: for var_grad in tower: all_pretrain_grads[var_grad[1].name].append(var_grad[0]) for var_name in all_pretrain_grads: # expand dim 0, then concat on dim 0, then reduce mean on dim 0 expand_pretrain_grads = [ tf.expand_dims(g, 0) for g in all_pretrain_grads[var_name] ] concat_pretrain_grads = tf.concat(expand_pretrain_grads, axis=0) reduce_pretrain_grads = tf.reduce_mean(concat_pretrain_grads, 0) # rebuild (grad, var) list avg_pretrain_grads.append( (reduce_pretrain_grads, pretrain_var_dict[var_name])) print('*****************************') print("avg_pretrain_grads:") for avg_pretrain_grad in avg_pretrain_grads: print('grads') print((avg_pretrain_grad[0].name, avg_pretrain_grad[0].shape)) print('var') print((avg_pretrain_grad[1].name, 
avg_pretrain_grad[1].shape)) print('------') train_var_dict = {} all_train_grads = {} avg_train_grads = [] for var in tf.get_collection(CONV_VARIABLES): if var.name not in all_train_grads: all_train_grads[var.name] = [] train_var_dict[var.name] = var for tower in tower_train_grads: for var_grad in tower: all_train_grads[var_grad[1].name].append(var_grad[0]) for var_name in all_train_grads: # expand dim 0, then concat on dim 0, then reduce mean on dim 0 expand_train_grads = [ tf.expand_dims(g, 0) for g in all_train_grads[var_name] ] concat_train_grads = tf.concat(expand_train_grads, axis=0) reduce_train_grads = tf.reduce_mean(concat_train_grads, 0) # rebuild (grad, var) list avg_train_grads.append( (reduce_train_grads, train_var_dict[var_name])) print('*****************************') print("avg_train_grads:") for avg_train_grad in avg_train_grads: print('grads') print((avg_train_grad[0].name, avg_train_grad[0].shape)) print('var') print((avg_train_grad[1].name, avg_train_grad[1].shape)) print('------') print('*****************************') # get averaged loss tensor avg_loss = tf.reduce_mean(tf.stack(all_train_loss), axis=0) # TODO: take the average of the bn variables from each tower/training GPU # currently, testing is using the bn variables on bn_index 0 (tower/training GPU 0) # build train op (apply average gradient to variables) # according to 1.13 doc, updates need to be manually applied _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) print('update ops:') print(_update_ops) with tf.control_dependencies(_update_ops): pretrain_op = pretrain_opt.apply_gradients(avg_pretrain_grads, global_step=global_step) train_op = train_opt.apply_gradients(avg_train_grads, global_step=global_step) # start a session with memory growth config = tf.ConfigProto(log_device_placement=False) config.gpu_options.allow_growth = True sess = tf.Session(config=config) print("session created") # init kernel 1 and get some values from it sess.run(kernel1.initializer) dp_epsilon = 
1.0 parameter_dict['dp_epsilon'] = dp_epsilon _gamma = sess.run(gamma) _gamma_x = Delta2 / L epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x) parameter_dict['epsilon2_update'] = epsilon2_update print(epsilon2_update / _gamma + epsilon2_update / _gamma_x) print(epsilon2_update) # NOTE: these values needs to be calculated in testing delta_r = fgsm_eps * (image_size**2) parameter_dict['delta_r'] = delta_r _sensitivityW = sess.run(sensitivity) parameter_dict['_sensitivityW'] = _sensitivityW delta_h = _sensitivityW * (14**2) parameter_dict['delta_h'] = delta_h #dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon) dp_mult = (Delta2 * dp_epsilon) / (L * epsilon2_update * (delta_h / 2 + delta_r)) parameter_dict['dp_mult'] = dp_mult # place test-time inference into CPU with tf.device('/cpu:0'): # testing pipeline test_x_image = x_test + noise test_y_conv = inference(test_x_image, FM_h, params, scopes, training=True, bn_index=0) test_softmax_y_conv = tf.nn.softmax(test_y_conv) # ============== attacks ================ iter_step_training = 3 parameter_dict['iter_step_training'] = iter_step_training # iter_step_testing = 1000 aux_dup_count = N_GPUS # split input x_super_batch into N_AUX_GPUS parts x_attacks = tf.split(x_sb, N_AUX_GPUS, axis=0) # split input x_test into aux_dup_count parts x_test_split = tf.split(x_test, aux_dup_count, axis=0) # setup all attacks # attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False} ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, scopes=scopes, image_size=image_size, adv_noise=adv_noise) attack_tensor_training_dict = {} attack_tensor_testing_dict = {} # define each attack method's tensor mu_alpha = tf.placeholder(tf.float32, [1]) # build each attack for atk_idx in range(len(attacks)): atk = attacks[atk_idx] 
print('building attack {} tensors'.format(atk)) # for each gpu assign to each attack attack_tensor_training_dict[atk] = [] attack_tensor_testing_dict[atk] = [] for i in range(aux_dup_count): if atk == 'ifgsm': with tf.device('/gpu:{}'.format(AUX_GPU_IDX[i])): print('ifgsm GPU placement: /gpu:{}'.format( AUX_GPU_IDX[i])) # ifgsm tensors for training ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict[atk].append( ifgsm_obj.generate(x=x_attacks[i], eps=mu_alpha, eps_iter=mu_alpha / iter_step_training, nb_iter=iter_step_training, clip_min=-1.0, clip_max=1.0)) elif atk == 'mim': with tf.device('/gpu:{}'.format( AUX_GPU_IDX[i + 1 * aux_dup_count])): print('mim GPU placement: /gpu:{}'.format( AUX_GPU_IDX[i + 1 * aux_dup_count])) # mim tensors for training mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) attack_tensor_training_dict[atk].append( mim_obj.generate( x=x_attacks[i + 1 * aux_dup_count], eps=mu_alpha, eps_iter=mu_alpha / iter_step_training, nb_iter=iter_step_training, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)) elif atk == 'madry': with tf.device('/gpu:{}'.format( AUX_GPU_IDX[i + 2 * aux_dup_count])): print('madry GPU placement: /gpu:{}'.format( AUX_GPU_IDX[i + 2 * aux_dup_count])) # madry tensors for training madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) attack_tensor_training_dict[atk].append( madry_obj.generate( x=x_attacks[i + 2 * aux_dup_count], eps=mu_alpha, eps_iter=mu_alpha / iter_step_training, nb_iter=iter_step_training, clip_min=-1.0, clip_max=1.0)) # combine all attack tensors adv_concat_list = [] for i in range(aux_dup_count): adv_concat_list.append( tf.concat( [attack_tensor_training_dict[atk][i] for atk in attacks], axis=0)) # the tensor that contains each batch of adv samples for training # has same sample order as the labels adv_super_batch_tensor = tf.concat(adv_concat_list, axis=0) #====================== attack ========================= #adv_logits, _ = inference(c_x_adv 
+ W_conv1Noise, perturbFM, params) print('******************** debug info **********************') # list of variables to train pretrain_var_list = tf.get_collection(AECODER_VARIABLES) print('pretrain var list') for v in pretrain_var_list: print((v.name, v.shape)) print('**********************************') train_var_list = tf.get_collection(CONV_VARIABLES) print('train var list') for v in train_var_list: print((v.name, v.shape)) print('**********************************') # all variables print('all variables') vl = tf.global_variables() for v in vl: print((v.name, v.shape)) print('**********************************') # all ops ops = [n.name for n in tf.get_default_graph().as_graph_def().node] print('total number of ops') print(len(ops)) # for op in ops: # print(op) print('******************** debug info **********************') # exit() # Create a saver. saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000) # Build an initialization operation to run below. init = tf.initialize_all_variables() sess.run(init) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / L)) * epochs + 1) # number of steps print('total number of steps: {}'.format(T)) step_for_epoch = int(math.ceil(D / L)) #number of steps for one epoch parameter_dict['step_for_epoch'] = step_for_epoch print('step_for_epoch: {}'.format(step_for_epoch)) # generate some fixed noise perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128) # one time perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128]) # one time parameter_dict['perturbH_test'] = perturbH_test print('perturbH_test') print(perturbH_test.shape) perturbFM_h = np.random.laplace(0.0, 2 * Delta2 / (epsilon2_update * 
L), 14 * 14 * 128) # one time perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128]) # one time parameter_dict['perturbFM_h'] = perturbFM_h print('perturbFM_h') print(perturbFM_h.shape) Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L) # one time parameter_dict['Noise'] = Noise Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update, L) # one time parameter_dict['Noise_test'] = Noise_test print('Noise and Noise_test') print(Noise.shape) print(Noise_test.shape) # exit() # some timing variables adv_duration_total = 0.0 adv_duration_count = 0 train_duration_total = 0.0 train_duration_count = 0 # some debug flag adv_batch_flag = True batch_flag = True L_flag = True parameter_flag = True _global_step = 0 for step in xrange(_global_step, _global_step + T): start_time = time.time() # TODO: fix this d_eps = random.random() * 0.5 # d_eps = 0.25 print('d_eps: {}'.format(d_eps)) # version with 3 AUX GPU # get two super batchs, one for benign training, one for adv training super_batch_images, super_batch_labels = cifar10_data.train.next_super_batch( N_GPUS, random=True) super_batch_images_for_adv, super_batch_adv_labels = cifar10_data.train.next_super_batch_premix_ensemble( N_GPUS, random=True) # TODO: re-arrange the adv labels to match the adv samples # run adv_tensors_batch_concat to generate adv samples super_batch_adv_images = sess.run(adv_super_batch_tensor, feed_dict={ x_sb: super_batch_images_for_adv, adv_noise: Noise, mu_alpha: [d_eps] }) adv_finish_time = time.time() adv_duration = adv_finish_time - start_time adv_duration_total += adv_duration adv_duration_count += 1 if adv_batch_flag: print(super_batch_images.shape) print(super_batch_labels.shape) print(super_batch_adv_images.shape) print(super_batch_adv_labels.shape) adv_batch_flag = False if batch_flag: print(super_batch_images.shape) print(super_batch_labels.shape) batch_flag = False if L_flag: print("L: {}".format(L)) L_flag = False if parameter_flag: print('*=*=*=*=*') 
print(parameter_dict) print('*=*=*=*=*', flush=True) logfile.write('*=*=*=*=*\n') logfile.write(str(parameter_dict)) logfile.write('*=*=*=*=*\n') parameter_flag = False _, _, avg_loss_value = sess.run( [pretrain_op, train_op, avg_loss], feed_dict={ x_sb: super_batch_images, y_sb: super_batch_labels, adv_x_sb: super_batch_adv_images, adv_y_sb: super_batch_adv_labels, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h }) assert not np.isnan( avg_loss_value), 'Model diverged with loss = NaN' train_finish_time = time.time() train_duration = train_finish_time - adv_finish_time train_duration_total += train_duration train_duration_count += 1 # save model every 50 epochs if step % (50 * step_for_epoch) == 0 and (step >= 50 * step_for_epoch): print('saving model') checkpoint_path = os.path.join(os.getcwd() + dirCheckpoint, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step) # Save the model checkpoint periodically. # if step % (10*step_for_epoch) == 0 and (step > _global_step): if step % 10 == 0 and (step > _global_step): # print n steps and time print("current epoch: {:.2f}".format(step / step_for_epoch)) num_examples_per_step = L * N_GPUS * 2 avg_adv_duration = adv_duration_total / adv_duration_count avg_train_duration = train_duration_total / train_duration_count avg_total_duration = avg_adv_duration + avg_train_duration examples_per_sec = num_examples_per_step / avg_total_duration sec_per_step = avg_total_duration # sec_per_batch = sec_per_step / (N_GPUS * 2) format_str = ( '%s: step %d, loss = %.2f (%.1f examples/sec; %.2f ' 'sec/step; %.2f sec/adv_gen_op; %.2f sec/train_op)') actual_str = format_str % ( datetime.now(), step, avg_loss_value, examples_per_sec, sec_per_step, avg_adv_duration, avg_train_duration) print(actual_str, flush=True) logfile.write(actual_str + '\n')
def _restore_checkpoint_for_step(saver, sess, checkpoint_dir, step_to_load):
    """Restore the checkpoint(s) in ``checkpoint_dir`` saved at ``step_to_load``.

    Args:
        saver: ``tf.train.Saver`` used to restore the variables.
        sess: session the variables are restored into.
        checkpoint_dir: directory handed to ``tf.train.get_checkpoint_state``.
        step_to_load: integer compared with the number that follows the last
            ``-`` in each checkpoint path.

    Raises:
        ValueError: if the directory holds no checkpoint state, or none of its
            checkpoints carries the requested step.
    """
    ckpts = tf.train.get_checkpoint_state(checkpoint_dir)
    print(ckpts)
    if ckpts is None:
        raise ValueError(
            'no checkpoint state found in {}'.format(checkpoint_dir))

    restored = False
    for ckpt in ckpts.all_model_checkpoint_paths:
        if int(ckpt.split('-')[-1]) == step_to_load:
            saver.restore(sess, ckpt)
            print('model loaded from {}'.format(ckpt))
            restored = True
    if not restored:
        # Evaluating freshly-declared (unrestored) variables is meaningless,
        # so fail here with a message that names the missing step.
        raise ValueError('no checkpoint for step {} found in {}'.format(
            step_to_load, checkpoint_dir))


def _draw_encoder_noise(scale, train_params):
    """Draw one float32 Gaussian noise array shaped like the encoder output.

    Args:
        scale: value passed as ``scale`` to ``np.random.normal``.
        train_params: object providing ``enc_h_size`` and ``enc_filters``.

    Returns:
        Array reshaped to ``[-1, enc_h_size, enc_h_size, enc_filters]``.
    """
    h_size = train_params.enc_h_size
    n_filters = train_params.enc_filters
    flat = np.random.normal(0.0, scale,
                            h_size * h_size * n_filters).astype(np.float32)
    return np.reshape(flat, [-1, h_size, h_size, n_filters])


def _batch_robustness_metrics(vote_counts, labels, batch_size, fgsm_eps,
                              dp_epsilon, dp_mult):
    """Turn per-sample vote counts into the three per-batch metrics.

    Args:
        vote_counts: array of shape ``[batch_size, num_classes]`` with the
            number of noisy draws that voted for each class.
        labels: per-sample label rows; ``np.argmax`` of a row is compared with
            the most voted class.
        batch_size: number of samples scored.
        fgsm_eps: attack size the certified bound is compared against.
        dp_epsilon: forwarded to ``robustness_size_argmax``.
        dp_mult: divisor applied to the returned robustness size.

    Returns:
        Tuple ``(accuracy, robust_accuracy, robust_utility)`` where
        ``robust_accuracy`` is the accuracy among samples whose bound is at
        least ``fgsm_eps`` and ``robust_utility`` is the fraction of such
        samples.
    """
    is_correct = []
    is_robust = []
    for j in range(batch_size):
        is_correct.append(np.argmax(labels[j]) == np.argmax(vote_counts[j]))
        robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
            counts=vote_counts[j],
            eta=0.05,
            dp_attack_size=fgsm_eps,
            dp_epsilon=dp_epsilon,
            dp_delta=0.05,
            dp_mechanism='gaussian') / dp_mult
        is_robust.append(robustness_from_argmax >= fgsm_eps)

    n_robust = np.sum(is_robust)
    n_robust_correct = np.sum(
        [a and b for a, b in zip(is_robust, is_correct)])
    accuracy = np.sum(is_correct) * 1.0 / batch_size
    # Without any certified sample the conditional accuracy is 0/0; report 0
    # for this batch so a single such batch cannot turn the average into nan.
    robust_accuracy = n_robust_correct * 1.0 / n_robust if n_robust > 0 else 0.0
    robust_utility = n_robust * 1.0 / batch_size
    return accuracy, robust_accuracy, robust_utility


def SSGD_resnet_testing(TIN_data, resnet_params, train_params, test_params,
                        all_params):
    """Evaluate a restored model on benign and adversarial test batches.

    Builds the test graph, restores the checkpoint saved at
    ``test_params.step_to_load``, creates the ifgsm / mim / madry attack
    tensors and, for every value in ``test_params.fgsm_eps_list``, scores
    ``test_params.test_epochs`` batches with a noisy-vote prediction. One
    summary per eps value is printed and appended to the log file.

    Args:
        TIN_data: dataset whose ``test.next_batch(n)`` returns a pair; item 0
            is fed as images, item 1 is indexed per sample as the labels.
        resnet_params: forwarded unchanged to ``test_inference`` and to the
            attack model wrapper.
        train_params: provides ``enc_kernel_size``, ``enc_filters``,
            ``enc_h_size``, ``hk``, ``num_classes``, ``image_size``,
            ``attack_norm_bound``, ``iter_step_testing`` and ``dp_epsilon``.
        test_params: provides ``check_point_dir``, ``step_to_load``,
            ``log_file_path``, ``attacks``, ``fgsm_eps_list``,
            ``test_epochs``, ``test_batch_size``, ``num_samples``,
            ``epoch_to_test`` and ``iter_step_testing``.
        all_params: dict with the keys ``dp_mult``, ``sigmaEGM``,
            ``sigmaHGM``, ``__noiseE``, ``__noiseH`` and ``grad_redis``.

    Raises:
        ValueError: if no checkpoint for ``test_params.step_to_load`` exists.
    """
    # dict for encoding layer variables and output layer variables
    pre_define_vars = {}
    # list of variables to train (only printed in this function)
    train_vars = []

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Not read below, but it is a global variable and therefore part of
        # the variable list handed to the saver.
        global_step = tf.Variable(0, trainable=False)

        ######################################
        # Parameter declaration
        ######################################
        # encoding (pretrain) layer variables
        with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE):
            kernel1 = tf.get_variable(
                'kernel1',
                shape=[
                    train_params.enc_kernel_size,
                    train_params.enc_kernel_size, 3, train_params.enc_filters
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            biases1 = tf.get_variable(
                'biases1',
                shape=[train_params.enc_filters],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        pre_define_vars['kernel1'] = kernel1
        pre_define_vars['biases1'] = biases1
        train_vars.append(kernel1)
        train_vars.append(biases1)

        dp_mult = all_params['dp_mult']

        # output layer variables
        with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE):
            stdv = 1.0 / math.sqrt(train_params.hk)
            final_w = tf.get_variable(
                'kernel',
                shape=[train_params.hk, train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-stdv, stdv))
            final_b = tf.get_variable(
                'bias',
                shape=[train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.0))
        pre_define_vars['final_w'] = final_w
        pre_define_vars['final_b'] = final_b
        train_vars.append(final_w)
        train_vars.append(final_b)

        ######################################
        # Build a Graph that computes the logits predictions from the inputs
        ######################################
        # input placeholders; x_test / y_test are declared but not fed here
        image_shape = [
            None, train_params.image_size, train_params.image_size, 3
        ]
        x_sb = tf.placeholder(tf.float32, image_shape, name='x_sb')
        x_test = tf.placeholder(tf.float32, image_shape, name='x_test')
        y_sb = tf.placeholder(tf.float32, [None, train_params.num_classes],
                              name='y_sb')
        y_test = tf.placeholder(tf.float32, [None, train_params.num_classes],
                                name='y_test')
        noise = tf.placeholder(tf.float32, [
            None, train_params.enc_h_size, train_params.enc_h_size,
            train_params.enc_filters
        ], name='noise')
        keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

        with tf.device('/gpu:0'):
            # the model for testing
            y_logits_test, _ = test_inference(
                x_sb, train_params.attack_norm_bound * noise, keep_prob,
                pre_define_vars, resnet_params, train_params)
            y_softmax_test = tf.nn.softmax(y_logits_test)
        # Built for completeness; the loop below scores predictions in numpy.
        correct_prediction = tf.equal(tf.argmax(y_logits_test, 1),
                                      tf.argmax(y_sb, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # print all variables
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        all_vars = tf.global_variables()
        print_var_list('all vars', all_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        # add selected vars into list
        resnet_parts = ('conv0', 'fc', 'res3', 'res4', 'res1', 'res2')
        var_kinds = ('gamma', 'beta', 'kernel', 'bias')
        for var in tf.global_variables():
            name = var.name
            has_kind = any(kind in name for kind in var_kinds)
            is_resnet_var = ('resnet_model' in name and has_kind
                             and any(part in name for part in resnet_parts))
            is_enc_var = 'enc_layer' in name and has_kind
            if (is_resnet_var or is_enc_var) and var not in train_vars:
                train_vars.append(var)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        print_var_list('train_vars', train_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        ######################################
        # Create a saver (tf.all_variables is the deprecated alias).
        saver = tf.train.Saver(var_list=tf.global_variables(),
                               max_to_keep=1000)

        # start a session with memory growth; closed when the block exits
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            print("session created")

            # find the checkpoint we need to load and load it
            checkpoint_path_read = os.getcwd() + test_params.check_point_dir
            _restore_checkpoint_for_step(saver, sess, checkpoint_path_read,
                                         test_params.step_to_load)

            ########################################
            # setup all attacks
            attack_switch = {
                'fgsm': False,
                'ifgsm': True,
                'deepfool': False,
                'mim': True,
                'spsa': False,
                'cwl2': False,
                'madry': True,
                'stm': False
            }
            ch_model_probs = CustomCallableModelWrapper(
                callable_fn=inference_test_output_probs,
                output_layer='probs',
                keep_prob=keep_prob,
                pre_define_vars=pre_define_vars,
                resnet_params=resnet_params,
                train_params=train_params)
            attack_tensor_testing_dict = {}

            # attack size, fed per eps setting
            mu_alpha = tf.placeholder(tf.float32, [1])

            with tf.device('/gpu:0'):
                # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod
                # with no random init)
                if attack_switch['ifgsm']:
                    print('creating attack tensor of BasicIterativeMethod')
                    ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                     sess=sess)
                    attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(
                        x=x_sb,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / train_params.iter_step_testing,
                        nb_iter=train_params.iter_step_testing,
                        clip_min=-1.0,
                        clip_max=1.0)

                # MomentumIterativeMethod
                if attack_switch['mim']:
                    print('creating attack tensor of MomentumIterativeMethod')
                    mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                      sess=sess)
                    attack_tensor_testing_dict['mim'] = mim_obj.generate(
                        x=x_sb,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / train_params.iter_step_testing,
                        nb_iter=train_params.iter_step_testing,
                        decay_factor=1.0,
                        clip_min=-1.0,
                        clip_max=1.0)

                # MadryEtAl (Projected Gradient with random init, same as
                # rand+fgsm)
                if attack_switch['madry']:
                    print('creating attack tensor of MadryEtAl')
                    madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                    attack_tensor_testing_dict['madry'] = madry_obj.generate(
                        x=x_sb,
                        eps=mu_alpha,
                        eps_iter=mu_alpha / train_params.iter_step_testing,
                        nb_iter=train_params.iter_step_testing,
                        clip_min=-1.0,
                        clip_max=1.0)
            ########################################

            grad_redis = all_params['grad_redis']
            # Fixed noise fed on every run; the per-draw noise is added to it.
            base_noise = (all_params['__noiseE'] + all_params['__noiseH']) / 2
            # NOTE(review): np.random.normal treats its second argument as a
            # standard deviation, yet sigma**2 is passed - confirm intended.
            scale_e = all_params['sigmaEGM']**2
            scale_h = all_params['sigmaHGM']**2

            ####################################
            print('start testing')
            batch_size = test_params.test_batch_size
            sample_idx = np.arange(batch_size)
            attacks_and_benign = test_params.attacks + ['benign']
            log_file_path = os.getcwd() + test_params.log_file_path

            with open(log_file_path, 'a', encoding='utf-8') as log_file:
                #===================adv samples=====================
                # for each eps setting
                for fgsm_eps in test_params.fgsm_eps_list:
                    adv_acc_dict = dict.fromkeys(attacks_and_benign, 0.0)
                    robust_adv_acc_dict = dict.fromkeys(
                        attacks_and_benign, 0.0)
                    robust_adv_utility_dict = dict.fromkeys(
                        attacks_and_benign, 0.0)
                    log_str = ''
                    eps_start_time = time.time()

                    # cover all test data
                    for _ in range(test_params.test_epochs):
                        test_batch = TIN_data.test.next_batch(batch_size)
                        adv_images_dict = {}

                        # test for each attack
                        for atk in attacks_and_benign:
                            start_time = time.time()
                            if atk == 'benign':
                                testing_img = test_batch[0]
                            elif attack_switch[atk]:
                                # generate adv samples in-place
                                if atk not in adv_images_dict:
                                    adv_images_dict[atk] = sess.run(
                                        attack_tensor_testing_dict[atk],
                                        feed_dict={
                                            x_sb: test_batch[0],
                                            mu_alpha: [fgsm_eps],
                                            keep_prob: 1.0
                                        })
                                testing_img = adv_images_dict[atk]
                            else:
                                continue
                            print('adv gen time: {}s'.format(time.time() -
                                                             start_time))
                            start_time = time.time()

                            ### PixelDP Robustness ###
                            predictions_form_argmax = np.zeros(
                                [batch_size, train_params.num_classes])
                            softmax_predictions = sess.run(
                                y_softmax_test,
                                feed_dict={
                                    x_sb: testing_img,
                                    noise: base_noise,
                                    keep_prob: 1.0
                                })
                            argmax_predictions = np.argmax(
                                softmax_predictions, axis=1)
                            for n_draws in range(
                                    1, test_params.num_samples + 1):
                                if n_draws % 100 == 0:
                                    print(
                                        'current draws: {}, avg draw time: {}s'
                                        .format(n_draws,
                                                (time.time() - start_time) /
                                                n_draws))
                                _noiseE = _draw_encoder_noise(
                                    scale_e, train_params)
                                _noise = _draw_encoder_noise(
                                    scale_h, train_params) * grad_redis
                                # Count the votes of the previous run before
                                # drawing the next prediction; each sample
                                # adds one vote to its predicted class.
                                predictions_form_argmax[
                                    sample_idx,
                                    argmax_predictions[:batch_size]] += 1
                                softmax_predictions = sess.run(
                                    y_softmax_test,
                                    feed_dict={
                                        x_sb: testing_img,
                                        noise: base_noise +
                                               (_noiseE + _noise) / 4,
                                        keep_prob: 1.0
                                    })
                                argmax_predictions = np.argmax(
                                    softmax_predictions, axis=1)

                            acc, robust_acc, utility = (
                                _batch_robustness_metrics(
                                    predictions_form_argmax, test_batch[1],
                                    batch_size, fgsm_eps,
                                    train_params.dp_epsilon, dp_mult))
                            adv_acc_dict[atk] += acc
                            robust_adv_acc_dict[atk] += robust_acc
                            robust_adv_utility_dict[atk] += utility

                            dt = time.time() - start_time
                            print('atk test time: {}s'.format(dt), flush=True)

                    ##############################
                    # average all acc for whole test data
                    log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n")
                    log_str += 'model trained epoch: {}\n'.format(
                        test_params.epoch_to_test)
                    log_str += 'fgsm_eps: {}\n'.format(fgsm_eps)
                    log_str += 'iter_step_testing: {}\n'.format(
                        test_params.iter_step_testing)
                    log_str += 'num_samples: {}\n'.format(
                        test_params.num_samples)
                    for atk in attacks_and_benign:
                        adv_acc_dict[atk] = (adv_acc_dict[atk] /
                                             test_params.test_epochs)
                        robust_adv_acc_dict[atk] = (robust_adv_acc_dict[atk] /
                                                    test_params.test_epochs)
                        robust_adv_utility_dict[atk] = (
                            robust_adv_utility_dict[atk] /
                            test_params.test_epochs)
                        # added robust prediction
                        log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
                            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                            robust_adv_utility_dict[atk],
                            robust_adv_acc_dict[atk] *
                            robust_adv_utility_dict[atk])

                    dt = time.time() - eps_start_time
                    print('total test time: {}s'.format(dt), flush=True)
                    print(log_str, flush=True)
                    print('*******************')
                    log_file.write(log_str)
                    log_file.write('*******************\n')
                    log_file.flush()
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1):
    """
    Train a basic CNN cleanly and adversarially and report its accuracies.

    Despite the function name, the data loaded here is Fashion-MNIST
    (``keras.datasets.fashion_mnist``), and the attack used for evaluation
    and adversarial training is ``MomentumIterativeMethod``.

    :param train_start: index of first training set example (slice start)
    :param train_end: slice end for the training set (exclusive)
    :param test_start: index of first test set example (slice start)
    :param test_end: slice end for the test set (exclusive)
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count passed to ``ModelBasicCNN``
    :param num_threads: when truthy, the session is limited to a single
                        intra-op thread (the value itself is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    config_args = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get Fashion MNIST data
    data = keras.datasets.fashion_mnist
    (x_train, y_train), (x_test, y_test) = data.load_data()

    # Honor the requested index ranges; the defaults cover the full
    # 60000-example training set and 10000-example test set.
    x_train = x_train[train_start:train_end]
    y_train = y_train[train_start:train_end]
    x_test = x_test[test_start:test_end]
    y_test = y_test[test_start:test_end]

    # Add the channel axis, one-hot encode labels, scale pixels to [0, 1]
    x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
    x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
    y_train = np_utils.to_categorical(y_train, 10)
    y_test = np_utils.to_categorical(y_test, 10)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack settings
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Store the accuracy of `preds` under `report_key`; print if tagged."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelBasicCNN('model1', nb_classes, nb_filters)
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Initialize the attack object and graph. The variable keeps the
        # name `fgsm`, but the attack is the momentum iterative method.
        fgsm = MomentumIterativeMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the clean model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Calculate training error
        if testing:
            do_eval(preds_adv, x_train, y_train,
                    'train_clean_train_adv_eval')

        print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to the attack
    model2 = ModelBasicCNN('model2', nb_classes, nb_filters)
    fgsm2 = MomentumIterativeMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in the original tutorial, the attack has
        # zero gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess,
          loss2,
          x_train,
          y_train,
          evaluate=evaluate2,
          args=train_params,
          rng=rng)

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2, eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
    """Train CIFAR-10 for a number of steps.

    Builds the graph (five weight/bias pairs, two auto-encoder heads that
    share ``kernel1``/``biases1``, and the Taylor-expansion loss), creates
    cleverhans attack tensors, then alternates one auto-encoder step and one
    classifier step per iteration on freshly generated adversarial examples.
    Every ``50 * step_for_epoch`` steps (from step ``300 * step_for_epoch``
    on) it estimates accuracy / certified robustness under each enabled
    attack and appends one line to ``logfile``.

    Args:
        cifar10_data: dataset object; ``.train.next_batch(n)`` and
            ``.test.next_batch(n)`` must return an indexable
            ``(images, labels)`` pair.
        epochs: number of epochs; the loop runs
            ``ceil(D / L) * epochs + 1`` steps.
        L: batch size. Also used in the noise scales and in ``dp_mult``.
        learning_rate: learning rate for the Adam pre-training step and for
            ``cifar10.train``.
        scale3: not used by the visible body.
        Delta2: sensitivity-like constant used in ``gamma`` and in the
            Laplace noise scales.
        epsilon2: budget-like constant; rescaled into ``epsilon2_update``
            before noise is drawn.
        eps2_ratio: not used by the visible body (the original only derived
            two unused locals from it).
        alpha: forwarded to ``cifar10.TaylorExp``.
        perturbFM: multiplied with ``kernel5`` to form the ``perturbW``
            argument of ``cifar10.TaylorExp``.
        fgsm_eps: attack size used at evaluation time, robustness threshold,
            and (divided by 3) the per-iteration step of iterative attacks.
        total_eps: only written to the log.
        logfile: open, writable text file-like object.

    Relies on module-level names defined elsewhere in the file, among them
    ``D``, ``hk``, ``image_size``, ``dirCheckpoint``, ``FLAGS``,
    ``AECODER_VARIABLES``, ``CONV_VARIABLES``, ``inference``,
    ``inference_test_input_probs``, ``generateIdLMNoise``, ``EncLayer``.

    NOTE(review): the ``tf.Session`` created here is never closed in the
    visible part of the function.
    """
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n"%(fgsm_eps, learning_rate, alpha, total_eps))

    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # ---- Parameter declaration ------------------------------------
        # Same initial stddev expression is shared by the three conv kernels.
        conv_stddev = np.sqrt(2.0/(5*5*256))/math.ceil(5 / 2)

        kernel1 = _variable_with_weight_decay('kernel1', shape=[4, 4, 3, 128], stddev=conv_stddev, wd=0.0, collect=[AECODER_VARIABLES])
        biases1 = _bias_on_cpu('biases1', [128], tf.constant_initializer(0.0), collect=[AECODER_VARIABLES])

        # Largest singular value of kernel1 flattened to (out_channels, -1).
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2*Delta2/(L*sensitivity)

        kernel2 = _variable_with_weight_decay('kernel2', shape=[5, 5, 128, 128], stddev=conv_stddev, wd=0.0, collect=[CONV_VARIABLES])
        biases2 = _bias_on_cpu('biases2', [128], tf.constant_initializer(0.1), collect=[CONV_VARIABLES])

        kernel3 = _variable_with_weight_decay('kernel3', shape=[5, 5, 256, 256], stddev=conv_stddev, wd=0.0, collect=[CONV_VARIABLES])
        biases3 = _bias_on_cpu('biases3', [256], tf.constant_initializer(0.1), collect=[CONV_VARIABLES])

        kernel4 = _variable_with_weight_decay('kernel4', shape=[int(image_size/4)**2*256, hk], stddev=0.04, wd=0.004, collect=[CONV_VARIABLES])
        biases4 = _bias_on_cpu('biases4', [hk], tf.constant_initializer(0.1), collect=[CONV_VARIABLES])

        kernel5 = _variable_with_weight_decay('kernel5', [hk, 10], stddev=np.sqrt(2.0/(int(image_size/4)**2*256))/math.ceil(5 / 2), wd=0.0, collect=[CONV_VARIABLES])
        biases5 = _bias_on_cpu('biases5', [10], tf.constant_initializer(0.1), collect=[CONV_VARIABLES])

        params = [kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5]

        # ---- Inputs ------------------------------------------------------
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # ---- Auto-encoder heads (adversarial and benign input) ----------
        Enc_Layer2 = EncLayer(inpt=adv_x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x, n_filter_in=3, n_filter_out=128, filter_size=3, W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(xShape=tf.shape(x)[0], Delta=Delta2, epsilon=epsilon2, batch_size=L, learning_rate=learning_rate, W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h)
        # Not consumed by the live code below; graph ops are kept as they were.
        cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost)/2.0)

        # ---- Classifier on benign and adversarial inputs ----------------
        x_image = x + noise
        y_conv = inference(x_image, FM_h, params)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        # The original did `adv_x += adv_noise`, which rebinds the *name*
        # adv_x to the sum tensor. Every later `feed_dict={adv_x: ...}` then
        # fed that sum instead of the placeholder that Enc_Layer2 and
        # tf.shape(adv_x) read. A separate name keeps adv_x the placeholder.
        adv_x_noisy = adv_x + adv_noise
        y_adv_conv = inference(adv_x_noisy, FM_h, params)
        adv_y_ = tf.placeholder(tf.float32, [None, 10])

        # Loss: Taylor expansion for the output layer; params[8] is kernel5.
        perturbW = perturbFM*params[8]
        loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha, perturbW)

        # ---- Training ops -------------------------------------------------
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        train_var_list = tf.get_collection(CONV_VARIABLES)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize(pretrain_adv+pretrain_benign, global_step=global_step, var_list=pretrain_var_list)
            train_op = cifar10.train(loss, global_step, learning_rate, _var_list=train_var_list)

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        # ---- Noise calibration ---------------------------------------------
        # NOTE(review): gamma / sensitivity are evaluated on this first random
        # draw of kernel1; the full initialiser further down re-draws it.
        sess.run(kernel1.initializer)
        dp_epsilon = 1.0
        _gamma = sess.run(gamma)
        _gamma_x = Delta2/L
        epsilon2_update = epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
        print(epsilon2_update/_gamma + epsilon2_update/_gamma_x)
        print(epsilon2_update)

        delta_r = fgsm_eps*(image_size**2)
        _sensitivityW = sess.run(sensitivity)
        delta_h = _sensitivityW*(14**2)
        dp_mult = (Delta2*dp_epsilon) / (L*epsilon2_update * (delta_h / 2 + delta_r))

        # Not consumed by the live code below; graph ops are kept as they were.
        dynamic_eps = tf.placeholder(tf.float32)

        # ---- Attack tensors -------------------------------------------------
        attack_switch = {'fgsm':True, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}

        ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size, adv_noise=adv_noise)

        # Attack size is fed at run time: random during training, fgsm_eps at
        # evaluation.
        mu_alpha = tf.placeholder(tf.float32, [1])
        attack_tensor_dict = {}

        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=mu_alpha, clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod / PGD without random init)
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/3, nb_iter=3, clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            x_adv_test_mim = mim_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (projected gradient with random init)
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            x_adv_test_madry = madry_obj.generate(x=x, eps=mu_alpha, eps_iter=fgsm_eps/3, nb_iter=3, clip_min=-1.0, clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry

        # ---- Saver, initialisation, checkpoint restore ----------------------
        saver = tf.train.Saver(tf.global_variables())
        init = tf.global_variables_initializer()
        sess.run(init)

        summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint, sess.graph)

        # Load the most recent model, if any.
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D/L))*epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D/L))  # number of steps for one epoch

        # Not consumed by the live code below, but still drawn so the NumPy
        # random stream seen by the following draws is unchanged.
        perturbH_test = np.random.laplace(0.0, 0, 14*14*128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])

        perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])

        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update, L)

        # Each of the three training attacks gets a third of a batch.
        emsemble_L = int(L/3)

        # NOTE(review): this reset discards the step parsed from the
        # checkpoint above, so training always counts from 0.
        _global_step = 0
        for step in range(_global_step, _global_step + T):
            start_time = time.time()
            d_eps = random.random()*0.5  # random attack size in [0, 0.5)

            # One sub-batch per attack; labels are collected in the same
            # order in which the adversarial images are stacked.
            adv_parts = []
            label_parts = []
            for atk_name in ('ifgsm', 'mim', 'madry'):
                batch = cifar10_data.train.next_batch(emsemble_L)
                label_parts.append(batch[1])
                adv_parts.append(sess.run(attack_tensor_dict[atk_name], feed_dict={x: batch[0], adv_noise: Noise, mu_alpha: [d_eps]}))
            adv_images = np.concatenate(adv_parts, axis=0)
            y_adv_batch = np.concatenate(label_parts, axis=0)

            batch = cifar10_data.train.next_batch(L)  # benign batch
            sess.run(pretrain_step, feed_dict={x: batch[0], adv_x: adv_images, adv_noise: Noise_test, noise: Noise, FM_h: perturbFM_h})
            _, loss_value = sess.run([train_op, loss], feed_dict={x: batch[0], y_: batch[1], adv_x: adv_images, adv_y_: y_adv_batch, noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h})
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # ---- Periodic robustness report --------------------------------
            if step % (50*step_for_epoch) == 0 and step >= (300*step_for_epoch):
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(step, total_eps)

                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                test_bach_size = 5000
                sample_idx = np.arange(test_bach_size)

                for atk in attack_switch.keys():
                    print(atk)
                    # -1 marks attacks that are switched off.
                    adv_acc_dict[atk] = -1
                    robust_adv_acc_dict[atk] = -1
                    robust_adv_utility_dict[atk] = -1
                    if not attack_switch[atk]:
                        continue

                    test_bach = cifar10_data.test.next_batch(test_bach_size)
                    adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={x: test_bach[0], adv_noise: Noise_test, mu_alpha: [fgsm_eps]})
                    print("Done adversarial examples")

                    ### PixelDP Robustness ###
                    # Per-sample vote counts over 1000 predictions: the first
                    # under the training noise, the rest under fresh draws.
                    predictions_form_argmax = np.zeros([test_bach_size, 10])
                    softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: perturbFM_h})
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(0, 1000):
                        _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
                        _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2_update*L), 14*14*128)
                        _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128])
                        if n_draws == 500:
                            print("n_draws = 500")
                        # One vote per sample for its current predicted class.
                        predictions_form_argmax[sample_idx, argmax_predictions] += 1
                        # Product of the softmax under extra input noise and
                        # under extra feature-map noise.
                        softmax_predictions = (
                            sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h})
                            * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h)})
                        )
                        argmax_predictions = np.argmax(softmax_predictions, axis=1)

                    is_correct = []
                    is_robust = []
                    for j in range(test_bach_size):
                        is_correct.append(np.argmax(test_bach[1][j]) == np.argmax(predictions_form_argmax[j]))
                        robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='laplace') / dp_mult
                        is_robust.append(robustness_from_argmax >= fgsm_eps)

                    num_robust = np.sum(is_robust)
                    num_robust_correct = np.sum([a and b for a, b in zip(is_robust, is_correct)])
                    adv_acc_dict[atk] = np.sum(is_correct)*1.0/test_bach_size
                    # With no certified sample the conditional accuracy is
                    # undefined; report 0.0 instead of dividing by zero (NaN).
                    robust_adv_acc_dict[atk] = num_robust_correct*1.0/num_robust if num_robust > 0 else 0.0
                    robust_adv_utility_dict[atk] = num_robust*1.0/test_bach_size

                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        # accuracy, certified accuracy, certified fraction,
                        # and the product of the last two
                        log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
                print(log_str)
                logfile.write(log_str + '\n')

            # ---- Periodic throughput report ----------------------------------
            # NOTE(review): the original comment said "Save the model
            # checkpoint periodically", but the visible code only prints
            # timing; no saver.save call is visible.
            if step % (10*step_for_epoch) == 0 and (step > _global_step):
                num_examples_per_step = L
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch))

            # NOTE(review): the source is cut off here, inside a disabled
            # (string-quoted) block that began with
            #   if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
            # Whatever followed is not reproduced.
def train(cifar10_data, logfile): """Train CIFAR-10 for a number of steps.""" logfile.write("fgsm_eps \t %g, epsilon \t %d \n" % (fgsm_eps, target_eps[0])) with tf.Graph().as_default(): global_step = tf.Variable(0, trainable=False) # Parameters Declarification #with tf.variable_scope('conv1') as scope: kernel1 = _variable_with_weight_decay( 'kernel1', shape=[3, 3, 3, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases1 = cifar10._variable_on_cpu('biases1', [128], tf.constant_initializer(0.0)) #with tf.variable_scope('conv2') as scope: kernel2 = _variable_with_weight_decay( 'kernel2', shape=[5, 5, 128, 128], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases2 = cifar10._variable_on_cpu('biases2', [128], tf.constant_initializer(0.1)) #with tf.variable_scope('conv3') as scope: kernel3 = _variable_with_weight_decay( 'kernel3', shape=[5, 5, 256, 256], stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2), wd=0.0) biases3 = cifar10._variable_on_cpu('biases3', [256], tf.constant_initializer(0.1)) #with tf.variable_scope('local4') as scope: kernel4 = cifar10._variable_with_weight_decay( 'kernel4', shape=[int(image_size / 4)**2 * 256, hk], stddev=0.04, wd=0.004) biases4 = cifar10._variable_on_cpu('biases4', [hk], tf.constant_initializer(0.1)) #with tf.variable_scope('local5') as scope: kernel5 = cifar10._variable_with_weight_decay( 'kernel5', [hk, 10], stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2), wd=0.0) biases5 = cifar10._variable_on_cpu('biases5', [10], tf.constant_initializer(0.1)) scale2 = tf.Variable(tf.ones([hk])) beta2 = tf.Variable(tf.zeros([hk])) params = [ kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4, biases4, kernel5, biases5, scale2, beta2 ] ######## # Build a Graph that computes the logits predictions from the # inference model. 
shape = kernel1.get_shape().as_list() w_t = tf.reshape(kernel1, [-1, shape[-1]]) w = tf.transpose(w_t) sing_vals = tf.svd(w, compute_uv=False) sensitivityW = tf.reduce_max(sing_vals) dp_delta = 0.05 #dp_mult = attack_norm_bound * math.sqrt(2 * math.log(1.25 / dp_delta)) / dp_epsilon noise = tf.placeholder(tf.float32, [None, 28, 28, 32]) dp_mult = attack_norm_bound * math.sqrt( 2 * math.log(1.25 / dp_delta)) / dp_epsilon noise = tf.placeholder(tf.float32, [None, 14, 14, 128]) x = tf.placeholder(tf.float32, [None, image_size, image_size, 3]) #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise); y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise) softmax_y_conv = tf.nn.softmax(y_conv) y_ = tf.placeholder(tf.float32, [None, 10]) #logits = inference(images) # Calculate loss. Apply Taylor Expansion for the output layer loss = cifar10.lossDPSGD(y_conv, y_) # noise redistribution # grad, = tf.gradients(loss, h_conv1) normalized_grad = tf.sign(grad) normalized_grad = tf.stop_gradient(normalized_grad) normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))**2 sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False) normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r print(normalized_grad_r) shape_grad = normalized_grad_r.get_shape().as_list() grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]]) g = tf.transpose(grad_t) sing_g_vals = tf.svd(g, compute_uv=False) sensitivity_2 = tf.reduce_max(sing_g_vals) ######################## opt = tf.train.GradientDescentOptimizer(lr) gw_K1 = tf.gradients(loss, kernel1)[0] gb1 = tf.gradients(loss, biases1)[0] gw_K2 = tf.gradients(loss, kernel2)[0] gb2 = tf.gradients(loss, biases2)[0] gw_K3 = tf.gradients(loss, kernel3)[0] gb3 = tf.gradients(loss, biases3)[0] gw_K4 = tf.gradients(loss, kernel4)[0] gb4 = tf.gradients(loss, biases4)[0] gw_K5 = tf.gradients(loss, kernel5)[0] gb5 = tf.gradients(loss, biases5)[0] #clip gradient gw_K1 = tf.clip_by_norm(gw_K1, clip_bound) gw_K2 = 
tf.clip_by_norm(gw_K2, clip_bound) gw_K3 = tf.clip_by_norm(gw_K3, clip_bound) gw_K4 = tf.clip_by_norm(gw_K4, clip_bound) gw_K5 = tf.clip_by_norm(gw_K5, clip_bound) #perturb gw_K1 += tf.random_normal(shape=tf.shape(gw_K1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_K2 += tf.random_normal(shape=tf.shape(gw_K2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_K3 += tf.random_normal(shape=tf.shape(gw_K3), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_K4 += tf.random_normal(shape=tf.shape(gw_K4), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gw_K5 += tf.random_normal(shape=tf.shape(gw_K5), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb1 += tf.random_normal(shape=tf.shape(gb1), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb2 += tf.random_normal(shape=tf.shape(gb2), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb3 += tf.random_normal(shape=tf.shape(gb3), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb4 += tf.random_normal(shape=tf.shape(gb4), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) gb5 += tf.random_normal(shape=tf.shape(gb5), mean=0.0, stddev=(sigma * sensitivity)**2, dtype=tf.float32) # apply gradients and keep tracking moving average of the parameters apply_gradient_op = opt.apply_gradients([(gw_K1, kernel1), (gb1, biases1), (gw_K2, kernel2), (gb2, biases2), (gw_K3, kernel3), (gb3, biases3), (gw_K4, kernel4), (gb4, biases4), (gw_K5, kernel5), (gb5, biases5)], global_step=global_step) variable_averages = tf.train.ExponentialMovingAverage( MOVING_AVERAGE_DECAY, global_step) variables_averages_op = variable_averages.apply( tf.trainable_variables()) with tf.control_dependencies( [apply_gradient_op, variables_averages_op]): train_op = tf.no_op(name='train') # Build a Graph that trains the model with one batch of examples and # updates the model parameters. 
#train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity) sess = tf.Session(config=tf.ConfigProto(log_device_placement=False)) attack_switch = { 'fgsm': True, 'ifgsm': True, 'deepfool': False, 'mim': True, 'spsa': False, 'cwl2': False, 'madry': True, 'stm': False } ch_model_probs = CustomCallableModelWrapper( callable_fn=inference_test_input_probs, output_layer='probs', params=params, image_size=image_size) # define each attack method's tensor attack_tensor_dict = {} # FastGradientMethod if attack_switch['fgsm']: print('creating attack tensor of FastGradientMethod') fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess) #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0) # testing now attack_tensor_dict['fgsm'] = x_adv_test_fgsm # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init) # default: eps_iter=0.05, nb_iter=10 if attack_switch['ifgsm']: print('creating attack tensor of BasicIterativeMethod') ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm # MomentumIterativeMethod # default: eps_iter=0.06, nb_iter=10 if attack_switch['mim']: print('creating attack tensor of MomentumIterativeMethod') mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess) #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, decay_factor=1.0, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['mim'] = 
x_adv_test_mim # MadryEtAl (Projected Grdient with random init, same as rand+fgsm) # default: eps_iter=0.01, nb_iter=40 if attack_switch['madry']: print('creating attack tensor of MadryEtAl') madry_obj = MadryEtAl(model=ch_model_probs, sess=sess) #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2) x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps / 3, nb_iter=3, clip_min=-1.0, clip_max=1.0) attack_tensor_dict['madry'] = x_adv_test_madry #====================== attack ========================= correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # Create a saver. saver = tf.train.Saver(tf.all_variables()) # Privacy accountant priv_accountant = accountant.GaussianMomentsAccountant(D) privacy_accum_op = priv_accountant.accumulate_privacy_spending( [None, None], sigma, batch_size) # Build the summary operation based on the TF collection of Summaries. #summary_op = tf.summary.merge_all() # Build an initialization operation to run below. init = tf.initialize_all_variables() # Start running operations on the Graph. sess.run(init) # Start the queue runners. 
tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph) # load the most recent models _global_step = 0 ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir) if ckpt and ckpt.model_checkpoint_path: print(ckpt.model_checkpoint_path) saver.restore(sess, ckpt.model_checkpoint_path) _global_step = int( ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]) else: print('No checkpoint file found') T = int(int(math.ceil(D / batch_size)) * epochs + 1) # number of steps step_for_epoch = int(math.ceil(D / batch_size)) #number of steps for one epoch s = math.log(sqrt(2.0 / math.pi) * 1e+5) sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / ( 2.0 * dp_epsilon) #print(sigmaEGM) __noiseE = np.random.normal(0.0, sigmaEGM**2, 14 * 14 * 128).astype(np.float32) __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128]) print("Compute The Noise Redistribution Vector") for step in xrange(_global_step, 100 * step_for_epoch): batch = cifar10_data.train.next_batch(batch_size) #Get a random batch. 
_, loss_value = sess.run([train_op, loss], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0 }) if step % (5 * step_for_epoch) == 0: print(loss_value) batch = cifar10_data.train.next_batch(40 * batch_size) grad_redis = sess.run([normalized_grad_r], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0 }) _sensitivity_2 = sess.run([sensitivity_2], feed_dict={ x: batch[0], y_: batch[1], noise: __noiseE * 0 }) #print(_sensitivity_2) _sensitivityW = sess.run(sensitivityW) #print(_sensitivityW) Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0]) #print(Delta_redis) sigmaHGM = sqrt(2.0) * Delta_redis * ( sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon) #print(sigmaHGM) __noiseH = np.random.normal(0.0, sigmaHGM**2, 14 * 14 * 128).astype(np.float32) __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis sess.run(init) print("Training") for step in xrange(_global_step, _global_step + T): start_time = time.time() batch = cifar10_data.train.next_batch(batch_size) #Get a random batch. 
#grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2}) _, loss_value = sess.run([train_op, loss], feed_dict={ x: batch[0], y_: batch[1], noise: (__noiseE + __noiseH) / 2 }) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' sess.run([privacy_accum_op]) spent_eps_deltas = priv_accountant.get_privacy_spent( sess, target_eps=target_eps) if step % (5 * step_for_epoch) == 0: print(spent_eps_deltas) _break = False for _eps, _delta in spent_eps_deltas: if _delta >= delta: _break = True break if _break == True: break ## Robustness print("Testing") adv_acc_dict = {} robust_adv_acc_dict = {} robust_adv_utility_dict = {} test_bach_size = 5000 for atk in attack_switch.keys(): if atk not in adv_acc_dict: adv_acc_dict[atk] = -1 robust_adv_acc_dict[atk] = -1 robust_adv_utility_dict[atk] = -1 if attack_switch[atk]: test_bach = cifar10_data.test.next_batch(test_bach_size) adv_images_dict = sess.run(attack_tensor_dict[atk], feed_dict={x: test_bach[0]}) ### PixelDP Robustness ### predictions_form_argmax = np.zeros([test_bach_size, 10]) softmax_predictions = sess.run(softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (__noiseE + __noiseH) / 2 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) for n_draws in range(0, 1000): _noiseE = np.random.normal(0.0, sigmaEGM**2, 14 * 14 * 128).astype(np.float32) _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128]) _noise = np.random.normal(0.0, sigmaHGM**2, 14 * 14 * 128).astype(np.float32) _noise = np.reshape(_noise, [-1, 14, 14, 128]) * grad_redis for j in range(test_bach_size): pred = argmax_predictions[j] predictions_form_argmax[j, pred] += 1 softmax_predictions = sess.run( softmax_y_conv, feed_dict={ x: adv_images_dict, noise: (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4 }) argmax_predictions = np.argmax(softmax_predictions, axis=1) final_predictions = predictions_form_argmax is_correct = [] is_robust = [] for j 
in range(test_bach_size): is_correct.append( np.argmax(test_bach[1][j]) == np.argmax( final_predictions[j])) robustness_from_argmax = robustnessGGaussian.robustness_size_argmax( counts=predictions_form_argmax[j], eta=0.05, dp_attack_size=fgsm_eps, dp_epsilon=dp_epsilon, dp_delta=0.05, dp_mechanism='gaussian') / dp_mult is_robust.append(robustness_from_argmax >= fgsm_eps) adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_bach_size robust_adv_acc_dict[atk] = np.sum([ a and b for a, b in zip(is_robust, is_correct) ]) * 1.0 / np.sum(is_robust) robust_adv_utility_dict[atk] = np.sum( is_robust) * 1.0 / test_bach_size ############################## log_str = "" for atk in attack_switch.keys(): if attack_switch[atk]: # added robust prediction log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format( atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]) print(log_str) logfile.write(log_str + '\n')
'clip_max': 1., 'clip_min': 0 } elif attack_method == 'JSMA': op = SaliencyMapMethod(cleverhans_model, sess=sess) params = {'gamma': eps} elif attack_method == 'EAD': op = ElasticNetMethod(cleverhans_model, sess=sess) params = {'confidence': eps, 'abort_early': True, 'max_iterations': 100} elif attack_method == 'CW': op = CarliniWagnerL2(cleverhans_model, sess=sess) params = {'confidence': eps} x_test = x_test[eps_iter:eps_iter + decay_factor] y_test = y_test[eps_iter:eps_iter + decay_factor] # generate adversarial examples adv_x_op = op.generate(x_op, **params) y_test = to_categorical(y_test) # Run an evaluation of our model against fgsm total = 0 correct = 0 advs = [] labs = [] idxs = [] #for xs, ys in test_loader: for i in range(0, len(x_test), 100): print(i) xs, ys = x_test[i:min(i + 100, len(x_test) )], y_test[i:min(i + 100, len(x_test))] params['y_target'] = ys