def __init__(self, sess, args):
    # Hyperparameter setting
    self.max_queries = args.max_queries
    self.epsilon = args.epsilon
    self.batch_size = args.batch_size
    self.sigma = args.sigma
    self.max_lr = args.max_lr
    self.min_lr = args.min_lr
    self.plateau_length = args.plateau_length
    self.plateau_drop = args.plateau_drop
    self.momentum = args.momentum
    self.targeted = 1 if args.targeted else -1

    # Network setting
    self.x_input = tf.placeholder(dtype=tf.float32, shape=[1, 299, 299, 3])
    self.y_input = tf.placeholder(dtype=tf.int32, shape=[1])
    self.logits, self.preds = model(sess, self.x_input)

    # Antithetic sampling: draw half the noise and mirror it
    noise_pos = tf.random_normal([self.batch_size // 2, 299, 299, 3], seed=0)
    noise = tf.concat([noise_pos, -noise_pos], axis=0)
    image_batch = self.x_input + self.sigma * noise
    label_batch = tf.tile(self.y_input, [self.batch_size])

    logits, _ = model(sess, image_batch)
    probs = tf.nn.softmax(logits)

    # Probability of the ground-truth label for each noisy sample
    batch_num = tf.range(0, limit=tf.shape(probs)[0])
    indices = tf.stack([batch_num, label_batch], axis=1)
    ground_truth_probs = tf.gather_nd(params=probs, indices=indices)

    # Highest-probability label other than the ground truth (for the CW loss)
    top_2 = tf.nn.top_k(probs, k=2)
    max_indices = tf.where(tf.equal(top_2.indices[:, 0], label_batch),
                           top_2.indices[:, 1], top_2.indices[:, 0])
    max_indices = tf.stack([batch_num, max_indices], axis=1)
    max_probs = tf.gather_nd(params=probs, indices=max_indices)

    if args.targeted:
        if args.loss_func == 'xent':
            losses = -tf.log(ground_truth_probs)
        else:
            tf.logging.info('Loss function must be xent')
            sys.exit()
    else:
        if args.loss_func == 'xent':
            losses = -tf.log(ground_truth_probs)
        elif args.loss_func == 'cw':
            losses = tf.log(max_probs + 1e-10) - tf.log(ground_truth_probs + 1e-10)
        else:
            tf.logging.info('Loss function must be xent or cw')
            sys.exit()

    # NES gradient estimate: loss-weighted average of the noise, scaled by sigma
    losses_tiled = tf.tile(tf.reshape(losses, [-1, 1, 1, 1]), [1, 299, 299, 3])
    self.grad_estimate = tf.reduce_mean(losses_tiled * noise, axis=0) / self.sigma
    self.loss = tf.reduce_mean(losses, axis=0)
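# --- Usage sketch (added; not part of the original file) ---
# A minimal NES-PGD driver that consumes self.grad_estimate / self.loss built
# above. The method name `perturb` and the exact bookkeeping are assumptions;
# only the hyperparameters set in __init__ are taken as given.
def perturb(self, sess, x_nat, y):
    lower = np.clip(x_nat - self.epsilon, 0., 1.)
    upper = np.clip(x_nat + self.epsilon, 0., 1.)
    adv = np.copy(x_nat)
    g = np.zeros(x_nat.shape[1:], dtype=np.float32)  # momentum buffer
    lr, last_losses = self.max_lr, []
    for _ in range(self.max_queries // self.batch_size):
        loss, grad = sess.run([self.loss, self.grad_estimate],
                              feed_dict={self.x_input: adv, self.y_input: y})
        g = self.momentum * g + (1. - self.momentum) * grad
        # Plateau annealing: drop the lr when the loss stops improving.
        last_losses.append(loss)
        last_losses = last_losses[-self.plateau_length:]
        if len(last_losses) == self.plateau_length and last_losses[-1] > last_losses[0]:
            lr = max(lr / self.plateau_drop, self.min_lr)
            last_losses = []
        # self.targeted is +1 (descend on -log p_target) or -1 (ascend on -log p_true).
        adv = np.clip(adv - self.targeted * lr * np.sign(g), lower, upper)
    return adv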
def __init__(self, args, sess):
    self.max_queries = args.max_queries
    self.epsilon = args.epsilon
    self.gradient_iters = args.gradient_iters
    self.batch_size = args.batch_size
    self.prior_size = args.tile_size
    self.exploration = args.exploration
    self.fd_eta = args.fd_eta
    self.online_lr = args.online_lr
    self.image_lr = args.image_lr

    self.x_input = tf.placeholder(dtype=tf.float32,
                                  shape=[self.batch_size, IMAGENET_SL, IMAGENET_SL, 3])
    self.y_input = tf.placeholder(dtype=tf.int32, shape=[self.batch_size])
    logits, self.preds = model(sess, self.x_input)
    self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_input)

    # Low-dimensional prior over the gradient, refined by bandit feedback
    self.prior = tf.placeholder(dtype=tf.float32,
                                shape=[self.batch_size, self.prior_size,
                                       self.prior_size, 3])
    dim = self.prior_size * self.prior_size * 3
    exp_noise = self.exploration * tf.random_normal(
        [self.batch_size, self.prior_size, self.prior_size, 3]) / (dim ** 0.5)
    q1 = tf.image.resize_images(self.prior + exp_noise,
                                [IMAGENET_SL, IMAGENET_SL],
                                tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    q2 = tf.image.resize_images(self.prior - exp_noise,
                                [IMAGENET_SL, IMAGENET_SL],
                                tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    def norm(t):
        t = tf.reshape(t, [self.batch_size, -1])
        return tf.reshape(tf.norm(t, axis=1), [-1, 1, 1, 1])

    # Two-point finite-difference estimate of the directional derivative
    logit1, _ = model(sess, self.x_input + self.fd_eta * q1 / norm(q1))
    logit2, _ = model(sess, self.x_input + self.fd_eta * q2 / norm(q2))
    loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit1, labels=self.y_input)
    loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logit2, labels=self.y_input)
    est_deriv = (loss1 - loss2) / (self.fd_eta * self.exploration)
    est_grad = tf.reshape(est_deriv, [-1, 1, 1, 1]) * exp_noise

    # Exponentiated-gradient step keeps the prior inside [-1, 1]
    def eg_step(x, g, lr):
        real_x = (x + 1) / 2  # map from [-1, 1] to [0, 1]
        pos = real_x * tf.exp(lr * g)
        neg = (1 - real_x) * tf.exp(-lr * g)
        new_x = pos / (pos + neg)
        return new_x * 2 - 1

    self.prior_new = eg_step(self.prior, est_grad, self.online_lr)
    self.image_new = self.x_input + self.image_lr * tf.sign(
        tf.image.resize_images(self.prior_new, [IMAGENET_SL, IMAGENET_SL],
                               tf.image.ResizeMethod.NEAREST_NEIGHBOR))
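# --- Usage sketch (added; not part of the original file) ---
# A minimal driver for the bandits graph above: the low-resolution prior is
# threaded through successive sess.run calls until the batch is misclassified
# or the query budget runs out. `perturb` and the clipping policy are
# assumptions; IMAGENET_SL is taken to be 299 as in the placeholders above.
def perturb(self, sess, x_nat, y):
    prior = np.zeros((self.batch_size, self.prior_size, self.prior_size, 3),
                     dtype=np.float32)
    adv = np.copy(x_nat)
    # Each step spends two queries per image on the finite-difference pair,
    # plus one on the prediction check.
    for _ in range(self.max_queries // 3):
        adv, prior = sess.run([self.image_new, self.prior_new],
                              feed_dict={self.x_input: adv,
                                         self.y_input: y,
                                         self.prior: prior})
        adv = np.clip(adv, x_nat - self.epsilon, x_nat + self.epsilon)
        adv = np.clip(adv, 0., 1.)
        if np.all(sess.run(self.preds, {self.x_input: adv}) != y):
            break  # untargeted success on the whole batch
    return adv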
def partial_info_loss(eval_points, noise):
    logits, preds = model(sess, eval_points)
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=labels)
    vals, inds = tf.nn.top_k(logits, k=k)
    # inds is batch_size x k
    good_inds = tf.where(tf.equal(inds, tf.constant(target_class)))  # returns (# true) x 2
    good_images = good_inds[:, 0]  # inds of img in batch that worked
    losses = tf.gather(losses, good_images)
    noise = tf.gather(noise, good_images)
    return losses, noise
def label_only_loss(eval_points, noise):
    noised_eval_points = tf.zeros((batch_per_gpu,))
    tiled_points = tf.tile(tf.expand_dims(eval_points, 0),
                           [zero_iters, 1, 1, 1, 1])
    noised_eval_im = tiled_points + \
        tf.random_uniform(tf.shape(tiled_points), minval=-1,
                          maxval=1) * args.label_only_sigma
    logits, preds = model(sess, tf.reshape(noised_eval_im,
                                           (-1,) + initial_img.shape))
    vals, inds = tf.nn.top_k(logits, k=k)
    real_inds = tf.reshape(inds, (zero_iters, batch_per_gpu, -1))
    rank_range = tf.range(start=k, limit=0, delta=-1, dtype=tf.float32)
    tiled_rank_range = tf.tile(tf.reshape(rank_range, (1, 1, k)),
                               [zero_iters, batch_per_gpu, 1])
    batches_in = tf.where(tf.equal(real_inds, target_class),
                          tiled_rank_range,
                          tf.zeros(tf.shape(tiled_rank_range)))
    return 1 - tf.reduce_mean(batches_in, [0, 2]), noise
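# --- Worked example (added; illustrative only) ---
# The label-only proxy above credits the target class with k - r + 1 when it
# appears at position r of the top-k list, averaged over the noised copies.
# The same reduction in plain numpy, for k=5, one image, two noised copies:
import numpy as np
k_ex = 5
rank_range_ex = np.arange(k_ex, 0, -1, dtype=np.float32)   # [5, 4, 3, 2, 1]
top_k_inds_ex = np.array([[3, 7, 1, 0, 9],                 # noised copy 1
                          [7, 3, 2, 1, 5]])                # noised copy 2
target_class_ex = 7
hits = np.where(top_k_inds_ex == target_class_ex, rank_range_ex, 0.)
proxy_loss = 1 - hits.mean()   # = 1 - (4 + 5) / 10 = 0.1; lower is better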
def __init__(self, sess, model, epsilon, loss_func):
    """Attack parameter initialization. The attack performs k steps of size
    a, while always staying within epsilon from the initial point."""
    self.model = model
    self.epsilon = epsilon
    self.loss_func = loss_func

    self.model_x = tf.placeholder(tf.float32, (None, 299, 299, 3))
    self.model_y = tf.placeholder(tf.int64, None)
    self.logits, self.predictions = model(sess, self.model_x, params.model_dir)
    y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.model_y)
    self.correct_prediction = tf.equal(self.predictions, self.model_y)
    self.num_correct = tf.reduce_sum(
        tf.cast(self.correct_prediction, tf.int32))
    self.loss = y_xent
def __init__(self, sess, model, epsilon):
    """Attack parameter initialization. The attack performs k steps of size
    a, while always staying within epsilon from the initial point."""
    self.model = model
    self.epsilon = epsilon

    self.x_input = tf.placeholder(tf.float32, (None, 299, 299, 3))
    self.y_input = tf.placeholder(tf.int32, None)
    self.logits, self.predictions = model(sess, self.x_input, args.model_dir)
    self.predictions = tf.cast(self.predictions, tf.int32)
    self.correct_prediction = tf.equal(self.predictions, self.y_input)
    self.num_correct = tf.reduce_sum(
        tf.cast(self.correct_prediction, tf.int32))
    self.probs = tf.nn.softmax(self.logits)
    self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.logits, labels=self.y_input)
    self.grad = tf.gradients(self.loss, self.x_input)[0]
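# --- Usage sketch (added; not part of the original class) ---
# With self.grad exposed, a standard white-box PGD loop would look like the
# following. The method name, step count, and step size are assumptions, and
# pixel values are assumed to lie in [0, 1].
def perturb(self, sess, x_nat, y, num_steps=20, step_size=2.0 / 255):
    adv = np.copy(x_nat)
    for _ in range(num_steps):
        grad = sess.run(self.grad, feed_dict={self.x_input: adv,
                                              self.y_input: y})
        adv = adv + step_size * np.sign(grad)   # ascend the xent loss
        adv = np.clip(adv, x_nat - self.epsilon, x_nat + self.epsilon)
        adv = np.clip(adv, 0., 1.)
    return adv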
parser.add_argument('--batch_size', default=64, type=int)
args = parser.parse_args()

if __name__ == '__main__':
    # Set verbosity
    tf.logging.set_verbosity(tf.logging.INFO)

    # Create session
    sess = tf.InteractiveSession()

    # Build graph
    x_input = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
    y_input = tf.placeholder(dtype=tf.int32, shape=[None])
    logits, preds = model(sess, x_input)
    # Note: this rebinding shadows the model-builder function imported above.
    model = {
        'x_input': x_input,
        'y_input': y_input,
        'logits': logits,
        'preds': preds,
        'targeted': args.targeted,
    }

    # Print hyperparameters
    for key, val in vars(args).items():
        tf.logging.info('{}={}'.format(key, val))

    # Create attack class.
    attack_class = getattr(sys.modules[__name__], args.attack)
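    # --- Hedged continuation (added; not in the original excerpt) ---
    # The class looked up above is presumably instantiated with the model
    # dict and parsed arguments and then run per batch, along the lines of:
    #
    #   attack = attack_class(model, args)
    #   x_adv = attack.perturb(x_batch, y_batch, sess)
    #
    # The exact constructor and perturb signatures vary across the attack
    # classes in this file, so the two calls above are only an assumption.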
def run_attack(x_adv, model, sess, x_full_batch, y_full_batch, percentage_mean):
    model_x = tf.placeholder(tf.float32, (None, 299, 299, 3))
    model_y = tf.placeholder(tf.int64, None)
    logits, predictions = model(sess, model_x, params.model_dir)
    correct_prediction = tf.equal(predictions, model_y)
    num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))

    # Fixed: was `num_eval_samples` here but `num_eval_examples` everywhere below.
    num_eval_examples = x_adv.shape[0]
    eval_batch_size = min(num_eval_examples, 64)
    num_batches = int(math.ceil(num_eval_examples / eval_batch_size))

    # Error checking: the perturbation must stay within the epsilon ball
    total_corr = 0
    x_nat = x_full_batch
    l_inf = np.amax(np.abs(x_nat - x_adv))
    if l_inf > params.eps + 0.0001:
        print('breached maximum perturbation')
        print('l_inf value:{}'.format(l_inf))
        return

    y_pred = []
    success = []
    for ibatch in range(num_batches):
        bstart = ibatch * eval_batch_size
        bend = min(bstart + eval_batch_size, num_eval_examples)
        x_batch = x_adv[bstart:bend, :]
        y_batch = y_full_batch[bstart:bend]
        dict_adv = {model_x: x_batch, model_y: y_batch}
        # Fixed: run the local tensors; `model` here is the model-builder
        # callable, not an object with num_correct/correct_prediction.
        cur_corr, correct_predictions = sess.run(
            [num_correct, correct_prediction], feed_dict=dict_adv)
        total_corr += cur_corr
        success.append(np.array(np.nonzero(np.invert(correct_predictions))) +
                       ibatch * eval_batch_size)

    success = np.concatenate(success, axis=1)
    np.save('out/parallel_admm_success.npy', success)
    accuracy = total_corr / num_eval_examples
    print('adv Accuracy: {:.2f}%'.format(100.0 * accuracy))
    with open('out/result.txt', 'a') as f:
        # Fixed: attack_type and eps were swapped relative to the labels.
        f.write('''Resnet, {}, eps:{}, sample_size:{}, loss_func:{} => acc:{}, percentage:{}\n'''.format(
            params.attack_type, params.eps, params.sample_size,
            params.loss_func, accuracy, percentage_mean))

    total_corr = 0
    for ibatch in range(num_batches):
        bstart = ibatch * eval_batch_size
        bend = min(bstart + eval_batch_size, num_eval_examples)
        x_batch = x_full_batch[bstart:bend, :]
        y_batch = y_full_batch[bstart:bend]
        dict_adv = {model_x: x_batch, model_y: y_batch}
        cur_corr = sess.run(num_correct, feed_dict=dict_adv)
        total_corr += cur_corr
    accuracy = total_corr / num_eval_examples
    print('nat Accuracy: {:.2f}%'.format(100.0 * accuracy))
def perturb(self, x_nat, y, sesses, ibatch):
    sess = self.sess
    model_x = tf.placeholder(tf.float32, (None, 299, 299, 3))
    model_y = tf.placeholder(tf.int64, None)
    logits, predictions = model(sess, model_x, params.model_dir)
    correct_prediction = tf.equal(predictions, model_y)
    num_correct = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))

    self.query = [0, 0]
    self.insert_count = 0
    self.put_count = 0
    self.success = False

    x_adv = np.copy(x_nat)
    x_m = np.clip(x_nat - params.eps, 0, 255)
    x_p = np.clip(x_nat + params.eps, 0, 255)

    block_size = params.block_size
    _, xt, yt, zt = x_nat.shape
    assert (xt % block_size == 0 and yt % block_size == 0)
    blocks = self.block_partition((xt, yt, zt))
    assert (len(blocks) % params.gpus == 0)

    # ADMM variables: one dual vector per block
    yk_li = []
    for block in blocks:
        yk_li.append(np.zeros(len(block)))
    rho = params.admm_rho
    tau = params.admm_tau

    img_indices = [(xi, yi, zi) for xi in range(xt) for yi in range(yt)
                   for zi in range(zt)]

    iter_round = 0
    while iter_round < params.admm_iter:
        print('{}th round...'.format(iter_round))

        indices_count = dict()
        for index in img_indices:
            indices_count[index] = 0
        x_adv_new = np.copy(x_adv)

        # Parallel per-block update, one thread per block, params.gpus at a time
        block_results = [0 for i in range(len(blocks))]
        self.block_insert_count = [0 for i in range(len(blocks))]
        self.block_put_count = [0 for i in range(len(blocks))]
        self.block_queries = [0 for i in range(len(blocks))]
        threads = [
            threading.Thread(target=self.ldg_block_seg,
                             args=(block_results, x_adv, y,
                                   sesses[i % params.gpus],
                                   self.models[i % params.gpus], i, blocks[i],
                                   x_m, x_p, yk_li[i], rho))
            for i in range(len(blocks))
        ]
        num_running = 0
        for i in range(len(blocks)):
            threads[i].start()
            num_running += 1
            if num_running == params.gpus:
                for j in range(i - params.gpus + 1, i + 1):
                    threads[j].join()
                if self.success:
                    img_batch = np.concatenate(
                        block_results[i - params.gpus + 1:i + 1], axis=0)
                    y_batch = np.tile(y, len(img_batch))
                    self.query[0] += params.gpus
                    self.query[1] += params.gpus
                    correct_predictions = sess.run(
                        correct_prediction,
                        feed_dict={model_x: img_batch, model_y: y_batch})
                    success = np.array(
                        [idx for idx in range(len(correct_predictions))]
                    )[np.invert(correct_predictions)]
                    for k in success:
                        print("attack success!")
                        self.insert_count += sum(self.block_insert_count)
                        self.put_count += sum(self.block_put_count)
                        self.query[0] += sum(self.block_queries)
                        self.query[1] += max(self.block_queries)
                        print("num of re-inserted pixels:", self.insert_count)
                        print("num of perturbed pixels:", self.put_count)
                        print("num of queries:", self.query)
                        self.ratios.append(
                            (self.put_count + 0.0001) /
                            (self.insert_count + self.put_count + 0.0001))
                        self.queries.append(self.query)
                        self.block_success_stat[iter_round] += 1
                        return np.reshape(img_batch[k], (1, *img_batch[k].shape))
                num_running = 0

        # Update x_adv_new, averaging results where blocks overlap
        for i in range(len(blocks)):
            block = blocks[i]
            block_result = block_results[i]
            for index in block:
                xi, yi, zi = index
                val = block_result[0, xi, yi, zi]
                x_adv_new[0, xi, yi, zi] = \
                    (x_adv_new[0, xi, yi, zi] * indices_count[index] + val) \
                    / (indices_count[index] + 1)
                indices_count[index] += 1

        pixel_change = np.count_nonzero(x_adv_new - x_adv)
        print("changed pixels:", pixel_change)

        self.insert_count += sum(self.block_insert_count)
        self.put_count += sum(self.block_put_count)
        self.query[0] += sum(self.block_queries)
        round_query = 0
        for i in range(len(blocks) // params.gpus):
            round_query += np.mean(
                self.block_queries[i * params.gpus:(i + 1) * params.gpus])
        self.query[1] += round_query
        print("round re-inserts:", sum(self.block_insert_count))
        print("round perturbs:", sum(self.block_put_count))
        print("round queries:", sum(self.block_queries))
        print("round queries(parallel):", round_query)

        # Global variable (z) update
        x_adv = np.copy(x_adv_new)

        # ADMM dual update (yk, rho)
        if params.block_scheme == 'admm':
            for i in range(len(yk_li)):
                block = blocks[i]
                block_result = block_results[i]
                block_dist = []
                for index in block:
                    xi, yi, zi = index
                    block_dist.append(block_result[0, xi, yi, zi] -
                                      x_adv[0, xi, yi, zi])
                block_dist = np.array(block_dist)
                yk_li[i] += rho * block_dist
            rho *= tau

        if params.early_stop == 'y':
            num_corrects = sess.run(num_correct,
                                    feed_dict={model_x: x_adv, model_y: y})
            self.query[0] += 1
            self.query[1] += 1
            assert np.amax(np.abs(x_adv - x_nat)) < params.eps + 0.0001
            if num_corrects == 0:
                print("attack success!")
                print("num of re-inserted pixels:", self.insert_count)
                print("num of perturbed pixels:", self.put_count)
                print("num of queries:", self.query)
                self.success = True
                self.ratios.append(
                    (self.put_count + 0.0001) /
                    (self.insert_count + self.put_count + 0.0001))
                self.queries.append(self.query)
                self.block_success_stat[iter_round] += 1
                return x_adv

        # Termination conditions
        if pixel_change == 0:
            self.admm_converge_stat[iter_round] += 1
            break
        # stop if # of blocks == 1
        if len(blocks) == 1:
            break
        iter_round += 1

    print("attack failed")
    print("num of re-inserted pixels:", self.insert_count)
    print("num of perturbed pixels:", self.put_count)
    print("num of queries:", self.query)
    self.ratios.append((self.put_count + 0.0001) /
                       (self.insert_count + self.put_count + 0.0001))
    return x_adv
def ldg_block_seg(self, block_results, x_adv, y, sess, model, ith_thread,
                  block, x_m, x_p, yk=0, rho=0, Resize=params.resize):
    insert_count = 0
    put_count = 0
    queue = PeekablePriorityQueue()

    _, xt, yt, zt = x_adv.shape
    resize = Resize
    block = sorted(block)

    model_x = tf.placeholder(tf.float32, (None, 299, 299, 3))
    model_y = tf.placeholder(tf.int64, None)
    logits, predictions = model(sess, model_x, params.model_dir)
    y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=model_y)
    correct_prediction = tf.equal(predictions, model_y)
    loss = -y_xent

    # Pick one anchor pixel per resize x resize patch
    anchor_block = []
    selected = set()
    for index in block:
        if index not in selected:
            anchor_block.append(index)
            xi, yi, zi = index
            for xxi in range(resize):
                for yyi in range(resize):
                    selected.add((xi + xxi, yi + yyi, zi))
    anchor_block = sorted(anchor_block)
    block = set(block)

    block_x_m = np.copy(x_adv)
    block_x_p = np.copy(x_adv)
    num_pixels = len(anchor_block)
    for index in block:
        xi, yi, zi = index
        block_x_m[0, xi, yi, zi] = x_m[0, xi, yi, zi]
        block_x_p[0, xi, yi, zi] = x_p[0, xi, yi, zi]

    cur_m = sess.run(loss, feed_dict={model_x: block_x_m, model_y: y})
    cur_p = sess.run(loss, feed_dict={model_x: block_x_p, model_y: y})
    if params.block_scheme == 'admm':
        cur_m += self.admm_loss(block, block_x_m, x_adv, yk, rho)
        cur_p += self.admm_loss(block, block_x_p, x_adv, yk, rho)

    # First pass: score every anchor patch in both directions
    batch_size = min(100, num_pixels)
    num_batches = num_pixels // batch_size
    block_index1, block_index2 = 0, 0
    for ith_batch in range(num_batches + 1):
        if ith_batch == num_batches:
            if num_pixels % batch_size == 0:
                break
            else:
                batch_size = num_pixels % batch_size
        img_batch_m = np.tile(block_x_m, (batch_size, 1, 1, 1))
        img_batch_p = np.tile(block_x_p, (batch_size, 1, 1, 1))
        img_batch = np.concatenate([img_batch_m, img_batch_p])
        label_batch = np.tile(y, (2 * batch_size))
        for j in range(batch_size):
            xb, yb, zb = anchor_block[block_index1]
            block_index1 += 1
            for xxi in range(resize):
                for yyi in range(resize):
                    # Fixed: the membership test used a stale `zi` from an
                    # earlier loop; the patch's own channel is `zb`.
                    if (xb + xxi, yb + yyi, zb) in block:
                        img_batch[j, xb + xxi, yb + yyi, zb] = \
                            block_x_p[0, xb + xxi, yb + yyi, zb]
                        img_batch[batch_size + j, xb + xxi, yb + yyi, zb] = \
                            block_x_m[0, xb + xxi, yb + yyi, zb]
        feed_dict = {model_x: img_batch, model_y: label_batch}
        losses = sess.run(loss, feed_dict=feed_dict)
        for pos in range(losses.size // 2):
            xb, yb, zb = anchor_block[block_index2]
            block_index2 += 1
            if params.block_scheme == 'admm':
                losses[pos] += self.admm_loss(block, img_batch[pos], x_adv,
                                              yk, rho)
                losses[batch_size + pos] += self.admm_loss(
                    block, img_batch[batch_size + pos], x_adv, yk, rho)
            pi = losses[pos] - cur_m
            mi = losses[batch_size + pos] - cur_p
            queue.put(Greedy([xb, yb, zb], pi, mi, False))
    num_queries = 2 * num_pixels

    # Second pass: lazy greedy over the priority queue
    while not queue.empty():
        candid = queue.get()
        second = None
        if not queue.empty():
            second = queue.peek()
        xi, yi, zi = candid.loc
        img_batch = np.concatenate([block_x_m, block_x_p])
        for xxi in range(resize):
            for yyi in range(resize):
                if (xi + xxi, yi + yyi, zi) in block:
                    img_batch[0, xi + xxi, yi + yyi, zi] = \
                        block_x_p[0, xi + xxi, yi + yyi, zi]
                    img_batch[1, xi + xxi, yi + yyi, zi] = \
                        block_x_m[0, xi + xxi, yi + yyi, zi]
        y_batch = np.tile(y, 2)
        losses, correct_predictions = sess.run(
            [loss, correct_prediction],
            feed_dict={model_x: img_batch, model_y: y_batch})
        num_queries += 2
        if params.early_stop == 'y':
            success = np.array([0, 1])[np.invert(correct_predictions)]
            for i in success:
                self.block_insert_count[ith_thread] = insert_count
                self.block_put_count[ith_thread] = put_count
                self.block_queries[ith_thread] = num_queries
                self.success = True
                block_results[ith_thread] = np.reshape(
                    img_batch[i], (1, *img_batch[i].shape))
                return
        if params.block_scheme == 'admm':
            losses[0] += self.admm_loss(block, img_batch[0], x_adv, yk, rho)
            losses[1] += self.admm_loss(block, img_batch[1], x_adv, yk, rho)
        candid.update(losses[0] - cur_m, losses[1] - cur_p)
        if not second or candid <= second:
            put_count += 1
            if candid.getDir():
                for xxi in range(resize):
                    for yyi in range(resize):
                        if (xi + xxi, yi + yyi, zi) in block:
                            block_x_m[0, xi + xxi, yi + yyi, zi] = \
                                block_x_p[0, xi + xxi, yi + yyi, zi]
                cur_m = losses[0]
            else:
                for xxi in range(resize):
                    for yyi in range(resize):
                        if (xi + xxi, yi + yyi, zi) in block:
                            block_x_p[0, xi + xxi, yi + yyi, zi] = \
                                block_x_m[0, xi + xxi, yi + yyi, zi]
                cur_p = losses[1]
        else:
            insert_count += 1
            queue.put(candid)
        # Parallel early stopping
        if self.success:
            break

    self.block_insert_count[ith_thread] = insert_count
    self.block_put_count[ith_thread] = put_count
    self.block_queries[ith_thread] = num_queries
    block_results[ith_thread] = block_x_m
    return
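# --- Hedged sketch (added): self.admm_loss is called above but not defined
# in this excerpt. Based on the dual update `yk += rho * (x_block - z_block)`
# in perturb(), it plausibly evaluates the augmented-Lagrangian penalty
#   yk . (x_block - z_block) + (rho / 2) * ||x_block - z_block||^2
# over the block's pixels. Everything below, including the ordering via
# sorted(block), is an assumption (numpy is imported as np elsewhere here).
def admm_loss(self, block, x, z, yk, rho):
    # Callers pass x both with and without the leading batch dimension.
    x = x if x.ndim == 3 else x[0]
    dist = np.array([x[xi, yi, zi] - z[0, xi, yi, zi]
                     for (xi, yi, zi) in sorted(block)])
    return np.dot(yk, dist) + (rho / 2.0) * np.dot(dist, dist)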
def main(args, gpus):
    # INITIAL IMAGE AND CLASS SELECTION
    if args.img_path:
        initial_img = np.asarray(Image.open(args.img_path).resize((SIZE, SIZE)))
        orig_class = args.orig_class
        initial_img = initial_img.astype(np.float32) / 255.0
    else:
        x, y = get_image(args.img_index, IMAGENET_PATH)
        orig_class = y
        initial_img = x

    # PARAMETER SETUP
    if args.target_class is None:
        target_class = pseudorandom_target(args.img_index, NUM_LABELS,
                                           orig_class)
        print('chose pseudorandom target class: %d' % target_class)
    else:
        target_class = args.target_class

    batch_size = args.batch_size
    out_dir = args.out_dir
    epsilon = args.epsilon
    lower = np.clip(initial_img - args.epsilon, 0., 1.)
    upper = np.clip(initial_img + args.epsilon, 0., 1.)
    adv = initial_img.copy() if not args.restore else \
        np.clip(np.load(args.restore), lower, upper)
    batch_per_gpu = batch_size // len(gpus)
    log_iters = args.log_iters
    current_lr = args.learning_rate
    queries_per_iter = args.samples_per_draw
    # Fixed: floor division inside ceil made the ceil a no-op.
    max_iters = int(np.ceil(args.max_queries / queries_per_iter))
    max_lr = args.max_lr

    # ----- partial info params -----
    k = args.top_k
    goal_epsilon = epsilon
    adv_thresh = args.adv_thresh
    if k > 0:
        if target_class == -1:
            raise ValueError("Partial-information attack is a targeted attack.")
        adv = image_of_class(target_class, IMAGENET_PATH)
        epsilon = args.starting_eps
        delta_epsilon = args.starting_delta_eps
    else:
        k = NUM_LABELS

    # ----- label only params -----
    label_only = args.label_only
    zero_iters = args.zero_iters

    # TARGET CLASS SELECTION
    if target_class < 0:
        one_hot_vec = one_hot(orig_class, NUM_LABELS)
    else:
        one_hot_vec = one_hot(target_class, NUM_LABELS)
    labels = np.repeat(np.expand_dims(one_hot_vec, axis=0),
                       repeats=batch_per_gpu, axis=0)
    is_targeted = 1 if target_class >= 0 else -1

    # SESSION INITIALIZATION
    sess = tf.InteractiveSession()
    x = tf.placeholder(tf.float32, initial_img.shape)
    eval_logits, eval_preds = model(sess, tf.expand_dims(x, 0))
    eval_percent_adv = tf.equal(eval_preds[0],
                                tf.constant(target_class, tf.int64))

    # TENSORBOARD SETUP
    empirical_loss = tf.placeholder(dtype=tf.float32, shape=())
    lr_placeholder = tf.placeholder(dtype=tf.float32, shape=())
    loss_vs_queries = tf.summary.scalar('empirical loss vs queries',
                                        empirical_loss)
    loss_vs_steps = tf.summary.scalar('empirical loss vs step', empirical_loss)
    lr_vs_queries = tf.summary.scalar('lr vs queries', lr_placeholder)
    lr_vs_steps = tf.summary.scalar('lr vs step', lr_placeholder)
    writer = tf.summary.FileWriter(out_dir, graph=sess.graph)
    log_file = open(os.path.join(out_dir, 'log.txt'), 'w+')
    with open(os.path.join(out_dir, 'args.json'), 'w') as args_file:
        json.dump(args.__dict__, args_file)

    # LOSS FUNCTION
    def standard_loss(eval_points, noise):
        logits, preds = model(sess, eval_points)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=labels)
        return losses, noise

    def label_only_loss(eval_points, noise):
        noised_eval_points = tf.zeros((batch_per_gpu,))  # unused in this excerpt
        tiled_points = tf.tile(tf.expand_dims(eval_points, 0),
                               [zero_iters, 1, 1, 1, 1])
        noised_eval_im = tiled_points + \
            tf.random_uniform(tf.shape(tiled_points), minval=-1,
                              maxval=1) * args.label_only_sigma
        logits, preds = model(sess, tf.reshape(noised_eval_im,
                                               (-1,) + initial_img.shape))
        vals, inds = tf.nn.top_k(logits, k=k)
        real_inds = tf.reshape(inds, (zero_iters, batch_per_gpu, -1))
        rank_range = tf.range(start=k, limit=0, delta=-1, dtype=tf.float32)
        tiled_rank_range = tf.tile(tf.reshape(rank_range, (1, 1, k)),
                                   [zero_iters, batch_per_gpu, 1])
        batches_in = tf.where(tf.equal(real_inds, target_class),
                              tiled_rank_range,
                              tf.zeros(tf.shape(tiled_rank_range)))
        return 1 - tf.reduce_mean(batches_in, [0, 2]), noise

    def partial_info_loss(eval_points, noise):
        logits, preds = model(sess, eval_points)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                         labels=labels)
        vals, inds = tf.nn.top_k(logits, k=k)
        # inds is batch_size x k
        good_inds = tf.where(tf.equal(inds, tf.constant(target_class)))  # returns (# true) x 2
        good_images = good_inds[:, 0]  # inds of img in batch that worked
        losses = tf.gather(losses, good_images)
        noise = tf.gather(noise, good_images)
        return losses, noise

    # GRADIENT ESTIMATION GRAPH
    grad_estimates = []
    final_losses = []
    loss_fn = label_only_loss if label_only else \
        (partial_info_loss if k < NUM_LABELS else standard_loss)
    for i, device in enumerate(gpus):
        with tf.device(device):
            print('loading on gpu %d of %d' % (i + 1, len(gpus)))
            noise_pos = tf.random_normal((batch_per_gpu // 2,) +
                                         initial_img.shape)
            noise = tf.concat([noise_pos, -noise_pos], axis=0)
            eval_points = x + args.sigma * noise
            losses, noise = loss_fn(eval_points, noise)
            losses_tiled = tf.tile(tf.reshape(losses, (-1, 1, 1, 1)),
                                   (1,) + initial_img.shape)
            grad_estimates.append(
                tf.reduce_mean(losses_tiled * noise, axis=0) / args.sigma)
            final_losses.append(losses)
    grad_estimate = tf.reduce_mean(grad_estimates, axis=0)
    final_losses = tf.concat(final_losses, axis=0)

    # GRADIENT ESTIMATION EVAL
    def get_grad(pt, spd, bs):
        num_batches = spd // bs
        losses = []
        grads = []
        feed_dict = {x: pt}
        for _ in range(num_batches):
            loss, dl_dx_ = sess.run([final_losses, grad_estimate], feed_dict)
            losses.append(np.mean(loss))
            grads.append(dl_dx_)
        return np.array(losses).mean(), np.mean(np.array(grads), axis=0)

    # CONCURRENT VISUALIZATION
    if args.visualize:
        with tf.device('/cpu:0'):
            render_feed = tf.placeholder(tf.float32, initial_img.shape)
            render_exp = tf.expand_dims(render_feed, axis=0)
            render_logits, _ = model(sess, render_exp)

    assert out_dir[-1] == '/'

    # HISTORY VARIABLES (for backtracking and momentum)
    num_queries = 0
    g = 0
    prev_adv = adv
    last_ls = []

    # STEP CONDITION (important for partial-info attacks)
    def robust_in_top_k(t_, prop_adv_, k_):
        if k == NUM_LABELS:
            return True
        for i in range(1):
            n = np.random.rand(*prop_adv_.shape) * args.sigma  # unused in this excerpt
            eval_logits_ = sess.run(eval_logits, {x: prop_adv_})[0]
            if t_ not in eval_logits_.argsort()[-k_:][::-1]:
                return False
        return True

    # MAIN LOOP
    for i in range(max_iters):
        start = time.time()
        if args.visualize:
            render_frame(sess, adv, i, render_logits, render_feed, out_dir)

        # CHECK IF WE SHOULD STOP
        padv = sess.run(eval_percent_adv, feed_dict={x: adv})
        if padv == 1 and epsilon <= goal_epsilon:
            print('[log] early stopping at iteration %d' % i)
            break

        prev_g = g
        l, g = get_grad(adv, args.samples_per_draw, batch_size)

        # SIMPLE MOMENTUM
        g = args.momentum * prev_g + (1.0 - args.momentum) * g

        # PLATEAU LR ANNEALING
        last_ls.append(l)
        last_ls = last_ls[-args.plateau_length:]
        if last_ls[-1] > last_ls[0] and len(last_ls) == args.plateau_length:
            if max_lr > args.min_lr:
                print("[log] Annealing max_lr")
                max_lr = max(max_lr / args.plateau_drop, args.min_lr)
            last_ls = []

        # SEARCH FOR LR AND EPSILON DECAY
        current_lr = max_lr
        proposed_adv = adv - is_targeted * current_lr * np.sign(g)
        prop_de = 0.0
        if l < adv_thresh and epsilon > goal_epsilon:
            prop_de = delta_epsilon

        while current_lr >= args.min_lr:
            # PARTIAL INFORMATION ONLY
            if k < NUM_LABELS:
                proposed_epsilon = max(epsilon - prop_de, goal_epsilon)
                lower = np.clip(initial_img - proposed_epsilon, 0, 1)
                upper = np.clip(initial_img + proposed_epsilon, 0, 1)
            # GENERAL LINE SEARCH
            proposed_adv = adv - is_targeted * current_lr * np.sign(g)
            proposed_adv = np.clip(proposed_adv, lower, upper)
            num_queries += 1
            if robust_in_top_k(target_class, proposed_adv, k):
                if prop_de > 0:
                    delta_epsilon = max(prop_de, 0.1)
                    last_ls = []
                prev_adv = adv
                adv = proposed_adv
                epsilon = max(epsilon - prop_de / args.conservative,
                              goal_epsilon)
                break
            elif current_lr >= args.min_lr * 2:
                current_lr = current_lr / 2
                #print("[log] backtracking lr to %3f" % (current_lr,))
            else:
                prop_de = prop_de / 2
                if prop_de == 0:
                    raise ValueError("Did not converge.")
                if prop_de < 2e-3:
                    prop_de = 0
                current_lr = max_lr
                print("[log] backtracking eps to %3f" % (epsilon - prop_de,))

        # BOOK-KEEPING STUFF
        num_queries += args.samples_per_draw
        log_text = 'Step %05d: loss %.4f lr %.2E eps %.3f (time %.4f)' % (
            i, l, current_lr, epsilon, time.time() - start)
        log_file.write(log_text + '\n')
        print(log_text)

        if i % log_iters == 0:
            lvq, lvs, lrvq, lrvs = sess.run(
                [loss_vs_queries, loss_vs_steps, lr_vs_queries, lr_vs_steps],
                {empirical_loss: l, lr_placeholder: current_lr})
            writer.add_summary(lvq, num_queries)
            writer.add_summary(lrvq, num_queries)
            writer.add_summary(lvs, i)
            writer.add_summary(lrvs, i)

        if (i + 1) % args.save_iters == 0 and args.save_iters > 0:
            np.save(os.path.join(out_dir, '%s.npy' % (i + 1)), adv)
            scipy.misc.imsave(os.path.join(out_dir, '%s.png' % (i + 1)), adv)

    log_output(sess, eval_logits, eval_preds, x, adv, initial_img,
               target_class, out_dir, orig_class, num_queries)
def standard_loss(eval_points, noise):
    logits, preds = model(sess, eval_points)
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=labels)
    return losses, noise
def L(img):
    logits, predictions = model(sess, img)
    y_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    return y_xent
from tools import utils
from tools.inception_v3_imagenet import model
import tensorflow as tf
import pickle
import sys

IMAGENET_PATH = ""

if __name__ == "__main__":
    if IMAGENET_PATH == "":
        raise ValueError("Please open precompute.py and set IMAGENET_PATH")
    s = (299, 299, 3)
    dataset = sys.argv[1]
    last_j = 0
    sess = tf.InteractiveSession()
    x = tf.placeholder(tf.float32, s)
    _, preds = model(sess, tf.expand_dims(x, 0))
    label_dict = {}
    # For each class, scan forward through the validation set for an image
    # the model classifies correctly, caching every correctly classified
    # (label, index) pair found along the way.
    for i in range(1, 1000):
        print("Looking for %d" % (i,))
        if i in label_dict:
            continue
        for j in range(last_j, 50000):
            im, lab = utils.get_image(j, IMAGENET_PATH)
            if sess.run(preds, {x: im})[0] == lab:
                label_dict[lab] = j
                if lab == i:
                    break
            last_j = j
    with open("tools/data/imagenet.pickle", "wb") as f:
        pickle.dump(label_dict, f)
tf.logging.set_verbosity(tf.logging.INFO)

# Create session
sess = tf.InteractiveSession()

# Build graph
x_input = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
y_input = tf.placeholder(dtype=tf.int32, shape=[None])
noise = tf.placeholder(dtype=tf.float32, shape=[None, None, None, 3])
noise_resized = tf.image.resize_nearest_neighbor(noise, (299, 299))
noise_centered = noise_resized - tf.reduce_mean(noise_resized)
x_adv = x_input + tf.nn.l2_normalize(noise_centered, axis=(1, 2, 3)) * args.epsilon
x_adv = tf.clip_by_value(x_adv, 0, 1)
logits, preds = model(sess, x_adv)
model = {
    'x_input': x_input,
    'noise': noise,
    'x_adv': x_adv,
    'y_input': y_input,
    'logits': logits,
    'preds': preds,
    'targeted': args.targeted,
}

# Print hyperparameters
for key, val in vars(args).items():
    tf.logging.info('{}={}'.format(key, val))
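# --- Usage sketch (added; not part of the original script) ---
# The graph above maps a low-resolution noise tensor onto a centered,
# l2-normalized perturbation of norm epsilon. A naive random-search driver
# under those assumptions; the function, `model_dict`, and `noise_size` are
# hypothetical, and numpy is assumed imported as np.
def random_search_batch(sess, model_dict, x_batch, y_batch,
                        noise_size=32, num_trials=100):
    for _ in range(num_trials):
        n = np.random.randn(len(x_batch), noise_size, noise_size, 3)
        preds = sess.run(model_dict['preds'],
                         feed_dict={model_dict['x_input']: x_batch,
                                    model_dict['noise']: n})
        if np.all(preds != y_batch):   # untargeted success for the batch
            return n
    return None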