def random_search_max_confidence_recipe(sess, model, x, y, eps,
                                        clip_min, clip_max, report_path,
                                        batch_size=BATCH_SIZE,
                                        num_noise_points=10000):
    """Max confidence using random search.

    References:
    https://openreview.net/forum?id=H1g0piA9tQ
      Describes the max_confidence procedure used for the bundling in this
      recipe.
    https://arxiv.org/abs/1802.00420
      Describes using random search with 1e5 or more random points to avoid
      gradient masking.

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param clip_min: float, minimum value of the input features
    :param clip_max: float, maximum value of the input features
    :param report_path: str, the path that the report will be saved to.
    :param batch_size: int, the total number of examples to run simultaneously
    :param num_noise_points: int, number of random noise samples to try
      per example
    """
    noise_attack = Noise(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    assert batch_size % num_devices == 0
    new_work_goal = {noise_attack_config: num_noise_points}
    goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
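# Usage sketch (not part of the recipe itself): a thin wrapper showing how
# the random-search recipe would typically be driven from evaluation code.
# `make_model_and_data` is a hypothetical loader standing in for whatever
# restores the defended model and the clean test set; the recipe call itself
# uses only the real signature above, with illustrative eps/clip values for
# inputs scaled to [0, 1].
def example_run_random_search(report_path='random_search_report.joblib'):
    sess = tf.Session()
    model, test_x, test_y = make_model_and_data(sess)  # hypothetical loader
    random_search_max_confidence_recipe(
        sess, model, test_x, test_y,
        eps=0.3, clip_min=0., clip_max=1.,
        report_path=report_path,
        num_noise_points=10000)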
def recipe(
    sess,
    model,
    x,
    y,
    nb_classes,
    eps,
    clip_min,
    clip_max,
    eps_iter,
    nb_iter,
    report_path,
    eps_iter_small,
    batch_size,
):
    """
    Mock recipe that just runs the Noise attack so the test runs fast
    """
    attack_configs = [AttackConfig(Noise(model, sess), {"eps": eps})]
    new_work_goal = {config: 1 for config in attack_configs}
    goals = [Misclassify(new_work_goal=new_work_goal)]
    bundle_attacks(
        sess,
        model,
        x,
        y,
        attack_configs,
        goals,
        report_path,
        attack_batch_size=batch_size,
        eval_batch_size=batch_size,
    )
def recipe(sess, model, x, y, nb_classes, eps, clip_min, clip_max, eps_iter,
           nb_iter, report_path, eps_iter_small, batch_size=BATCH_SIZE):
    """Mock recipe that just runs the Noise attack so the test runs fast."""
    attack_configs = [AttackConfig(Noise(model, sess), {'eps': eps})]
    new_work_goal = {config: 1 for config in attack_configs}
    goals = [Misclassify(new_work_goal=new_work_goal)]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path,
                   attack_batch_size=batch_size, eval_batch_size=batch_size)
def single_run_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                     clip_min, clip_max, eps_iter, nb_iter,
                                     report_path, batch_size=BATCH_SIZE):
    """A reasonable attack bundling recipe for a max norm threat model and
    a defender that uses confidence thresholding. This recipe uses both
    uniform noise and randomly-initialized PGD targeted attacks.

    References:
    https://openreview.net/forum?id=H1g0piA9tQ

    This version runs each attack (noise, targeted PGD for each class with
    nb_iter iterations, targeted PGD for each class with 25X more iterations)
    just once and then stops. See `basic_max_confidence_recipe` for a version
    that runs indefinitely.

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param nb_classes: int, number of classes
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param eps_iter: float, step size for one version of PGD attacks
      (will also run another version with 25X smaller step size)
    :param nb_iter: int, number of iterations for the cheaper PGD attacks
      (will also run another version with 25X more iterations)
    :param report_path: str, the path that the report will be saved to.
    :param batch_size: int, the total number of examples to run simultaneously
    """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params, "noise")
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 1 for config in attack_configs}
    goals = [MaxConfidence(t=1., new_work_goal=new_work_goal)]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
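# Usage sketch: illustrative MNIST-scale settings (eps=0.3, 40-iteration PGD
# at step size 0.1, 10 classes); these values are examples, not tuned
# recommendations. `sess`, `model`, `test_x`, and `test_y` are assumed to be
# provided by the surrounding evaluation script.
def example_run_single_run_recipe(sess, model, test_x, test_y,
                                  report_path='max_confidence_report.joblib'):
    single_run_max_confidence_recipe(
        sess, model, test_x, test_y, nb_classes=10,
        eps=0.3, clip_min=0., clip_max=1.,
        eps_iter=0.1, nb_iter=40,
        report_path=report_path)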
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 1,
            'max_iterations': 100,
            'learning_rate': .2,
            'batch_size': args.lfw_batch_size,
            'initial_const': args.init_c,
            'confidence': 10
        }
        # Values to feed for the attack placeholders when the graph is run.
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings
        }
        # generate() builds a symbolic attack graph; it is evaluated later
        # with sess.run and the feed_dict above.
        adv_x = cw.generate(model.face_input, **cw_params)
    elif args.attack_type == 'random':
        random_attack = Noise(model, sess)
        noise_params = {
            'eps': args.eps,
            'ord': np.inf,
            'clip_min': 0,
            'clip_max': 1
        }
        adv_x = random_attack.generate(model.face_input, **noise_params)
    else:
        raise ValueError('Unsupported attack type: %s' % args.attack_type)
    return adv_x
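# Sketch of how the returned graph node would typically be evaluated. The
# `args`, `model`, `adv_input`, and `target_embeddings` objects are assumed
# to be set up by the surrounding script; the feed keys mirror the
# placeholders used inside prepare_attack.
def run_prepared_attack(sess, args, model, adv_input, target_embeddings):
    adv_x = prepare_attack(sess, args, model, adv_input, target_embeddings)
    feed_dict = {
        model.face_input: adv_input,
        model.victim_embedding_input: target_embeddings,
    }
    # Evaluate the symbolic attack to obtain concrete adversarial inputs.
    return sess.run(adv_x, feed_dict=feed_dict)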
def train_child(t, p, m, load_dict=False):
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)

    # Wrap the PyTorch models so the TF1-based cleverhans attacks can be
    # applied to them.
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)

    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        # Assign each example to one subpolicy at random.
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m), total=len(t), desc='Subpolicy: ',
                     leave=False)):
            # Indices of the examples assigned to subpolicy i.
            idx_i = np.where(adv_type == i)[0]
            for tj, pj, mj in tqdm(zip(ti, pi, mi), total=len(ti),
                                   desc='Operation: ', leave=False):
                tj, pj, mj = (*tj, *pj, *mj)
                # Apply this operation to each assigned example with
                # probability pj (uniform draw).
                idx_j = idx_i[np.random.rand(len(idx_i)) < pj]
                eps_j = ((mj + 1) / MAGN_COUNT *
                         (eps[tj][1] - eps[tj][0]) + eps[tj][0])
                for k in tqdm(range(0, len(idx_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ', leave=False):
                    batch_idx = idx_j[k:k + BATCH_SIZE]
                    # Write the perturbed batch back into train_x_adv.
                    train_x_adv[batch_idx] = attacks[tj](
                        train_x_adv[batch_idx], eps_j)
        return train_x_adv

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)

    train_x_adv = attack_train(train_x)
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True,
                                                  num_workers=4)

    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
        raw_optimizer.load_state_dict(
            torch.load('black_eval_runs/raw_optimizer.pt'))

    # Train the adversarially-trained model on the perturbed data.
    model.train()
    batch_tqdm = tqdm(adv_trainloader, leave=False)
    for x, y in batch_tqdm:
        optimizer.zero_grad()
        output = model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'adv {loss:.3f} {acc:.3f}')

    # Train the raw model on the clean data.
    batch_tqdm = tqdm(trainloader, leave=False)
    raw_model.train()
    for x, y in batch_tqdm:
        raw_optimizer.zero_grad()
        output = raw_model(x.cuda(0))
        loss = criterion(output, y.cuda(0))
        loss.backward()
        raw_optimizer.step()
        acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0)
        batch_tqdm.set_description(f'raw {loss:.3f} {acc:.3f}')

    with torch.no_grad():
        # Clean validation accuracy of the adversarially-trained model.
        model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_raw_acc = tot_acc / len(val_x)

        # Noisy validation accuracy of the adversarially-trained model.
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ', leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_op(val_x[i:][:BATCH_SIZE], 0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        adv_adv_acc = tot_acc / len(val_x)

        # Clean validation accuracy of the raw model.
        raw_model.eval()
        batch_tqdm = tqdm(valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_raw_acc = tot_acc / len(val_x)

        # Noisy validation accuracy of the raw model.
        val_x_adv = np.zeros_like(val_x)
        for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE),
                      desc='Noise: ', leave=False):
            val_x_adv[i:][:BATCH_SIZE] = noise_raw_op(val_x[i:][:BATCH_SIZE],
                                                      0.3)
        adv_valset = torch.utils.data.TensorDataset(
            torch.tensor(val_x_adv, dtype=torch.float),
            torch.tensor(val_y, dtype=torch.long))
        adv_valloader = torch.utils.data.DataLoader(adv_valset,
                                                    batch_size=BATCH_SIZE,
                                                    shuffle=False,
                                                    num_workers=4)
        batch_tqdm = tqdm(adv_valloader, leave=False)
        tot_acc = 0
        for x, y in batch_tqdm:
            output = raw_model(x.cuda(0))
            acc = float(torch.sum(output.cpu().argmax(axis=1) == y))
            tot_acc += acc
        raw_adv_acc = tot_acc / len(val_x)

    with open('black_eval_runs/acc.csv', 'a') as f:
        f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n')
    print(f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | '
          f'raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}')

    torch.save(model.state_dict(), 'black_eval_runs/model.pt')
    torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt')
    torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt')
    torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
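# Hedged sketch of a driver loop for train_child. `sample_policy` is a
# hypothetical controller or random sampler that returns the (attack type,
# probability, magnitude) arrays t, p, m in whatever nested shape the
# unpacking inside attack_train expects; it is not defined in this code.
def example_search_loop(num_epochs=5):
    for epoch in range(num_epochs):
        t, p, m = sample_policy()  # hypothetical policy sampler
        # Resume from the checkpoints written at the end of the previous call
        # so that training is cumulative across epochs.
        train_child(t, p, m, load_dict=(epoch > 0))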
def fixed_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                clip_min, clip_max, eps_iter, nb_iter,
                                report_path, batch_size=BATCH_SIZE):
    """A reasonable attack bundling recipe for a max norm threat model and
    a defender that uses confidence thresholding.

    References:
    https://openreview.net/forum?id=H1g0piA9tQ

    This version runs each attack a fixed number of times.
    It is more exhaustive than `single_run_max_confidence_recipe` but because
    it uses a fixed budget rather than running indefinitely it is more
    appropriate for making fair comparisons between two models.

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param nb_classes: int, number of classes
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param eps_iter: float, step size for one version of PGD attacks
      (will also run another version with 25X smaller step size)
    :param nb_iter: int, number of iterations for one version of PGD attacks
      (will also run another version with 25X more iterations)
    :param report_path: str, the path that the report will be saved to.
    :param batch_size: int, the total number of examples to run simultaneously
    """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] /= 25.
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 5 for config in attack_configs}
    pgd_work_goal = {config: 5 for config in pgd_attack_configs}
    # TODO: lower priority: make sure bundler won't waste time running targeted
    # attacks on examples where the target class is the true class
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
        MaxConfidence(t=0.5, new_work_goal=new_work_goal),
        MaxConfidence(t=0.75, new_work_goal=new_work_goal),
        MaxConfidence(t=0.875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.9375, new_work_goal=new_work_goal),
        MaxConfidence(t=0.96875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.984375, new_work_goal=new_work_goal),
        MaxConfidence(t=1., new_work_goal=new_work_goal)
    ]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
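# The MaxConfidence thresholds above follow t = 1 - 2**-k for k = 1..6,
# finishing with t = 1. The helper below is not part of the original recipe;
# it is a small sketch showing how the same schedule could be generated
# programmatically from a new_work_goal dict like the ones built above.
def max_confidence_schedule(new_work_goal, levels=6):
    """Return MaxConfidence goals at t = 0.5, 0.75, ..., 1 - 2**-levels, 1."""
    ts = [1. - 2. ** -k for k in range(1, levels + 1)] + [1.]
    return [MaxConfidence(t=t, new_work_goal=new_work_goal) for t in ts]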
def basic_max_confidence_recipe(sess, model, x, y, nb_classes, eps,
                                clip_min, clip_max, eps_iter, nb_iter,
                                report_path, batch_size=BATCH_SIZE,
                                eps_iter_small=None):
    """A reasonable attack bundling recipe for a max norm threat model and
    a defender that uses confidence thresholding.

    References:
    https://openreview.net/forum?id=H1g0piA9tQ

    This version runs indefinitely, updating the report on disk continuously.

    :param sess: tf.Session
    :param model: cleverhans.model.Model
    :param x: numpy array containing clean example inputs to attack
    :param y: numpy array containing true labels
    :param nb_classes: int, number of classes
    :param eps: float, maximum size of perturbation (measured by max norm)
    :param eps_iter: float, step size for one version of PGD attacks
      (will also run another version with eps_iter_small)
    :param nb_iter: int, number of iterations for one version of PGD attacks
      (will also run another version with 25X more iterations)
    :param report_path: str, the path that the report will be saved to.
    :param batch_size: int, the total number of examples to run simultaneously
    :param eps_iter_small: optional, float. The second version of the PGD
      attack is run with 25 * nb_iter iterations and eps_iter_small step size.
      If eps_iter_small is not specified it is set to eps_iter / 25.
    """
    noise_attack = Noise(model, sess)
    pgd_attack = ProjectedGradientDescent(model, sess)
    threat_params = {"eps": eps, "clip_min": clip_min, "clip_max": clip_max}
    noise_attack_config = AttackConfig(noise_attack, threat_params)
    attack_configs = [noise_attack_config]
    pgd_attack_configs = []
    pgd_params = copy.copy(threat_params)
    pgd_params["eps_iter"] = eps_iter
    pgd_params["nb_iter"] = nb_iter
    assert batch_size % num_devices == 0
    dev_batch_size = batch_size // num_devices
    ones = tf.ones(dev_batch_size, tf.int32)
    expensive_pgd = []
    if eps_iter_small is None:
        eps_iter_small = eps_iter / 25.
    for cls in range(nb_classes):
        cls_params = copy.copy(pgd_params)
        cls_params['y_target'] = tf.to_float(tf.one_hot(ones * cls, nb_classes))
        cls_attack_config = AttackConfig(pgd_attack, cls_params,
                                         "pgd_" + str(cls))
        pgd_attack_configs.append(cls_attack_config)
        expensive_params = copy.copy(cls_params)
        expensive_params["eps_iter"] = eps_iter_small
        expensive_params["nb_iter"] *= 25.
        expensive_config = AttackConfig(pgd_attack, expensive_params,
                                        "expensive_pgd_" + str(cls))
        expensive_pgd.append(expensive_config)
    attack_configs = [noise_attack_config] + pgd_attack_configs + expensive_pgd
    new_work_goal = {config: 5 for config in attack_configs}
    pgd_work_goal = {config: 5 for config in pgd_attack_configs}
    goals = [
        Misclassify(new_work_goal={noise_attack_config: 50}),
        Misclassify(new_work_goal=pgd_work_goal),
        MaxConfidence(t=0.5, new_work_goal=new_work_goal),
        MaxConfidence(t=0.75, new_work_goal=new_work_goal),
        MaxConfidence(t=0.875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.9375, new_work_goal=new_work_goal),
        MaxConfidence(t=0.96875, new_work_goal=new_work_goal),
        MaxConfidence(t=0.984375, new_work_goal=new_work_goal),
        MaxConfidence(t=1.)
    ]
    bundle_attacks(sess, model, x, y, attack_configs, goals, report_path)
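# Usage sketch: how the indefinitely-running recipe would typically be
# launched, including an explicit eps_iter_small override for the expensive
# PGD pass (leaving it as None falls back to eps_iter / 25). The numeric
# values are illustrative MNIST-scale settings, and `sess`, `model`,
# `test_x`, `test_y` are assumed to come from the surrounding script.
def example_run_basic_recipe(sess, model, test_x, test_y,
                             report_path='bundled_report.joblib'):
    basic_max_confidence_recipe(
        sess, model, test_x, test_y, nb_classes=10,
        eps=0.3, clip_min=0., clip_max=1.,
        eps_iter=0.1, nb_iter=40,
        report_path=report_path,
        eps_iter_small=0.01)  # illustrative, not a tuned recommendation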