def main(src_model_name): np.random.seed(0) tf.set_random_seed(0) flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches') set_mnist_flags() x = K.placeholder( (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS)) y = K.placeholder((None, FLAGS.NUM_CLASSES)) _, _, X_test, Y_test = data_mnist() # source model for crafting adversarial examples src_model = load_model(src_model_name) prediction = src_model(x) eval_params = {'batch_size': FLAGS.BATCH_SIZE} # error = tf_test_error_rate(src_model, x, X_test, Y_test) acc = model_eval(K.get_session(), x, y, prediction, X_test, Y_test, args=eval_params) print '{}: {:.3f}'.format(basename(src_model_name), acc)
def main(model_name, model_type):
    """Train a substitute model that mimics a black-box MNIST model.

    Reserves the first 300 test samples as the adversary's substitute
    training set, trains a substitute of architecture `model_type` via
    Jacobian-based dataset augmentation, evaluates it on the remaining
    test samples, and saves it (weights plus architecture JSON) under
    `model_name`.
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"

    set_mnist_flags()
    # FIX: the epoch count is an integer, not a boolean; the original
    # declared it with flags.DEFINE_bool.
    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:300]
    Y_sub = np.argmax(Y_test[:300], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[300:]
    Y_test = Y_test[300:]

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder(shape=(None, FLAGS.NUM_CLASSES))

    # Load the black-box model whose labels the substitute imitates.
    # NOTE(review): `blackbox_name` is a module-level name, not the
    # `model_name` parameter (which is where the substitute is saved)
    # — confirm it is defined at module scope.
    model = load_model(blackbox_name)
    prediction = model(x)

    # Substitute training with Jacobian-based dataset augmentation.
    train_sub_out = train_sub(K.get_session(), x, y, prediction, X_sub,
                              Y_sub, nb_classes=FLAGS.NUM_CLASSES,
                              nb_epochs_s=args.epochs,
                              batch_size=FLAGS.BATCH_SIZE,
                              learning_rate=0.001, data_aug=6, lmbda=0.1,
                              model_type=model_type)
    model_sub, preds_sub = train_sub_out

    eval_params = {'batch_size': FLAGS.BATCH_SIZE}

    # Finally print the result!
    accuracy = model_eval(K.get_session(), x, y, preds_sub, X_test,
                          Y_test, args=eval_params)
    print('Test accuracy of substitute on legitimate samples: %.3f%%'
          % accuracy)

    save_model(model_sub, model_name)
    json_string = model_sub.to_json()
    # FIX: 'wr' is not a valid open() mode (rejected on Python 3);
    # plain write mode 'w' is what was intended.
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)
def main(model_name, model_type):
    """Train the black-box MNIST model and save it under `model_name`.

    Holds out the first 150 test samples (reserved elsewhere for the
    adversary's substitute training), trains a model of architecture
    `model_type` with data augmentation, saves weights plus
    architecture JSON, and prints the final test error.
    """
    np.random.seed(0)
    assert keras.backend.backend() == "tensorflow"

    set_mnist_flags()
    # FIX: the epoch count is an integer, not a boolean; the original
    # declared it with flags.DEFINE_bool.
    flags.DEFINE_integer('NUM_EPOCHS', args.epochs, 'Number of epochs')

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()

    # Initialize substitute training set reserved for adversary
    X_sub = X_test[:150]
    Y_sub = np.argmax(Y_test[:150], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    X_test = X_test[150:]
    Y_test = Y_test[150:]

    data_gen = data_gen_mnist(X_train)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder(shape=(None, FLAGS.NUM_CLASSES))

    model = model_mnist(type=model_type)
    prediction = model(x)

    train_params = {
        'nb_epochs': args.epochs,
        'batch_size': FLAGS.BATCH_SIZE,
        'learning_rate': 0.001
    }

    def evaluate_1():
        # Periodic callback: report accuracy on the held-out test set.
        eval_params = {'batch_size': FLAGS.BATCH_SIZE}
        test_accuracy = model_eval(K.get_session(), x, y, prediction,
                                   X_test, Y_test, args=eval_params)
        print('Test accuracy of blackbox on legitimate test '
              'examples: {:.3f}'.format(test_accuracy))

    # Train the MNIST model with augmented data.
    model_train(K.get_session(), x, y, model, X_train, Y_train,
                data_gen, evaluate=evaluate_1, args=train_params)

    save_model(model, model_name)
    json_string = model.to_json()
    # FIX: 'wr' is not a valid open() mode (rejected on Python 3);
    # plain write mode 'w' is what was intended.
    with open(model_name + '.json', 'w') as f:
        f.write(json_string)

    # Finally print the result!
    test_error = tf_test_error_rate(model, x, X_test, Y_test)
    print('Test error: %.1f%%' % test_error)
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples against a source MNIST model and
    report white-box error (source vs. itself) plus black-box transfer
    error against each target model.

    Supported attacks: "test" (clean error only), "fgs", "rand_fgs",
    "ifgs", "CW".
    """
    # Deterministic run: fix both numpy and TF seeds.
    np.random.seed(0)
    tf.set_random_seed(0)
    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    # Symbolic inputs: image batch and one-hot label batch.
    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error on clean data and stop
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(src_model_name), err)
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM, then spend the remaining
    # budget (eps - alpha) on the gradient step
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM: args.steps steps of size eps/steps
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner (L-inf) attack; evaluated on the first 1000
    # samples only, with the perturbation clipped back to the eps ball
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        # white-box error of the source model on its own adversarial set
        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        # transfer error on each target model
        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err)
        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(name), err)
# NOTE(review): this chunk starts mid-call — the opening
# `parser.add_argument(` for the fragment below lies outside this view.
type=int, default=40, help="Number of iterations")
# NOTE(review): --beta is declared type=int but its default (0.01) is a
# float and the help text says "step size" — confirm type=float was
# intended.
parser.add_argument("--beta", type=int, default=0.01,
                    help="Step size per iteration")

args = parser.parse_args()

# '_un' in the method name selects untargeted (random-target) mode.
if '_un' in args.method:
    RANDOM = True
# Enable PCA dimensionality reduction unless all 784 MNIST pixels are
# kept.
PCA_FLAG = False
if args.num_comp != 784:
    PCA_FLAG = True
# NOTE(review): both branches assign the same value (10); one of them
# presumably should differ — confirm intended batch counts.
if '_iter' in args.method:
    BATCH_EVAL_NUM = 10
else:
    BATCH_EVAL_NUM = 10
# target_model_name = basename(args.target_model)
set_mnist_flags()

# Targeted mode runs the attack once per class; random mode runs once.
# NOTE(review): RANDOM is only assigned above when '_un' is in
# args.method — presumably it is also defined at module level; verify.
if RANDOM is False:
    for i in range(NUM_CLASSES):
        main(args.target_model, i)
elif RANDOM is True:
    main(args.target_model)
def main(measures, src_model_names): np.random.seed(0) tf.set_random_seed(0) flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches') set_mnist_flags() x = K.placeholder( (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS)) y = K.placeholder((None, FLAGS.NUM_CLASSES)) X_train, Y_train, X_test, Y_test = data_mnist() # source model for crafting adversarial examples src_models = [None] * len(src_model_names) accuracy = [None] * len(src_model_names) for i in range(len(src_model_names)): src_models[i] = load_model(src_model_names[i]) if measures == "Q": X_test = X_test[0:100] Y_test = Y_test[0:100] N = len(X_test) k = len(src_model_names) Qij = [([None] * k) for p in range(k)] for i in range(k - 1): for j in range(i + 1, k): a = b = c = d = 0.0 for n in range(N): src_model_i = src_models[i] src_model_j = src_models[j] Ci = tf_compute_C(src_model_i, x, y, X_test[n:n + 1], Y_test[n:n + 1]) Cj = tf_compute_C(src_model_j, x, y, X_test[n:n + 1], Y_test[n:n + 1]) if (Ci[0] == 1 & Cj[0] == 1): a += 1 elif (Ci[0] == 0 & Cj[0] == 0): d += 1 elif (Ci[0] == 0 & Cj[0] == 1): c += 1 elif (Ci[0] == 1 & Cj[0] == 0): b += 1 print a, b, c, d Qij[i][j] = (a * d - b * c) / (a * d + b * c) Qij_SUM = 0.0 for i in range(k - 1): for j in range(i + 1, k): Qij_SUM += Qij[i][j] QAV = (2.0 / (k * (k - 1))) * Qij_SUM print('The value of the Q statistic: %.4f' % (QAV)) return if measures == "p": X_test = X_test[0:100] Y_test = Y_test[0:100] N = len(X_test) k = len(src_model_names) Pij = [([None] * k) for p in range(k)] for i in range(k - 1): for j in range(i + 1, k): a = b = c = d = 0.0 for n in range(N): src_model_i = src_models[i] src_model_j = src_models[j] Ci = tf_compute_C(src_model_i, x, y, X_test[n:n + 1], Y_test[n:n + 1]) Cj = tf_compute_C(src_model_j, x, y, X_test[n:n + 1], Y_test[n:n + 1]) if (Ci[0] == 1 & Cj[0] == 1): a += 1 elif (Ci[0] == 0 & Cj[0] == 0): d += 1 elif (Ci[0] == 0 & Cj[0] == 1): c += 1 elif (Ci[0] == 1 & Cj[0] == 0): b += 1 print a, b, c, d Pij[i][j] = (a * d 
- b * c) / math.sqrt( (a + b) * (a + c) * (b + d) * (d + c)) Pij_SUM = 0.0 for i in range(k - 1): for j in range(i + 1, k): Pij_SUM += Pij[i][j] PAV = (2.0 / (k * (k - 1))) * Pij_SUM print('The value of the correlation coefficient: %.4f' % (PAV)) return if measures == "Ent": X_test = X_test[0:100] Y_test = Y_test[0:100] k = len(src_model_names) N = len(X_test) num = 0 for i in range(N): lxt = 0 print i for (name, src_model) in zip(src_model_names, src_models): C = tf_compute_C(src_model, x, y, X_test[i:i + 1], Y_test[i:i + 1]) # lxt denote the number of substitutes that accurately recognize sample x. lxt += C[0] # lxt= 0,1,2,3 m = min(lxt, k - lxt) num += ((1.0 / (k - math.ceil(k / 2.0))) * m) Ent = (1.0 / N) * num print('The value of the entropy measure: %.4f' % (Ent)) return if measures == "KW": X_test = X_test[0:100] Y_test = Y_test[0:100] k = len(src_model_names) N = len(X_test) num = 0 for i in range(N): lxt = 0 print i for (name, src_model) in zip(src_model_names, src_models): C = tf_compute_C(src_model, x, y, X_test[i:i + 1], Y_test[i:i + 1]) # lxt denote the number of substitutes that accurately recognize sample x. lxt += C[0] # lxt= 0,1,2,3 num += (lxt * (k - lxt)) KW = (1.0 / (N * math.pow(k, 2))) * num print('The value of the Kohavi-Wolpert variance: %.4f' % (KW)) return if measures == "test": X_test = X_test[0:5] Y_test = Y_test[0:5] # display_leg_sample(X_test) for j in range(1): for (name, src_model) in zip(src_model_names, src_models): # the number of substitutes from D that correctly recognize X_test[j] num = tf_test_acc_num(src_model, x, y, X_test, Y_test) # output 1, 1, 1, 1, 1, 1 print num return
def main(attack, src_model_names, target_model_name):
    """Craft adversarial examples against an ENSEMBLE of source MNIST
    models and report white-box error per source model plus black-box
    transfer error against the (optional) target model.

    Supported attacks: "test" (clean error only), "fgs", "rand_fgs",
    "ifgs", "CW_ens".  Adversarial sets are cached to / loaded from
    pickle files keyed by attack, source models, loss type and eps.
    """
    np.random.seed(0)
    tf.set_random_seed(0)
    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    # flattened image dimension, used for L2 perturbation norms
    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    # concatenated basename used in report lines and pickle file names
    src_model_name_joint = ''
    for i in range(len(src_models)):
        src_model_name_joint += basename(src_model_names[i])

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error on clean data and stop
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)

        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test,
                                           Y_test)
            print '{}: {:.1f}'.format(basename(target_model_name), err)
        return

    # targeted mode: replace Y_test with per-sample random target
    # labels (never the true label), cached to disk
    if args.targeted_flag == 1:
        pickle_name = attack + '_' + src_model_name_joint + '_' + '_' + args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                # draw a target class uniformly from the wrong classes
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            # targets = np.random.randint(10, size = BATCH_SIZE*BATCH_EVAL_NUM)
            targets = np.array(targets)
            print targets
        # NOTE(review): the pickle stores the categorical Y_test below,
        # yet the load path feeds it back through to_categorical as if
        # it were integer labels — verify the cached format.
        targets_cat = np_utils.to_categorical(
            targets, FLAGS.NUM_CLASSES).astype(np.float32)
        Y_test = targets_cat
        if SAVE_FLAG == True:
            pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        # NOTE(review): `eps` has no prior binding in this function
        # (the per-eps loop below introduces it) — this line raises
        # UnboundLocalError when attack == "rand_fgs"; confirm intended
        # initialization (e.g. from args.eps or eps_list).
        eps -= args.alpha

    # logits of every source model on the clean input
    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        curr_model = src_models[i]
        logits[i] = curr_model(x)

    # ensemble gradient, either cross-entropy or Carlini-Wagner loss
    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    # targeted attacks descend toward the target label
    if args.targeted_flag == 1:
        grad = -1.0 * grad

    # run the attack once per perturbation budget
    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM on the ensemble
        if attack == "ifgs":
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            # NOTE(review): the inner loop reuses loop variable `i`,
            # clobbering the outer iteration counter — iteration count
            # is still args.num_iter, but confirm this is intentional.
            for i in range(args.num_iter):
                adv_logits = [None] * len(src_model_names)
                for i in range(len(src_model_names)):
                    curr_model = src_models[i]
                    adv_logits[i] = curr_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                # step, then project back into the eps ball around x
                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            # keep pixels in the valid [0, 1] range
            adv_x = K.clip(adv_x, 0, 1)

        # Carlini & Wagner attack on the ensemble (first 1000 samples),
        # with results cached to disk
        if attack == "CW_ens":
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + str(
                args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]
            if os.path.exists(pickle_name) and attack == "CW_ens":
                print 'Loading adversarial samples'
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(
                        src_model, x, X_adv, Y_test)
                    print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err)

                preds_adv, _, err = tf_test_error_rate(target_model, x,
                                                       X_adv, Y_test)
                print '{}->{}: {:.1f}'.format(
                    src_model_name_joint, basename(target_model_name),
                    err)
                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(), src_models,
                               targeted=False, confidence=args.kappa,
                               eps=eps)

            X_adv = cli.attack(X_test, Y_test)

            # project the perturbation back into the eps ball
            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            for (name, src_model) in zip(src_model_names, src_models):
                print('Carrying out white-box attack')
                pres, _, err = tf_test_error_rate(src_model, x, X_adv,
                                                  Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err)

            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x,
                                                      X_adv, Y_test)
                print '{}->{}: {:.1f}'.format(
                    src_model_name_joint, basename(target_model_name),
                    err)
            return

        # pickle cache name for the symbolic (FGSM-family) attacks;
        # targeted runs get a '_t' suffix
        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
            eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
                eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print 'Loading adversarial samples'
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print 'Generating adversarial samples'
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        # mean L2 norm of the applied perturbations
        avg_l2_perturb = np.mean(
            np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim),
                           axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x,
                                                      X_adv, Y_test[0:l])
            # targeted runs report success rate (100 - error)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print '{}->{}: {:.1f}'.format(basename(name),
                                          basename(name), err)

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv,
                                               Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print '{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack)
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples against a source MNIST model —
    optionally boosted by an ensemble of substitute models — and report
    white-box error plus black-box transfer error per target model.

    Supported attacks: "test" (clean error only), "fgs", "rand_fgs",
    "ifgs", "CW", "cascade_ensemble", "parallel_ensemble".
    """
    # Deterministic run: fix both numpy and TF seeds.
    np.random.seed(0)
    tf.set_random_seed(0)
    flags.DEFINE_integer('BATCH_SIZE', 32, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error on clean data and stop
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.3f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.3f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM, spending the remaining
    # budget (eps - alpha) on the gradient step
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM: args.steps steps of size eps/steps
    if attack == "ifgs":
        adv_x = iter_fgs(src_model, x, y, steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner (L-inf) attack on the first 1000 samples, with
    # the perturbation clipped back into the eps ball
    if attack == "CW":
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(), src_model,
                        targeted=False, confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                          basename(name), err)
        return

    # cascade: apply one FGSM step per substitute (plus the source
    # model) in sequence, args.steps times
    # NOTE(review): sub_model_1..7 are module-level names (paths or
    # identifiers consumed by load_model) — defined outside this view.
    if attack == "cascade_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3,
                         sub_model_4, sub_model_5, sub_model_6,
                         sub_model_7)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        adv_x = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models + [src_model]):
                logits = m(adv_x)
                gradient = gen_grad(adv_x, logits, y)
                adv_x = symbolic_fgs(adv_x, gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)

    # parallel: run an iterated FGSM chain across the substitutes,
    # average all intermediate adversarial tensors, then take one final
    # FGSM step on the source model from that average
    if attack == "parallel_ensemble":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        eps -= args.alpha

        sub_model_ens = (sub_model_1, sub_model_2, sub_model_3)
        sub_models = [None] * len(sub_model_ens)
        for i in range(len(sub_model_ens)):
            sub_models[i] = load_model(sub_model_ens[i])

        # x_advs[j][i] holds the adversarial tensor after step j on
        # substitute i
        x_advs = [([None] * len(sub_models)) for i in range(args.steps)]
        print x_advs

        x_adv = x
        for j in range(args.steps):
            for i, m in enumerate(sub_models):
                logits = m(x_adv)
                gradient = gen_grad(x_adv, logits, y)
                x_adv = symbolic_fgs(x_adv, gradient,
                                     eps=args.eps / args.steps,
                                     clipping=True)
                x_advs[j][i] = x_adv
        print x_advs

        # average every intermediate adversarial tensor
        adv_x_mean = x_advs[0][0]
        for j in range(args.steps):
            for i in range(len(sub_models)):
                if j == 0 and i == 0:
                    continue
                adv_x_mean = adv_x_mean + x_advs[j][i]
        xadv = adv_x_mean / (args.steps * len(sub_models))

        # final FGSM step on the source model from the averaged point
        preds = src_model(xadv)
        grads = gen_grad(xadv, preds, y)
        adv_x = symbolic_fgs(xadv, grads, eps=args.eps, clipping=True)

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.3f}'.format(basename(src_model_name),
                                      basename(name), err)