示例#1
0
def data_setup():
    """Load and preprocess the dataset selected by ``gv.args.dataset``.

    Returns:
        (X_train, Y_train, X_test, Y_test, Y_test_uncat) where the Y arrays
        are one-hot encoded and ``Y_test_uncat`` holds integer class labels.

    Raises:
        ValueError: if ``args.dataset`` is not a supported dataset name.
    """
    args = gv.args
    if 'MNIST' in args.dataset:
        # Covers both 'MNIST' and fashion-MNIST style names.
        X_train, Y_train, X_test, Y_test = data_mnist()
        Y_test_uncat = np.argmax(Y_test, axis=1)
        print('Loaded f/MNIST data')
    elif args.dataset == 'CIFAR-10':
        (X_train, Y_train_uncat), (X_test, Y_test_uncat) = cifar10.load_data()

        # Convert class vectors to binary class matrices.
        Y_train = np_utils.to_categorical(Y_train_uncat, gv.NUM_CLASSES)
        Y_test = np_utils.to_categorical(Y_test_uncat, gv.NUM_CLASSES)

        X_train = X_train.astype('float32')
        X_test = X_test.astype('float32')

        # Subtract the training-set mean and scale to roughly [-1, 1].
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_test -= mean_image
        X_train /= 128.
        X_test /= 128.
        print('Loaded CIFAR-10 data')
    elif args.dataset == 'census':
        X_train, Y_train, X_test, Y_test = data_census()
        Y_test_uncat = np.argmax(Y_test, axis=1)
        print(Y_test)
        print(Y_test_uncat)
        print('Loaded Census data')
    else:
        # Previously an unknown dataset fell through and surfaced as an
        # UnboundLocalError at the return statement; fail fast instead.
        raise ValueError('Unknown dataset: {}'.format(args.dataset))

    return X_train, Y_train, X_test, Y_test, Y_test_uncat
示例#2
0
def main():
    """Evaluate a saved Keras model's accuracy on MNIST or drawn datasets.

    All options (model path, dataset path/template, attack flags) are read
    from the module-level ``args``.
    """
    # Symbolic input: batches of 28x28 grayscale images.
    x = K.placeholder((None, 28, 28, 1))

    model = load_model(args.model)
    logits = model(x)
    prediction = K.softmax(logits)

    if args.mnist:
        # Standard MNIST test split with integer (non-one-hot) labels.
        _, _, X, Y = data_mnist(one_hot=False)
        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)

    elif args.n is None:
        # Single .npz file of drawings; pixel values rescaled to [0, 1].
        with np.load(args.dataset) as data:
            X = data['drawings'] / 255
            Y = data['Y'].reshape(-1, )

        accuracy = get_accuracy(X, Y, prediction, x)
        output_results(accuracy)

    else:
        # args.dataset is a filename template (used as args.dataset % i);
        # evaluate each of the args.n files and collect per-file accuracies.
        result = []

        for i in tqdm(range(1, args.n + 1)):
            with np.load(args.dataset % i) as data:
                X = data['drawings'] / 255
                Y = data['Y'].reshape(-1, )

            predictions = \
                K.get_session().run([prediction], feed_dict={x: X, K.learning_phase(): 0})[0]
            argmax = np.argmax(predictions, axis=1)

            if args.attack and not args.targeted:
                # Untargeted attack data: "success" means misclassification.
                equal = argmax != Y
            else:
                equal = argmax == Y

            accuracy = np.mean(equal)
            result.append(accuracy)

        print(result)
def main(attack, src_model_name, target_model_name):
    """Craft adversarial MNIST examples on a source model and evaluate
    white-box (source) and black-box (target) error rates.

    attack: "test" (just report test error), "fgs"/"rand_fgs" (one-shot
    FGSM variants), or "ifgs" (iterative FGSM).
    src_model_name: path of the model whose gradients are used.
    target_model_name: path of the transfer target model (may be None).

    Uses the module-level ``args`` (loss_type, norm, targeted_flag,
    num_iter, delta) and ``eps_list``.
    NOTE(review): ``eps_list`` is not defined in this function -- it is
    presumably a module-level global; confirm before reuse.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    dim = 28 * 28 * 1

    x = K.placeholder((None,
                       28,
                       28,
                       1))

    y = K.placeholder((None, 10))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), err))
        _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(target_model_name), err))

        return

    if args.targeted_flag == 1:
        # Pick a random target class per sample, excluding the true class,
        # and replace Y_test with the one-hot encoded targets.
        targets = []
        allowed_targets = list(range(10))
        for i in range(len(Y_test)):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(10))
        targets = np.array(targets)
        print(targets)
        targets_cat = np_utils.to_categorical(targets, 10).astype(np.float32)
        Y_test = targets_cat

    logits = src_model(x)
    print('logits', logits)

    # Symbolic gradient of the chosen loss w.r.t. the input.
    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
        assert grad is not None
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        # Targeted attacks descend (rather than ascend) the loss.
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            assert grad is not None
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            # Restrict to 1000 samples to keep runtime manageable.
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                adv_logits = src_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                # Project the accumulated perturbation back into the
                # eps-ball around the original input.
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        print('Generating adversarial samples')
        X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

        avg_l2_perturb = np.mean(np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv, Y_test[0:l])
        if args.targeted_flag == 1:
            # For targeted attacks report success rate instead of error rate.
            err = 100.0 - err
        print('{}->{}: {:.1f}'.format(src_model_name, src_model_name, err))

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print('{}->{}: {:.1f}, {}, {} {}'.format(src_model_name,
                                                     basename(target_model_name), err,
                                                     avg_l2_perturb, eps, attack))
示例#4
0
def main(attack, src_model_name, target_model_names):
    """Craft adversarial MNIST examples and evaluate transfer from one
    source model to several target models (Python 2 snippet).

    attack: "test", "fgs", "rand_fgs", "ifgs", or "CW".
    src_model_name: path of the model whose gradients are used.
    target_model_names: list of paths of black-box transfer targets.
    Uses the module-level ``args`` (eps, alpha, steps, kappa).
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 10, 'Size of batches')
    set_mnist_flags()

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [None] * len(target_model_names)
    for i in range(len(target_model_names)):
        target_models[i] = load_model(target_model_names[i])

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        # The random step consumes part of the total eps budget.
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    # FGSM and RAND+FGSM one-shot attack
    if attack in ["fgs", "rand_fgs"]:
        adv_x = symbolic_fgs(x, grad, eps=eps)

    # iterative FGSM
    if attack == "ifgs":
        adv_x = iter_fgs(src_model,
                         x,
                         y,
                         steps=args.steps,
                         eps=args.eps / args.steps)

    # Carlini & Wagner attack
    if attack == "CW":
        # Restrict to 1000 samples to keep the (slow) CW attack tractable.
        X_test = X_test[0:1000]
        Y_test = Y_test[0:1000]

        cli = CarliniLi(K.get_session(),
                        src_model,
                        targeted=False,
                        confidence=args.kappa,
                        eps=args.eps)

        X_adv = cli.attack(X_test, Y_test)

        # Clip the CW perturbation back into the linf eps-ball.
        r = np.clip(X_adv - X_test, -args.eps, args.eps)
        X_adv = X_test + r

        err = tf_test_error_rate(src_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(src_model_name), err)

        for (name, target_model) in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                          basename(name), err)

        return

    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print '{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), err)

    # black-box attack
    for (name, target_model) in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print '{}->{}: {:.1f}'.format(basename(src_model_name), basename(name),
                                      err)
def main(target_model_name, target=None):
    """Run white-box FGSM and gradient-estimation attacks on one model.

    target_model_name: path of the saved Keras model being attacked.
    target: fixed target class, used only when RANDOM is False.

    Reads the remaining configuration from module-level globals (``args``,
    BATCH_SIZE, BATCH_EVAL_NUM, RANDOM, CLIP_MIN, CLIP_MAX, IMAGE_ROWS,
    IMAGE_COLS, NUM_CHANNELS, NUM_CLASSES).
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder((None, IMAGE_ROWS, IMAGE_COLS, NUM_CHANNELS))

    y = K.placeholder((None, NUM_CLASSES))

    dim = int(IMAGE_ROWS * IMAGE_COLS)

    _, _, X_test_ini, Y_test = data_mnist()
    print('Loaded data')

    Y_test_uncat = np.argmax(Y_test, axis=1)

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    # Choose per-sample attack targets.
    if '_un' in args.method:
        # Untargeted method: "targets" are just the true labels.
        targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
    elif RANDOM is False:
        targets = np.array([target] * (BATCH_SIZE * BATCH_EVAL_NUM))
    elif RANDOM is True:
        # Random target per sample, excluding the true class.
        targets = []
        allowed_targets = list(range(NUM_CLASSES))
        for i in range(BATCH_SIZE * BATCH_EVAL_NUM):
            allowed_targets.remove(Y_test_uncat[i])
            targets.append(np.random.choice(allowed_targets))
            allowed_targets = list(range(NUM_CLASSES))
        # targets = np.random.randint(10, size = BATCH_SIZE*BATCH_EVAL_NUM)
        targets = np.array(targets)
        print(targets)  # fixed: was a Python-2 `print targets` statement
    targets_cat = np_utils.to_categorical(targets,
                                          NUM_CLASSES).astype(np.float32)

    if args.norm == 'linf':
        # eps_list = list(np.linspace(0.025, 0.1, 4))
        # eps_list.extend(np.linspace(0.15, 0.5, 8))
        eps_list = [0.3]
        if "_iter" in args.method:
            eps_list = [0.3]
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 2.0, 5))
        eps_list.extend(np.linspace(2.5, 9.0, 14))
        # eps_list = [5.0]
    print(eps_list)

    # Random initial step (RAND+FGSM style) applied before the attack.
    random_perturb = np.random.randn(*X_test_ini.shape)

    if args.norm == 'linf':
        random_perturb_signed = np.sign(random_perturb)
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_signed,
                         CLIP_MIN, CLIP_MAX)
    elif args.norm == 'l2':
        # fixed: `curr_len` was undefined here (NameError in the l2 branch);
        # use the actual number of samples to normalize each perturbation
        # to unit l2 norm.
        random_perturb_unit = random_perturb / np.linalg.norm(
            random_perturb.reshape(len(X_test_ini), dim),
            axis=1)[:, None, None, None]
        X_test = np.clip(X_test_ini + args.alpha * random_perturb_unit,
                         CLIP_MIN, CLIP_MAX)

    for eps in eps_list:
        if '_iter' in args.method:
            white_box_fgsm_iter(prediction, target_model, x, logits, y, X_test,
                                X_test_ini, Y_test_uncat, targets, targets_cat,
                                eps, dim, args.beta)
            estimated_grad_attack_iter(X_test, X_test_ini, x, targets,
                                       prediction, logits, eps, dim, args.beta)
        else:
            white_box_fgsm(prediction, target_model, x, logits, y, X_test,
                           X_test_ini, Y_test_uncat, targets, targets_cat, eps,
                           dim)
示例#6
0
def main(target_model_name):
    """Run the class-mean baseline attack against one model.

    For each class, test samples are given a random step of size ``alpha``
    and then pushed toward the closest class mean (untargeted) or a random
    target class mean (targeted), using the remaining eps budget.  Reports
    adversarial success rate and average l2 perturbation per eps.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    # Per-class mean images and class frequencies.
    means, class_frac = class_means(X_test, Y_test_uncat)

    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    if args.norm == 'linf':
        eps_list = list(np.linspace(0.0, 0.1, 5))
        eps_list.extend(np.linspace(0.2, 0.5, 7))
    elif args.norm == 'l2':
        eps_list = list(np.linspace(0.0, 9.0, 28))

    for eps in eps_list:
        eps_orig = eps
        # Split the budget: alpha goes to the random step, the remainder
        # to the mean-difference step.
        if args.alpha > eps:
            alpha = eps
            eps = 0
        elif eps >= args.alpha:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0
        for i in range(FLAGS.NUM_CLASSES):
            # Attack all test samples of class i as one batch.
            curr_indices = np.where(Y_test_uncat == i)
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)
            if args.targeted_flag == 1:
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
                allowed_targets.remove(i)

            random_perturb = np.random.randn(*X_test_ini.shape)

            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(
                    X_test_ini + alpha * random_perturb_signed, CLIP_MIN,
                    CLIP_MAX)
            elif args.norm == 'l2':
                # Normalize each sample's perturbation to unit l2 norm.
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim), axis=1)[:, None,
                                                                   None, None]
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_unit,
                                      CLIP_MIN, CLIP_MAX)

            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros(
                    (curr_len, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS,
                     FLAGS.NUM_CHANNELS))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                # NOTE(review): the l2 branch only handles the untargeted
                # case; with --norm l2 --targeted_flag 1, 'mean_diff_vec'
                # is undefined here -- confirm intended usage.
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(
                    mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            # Getting the norm of the perturbation
            perturb_norm = np.linalg.norm(
                (X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = K.get_session().run([prediction],
                                                  feed_dict={
                                                      x: X_adv,
                                                      K.learning_phase(): 0
                                                  })[0]

            if args.targeted_flag == 0:
                # Untargeted success: prediction differs from true label.
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) != Y_test_curr)
            elif args.targeted_flag == 1:
                print(targets)
                # Targeted success: prediction equals the chosen target.
                adv_success += np.sum(
                    np.argmax(predictions_adv, 1) == np.array(targets))

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / FLAGS.NUM_CLASSES

        print('{}, {}, {}'.format(eps, alpha, err))
        print('{}'.format(avg_l2_perturb))
# --- Script setup: build the target-model graph and evaluation targets. ---

target_model_name = basename(args.target_model)

set_mnist_flags()

np.random.seed(0)
tf.set_random_seed(0)

x = K.placeholder(
    (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

y = K.placeholder((None, FLAGS.NUM_CLASSES))

dim = int(FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS)

_, _, X_test, Y_test = data_mnist()
print('Loaded data')

# target model for crafting adversarial examples
target_model = load_model(args.target_model)
target_model_name = basename(target_model_name)

logits = target_model(x)
prediction = K.softmax(logits)

sess = tf.Session()
print('Creating session')

# NOTE(review): BATCH_EVAL_NUM is used here but only defined further down
# in this script -- unless it is set earlier in the file this raises a
# NameError; confirm against the full file.
targets = np.argmax(Y_test[:BATCH_SIZE * BATCH_EVAL_NUM], 1)
# elif RANDOM is False:
#     targets = np.array([target]*(BATCH_SIZE*BATCH_EVAL_NUM))
RANDOM = True  # fixed: was spuriously indented, an IndentationError
PCA_FLAG = False
if args.num_comp is not None:
    PCA_FLAG = True

# Iterative methods evaluate more batches than one-shot methods.
if '_iter' in args.method:
    BATCH_EVAL_NUM = 10
else:
    BATCH_EVAL_NUM = 1

CLIP_MIN = 0.0

if args.dataset == 'MNIST':
    target_model_name = basename(args.target_model)
    
    _, _, X_test_ini, Y_test = data_mnist()
    X_test = X_test_ini
    print('Loaded data')

    IMAGE_ROWS = 28
    IMAGE_COLS = 28
    NUM_CHANNELS = 1
    NUM_CLASSES = 10

    CLIP_MAX = 1.0

    if args.norm == 'linf':
        eps_list = list(np.linspace(0.0, 0.1, 3))
        eps_list.extend(np.linspace(0.1, 0.5, 9))
        # eps_list = [0.3]
        if "_iter" in args.method:
示例#9
0
def main(target_model_name):
    """Run the class-mean baseline attack on MNIST and save the samples.

    For each digit class, perturbs its test samples toward the closest (or
    a randomly targeted) class mean, measures adversarial success, and
    saves the adversarial samples with predictions to an .npz file under
    ADVERSARIAL_DATA_PATH.
    """
    redraw = False
    save_adv_samples_only = True

    np.random.seed(0)
    tf.set_random_seed(0)

    x = K.placeholder((None,
                       28,
                       28,
                       1))

    y = K.placeholder((None, 10))

    dim = int(28 * 28 * 1)

    _, _, X_test, Y_test = data_mnist()
    print('Loaded data')

    # target model for crafting adversarial examples
    target_model = load_model(target_model_name)
    target_model_name = basename(target_model_name)

    logits = target_model(x)
    prediction = K.softmax(logits)

    sess = tf.Session()
    print('Creating session')

    Y_test_uncat = np.argmax(Y_test, 1)

    # Per-class mean images, and the closest other class for each class.
    means, class_frac = class_means(X_test, Y_test_uncat)

    scales, mean_dists, closest_means = length_scales(X_test, Y_test_uncat)

    eps_list = [args.eps]

    adv_images = np.empty((10 * 28, len(eps_list) * 28))

    # Accumulators holding every adversarial sample, its label/target, and
    # the model's prediction and confidence on it.
    total_X_adv = np.empty(X_test.shape)
    total_Y = np.empty(Y_test_uncat.shape)
    total_adv_pred = np.empty(Y_test_uncat.shape)
    total_adv_prob = np.empty(Y_test_uncat.shape)

    for eps_idx, eps in tqdm(enumerate(eps_list)):
        eps_orig = eps
        # Split the budget: alpha goes to the random step, the remainder
        # to the mean-difference step.
        if args.alpha > eps:
            alpha = eps
            eps = 0
        elif eps >= args.alpha:
            alpha = args.alpha
            eps -= args.alpha

        adv_success = 0.0
        avg_l2_perturb = 0.0
        NUM_SAVED = 0
        for i in tqdm(range(10)):
            curr_indices = np.where(Y_test_uncat == i)
            NUM_SAMPLES = len(curr_indices[0])
            X_test_ini = X_test[curr_indices]
            Y_test_curr = Y_test_uncat[curr_indices]
            curr_len = len(X_test_ini)
            if args.targeted_flag == 1:
                allowed_targets = list(range(10))
                allowed_targets.remove(i)

            random_perturb = np.random.randn(*X_test_ini.shape)

            if args.norm == 'linf':
                random_perturb_signed = np.sign(random_perturb)
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_signed, CLIP_MIN,
                                      CLIP_MAX)
            elif args.norm == 'l2':
                # Normalize each sample's perturbation to unit l2 norm.
                random_perturb_unit = random_perturb / np.linalg.norm(
                    random_perturb.reshape(curr_len, dim), axis=1)[:,
                                                       None, None, None]
                X_test_curr = np.clip(X_test_ini + alpha * random_perturb_unit, CLIP_MIN, CLIP_MAX)

            if args.targeted_flag == 0:
                closest_class = int(closest_means[i])
                mean_diff_vec = means[closest_class] - means[i]
            elif args.targeted_flag == 1:
                targets = []
                mean_diff_array = np.zeros(
                    (curr_len, 28, 28, 1))
                for j in range(curr_len):
                    target = np.random.choice(allowed_targets)
                    targets.append(target)
                    mean_diff_array[j] = means[target] - means[i]

            if args.norm == 'linf':
                if args.targeted_flag == 0:
                    mean_diff_vec_signed = np.sign(mean_diff_vec)
                    perturb = eps * mean_diff_vec_signed
                elif args.targeted_flag == 1:
                    mean_diff_array_signed = np.sign(mean_diff_array)
                    perturb = eps * mean_diff_array_signed
            elif args.norm == 'l2':
                # NOTE(review): the l2 branch only handles the untargeted
                # case; with --norm l2 --targeted_flag 1 'mean_diff_vec' is
                # undefined here -- confirm intended usage.
                mean_diff_vec_unit = mean_diff_vec / np.linalg.norm(mean_diff_vec.reshape(dim))
                perturb = eps * mean_diff_vec_unit

            X_adv = np.clip(X_test_curr + perturb, CLIP_MIN, CLIP_MAX)

            # fixed: these assert messages were f-strings broken across
            # physical lines inside their replacement fields (a syntax
            # error); rewritten as valid adjacent f-string literals.
            assert X_adv.shape[1:] == total_X_adv.shape[1:], (
                f'X_adv.shape[1:] = {X_adv.shape[1:]}, '
                f'total_X_adv.shape[1:] = {total_X_adv.shape[1:]}')
            assert X_adv.shape[0] == NUM_SAMPLES, f'X_adv.shape[0] = {X_adv.shape[0]}'
            total_X_adv[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = X_adv

            # sample_x_adv = save_sample(X_adv, eps, i)

            if redraw:
                # NOTE(review): 'sample_x_adv' is only produced by the
                # commented-out save_sample call above, so redraw=True
                # would currently raise NameError.
                sample_x_adv = draw(
                    images=sample_x_adv.reshape(
                        (1, sample_x_adv.shape[0], sample_x_adv.shape[1], sample_x_adv.shape[2])),
                    n=10,
                    alpha=0.8,
                    background='000000'
                )[0]

            # row_start = i * 28
            # col_start = eps_idx * 28
            # no_channels_x = sample_x_adv.reshape(28, 28)
            # adv_images[row_start:row_start + 28,
            # col_start: col_start + 28] = no_channels_x

            # Getting the norm of the perturbation
            perturb_norm = np.linalg.norm((X_adv - X_test_ini).reshape(curr_len, dim), axis=1)
            perturb_norm_batch = np.mean(perturb_norm)
            avg_l2_perturb += perturb_norm_batch

            predictions_adv = \
                K.get_session().run([prediction], feed_dict={x: X_adv, K.learning_phase(): 0})[0]

            total_adv_pred[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = np.argmax(predictions_adv, axis=1)
            total_adv_prob[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = np.max(predictions_adv, axis=1)

            if args.targeted_flag == 0:
                adv_success += np.sum(np.argmax(predictions_adv, 1) != Y_test_curr)
                assert Y_test_curr.shape[1:] == total_Y.shape[1:], (
                    f'Y_test_curr.shape[1:] = {Y_test_curr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert Y_test_curr.shape[0] == NUM_SAMPLES, (
                    f'Y_test_curr.shape[0] = {Y_test_curr.shape[0]}')
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = Y_test_curr
            elif args.targeted_flag == 1:
                targets_arr = np.array(targets)
                adv_success += np.sum(np.argmax(predictions_adv, 1) == targets_arr)
                assert targets_arr.shape[1:] == total_Y.shape[1:], (
                    f'targets_arr.shape[1:] = {targets_arr.shape[1:]}, '
                    f'total_Y.shape[1:] = {total_Y.shape[1:]}')
                assert targets_arr.shape[0] == NUM_SAMPLES, (
                    f'targets_arr.shape[0] = {targets_arr.shape[0]}')
                total_Y[NUM_SAVED:NUM_SAVED + NUM_SAMPLES] = targets_arr

            NUM_SAVED += NUM_SAMPLES

        err = 100.0 * adv_success / len(X_test)
        avg_l2_perturb = avg_l2_perturb / 10

        print(f'eps = {eps}, alpha = {alpha}, adv success = {err}')
        print(f'avg l2 pertub = {avg_l2_perturb}')

    if redraw:
        scipy.misc.imsave('baseline_attacks_redrawned.png', adv_images)
    else:
        # save_total(adv_images, avg_l2_perturb, err)
        file = os.path.join(ADVERSARIAL_DATA_PATH,
                            f'baseline-norm-{args.norm}-alpha-{args.alpha}-targeted-{args.targeted_flag}-adv-samples')
        np.savez(file=file, X=total_X_adv, Y=total_Y, pred=total_adv_pred, prob=total_adv_prob)
def main(attack, src_model_names, target_model_name):
    """Ensemble-based transfer attack (Python 2 snippet): craft adversarial
    examples against several source models jointly and evaluate transfer
    to a single target model.

    attack: "test", "fgs", "rand_fgs", "ifgs", or "CW_ens".
    src_model_names: list of source-model paths (gradients are ensembled).
    target_model_name: path of the black-box target model (may be None).

    Uses the module-level ``args`` (loss_type, norm, targeted_flag, eps,
    kappa, delta, num_iter, alpha), ``SAVE_FLAG`` and ``eps_list``.
    NOTE(review): ``eps_list`` is not defined in this function -- it is
    presumably a module-level global; confirm before reuse.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    flags.DEFINE_integer('BATCH_SIZE', 1, 'Size of batches')
    set_mnist_flags()

    dim = FLAGS.IMAGE_ROWS * FLAGS.IMAGE_COLS * FLAGS.NUM_CHANNELS

    x = K.placeholder(
        (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))

    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = data_mnist()
    Y_test_uncat = np.argmax(Y_test, axis=1)

    # source model for crafting adversarial examples
    src_models = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        src_models[i] = load_model(src_model_names[i])

    # Joint name used for log lines and pickle filenames.
    src_model_name_joint = ''
    for i in range(len(src_models)):
        src_model_name_joint += basename(src_model_names[i])

    # model(s) to target
    if target_model_name is not None:
        target_model = load_model(target_model_name)

    # simply compute test error
    if attack == "test":
        for (name, src_model) in zip(src_model_names, src_models):
            _, _, err = tf_test_error_rate(src_model, x, X_test, Y_test)
            print '{}: {:.1f}'.format(basename(name), err)
        if target_model_name is not None:
            _, _, err = tf_test_error_rate(target_model, x, X_test, Y_test)
        print '{}: {:.1f}'.format(basename(target_model_name), err)

        return

    if args.targeted_flag == 1:
        # Load cached random targets if present, otherwise draw a random
        # target class per sample (excluding the true class) and cache it.
        pickle_name = attack + '_' + src_model_name_joint + '_' + '_' + args.loss_type + '_targets.p'
        if os.path.exists(pickle_name):
            targets = pickle.load(open(pickle_name, 'rb'))
        else:
            targets = []
            allowed_targets = list(range(FLAGS.NUM_CLASSES))
            for i in range(len(Y_test)):
                allowed_targets.remove(Y_test_uncat[i])
                targets.append(np.random.choice(allowed_targets))
                allowed_targets = list(range(FLAGS.NUM_CLASSES))
            # targets = np.random.randint(10, size = BATCH_SIZE*BATCH_EVAL_NUM)
            targets = np.array(targets)
            print targets
            targets_cat = np_utils.to_categorical(
                targets, FLAGS.NUM_CLASSES).astype(np.float32)
            Y_test = targets_cat
            if SAVE_FLAG == True:
                pickle.dump(Y_test, open(pickle_name, 'wb'))

    # take the random step in the RAND+FGSM
    if attack == "rand_fgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)), 0.0,
            1.0)
        # NOTE(review): 'eps' has not been assigned at this point (compare
        # the other variant of this script, which does eps = args.eps
        # first) -- this line would raise a NameError; confirm.
        eps -= args.alpha

    # Per-source-model logits used to build the ensemble loss.
    logits = [None] * len(src_model_names)
    for i in range(len(src_model_names)):
        curr_model = src_models[i]
        logits[i] = curr_model(x)

    if args.loss_type == 'xent':
        loss, grad = gen_grad_ens(x, logits, y)
    elif args.loss_type == 'cw':
        grad = gen_grad_cw(x, logits, y)
    if args.targeted_flag == 1:
        # Targeted attacks descend (rather than ascend) the loss.
        grad = -1.0 * grad

    for eps in eps_list:
        # FGSM and RAND+FGSM one-shot attack
        if attack in ["fgs", "rand_fgs"] and args.norm == 'linf':
            adv_x = symbolic_fgs(x, grad, eps=eps)
        elif attack in ["fgs", "rand_fgs"] and args.norm == 'l2':
            adv_x = symbolic_fg(x, grad, eps=eps)

        # iterative FGSM
        if attack == "ifgs":
            # Restrict to 1000 samples to keep runtime manageable.
            l = 1000
            X_test = X_test[0:l]
            Y_test = Y_test[0:l]

            adv_x = x
            # iteratively apply the FGSM with small step size
            for i in range(args.num_iter):
                # NOTE(review): the inner loop reuses the outer loop
                # variable 'i'; harmless here but confusing -- confirm.
                adv_logits = [None] * len(src_model_names)
                for i in range(len(src_model_names)):
                    curr_model = src_models[i]
                    adv_logits[i] = curr_model(adv_x)

                if args.loss_type == 'xent':
                    loss, grad = gen_grad_ens(adv_x, adv_logits, y)
                elif args.loss_type == 'cw':
                    grad = gen_grad_cw(adv_x, adv_logits, y)
                if args.targeted_flag == 1:
                    grad = -1.0 * grad

                adv_x = symbolic_fgs(adv_x, grad, args.delta, True)
                # Project the accumulated perturbation back into the
                # eps-ball around the original input.
                r = adv_x - x
                r = K.clip(r, -eps, eps)
                adv_x = x + r

            adv_x = K.clip(adv_x, 0, 1)

        if attack == "CW_ens":
            # Restrict to 1000 samples; reuse cached adversarial samples
            # from a previous run when available.
            l = 1000
            pickle_name = attack + '_' + src_model_name_joint + '_' + str(
                args.eps) + '_adv.p'
            print(pickle_name)
            Y_test = Y_test[0:l]
            if os.path.exists(pickle_name) and attack == "CW_ens":
                print 'Loading adversarial samples'
                X_adv = pickle.load(open(pickle_name, 'rb'))

                for (name, src_model) in zip(src_model_names, src_models):
                    preds_adv, _, err = tf_test_error_rate(
                        src_model, x, X_adv, Y_test)
                    print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                                  basename(name), err)

                preds_adv, _, err = tf_test_error_rate(target_model, x, X_adv,
                                                       Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err)

                return

            X_test = X_test[0:l]
            time1 = time()
            cli = CarliniLiEns(K.get_session(),
                               src_models,
                               targeted=False,
                               confidence=args.kappa,
                               eps=eps)

            X_adv = cli.attack(X_test, Y_test)

            # Clip the CW perturbation back into the linf eps-ball.
            r = np.clip(X_adv - X_test, -eps, eps)
            X_adv = X_test + r
            time2 = time()
            print("Run with Adam took {}s".format(time2 - time1))

            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

            for (name, src_model) in zip(src_model_names, src_models):
                print('Carrying out white-box attack')
                pres, _, err = tf_test_error_rate(src_model, x, X_adv, Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(name), err)
            if target_model_name is not None:
                print('Carrying out black-box attack')
                preds, orig, err = tf_test_error_rate(target_model, x, X_adv,
                                                      Y_test)
                print '{}->{}: {:.1f}'.format(src_model_name_joint,
                                              basename(target_model_name), err)

            return

        # Cache the generated adversarial samples per attack/eps setting.
        pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
            eps) + '_adv.p'
        if args.targeted_flag == 1:
            pickle_name = attack + '_' + src_model_name_joint + '_' + args.loss_type + '_' + str(
                eps) + '_adv_t.p'

        if os.path.exists(pickle_name):
            print 'Loading adversarial samples'
            X_adv = pickle.load(open(pickle_name, 'rb'))
        else:
            print 'Generating adversarial samples'
            X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
            if SAVE_FLAG == True:
                pickle.dump(X_adv, open(pickle_name, 'wb'))

        avg_l2_perturb = np.mean(
            np.linalg.norm((X_adv - X_test).reshape(len(X_test), dim), axis=1))

        # white-box attack
        l = len(X_adv)
        print('Carrying out white-box attack')
        for (name, src_model) in zip(src_model_names, src_models):
            preds_adv, orig, err = tf_test_error_rate(src_model, x, X_adv,
                                                      Y_test[0:l])
            if args.targeted_flag == 1:
                # For targeted attacks report success rate, not error rate.
                err = 100.0 - err
            print '{}->{}: {:.1f}'.format(basename(name), basename(name), err)

        # black-box attack
        if target_model_name is not None:
            print('Carrying out black-box attack')
            preds, _, err = tf_test_error_rate(target_model, x, X_adv, Y_test)
            if args.targeted_flag == 1:
                err = 100.0 - err
            print '{}->{}: {:.1f}, {}, {} {}'.format(
                src_model_name_joint, basename(target_model_name), err,
                avg_l2_perturb, eps, attack)