def get_uncertain_predictions(model,
                              inputs,
                              X,
                              Y,
                              nb_dropout_mutants=50,
                              dataset="cifar"):
    inputs_split_shape = inputs.shape

    args = dict(dataset=dataset, X_train=X, Y_train=Y)
    kde = [
        eval_kernel_density(model,
                            inputs[i, :, :, :, :],
                            reset_kde=True,
                            **args) for i in range(inputs.shape[0])
    ]

    inputs_reshaped = np.copy(
        np.reshape(inputs, (-1, inputs_split_shape[2], inputs_split_shape[3],
                            inputs_split_shape[4])))
    preds = get_mc_predictions(model,
                               inputs_reshaped,
                               nb_iter=nb_dropout_mutants)
    preds = np.reshape(
        preds,
        (nb_dropout_mutants, inputs_split_shape[0], inputs_split_shape[1], -1))
    ref = np.repeat(np.array([Y]), nb_dropout_mutants, axis=0)
    lcr = format_lcr(preds, ref)

    x = np.swapaxes(preds, 0, 1)
    var = x.var(axis=1)
    var_ = var.mean(axis=2)

    return lcr, var_, kde
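

# The helpers used above (get_mc_predictions, format_lcr, eval_kernel_density)
# come from the surrounding repo and are not redefined here. The sketch below
# is a minimal, self-contained illustration of the two uncertainty statistics
# this function returns: the mean MC-dropout variance per input, and one
# plausible reading of the label-change rate (LCR) that format_lcr is assumed
# to compute, i.e. the fraction of dropout mutants whose argmax disagrees with
# the reference label.
def _demo_uncertainty_statistics():
    import numpy as np

    rng = np.random.default_rng(0)
    nb_mutants, nb_samples, nb_classes = 50, 8, 10
    # Fake softmax outputs from nb_mutants stochastic (dropout) forward passes
    preds = rng.random((nb_mutants, nb_samples, nb_classes))
    preds /= preds.sum(axis=-1, keepdims=True)
    ref_labels = rng.integers(0, nb_classes, size=nb_samples)
    # Variance over mutants, then mean over classes -> one score per sample
    var_score = preds.var(axis=0).mean(axis=1)
    # Hypothetical LCR: share of mutants whose predicted label differs from ref
    mutant_labels = preds.argmax(axis=-1)
    lcr = (mutant_labels != ref_labels[None, :]).mean(axis=0)
    return var_score, lcr
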
def main(args):
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile('../data/model_%s.h5' % args.dataset), \
        'model file not found... must first train model using train_model.py.'
    assert os.path.isfile('../data/Adv_%s_%s.npy' %
                          (args.dataset, args.attack)), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'
    print('Loading the data and model...')
    # Load the model
    model = load_model('../data/model_%s.h5' % args.dataset)
    # Load the dataset
    X_train, Y_train, X_test, Y_test = get_data(args.dataset)
    # Check attack type, select adversarial and noisy samples accordingly
    print('Loading noisy and adversarial samples...')
    if args.attack == 'all':
        # TODO: implement 'all' option
        #X_test_adv = ...
        #X_test_noisy = ...
        raise NotImplementedError("'All' types detector not yet implemented.")
    else:
        # Load adversarial samples
        X_test_adv = np.load('../data/Adv_%s_%s.npy' %
                             (args.dataset, args.attack))
        # Craft an equal number of noisy samples
        X_test_noisy = get_noisy_samples(X_test, X_test_adv, args.dataset,
                                         args.attack)
    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [X_test, X_test_noisy, X_test_adv]):
        _, acc = model.evaluate(dataset,
                                Y_test,
                                batch_size=args.batch_size,
                                verbose=0)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))
        # Compute and display average perturbation sizes
        if s_type != 'normal':
            l2_diff = np.linalg.norm(dataset.reshape(
                (len(X_test), -1)) - X_test.reshape((len(X_test), -1)),
                                     axis=1).mean()
            print("Average L-2 perturbation size of the %s test set: %0.2f" %
                  (s_type, l2_diff))
    # Refine the normal, noisy and adversarial sets to only include samples for
    # which the original version was correctly classified by the model
    preds_test = model.predict_classes(X_test,
                                       verbose=0,
                                       batch_size=args.batch_size)
    inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    X_test = X_test[inds_correct]
    X_test_noisy = X_test_noisy[inds_correct]
    X_test_adv = X_test_adv[inds_correct]

    ## Get Bayesian uncertainty scores
    print('Getting Monte Carlo dropout variance predictions...')
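    # get_mc_predictions (from the repo's util module) runs repeated stochastic
    # forward passes with dropout enabled; each result below is assumed to have
    # shape (nb_iter, n_samples, n_classes), so .var(axis=0).mean(axis=1) yields
    # one scalar uncertainty score per test sample.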
    uncerts_normal = get_mc_predictions(model, X_test,
                                        batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_noisy = get_mc_predictions(model, X_test_noisy,
                                       batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)
    uncerts_adv = get_mc_predictions(model, X_test_adv,
                                     batch_size=args.batch_size) \
        .var(axis=0).mean(axis=1)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    X_train_features = get_deep_representations(model,
                                                X_train,
                                                batch_size=args.batch_size)
    X_test_normal_features = get_deep_representations(
        model, X_test, batch_size=args.batch_size)
    X_test_noisy_features = get_deep_representations(
        model, X_test_noisy, batch_size=args.batch_size)
    X_test_adv_features = get_deep_representations(model,
                                                   X_test_adv,
                                                   batch_size=args.batch_size)
    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(Y_train.shape[1]):
        class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
    kdes = {}
    warnings.warn(
        "Using pre-set kernel bandwidths that were determined "
        "optimal for the specific CNN models of the paper. If you've "
        "changed your model, you'll need to re-optimize the "
        "bandwidth.")
    for i in range(Y_train.shape[1]):
        kdes[i] = KernelDensity(kernel='gaussian',
                                bandwidth=BANDWIDTHS[args.dataset]) \
            .fit(X_train_features[class_inds[i]])
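    # If the model differs from the paper's CNNs, one way to re-optimize the
    # bandwidth (a sketch, not part of this script; it assumes the per-class
    # features fit in memory) is a cross-validated grid search on the KDE
    # log-likelihood:
    #
    #   from sklearn.model_selection import GridSearchCV
    #   grid = GridSearchCV(KernelDensity(kernel='gaussian'),
    #                       {'bandwidth': np.logspace(-2, 1, 20)}, cv=3)
    #   grid.fit(X_train_features[class_inds[i]])
    #   best_bandwidth = grid.best_params_['bandwidth']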
    # Get model predictions
    print('Computing model predictions...')
    preds_test_normal = model.predict_classes(X_test,
                                              verbose=0,
                                              batch_size=args.batch_size)
    preds_test_noisy = model.predict_classes(X_test_noisy,
                                             verbose=0,
                                             batch_size=args.batch_size)
    preds_test_adv = model.predict_classes(X_test_adv,
                                           verbose=0,
                                           batch_size=args.batch_size)
    # Get density estimates
    print('computing densities...')
    densities_normal = score_samples(kdes, X_test_normal_features,
                                     preds_test_normal)
    densities_noisy = score_samples(kdes, X_test_noisy_features,
                                    preds_test_noisy)
    densities_adv = score_samples(kdes, X_test_adv_features, preds_test_adv)

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z, uncerts_noisy_z = normalize(
        uncerts_normal, uncerts_adv, uncerts_noisy)
    densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
        densities_normal, densities_adv, densities_noisy)
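    # `normalize` comes from the repo's util module; a plausible equivalent
    # (an assumption, not the verified implementation) z-scores the three
    # arrays jointly so the detector features share a single scale:
    #
    #   from sklearn.preprocessing import scale
    #   def normalize(normal, adv, noisy):
    #       n = len(normal)
    #       total = scale(np.concatenate((normal, adv, noisy)))
    #       return total[:n], total[n:2 * n], total[2 * n:]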

    ## Build detector
    values, labels, lr = train_lr(densities_pos=densities_adv_z,
                                  densities_neg=np.concatenate(
                                      (densities_normal_z, densities_noisy_z)),
                                  uncerts_pos=uncerts_adv_z,
                                  uncerts_neg=np.concatenate(
                                      (uncerts_normal_z, uncerts_noisy_z)))

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs = lr.predict_proba(values)[:, 1]
    # Compute AUC
    n_samples = len(X_test)
    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).
    _, _, auc_score = compute_roc(probs_neg=probs[:2 * n_samples],
                                  probs_pos=probs[2 * n_samples:])
    print('Detector ROC-AUC score: %0.4f' % auc_score)
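

# `compute_roc` above is imported from the repo's utilities; a minimal sketch
# of the same idea (an assumption about its behaviour, not the repo's code)
# built on scikit-learn's roc_curve/auc:
def _compute_roc_sketch(probs_neg, probs_pos):
    import numpy as np
    from sklearn.metrics import roc_curve, auc

    y_true = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
    y_score = np.concatenate((probs_neg, probs_pos))
    fpr, tpr, _ = roc_curve(y_true, y_score)
    return fpr, tpr, auc(fpr, tpr)
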
def main(args):
    attack = args.attack
    text_file = open("../stats/" + attack + "_stats.txt", "w")
    sd_start = args.sd_start
    num_sd = args.num_sd
    for sd in range(sd_start, sd_start + num_sd):
        print("seed: " + str(sd))
        with_norm = False
        batch_size = 256
        np.random.seed(sd)
        idx0 = np.random.choice(10000, 5000)
        dataset = 'mnist'
        assert attack in ['fgsm', 'bim', 'bim-a', 'bim-b', 'jsma'], \
            "Attack parameter must be either 'fgsm', 'bim', 'bim-a', 'bim-b' or 'jsma'"
        assert os.path.isfile('../data/model_%s.h5' % dataset), \
            'model file not found... must first train model using train_model.py.'
        assert os.path.isfile('../data/Adv_%s_%s.npy' % (dataset, attack)), \
            'adversarial sample file not found... must first craft adversarial ' \
            'samples using craft_adv_samples.py'
        print('Loading the data and model...')
        # Load the model
        model = load_model('../data/model_%s.h5' % dataset)
        # Load the dataset
        X_train, Y_train, X_test, Y_test = get_data()
        # Check attack type, select adversarial samples accordingly
        print('Loading adversarial samples...')
        X_test_adv = np.load('../data/Adv_%s_%s.npy' % (dataset, attack))

        ################################################ Table 1 ###########################################
        # Take half of the clean images and their adversarial counterparts, then combine them
        X_test, Y_test, X_test_adv = X_test[idx0], Y_test[idx0], X_test_adv[
            idx0]
        X_test_all_un = np.concatenate((X_test, X_test_adv), axis=0)
        Y_test_all_un = np.concatenate((Y_test, Y_test), axis=0)
        # Check model accuracies on each sample type and then on combined undefended dataset
        for s_type, dt in zip(['normal', 'adversarial'], [X_test, X_test_adv]):
            _, acc = model.evaluate(dt,
                                    Y_test,
                                    batch_size=batch_size,
                                    verbose=0)
            print("Model accuracy on the %s test set: %0.2f%%" %
                  (s_type, 100 * acc))
        _, acc = model.evaluate(X_test_all_un,
                                Y_test_all_un,
                                batch_size=batch_size,
                                verbose=0)
        print("Model accuracy on the combined undefended test set: %0.2f%%" %
              (100 * acc))

        ################################################ Table 2 ###########################################
        # Refine the normal and adversarial sets to only include samples
        # for which the original version was correctly classified by the model.
        # Then create detector labels: "1" for clean and "0" for adversarial.
        preds_test = model.predict_classes(X_test,
                                           verbose=0,
                                           batch_size=batch_size)
        inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
        X_test = X_test[inds_correct]
        X_test_adv = X_test_adv[inds_correct]
        Y_test = Y_test[inds_correct]
        label_clean = np.ones(Y_test.shape[0])
        label_adv = np.zeros(Y_test.shape[0])

        # Combine the filtered dataset and detector label
        X_test_all_filtered = np.concatenate((X_test, X_test_adv), axis=0)
        Y_test_all_filtered = np.concatenate((Y_test, Y_test), axis=0)
        label_filtered = np.concatenate((label_clean, label_adv), axis=0)

        # Get Prediction + Bayesian uncertainty scores
        print('Getting Monte Carlo dropout variance predictions...')
        #pred_normal = get_mc_predictions(model, X_test, batch_size=batch_size)
        #pred_adv = get_mc_predictions(model, X_test_adv, batch_size=batch_size)
        pred_all_filtered = get_mc_predictions(model,
                                               X_test_all_filtered,
                                               batch_size=batch_size)
        #uncerts_normal = pred_normal.var(axis=0).mean(axis=1)
        #uncerts_adv = pred_adv.var(axis=0).mean(axis=1)
        uncerts_all = pred_all_filtered.var(axis=0).mean(axis=1)
        if with_norm:
            ## Z-score the uncertainty
            uncerts_all = normalize(uncerts_all)
        #uncerts_all = np.concatenate((uncerts_normal, uncerts_adv),axis=0)
        #class_normal = pred_normal.mean(axis=0).argmax(axis=1)
        #class_adv = pred_adv.mean(axis=0).argmax(axis=1)
        #preds_test_all_filtered = np.concatenate((class_normal, class_adv),axis=0)
        preds_test_all_filtered = pred_all_filtered.mean(axis=0).argmax(axis=1)

        # Detector parameters to be fine-tuned in experiments
        # (presets below cover only 'fgsm', 'bim' and 'jsma')
        params = {
            'fgsm': {
                'H': 0.002,
                'L': 0.000003,
                'C': 5
            },
            'bim': {
                'H': 0.0022,
                'L': 0.0012,
                'C': 5
            },
            'jsma': {
                'H': 0.01,
                'L': 0.003,
                'C': 5
            }
        }
        start_H, start_L, start_C = params[attack]["H"], params[attack][
            "L"], params[attack]["C"]
        H_range, L_range, C_range = 0, 0, 1
        H_step, L_step, C_step = 1e-4, 5e-6, 1

        threshold_ls = []
        estimator = DecisionTreeClassifier(criterion='entropy',
                                           max_leaf_nodes=3,
                                           splitter='best',
                                           random_state=None)
        X = np.expand_dims(uncerts_all, axis=-1)
        Y = label_filtered
        estimator.fit(X, Y)
        n_nodes = estimator.tree_.node_count
        children_left = estimator.tree_.children_left
        children_right = estimator.tree_.children_right
        threshold = estimator.tree_.threshold
        node_depth = np.zeros(shape=n_nodes, dtype=np.int64)
        is_leaves = np.zeros(shape=n_nodes, dtype=bool)
        stack = [(0, -1)]  # seed is the root node id and its parent depth
        while len(stack) > 0:
            node_id, parent_depth = stack.pop()
            node_depth[node_id] = parent_depth + 1
            if (children_left[node_id] != children_right[node_id]):
                stack.append((children_left[node_id], parent_depth + 1))
                stack.append((children_right[node_id], parent_depth + 1))
            else:
                is_leaves[node_id] = True
        for i in range(n_nodes):
            if not is_leaves[i]:
                threshold_ls.append(threshold[i])
        start_H_d = max(threshold_ls)
        start_L_d = min(threshold_ls)
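        # Note: the traversal above only collects the split thresholds; with
        # scikit-learn's tree API the same values can be read directly as
        #   threshold[children_left != children_right]
        # because leaf nodes have children_left == children_right (== -1).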

        #Detection
        for C in range(start_C, min(start_C + C_range, 5) + C_step, C_step):
            for sad in range(2):
                H = [start_H_d, start_H][sad]
                L = [start_L_d, start_L][sad]
                label_pred = detect_clean_adv(X_test_all_filtered,
                                              preds_test_all_filtered,
                                              uncerts_all, model, C, H, L,
                                              batch_size)
                #Detection Evaluation
                CM = confusion_matrix(label_filtered, label_pred)
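                # confusion_matrix orders labels as [0, 1] = [adversarial, clean]
                # with rows = true label and columns = prediction; since clean (1)
                # is the positive class here, CM[0][0] is adversarial kept as
                # adversarial (TN) and CM[1][1] is clean kept as clean (TP).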
                TN = CM[0][0]
                FN = CM[1][0]
                TP = CM[1][1]
                FP = CM[0][1]
                ACC_DETECT = (TP + TN) / (TP + TN + FP + FN)
                res_detect = "Seed: " + str(sd) + ", C: " + str(C) + ", H: " + str(H)+ ", L: " + str(L) \
                                + "\nDetection Acc: " + str(ACC_DETECT*100)[:4] \
                                + "%, False Negative: " + str(FN) + ", False Positive: " + str(FP) \
                                + ", True Negative: " + str(TN) + ", True Positive: " + str(TP)+"."
                print(res_detect)
                text_file.write(res_detect + "\n")

                ################################################ Table 3 ###########################################
                #Only get images reported as clean
                idx_clean_reported = np.where(label_pred)[0]
                X_test_all_def = X_test_all_filtered[idx_clean_reported]
                Y_test_all_def = Y_test_all_filtered[idx_clean_reported]
                label_def = label_filtered[idx_clean_reported]
                num_all = label_def.shape[0]

                # Reclassification
                print('Computing final model predictions...')
                preds_test_all_def = model.predict_classes(
                    X_test_all_def, verbose=0, batch_size=batch_size)
                inds_correct_def = np.where(
                    preds_test_all_def == Y_test_all_def.argmax(axis=1))[0]
                inds_incorrect_def = np.where(
                    preds_test_all_def != Y_test_all_def.argmax(axis=1))[0]

                # Reclassification Evaluation
                num_clean_correct = np.argwhere(
                    label_def[inds_correct_def] == 1).shape[0]
                num_clean_incorrect = np.argwhere(
                    label_def[inds_incorrect_def] == 1).shape[0]
                num_adv_correct = np.argwhere(
                    label_def[inds_correct_def] == 0).shape[0]
                num_adv_incorrect = np.argwhere(
                    label_def[inds_incorrect_def] == 0).shape[0]
                clean_correct_acc = num_clean_correct / num_all
                clean_incorrect_acc = num_clean_incorrect / num_all
                adv_correct_acc = num_adv_correct / num_all
                adv_incorrect_acc = num_adv_incorrect / num_all
                total_acc = clean_correct_acc + adv_correct_acc
                res_reclf = "Reclassification Acc: " + str(total_acc*100)[:4] \
                + "%, Clean Correct Acc: " + str(clean_correct_acc*100)[:4] \
                + "%, Clean Incorrect Acc: " + str(clean_incorrect_acc*100)[:4] \
                + "%, Adv Correct Acc: " + str(adv_correct_acc*100)[:4] \
                + "%, Adv Incorrect Acc: " + str(adv_incorrect_acc*100)[:4] + "%."
                print(res_reclf)
                text_file.write(res_reclf + "\n")

    text_file.close()
def main(args):
    ## assertions
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
        'adversarial sample file not found... must first craft adversarial ' \
        'samples using craft_adv_samples.py'

    print('Loading the data and model...')
    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to('cuda')
    model.eval()

    # Load the dataset
    train_data, test_data = get_data(args.dataset)
    train_loader = DataLoader(
        dataset = train_data,
        batch_size = args.batch_size,
    )

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')
    X_test_adv = np.load('../data/Adv_%s_%s.npy' % (args.dataset, args.attack))
    X_test_adv = torch.from_numpy(X_test_adv)
    #train_adv   = [ (x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_train_adv, train_data) ]
    test_adv    = [ (x_tmp, y_tmp[1]) for x_tmp, y_tmp in zip(X_test_adv, test_data) ]
    ##### create noisy data
    noise_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean = (0.1307, ), std = (0.3081, )),  # standard MNIST mean/std
        AddGaussianNoise(0., 0.1)
    ])
    train_noisy, test_noisy = get_data(args.dataset, noise_transform)
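    # `AddGaussianNoise` is defined elsewhere in this repo; a minimal sketch of
    # a compatible transform (an assumption about its behaviour) would be:
    #
    #   class AddGaussianNoise(object):
    #       def __init__(self, mean=0., std=1.):
    #           self.mean, self.std = mean, std
    #       def __call__(self, tensor):
    #           return tensor + torch.randn(tensor.size()) * self.std + self.mean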


    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [test_data, test_noisy, test_adv]):
        data_loader = DataLoader(
            dataset = dataset,
            batch_size = 1,
        )
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s test set: %0.2f%%" % (s_type, 100 * acc) )
        # Compute and display average perturbation sizes
        ### TODO
        '''
        if not s_type == 'normal':
            l2_diff = np.linalg.norm(
                dataset.reshape((len(X_test), -1)) - X_test.reshape((len(X_test), -1)),
                axis=1
            ).mean()
            print("Average L-2 perturbation size of the %s test set: %0.2f" % (s_type, l2_diff))
        '''

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model.
    ### Run the test data through the model and keep the correctly predicted samples.

    test_loader = DataLoader(
        dataset = test_data,
        batch_size = args.batch_size
    )

    y_test_list = []
    pred_list = []
    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].to('cuda')
            y_test_list.append( batch[1] )
            pred_list.append( model(x) )

    pred_test = torch.cat(pred_list)
    Y_test = torch.cat(y_test_list).to('cuda')

    inds_correct = torch.where(Y_test == pred_test.argmax(axis=1), torch.full_like(Y_test, 1), torch.full_like(Y_test, 0)).to('cuda')
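    # inds_correct is a 0/1 mask over the test set (1 where the model's argmax
    # matches the label); it is equivalent to (Y_test == pred_test.argmax(dim=1)).long().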
    picked_test_data       = []
    picked_test_data_noisy = []
    picked_test_data_adv   = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_test)):
        if b == 1:
            picked_test_data.append( (test_data[i][0], y_tmp) )
            picked_test_data_noisy.append( (test_noisy[i][0], y_tmp) )
            picked_test_data_adv.append( (X_test_adv[i], y_tmp) )
        else:
            continue
    picked_test_loader = DataLoader(
        dataset = picked_test_data,
        batch_size = args.batch_size
    )
    picked_test_noisy_loader = DataLoader(
        dataset = picked_test_data_noisy,
        batch_size = args.batch_size
    )
    picked_test_adv_loader = DataLoader(
        dataset = picked_test_data_adv,
        batch_size = args.batch_size
    )

    #######################################
    ## Get Bayesian uncertainty scores
    nb_size = 50
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal  = get_mc_predictions(model, picked_test_loader,         nb_iter=nb_size)#.unsqueeze(1)
    uncerts_noisy   = get_mc_predictions(model, picked_test_noisy_loader,   nb_iter=nb_size)#.unsqueeze(1)
    uncerts_adv     = get_mc_predictions(model, picked_test_adv_loader,     nb_iter=nb_size)#.unsqueeze(1)
    
    print(uncerts_normal.shape)
    print(uncerts_noisy.shape)
    print(uncerts_adv.shape)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features        = get_deep_representations(model, train_loader              , args.dataset)
    x_test_normal_features  = get_deep_representations(model, picked_test_loader        , args.dataset)
    x_test_noisy_features   = get_deep_representations(model, picked_test_noisy_loader  , args.dataset)
    x_test_adv_features     = get_deep_representations(model, picked_test_adv_loader    , args.dataset)
    print(x_train_features.shape)
    print(x_test_normal_features.shape)
    print(x_test_noisy_features.shape)
    print(x_test_adv_features.shape)
    
    
    class_num = 10
    Y_train_label = [ tmp[1] for tmp in train_data ]
    Y_train_label = np.array(Y_train_label)
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[ np.arange(Y_train_label.size), Y_train_label ] = 1
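    # Y_train is a one-hot label matrix of shape (n_samples, class_num); the
    # same result could be written as np.eye(class_num)[Y_train_label].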
    

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        #class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size )

    kdes = {}
    warnings.warn("Using pre-set kernel bandwidths that were determined "
                  "optimal for the specific CNN models of the paper. If you've "
                  "changed your model, you'll need to re-optimize the bandwidth.")
    for i in range(class_num):
        kdes[i] = KernelDensity(kernel='gaussian', bandwidth=BANDWIDTHS[args.dataset]).fit( x_train_features.cpu().numpy()[class_inds[i]] )
    #print(kdes)

    # Get model predictions
    print('Computing model predictions...')
    data_loaders = [ picked_test_loader,
        picked_test_noisy_loader,
        picked_test_adv_loader
    ]
    preds = []
    for now_loader in data_loaders:
        with torch.no_grad():
            tmp_result = []
            for batch in now_loader:
                x = batch[0].to('cuda')
                pred = model(x)
                tmp_result.append(pred)
            preds.append( torch.cat(tmp_result) )
    preds_test_normal = torch.argmax(preds[0], dim=1)
    preds_test_noisy = torch.argmax(preds[1], dim=1)
    preds_test_adv = torch.argmax(preds[2], dim=1)
    print(preds_test_normal)

    # Get density estimates
    print('computing densities...')
    densities_normal = score_samples(
        kdes,                           
        x_test_normal_features.cpu(),   
        preds_test_normal.cpu()
    )
    densities_noisy = score_samples(
        kdes,
        x_test_noisy_features.cpu(),
        preds_test_noisy.cpu()
    )
    densities_adv = score_samples(
        kdes,
        x_test_adv_features.cpu(),
        preds_test_adv.cpu()
    )

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z, uncerts_noisy_z = normalize(
        uncerts_normal.cpu().numpy(),
        uncerts_adv.cpu().numpy(),
        uncerts_noisy.cpu().numpy()
    )
    densities_normal_z, densities_adv_z, densities_noisy_z = normalize(
        densities_normal,
        densities_adv,
        densities_noisy
    )
    print('.......Densities mean.......')
    print(densities_normal_z.mean())
    print(densities_adv_z.mean())
    print(densities_noisy_z.mean())

    ## Build detector
    ### combine
    values_combine, labels_combine, lr_combine = train_lr(
        densities_pos = densities_adv_z,
        densities_neg = np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos = uncerts_adv_z,
        uncerts_neg = np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag = 'combine'
    )
    ### dense
    values_dense, labels_dense, lr_dense = train_lr(
        densities_pos = densities_adv_z,
        densities_neg = np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos = uncerts_adv_z,
        uncerts_neg = np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag = 'dense'
    )
    ### uncert
    values_uncert, labels_uncert, lr_uncert = train_lr(
        densities_pos = densities_adv_z,
        densities_neg = np.concatenate((densities_normal_z, densities_noisy_z)),
        uncerts_pos = uncerts_adv_z,
        uncerts_neg = np.concatenate((uncerts_normal_z, uncerts_noisy_z)),
        flag = 'uncert'
    )


    ## Evaluate detector
    # Compute logistic regression model predictions
    probs_combine   = lr_combine.predict_proba(values_combine)[:, 1]
    probs_dense     = lr_dense.predict_proba(values_dense)[:, 1]
    probs_uncert    = lr_uncert.predict_proba(values_uncert)[:, 1]

    # Compute AUC
    n_samples = len(picked_test_data)
    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).

    #probs_neg = probs[:2 * n_samples],
    #probs_pos = probs[2 * n_samples:],
    prob_datas = [
            (probs_combine[:2 * n_samples], probs_combine[2 * n_samples:], 'combine'),
            (probs_dense[:2 * n_samples],   probs_dense[2 * n_samples:], 'dense'),
            (probs_uncert[:2 * n_samples],  probs_uncert[2 * n_samples:], 'uncert')
    ]
    _, _, auc_score = compute_roc(
        prob_datas,
        plot=True
    )


def evaluate_test(args, model, kdes, datatypes, nb_size, flags):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    test_dataset = {}

    transform = None
    test_dataset['normal'] = get_data(args.dataset,
                                      train=False,
                                      transform=transform)
    test_dataset['noisy'] = get_data(args.dataset,
                                     train=False,
                                     transform=transform)
    ##### Load adversarial samples (created by craft_adv_samples.py)
    ### craft adversarial examples on test_dataset
    print('[Test] Loading noisy and adversarial samples...')
    X_test_adv = np.load('../data/Adv_%s_%s.npy' % (args.dataset, args.attack))
    X_test_adv = torch.from_numpy(X_test_adv)
    test_adv = [(x_tmp, y_tmp[1])
                for x_tmp, y_tmp in zip(X_test_adv, test_dataset['normal'])]
    test_dataset['adversarial'] = test_adv

    num = len(test_dataset['normal'])

    test_loader = {}
    for datatype in datatypes:
        test_loader[datatype] = DataLoader(dataset=test_dataset[datatype],
                                           batch_size=args.batch_size,
                                           shuffle=False)
    ### TODO: pick the samples the model classifies correctly on the normal set

    ################# Get Bayesian uncertainty scores
    #### mc_variance
    print('[Test] Getting Monte Carlo dropout variance...')
    mc_variance = {}
    for datatype in datatypes:
        mc_variance[datatype] = get_mc_predictions(model,
                                                   test_loader[datatype],
                                                   nb_iter=nb_size,
                                                   method='default')

    #### mc_entropy
    print('[Test] Getting Monte Carlo dropout entropy...')
    mc_entropy = {}
    for datatype in datatypes:
        mc_entropy[datatype] = get_mc_predictions(model,
                                                  test_loader[datatype],
                                                  nb_iter=nb_size,
                                                  method='entropy')
        where_are_NaNs = isnan(mc_entropy[datatype])
        mc_entropy[datatype][where_are_NaNs] = 0

    ### entropy
    print('[Test] Getting entropy...')
    entropy = {}
    for datatype in datatypes:
        entropy[datatype] = get_entropy(model, test_loader[datatype])
        where_are_NaNs = isnan(entropy[datatype])
        entropy[datatype][where_are_NaNs] = 0

    ################# Get KDE scores
    # Get deep feature representations
    print('[Test] Getting deep feature representations...')
    features = {}
    for datatype in datatypes:
        features[datatype] = get_deep_representations(model,
                                                      test_loader[datatype],
                                                      args.dataset)

    # Get model predictions
    print('[Test] Computing model predictions...')
    preds = {}
    for datatype in datatypes:
        with torch.no_grad():
            tmp_result = []
            for batch in test_loader[datatype]:
                x = batch[0].to(device)
                pred = model(x)
                tmp_result.append(pred.detach().cpu())
            preds[datatype] = torch.argmax(torch.cat(tmp_result), dim=1)

    # Get density estimates
    ###### get test density
    print('[Test] computing densities...')
    densities = {}
    for datatype in datatypes:
        densities[datatype] = score_samples(kdes, features[datatype].cpu(),
                                            preds[datatype].cpu())
    ###### Z-score the uncertainty and density values
    ###### normalize
    mc_entropy_z = {}
    mc_entropy_z['normal'], mc_entropy_z['noisy'], mc_entropy_z[
        'adversarial'] = normalize(
            mc_entropy['normal'].cpu().numpy(),
            mc_entropy['noisy'].cpu().numpy(),
            mc_entropy['adversarial'].cpu().numpy(),
        )
    mc_variance_z = {}
    mc_variance_z['normal'], mc_variance_z['noisy'], mc_variance_z[
        'adversarial'] = normalize(
            mc_variance['normal'].cpu().numpy(),
            mc_variance['noisy'].cpu().numpy(),
            mc_variance['adversarial'].cpu().numpy(),
        )
    entropy_z = {}
    entropy_z['normal'], entropy_z['noisy'], entropy_z[
        'adversarial'] = normalize(
            entropy['normal'].cpu().numpy(),
            entropy['noisy'].cpu().numpy(),
            entropy['adversarial'].cpu().numpy(),
        )
    densities_z = {}
    densities_z['normal'], densities_z['noisy'], densities_z[
        'adversarial'] = normalize(
            densities['normal'],
            densities['noisy'],
            densities['adversarial'],
        )
    print('.......Densities............')
    for datatype in datatypes:
        print(datatype, ' Mean: ', densities_z[datatype].mean())

    ### dense, uncert, combine
    values = {}
    labels = {}
    for now_flag in flags:
        tmp_values, tmp_labels = get_value(
            densities=(densities_z['adversarial'],
                       np.concatenate(
                           (densities_z['normal'], densities_z['noisy']))),
            entropy=(entropy_z['adversarial'],
                     np.concatenate(
                         (entropy_z['normal'], entropy_z['noisy']))),
            mc_entropy=(mc_entropy_z['adversarial'],
                        np.concatenate(
                            (mc_entropy_z['normal'], mc_entropy_z['noisy']))),
            mc_variance=(mc_variance_z['adversarial'],
                         np.concatenate((mc_variance_z['normal'],
                                         mc_variance_z['noisy']))),
            flag=now_flag)
        values[now_flag] = tmp_values
        labels[now_flag] = tmp_labels

    return values, labels, num
def main(args):
    print(args)

    datatypes = ['normal', 'noisy', 'adversarial']
    ## assertions
    assert args.dataset in [
        'mnist', 'cifar', 'svhn'
    ], "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'bim', 'jsma', 'cw', 'all'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', 'bim', " \
        "'jsma', 'cw' or 'all'"
    #assert os.path.isfile('../data/Adv_%s_%s.npy' % (args.dataset, args.attack)), \
    #    'adversarial sample file not found... must first craft adversarial ' \
    #    'samples using craft_adv_samples.py'

    print('Loading the data and model...')

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Load the model
    model = get_model(args.dataset)
    model.load_state_dict(torch.load(args.model))
    model.to(device)

    model.eval()
    # Load the dataset
    train_data = get_data(args.dataset, train=True)
    train_loader = DataLoader(dataset=train_data,
                              batch_size=args.batch_size,
                              shuffle=False)

    ##### Load adversarial samples (created by craft_adv_samples.py)
    print('Loading noisy and adversarial samples...')

    ### train_adv
    X_train_adv = np.load('../data/Adv_%s_%s_train.npy' %
                          (args.dataset, args.attack))
    X_train_adv = torch.from_numpy(X_train_adv)
    train_adv = [(x_tmp, y_tmp[1])
                 for x_tmp, y_tmp in zip(X_train_adv, train_data)]

    ##### create noisy data
    if args.dataset == 'mnist':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.1307, ), std=(0.3081, )),
            AddGaussianNoise(0., 0.1)
        ])
    elif args.dataset == 'cifar':
        noise_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                 std=(0.247, 0.243, 0.261)),
            AddGaussianNoise(0., 0.1)
        ])
    train_noisy = get_data(args.dataset, train=True, transform=noise_transform)
    #print('NOISY', train_noisy)
    #print(train_noisy[0])

    X_train, Y_train = getXY(train_data)
    # Check model accuracies on each sample type
    for s_type, dataset in zip(['normal', 'noisy', 'adversarial'],
                               [train_data, train_noisy, train_adv]):
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.batch_size,
        )
        acc = evaluate(model, data_loader)
        print("Model accuracy on the %s test set: %0.2f%%" %
              (s_type, 100 * acc))

        # Compute and display average perturbation sizes
        ### TODO
        X_now, Y_now = getXY(dataset)

        if s_type != 'normal':
            l2_diff = np.linalg.norm(X_now.reshape(
                (len(X_train), -1)) - X_train.reshape((len(X_train), -1)),
                                     axis=1).mean()
            print("Average L-2 perturbation size of the %s train set: %0.2f" %
                  (s_type, l2_diff))

    ### Refine the normal, noisy and adversarial sets to only include samples
    ### for which the original version was correctly classified by the model.
    ### Run the training data through the model and keep the correctly predicted samples.
    y_train_list = []
    pred_train_list = []
    with torch.no_grad():
        for batch in train_loader:
            x = batch[0].to(device)
            y_train_list.append(batch[1])
            pred_train_list.append(model(x))

    y_train_list = torch.cat(y_train_list)
    Y_train = y_train_list.detach().cpu()
    pred_train = torch.cat(pred_train_list).detach().cpu()

    inds_correct = torch.where(Y_train == pred_train.argmax(axis=1),
                               torch.full_like(Y_train, 1),
                               torch.full_like(Y_train, 0)).to(device)

    picked_train_data = {}
    for datatype in datatypes:
        picked_train_data[datatype] = []
    for i, (b, y_tmp) in enumerate(zip(inds_correct, Y_train)):
        if b == 1:
            picked_train_data['normal'].append((train_data[i][0], y_tmp))
            picked_train_data['noisy'].append((train_noisy[i][0], y_tmp))
            picked_train_data['adversarial'].append((X_train_adv[i], y_tmp))
        else:
            continue

    picked_train_loader = {}
    for datatype in datatypes:
        picked_train_loader[datatype] = DataLoader(
            dataset=picked_train_data[datatype], batch_size=args.batch_size)

    ###########################################################################################################################################
    ################# Get Bayesian uncertainty scores
    nb_size = 20
    #### mc_variance
    print('Getting Monte Carlo dropout variance...')
    mc_variance = {}
    for datatype in datatypes:
        mc_variance[datatype] = get_mc_predictions(
            model,
            picked_train_loader[datatype],
            nb_iter=nb_size,
            method='default')

    #### mc_entropy
    print('Getting Monte Carlo dropout entropy...')
    mc_entropy = {}
    for datatype in datatypes:
        mc_entropy[datatype] = get_mc_predictions(
            model,
            picked_train_loader[datatype],
            nb_iter=nb_size,
            method='entropy')
        where_are_NaNs = isnan(mc_entropy[datatype])
        mc_entropy[datatype][where_are_NaNs] = 0

    ### entropy
    print('Getting entropy...')
    entropy = {}
    for datatype in datatypes:
        entropy[datatype] = get_entropy(model, picked_train_loader[datatype])
        where_are_NaNs = isnan(entropy[datatype])
        entropy[datatype][where_are_NaNs] = 0

    #print(entropy['normal'])
    #print(entropy['noisy'])
    #print(entropy['adversarial'])

    ################# Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    x_train_features = get_deep_representations(model, train_loader,
                                                args.dataset)
    picked_train_features = {}
    for datatype in datatypes:
        picked_train_features[datatype] = get_deep_representations(
            model, picked_train_loader[datatype], args.dataset)

    print('Shape')
    print(x_train_features.shape)
    for datatype in datatypes:
        print(picked_train_features[datatype].shape)
    ####### CLASS NUM ########
    class_num = 10
    Y_train_label = [tmp[1] for tmp in train_data]
    Y_train_label = np.array(Y_train_label)
    Y_train = np.zeros((len(Y_train_label), class_num))
    Y_train[np.arange(Y_train_label.size), Y_train_label] = 1

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(class_num):
        class_inds[i] = np.where(Y_train_label == i)[0]
        print('class_inds[', i, ']: ', class_inds[i].size)
    kdes = {}
    warnings.warn(
        "Using pre-set kernel bandwidths that were determined optimal for the specific CNN models of the paper. If you've "
        "changed your model, you'll need to re-optimize the bandwidth.")

    ### Use train features to fit Kernel density
    for i in range(class_num):
        kdes[i] = KernelDensity(
            kernel='gaussian', bandwidth=BANDWIDTHS[args.dataset]).fit(
                x_train_features.cpu().numpy()[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    data_loaders = []
    for datatype in datatypes:
        data_loaders.append(picked_train_loader[datatype])
    preds = []
    preds_train = {}
    for now_loader in data_loaders:
        with torch.no_grad():
            tmp_result = []
            for batch in now_loader:
                x = batch[0].to(device)
                pred = model(x)
                tmp_result.append(pred.detach().cpu())
            preds.append(torch.cat(tmp_result))
    preds_train['normal'] = torch.argmax(preds[0], dim=1)
    preds_train['noisy'] = torch.argmax(preds[1], dim=1)
    preds_train['adversarial'] = torch.argmax(preds[2], dim=1)

    # Get density estimates
    ###### get test density
    print('computing densities...')
    train_densities = {}
    for datatype in datatypes:
        train_densities[datatype] = score_samples(
            kdes, picked_train_features[datatype].cpu(),
            preds_train[datatype].cpu())
    ###### Z-score the uncertainty and density values
    ###### normalize
    mc_entropy_z = {}
    mc_entropy_z['normal'], mc_entropy_z['noisy'], mc_entropy_z[
        'adversarial'] = normalize(
            mc_entropy['normal'].cpu().numpy(),
            mc_entropy['noisy'].cpu().numpy(),
            mc_entropy['adversarial'].cpu().numpy(),
        )
    mc_variance_z = {}
    mc_variance_z['normal'], mc_variance_z['noisy'], mc_variance_z[
        'adversarial'] = normalize(
            mc_variance['normal'].cpu().numpy(),
            mc_variance['noisy'].cpu().numpy(),
            mc_variance['adversarial'].cpu().numpy(),
        )
    entropy_z = {}
    entropy_z['normal'], entropy_z['noisy'], entropy_z[
        'adversarial'] = normalize(
            entropy['normal'].cpu().numpy(),
            entropy['noisy'].cpu().numpy(),
            entropy['adversarial'].cpu().numpy(),
        )
    densities_z = {}
    densities_z['normal'], densities_z['noisy'], densities_z[
        'adversarial'] = normalize(
            train_densities['normal'],
            train_densities['noisy'],
            train_densities['adversarial'],
        )
    #print(entropy_z['normal'])
    #print(entropy_z['noisy'])
    #print(entropy_z['adversarial'])

    print('.......Densities............')
    for datatype in datatypes:
        print(datatype, ' Mean: ', densities_z[datatype].mean())

    ## Build detector
    ### dense, uncert, combine
    flags = ['dense', 'entropy', 'mc_entropy', 'mc_variance', 'combine']
    values = {}
    labels = {}
    lrs = {}
    for now_flag in flags:
        print('processing %s ...' % now_flag)
        tmp_values, tmp_labels, tmp_lr = train_lr(
            densities=(densities_z['adversarial'],
                       np.concatenate(
                           (densities_z['normal'], densities_z['noisy']))),
            entropy=(entropy_z['adversarial'],
                     np.concatenate(
                         (entropy_z['normal'], entropy_z['noisy']))),
            mc_entropy=(mc_entropy_z['adversarial'],
                        np.concatenate(
                            (mc_entropy_z['normal'], mc_entropy_z['noisy']))),
            mc_variance=(mc_variance_z['adversarial'],
                         np.concatenate((mc_variance_z['normal'],
                                         mc_variance_z['noisy']))),
            flag=now_flag)
        #densities_pos = densities_z['adversarial'],
        #densities_neg = np.concatenate((densities_z['normal'], densities_z['noisy'])),
        #uncerts_pos = uncerts_z['adversarial'],
        #uncerts_neg = np.concatenate((uncerts_z['normal'], uncerts_z['noisy'])),
        #flag = flag
        values[now_flag] = tmp_values
        labels[now_flag] = tmp_labels
        lrs[now_flag] = tmp_lr

    if args.do_test:
        test_values, test_labels, test_num = evaluate_test(
            args, model, kdes, datatypes, nb_size, flags)

    ## Evaluate detector
    ### evaluate on train dataset
    probs = {}
    for flag in flags:
        if args.do_test:
            probs[flag] = lrs[flag].predict_proba(test_values[flag])[:, 1]
        else:
            probs[flag] = lrs[flag].predict_proba(values[flag])[:, 1]
    # Compute AUC
    if args.do_test:
        n_samples = test_num
    else:
        n_samples = len(train_data)

    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).
    prob_datas = []
    for flag in flags:
        prob_datas.append(
            (probs[flag][:2 * n_samples], probs[flag][2 * n_samples:], flag))

    _, _, auc_score = compute_roc(prob_datas,
                                  plot=True,
                                  pic_name=args.pic_name)