Пример #1
0
def calculate_lid(datasets, model_path, sample_path, attack, k_nearest,
                  batch_size):
    """
    Load multiple characteristics for one dataset and one attack.
    :param dataset: 
    :param attack: 
    :param characteristics: 
    :return: 
    """
    # Load the model
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)

    [X_test_adv_train, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack + "/train")
    [X_test_adv_test, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack + "/test")
    train_num = len(X_test_adv_train)
    test_num = len(X_test_adv_test)
    X_test_adv = preprocess_image_1(
        np.concatenate((np.asarray(X_test_adv_train),
                        np.asarray(X_test_adv_test))).astype('float32'))
    if len(X_test_adv.shape) < 4:
        X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_train, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/train")
    [X_test_test, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/test")
    X_test_train = np.asarray(X_test_train)[np.random.choice(len(X_test_train),
                                                             train_num,
                                                             replace=False)]
    X_test_test = np.asarray(X_test_test)[np.random.choice(len(X_test_test),
                                                           test_num,
                                                           replace=False)]
    X_test = preprocess_image_1(
        np.concatenate((np.asarray(X_test_train),
                        np.asarray(X_test_test))).astype('float32'))
    if len(X_test.shape) < 4:
        X_test = np.expand_dims(X_test, axis=3)

    file_name = os.path.join('../detection/lid/',
                             "%s_%s.npy" % (datasets, attack))
    if not os.path.exists(file_name):
        # extract local intrinsic dimensionality
        characteristics, labels = get_lid(sess, x, model, feed_dict, X_test,
                                          X_test_adv, k_nearest, batch_size,
                                          datasets)
        data = np.concatenate((characteristics, labels), axis=1)
        np.save(file_name, data)
    return train_num
Пример #2
0
def jsma(datasets, sample_path, model_path='../models/integration/mnist'):
    """
    the Jacobian-based saliency map approach (JSMA)
    :param datasets
    :param sample: inputs to attack
    :param target: the class want to generate
    :param nb_classes: number of output classes
    :return:
    """
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    [X_test_adv, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_mutation_test(sample_path)
    import os
    for i in range(len(adv_image_files)):
        temp = adv_image_files[i].split('_')[-4]
        if os.path.exists("../datasets/integration/batch_attack/cifar10/" +
                          str(temp) + '.png'):
            os.remove("../datasets/integration/batch_attack/cifar10/" +
                      str(temp) + '.png')
Пример #3
0
def detect_adv_samples(datasets, model_path, sample_path, store_path,
                       attack_type):
    print('Loading the data and model...')
    # Load the model
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)

    # # Load the dataset
    if 'mnist' == datasets:
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
    elif 'cifar10' == datasets:
        preprocess_image = preprocess_image_1
        train_start = 0
        train_end = 50000
        test_start = 0
        test_end = 10000

        # Get CIFAR10 test data
        X_train, Y_train, fn_train, X_test, Y_test, fn_test = data_cifar10(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end,
            preprocess=preprocess_image)

    # # Refine the normal, noisy and adversarial sets to only include samples for
    # # which the original version was correctly classified by the model
    # preds_test = model_argmax(sess, x, preds, X_test, feed=feed_dict)
    # inds_correct = np.where(preds_test == Y_test.argmax(axis=1))[0]
    # X_test = X_test[inds_correct]
    # X_test = X_test[np.random.choice(len(X_test), 500)]#500
    #
    # # Check attack type, select adversarial and noisy samples accordingly
    # print('Loading adversarial samples...')
    # # Load adversarial samplesx
    # [X_test_adv, adv_image_files, real_labels, predicted_labels] = utils.get_data_mutation_test(sample_path)
    # X_test_adv = preprocess_image_1(np.asarray(X_test_adv).astype('float32'))
    # if len(X_test_adv.shape) < 4:
    #     X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_adv_train, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack_type + "/train")
    [X_test_adv_test, adv_image_files, real_labels, predicted_labels
     ] = utils.get_data_mutation_test("../datasets/experiment/" + datasets +
                                      "/" + attack_type + "/test")
    train_num = len(X_test_adv_train)
    test_num = len(X_test_adv_test)
    X_test_adv = preprocess_image_1(
        np.concatenate((np.asarray(X_test_adv_train),
                        np.asarray(X_test_adv_test))).astype('float32'))
    if len(X_test_adv.shape) < 4:
        X_test_adv = np.expand_dims(X_test_adv, axis=3)

    [X_test_train, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/train")
    [X_test_test, adv_image_files, real_labels,
     predicted_labels] = utils.get_data_normal_test("../datasets/experiment/" +
                                                    datasets + "/normal/test")
    X_test_train = np.asarray(X_test_train)[np.random.choice(len(X_test_train),
                                                             train_num,
                                                             replace=False)]
    X_test_test = np.asarray(X_test_test)[np.random.choice(len(X_test_test),
                                                           test_num,
                                                           replace=False)]
    X_test = preprocess_image_1(
        np.concatenate((np.asarray(X_test_train),
                        np.asarray(X_test_test))).astype('float32'))
    if len(X_test.shape) < 4:
        X_test = np.expand_dims(X_test, axis=3)

    ## Get Bayesian uncertainty scores
    print('Getting Monte Carlo dropout variance predictions...')
    uncerts_normal = get_mc_predictions(sess, x, preds,
                                        X_test).var(axis=0).mean(axis=1)
    uncerts_adv = get_mc_predictions(sess, x, preds,
                                     X_test_adv).var(axis=0).mean(axis=1)

    ## Get KDE scores
    # Get deep feature representations
    print('Getting deep feature representations...')
    X_train_features = get_deep_representations(sess, x, X_train, model,
                                                feed_dict)
    X_test_normal_features = get_deep_representations(sess, x, X_test, model,
                                                      feed_dict)
    X_test_adv_features = get_deep_representations(sess, x, X_test_adv, model,
                                                   feed_dict)

    # Train one KDE per class
    print('Training KDEs...')
    class_inds = {}
    for i in range(Y_train.shape[1]):
        class_inds[i] = np.where(Y_train.argmax(axis=1) == i)[0]
    kdes = {}
    warnings.warn(
        "Using pre-set kernel bandwidths that were determined "
        "optimal for the specific CNN models of the paper. If you've "
        "changed your model, you'll need to re-optimize the "
        "bandwidth.")
    for i in range(Y_train.shape[1]):
        kdes[i] = KernelDensity(kernel='gaussian',
                                bandwidth=BANDWIDTHS[datasets]) \
            .fit(X_train_features[class_inds[i]])

    # Get model predictions
    print('Computing model predictions...')
    preds_test_normal = model_argmax(sess, x, preds, X_test, feed=feed_dict)
    preds_test_adv = model_argmax(sess, x, preds, X_test_adv, feed=feed_dict)

    # Get density estimates
    print('computing densities...')
    densities_normal = score_samples(kdes, X_test_normal_features,
                                     preds_test_normal)
    densities_adv = score_samples(kdes, X_test_adv_features, preds_test_adv)

    uncerts_pos = uncerts_adv[:]
    uncerts_neg = uncerts_normal[:]
    characteristics, labels = merge_and_generate_labels(
        uncerts_pos, uncerts_neg)
    file_name = os.path.join('../detection/bu/',
                             "%s_%s.npy" % (datasets, attack_type))
    data = np.concatenate((characteristics, labels), axis=1)
    np.save(file_name, data)

    densities_pos = densities_adv[:]
    densities_neg = densities_normal[:]
    characteristics, labels = merge_and_generate_labels(
        densities_pos, densities_neg)
    file_name = os.path.join(
        '../detection/de/',
        "%s_%s_%.4f.npy" % (datasets, attack_type, BANDWIDTHS[datasets]))
    data = np.concatenate((characteristics, labels), axis=1)
    np.save(file_name, data)

    ## Z-score the uncertainty and density values
    uncerts_normal_z, uncerts_adv_z = normalize(uncerts_normal, uncerts_adv)
    densities_normal_z, densities_adv_z = normalize(densities_normal,
                                                    densities_adv)

    ## Build detector
    values, labels = features(densities_pos=densities_adv_z,
                              densities_neg=densities_normal_z,
                              uncerts_pos=uncerts_adv_z,
                              uncerts_neg=uncerts_normal_z)
    X_tr, Y_tr, X_te, Y_te = block_split(values, labels, train_num)

    lr = train_lr(X_tr, Y_tr)

    ## Evaluate detector
    # Compute logistic regression model predictions
    probs = lr.predict_proba(X_te)[:, 1]
    preds = lr.predict(X_te)
    # Compute AUC
    n_samples = int(len(X_te) / 2)
    # The first 2/3 of 'probs' is the negative class (normal and noisy samples),
    # and the last 1/3 is the positive class (adversarial samples).
    _, _, auc_score = compute_roc(probs_neg=probs[:n_samples],
                                  probs_pos=probs[n_samples:])

    precision = precision_score(Y_te, preds)
    recall = recall_score(Y_te, preds)

    y_label_pred = lr.predict(X_te)
    acc = accuracy_score(Y_te, y_label_pred)

    print(
        'Detector ROC-AUC score: %0.4f, accuracy: %.4f, precision: %.4f, recall: %.4f'
        % (auc_score, acc, precision, recall))
Пример #4
0
def prepare_datasets(datasets, model_path, attack_type, sample_path):
    print('Loading the data and model...')
    # Load the model
    sess, preds, x, y, model, feed_dict = model_load(datasets, model_path)
    # Load the dataset
    if 'mnist' == datasets:
        train_start = 0
        train_end = 60000
        test_start = 0
        test_end = 10000

        # Get MNIST test data
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
    elif 'cifar10' == datasets:
        preprocess_image = preprocess_image_1
        train_start = 0
        train_end = 50000
        test_start = 0
        test_end = 10000

        # Get CIFAR10 test data
        X_train, Y_train, fn_train, X_test, Y_test, fn_test = data_cifar10(
            train_start=train_start,
            train_end=train_end,
            test_start=test_start,
            test_end=test_end,
            preprocess=preprocess_image)

    if attack_type == "normal":
        # Refine the normal, noisy and adversarial sets to only include samples for
        # which the original version was correctly classified by the model
        preds_test = np.asarray([])
        for i in range(40):
            preds_test = np.concatenate(
                (preds_test,
                 model_argmax(sess,
                              x,
                              preds,
                              X_test[i * 250:(i + 1) * 250],
                              feed=feed_dict)))
        inds_correct = np.asarray(
            np.where(preds_test == Y_test.argmax(axis=1))[0])
        inds_correct = inds_correct[np.random.choice(len(inds_correct),
                                                     5000,
                                                     replace=False)]
        X_test = X_test[inds_correct]
        for i in range(4000):
            imsave(
                "../datasets/experiment/" + datasets + "/normal/train/" +
                str(inds_correct[i]) + '_' +
                str(int(preds_test[inds_correct[i]])) + '_' +
                str(int(preds_test[inds_correct[i]])) + '_.png',
                deprocess_image_1(X_test[i:i + 1]))
        for j in range(1000):
            imsave(
                "../datasets/experiment/" + datasets + "/normal/test/" +
                str(inds_correct[4000 + j]) + '_' +
                str(int(preds_test[inds_correct[4000 + j]])) + '_' +
                str(int(preds_test[inds_correct[4000 + j]])) + '_.png',
                deprocess_image_1(X_test[4000 + j:4001 + j]))
    elif attack_type == "error":
        preds_test = np.asarray([])
        for i in range(40):
            preds_test = np.concatenate(
                (preds_test,
                 model_argmax(sess,
                              x,
                              preds,
                              X_test[i * 250:(i + 1) * 250],
                              feed=feed_dict)))
        inds_correct = np.asarray(
            np.where(preds_test != Y_test.argmax(axis=1))[0])
        X_test = X_test[inds_correct]
        num = int(len(X_test) * 0.8)
        for i in range(num):
            imsave(
                "../datasets/experiment/" + datasets + "/error/train/" +
                str(inds_correct[i]) + '_' +
                str(int(np.argmax(Y_test[inds_correct[i]]))) + '_' +
                str(int(preds_test[inds_correct[i]])) + '_.png',
                deprocess_image_1(X_test[i:i + 1]))
        for j in range(len(X_test) - num):
            imsave(
                "../datasets/experiment/" + datasets + "/error/test/" +
                str(inds_correct[num + j]) + '_' +
                str(int(np.argmax(Y_test[inds_correct[num + j]]))) + '_' +
                str(int(preds_test[inds_correct[num + j]])) + '_.png',
                deprocess_image_1(X_test[num + j:num + 1 + j]))
    else:
        # Check attack type, select adversarial and noisy samples accordingly
        print('Loading adversarial samples...')
        # Load adversarial samplesx
        [X_test_adv, adv_image_files, real_labels, predicted_labels
         ] = utils.get_data_mutation_test(sample_path + attack_type + '/' +
                                          datasets)
        if len(X_test_adv) > 5000:
            index = np.asarray(range(len(X_test_adv)))
            index = index[np.random.choice(len(index), 5000, replace=False)]
            for i in range(4000):
                imsave(
                    "../datasets/experiment/" + datasets + "/" + attack_type +
                    "/train/" + adv_image_files[index[i]],
                    X_test_adv[index[i]])
            for j in range(1000):
                imsave(
                    "../datasets/experiment/" + datasets + "/" + attack_type +
                    "/test/" + adv_image_files[index[4000 + j]],
                    X_test_adv[index[4000 + j]])
        else:
            index = np.asarray(range(len(X_test_adv)))
            np.random.shuffle(index)
            cut = int(len(X_test_adv) * 0.8)
            for i in range(len(index)):
                if i < cut:
                    imsave(
                        "../datasets/experiment/" + datasets + "/" +
                        attack_type + "/train/" + adv_image_files[index[i]],
                        X_test_adv[index[i]])
                else:
                    imsave(
                        "../datasets/experiment/" + datasets + "/" +
                        attack_type + "/test/" + adv_image_files[index[i]],
                        X_test_adv[index[i]])
Пример #5
0
def directory_detect(datasets, dir_path, normal, store_path, ad, sess, preds, x, feed_dict):

    print('--- Extracting images from: ', dir_path)
    if normal:
        [adv_image_list, adv_image_files, real_labels, predicted_labels] = utils.get_normal_data_mutation_test(dir_path)
    else:
        [adv_image_list, adv_image_files, real_labels, predicted_labels] = utils.get_data_mutation_test(dir_path)
    adv_count = 0
    not_decided_images = 0
    total_mutation_counts = []
    label_change_mutation_counts = []
    suc_total_mutation_counts = []
    suc_label_change_mutation_counts = []

    print('--- Evaluating inputs ---')

    if not os.path.exists(store_path):
        os.makedirs(store_path)
    detector_results = []
    summary_results = []
    for i in range(len(adv_image_list)):
        # # print('- Running image ', i)
        ori_img = preprocess_image_1(adv_image_list[i].astype('float32'))

        orig_label = predicted_labels[i]
        [result, decided, total_mutation_count, label_change_mutation_count] = ad.detect(ori_img, orig_label, sess, x,
                                                                                         preds, feed_dict)

        detector_results.append(adv_image_files[i] + ',' + str(result) + ',' + str(decided) + ',' + str(total_mutation_count) + ',' + str(label_change_mutation_count))

        if result:
            adv_count += 1
            if not normal: # Record the counts for adversaries
                suc_total_mutation_counts.append(total_mutation_count)
                suc_label_change_mutation_counts.append(label_change_mutation_count)

        if normal and not result: # Record the counts for normals
            suc_total_mutation_counts.append(total_mutation_count)
            suc_label_change_mutation_counts.append(label_change_mutation_count)

        if not decided:
            not_decided_images += 1

        total_mutation_counts.append(total_mutation_count)
        label_change_mutation_counts.append(label_change_mutation_count)

    with open(store_path + "/detection_result.csv", "w") as f:
        for item in detector_results:
            f.write("%s\n" % item)

    summary_results.append('adv_num,' + str(len(adv_image_list)))
    summary_results.append('identified_num,' + str(adv_count))
    summary_results.append('undecided_num,' + str(not_decided_images))

    if normal:
        summary_results.append('accuracy,' + str(1 - float(adv_count)/len(total_mutation_counts)))
    else:
        summary_results.append('accuracy,' + str(float(adv_count)/len(total_mutation_counts)))

    if len(suc_label_change_mutation_counts) > 0 and not normal:
        summary_results.append(
            'avg_mutation_num,' + str(sum(suc_total_mutation_counts) / len(suc_total_mutation_counts)))
        summary_results.append(
            'avg_lc_num,' + str(float(sum(suc_label_change_mutation_counts)) / len(suc_label_change_mutation_counts)))

    if len(suc_label_change_mutation_counts) > 0 and normal:
        summary_results.append(
            'avg_mutation_num,' + str(sum(suc_total_mutation_counts) / len(suc_total_mutation_counts)))
        summary_results.append(
            'avg_lc_num,' + str(float(sum(suc_label_change_mutation_counts)) / len(suc_label_change_mutation_counts)))

    summary_results.append(total_mutation_counts)
    summary_results.append(label_change_mutation_counts)

    with open(store_path + "/detection_summary_result.csv", "w") as f:
        for item in summary_results:
            f.write("%s\n" % item)

    print('- Total adversary images evaluated: ', len(adv_image_list))
    print('- Identified adversaries: ', adv_count)
    print('- Not decided images: ', not_decided_images)
    if len(suc_label_change_mutation_counts) > 0:
        print('- Average mutation needed: ', sum(suc_total_mutation_counts) / len(suc_total_mutation_counts))
        print('- Average label change mutations: ',
              float(sum(suc_label_change_mutation_counts)) / len(suc_label_change_mutation_counts))
    else:
        summary_results.append(
            'avg_mutation_num,' + str(sum(total_mutation_counts) / len(total_mutation_counts)))
        summary_results.append(
            'avg_lc_num,' + str(float(sum(label_change_mutation_counts)) / len(label_change_mutation_counts)))