Пример #1
0
def main():
    """Extract the layer embeddings of a trained model and fit dimension-reduction models.

    Parses the command-line options, builds the train and test data loaders for
    the selected dataset, loads the saved model checkpoint, and extracts the
    layer embeddings and labels. The test set accuracy is printed. Depending on
    `--fixed-dimension`, it then either searches for the best number of
    dimensions and neighbors, or projects the layer embeddings to the given
    fixed dimension. Output and model files are written to the output directory.

    Raises:
        ValueError: if the model type is not 'mnist', 'cifar10' or 'svhn'.
    """
    parser = argparse.ArgumentParser(description='Arguments')
    parser.add_argument('--model-type', '-m',
                        choices=['mnist', 'cifar10', 'svhn'],
                        default='mnist',
                        help='model type or name of the dataset')
    parser.add_argument('--detection-method', '--dm',
                        choices=DETECTION_METHODS,
                        default='proposed',
                        help="Detection method to run. Choices are: {}".format(
                            ', '.join(DETECTION_METHODS)))
    parser.add_argument(
        '--fixed-dimension', '--fd',
        type=int,
        default=0,
        help='Use this option to project the layer embeddings to a fixed dimension, if a layer '
             'dimension exceeds this value. Zero or a negative value disables this option.')
    parser.add_argument('--batch-size', '-b', type=int, default=256,
                        help='batch size for the data loader')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', '-s', type=int, default=1,
                        help='random seed (default: 1)')
    parser.add_argument('--n-jobs', type=int, default=8,
                        help='number of parallel jobs to use for multiprocessing')
    parser.add_argument('--gpu', type=str, default='2',
                        help='gpus to execute code on')
    parser.add_argument('--output-dir', '-o', type=str, default='',
                        help='output directory path')
    args = parser.parse_args()

    # Defensive check: argparse `choices` already restricts the value, but fail early and
    # clearly if the list of choices and the branches below ever get out of sync
    if args.model_type not in ('mnist', 'cifar10', 'svhn'):
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    n_jobs = get_num_jobs(args.n_jobs)

    # Output directory. `exist_ok=True` avoids the check-then-create race of `isdir` + `makedirs`
    output_dir = args.output_dir or os.path.join(ROOT, 'outputs', args.model_type)
    os.makedirs(output_dir, exist_ok=True)

    data_path = os.path.join(ROOT, 'data')
    # The normalization constants are keyed by the same names as the model type
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*NORMALIZE_IMAGES[args.model_type])
    ])

    def make_loader(dataset):
        # All loaders in this script share the same batch size and shuffle setting
        return torch.utils.data.DataLoader(dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           **kwargs)

    if args.model_type == 'mnist':
        train_loader = make_loader(
            datasets.MNIST(data_path, train=True, download=True, transform=transform))
        test_loader = make_loader(
            datasets.MNIST(data_path, train=False, download=True, transform=transform))
        model = MNIST().to(device)
    elif args.model_type == 'cifar10':
        train_loader = make_loader(
            datasets.CIFAR10(root=data_path, train=True, download=True, transform=transform))
        test_loader = make_loader(
            datasets.CIFAR10(root=data_path, train=False, download=True, transform=transform))
        model = ResNet34().to(device)
    else:
        # 'svhn' selects the split by name rather than with a `train` flag
        train_loader = make_loader(
            datasets.SVHN(root=data_path, split='train', download=True, transform=transform))
        test_loader = make_loader(
            datasets.SVHN(root=data_path, split='test', download=True, transform=transform))
        model = SVHN().to(device)

    # Load the saved model checkpoint and set it to eval mode
    model = load_model_checkpoint(model, args.model_type)
    model.eval()

    # Get the feature embeddings from all the layers and the labels
    print("Calculating layer embeddings for the train data:")
    embeddings, labels, _, _ = extract_layer_embeddings(
        model, device, train_loader, method=args.detection_method)
    print("\nCalculating layer embeddings for the test data:")
    _, labels_test, labels_pred_test, _ = extract_layer_embeddings(
        model, device, test_loader, method=args.detection_method)
    accu_test = np.sum(labels_test == labels_pred_test) / float(
        labels_test.shape[0])
    print("\nTest set accuracy = {:.4f}".format(accu_test))

    ns = labels.shape[0]
    if ns > MAX_SAMPLES_DIM_REDUCTION:
        # Take a random class-stratified subsample of the data for intrinsic dimension estimation and
        # dimensionality reduction
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=MAX_SAMPLES_DIM_REDUCTION,
                                     random_state=args.seed)
        # Only the labels matter for the stratified split, so a placeholder data array is enough
        placeholder = np.zeros((ns, 2))
        _, indices_sample = next(sss.split(placeholder, labels))
    else:
        indices_sample = np.arange(ns)

    if args.fixed_dimension < 1:
        output_file = os.path.join(output_dir, 'output_layer_extraction.txt')
        model_file = os.path.join(output_dir, 'models_dimension_reduction.pkl')
        # Search for the best number of dimensions and number of neighbors and save the corresponding
        # projection model
        search_dimension_and_neighbors(embeddings, labels, indices_sample,
                                       model_file, output_file, n_jobs)
    else:
        # The LID-based methods use a different (larger) number of layer embeddings, so their
        # files get a distinguishing suffix
        suffix = '_lid' if args.detection_method in ['lid', 'lid_class_cond'] else ''
        output_file = os.path.join(
            output_dir,
            "output_fixed_dimension_{:d}{}.txt".format(args.fixed_dimension, suffix))
        model_file = os.path.join(
            output_dir,
            "models_fixed_dimension_{:d}{}.pkl".format(args.fixed_dimension, suffix))

        # Project the embeddings from each layer to the specified fixed dimension, if it exceeds the
        # fixed dimension
        project_fixed_dimension(embeddings, labels, args.fixed_dimension,
                                indices_sample, model_file, output_file,
                                n_jobs)
def _str2bool(value):
    """Convert a command-line string such as 'true'/'false' or '1'/'0' to a bool.

    Unlike `type=bool`, which treats every non-empty string (including 'False') as
    True, this parses the text. An empty string maps to False.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognized boolean value.
    """
    if isinstance(value, bool):
        return value

    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False

    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got '{}'".format(value))


def _save_or_load_array(path, array):
    """Save `array` to `path` if that file does not exist; otherwise return the array stored there."""
    if os.path.isfile(path):
        return np.load(path)

    np.save(path, array)
    return array


def main():
    """Create cross-validation folds of the test set and generate adversarial samples per fold.

    Parses the command-line options, loads the test data and the saved model
    checkpoint, and splits the test data into stratified cross-validation folds.
    The train/test split of each fold is saved to (or, if already present, loaded
    from) numpy files under the output directory. Unless `--generate-attacks` is
    false, adversarial samples are created from each test fold with
    `knn_attack.attack`, saved via `combine_and_save`, and the accuracy of the DNN
    and the detection methods on clean and adversarial data is printed.

    Raises:
        ValueError: if the model type is not valid or the data loader verification fails.
    """
    # Detection models used for comparison are stored in module-level globals.
    # NOTE(review): presumably these are read by `helper_accuracy` — confirm.
    global models_detec_propo, models_detec_dknn

    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=64,
                        help='batch size of evaluation')
    parser.add_argument(
        '--max-num-adver',
        type=int,
        default=-1,
        help='Maximum number of adversarial samples to generate. If set to the default of -1, it '
             'attempts to generate adversarial samples for every test fold sample.')
    parser.add_argument('--model-type', '-m',
                        choices=['mnist', 'cifar10', 'svhn'],
                        default='mnist',
                        help='model type or name of the dataset')
    parser.add_argument('--output-dir', '-o', default='',
                        help='directory path for saving the output and model files')
    parser.add_argument('--seed', '-s', type=int, default=SEED_DEFAULT,
                        help='seed for random number generation')
    # `type=bool` would turn any non-empty string (even 'False') into True, so parse the text
    parser.add_argument('--generate-attacks', type=_str2bool, default=True,
                        help='should attack samples be generated/not (default:True)')
    parser.add_argument('--gpu', type=str, default="3",
                        help='which gpus to execute code on')
    parser.add_argument(
        '--defense-method', '--dm',
        choices=['dknn', 'proposed', 'dnn'],
        default='proposed',
        help="Defense method to attack. Choices are 'dnn', 'dknn' and 'proposed'")
    parser.add_argument(
        '--det-model-file', '--dmf',
        default='',
        help='Path to the saved detector model file. Loads from a default location of not specified.')
    parser.add_argument('--dist-metric',
                        choices=['euclidean', 'cosine'],
                        default='euclidean',
                        help='distance metric to use')
    parser.add_argument('--n-jobs', type=int, default=16,
                        help='number of parallel jobs to use for multiprocessing')
    parser.add_argument(
        '--untargeted',
        action='store_true',
        default=False,
        help='Use this option to create untargeted adversarial samples from this attack')
    parser.add_argument(
        '--skip-save-batches',
        action='store_true',
        default=False,
        help='Use this option to skip saving the intermediate data batches to numpy files. '
             'This will shave off some time and avoid frequent I/O')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='Disables CUDA training')
    parser.add_argument('--test-batch-size', '--tb', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--num-folds', '--nf', type=int, default=CROSS_VAL_SIZE,
                        help='number of cross-validation folds')
    args = parser.parse_args()

    # Defensive check: argparse `choices` already restricts the value, but fail early and
    # clearly if the list of choices and the branches below ever get out of sync
    if args.model_type not in ('mnist', 'cifar10', 'svhn'):
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Output directory. `exist_ok=True` avoids the check-then-create race of `isdir` + `makedirs`
    output_dir = args.output_dir or os.path.join(ROOT, 'numpy_data', args.model_type)
    os.makedirs(output_dir, exist_ok=True)

    num_folds = args.num_folds
    data_path = os.path.join(ROOT, 'data')
    # The normalization constants are keyed by the same names as the model type
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*NORMALIZE_IMAGES[args.model_type])
    ])
    if args.model_type == 'mnist':
        testset = datasets.MNIST(data_path, train=False, download=True, transform=transform)
        model = MNIST().to(device)
    elif args.model_type == 'cifar10':
        testset = datasets.CIFAR10(root=data_path, train=False, download=True, transform=transform)
        model = ResNet34().to(device)
    else:
        # 'svhn' selects the split by name rather than with a `train` flag
        testset = datasets.SVHN(root=data_path, split='test', download=True, transform=transform)
        model = SVHN().to(device)

    # The test loader is not shuffled so that it can be verified against the arrays made from it
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)

    # Load the saved model checkpoint and set the model to evaluation mode
    model = load_model_checkpoint(model, args.model_type)
    model.eval()

    # convert the test data loader to 2 ndarrays
    data, labels = get_samples_as_ndarray(test_loader)

    # Get the range of values in the data array
    bounds = get_data_bounds(data)
    print("Range of data values: ({:.4f}, {:.4f})\n".format(*bounds))

    # verify if the data loader is the same as the ndarrays it generates
    if not verify_data_loader(test_loader, batch_size=args.test_batch_size):
        raise ValueError("Data loader verification failed")

    # Directory holding the saved detection models of each method
    detection_dir = os.path.join(ROOT, 'outputs', args.model_type, 'detection',
                                 CUSTOM_ATTACK)

    # Path to the detection model file
    if args.det_model_file:
        det_model_file = args.det_model_file
    elif args.defense_method != 'dnn':
        # default path to the saved detection model file
        det_model_file = os.path.join(
            detection_dir, 'models_{}.pkl'.format(args.defense_method))
    else:
        # The plain DNN has no detection model
        det_model_file = ''

    print("Defense method: {}".format(args.defense_method))
    if det_model_file:
        print("Loading saved detection models from the file: {}".format(
            det_model_file))
        # Load the detection models (from each cross-validation fold) from a pickle file.
        # `models_detec` will be a list of trained detection models from each fold.
        # SECURITY: unpickling executes arbitrary code; only load model files from a trusted source
        with open(det_model_file, 'rb') as fp:
            models_detec = pickle.load(fp)
    else:
        models_detec = [None] * num_folds

    # Detection models for the dknn method. Used for comparison.
    # SECURITY: same trust requirement for these pickle files as above
    with open(os.path.join(detection_dir, 'models_dknn.pkl'), 'rb') as fp:
        models_detec_dknn = pickle.load(fp)

    # Detection models for the proposed method. Used for comparison
    with open(os.path.join(detection_dir, 'models_proposed.pkl'), 'rb') as fp:
        models_detec_propo = pickle.load(fp)

    if args.max_num_adver > 0:
        # Split the budget evenly across the folds, but never let it round down to zero:
        # a zero budget would be handled as "no limit" further below
        max_num_adver = max(1, args.max_num_adver // num_folds)
    else:
        max_num_adver = args.max_num_adver

    # repeat for each fold in the cross-validation split
    skf = StratifiedKFold(n_splits=num_folds,
                          shuffle=True,
                          random_state=args.seed)
    for fold_idx, (ind_tr, ind_te) in enumerate(skf.split(data, labels), start=1):
        t_init = time.time()
        data_tr = data[ind_tr, :]
        labels_tr = labels[ind_tr]
        data_te = data[ind_te, :]
        labels_te = labels[ind_te]

        # Set number of nearest neighbors based on the data size and the neighborhood constant
        n_neighbors = int(np.ceil(labels_tr.shape[0]**NEIGHBORHOOD_CONST))
        print("\nProcessing fold {:d}".format(fold_idx))
        print("Number of nearest neighbors = {:d}".format(n_neighbors))

        # make dir based on fold to save data
        numpy_save_path = os.path.join(output_dir, "fold_" + str(fold_idx))
        os.makedirs(numpy_save_path, exist_ok=True)

        # save the train and test folds to `numpy_save_path`, or load them if they exist already
        data_tr = _save_or_load_array(
            os.path.join(numpy_save_path, 'data_tr.npy'), data_tr)
        labels_tr = _save_or_load_array(
            os.path.join(numpy_save_path, 'labels_tr.npy'), labels_tr)
        data_te = _save_or_load_array(
            os.path.join(numpy_save_path, 'data_te.npy'), data_te)
        labels_te = _save_or_load_array(
            os.path.join(numpy_save_path, 'labels_te.npy'), labels_te)

        if not args.generate_attacks:
            print("generated original data split for fold : ", fold_idx)
            continue

        adv_save_path = os.path.join(numpy_save_path, CUSTOM_ATTACK)
        os.makedirs(adv_save_path, exist_ok=True)

        n_test = labels_te.shape[0]
        n_train = labels_tr.shape[0]
        if n_train > MAX_NUM_REPS:
            # Select a random, class-stratified sample from the training data of size `MAX_NUM_REPS`.
            # This is done to speed-up the attack optimization
            sss = StratifiedShuffleSplit(n_splits=1,
                                         test_size=MAX_NUM_REPS,
                                         random_state=args.seed)
            _, ind_sample = next(sss.split(data_tr, labels_tr))
            data_tr_sample = data_tr[ind_sample, :]
            labels_tr_sample = labels_tr[ind_sample]
            print(
                "\nRandomly sampling the train split from {:d} to {:d} samples"
                .format(n_train, MAX_NUM_REPS))
        else:
            data_tr_sample = data_tr
            labels_tr_sample = labels_tr

        # Data loader for the train and test split
        train_fold_loader = convert_to_loader(data_tr_sample,
                                              labels_tr_sample,
                                              batch_size=args.batch_size,
                                              custom=False)
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             batch_size=args.batch_size,
                                             custom=False)
        # Extract the layer embeddings for samples from the train and test split
        layer_embeddings_train, _, _, _ = extract_layer_embeddings_numpy(
            model, device, train_fold_loader, method='proposed')
        layer_embeddings_test, _, labels_pred_dnn_test, _ = extract_layer_embeddings_numpy(
            model, device, test_fold_loader, method='proposed')
        # Calculate accuracy of the DNN and the detection methods on clean data
        accu_clean_dnn, accu_clean_propo, accu_clean_dknn = helper_accuracy(
            layer_embeddings_test, labels_pred_dnn_test, labels_te, fold_idx - 1)
        print(
            "Accuracy on clean data:\nDNN classifier: {:.4f}, proposed: {:.4f}, dknn: {:.4f}"
            .format(accu_clean_dnn, accu_clean_propo, accu_clean_dknn))

        # Load kernel sigma values from file if available
        sigma_filename = os.path.join(
            adv_save_path, 'kernel_sigma_{}.npy'.format(args.dist_metric))
        if os.path.isfile(sigma_filename):
            sigma_per_layer = np.load(sigma_filename)
        else:
            # Search for suitable kernel scale per layer.
            # `sigma_per_layer` should be a numpy array of size `(data_te.shape[0], n_layers)`
            print("Setting the kernel scale values for the test fold data.")
            sigma_per_layer = knn_attack.set_kernel_scale(
                layer_embeddings_train,
                layer_embeddings_test,
                metric=args.dist_metric,
                n_neighbors=n_neighbors,
                n_jobs=args.n_jobs)
            np.save(sigma_filename, sigma_per_layer)

        # These are not needed anymore; drop the references before the attack
        del test_fold_loader, layer_embeddings_train, layer_embeddings_test
        # numpy array to torch tensor
        sigma_per_layer = torch.from_numpy(sigma_per_layer).to(device)
        # Index of samples from each class in `labels_tr_sample`
        labels_uniq = np.unique(labels_tr_sample)
        indices_per_class = {
            c: np.where(labels_tr_sample == c)[0]
            for c in labels_uniq
        }

        # `layer_embeddings_per_class_train` contains the layer wise embeddings corresponding to each class
        # from the `train_fold_loader`. It is a dict mapping each class to a list of torch tensors per layer
        layer_embeddings_per_class_train = knn_attack.extract_layer_embeddings(
            model,
            device,
            train_fold_loader,
            indices_per_class,
            split_by_class=True)

        # A non-positive budget means no limit other than the size of the test fold
        if max_num_adver > 0:
            max_num_adver_fold = min(max_num_adver, n_test)
        else:
            max_num_adver_fold = n_test

        print(
            "Creating adversarial samples from the test fold. Maximum number of adversarial samples: {:d}"
            .format(max_num_adver_fold))
        # Recreating the test fold loader with `custom = True` in order to get the sample indices.
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             batch_size=args.batch_size,
                                             custom=True,
                                             shuffle=True)
        data_adver = []
        labels_adver = []
        data_clean = []
        labels_clean = []
        norm_perturb = []
        is_correct = []
        is_adver = []
        n_batches = len(test_fold_loader)
        n_adver_curr = 0
        for batch_idx, (data_temp, labels_temp,
                        index_temp) in enumerate(test_fold_loader, start=1):
            print("Batch {:d}/{:d}".format(batch_idx, n_batches))
            index_temp = index_temp.cpu().numpy()
            # The loader is shuffled, so use the sample indices to pick the matching
            # DNN predictions and kernel scale values for this batch
            labels_pred_temp = labels_pred_dnn_test[index_temp]
            # main attack function
            data_adver_batch, labels_adver_batch, norm_perturb_batch, is_correct_batch, is_adver_batch = \
                knn_attack.attack(
                    model, device, data_temp.to(device), labels_temp, labels_pred_temp,
                    layer_embeddings_per_class_train, labels_uniq, sigma_per_layer[index_temp, :],
                    model_detector=models_detec[fold_idx - 1], untargeted=args.untargeted,
                    dist_metric=args.dist_metric, fast_mode=True, verbose=True
                )
            # all returned outputs are numpy arrays
            # accumulate results from this batch
            data_adver.append(data_adver_batch)
            labels_adver.append(labels_adver_batch)
            data_clean.append(data_temp.detach().cpu().numpy())
            labels_clean.append(labels_temp.detach().cpu().numpy())
            norm_perturb.append(norm_perturb_batch)
            is_correct.append(is_correct_batch)
            is_adver.append(is_adver_batch)
            if not args.skip_save_batches:
                # combine data from the batches so far and save them to numpy files
                _ = combine_and_save(adv_save_path, data_adver,
                                     labels_adver, data_clean,
                                     labels_clean, norm_perturb,
                                     is_correct, is_adver, labels_te)
                print("Saved data up to batch {:d}".format(batch_idx))

            n_adver_curr += is_adver_batch[is_adver_batch].shape[0]
            if n_adver_curr >= max_num_adver_fold:
                print(
                    "Found {:d} adversarial samples from {:d} data batches"
                    .format(n_adver_curr, batch_idx))
                break

        del test_fold_loader
        # combine data from the batches and save them to numpy files
        data_adver, labels_adver, data_clean, labels_clean, norm_perturb, is_correct, is_adver = \
            combine_and_save(adv_save_path, data_adver, labels_adver, data_clean, labels_clean, norm_perturb,
                             is_correct, is_adver, labels_te)

        # Calculate accuracy of the DNN and the detection methods on adversarial inputs
        data_loader = convert_to_loader(data_adver,
                                        labels_clean,
                                        batch_size=args.batch_size)
        layer_embeddings, _, labels_pred_dnn, _ = extract_layer_embeddings_numpy(
            model, device, data_loader, method='proposed')
        del data_loader
        accu_dnn, accu_propo, accu_dknn = helper_accuracy(
            layer_embeddings, labels_pred_dnn, labels_clean, fold_idx - 1)
        n_adver = is_adver[is_adver].shape[0]
        # Avoid taking the mean of an empty array when no adversarial sample was found
        if n_adver > 0:
            avg_norm_perturb = np.mean(norm_perturb[is_adver])
        else:
            avg_norm_perturb = float('nan')

        print(
            "\nTest fold {:d}: #samples = {:d}, #adversarial samples = {:d}, avg. perturbation norm = {:.6f}"
            .format(fold_idx, n_test, n_adver, avg_norm_perturb))
        print(
            "Accuracy on clean and adversarial data from test fold {:d}:".
            format(fold_idx))
        print("method\t{}\t{}".format('accu. clean', 'accu. adver'))
        print("{}\t{:.4f}\t{:.4f}".format('DNN', accu_clean_dnn, accu_dnn))
        print("{}\t{:.4f}\t{:.4f}".format('proposed', accu_clean_propo,
                                          accu_propo))
        print("{}\t{:.4f}\t{:.4f}".format('dknn', accu_clean_dknn,
                                          accu_dknn))
        t_del = (time.time() - t_init) / 3600.
        print("\nTime taken for fold {:d}: {:.2f} hours".format(fold_idx, t_del))
Пример #3
0
def main():
    """Train (or load) a DNN classifier for the selected dataset and optionally attack it.

    Parses the command line, then builds the train/test data loaders, the model, the
    optimizer and the learning-rate scheduler that correspond to `--model-type`. After that:

    - with `--train`, the model is trained for `--epochs` epochs, evaluated after every
      epoch, and checkpointed every 20 epochs;
    - otherwise the saved checkpoint is loaded (`--ckpt` defaults to True);
    - `foolbox_attack` is run on the test loader (`--attack` defaults to True);
    - with `--save-model`, a final checkpoint is written.

    Raises:
        ValueError: if `--model-type` is not one of the supported datasets.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='Arguments')
    parser.add_argument('--model-type', '-m', choices=['mnist', 'cifar10', 'svhn'], default='cifar10',
                        help='model type or name of the dataset')
    parser.add_argument('--batch-size', '-b', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', '--tb', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    # The help text used to advertise a default of 10 although the actual default is 100.
    parser.add_argument('--epochs', '-e', type=int, default=100, metavar='N',
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR', help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', '-g', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training')
    parser.add_argument('--seed', '-s', type=int, default=1, metavar='S', help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='number of batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False, help='For Saving the current Model')
    parser.add_argument('--adv-attack', '--aa', choices=['FGSM', 'PGD', 'CW'], default='FGSM',
                        help='type of adversarial attack')
    # NOTE(review): `store_true` combined with `default=True` means `--attack` (and `--ckpt`
    # below) can never be switched off from the command line. Left unchanged to keep the CLI
    # backward-compatible.
    parser.add_argument('--attack', action='store_true', default=True, help='option to launch adversarial attack')
    parser.add_argument('--p-norm', '-p', choices=['2', 'inf'], default='inf',
                        help="p norm for the adversarial attack; options are '2' and 'inf'")
    parser.add_argument('--train', '-t', action='store_true', default=False, help='commence training')
    parser.add_argument('--ckpt', action='store_true', default=True, help='Use the saved model checkpoint')
    parser.add_argument('--gpu', type=str, default='2', help='gpus to execute code on')
    args = parser.parse_args()

    # Restrict the visible GPUs *before* the first CUDA query. The environment variable is
    # only guaranteed to be honored if it is set before CUDA gets initialized, and
    # `torch.cuda.is_available()` may already trigger that initialization.
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    # Extra DataLoader options are only passed when running on the GPU
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    data_path = os.path.join(ROOT, 'data')
    # Stays None for 'mnist' and 'svhn'; only the 'cifar10' branch sets an explicit loss
    criterion = None
    # ToDo: Verify if the bounds is accurate; needed for adv. example generation
    if args.model_type == 'mnist':
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(*NORMALIZE_IMAGES['mnist'])]
        )
        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_path, train=True, download=True, transform=transform),
            batch_size=args.batch_size, shuffle=True, **kwargs
        )
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_path, train=False, download=True, transform=transform),
            batch_size=args.test_batch_size, shuffle=True, **kwargs
        )
        model = MNIST().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
        scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
        bounds = (-255, 255)

    elif args.model_type == 'cifar10':
        # Data augmentation is applied to the training split only
        transform_train = transforms.Compose(
            [transforms.RandomCrop(32, padding=4),
             transforms.RandomHorizontalFlip(),
             transforms.ToTensor(),
             transforms.Normalize(*NORMALIZE_IMAGES['cifar10'])]
        )
        transform_test = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(*NORMALIZE_IMAGES['cifar10'])]
        )
        trainset = datasets.CIFAR10(root=data_path, train=True, download=True, transform=transform_train)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, **kwargs)
        testset = datasets.CIFAR10(root=data_path, train=False, download=True, transform=transform_test)
        test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size, shuffle=True, **kwargs)
        model = ResNet34().to(device)
        criterion = nn.CrossEntropyLoss()
        # Settings recommended in: https://github.com/kuangliu/pytorch-cifar
        # Note that `--lr` and `--gamma` are not used in this branch.
        optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
        scheduler = MultiStepLR(optimizer, [150, 250, 350], gamma=0.1)
        bounds = (-255, 255)

    elif args.model_type == 'svhn':
        transform = transforms.Compose(
            [transforms.ToTensor(),
             transforms.Normalize(*NORMALIZE_IMAGES['svhn'])]
        )
        trainset = datasets.SVHN(root=data_path, split='train', download=True, transform=transform)
        train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, **kwargs)
        testset = datasets.SVHN(root=data_path, split='test', download=True, transform=transform)
        test_loader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch_size, shuffle=True, **kwargs)
        model = SVHN().to(device)
        optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
        scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
        bounds = (-255, 255)

    else:
        raise ValueError("'{}' is not a valid model type".format(args.model_type))

    if args.train:
        for epoch in range(1, args.epochs + 1):
            model = train(args, model, device, train_loader, optimizer, epoch, criterion=criterion)
            model = test(args, model, device, test_loader, criterion=criterion)
            scheduler.step()
            # periodic checkpoint of the model
            if epoch % 20 == 0:
                save_model_checkpoint(model, args.model_type, epoch=epoch)

    elif args.ckpt:
        print("Loading model from checkpoint")
        model = load_model_checkpoint(model, args.model_type)

    if args.attack:
        # ToDo: Verify correctness
        # https://stackoverflow.com/questions/56699048/how-to-get-the-filename-of-a-sample-from-a-dataloader
        # The generated adversarial samples are currently not used any further in this function
        adversarials, _ = foolbox_attack(model, device, test_loader, bounds, p_norm=args.p_norm, adv_attack=args.adv_attack)

    if args.save_model:
        save_model_checkpoint(model, args.model_type)
Пример #4
0
def main():
    """Evaluate the combined classification performance of a DNN and a detection method.

    For every cross-validation fold, the saved clean numpy data is loaded, the layer
    embeddings and DNN predictions are computed, and the selected detection method
    ('proposed' or 'dknn') is fit on the clean training split. Score thresholds are derived
    from the clean training scores for the target FPRs (`FPRS_TARGET`). The detector is then
    scored either on the clean test split (`--adv-attack none`) or on the adversarial test
    split. Finally, the per-fold results are passed to `combined_classification_performance`,
    which is given the output pickle file path.

    Raises:
        ValueError: if both `--use-top-ranked` and `--use-deep-layers` are given, if a
            required dimension reduction model file does not exist, if the model type or
            detection method is not handled, if the numpy data directory is missing, or if
            the detector output does not have the expected number of samples.
    """
    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-type',
                        '-m',
                        choices=['mnist', 'cifar10', 'svhn'],
                        default='mnist',
                        help='model type or name of the dataset')
    parser.add_argument('--detection-method',
                        '--dm',
                        choices=DETECTION_METHODS,
                        default='proposed',
                        help="Detection method to run. Choices are: {}".format(
                            ', '.join(DETECTION_METHODS)))
    parser.add_argument(
        '--index-adv',
        type=int,
        default=0,
        help=
        'Index of the adversarial attack parameter to use. This indexes the sorted directories '
        'containing the adversarial data files from different attack parameters.'
    )
    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        help='batch size of evaluation')
    ################ Optional arguments for the proposed method
    parser.add_argument(
        '--test-statistic',
        '--ts',
        choices=TEST_STATS_SUPPORTED,
        default='multinomial',
        help=
        "Test statistic to calculate at the layers for the proposed method. Choices are: {}"
        .format(', '.join(TEST_STATS_SUPPORTED)))
    parser.add_argument(
        '--score-type',
        '--st',
        choices=SCORE_TYPES,
        default='pvalue',
        help="Score type to use for the proposed method. Choices are: {}".
        format(', '.join(SCORE_TYPES)))
    parser.add_argument(
        '--pvalue-fusion',
        '--pf',
        choices=['harmonic_mean', 'fisher'],
        default='harmonic_mean',
        help=
        "Name of the method to use for combining p-values from multiple layers for the "
        "proposed method. Choices are: 'harmonic_mean' and 'fisher'")
    parser.add_argument(
        '--ood-detection',
        '--ood',
        action='store_true',
        default=False,
        help=
        "Option that enables out-of-distribution detection instead of adversarial detection "
        "for the proposed method")
    parser.add_argument(
        '--use-top-ranked',
        '--utr',
        action='store_true',
        default=False,
        help=
        "Option that enables the proposed method to use only the top-ranked (by p-values) test statistics for "
        "detection. The number of test statistics is specified through the option '--num-layers'"
    )
    parser.add_argument(
        '--use-deep-layers',
        '--udl',
        action='store_true',
        default=False,
        help=
        "Option that enables the proposed method to use only a given number of last few layers of the DNN. "
        "The number of layers is specified through the option '--num-layers'")
    parser.add_argument(
        '--num-layers',
        '--nl',
        type=int,
        default=NUM_TOP_RANKED,
        help=
        "If the option '--use-top-ranked' or '--use-deep-layers' is provided, this option specifies the number "
        "of layers or test statistics to be used by the proposed method")
    parser.add_argument(
        '--combine-classes',
        '--cc',
        action='store_true',
        default=False,
        help=
        "Option that allows low probability classes to be automatically combined into one group for the "
        "multinomial test statistic used with the proposed method")
    ################ General options (NOTE(review): these do not look specific to the proposed method; confirm)
    parser.add_argument(
        '--num-neighbors',
        '--nn',
        type=int,
        default=-1,
        help=
        'Number of nearest neighbors (if applicable to the method). By default, this is set '
        'to be a power of the number of samples (n): n^{:.1f}'.format(
            NEIGHBORHOOD_CONST))
    parser.add_argument(
        '--modelfile-dim-reduc',
        '--mdr',
        default='',
        help=
        'Path to the saved dimension reduction model file. Specify only if the default path '
        'needs to be changed.')
    parser.add_argument(
        '--output-dir',
        '-o',
        default='',
        help='directory path for saving the results of detection')
    parser.add_argument(
        '--adv-attack',
        '--aa',
        choices=['FGSM', 'PGD', 'CW', CUSTOM_ATTACK, 'none'],
        default='PGD',
        help=
        "Type of adversarial attack. Use 'none' to evaluate on clean samples.")
    parser.add_argument(
        '--max-attack-prop',
        '--map',
        type=float,
        default=0.5,
        help=
        "Maximum proportion of attack samples in the test fold. Should be a value in (0, 1]"
    )
    parser.add_argument('--num-folds',
                        '--nf',
                        type=int,
                        default=CROSS_VAL_SIZE,
                        help='number of cross-validation folds')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--gpu',
                        type=str,
                        default='2',
                        help='which gpus to execute code on')
    parser.add_argument(
        '--n-jobs',
        type=int,
        default=8,
        help='number of parallel jobs to use for multiprocessing')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=SEED_DEFAULT,
                        help='seed for random number generation')
    args = parser.parse_args()

    # The two layer-selection options are mutually exclusive
    if args.use_top_ranked and args.use_deep_layers:
        raise ValueError(
            "Cannot provide both command line options '--use-top-ranked' and '--use-deep-layers'. "
            "Specify only one of them.")

    # The visible GPUs are restricted before CUDA availability is queried
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    # NOTE(review): `kwargs_loader` is not used anywhere else in this function
    kwargs_loader = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Number of neighbors
    # A non-positive value is mapped to None, which is passed on to the detectors as-is
    n_neighbors = args.num_neighbors
    if n_neighbors <= 0:
        n_neighbors = None

    # Output directory
    if not args.output_dir:
        base_dir = get_output_path(args.model_type)
        output_dir = os.path.join(base_dir, 'prediction')
    else:
        output_dir = args.output_dir

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Method name for results and plots
    method_name = METHOD_NAME_MAP[args.detection_method]

    # Dimensionality reduction to the layer embeddings is applied only for methods in certain configurations
    apply_dim_reduc = False
    if args.detection_method == 'proposed':
        # Name string for the proposed method based on the input configuration
        # Score type suffix in the method name
        # '{:.4s}' truncates the score type to at most its first 4 characters
        st = '{:.4s}'.format(args.score_type)
        if args.score_type == 'pvalue':
            if args.pvalue_fusion == 'harmonic_mean':
                st += '_hmp'
            if args.pvalue_fusion == 'fisher':
                st += '_fis'

        # The method name and test statistic are each truncated to at most 5 characters
        if not args.ood_detection:
            method_name = '{:.5s}_{:.5s}_{}_adv'.format(
                method_name, args.test_statistic, st)
        else:
            method_name = '{:.5s}_{:.5s}_{}_ood'.format(
                method_name, args.test_statistic, st)

        if args.use_top_ranked:
            method_name = '{}_top{:d}'.format(method_name, args.num_layers)
        elif args.use_deep_layers:
            method_name = '{}_last{:d}'.format(method_name, args.num_layers)

        # If `n_neighbors` is specified, append that value to the name string
        if n_neighbors is not None:
            method_name = '{}_k{:d}'.format(method_name, n_neighbors)

        apply_dim_reduc = True

    elif args.detection_method == 'dknn':
        apply_dim_reduc = False
        # If `n_neighbors` is specified, append that value to the name string
        if n_neighbors is not None:
            method_name = '{}_k{:d}'.format(method_name, n_neighbors)

    # Model file for dimension reduction, if required
    model_dim_reduc = None
    if apply_dim_reduc:
        if args.modelfile_dim_reduc:
            fname = args.modelfile_dim_reduc
        else:
            # Path to the dimension reduction model file
            fname = get_path_dr_models(args.model_type,
                                       args.detection_method,
                                       test_statistic=args.test_statistic)

        if not os.path.isfile(fname):
            raise ValueError(
                "Model file for dimension reduction is required, but does not exist: {}"
                .format(fname))
        else:
            # Load the dimension reduction models for each layer from the pickle file
            model_dim_reduc = load_dimension_reduction_models(fname)

    # Data loader and pre-trained DNN model corresponding to the dataset
    # NOTE(review): `num_classes` is assigned in each branch but not used later in this function
    if args.model_type == 'mnist':
        num_classes = 10
        model = MNIST().to(device)
        model = load_model_checkpoint(model, args.model_type)

    elif args.model_type == 'cifar10':
        num_classes = 10
        model = ResNet34().to(device)
        model = load_model_checkpoint(model, args.model_type)

    elif args.model_type == 'svhn':
        num_classes = 10
        model = SVHN().to(device)
        model = load_model_checkpoint(model, args.model_type)

    else:
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    # Set model in evaluation mode
    model.eval()

    # Check if the numpy data directory exists
    d = os.path.join(NUMPY_DATA_PATH, args.model_type)
    if not os.path.isdir(d):
        raise ValueError(
            "Directory for the numpy data files not found: {}".format(d))

    # With the attack type 'none', the detector is evaluated on the clean test folds instead
    if args.adv_attack.lower() == 'none':
        evaluate_on_clean = True
    else:
        evaluate_on_clean = False

    # Initialization
    # One entry is appended to each of these lists per cross-validation fold
    labels_true_folds = []
    labels_pred_dnn_folds = []
    scores_detec_folds = []
    labels_pred_detec_folds = []
    thresholds_folds = []
    ti = time.time()
    # Cross-validation
    for i in range(args.num_folds):
        print("\nProcessing cross-validation fold {:d}:".format(i + 1))
        # Load the saved clean numpy data from this fold
        numpy_save_path = get_clean_data_path(args.model_type, i + 1)
        # Temporary hack to use backup data directory
        # numpy_save_path = numpy_save_path.replace('varun', 'jayaram', 1)

        data_tr, labels_tr, data_te, labels_te = load_numpy_data(
            numpy_save_path)
        num_clean_tr = labels_tr.shape[0]
        num_clean_te = labels_te.shape[0]
        # Data loader for the train and test fold
        train_fold_loader = convert_to_loader(data_tr,
                                              labels_tr,
                                              dtype_x=torch.float,
                                              batch_size=args.batch_size,
                                              device=device)
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             dtype_x=torch.float,
                                             batch_size=args.batch_size,
                                             device=device)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean train data split:"
        )
        layer_embeddings_tr, labels_pred_tr = helper_layer_embeddings(
            model, device, train_fold_loader, args.detection_method, labels_tr)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean test data split:"
        )
        layer_embeddings_te, labels_pred_te = helper_layer_embeddings(
            model, device, test_fold_loader, args.detection_method, labels_te)
        # The loaders are no longer needed once the embeddings have been extracted
        del train_fold_loader
        del test_fold_loader

        if not evaluate_on_clean:
            # Load the saved adversarial numpy data generated from this training and test fold
            # NOTE(review): the fold index passed here is the 0-based `i`, whereas
            # `get_clean_data_path` above receives `i + 1`; confirm which one the wrapper expects
            _, _, data_tr_adv, labels_tr_adv, data_te_adv, labels_te_adv = load_adversarial_wrapper(
                i,
                args.model_type,
                args.adv_attack,
                args.max_attack_prop,
                num_clean_te,
                index_adv=args.index_adv)
            num_adv_tr = labels_tr_adv.shape[0]
            num_adv_te = labels_te_adv.shape[0]
            print(
                "\nTrain fold: number of clean samples = {:d}, number of adversarial samples = {:d}, % of "
                "adversarial samples = {:.4f}".format(
                    num_clean_tr, num_adv_tr,
                    (100. * num_adv_tr) / (num_clean_tr + num_adv_tr)))
            print(
                "Test fold: number of clean samples = {:d}, number of adversarial samples = {:d}, % of adversarial "
                "samples = {:.4f}".format(num_clean_te, num_adv_te,
                                          (100. * num_adv_te) /
                                          (num_clean_te + num_adv_te)))
            # Adversarial data loader for the test fold
            adv_test_fold_loader = convert_to_loader(
                data_te_adv,
                labels_te_adv,
                dtype_x=torch.float,
                batch_size=args.batch_size,
                device=device)
            print(
                "\nCalculating the layer embeddings and DNN predictions for the adversarial test data split:"
            )
            layer_embeddings_te_adv, labels_pred_te_adv = helper_layer_embeddings(
                model, device, adv_test_fold_loader, args.detection_method,
                labels_te_adv)
            check_label_mismatch(labels_te_adv, labels_pred_te_adv)
            del adv_test_fold_loader

            # True class labels of adversarial samples from this test fold
            labels_true_folds.append(labels_te_adv)
            # Class predictions of the DNN on adversarial samples from this test fold
            labels_pred_dnn_folds.append(labels_pred_te_adv)
            # Number of samples the detector output is expected to have (checked further below)
            num_expec = num_adv_te
        else:
            print("\nTrain fold: number of clean samples = {:d}".format(
                num_clean_tr))
            print("Test fold: number of clean samples = {:d}".format(
                num_clean_te))
            # True class labels of clean samples from this test fold
            labels_true_folds.append(labels_te)
            # Class predictions of the DNN on clean samples from this test fold
            labels_pred_dnn_folds.append(labels_pred_te)
            # Number of samples the detector output is expected to have (checked further below)
            num_expec = num_clean_te

        # Detection methods
        if args.detection_method == 'proposed':
            nl = len(layer_embeddings_tr)
            # Index of the first layer embedding to use; 0 means that all the layers are used
            st_ind = 0
            if args.use_deep_layers:
                if args.num_layers > nl:
                    print(
                        "WARNING: number of layers specified using the option '--num-layers' exceeds the number "
                        "of layers in the model. Using all the layers.")
                    st_ind = 0
                else:
                    st_ind = nl - args.num_layers
                    print(
                        "Using only the last {:d} layer embeddings from the {:d} layers for the proposed method."
                        .format(args.num_layers, nl))

            # The dimension reduction models are sliced the same way as the layer embeddings
            mod_dr = None if (
                model_dim_reduc is None) else model_dim_reduc[st_ind:]
            det_model = DetectorLayerStatistics(
                layer_statistic=args.test_statistic,
                score_type=args.score_type,
                ood_detection=args.ood_detection,
                pvalue_fusion=args.pvalue_fusion,
                use_top_ranked=args.use_top_ranked,
                num_top_ranked=args.num_layers,
                skip_dim_reduction=(not apply_dim_reduc),
                model_dim_reduction=mod_dr,
                n_neighbors=n_neighbors,
                n_jobs=args.n_jobs,
                seed_rng=args.seed)
            # Fit the detector on clean data from the training fold
            if args.combine_classes and (args.test_statistic == 'multinomial'):
                _ = det_model.fit(layer_embeddings_tr[st_ind:],
                                  labels_tr,
                                  labels_pred_tr,
                                  combine_low_proba_classes=True)
            else:
                _ = det_model.fit(layer_embeddings_tr[st_ind:], labels_tr,
                                  labels_pred_tr)

            # Find the score thresholds corresponding to the target FPRs using the scores from the clean train
            # fold data
            scores_detec_train = det_model.score(layer_embeddings_tr[st_ind:],
                                                 labels_pred_tr,
                                                 test_layer_pairs=True,
                                                 is_train=True)
            thresholds = find_score_thresholds(scores_detec_train, FPRS_TARGET)
            if evaluate_on_clean:
                # Scores and class predictions on clean data from the test fold
                scores_detec, labels_pred_detec = det_model.score(
                    layer_embeddings_te[st_ind:],
                    labels_pred_te,
                    return_corrected_predictions=True,
                    test_layer_pairs=True)
            else:
                # Scores and class predictions on adversarial data from the test fold
                scores_detec, labels_pred_detec = det_model.score(
                    layer_embeddings_te_adv[st_ind:],
                    labels_pred_te_adv,
                    return_corrected_predictions=True,
                    test_layer_pairs=True)

        elif args.detection_method == 'dknn':
            det_model = DeepKNN(n_neighbors=n_neighbors,
                                skip_dim_reduction=(not apply_dim_reduc),
                                model_dim_reduction=model_dim_reduc,
                                n_jobs=args.n_jobs,
                                seed_rng=args.seed)
            # Fit the detector on clean data from the training fold
            _ = det_model.fit(layer_embeddings_tr, labels_tr)
            # Find the score thresholds corresponding to the target FPRs using the scores from the clean train
            # fold data
            scores_detec_train, _ = det_model.score(layer_embeddings_tr,
                                                    is_train=True)
            thresholds = find_score_thresholds(scores_detec_train, FPRS_TARGET)
            if evaluate_on_clean:
                # Scores and class predictions on clean data from the test fold
                scores_detec, labels_pred_detec = det_model.score(
                    layer_embeddings_te)
            else:
                # Scores and class predictions on adversarial data from the test fold
                scores_detec, labels_pred_detec = det_model.score(
                    layer_embeddings_te_adv)

        else:
            # Reached for any entry of `DETECTION_METHODS` other than 'proposed' and 'dknn'
            raise ValueError("Unknown detection method name '{}'".format(
                args.detection_method))

        # Sanity check
        if (scores_detec.shape[0] != num_expec) or (labels_pred_detec.shape[0]
                                                    != num_expec):
            raise ValueError(
                "Detection scores and/or predicted labels do not have the expected length of {:d}; method = {}, "
                "fold = {:d}".format(num_expec, args.detection_method, i + 1))

        scores_detec_folds.append(scores_detec)
        labels_pred_detec_folds.append(labels_pred_detec)
        thresholds_folds.append(thresholds)

    print(
        "\nCalculating the combined classification accuracy of the DNN and detector system:"
    )
    fname = os.path.join(output_dir,
                         'corrected_accuracies_{}.pkl'.format(method_name))
    # NOTE(review): the returned `results` is not used further in this function
    results = combined_classification_performance(scores_detec_folds,
                                                  thresholds_folds,
                                                  labels_pred_detec_folds,
                                                  labels_pred_dnn_folds,
                                                  labels_true_folds,
                                                  FPRS_TARGET,
                                                  output_file=fname)
    print("Performance metrics saved to the file: {}".format(fname))
    tf = time.time()
    print("Total time taken: {:.4f} minutes".format((tf - ti) / 60.))
def main():
    """Run out-of-distribution (OOD) detection with cross-validation.

    Parses the command line options, loads the pre-trained DNN for the chosen
    dataset, and then for every cross-validation fold:

    1. loads the saved clean (inlier) numpy data and the OOD numpy data mapped
       to the dataset through `inlier_outlier_map`,
    2. calculates the layer embeddings and DNN predictions for them,
    3. fits the selected detection method on the clean training fold,
    4. scores the clean and OOD samples from the test fold, and
    5. saves a checkpoint of the scores/labels (and optionally the models).

    Finally, the detection metrics are calculated over all the folds for a
    varying proportion of outlier samples and saved to a pickle file in the
    output directory.

    Raises:
        ValueError: if both '--use-top-ranked' and '--use-deep-layers' are
            given, if a required dimension reduction model file or the numpy
            data directory is missing, if the model type or detection method
            is unknown, or if the number of scores does not match the number
            of detection labels for a fold.
    """
    # Command line options
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        help='batch size of evaluation')
    parser.add_argument('--model-type',
                        '-m',
                        choices=['mnist', 'cifar10', 'cifar10aug', 'svhn'],
                        default='mnist',
                        help='model type or name of the dataset')
    parser.add_argument('--detection-method',
                        '--dm',
                        choices=DETECTION_METHODS,
                        default='proposed',
                        help="Detection method to run. Choices are: {}".format(
                            ', '.join(DETECTION_METHODS)))
    parser.add_argument(
        '--resume-from-ckpt',
        action='store_true',
        default=False,
        help=
        'Use this option to load results and resume from a previous partially completed run. '
        'Cross-validation folds that were completed earlier will be skipped in the current run.'
    )
    parser.add_argument(
        '--save-detec-model',
        action='store_true',
        default=False,
        help=
        'Use this option to save the list of detection models from the CV folds to a pickle '
        'file. Note that the files tend to large in size.')
    parser.add_argument(
        '--censor-classes',
        action='store_true',
        default=False,
        help=
        'Use this option to censor data from a random subset of classes in the training fold.'
    )
    ################ Optional arguments for the proposed method
    parser.add_argument(
        '--test-statistic',
        '--ts',
        choices=TEST_STATS_SUPPORTED,
        default='multinomial',
        help=
        "Test statistic to calculate at the layers for the proposed method. Choices are: {}"
        .format(', '.join(TEST_STATS_SUPPORTED)))
    parser.add_argument(
        '--score-type',
        '--st',
        choices=SCORE_TYPES,
        default='pvalue',
        help="Score type to use for the proposed method. Choices are: {}".
        format(', '.join(SCORE_TYPES)))
    parser.add_argument(
        '--pvalue-fusion',
        '--pf',
        choices=['harmonic_mean', 'fisher'],
        default='harmonic_mean',
        help=
        "Name of the method to use for combining p-values from multiple layers for the "
        "proposed method. Choices are: 'harmonic_mean' and 'fisher'")
    parser.add_argument(
        '--use-top-ranked',
        '--utr',
        action='store_true',
        default=False,
        help=
        "Option that enables the proposed method to use only the top-ranked (by p-values) test statistics for "
        "detection. The number of test statistics is specified through the option '--num-layers'"
    )
    parser.add_argument(
        '--use-deep-layers',
        '--udl',
        action='store_true',
        default=False,
        help=
        "Option that enables the proposed method to use only a given number of last few layers of the DNN. "
        "The number of layers is specified through the option '--num-layers'")
    parser.add_argument(
        '--num-layers',
        '--nl',
        type=int,
        default=NUM_TOP_RANKED,
        help=
        "If the option '--use-top-ranked' or '--use-deep-layers' is provided, this option specifies the number "
        "of layers or test statistics to be used by the proposed method")
    parser.add_argument(
        '--combine-classes',
        '--cc',
        action='store_true',
        default=False,
        help=
        "Option that allows low probability classes to be automatically combined into one group for the "
        "multinomial test statistic used with the proposed method")
    ################ Optional arguments for the other methods and general options
    parser.add_argument(
        '--layer-trust-score',
        '--lts',
        choices=LAYERS_TRUST_SCORE,
        default='input',
        help=
        "Which layer to use for the trust score calculation. Choices are: {}".
        format(', '.join(LAYERS_TRUST_SCORE)))
    parser.add_argument(
        '--batch-lid',
        action='store_true',
        default=False,
        help=
        'Use this option to enable batched, faster version of the LID detector'
    )
    parser.add_argument(
        '--num-neighbors',
        '--nn',
        type=int,
        default=-1,
        help=
        'Number of nearest neighbors (if applicable to the method). By default, this is set '
        'to be a power of the number of samples (n): n^{:.1f}'.format(
            NEIGHBORHOOD_CONST))
    parser.add_argument(
        '--modelfile-dim-reduc',
        '--mdr',
        default='',
        help=
        'Path to the saved dimension reduction model file. Specify only if the default path '
        'needs to be changed.')
    parser.add_argument(
        '--output-dir',
        '-o',
        default='',
        help='directory path for saving the results of detection')
    parser.add_argument(
        '--max-outlier-prop',
        '--mop',
        type=float,
        default=0.25,
        help=
        "Maximum proportion of outlier samples in the test fold. Should be a value in (0, 1]"
    )
    parser.add_argument('--num-folds',
                        '--nf',
                        type=int,
                        default=CROSS_VAL_SIZE,
                        help='number of cross-validation folds')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--gpu',
                        type=str,
                        default='2',
                        help='which gpus to execute code on')
    parser.add_argument(
        '--n-jobs',
        type=int,
        default=8,
        help='number of parallel jobs to use for multiprocessing')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=SEED_DEFAULT,
                        help='seed for random number generation')
    args = parser.parse_args()

    if args.use_top_ranked and args.use_deep_layers:
        raise ValueError(
            "Cannot provide both command line options '--use-top-ranked' and '--use-deep-layers'. "
            "Specify only one of them.")

    # The visible GPUs have to be set before CUDA is queried
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    random.seed(args.seed)

    # Number of neighbors. A non-positive value means "let the method decide"
    n_neighbors = args.num_neighbors
    if n_neighbors <= 0:
        n_neighbors = None

    # Output directory
    if not args.output_dir:
        base_dir = get_output_path(args.model_type)
        output_dir = os.path.join(base_dir, 'detection_ood')
    else:
        output_dir = args.output_dir

    # `exist_ok` avoids the race between checking for and creating the directory
    os.makedirs(output_dir, exist_ok=True)

    # Method name for results and plots
    method_name = METHOD_NAME_MAP[args.detection_method]

    # Dimensionality reduction to the layer embeddings is applied only for methods in certain configurations
    apply_dim_reduc = False
    if args.detection_method == 'proposed':
        # Name string for the proposed method based on the input configuration
        # Score type suffix in the method name
        st = '{:.4s}'.format(args.score_type)
        if args.score_type == 'pvalue':
            if args.pvalue_fusion == 'harmonic_mean':
                st += '_hmp'
            if args.pvalue_fusion == 'fisher':
                st += '_fis'

        method_name = '{:.5s}_{:.5s}_{}_ood'.format(method_name,
                                                    args.test_statistic, st)
        if args.use_top_ranked:
            method_name = '{}_top{:d}'.format(method_name, args.num_layers)
        elif args.use_deep_layers:
            method_name = '{}_last{:d}'.format(method_name, args.num_layers)

        # If `n_neighbors` is specified, append that value to the name string
        if n_neighbors is not None:
            method_name = '{}_k{:d}'.format(method_name, n_neighbors)

        apply_dim_reduc = True

    elif args.detection_method == 'trust':
        # Append the layer name to the method name
        method_name = '{:.5s}_{}'.format(method_name, args.layer_trust_score)
        # If `n_neighbors` is specified, append that value to the name string
        if n_neighbors is not None:
            method_name = '{}_k{:d}'.format(method_name, n_neighbors)

        # Dimension reduction is not applied to the logit layer
        if args.layer_trust_score != 'logit':
            apply_dim_reduc = True

    elif args.detection_method == 'dknn':
        apply_dim_reduc = False
        # If `n_neighbors` is specified, append that value to the name string
        if n_neighbors is not None:
            method_name = '{}_k{:d}'.format(method_name, n_neighbors)

    elif args.detection_method == 'mahalanobis':
        # No dimensionality reduction needed here
        # According to the paper, they internally transform a `C x H x W` layer embedding to a `C x 1` vector
        # through global average pooling
        apply_dim_reduc = False

    # Model file for dimension reduction, if required
    model_dim_reduc = None
    if apply_dim_reduc:
        if args.modelfile_dim_reduc:
            fname = args.modelfile_dim_reduc
        else:
            # Path to the dimension reduction model file
            fname = get_path_dr_models(args.model_type,
                                       args.detection_method,
                                       test_statistic=args.test_statistic)

        if not os.path.isfile(fname):
            raise ValueError(
                "Model file for dimension reduction is required, but does not exist: {}"
                .format(fname))

        # Load the dimension reduction models for each layer from the pickle file
        model_dim_reduc = load_dimension_reduction_models(fname)

    config_trust_score = dict()
    if args.detection_method == 'trust':
        # Get the layer index and the layer-specific dimensionality reduction model for the trust score
        config_trust_score = get_config_trust_score(model_dim_reduc,
                                                    args.layer_trust_score,
                                                    n_neighbors)

    # Pre-trained DNN model corresponding to the dataset. The data itself is read from the saved numpy
    # files of each fold, so no torchvision data loader is created here.
    num_classes = 10
    if args.model_type == 'mnist':
        model = MNIST().to(device)
    elif args.model_type in ('cifar10', 'cifar10aug'):
        model = ResNet34().to(device)
    elif args.model_type == 'svhn':
        model = SVHN().to(device)
    else:
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    model = load_model_checkpoint(model, args.model_type)

    # Set model in evaluation mode
    model.eval()

    # Check if the numpy data directory exists
    d = os.path.join(NUMPY_DATA_PATH, args.model_type)
    if not os.path.isdir(d):
        raise ValueError(
            "Directory for the numpy data files not found: {}".format(d))

    # Initialization
    if args.resume_from_ckpt:
        scores_folds, labels_folds, models_folds, init_fold = load_detector_checkpoint(
            output_dir, method_name, args.save_detec_model)
        print(
            "Loading saved results from a previous run. Completed {:d} fold(s). Resuming from fold {:d}."
            .format(init_fold, init_fold + 1))
    else:
        scores_folds = []
        labels_folds = []
        models_folds = []
        init_fold = 0

    ti = time.time()
    # Cross-validation
    for i in range(init_fold, args.num_folds):
        print("\nProcessing cross-validation fold {:d}:".format(i + 1))
        # Load the saved clean numpy data from this fold
        numpy_save_path = get_clean_data_path(args.model_type, i + 1)
        data_tr, labels_tr, data_te, labels_te = load_numpy_data(
            numpy_save_path)
        # Data loader for the train fold
        train_fold_loader = convert_to_loader(data_tr,
                                              labels_tr,
                                              batch_size=args.batch_size,
                                              device=device,
                                              dtype_x=torch.float)
        # Data loader for the test fold
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             batch_size=args.batch_size,
                                             device=device,
                                             dtype_x=torch.float)

        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean train data split:"
        )
        layer_embeddings_tr, labels_pred_tr = helper_layer_embeddings(
            model, device, train_fold_loader, args.detection_method, labels_tr)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean test data split:"
        )
        layer_embeddings_te, labels_pred_te = helper_layer_embeddings(
            model, device, test_fold_loader, args.detection_method, labels_te)
        # Delete the data loaders in case they are not used further.
        # The train fold loader is still needed by the mahalanobis method.
        del test_fold_loader
        if args.detection_method != 'mahalanobis':
            del train_fold_loader

        ############################ OUTLIERS ########################################################
        # Path to the OOD dataset that is paired with this (inlier) dataset
        numpy_save_path_ood = get_clean_data_path(
            inlier_outlier_map[args.model_type], i + 1)
        data_tr_ood, labels_tr_ood, data_te_ood, labels_te_ood = load_numpy_data(
            numpy_save_path_ood)
        if args.censor_classes:
            # Exclude data from a random subset of classes for the training fold
            data_tr_ood, labels_tr_ood, data_te_ood, labels_te_ood = filter_data_classes(
                data_tr_ood,
                labels_tr_ood,
                data_te_ood,
                labels_te_ood,
                i,
                include_noise_samples=True)

        # Data loader for the outlier data from the test fold
        test_fold_loader_ood = convert_to_loader(data_te_ood,
                                                 labels_te_ood,
                                                 batch_size=args.batch_size,
                                                 device=device,
                                                 dtype_x=torch.float)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the ood test data split:"
        )
        layer_embeddings_te_ood, labels_pred_te_ood = helper_layer_embeddings(
            model, device, test_fold_loader_ood, args.detection_method,
            labels_te_ood)
        # Delete the data loaders in case they are not used further
        del test_fold_loader_ood

        ############################# NOISY #########################################################
        # Load the saved noisy (Gaussian noise) numpy data generated from this training and test fold.
        # Only the noisy train split is used below (by the mahalanobis method).
        numpy_save_path_noisy = get_noisy_data_path(args.model_type, i + 1)
        data_tr_noisy, _ = load_noisy_data(numpy_save_path_noisy)

        # Run the detection method
        # Detection labels (0 denoting clean and 1 outlier).
        # The builtin `int` is used because the `np.int` alias was removed in NumPy 1.24.
        labels_detec = np.concatenate([
            np.zeros(labels_pred_te.shape[0], dtype=int),
            np.ones(labels_pred_te_ood.shape[0], dtype=int)
        ])
        if args.detection_method == 'proposed':
            nl = len(layer_embeddings_tr)
            # Index of the first layer to be used by the detector
            st_ind = 0
            if args.use_deep_layers:
                if args.num_layers > nl:
                    print(
                        "WARNING: number of layers specified using the option '--num-layers' exceeds the number "
                        "of layers in the model. Using all the layers.")
                    st_ind = 0
                else:
                    st_ind = nl - args.num_layers
                    print(
                        "Using only the last {:d} layer embeddings from the {:d} layers for the proposed method."
                        .format(args.num_layers, nl))

            # Keep the dimension reduction models aligned with the selected layers
            mod_dr = None if (
                model_dim_reduc is None) else model_dim_reduc[st_ind:]
            det_model = DetectorLayerStatistics(
                layer_statistic=args.test_statistic,
                score_type=args.score_type,
                ood_detection=True,
                pvalue_fusion=args.pvalue_fusion,
                use_top_ranked=args.use_top_ranked,
                num_top_ranked=args.num_layers,
                skip_dim_reduction=(not apply_dim_reduc),
                model_dim_reduction=mod_dr,
                n_neighbors=n_neighbors,
                n_jobs=args.n_jobs,
                seed_rng=args.seed)
            # Fit the detector on clean data from the training fold
            if args.combine_classes and (args.test_statistic == 'multinomial'):
                _ = det_model.fit(layer_embeddings_tr[st_ind:],
                                  labels_tr,
                                  labels_pred_tr,
                                  combine_low_proba_classes=True)
            else:
                _ = det_model.fit(layer_embeddings_tr[st_ind:], labels_tr,
                                  labels_pred_tr)

            # Scores on clean data from the test fold
            scores_adv1 = det_model.score(layer_embeddings_te[st_ind:],
                                          labels_pred_te,
                                          test_layer_pairs=True)

            # Scores on ood data from the test fold
            scores_adv2 = det_model.score(layer_embeddings_te_ood[st_ind:],
                                          labels_pred_te_ood,
                                          test_layer_pairs=True)

            scores_adv = np.concatenate([scores_adv1, scores_adv2])
            if args.save_detec_model:
                models_folds.append(det_model)

        elif args.detection_method == 'dknn':
            det_model = DeepKNN(n_neighbors=n_neighbors,
                                skip_dim_reduction=(not apply_dim_reduc),
                                model_dim_reduction=model_dim_reduc,
                                n_jobs=args.n_jobs,
                                seed_rng=args.seed)
            # Fit the detector on clean data from the training fold
            _ = det_model.fit(layer_embeddings_tr, labels_tr)

            # Scores on clean data from the test fold (the predicted labels are not used)
            scores_adv1, _ = det_model.score(layer_embeddings_te)

            # Scores on ood data from the test fold (the predicted labels are not used)
            scores_adv2, _ = det_model.score(layer_embeddings_te_ood)

            scores_adv = np.concatenate([scores_adv1, scores_adv2])
            if args.save_detec_model:
                models_folds.append(det_model)

        elif args.detection_method == 'trust':
            ind_layer = config_trust_score['layer']
            det_model = TrustScore(
                alpha=config_trust_score['alpha'],
                n_neighbors=config_trust_score['n_neighbors'],
                skip_dim_reduction=(not apply_dim_reduc),
                model_dim_reduction=config_trust_score['model_dr'],
                n_jobs=args.n_jobs,
                seed_rng=args.seed)
            # Fit the detector on clean data from the training fold
            _ = det_model.fit(layer_embeddings_tr[ind_layer], labels_tr,
                              labels_pred_tr)

            # Scores on clean data from the test fold
            scores_adv1 = det_model.score(layer_embeddings_te[ind_layer],
                                          labels_pred_te)

            # Scores on ood data from the test fold
            scores_adv2 = det_model.score(layer_embeddings_te_ood[ind_layer],
                                          labels_pred_te_ood)

            scores_adv = np.concatenate([scores_adv1, scores_adv2])
            if args.save_detec_model:
                models_folds.append(det_model)

        elif args.detection_method == 'mahalanobis':
            # Sub-directory for this fold so that the output files are not overwritten
            temp_direc = os.path.join(output_dir, 'fold_{}'.format(i + 1))
            os.makedirs(temp_direc, exist_ok=True)

            # Calculate the mahalanobis distance features per layer and fit a logistic classifier on the extracted
            # features using data from the training fold
            model_detector = fit_mahalanobis_scores(model,
                                                    device,
                                                    'ood',
                                                    args.model_type,
                                                    num_classes,
                                                    temp_direc,
                                                    train_fold_loader,
                                                    data_tr,
                                                    data_tr_ood,
                                                    data_tr_noisy,
                                                    n_jobs=args.n_jobs)
            # Calculate the mahalanobis distance features per layer for the best noise magnitude and predict the
            # logistic classifier to score the samples.
            # Scores on clean data from the test fold
            scores_adv1 = get_mahalanobis_scores(model_detector, data_te,
                                                 model, device,
                                                 args.model_type)

            # Scores on ood data from the test fold
            scores_adv2 = get_mahalanobis_scores(model_detector, data_te_ood,
                                                 model, device,
                                                 args.model_type)

            scores_adv = np.concatenate([scores_adv1, scores_adv2])
        else:
            raise ValueError("Unknown detection method name '{}'".format(
                args.detection_method))

        # Sanity check
        if scores_adv.shape[0] != labels_detec.shape[0]:
            raise ValueError(
                "Detection scores and labels do not have the same length ({:d} != {:d}); method = {}, fold = {:d}"
                .format(scores_adv.shape[0], labels_detec.shape[0],
                        args.detection_method, i + 1))

        scores_folds.append(scores_adv)
        labels_folds.append(labels_detec)
        # Checkpoint after every fold so that a run can be resumed with '--resume-from-ckpt'
        save_detector_checkpoint(scores_folds, labels_folds, models_folds,
                                 output_dir, method_name,
                                 args.save_detec_model)

    print(
        "\nCalculating performance metrics for different proportion of outlier samples:"
    )
    fname = os.path.join(output_dir,
                         'detection_metrics_{}.pkl'.format(method_name))
    results_dict = metrics_varying_positive_class_proportion(
        scores_folds,
        labels_folds,
        output_file=fname,
        max_pos_proportion=args.max_outlier_prop,
        log_scale=False)
    print("Performance metrics saved to the file: {}".format(fname))
    tf = time.time()
    print("Total time taken: {:.4f} minutes".format((tf - ti) / 60.))
def main():
    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--stdev-high',
        type=float,
        default=-1.0,
        help=
        "Upper bound on the noise standard deviation. Use the option '--search-noise-stdev' "
        "to set this value automatically")
    parser.add_argument('--stdev-low',
                        type=float,
                        default=-1.0,
                        help="Lower bound on the noise standard deviation")
    parser.add_argument('--test-batch-size',
                        '--tb',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument(
        '--output-dir',
        '-o',
        default='',
        help='directory path for saving the output and model files')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--model-type',
                        '-m',
                        choices=['mnist', 'cifar10', 'svhn'],
                        default='cifar10',
                        help='model type or name of the dataset')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=SEED_DEFAULT,
                        help='seed for random number generation')
    parser.add_argument('--gpu',
                        type=str,
                        default='2',
                        help='which gpus to execute code on')
    parser.add_argument('--batch-size',
                        type=int,
                        default=BATCH_SIZE_DEF,
                        help='batch size of evaluation')
    parser.add_argument(
        '--search-noise-stdev',
        '--sns',
        action='store_true',
        default=False,
        help='use option to search for a suitable noise standard deviation')
    parser.add_argument('--num-folds',
                        '--nf',
                        type=int,
                        default=CROSS_VAL_SIZE,
                        help='number of cross-validation folds')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    if not args.output_dir:
        output_dir = os.path.join(ROOT, 'numpy_data', args.model_type)
    else:
        output_dir = args.output_dir

    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    data_path = os.path.join(ROOT, 'data')
    if args.model_type == 'mnist':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(*NORMALIZE_IMAGES['mnist'])
        ])
        test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_path,
                           train=False,
                           download=True,
                           transform=transform),
            batch_size=args.test_batch_size,
            shuffle=False,
            **kwargs)
        model = MNIST().to(device)
        model = load_model_checkpoint(model, args.model_type)
        num_classes = 10

    elif args.model_type == 'cifar10':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(*NORMALIZE_IMAGES['cifar10'])
        ])
        testset = datasets.CIFAR10(root=data_path,
                                   train=False,
                                   download=True,
                                   transform=transform)
        test_loader = torch.utils.data.DataLoader(
            testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
        num_classes = 10
        model = ResNet34().to(device)
        model = load_model_checkpoint(model, args.model_type)

    elif args.model_type == 'svhn':
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(*NORMALIZE_IMAGES['svhn'])
        ])
        testset = datasets.SVHN(root=data_path,
                                split='test',
                                download=True,
                                transform=transform)
        test_loader = torch.utils.data.DataLoader(
            testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
        num_classes = 10
        model = SVHN().to(device)
        model = load_model_checkpoint(model, args.model_type)

    else:
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    # convert the test data loader to 2 ndarrays
    data, labels = get_samples_as_ndarray(test_loader)

    # verify if the data loader is the same as the ndarrays it generates
    if not verify_data_loader(test_loader, batch_size=args.test_batch_size):
        raise ValueError("Data loader verification failed")

    stdev_high = args.stdev_high
    if args.search_noise_stdev or (stdev_high < 0.):
        # Search for a suitable noise standard deviation
        stdev_high = get_noise_stdev(model,
                                     device,
                                     data,
                                     labels,
                                     seed=args.seed)
        if stdev_high is None:
            print(
                "\nERROR: no good noise standard deviation found. Try searching over a larger range of values."
            )
            return

    # Noise standard deviation values
    stdev_low = args.stdev_low
    if (stdev_low < 0.) or (stdev_low >= stdev_high):
        stdev_low = stdev_high / 16.

    stdev_values = np.linspace(stdev_low, stdev_high, num=NUM_NOISE_VALUES)

    # repeat for each fold in the cross-validation split
    skf = StratifiedKFold(n_splits=args.num_folds,
                          shuffle=True,
                          random_state=args.seed)
    # Indicates whether the train and test data from a fold was loaded from file
    loaded_from_file = np.zeros(args.num_folds, dtype=np.bool)
    i = 1
    for ind_tr, ind_te in skf.split(data, labels):
        data_tr = data[ind_tr, :]
        labels_tr = labels[ind_tr]
        data_te = data[ind_te, :]
        labels_te = labels[ind_te]

        numpy_save_path = os.path.join(output_dir, "fold_{}".format(i))
        if not os.path.isdir(numpy_save_path):
            # Create directory for this fold and save the data to numpy files
            os.makedirs(numpy_save_path)
            np.save(os.path.join(numpy_save_path, 'data_tr.npy'), data_tr)
            np.save(os.path.join(numpy_save_path, 'labels_tr.npy'), labels_tr)
            np.save(os.path.join(numpy_save_path, 'data_te.npy'), data_te)
            np.save(os.path.join(numpy_save_path, 'labels_te.npy'), labels_te)
            loaded_from_file[i - 1] = False
        else:
            # load existing data files
            data_tr, labels_tr, data_te, labels_te = load_numpy_data(
                numpy_save_path)
            loaded_from_file[i - 1] = True

        # Directory for noisy train and test data from this fold
        noise_base_path = os.path.join(output_dir, 'fold_{}'.format(i),
                                       'noise_gaussian')
        if os.path.isdir(noise_base_path):
            # Clear out any old data files
            shutil.rmtree(noise_base_path)

        os.makedirs(noise_base_path)
        # Generate noisy data from the train and test fold for different standard deviation values and save them
        # to numpy files
        filenames_train = []
        filenames_test = []
        for sig in stdev_values:
            noise = np.random.normal(loc=0., scale=sig, size=data_tr.shape)
            data_tr_noisy = data_tr + noise
            noise = np.random.normal(loc=0., scale=sig, size=data_te.shape)
            data_te_noisy = data_te + noise

            fname = os.path.join(noise_base_path,
                                 'data_tr_noisy_stdev_{:.6f}.npy'.format(sig))
            np.save(fname, data_tr_noisy)
            filenames_train.append(fname + '\n')

            fname = os.path.join(noise_base_path,
                                 'data_te_noisy_stdev_{:.6f}.npy'.format(sig))
            np.save(fname, data_te_noisy)
            filenames_test.append(fname + '\n')

        print("Saved noisy data files from fold {:d}.".format(i))
        fname = os.path.join(noise_base_path, 'filenames_train.txt')
        with open(fname, 'w') as fp:
            fp.writelines(filenames_train)

        print(
            "List of filenames for noisy train data from this fold can be found in the file: {}"
            .format(fname))

        fname = os.path.join(noise_base_path, 'filenames_test.txt')
        with open(fname, 'w') as fp:
            fp.writelines(filenames_test)

        print(
            "List of filenames for noisy test data from this fold can be found in the file: {}"
            .format(fname))
        print('\n')
        i = i + 1

    if not (np.all(loaded_from_file)
            or np.all(np.logical_not(loaded_from_file))):
        raise ValueError(
            "Unexpected error: some of the data files from the train and test folds may not "
            "be consistent.")
def _subsample_rows(arr, max_rows):
    """Return at most `max_rows` randomly chosen rows of the 2D array `arr`.

    The array is returned unchanged when it has `max_rows` rows or fewer.
    Uses the global NumPy random state (`np.random.permutation`).
    """
    if max_rows < arr.shape[0]:
        ind_samp = np.random.permutation(arr.shape[0])[:max_rows]
        return arr[ind_samp, :]

    return arr


def gather_test_stats(args):
    """Collect the per-layer test statistics of the proposed detector.

    For the selected cross-validation fold, the clean, noisy and adversarial
    numpy data are loaded, layer embeddings of the pre-trained DNN are
    calculated, and a `DetectorLayerStatistics` model is fit twice: once on
    the clean training fold and once on the adversarial training fold. The
    test statistics stored by each fitted detector (`test_stats_pred_null`
    and `test_stats_true_null`) are sub-sampled to at most 5000 rows per
    class and stacked.

    Args:
        args: parsed command-line namespace. The attributes read here are
            `gpu`, `no_cuda`, `num_neighbors`, `modelfile_dim_reduc`,
            `model_type`, `test_statistic`, `batch_size`, `adv_attack`,
            `max_attack_prop`, `index_adv`, `use_deep_layers`, `num_layers`,
            `score_type`, `ood_detection`, `pvalue_fusion`, `use_top_ranked`,
            `n_jobs` and `seed`.

    Returns:
        Tuple `(test_stats_pred, test_stats_true)`. Each is a dict with the
        keys 'clean' and 'adversarial' mapping to a numpy array obtained by
        concatenating the per-class arrays along axis 0.

    Raises:
        ValueError: if the dimension reduction model file or the numpy data
            directory does not exist, or if `args.model_type` is not one of
            'mnist', 'cifar10', 'svhn'.
    """
    detection_method = 'proposed'
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Number of neighbors; a non-positive value is passed on as `None`
    n_neighbors = args.num_neighbors if args.num_neighbors > 0 else None

    # Model file for dimension reduction
    apply_dim_reduc = True
    model_dim_reduc = None
    if apply_dim_reduc:
        # An explicitly specified model file takes precedence over the default path
        fname = args.modelfile_dim_reduc or get_path_dr_models(
            args.model_type,
            detection_method,
            test_statistic=args.test_statistic)
        if not os.path.isfile(fname):
            raise ValueError(
                "Model file for dimension reduction is required, but does not exist: {}"
                .format(fname))

        # Load the dimension reduction models for each layer from the pickle file
        model_dim_reduc = load_dimension_reduction_models(fname)

    # Pre-trained DNN model corresponding to the dataset
    if args.model_type == 'mnist':
        model = MNIST().to(device)
    elif args.model_type == 'cifar10':
        model = ResNet34().to(device)
    elif args.model_type == 'svhn':
        model = SVHN().to(device)
    else:
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    model = load_model_checkpoint(model, args.model_type)
    # Set model in evaluation mode
    model.eval()

    # Check if the numpy data directory exists
    d = os.path.join(NUMPY_DATA_PATH, args.model_type)
    if not os.path.isdir(d):
        raise ValueError(
            "Directory for the numpy data files not found: {}".format(d))

    n_samples_per_class = 5000
    test_stats_pred = {'clean': [], 'adversarial': []}
    test_stats_true = {'clean': [], 'adversarial': []}
    # Select a particular data fold
    ind_fold = 0
    for i in range(ind_fold, ind_fold + 1):
        print("\nProcessing cross-validation fold {:d}:".format(i + 1))
        # Load the saved clean numpy data from this fold
        numpy_save_path = get_clean_data_path(args.model_type, i + 1)
        data_tr, labels_tr, data_te, labels_te = load_numpy_data(
            numpy_save_path)
        num_clean_tr = labels_tr.shape[0]
        num_clean_te = labels_te.shape[0]
        # Data loader for the train fold
        train_fold_loader = convert_to_loader(data_tr,
                                              labels_tr,
                                              dtype_x=torch.float,
                                              batch_size=args.batch_size,
                                              device=device)
        # Data loader for the test fold
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             dtype_x=torch.float,
                                             batch_size=args.batch_size,
                                             device=device)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean train data split:"
        )
        layer_embeddings_tr, labels_pred_tr = helper_layer_embeddings(
            model, device, train_fold_loader, detection_method, labels_tr)
        # NOTE(review): the embeddings of the clean test fold and of the noisy folds below are
        # calculated but not consumed anywhere in this function. They are retained so that the
        # console output and the sanity checks stay the same.
        print(
            "\nCalculating the layer embeddings and DNN predictions for the clean test data split:"
        )
        layer_embeddings_te, labels_pred_te = helper_layer_embeddings(
            model, device, test_fold_loader, detection_method, labels_te)
        del train_fold_loader, test_fold_loader

        # Load the saved noisy (Gaussian noise) numpy data generated from this training and test fold
        numpy_save_path = get_noisy_data_path(args.model_type, i + 1)
        data_tr_noisy, data_te_noisy = load_noisy_data(numpy_save_path)
        # Noisy data have the same labels as the clean data
        labels_tr_noisy = labels_tr
        labels_te_noisy = labels_te
        # Check the number of noisy samples
        assert data_tr_noisy.shape[0] == num_clean_tr, (
            "Number of noisy samples from the train fold is different "
            "from expected")
        assert data_te_noisy.shape[0] == num_clean_te, (
            "Number of noisy samples from the test fold is different "
            "from expected")
        # Data loader for the noisy train and test fold data
        noisy_train_fold_loader = convert_to_loader(data_tr_noisy,
                                                    labels_tr_noisy,
                                                    dtype_x=torch.float,
                                                    batch_size=args.batch_size,
                                                    device=device)
        noisy_test_fold_loader = convert_to_loader(data_te_noisy,
                                                   labels_te_noisy,
                                                   dtype_x=torch.float,
                                                   batch_size=args.batch_size,
                                                   device=device)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the noisy train data split:"
        )
        layer_embeddings_tr_noisy, labels_pred_tr_noisy = helper_layer_embeddings(
            model, device, noisy_train_fold_loader, detection_method,
            labels_tr_noisy)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the noisy test data split:"
        )
        layer_embeddings_te_noisy, labels_pred_te_noisy = helper_layer_embeddings(
            model, device, noisy_test_fold_loader, detection_method,
            labels_te_noisy)
        del noisy_train_fold_loader, noisy_test_fold_loader

        # Load the saved adversarial numpy data generated from this training and test fold.
        # The first two returned values are not needed here.
        # `labels_te_adv` corresponds to the class labels of the clean samples, not that predicted by the DNN
        _, _, data_tr_adv, labels_tr_adv, data_te_adv, labels_te_adv = load_adversarial_wrapper(
            i,
            args.model_type,
            args.adv_attack,
            args.max_attack_prop,
            num_clean_te,
            index_adv=args.index_adv)
        num_adv_tr = labels_tr_adv.shape[0]
        num_adv_te = labels_te_adv.shape[0]
        print(
            "\nTrain fold: number of clean samples = {:d}, number of adversarial samples = {:d}, % of adversarial "
            "samples = {:.4f}".format(num_clean_tr, num_adv_tr,
                                      (100. * num_adv_tr) /
                                      (num_clean_tr + num_adv_tr)))
        print(
            "Test fold: number of clean samples = {:d}, number of adversarial samples = {:d}, % of adversarial "
            "samples = {:.4f}".format(num_clean_te, num_adv_te,
                                      (100. * num_adv_te) /
                                      (num_clean_te + num_adv_te)))

        # Adversarial data loader for the train fold
        adv_train_fold_loader = convert_to_loader(data_tr_adv,
                                                  labels_tr_adv,
                                                  dtype_x=torch.float,
                                                  batch_size=args.batch_size,
                                                  device=device)
        # Adversarial data loader for the test fold
        adv_test_fold_loader = convert_to_loader(data_te_adv,
                                                 labels_te_adv,
                                                 dtype_x=torch.float,
                                                 batch_size=args.batch_size,
                                                 device=device)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the adversarial train data split:"
        )
        layer_embeddings_tr_adv, labels_pred_tr_adv = helper_layer_embeddings(
            model, device, adv_train_fold_loader, detection_method,
            labels_tr_adv)
        check_label_mismatch(labels_tr_adv, labels_pred_tr_adv)
        print(
            "\nCalculating the layer embeddings and DNN predictions for the adversarial test data split:"
        )
        layer_embeddings_te_adv, labels_pred_te_adv = helper_layer_embeddings(
            model, device, adv_test_fold_loader, detection_method,
            labels_te_adv)
        check_label_mismatch(labels_te_adv, labels_pred_te_adv)
        del adv_train_fold_loader, adv_test_fold_loader

        # Proposed method: optionally restrict to the last `args.num_layers` layer embeddings
        nl = len(layer_embeddings_tr)
        st_ind = 0
        if args.use_deep_layers:
            if args.num_layers > nl:
                print(
                    "WARNING: number of layers specified using the option '--num-layers' exceeds the number "
                    "of layers in the model. Using all the layers.")
                st_ind = 0
            else:
                st_ind = nl - args.num_layers
                print(
                    "Using only the last {:d} layer embeddings from the {:d} layers for the proposed method."
                    .format(args.num_layers, nl))

        # Keep the dimension reduction models aligned with the selected layers
        mod_dr = None if (
            model_dim_reduc is None) else model_dim_reduc[st_ind:]
        for cat in ('clean', 'adversarial'):
            det_model = DetectorLayerStatistics(
                layer_statistic=args.test_statistic,
                score_type=args.score_type,
                ood_detection=args.ood_detection,
                pvalue_fusion=args.pvalue_fusion,
                use_top_ranked=args.use_top_ranked,
                num_top_ranked=args.num_layers,
                skip_dim_reduction=(not apply_dim_reduc),
                model_dim_reduction=mod_dr,
                n_neighbors=n_neighbors,
                n_jobs=args.n_jobs,
                seed_rng=args.seed)
            # Fit the detector on clean or adversarial data from the training fold
            if cat == 'clean':
                _ = det_model.fit(layer_embeddings_tr[st_ind:], labels_tr,
                                  labels_pred_tr)
            else:
                _ = det_model.fit(layer_embeddings_tr_adv[st_ind:],
                                  labels_tr_adv, labels_pred_tr_adv)

            # Test statistics from each layer conditioned on the predicted class
            for arr in det_model.test_stats_pred_null.values():
                test_stats_pred[cat].append(
                    _subsample_rows(arr, n_samples_per_class))

            # Test statistics from each layer conditioned on the true class
            for arr in det_model.test_stats_true_null.values():
                test_stats_true[cat].append(
                    _subsample_rows(arr, n_samples_per_class))

    # Stack the per-class arrays into a single array per category
    for cat in ('clean', 'adversarial'):
        test_stats_pred[cat] = np.concatenate(test_stats_pred[cat], axis=0)
        test_stats_true[cat] = np.concatenate(test_stats_true[cat], axis=0)

    return test_stats_pred, test_stats_true
def main():
    """Create the cross-validation folds and generate adversarial samples.

    Parses the command-line options, loads the test split of the selected
    dataset together with its pre-trained model, and splits the data into
    stratified folds. For each fold, the train/test arrays are saved under
    `<output_dir>/fold_<i>` (or loaded from there if the files already
    exist). Unless disabled with `--generate-attacks`, adversarial samples
    are generated with `foolbox_attack` from the test and the train part of
    the fold and saved to a sub-directory named after the attack and its
    parameters.

    Raises:
        ValueError: if the model type is invalid or if the data loader
            verification fails.
    """

    def _str_to_bool(value):
        # `type=bool` would call `bool()` on the raw string, which is True for any
        # non-empty value (including 'False'); parse the text explicitly instead.
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1', 'on'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', 'off'):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got '{}'".format(value))

    def _save_or_load(path, array):
        # Reuse the array stored at `path` if the file exists; otherwise save `array` there
        if os.path.isfile(path):
            return np.load(path)
        np.save(path, array)
        return array

    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--test-batch-size',
                        '--tb',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument(
        '--output-dir',
        '-o',
        default='',
        help='directory path for saving the output and model files')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='Disables CUDA training')
    parser.add_argument('--model-type',
                        '-m',
                        choices=['mnist', 'cifar10', 'svhn'],
                        default='cifar10',
                        help='model type or name of the dataset')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=SEED_DEFAULT,
                        help='seed for random number generation')
    parser.add_argument(
        '--generate-attacks',
        type=_str_to_bool,
        default=True,
        help='should attack samples be generated/not (default:True)')
    parser.add_argument('--adv-attack',
                        '--aa',
                        choices=['FGSM', 'PGD', 'CW'],
                        default='PGD',
                        help='type of adversarial attack')
    parser.add_argument('--gpu',
                        type=str,
                        default="2",
                        help='which gpus to execute code on')
    parser.add_argument('--batch-size',
                        type=int,
                        default=BATCH_SIZE_DEF,
                        help='batch size of evaluation')
    parser.add_argument(
        '--p-norm',
        '-p',
        choices=['0', '2', 'inf'],
        default='inf',
        help="p norm for the adversarial attack; options are '0', '2' and 'inf'"
    )
    parser.add_argument('--stepsize',
                        type=float,
                        default=0.001,
                        help='stepsize')
    parser.add_argument('--confidence',
                        type=int,
                        default=0,
                        help='confidence needed by CW')
    parser.add_argument('--epsilon',
                        type=float,
                        default=0.3,
                        help='epsilon value')
    parser.add_argument('--max-iterations',
                        type=int,
                        default=1000,
                        help='max num. of iterations')
    parser.add_argument('--iterations',
                        type=int,
                        default=40,
                        help='num. of iterations')
    parser.add_argument('--max-epsilon',
                        type=float,
                        default=1.,
                        help='max. value of epsilon')
    parser.add_argument('--num-folds',
                        '--nf',
                        type=int,
                        default=CROSS_VAL_SIZE,
                        help='number of cross-validation folds')
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    generate_attacks = args.generate_attacks

    # Default output location is derived from the model type
    if args.output_dir:
        output_dir = args.output_dir
    else:
        output_dir = os.path.join(ROOT, 'numpy_data', args.model_type)

    os.makedirs(output_dir, exist_ok=True)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    if args.model_type not in ('mnist', 'cifar10', 'svhn'):
        raise ValueError("'{}' is not a valid model type".format(
            args.model_type))

    # Test split of the dataset and the corresponding pre-trained model
    data_path = os.path.join(ROOT, 'data')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(*NORMALIZE_IMAGES[args.model_type])
    ])
    if args.model_type == 'mnist':
        testset = datasets.MNIST(data_path,
                                 train=False,
                                 download=True,
                                 transform=transform)
        model = MNIST().to(device)
    elif args.model_type == 'cifar10':
        testset = datasets.CIFAR10(root=data_path,
                                   train=False,
                                   download=True,
                                   transform=transform)
        model = ResNet34().to(device)
    else:
        testset = datasets.SVHN(root=data_path,
                                split='test',
                                download=True,
                                transform=transform)
        model = SVHN().to(device)

    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=args.test_batch_size, shuffle=False, **kwargs)
    model = load_model_checkpoint(model, args.model_type)
    num_classes = 10

    # Set model to evaluation mode
    model.eval()

    # convert the test data loader to 2 ndarrays
    data, labels = get_samples_as_ndarray(test_loader)

    # Get the range of values in the data array
    bounds = get_data_bounds(data)
    print("Range of data values: ({:.4f}, {:.4f})\n".format(*bounds))

    # verify if the data loader is the same as the ndarrays it generates
    if not verify_data_loader(test_loader, batch_size=args.test_batch_size):
        raise ValueError("Data loader verification failed")

    # repeat for each fold in the cross-validation split (folds are numbered from 1)
    skf = StratifiedKFold(n_splits=args.num_folds,
                          shuffle=True,
                          random_state=args.seed)
    for i, (ind_tr, ind_te) in enumerate(skf.split(data, labels), start=1):
        # make dir based on fold to save data
        numpy_save_path = os.path.join(output_dir, "fold_" + str(i))
        os.makedirs(numpy_save_path, exist_ok=True)

        # save train fold to numpy_save_path or load if it exists already
        data_tr = _save_or_load(os.path.join(numpy_save_path, 'data_tr.npy'),
                                data[ind_tr, :])
        labels_tr = _save_or_load(
            os.path.join(numpy_save_path, 'labels_tr.npy'), labels[ind_tr])

        # save test fold to numpy_save_path or load if it exists already
        data_te = _save_or_load(os.path.join(numpy_save_path, 'data_te.npy'),
                                data[ind_te, :])
        labels_te = _save_or_load(
            os.path.join(numpy_save_path, 'labels_te.npy'), labels[ind_te])

        if not generate_attacks:
            print("generated original data split for fold : ", i)
            continue

        # data loader for the training and test data split
        test_fold_loader = convert_to_loader(data_te,
                                             labels_te,
                                             batch_size=args.batch_size)
        train_fold_loader = convert_to_loader(data_tr,
                                              labels_tr,
                                              batch_size=args.batch_size)

        adv_save_path = os.path.join(output_dir, 'fold_{}'.format(i),
                                     args.adv_attack)
        os.makedirs(adv_save_path, exist_ok=True)

        # setting adv. attack parameters
        stepsize = args.stepsize
        confidence = args.confidence
        epsilon = args.epsilon
        max_iterations = args.max_iterations
        iterations = args.iterations
        max_epsilon = args.max_epsilon

        print("parameter choices")
        print("stepsize:", stepsize, type(stepsize))
        print("confidence:", confidence, type(confidence))
        print("max_iterations:", max_iterations, type(max_iterations))
        print("iterations:", iterations, type(iterations))
        print("max_epsilon:", max_epsilon, type(max_epsilon))
        print("epsilon:", epsilon, type(epsilon))

        # create path based on attack configs. The name/value pairs are joined without a
        # separator; this directory name is left unchanged so that existing saved data and
        # any code that rebuilds the same path keep working.
        params_list = [('stepsize', stepsize), ('confidence', confidence),
                       ('epsilon', epsilon),
                       ('maxiterations', max_iterations),
                       ('iterations', iterations),
                       ('maxepsilon', max_epsilon), ('pnorm', args.p_norm)]
        param_path = ''.join(
            ['{}_{}'.format(a, str(b)) for a, b in params_list])

        adv_path = os.path.join(adv_save_path, param_path)
        os.makedirs(adv_path, exist_ok=True)

        # Attack the test part of the fold first and then the train part
        fold_loaders = (('test', 'te', test_fold_loader),
                        ('train', 'tr', train_fold_loader))
        for loader_type, tag, fold_loader in fold_loaders:
            # use dataloader to create adv. examples; adv_inputs is an ndarray
            adv_inputs, adv_labels, clean_inputs, clean_labels = foolbox_attack(
                model,
                device,
                fold_loader,
                loader_type=loader_type,
                loader_batch_size=args.batch_size,
                bounds=bounds,
                num_classes=num_classes,
                dataset=args.model_type,
                fold_num=i,
                p_norm=args.p_norm,
                adv_attack=args.adv_attack,
                stepsize=stepsize,
                confidence=confidence,
                epsilon=epsilon,
                max_iterations=max_iterations,
                iterations=iterations,
                max_epsilon=max_epsilon)

            # save the adv. examples and the corresponding clean samples
            np.save(os.path.join(adv_path, 'data_{}_adv.npy'.format(tag)),
                    adv_inputs)
            np.save(os.path.join(adv_path, 'labels_{}_adv.npy'.format(tag)),
                    adv_labels)
            np.save(os.path.join(adv_path, 'data_{}_clean.npy'.format(tag)),
                    clean_inputs)
            np.save(os.path.join(adv_path, 'labels_{}_clean.npy'.format(tag)),
                    clean_labels)
            print(
                "saved adv. examples generated from the {} data for fold:".
                format(loader_type), i)