Example #1
    def __init__(self, loss, dataset, kernel, **kwargs):

        # initialize
        self.svm = None
        self.cv_svm = None
        self.loss = loss
        self.kernel = kernel
        self.K_train = None
        self.K_val = None
        self.K_test = None
        self.nu = None
        self.gamma = None
        self.initialize_svm(loss, **kwargs)

        # load dataset
        load_dataset(self, dataset)

        # train and test time
        self.clock = 0
        self.clocked = 0
        self.train_time = 0
        self.val_time = 0
        self.test_time = 0

        # Scores and AUC
        self.diag = {}

        self.diag['train'] = {}
        self.diag['val'] = {}
        self.diag['test'] = {}

        self.diag['train']['scores'] = np.zeros((len(self.data._y_train), 1))
        self.diag['val']['scores'] = np.zeros((len(self.data._y_val), 1))
        self.diag['test']['scores'] = np.zeros((len(self.data._y_test), 1))

        self.diag['train']['auc'] = np.zeros(1)
        self.diag['val']['auc'] = np.zeros(1)
        self.diag['test']['auc'] = np.zeros(1)

        self.diag['train']['acc'] = np.zeros(1)
        self.diag['val']['acc'] = np.zeros(1)
        self.diag['test']['acc'] = np.zeros(1)

        self.rho = None

        # AD results log
        self.ad_log = AD_Log()

        # diagnostics
        self.best_weight_dict = None  # attribute to reuse nnet plot-functions
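For reference, the `diag` dictionary above stores per-split score vectors and scalar AUC values. A minimal sketch of how such entries are typically filled after scoring, assuming scikit-learn metrics (the placeholder scores and labels below are hypothetical, not the repository's code):

import numpy as np
from sklearn.metrics import roc_auc_score

# Hypothetical evaluation step: `scores` are anomaly scores on the test split,
# `y_test` the ground-truth labels (0 = normal, 1 = anomalous).
rng = np.random.RandomState(0)
scores = rng.randn(100)
y_test = rng.randint(0, 2, 100)

diag = {'test': {'scores': np.zeros((len(y_test), 1)), 'auc': np.zeros(1)}}
diag['test']['scores'] = scores.reshape(-1, 1)
diag['test']['auc'][0] = roc_auc_score(y_test, scores)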
Example #2
    def __init__(self, dataset, use_weights=None, pretrain=False, profile=False):
        """
        initialize instance
        """

        # whether to enable profiling in Theano functions
        self.profile = profile

        self.initialize_variables(dataset)

        # load dataset
        load_dataset(self, dataset.lower(), pretrain)

        if use_weights and not pretrain:
            self.load_weights(use_weights)
Example #3
def main(dataset_name, net_name, xp_path, data_path, load_model, device, seed,
         batch_size, n_jobs_dataloader, normal_class, isize, rep_dim, k,
         npc_temperature, npc_momentum, ans_select_rate, ans_size):

    cfg = Config(locals().copy())
    # Set seed
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'

    dataset = load_dataset(dataset_name, data_path, normal_class, isize)
    network = build_network(net_name, rep_dim)
    lr, n_epochs, weight_decay, w_rec, w_contrast = 0, 0, 0, 0, 0
    trainer = Solver(dataset, network, lr, n_epochs, batch_size, rep_dim, k,
                     weight_decay, device, n_jobs_dataloader, w_rec,
                     w_contrast, npc_temperature, npc_momentum,
                     ans_select_rate, ans_size, cfg)
    trainer.load_model(load_model)
    auc_score = trainer.test()
    print("AUC score = %.5f" % (auc_score))
def main(dataset_name, net_name, xp_path, data_path, load_model, objective, nu,
         device, seed, batch_size, n_jobs_dataloader, normal_class, isize,
         rep_dim):

    # Set seed
    if seed != -1:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class, isize)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(objective, nu)
    deep_SVDD.set_network(net_name, rep_dim)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    deep_SVDD.load_model(model_path=load_model)
    # Test model
    deep_SVDD.test(dataset,
                   device=device,
                   n_jobs_dataloader=n_jobs_dataloader,
                   batch_size=batch_size)
    print(deep_SVDD.results['test_auc'])

    # Save results, model, and configuration
    deep_SVDD.save_results(export_json=xp_path + '/results_test.json')
Example #5
def main():
    args = parse_args()

    # Get configuration
    cfg = load_yaml(args.cfg_path)

    # Set up log
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    set_memory_growth()

    logger = tf.get_logger()
    logger.disabled = True
    logger.setLevel(logging.FATAL)

    # define network
    model = DeepSAD(cfg)
    model.build_model()

    # load train & test set
    train_dataset, test_dataset = load_dataset(cfg)

    # pretrain
    if cfg['pretrain']:
        model.pretrain(train_dataset, cfg['ae_lr'], cfg['ae_epochs'])

    # train
    model.train(train_dataset, cfg['lr'], cfg['epochs'])

    # test
    model.test(test_dataset)
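The script above reads only a handful of configuration keys. A minimal sketch of the config that `load_yaml` would need to return, assuming it wraps `yaml.safe_load` (the values are placeholders):

import yaml

# Hypothetical minimal configuration containing only the keys read by main().
cfg = yaml.safe_load("""
pretrain: true
ae_lr: 0.001
ae_epochs: 100
lr: 0.0001
epochs: 50
""")
assert cfg['pretrain'] is True and cfg['epochs'] == 50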
    def __init__(self,
                 dataset,
                 n_estimators=100,
                 max_samples='auto',
                 contamination=0.1,
                 **kwargs):

        # load dataset
        load_dataset(self, dataset)

        # initialize
        self.isoForest = None
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.contamination = contamination
        self.initialize_isoForest(seed=self.data.seed, **kwargs)

        # train and test time
        self.clock = 0
        self.clocked = 0
        self.train_time = 0
        self.test_time = 0

        # Scores and AUC
        self.diag = {}

        self.diag['train'] = {}
        self.diag['val'] = {}
        self.diag['test'] = {}

        self.diag['train']['scores'] = np.zeros((len(self.data._y_train), 1))
        self.diag['val']['scores'] = np.zeros((len(self.data._y_val), 1))
        self.diag['test']['scores'] = np.zeros((len(self.data._y_test), 1))

        self.diag['train']['auc'] = np.zeros(1)
        self.diag['val']['auc'] = np.zeros(1)
        self.diag['test']['auc'] = np.zeros(1)

        self.diag['train']['acc'] = np.zeros(1)
        self.diag['val']['acc'] = np.zeros(1)
        self.diag['test']['acc'] = np.zeros(1)

        # AD results log
        self.ad_log = AD_Log()

        # diagnostics
        self.best_weight_dict = None  # attribute to reuse nnet plot-functions
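The constructor above defers model creation to `initialize_isoForest`. A plausible sketch of that helper, assuming it wraps scikit-learn's `IsolationForest` (the body below is an assumption, not the repository's code):

from sklearn.ensemble import IsolationForest

def initialize_isoForest(self, seed=0, **kwargs):
    # Assumed wrapper around scikit-learn's IsolationForest, using the
    # hyperparameters stored on the instance in __init__.
    self.isoForest = IsolationForest(n_estimators=self.n_estimators,
                                     max_samples=self.max_samples,
                                     contamination=self.contamination,
                                     random_state=seed,
                                     **kwargs)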
def main(dataset_name, net_name, xp_path, data_path, load_model, device, seed,
         lr, n_epochs, batch_size, weight_decay, n_jobs_dataloader, normal_class, isize, rep_dim, k, w_rec, w_feat):

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    if not os.path.exists(xp_path):
        os.mkdir(xp_path)
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Log training details
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])
    logger.info('Training rep_dim: %d' % cfg.settings['rep_dim'])
    logger.info('Training k: %d' % cfg.settings['k'])
    logger.info('Training reconstruction loss weight: %g' % cfg.settings['w_rec'])
    logger.info('Training feature consistency loss weight: %g' % cfg.settings['w_feat'])

    dataset = load_dataset(dataset_name, data_path, normal_class, isize)
    network = build_network(net_name, rep_dim)
    trainer = Solver(dataset, network, k, lr, n_epochs, batch_size, rep_dim, k, weight_decay, device, n_jobs_dataloader, w_rec, w_feat, cfg)

    trainer.train()
Example #8
    def __init__(self, dataset, kernel, **kwargs):

        # initialize
        self.kde = None
        self.kernel = kernel
        self.bandwidth = None
        self.initialize_kde(**kwargs)

        # load dataset
        load_dataset(self, dataset)

        # train and test time
        self.clock = 0
        self.clocked = 0
        self.train_time = 0
        self.test_time = 0

        # Scores and AUC
        self.diag = {}

        self.diag['train'] = {}
        self.diag['val'] = {}
        self.diag['test'] = {}

        self.diag['train']['scores'] = np.zeros((len(self.data._y_train), 1))
        self.diag['val']['scores'] = np.zeros((len(self.data._y_val), 1))
        self.diag['test']['scores'] = np.zeros((len(self.data._y_test), 1))

        self.diag['train']['auc'] = np.zeros(1)
        self.diag['val']['auc'] = np.zeros(1)
        self.diag['test']['auc'] = np.zeros(1)

        self.diag['train']['acc'] = np.zeros(1)
        self.diag['val']['acc'] = np.zeros(1)
        self.diag['test']['acc'] = np.zeros(1)

        # AD results log
        self.ad_log = AD_Log()

        # diagnostics
        self.best_weight_dict = None  # attribute to reuse nnet plot-functions
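Analogously, a plausible sketch of `initialize_kde`, assuming it wraps scikit-learn's `KernelDensity` (again an assumption, not the repository's code; the bandwidth default is hypothetical and typically tuned later):

from sklearn.neighbors import KernelDensity

def initialize_kde(self, **kwargs):
    # Assumed wrapper around scikit-learn's KernelDensity using the kernel
    # passed to __init__; a default bandwidth is used if none is given.
    self.bandwidth = kwargs.pop('bandwidth', 1.0)
    self.kde = KernelDensity(kernel=self.kernel, bandwidth=self.bandwidth, **kwargs)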
Example #9
    def __init__(self, dataset, use_weights=None, profile=False):
        """ initialize instance
        """

        # whether to enable profiling in Theano functions
        self.profile = profile

        # patch lasagne creation of parameters
        # (otherwise broadcasting issue with latest versions)
        patch_lasagne()

        self.initialize_variables(dataset)

        # load dataset
        load_dataset(self, dataset.lower())

        if use_weights:
            # special case for VGG pre-trained network
            if use_weights.endswith('vgg16.pkl'):
                self.data.load_weights(self)
            else:
                self.load_weights(use_weights)
def main():
    dataset = load_dataset("cifar10", "data", args.normal_class)
    train_loader, test_loader = dataset.loaders(batch_size=args.batch_size)
    model = CIFAR10_LeNet().to(device)
    model = nn.DataParallel(model)

    # Restore from checkpoint
    if args.restore == 1:
        model.load_state_dict(
            torch.load(os.path.join(args.model_dir, 'model_cifar.pt')))
        print("Saved Model Loaded")

    # Training the model
    train(args, model, device, train_loader, test_loader)
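This snippet relies on module-level `args` and `device` objects. A minimal sketch of how they might be defined with `argparse` (option names follow the attributes used above; defaults are hypothetical):

import argparse
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--normal_class', type=int, default=0)
parser.add_argument('--batch_size', type=int, default=128)
parser.add_argument('--restore', type=int, default=0)   # 1 = load saved checkpoint
parser.add_argument('--model_dir', type=str, default='checkpoints')
args = parser.parse_args()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')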
Example #11
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model, load_ae_model, objective, nu, number_clusters, device, seed,
         optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name, ae_lr,
         ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay, n_jobs_dataloader, normal_class):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)
    
    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])
    logger.info('Number of hypersphere centers: %d' % cfg.settings['number_clusters'])
    
    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(xp_path, cfg.settings['objective'], cfg.settings['nu'], cfg.settings['number_clusters'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=False)
        logger.info('Loading model from %s.' % load_model)

    # import pdb; pdb.set_trace()
    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # import pdb; pdb.set_trace()

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
    idx_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # sorted from lowest to highest anomaly score

    ### NEW ### 
    # idx_sorted_normal = indices[labels == 0][np.argsort(scores[labels == 0])]  # normal images sorted from lowest to highest anomaly score
    # idx_sorted_outlier = indices[labels == 1][np.argsort(scores[labels == 1])]  # anomaly images sorted from lowest to highest anomaly score

    # Lowest to highest uncertainty scores
    idx_sorted_all = indices[np.argsort(scores)]
    labels_sorted_all = labels[np.argsort(scores)]
    scores_sorted_all = np.sort(scores)
    for i in range(128):
        idx = idx_sorted_all[i]   
        X = dataset.test_set[idx][0].unsqueeze(1)
        plot_images_labels(X, label = labels_sorted_all[i], export_img=xp_path + '/images/img_'+str(i), title='Score = {:4.2f}'.format(scores_sorted_all[i]), padding=2)
    # Assemble Gallery
    folder = xp_path + '/images'
    image_paths = [os.path.join(folder, f) 
                for f in os.listdir(folder) if f.endswith('.png')]

    # Random selection of images
    image_array = random.sample(image_paths, k=128)

    # Create and save image grid
    image = concat_images(image_array, (100, 100), (16, 8))
    image.save(os.path.join(folder,'gallery_128.png'), 'PNG')

    # for i in range(32):
    #     idx = idx_sorted_all[i]   
    #     X = dataset.test_set[idx][0].unsqueeze(1)
    #     plot_images_labels(X, label = labels_sorted_all[i], export_img=xp_path + '/simple_img_'+str(i), title='Simplest Example: Score = {:4.2f}'.format(scores_sorted_all[i]), padding=2)

    # # Highest to lowest uncertainty scores
    # idx_sorted_all = np.flip(idx_sorted_all)
    # labels_sorted_all = np.flip(labels_sorted_all)
    # scores_sorted_all = np.flip(scores_sorted_all)

    # for i in range(32):
    #     idx = idx_sorted_all[i]
    #     X = dataset.test_set[idx][0].unsqueeze(1)
    #     plot_images_labels(X, label = labels_sorted_all[i], export_img=xp_path + '/difficult_img_'+str(i), title='Difficult Example: Score = {:4.2f}'.format(scores_sorted_all[i]), padding=2)

    # import pdb; pdb.set_trace()

    # X_n = [dataset.test_set[i][0] for i in idx_sorted_normal[-8:]]
    # X_n = torch.cat(X_n).unsqueeze(1)
    # X_o = [dataset.test_set[i][0] for i in idx_sorted_outlier[-8:]]
    # X_o = torch.cat(X_o).unsqueeze(1)

    # # import pdb; pdb.set_trace()
    # plot_images_labels(X_n, label = 0, export_img=xp_path + '/normals', title='Hardest normal examples', padding=2)
    # # import pdb; pdb.set_trace()
    # plot_images_labels(X_o, label = 1, export_img=xp_path + '/outliers', title='Hardest outlier examples', padding=2)
    ### - ###

    # From clean images, extract the ones model predicts as normal with highest confidence
    X_normals = [dataset.test_set[i][0] for i in idx_sorted[:64]]
    X_normals = torch.cat(X_normals).unsqueeze(1)

    # From clean images, extract the ones model predicts as normal with lowest confidence
    X_outliers = [dataset.test_set[i][0] for i in idx_sorted[-64:]]
    X_outliers = torch.cat(X_outliers).unsqueeze(1)

    plot_images_grid(X_normals, export_img=xp_path + '/normals_64', title='Most normal examples', padding=2)
    plot_images_grid(X_outliers, export_img=xp_path + '/outliers_64', title='Most anomalous examples', padding=2)
Example #12
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         load_ae_model, objective, nu, number_clusters, device, seed,
         optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay,
         pretrain, ae_optimizer_name, ae_lr, ae_n_epochs, ae_lr_milestone,
         ae_batch_size, ae_weight_decay, n_jobs_dataloader, normal_class):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])
    logger.info('Number of hypersphere centers: %d' %
                cfg.settings['number_clusters'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(xp_path, cfg.settings['objective'],
                         cfg.settings['nu'], cfg.settings['number_clusters'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)
    # If specified, load pretrained AE model (autoencoder weights)
    if load_ae_model:
        deep_SVDD.load_pretrained_AE_model(model_path=load_ae_model)
        logger.info('Loading pretrained AE model from %s.' % load_ae_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results, model, and configuration
    deep_SVDD.save_results(export_json=xp_path + '/results.json')
    if pretrain:
        deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    else:
        deep_SVDD.save_model(export_model=xp_path + '/model.tar',
                             save_ae=False)
    cfg.save_config(export_json=xp_path + '/config.json')
    # Save cluster centers
    np.save(os.path.join(xp_path, 'centers.npy'), np.asarray(deep_SVDD.c))

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # sorted from lowest to highest anomaly score

    if dataset_name in ('mnist', 'cifar10', 'cycif'):

        if dataset_name == 'mnist':
            X_normals = dataset.test_set.test_data[idx_sorted[:32],
                                                   ...].unsqueeze(1)
            X_outliers = dataset.test_set.test_data[idx_sorted[-32:],
                                                    ...].unsqueeze(1)

            plot_images_grid(X_normals,
                             export_img=xp_path + '/normals',
                             title='Most normal examples',
                             padding=2)
            plot_images_grid(X_outliers,
                             export_img=xp_path + '/outliers',
                             title='Most anomalous examples',
                             padding=2)

        if dataset_name == 'cifar10':
            X_normals = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_outliers = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...],
                             (0, 3, 1, 2)))

            plot_images_grid(X_normals,
                             export_img=xp_path + '/normals',
                             title='Most normal examples',
                             padding=2)
            plot_images_grid(X_outliers,
                             export_img=xp_path + '/outliers',
                             title='Most anomalous examples',
                             padding=2)

        if dataset_name == 'cycif':
            # # Lowest to highest uncertainty scores
            # idx_sorted_all = indices[np.argsort(scores)]
            # labels_sorted_all = labels[np.argsort(scores)]
            # scores_sorted_all = np.sort(scores)

            # for i in range(32):
            #     idx = idx_sorted_all[i]
            #     X = dataset.test_set[idx][0].unsqueeze(1)
            #     # From test images, extract the ones model predicts as normal with highest confidence (better)
            #     plot_images_labels(X, label = labels_sorted_all[i], export_img=xp_path + '/images/simple_img_'+str(i), title='Simplest Example: Score = {:4.2f}'.format(scores_sorted_all[i]), padding=2)

            # # Highest to lowest uncertainty scores
            # idx_sorted_all = np.flip(idx_sorted_all)
            # labels_sorted_all = np.flip(labels_sorted_all)
            # scores_sorted_all = np.flip(scores_sorted_all)

            # for i in range(32):
            #     idx = idx_sorted_all[i]
            #     X = dataset.test_set[idx][0].unsqueeze(1)
            #     # From test images, extract the ones model predicts as normal with lowest confidence (worse)
            #     plot_images_labels(X, label = labels_sorted_all[i], export_img=xp_path + '/images/difficult_img_'+str(i), title='Difficult Example: Score = {:4.2f}'.format(scores_sorted_all[i]), padding=2)

            # Lowest to highest uncertainty scores
            idx_sorted_all = indices[np.argsort(scores)]
            labels_sorted_all = labels[np.argsort(scores)]
            scores_sorted_all = np.sort(scores)
            for i in range(128):
                idx = idx_sorted_all[i]
                X = dataset.test_set[idx][0].unsqueeze(1)
                plot_images_labels(
                    X,
                    label=labels_sorted_all[i],
                    export_img=xp_path + '/images/img_' + str(i),
                    title='Score = {:4.2f}'.format(scores_sorted_all[i]),
                    padding=2)
            # Assemble Gallery
            folder = xp_path + '/images'
            image_paths = [
                os.path.join(folder, f) for f in os.listdir(folder)
                if f.endswith('.png')
            ]

            # Random selection of images
            image_array = random.sample(image_paths, k=128)

            # Create and save image grid
            image = concat_images(image_array, (100, 100), (16, 8))
            image.save(os.path.join(folder, 'gallery_128.png'), 'PNG')
Example #13
def main(dataset_name, xp_path, data_path, load_config, load_model,
         ratio_known_normal, ratio_known_outlier, ratio_pollution, seed,
         n_estimators, max_samples, contamination, n_jobs_model, hybrid,
         load_ae, n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes):
    """
    (Hybrid) Isolation Forest model for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Ratio of labeled normal train samples: %.2f' %
                ratio_known_normal)
    logger.info('Ratio of labeled anomalous samples: %.2f' %
                ratio_known_outlier)
    logger.info('Pollution ratio of unlabeled train data: %.2f' %
                ratio_pollution)
    if n_known_outlier_classes == 1:
        logger.info('Known anomaly class: %d' % known_outlier_class)
    else:
        logger.info('Number of known anomaly classes: %d' %
                    n_known_outlier_classes)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print Isolation Forest configuration
    logger.info('Number of base estimators in the ensemble: %d' %
                cfg.settings['n_estimators'])
    logger.info('Number of samples for training each base estimator: %s' %
                cfg.settings['max_samples'])
    logger.info('Contamination parameter: %.2f' %
                cfg.settings['contamination'])
    logger.info('Number of jobs for model training: %d' % n_jobs_model)
    logger.info('Hybrid model: %s' % cfg.settings['hybrid'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Use 'cpu' as device for Isolation Forest
    device = 'cpu'
    torch.multiprocessing.set_sharing_strategy(
        'file_system')  # fix multiprocessing issue for ubuntu
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           known_outlier_class,
                           n_known_outlier_classes,
                           ratio_known_normal,
                           ratio_known_outlier,
                           ratio_pollution,
                           random_state=np.random.RandomState(
                               cfg.settings['seed']))
    # Log random sample of known anomaly classes if more than 1 class
    if n_known_outlier_classes > 1:
        logger.info('Known anomaly classes: %s' %
                    (dataset.known_outlier_classes, ))

    # Initialize Isolation Forest model
    Isoforest = IsoForest(hybrid=cfg.settings['hybrid'],
                          n_estimators=cfg.settings['n_estimators'],
                          max_samples=cfg.settings['max_samples'],
                          contamination=cfg.settings['contamination'],
                          n_jobs=n_jobs_model,
                          seed=cfg.settings['seed'])

    # If specified, load model parameters from already trained model
    if load_model:
        Isoforest.load_model(import_path=load_model, device=device)
        logger.info('Loading model from %s.' % load_model)

    # If specified, load model autoencoder weights for a hybrid approach
    if hybrid and load_ae is not None:
        Isoforest.load_ae(dataset_name, model_path=load_ae)
        logger.info('Loaded pretrained autoencoder for features from %s.' %
                    load_ae)

    # Train model on dataset
    Isoforest.train(dataset,
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    Isoforest.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results and configuration
    Isoforest.save_results(export_json=xp_path + '/results.json')
    cfg.save_config(export_json=xp_path + '/config.json')

    # Plot most anomalous and most normal test samples
    indices, labels, scores = zip(*Isoforest.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_all_sorted = indices[np.argsort(
        scores)]  # from lowest to highest score
    idx_normal_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # from lowest to highest score

    if dataset_name in ('mnist', 'fmnist', 'cifar10'):

        if dataset_name in ('mnist', 'fmnist'):
            X_all_low = dataset.test_set.data[idx_all_sorted[:32],
                                              ...].unsqueeze(1)
            X_all_high = dataset.test_set.data[idx_all_sorted[-32:],
                                               ...].unsqueeze(1)
            X_normal_low = dataset.test_set.data[idx_normal_sorted[:32],
                                                 ...].unsqueeze(1)
            X_normal_high = dataset.test_set.data[idx_normal_sorted[-32:],
                                                  ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_all_low = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_all_high = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[-32:], ...],
                             (0, 3, 1, 2)))
            X_normal_low = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[:32], ...],
                    (0, 3, 1, 2)))
            X_normal_high = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[-32:], ...],
                    (0, 3, 1, 2)))

        plot_images_grid(X_all_low, export_img=xp_path + '/all_low', padding=2)
        plot_images_grid(X_all_high,
                         export_img=xp_path + '/all_high',
                         padding=2)
        plot_images_grid(X_normal_low,
                         export_img=xp_path + '/normals_low',
                         padding=2)
        plot_images_grid(X_normal_high,
                         export_img=xp_path + '/normals_high',
                         padding=2)
Example #14
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         device, seed, tokenizer, clean_txt, embedding_size, pretrained_model,
         ad_score, n_attention_heads, attention_size, lambda_p,
         alpha_scheduler, optimizer_name, lr, n_epochs, lr_milestone,
         batch_size, weight_decay, n_jobs_dataloader, n_threads, normal_class):
    """
    Context Vector Data Description (CVDD): An unsupervised anomaly detection method for text.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)
    logger.info('Tokenizer: %s' % cfg.settings['tokenizer'])
    logger.info('Clean text in pre-processing: %s' % cfg.settings['clean_txt'])
    if cfg.settings['embedding_size'] is not None:
        logger.info('Word vector embedding size: %d' %
                    cfg.settings['embedding_size'])
    logger.info('Load pre-trained model: %s' %
                cfg.settings['pretrained_model'])

    # Print CVDD configuration
    logger.info('Anomaly Score: %s' % cfg.settings['ad_score'])
    logger.info('Number of attention heads: %d' %
                cfg.settings['n_attention_heads'])
    logger.info('Attention size: %d' % cfg.settings['attention_size'])
    logger.info('Orthogonality regularization hyperparameter: %.3f' %
                cfg.settings['lambda_p'])
    logger.info('Temperature alpha annealing strategy: %s' %
                cfg.settings['alpha_scheduler'])

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Set seed for reproducibility
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)
    if n_threads > 0:
        torch.set_num_threads(n_threads)
        logger.info(
            'Number of threads used for parallelizing CPU operations: %d' %
            n_threads)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           cfg.settings['tokenizer'],
                           clean_txt=cfg.settings['clean_txt'])

    # Initialize CVDD model and set word embedding
    cvdd = CVDD(cfg.settings['ad_score'])
    cvdd.set_network(net_name=net_name,
                     dataset=dataset,
                     pretrained_model=cfg.settings['pretrained_model'],
                     embedding_size=cfg.settings['embedding_size'],
                     attention_size=cfg.settings['attention_size'],
                     n_attention_heads=cfg.settings['n_attention_heads'])

    # If specified, load model parameters from already trained model
    if load_model:
        cvdd.load_model(import_path=load_model, device=device)
        logger.info('Loading model from %s.' % load_model)

    # Train model on dataset
    cvdd.train(dataset,
               optimizer_name=cfg.settings['optimizer_name'],
               lr=cfg.settings['lr'],
               n_epochs=cfg.settings['n_epochs'],
               lr_milestones=cfg.settings['lr_milestone'],
               batch_size=cfg.settings['batch_size'],
               lambda_p=cfg.settings['lambda_p'],
               alpha_scheduler=cfg.settings['alpha_scheduler'],
               weight_decay=cfg.settings['weight_decay'],
               device=device,
               n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    cvdd.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Print most anomalous and most normal test samples
    indices, labels, scores, heads = zip(*cvdd.results['test_scores'])
    indices, scores = np.array(indices), np.array(scores)
    sort_idx = np.argsort(
        scores).tolist()  # sorted from lowest to highest anomaly score
    idx_sorted = indices[sort_idx]
    idx_normal = idx_sorted[:50].tolist()
    idx_outlier = idx_sorted[-50:].tolist()[::-1]
    att_weights = cvdd.test_att_weights
    att_weights_sorted = [att_weights[i] for i in sort_idx]
    att_weights_normal = att_weights_sorted[:50]
    att_weights_outlier = att_weights_sorted[-50:][::-1]
    heads_sorted = [heads[i] for i in sort_idx]
    heads_normal = heads_sorted[:50]
    heads_outlier = heads_sorted[-50:][::-1]

    print_text_samples(dataset.test_set,
                       dataset.encoder,
                       idx_normal,
                       export_file=xp_path + '/normals',
                       att_heads=heads_normal,
                       weights=att_weights_normal,
                       title='Most normal examples')
    print_text_samples(dataset.test_set,
                       dataset.encoder,
                       idx_outlier,
                       export_file=xp_path + '/outliers',
                       att_heads=heads_outlier,
                       weights=att_weights_outlier,
                       title='Most anomalous examples')

    # Print top words per context
    train_top_words, test_top_words = cvdd.train_top_words, cvdd.test_top_words
    print_top_words(train_top_words,
                    export_file=xp_path + '/top_words_train',
                    title='Top words per context in train set')
    print_top_words(test_top_words,
                    export_file=xp_path + '/top_words_test',
                    title='Top words per context in test set')

    # Print context vector correlation matrix
    if cfg.settings['n_attention_heads'] > 1:
        context_vectors = np.array(cvdd.results['context_vectors'])
        corr_mat = get_correlation_matrix(context_vectors)
        plot_matrix_heatmap(corr_mat,
                            title='Context vectors correlation matrix',
                            export_pdf=xp_path + '/context_vecs_matrix')

    # Print attention matrix heatmaps
    if cfg.settings['n_attention_heads'] > 1:
        train_att_matrix = cvdd.results['train_att_matrix']
        test_att_matrix = cvdd.results['test_att_matrix']
        train_att_matrix, test_att_matrix = np.array(
            train_att_matrix), np.array(test_att_matrix)
        plot_matrix_heatmap(train_att_matrix,
                            title='Self-attention heads correlation matrix',
                            export_pdf=xp_path + '/att_heatmap_train')
        plot_matrix_heatmap(test_att_matrix,
                            title='Self-attention heads correlation matrix',
                            export_pdf=xp_path + '/att_heatmap_test')

    # Plot distributions of distances to context vector per attention head
    train_dists, test_dists = cvdd.train_dists, cvdd.test_dists
    plot_joyplot(train_dists,
                 title='Distances from context vector per attention head',
                 export_pdf=xp_path + '/dists_train')
    plot_joyplot(test_dists[np.array(labels) == 0, :],
                 title='Distances from context vector per attention head',
                 export_pdf=xp_path + '/dists_test_normals')
    if np.sum(np.array(labels)) > 0:
        plot_joyplot(test_dists[np.array(labels) == 1, :],
                     title='Distances from context vector per attention head',
                     export_pdf=xp_path + '/dists_test_outliers')

    # Save results, model, and configuration
    cvdd.save_results(export_json=xp_path + '/results.json')
    cvdd.save_model(export_path=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
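For reference, `get_correlation_matrix` is applied to the stacked context vectors before plotting. One plausible implementation, assuming it computes pairwise correlations with `np.corrcoef` (an assumption about the helper, not its actual code):

import numpy as np

def get_correlation_matrix(context_vectors):
    # context_vectors: array of shape (n_attention_heads, embedding_size).
    # np.corrcoef treats each row as a variable, yielding an
    # (n_heads x n_heads) correlation matrix between context vectors.
    return np.corrcoef(context_vectors)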
Example #15
def main(
    dataset_name,
    net_name,
    xp_path,
    data_path,
    load_config,
    load_model,
    eta,
    ratio_known_normal,
    ratio_known_outlier,
    ratio_pollution,
    device,
    seed,
    optimizer_name,
    lr,
    n_epochs,
    lr_milestone,
    batch_size,
    weight_decay,
    pretrain,
    ae_optimizer_name,
    ae_lr,
    ae_n_epochs,
    ae_lr_milestone,
    ae_batch_size,
    ae_weight_decay,
    num_threads,
    n_jobs_dataloader,
    normal_class,
    known_outlier_class,
    n_known_outlier_classes,
):
    """
    Deep SAD, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    log_file = xp_path + "/log.txt"
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info("Log file is %s" % log_file)
    logger.info("Data path is %s" % data_path)
    logger.info("Export path is %s" % xp_path)

    # Print experimental setup
    logger.info("Dataset: %s" % dataset_name)
    logger.info("Normal class: %d" % normal_class)
    logger.info("Ratio of labeled normal train samples: %.2f" %
                ratio_known_normal)
    logger.info("Ratio of labeled anomalous samples: %.2f" %
                ratio_known_outlier)
    logger.info("Pollution ratio of unlabeled train data: %.2f" %
                ratio_pollution)
    if n_known_outlier_classes == 1:
        logger.info("Known anomaly class: %d" % known_outlier_class)
    else:
        logger.info("Number of known anomaly classes: %d" %
                    n_known_outlier_classes)
    logger.info("Network: %s" % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info("Loaded configuration from %s." % load_config)

    # Print model configuration
    logger.info("Eta-parameter: %.2f" % cfg.settings["eta"])

    # Set seed
    if cfg.settings["seed"] != -1:
        random.seed(cfg.settings["seed"])
        np.random.seed(cfg.settings["seed"])
        torch.manual_seed(cfg.settings["seed"])
        torch.cuda.manual_seed(cfg.settings["seed"])
        torch.backends.cudnn.deterministic = True
        logger.info("Set seed to %d." % cfg.settings["seed"])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = "cpu"
    # Set the number of threads used for parallelizing CPU operations
    if num_threads > 0:
        torch.set_num_threads(num_threads)
    logger.info("Computation device: %s" % device)
    logger.info("Number of threads: %d" % num_threads)
    logger.info("Number of dataloader workers: %d" % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(
        dataset_name,
        data_path,
        normal_class,
        known_outlier_class,
        n_known_outlier_classes,
        ratio_known_normal,
        ratio_known_outlier,
        ratio_pollution,
        random_state=np.random.RandomState(cfg.settings["seed"]),
    )
    # Log random sample of known anomaly classes if more than 1 class
    if n_known_outlier_classes > 1:
        logger.info("Known anomaly classes: %s" %
                    (dataset.known_outlier_classes, ))

    # Initialize DeepSAD model and set neural network phi
    deepSAD = DeepSAD(cfg.settings["eta"])
    deepSAD.set_network(net_name)

    # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
    if load_model:
        deepSAD.load_model(model_path=load_model,
                           load_ae=True,
                           map_location=device)
        logger.info("Loading model from %s." % load_model)

    logger.info("Pretraining: %s" % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info("Pretraining optimizer: %s" %
                    cfg.settings["ae_optimizer_name"])
        logger.info("Pretraining learning rate: %g" % cfg.settings["ae_lr"])
        logger.info("Pretraining epochs: %d" % cfg.settings["ae_n_epochs"])
        logger.info("Pretraining learning rate scheduler milestones: %s" %
                    (cfg.settings["ae_lr_milestone"], ))
        logger.info("Pretraining batch size: %d" %
                    cfg.settings["ae_batch_size"])
        logger.info("Pretraining weight decay: %g" %
                    cfg.settings["ae_weight_decay"])

        # Pretrain model on dataset (via autoencoder)
        deepSAD.pretrain(
            dataset,
            optimizer_name=cfg.settings["ae_optimizer_name"],
            lr=cfg.settings["ae_lr"],
            n_epochs=cfg.settings["ae_n_epochs"],
            lr_milestones=cfg.settings["ae_lr_milestone"],
            batch_size=cfg.settings["ae_batch_size"],
            weight_decay=cfg.settings["ae_weight_decay"],
            device=device,
            n_jobs_dataloader=n_jobs_dataloader,
        )

        # Save pretraining results
        deepSAD.save_ae_results(export_json=xp_path + "/ae_results.json")

    # Log training details
    logger.info("Training optimizer: %s" % cfg.settings["optimizer_name"])
    logger.info("Training learning rate: %g" % cfg.settings["lr"])
    logger.info("Training epochs: %d" % cfg.settings["n_epochs"])
    logger.info("Training learning rate scheduler milestones: %s" %
                (cfg.settings["lr_milestone"], ))
    logger.info("Training batch size: %d" % cfg.settings["batch_size"])
    logger.info("Training weight decay: %g" % cfg.settings["weight_decay"])

    # Train model on dataset
    deepSAD.train(
        dataset,
        optimizer_name=cfg.settings["optimizer_name"],
        lr=cfg.settings["lr"],
        n_epochs=cfg.settings["n_epochs"],
        lr_milestones=cfg.settings["lr_milestone"],
        batch_size=cfg.settings["batch_size"],
        weight_decay=cfg.settings["weight_decay"],
        device=device,
        n_jobs_dataloader=n_jobs_dataloader,
    )

    # Test model
    deepSAD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results, model, and configuration
    deepSAD.save_results(export_json=xp_path + "/results.json")
    deepSAD.save_model(export_model=xp_path + "/model.tar")
    cfg.save_config(export_json=xp_path + "/config.json")

    # Plot most anomalous and most normal test samples
    indices, labels, scores = zip(*deepSAD.results["test_scores"])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_all_sorted = indices[np.argsort(
        scores)]  # from lowest to highest score
    idx_normal_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # from lowest to highest score

    if dataset_name in ("mnist", "fmnist", "cifar10"):

        if dataset_name in ("mnist", "fmnist"):
            X_all_low = dataset.test_set.data[idx_all_sorted[:32],
                                              ...].unsqueeze(1)
            X_all_high = dataset.test_set.data[idx_all_sorted[-32:],
                                               ...].unsqueeze(1)
            X_normal_low = dataset.test_set.data[idx_normal_sorted[:32],
                                                 ...].unsqueeze(1)
            X_normal_high = dataset.test_set.data[idx_normal_sorted[-32:],
                                                  ...].unsqueeze(1)

        if dataset_name == "cifar10":
            X_all_low = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_all_high = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[-32:], ...],
                             (0, 3, 1, 2)))
            X_normal_low = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[:32], ...],
                    (0, 3, 1, 2)))
            X_normal_high = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[-32:], ...],
                    (0, 3, 1, 2)))
Example #16
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model, eta,
         device, seed, optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay,
         pretrain, ae_optimizer_name, ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         num_threads, n_jobs_dataloader, normal_class):
    """
    Deep SAD, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s' % log_file)
    logger.info('Data path is %s' % data_path)
    logger.info('Export path is %s' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print model configuration
    logger.info('Eta-parameter: %.2f' % cfg.settings['eta'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    # Set the number of threads used for parallelizing CPU operations
    if num_threads > 0:
        torch.set_num_threads(num_threads)
    logger.info('Computation device: %s' % device)
    logger.info('Number of threads: %d' % num_threads)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class, random_state=np.random.RandomState(cfg.settings['seed']))
    # Log random sample of known anomaly classes if more than 1 class

    # Initialize DeepSAD model and set neural network phi
    deepSAD = DeepSAD(cfg.settings['eta'])
    deepSAD.set_network(net_name)

    # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
    if load_model:
        deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' % cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' % (cfg.settings['ae_lr_milestone'],))
        logger.info('Pretraining batch size: %d' % cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' % cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deepSAD.pretrain(dataset,
                         optimizer_name=cfg.settings['ae_optimizer_name'],
                         lr=cfg.settings['ae_lr'],
                         n_epochs=cfg.settings['ae_n_epochs'],
                         lr_milestones=cfg.settings['ae_lr_milestone'],
                         batch_size=cfg.settings['ae_batch_size'],
                         weight_decay=cfg.settings['ae_weight_decay'],
                         device=device,
                         n_jobs_dataloader=n_jobs_dataloader)

        # Save pretraining results
        deepSAD.save_ae_results(export_json=xp_path + '/ae_results.json')

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' % (cfg.settings['lr_milestone'],))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deepSAD.train(dataset,
                  optimizer_name=cfg.settings['optimizer_name'],
                  lr=cfg.settings['lr'],
                  n_epochs=cfg.settings['n_epochs'],
                  lr_milestones=cfg.settings['lr_milestone'],
                  batch_size=cfg.settings['batch_size'],
                  weight_decay=cfg.settings['weight_decay'],
                  device=device,
                  n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deepSAD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results, model, and configuration
    deepSAD.save_results(export_json=xp_path + '/results.json')
    deepSAD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')

    # Plot most anomalous and most normal test samples
    indices, labels, scores = zip(*deepSAD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
    idx_all_sorted = indices[np.argsort(scores)]  # from lowest to highest score
    idx_normal_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # from lowest to highest score

    if dataset_name in ('mnist', 'fmnist', 'cifar10'):

        if dataset_name in ('mnist', 'fmnist'):
            X_all_low = dataset.test_set.data[idx_all_sorted[:32], ...].unsqueeze(1)
            X_all_high = dataset.test_set.data[idx_all_sorted[-32:], ...].unsqueeze(1)
            X_normal_low = dataset.test_set.data[idx_normal_sorted[:32], ...].unsqueeze(1)
            X_normal_high = dataset.test_set.data[idx_normal_sorted[-32:], ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_all_low = torch.tensor(np.transpose(dataset.test_set.data[idx_all_sorted[:32], ...], (0,3,1,2)))
            X_all_high = torch.tensor(np.transpose(dataset.test_set.data[idx_all_sorted[-32:], ...], (0,3,1,2)))
            X_normal_low = torch.tensor(np.transpose(dataset.test_set.data[idx_normal_sorted[:32], ...], (0,3,1,2)))
            X_normal_high = torch.tensor(np.transpose(dataset.test_set.data[idx_normal_sorted[-32:], ...], (0,3,1,2)))

        plot_images_grid(X_all_low, export_img=xp_path + '/all_low', padding=2)
        plot_images_grid(X_all_high, export_img=xp_path + '/all_high', padding=2)
        plot_images_grid(X_normal_low, export_img=xp_path + '/normals_low', padding=2)
        plot_images_grid(X_normal_high, export_img=xp_path + '/normals_high', padding=2)
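The (index, label, score) unpacking and argsort slicing above recurs in several of the examples below. A minimal, self-contained sketch of that pattern on synthetic scores (not part of the original example):

import numpy as np

# Synthetic (index, label, score) triples in the same format as results['test_scores'].
test_scores = [(0, 0, 0.10), (1, 1, 0.95), (2, 0, 0.30), (3, 1, 0.80), (4, 0, 0.05)]

indices, labels, scores = map(np.array, zip(*test_scores))

idx_all_sorted = indices[np.argsort(scores)]  # all samples, lowest to highest anomaly score
idx_normal_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # normal samples only

n = 2  # the examples use 32; kept small for the toy data
print(idx_all_sorted[:n], idx_all_sorted[-n:])  # least / most anomalous overall -> [4 0] [3 1]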
示例#17
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         objective, nu, device, seed, optimizer_name, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         n_jobs_dataloader, normal_class, save_points_outside_r):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=False)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    if save_points_outside_r:
        logger.info('Saving points outside of r')
        deep_SVDD.savePointsOutsideR(dataset,
                                     device=device,
                                     n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    deep_SVDD.save_results(export_json=xp_path + '/results.json')
    deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
示例#18
0
import numpy as np
import torch
import torchvision.datasets as dst
import torch.utils.data as Data
from realNVP import RealNVP
import torch.optim as optim
from torch.utils.data import Subset
import torchvision.transforms as transforms
from sklearn.metrics import roc_auc_score
import torch.nn as nn
import torch.nn.functional as F
from datasets.main import load_dataset

dataset = load_dataset(dataset_name='mnist',
                       data_path='./data',
                       normal_class=(2))
#dataset = load_dataset(dataset_name = 'cifar10', data_path='./data', normal_class=(1,2))
train_loader = Data.DataLoader(dataset=dataset.train_set,
                               batch_size=200,
                               shuffle=False,
                               drop_last=True)
test_loader = Data.DataLoader(dataset=dataset.test_set,
                              batch_size=200,
                              shuffle=False,
                              drop_last=True)

if __name__ == "__main__":
    eta_set = [0., 5., 10.]
    hidden_size_set = [50, 60, 80, 100]
    num_epochs_set = [100, 150, 200]
    n_blocks_set = [5, 6]
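The snippet stops after defining the hyperparameter grids. A minimal sketch of how such grids are typically enumerated with itertools.product; train_and_evaluate is a hypothetical placeholder for the RealNVP training/evaluation routine that the original script does not show:

import random
from itertools import product

eta_set = [0., 5., 10.]
hidden_size_set = [50, 60, 80, 100]
num_epochs_set = [100, 150, 200]
n_blocks_set = [5, 6]

def train_and_evaluate(eta, hidden_size, num_epochs, n_blocks):
    # Hypothetical stand-in: the real routine would build RealNVP with these
    # settings, train on train_loader, and return roc_auc_score on test_loader.
    return random.random()

best_auc, best_cfg = -1.0, None
for cfg in product(eta_set, hidden_size_set, num_epochs_set, n_blocks_set):
    auc = train_and_evaluate(*cfg)
    if auc > best_auc:
        best_auc, best_cfg = auc, cfg

print('Best AUC %.4f for (eta, hidden_size, num_epochs, n_blocks) = %s' % (best_auc, best_cfg))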
示例#19
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         objective, nu, device, seed, optimizer_name, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         n_jobs_dataloader, normal_class, img_name):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Current date and time
    logger.info('Current date and time is %s.' % datetime.now())

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)

    idx_all_sorted = indices[np.argsort(scores)]
    idx_normal_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # sorted from lowest to highest anomaly score
    idx_outlier_sorted = indices[labels == 1][np.argsort(scores[labels == 1])]
    idx_sorted = indices[np.argsort(scores)]

    test_auc = deep_SVDD.results['test_auc']

    if dataset_name in ('mnist', 'cifar10', 'fashion', 'crack', 'crack128'):
        if dataset_name == 'mnist':
            X_normals = dataset.test_set.test_data[idx_normal_sorted[:32],
                                                   ...].unsqueeze(1)
            X_outliers = dataset.test_set.test_data[idx_normal_sorted[-32:],
                                                    ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_normals = torch.tensor(
                np.transpose(
                    dataset.test_set.test_data[idx_normal_sorted[:32], ...],
                    (0, 3, 1, 2)))
            X_outliers = torch.tensor(
                np.transpose(
                    dataset.test_set.test_data[idx_normal_sorted[-32:], ...],
                    (0, 3, 1, 2)))

        if dataset_name == 'fashion':
            X_normals = torch.reshape(
                torch.tensor(dataset.test_set.data[idx_normal_sorted[:32],
                                                   ...]),
                (32, 28, 28)).unsqueeze(1)
            X_outliers = torch.reshape(
                torch.tensor(dataset.test_set.data[idx_normal_sorted[-32:],
                                                   ...]),
                (32, 28, 28)).unsqueeze(1)

        plot_imgs = True
        indices, labels, scores = zip(
            *deep_SVDD.results['test_scores (corner)'])
        indices, labels, scores = np.array(indices), np.array(
            labels), np.array(scores)
        idx_all_sorted = indices[np.argsort(
            scores)]  # from lowest to highest score
        idx_normal_sorted = indices[labels == 0][np.argsort(
            scores[labels == 0])]  # from lowest to highest score
        idx_outlier_sorted = indices[labels == 1][np.argsort(
            scores[labels == 1])]  # from lowest to highest score

        if dataset_name == 'crack':
            mid = len(idx_all_sorted) / 2
            if len(idx_all_sorted) > 64 and len(
                    idx_normal_sorted) > 64 and len(idx_outlier_sorted) > 100:
                #X_middle = torch.reshape(torch.tensor(dataset.test_set.data[idx_all_sorted[int(mid-312):int(mid+313)], ...]), (625,64,64)).unsqueeze(1)
                X_all_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[:64],
                                                     ...]),
                    (64, 64, 64)).unsqueeze(1)
                X_all_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[-144:],
                                                     ...]),
                    (144, 64, 64)).unsqueeze(1)
                X_normals_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[:64],
                                                     ...]),
                    (64, 64, 64)).unsqueeze(1)
                X_normals_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[-64:],
                                                     ...]),
                    (64, 64, 64)).unsqueeze(1)
                X_outliers_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[:100],
                                                     ...]),
                    (100, 64, 64)).unsqueeze(1)
                X_outliers_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[-100:],
                                                     ...]),
                    (100, 64, 64)).unsqueeze(1)
            else:
                plot_imgs = False
                X_all_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[:1], ...]),
                    (1, 64, 64)).unsqueeze(1)
                X_all_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[-1:],
                                                     ...]),
                    (1, 64, 64)).unsqueeze(1)
                X_normals_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[:1],
                                                     ...]),
                    (1, 64, 64)).unsqueeze(1)
                X_normals_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[-1:],
                                                     ...]),
                    (1, 64, 64)).unsqueeze(1)
                X_outliers_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[:1],
                                                     ...]),
                    (1, 64, 64)).unsqueeze(1)
                X_outliers_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[-1:],
                                                     ...]),
                    (1, 64, 64)).unsqueeze(1)

        if dataset_name == 'crack128':
            mid = len(idx_all_sorted) / 2
            if len(idx_all_sorted) > 64 and len(
                    idx_normal_sorted) > 64 and len(idx_outlier_sorted) > 100:

                #X_middle = torch.reshape(torch.tensor(dataset.test_set.data[idx_all_sorted[int(mid-312):int(mid+313)], ...]), (625,128,128)).unsqueeze(1)
                X_all_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[:64],
                                                     ...]),
                    (64, 128, 128)).unsqueeze(1)
                X_all_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[-144:],
                                                     ...]),
                    (144, 128, 128)).unsqueeze(1)
                X_normals_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[:64],
                                                     ...]),
                    (64, 128, 128)).unsqueeze(1)
                X_normals_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[-64:],
                                                     ...]),
                    (64, 128, 128)).unsqueeze(1)
                X_outliers_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[:100],
                                                     ...]),
                    (100, 128, 128)).unsqueeze(1)
                X_outliers_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[-100:],
                                                     ...]),
                    (100, 128, 128)).unsqueeze(1)
            else:
                plot_imgs = False
                X_all_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[:1], ...]),
                    (1, 128, 128)).unsqueeze(1)
                X_all_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_all_sorted[-1:],
                                                     ...]),
                    (1, 128, 128)).unsqueeze(1)
                X_normals_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[:1],
                                                     ...]),
                    (1, 128, 128)).unsqueeze(1)
                X_normals_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_normal_sorted[-1:],
                                                     ...]),
                    (1, 128, 128)).unsqueeze(1)
                X_outliers_normal = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[:1],
                                                     ...]),
                    (1, 128, 128)).unsqueeze(1)
                X_outliers_outlier = torch.reshape(
                    torch.tensor(
                        dataset.test_set_corner.data[idx_outlier_sorted[-1:],
                                                     ...]),
                    (1, 128, 128)).unsqueeze(1)

        if plot_imgs:
            #plot_images_grid(X_middle, export_img=xp_path + '/plots/' + img_name + '_all_middle_', title='All samples', padding=2, nrow=25)
            #plot_images_grid(X_all_normal, export_img=xp_path + '/plots/' + img_name + '_all_low_', title='Least anomalous samples', padding=2, nrow=8)
            plot_images_grid(X_all_outlier,
                             export_img=xp_path + '/plots/' + img_name +
                             '_all_high_',
                             title='Most anomalous samples',
                             padding=2,
                             nrow=12)
            #plot_images_grid(X_normals_normal, export_img=xp_path + '/plots/' + img_name + '_normals_low_', title='Least anomalous normal samples', padding=2, nrow=8)
            plot_images_grid(X_normals_outlier,
                             export_img=xp_path + '/plots/' + img_name +
                             '_normals_high_',
                             title='Most anomalous normal samples',
                             padding=2,
                             nrow=8)
            plot_images_grid(X_outliers_normal,
                             export_img=xp_path + '/plots/' + img_name +
                             '_outliers_low_',
                             title='Least anomalous anomaly samples',
                             padding=2,
                             nrow=10)
            plot_images_grid(X_outliers_outlier,
                             export_img=xp_path + '/plots/' + img_name +
                             '_outliers_high_',
                             title='Most anomalous anomaly samples',
                             padding=2,
                             nrow=10)

        test_auc = deep_SVDD.results['test_auc']
        test_auc_corner = deep_SVDD.results['test_auc (corner)']
        plot_images_hist(
            scores[labels == 0],
            scores[labels == 1],
            export_img=xp_path + '/plots/' + img_name + '_hist_corner',
            title=
            'Deep SVDD Anomaly scores of normal and crack samples (with corner cracks)',
            auc=test_auc_corner)

        indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
        indices, labels, scores = np.array(indices), np.array(
            labels), np.array(scores)
        plot_images_hist(
            scores[labels == 0],
            scores[labels == 1],
            export_img=xp_path + '/plots/' + img_name + '_hist',
            title='Deep SVDD anomaly scores of normal and crack samples',
            auc=test_auc)

    # Save results, model, and configuration
    deep_SVDD.save_results(export_json=xp_path + '/results.json')
    deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
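AUC values like the test_auc used in the score histograms above are typically computed with scikit-learn's roc_auc_score on the binary labels (0 = normal, 1 = anomaly) and the anomaly scores; a minimal sketch on toy data, not taken from the original:

import numpy as np
from sklearn.metrics import roc_auc_score

labels = np.array([0, 0, 0, 1, 1])            # 0 = normal, 1 = anomaly
scores = np.array([0.1, 0.2, 0.4, 0.8, 0.3])  # higher = more anomalous

print('Test AUC: %.4f' % roc_auc_score(labels, scores))  # 0.8333 for this toy data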
示例#20
0
def main(dataset_name, xp_path, data_path, load_config, load_model, seed,
         kernel, nu, tokenizer, clean_txt, embedding_size,
         pretrained_word_vectors, embedding_reduction, use_tfidf_weights,
         normalize_embedding, n_jobs_dataloader, normal_class):
    """
    One-Class SVM for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Tokenizer: %s' % cfg.settings['tokenizer'])
    logger.info('Clean text in pre-processing: %s' % cfg.settings['clean_txt'])
    logger.info('Word vector embedding size: %d' %
                cfg.settings['embedding_size'])
    logger.info('Load pre-trained word vectors: %s' %
                cfg.settings['pretrained_word_vectors'])
    logger.info('Reduction of word embeddings: %s' %
                cfg.settings['embedding_reduction'])
    logger.info('Use tf-idf weights: %s' % cfg.settings['use_tfidf_weights'])
    logger.info('Normalize embedding: %s' %
                cfg.settings['normalize_embedding'])

    # Print OC-SVM configuration
    logger.info('OC-SVM kernel: %s' % cfg.settings['kernel'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Set seed for reproducibility
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Use 'cpu' as device for loading embeddings
    device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           cfg.settings['tokenizer'],
                           cfg.settings['use_tfidf_weights'],
                           clean_txt=cfg.settings['clean_txt'])

    # Initialize OC-SVM model and set word embedding
    ocsvm = OCSVM(cfg.settings['kernel'], cfg.settings['nu'])
    ocsvm.set_embedding(
        dataset,
        embedding_size=cfg.settings['embedding_size'],
        pretrained_word_vectors=cfg.settings['pretrained_word_vectors'],
        embedding_reduction=cfg.settings['embedding_reduction'],
        use_tfidf_weights=cfg.settings['use_tfidf_weights'],
        normalize_embedding=cfg.settings['normalize_embedding'],
        device=device)
    # If specified, load model parameters from already trained model
    if load_model:
        ocsvm.load_model(import_path=load_model, device=device)
        logger.info('Loading model from %s.' % load_model)

    # Train model on dataset
    ocsvm.train(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    ocsvm.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Print most anomalous and most normal test samples
    indices, labels, scores = zip(*ocsvm.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_sorted = indices[np.argsort(
        scores)]  # sorted from lowest to highest anomaly score
    idx_normal = idx_sorted[:50].tolist()
    idx_outlier = idx_sorted[-50:].tolist()[::-1]
    print_text_samples(dataset.test_set,
                       dataset.encoder,
                       idx_normal,
                       export_file=xp_path + '/normals',
                       title='Most normal examples')
    print_text_samples(dataset.test_set,
                       dataset.encoder,
                       idx_outlier,
                       export_file=xp_path + '/outliers',
                       title='Most anomalous examples')

    # Save results, model, and configuration
    ocsvm.save_results(export_json=xp_path + '/results.json')
    ocsvm.save_model(export_path=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
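Every example repeats the same logging boilerplate: root logger at INFO plus a FileHandler writing to xp_path + '/log.txt'. A small helper consolidating it, a sketch using only the standard library rather than code from the original repositories:

import logging
import os

def setup_experiment_logger(xp_path):
    # Configure the root logger to also write to <xp_path>/log.txt and return it.
    os.makedirs(xp_path, exist_ok=True)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = os.path.join(xp_path, 'log.txt')
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info('Log file is %s.' % log_file)
    return logger

# Usage at the top of main(): logger = setup_experiment_logger(xp_path)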
示例#21
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         eta, ratio_known_normal, ratio_known_outlier, ratio_pollution, device,
         seed, optimizer_name, lr, n_epochs, lr_milestone, batch_size,
         weight_decay, pretrain, ae_optimizer_name, ae_lr, ae_n_epochs,
         ae_lr_milestone, ae_batch_size, ae_weight_decay, num_threads,
         n_jobs_dataloader, normal_class, known_outlier_class,
         n_known_outlier_classes, normal_data_file, abnormal_data_file,
         txt_result_file):
    """
    Deep SAD, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    if not os.path.exists(xp_path):
        os.makedirs(xp_path)
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s' % log_file)
    logger.info('Data path is %s' % data_path)
    logger.info('Export path is %s' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print model configuration
    logger.info('Eta-parameter: %.2f' % cfg.settings['eta'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    # Set the number of threads used for parallelizing CPU operations
    if num_threads > 0:
        torch.set_num_threads(num_threads)
    logger.info('Computation device: %s' % device)
    logger.info('Number of threads: %d' % num_threads)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)
    logger.info('Normal data file: %s' % normal_data_file)
    logger.info('Abnormal data file: %s' % abnormal_data_file)
    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           known_outlier_class,
                           n_known_outlier_classes,
                           ratio_known_normal,
                           ratio_known_outlier,
                           ratio_pollution,
                           normal_data_file=normal_data_file,
                           abnormal_data_file=abnormal_data_file,
                           random_state=np.random.RandomState(
                               cfg.settings['seed']))

    # Initialize DeepSAD model and set neural network phi
    deepSAD = DeepSAD(cfg.settings['eta'])
    deepSAD.set_network(net_name)

    # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
    if load_model:
        deepSAD.load_model(model_path=load_model,
                           load_ae=True,
                           map_location=device)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deepSAD.pretrain(dataset,
                         optimizer_name=cfg.settings['ae_optimizer_name'],
                         lr=cfg.settings['ae_lr'],
                         n_epochs=cfg.settings['ae_n_epochs'],
                         lr_milestones=cfg.settings['ae_lr_milestone'],
                         batch_size=cfg.settings['ae_batch_size'],
                         weight_decay=cfg.settings['ae_weight_decay'],
                         device=device,
                         n_jobs_dataloader=n_jobs_dataloader)

        # Save pretraining results
        deepSAD.save_ae_results(export_json=xp_path + '/ae_results.json')
        pretrain_auc = deepSAD.ae_results['test_auc']

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deepSAD.train(dataset,
                  optimizer_name=cfg.settings['optimizer_name'],
                  lr=cfg.settings['lr'],
                  n_epochs=cfg.settings['n_epochs'],
                  lr_milestones=cfg.settings['lr_milestone'],
                  batch_size=cfg.settings['batch_size'],
                  weight_decay=cfg.settings['weight_decay'],
                  device=device,
                  n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deepSAD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results, model, and configuration
    deepSAD.save_results(export_json=xp_path + '/results.json')
    deepSAD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')

    # Save test scores to a DataFrame for the detection-rate report below
    train_auc = deepSAD.results['test_auc']
    indices, labels, scores = zip(*deepSAD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    result_df = pd.DataFrame()
    result_df['indices'] = indices
    result_df['labels'] = labels
    result_df['scores'] = scores
    result_df_path = '{}/result_df_{}_{}.pkl'.format(xp_path, normal_data_file,
                                                     abnormal_data_file)
    result_df.to_pickle(result_df_path)

    # Write the file for detection rate
    result_df.drop('indices', inplace=True, axis=1)
    df_normal = result_df[result_df.labels == 0]
    df_abnormal = result_df[result_df.labels == 1]
    cut = df_normal.scores.quantile(0.95)
    y = [1 if e > cut else 0 for e in df_abnormal['scores'].values]
    f = open(txt_result_file, 'a')
    f.write('=====================\n')
    f.write('[DataFrame Name] {}\n'.format(result_df_path))
    f.write('[Normal to Abnormal Ratio] 1:{}\n'.format(
        len(df_abnormal) / len(df_normal)))
    if pretrain:
        f.write('[Pretrain AUC] {}\n'.format(pretrain_auc))
    f.write('[Train AUC] {}\n'.format(train_auc))
    f.write('[Detection Rate] {}\n'.format(sum(y) / len(y)))
    f.write('=====================\n\n')
    f.close()
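The detection rate above thresholds the abnormal scores at the 95th percentile of the normal scores. A minimal numeric sketch of that calculation on toy scores (not the original DataFrame):

import pandas as pd

# Toy scores: label 0 = normal, label 1 = abnormal; higher score = more anomalous.
result_df = pd.DataFrame({
    'labels': [0] * 8 + [1] * 4,
    'scores': [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.90, 0.80, 0.20, 0.70],
})

df_normal = result_df[result_df.labels == 0]
df_abnormal = result_df[result_df.labels == 1]

cut = df_normal.scores.quantile(0.95)  # 95th percentile of the normal scores (~0.38 here)
y = [1 if e > cut else 0 for e in df_abnormal['scores'].values]
print('Detection rate: %.2f' % (sum(y) / len(y)))  # 3 of the 4 abnormal scores exceed the cut -> 0.75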
示例#22
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model, objective, nu, device, seed,
         optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name, ae_lr,
         ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay, n_jobs_dataloader, normal_class):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' % cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' % (cfg.settings['ae_lr_milestone'],))
        logger.info('Pretraining batch size: %d' % cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' % cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' % (cfg.settings['lr_milestone'],))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
    idx_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # sorted from lowest to highest anomaly score

    if dataset_name in ('mnist', 'cifar10'):

        if dataset_name == 'mnist':
            X_normals = dataset.test_set.test_data[idx_sorted[:32], ...].unsqueeze(1)
            X_outliers = dataset.test_set.test_data[idx_sorted[-32:], ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_normals = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...], (0, 3, 1, 2)))
            X_outliers = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...], (0, 3, 1, 2)))

        plot_images_grid(X_normals, export_img=xp_path + '/normals', title='Most normal examples', padding=2)
        plot_images_grid(X_outliers, export_img=xp_path + '/outliers', title='Most anomalous examples', padding=2)

    # Save results, model, and configuration
    deep_SVDD.save_results(export_json=xp_path + '/results.json')
    deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
示例#23
0
    def __init__(self,
                 src="amazon",
                 tar="webcam",
                 pseudo_label_method="lp",
                 strategy="easy2hard",
                 schedule=None,
                 subdomains=5,
                 experiment="",
                 epoch=10,
                 alpha=0.5,
                 src_w=1,
                 tar_w=1,
                 init=0.1,
                 mft_w=0.1,
                 k=50,
                 record=True,
                 device="cuda:0"):

        ## parameter
        self.experiment = experiment
        self.pseudo_label_method = pseudo_label_method
        self.strategy = strategy
        self.subdomains = subdomains
        self.k = k
        self.alpha = alpha
        self.src = src
        self.tar = tar
        self.src_w = src_w
        self.tar_w = tar_w
        self.mft_w = mft_w
        self.init = init
        self.device = device
        self.record = record
        self.epoch = epoch
        self.schedule = schedule
        ## record pseudo label
        self.weight = None
        self.pseudo_label = None
        self.src_feature = None
        self.tar_feature = None
        self.seg_point = None
        self.globalstep = 0

        ## data
        self.src_dataloader = None
        self.tar_dataloader = None
        self.src_dataloader_shuffle = None
        self.tar_dataloader_shuffle = None
        self.src_dataset = load_dataset(src)
        self.tar_dataset = load_dataset(tar)
        self.easy_tar_dataset = load_dataset(tar)
        ## model
        self.build_default_parameter()
        self.load_dataloader()

        ## log files
        self.writer = SummaryWriter("../log/" + experiment)
        fp = open('experiments.txt', "r")
        for i in iter(fp):
            exp_dir = '../exp/exp_fig/' + i.strip() + '/'  # strip the trailing newline from each name
            if not os.path.exists(exp_dir):
                os.makedirs(exp_dir)
        fp.close()
        ## loss
        self.ce = torch.nn.NLLLoss()
        self.ce_noreduce = torch.nn.NLLLoss(reduction='none')  # per-sample loss (replaces deprecated reduce=False)
        self.ent = HLoss()
示例#24
0
def main(dataset_name, xp_path, data_path, load_config, load_model, seed,
         kernel, kappa, hybrid, load_ae, n_jobs_dataloader, normal_class):
    """
    (Hybrid) SSAD for anomaly detection as in Goernitz et al., Towards Supervised Anomaly Detection, JAIR, 2013.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print SSAD configuration
    logger.info('SSAD kernel: %s' % cfg.settings['kernel'])
    logger.info('Kappa-parameter: %.2f' % cfg.settings['kappa'])
    logger.info('Hybrid model: %s' % cfg.settings['hybrid'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        co.setseed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Use 'cpu' as device for SSAD
    device = 'cpu'
    torch.multiprocessing.set_sharing_strategy(
        'file_system')  # fix multiprocessing issue for ubuntu
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           random_state=np.random.RandomState(
                               cfg.settings['seed']))

    # Initialize SSAD model
    ssad = SSAD(kernel=cfg.settings['kernel'],
                kappa=cfg.settings['kappa'],
                hybrid=cfg.settings['hybrid'])

    # If specified, load model parameters from already trained model
    if load_model:
        ssad.load_model(import_path=load_model, device=device)
        logger.info('Loading model from %s.' % load_model)

    # If specified, load model autoencoder weights for a hybrid approach
    if hybrid and load_ae is not None:
        ssad.load_ae(dataset_name, model_path=load_ae)
        logger.info('Loaded pretrained autoencoder for features from %s.' %
                    load_ae)

    # Train model on dataset
    ssad.train(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    ssad.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results and configuration
    ssad.save_results(export_json=xp_path + '/results.json')
    cfg.save_config(export_json=xp_path + '/config.json')

    # Plot most anomalous and most normal test samples
    indices, labels, scores = zip(*ssad.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_all_sorted = indices[np.argsort(
        scores)]  # from lowest to highest score
    idx_normal_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # from lowest to highest score

    if dataset_name in ('mnist', 'fmnist', 'cifar10'):

        if dataset_name in ('mnist', 'fmnist'):
            X_all_low = dataset.test_set.data[idx_all_sorted[:32],
                                              ...].unsqueeze(1)
            X_all_high = dataset.test_set.data[idx_all_sorted[-32:],
                                               ...].unsqueeze(1)
            X_normal_low = dataset.test_set.data[idx_normal_sorted[:32],
                                                 ...].unsqueeze(1)
            X_normal_high = dataset.test_set.data[idx_normal_sorted[-32:],
                                                  ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_all_low = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_all_high = torch.tensor(
                np.transpose(dataset.test_set.data[idx_all_sorted[-32:], ...],
                             (0, 3, 1, 2)))
            X_normal_low = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[:32], ...],
                    (0, 3, 1, 2)))
            X_normal_high = torch.tensor(
                np.transpose(
                    dataset.test_set.data[idx_normal_sorted[-32:], ...],
                    (0, 3, 1, 2)))

        plot_images_grid(X_all_low, export_img=xp_path + '/all_low', padding=2)
        plot_images_grid(X_all_high,
                         export_img=xp_path + '/all_high',
                         padding=2)
        plot_images_grid(X_normal_low,
                         export_img=xp_path + '/normals_low',
                         padding=2)
        plot_images_grid(X_normal_high,
                         export_img=xp_path + '/normals_high',
                         padding=2)
示例#25
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model, objective, nu, focal_parameter, update_center_epochs, device, seed,
         optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name, ae_lr,
         ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay, n_jobs_dataloader, normal_class):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])
    logger.info('focal_parameter: %.2f' % cfg.settings['focal_parameter'])
    logger.info('update_center_epochs: %d' % cfg.settings['update_center_epochs'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        os.environ['PYTHONHASHSEED'] = str(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed_all(cfg.settings['seed'])
        cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'], cfg.settings['focal_parameter'], cfg.settings['update_center_epochs'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' % cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' % (cfg.settings['ae_lr_milestone'],))
        logger.info('Pretraining batch size: %d' % cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' % cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' % (cfg.settings['lr_milestone'],))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # record test AUC after every 100 epochs
    # f_get_para = open('../log/mnist_test/100_AUC.txt', 'w')
    # f_get_para.write("\r\n \r\nrecord test AUC each 100 epoch   \r\n \r\n \r\n")
    # f_get_para.close()
    #
    # f_get_para = open('../log/mnist_test/get_param.txt', 'w')
    # f_get_para.write("\r\n \r\nrecord test AUC each very epoch   \r\n \r\n \r\n")
    # f_get_para.close()

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    # deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Plot most anomalous and most normal (within-class) test samples
    # indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    # indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
    # idx_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # sorted from lowest to highest anomaly score
    #
    # if dataset_name in ('mnist', 'cifar10'):
    #
    #     if dataset_name == 'mnist':
    #         X_normals = dataset.test_set.test_data[idx_sorted[:32], ...].unsqueeze(1)
    #         X_outliers = dataset.test_set.test_data[idx_sorted[-32:], ...].unsqueeze(1)
    #
    #     if dataset_name == 'cifar10':
    #         X_normals = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...], (0, 3, 1, 2)))
    #         X_outliers = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...], (0, 3, 1, 2)))
    #
    #     plot_images_grid(X_normals, export_img=xp_path + '/normals', title='Most normal examples', padding=2)
    #     plot_images_grid(X_outliers, export_img=xp_path + '/outliers', title='Most anomalous examples', padding=2)
    #
    # # Save results, model, and configuration
    # deep_SVDD.save_results(export_json=xp_path + '/results.json')
    # deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    # cfg.save_config(export_json=xp_path + '/config.json')

    # plot curves
    loss_plot = deep_SVDD.trainer.Loss_list
    accuracy_plot = deep_SVDD.trainer.Accuracy_list
    x1 = range(0, n_epochs)
    y1 = accuracy_plot
    x2 = range(0, n_epochs)
    y2 = loss_plot
    plt.subplot(2, 1, 1)
    plt.plot(x1, y1, 'o-')
    plt.title('Class %d vs focal %.2f' % (normal_class, focal_parameter))
    plt.ylabel('Test accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(x2, y2, '.-')
    plt.xlabel('Test loss vs. epochs')
    plt.ylabel('Test loss')
    plt.savefig("accuracy_loss focal %.2f .jpg" % focal_parameter)
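
The seeding block in the example above (random, PYTHONHASHSEED, NumPy, PyTorch, cuDNN) recurs across several of these examples. A small reusable helper with the same calls, shown here only as a convenience sketch:

import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn


def set_seed(seed):
    """Seed every RNG the example touches (best-effort determinism)."""
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # seeds all GPUs; safe on CPU-only machines
    cudnn.benchmark = False
    cudnn.deterministic = True
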
Example #26
0
def main(dataset_name, net_name, xp_path, data_path, load_data, load_config,
         load_model, objective, nu, device, seed, optimizer_name, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         n_jobs_dataloader, normal_class):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    if dataset_name == 'campus':
        train_path = 'train/OK'
        test_path = 'test'
        train_image, test_image, test_label = train_test_numpy_load(
            data_path, train_path, test_path, load_data)
        dataset = load_campus_dataset(dataset_name, data_path, train_image,
                                      test_image, test_label)
    else:
        dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)

    # logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        deep_SVDD.pretrain(dataset,
                           optimizer_name=cfg.settings['ae_optimizer_name'],
                           lr=cfg.settings['ae_lr'],
                           n_epochs=cfg.settings['ae_n_epochs'],
                           lr_milestones=cfg.settings['ae_lr_milestone'],
                           batch_size=cfg.settings['ae_batch_size'],
                           weight_decay=cfg.settings['ae_weight_decay'],
                           device=device,
                           n_jobs_dataloader=n_jobs_dataloader,
                           test_image=test_image)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)
    # plot t_sne
    # deep_SVDD.t_sne(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader, data_path=data_path, xp_path=xp_path)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # sorted from lowest to highest anomaly score

    if dataset_name in ('mnist', 'cifar10', 'campus'):

        if dataset_name == 'mnist':
            X_normals = dataset.test_set.test_data[idx_sorted[:32],
                                                   ...].unsqueeze(1)
            X_outliers = dataset.test_set.test_data[idx_sorted[-32:],
                                                    ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_normals = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_outliers = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...],
                             (0, 3, 1, 2)))

        if dataset_name == 'campus':
            test_score_path = os.path.join(xp_path, 'test_score.pickle')
            with open(test_score_path, 'wb') as f:
                pickle.dump(deep_SVDD.results['test_scores'], f,
                            pickle.HIGHEST_PROTOCOL)

            fpr, tpr, threshold = roc_curve(labels, scores)
            roc_auc = auc(fpr, tpr)
            plt.figure()
            lw = 2
            plt.plot(fpr,
                     tpr,
                     color='darkorange',
                     lw=lw,
                     label='ROC curve (area= %0.2f)' % roc_auc)
            plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('Receiver operating characteristic example')
            plt.legend(loc="lower right")
            plt.savefig(os.path.join(xp_path, 'auc_roc.png'))
        else:
            plot_images_grid(X_normals,
                             export_img=xp_path + '/normals',
                             title='Most normal examples',
                             padding=2)
            plot_images_grid(X_outliers,
                             export_img=xp_path + '/outliers',
                             title='Most anomalous examples',
                             padding=2)

    # Save results, model, and configuration
    deep_SVDD.save_results(export_json=xp_path + '/results.json')
    deep_SVDD.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
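
For the 'campus' branch the raw test scores are pickled to test_score.pickle before the ROC curve is drawn. A hedged usage sketch for reloading that file and recomputing the AUC offline (the xp_path value below is a placeholder for the export path used in the run above):

import os
import pickle

import numpy as np
from sklearn.metrics import auc, roc_curve

xp_path = './log/campus'  # placeholder: export path of the run above (assumption)

with open(os.path.join(xp_path, 'test_score.pickle'), 'rb') as f:
    test_scores = pickle.load(f)  # list of (index, label, score) tuples

_, labels, scores = zip(*test_scores)
fpr, tpr, _ = roc_curve(np.array(labels), np.array(scores))
print('Offline ROC AUC: %.4f' % auc(fpr, tpr))
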
Example #27
0
File: main.py Project: tianzhaotju/HAE
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         objective, nu, device, seed, optimizer_name, lr, n_epochs,
         lr_milestone, batch_size, weight_decay, pretrain, ae_optimizer_name,
         ae_lr, ae_n_epochs, ae_lr_milestone, ae_batch_size, ae_weight_decay,
         n_jobs_dataloader, ae_loss_type, ae_only, normal_class, ae_test_only):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    mean = [
        (0.36607818518032215, 0.3528722483374472, 0.3585191239764038),  # 0
        (0.4487305946663354, 0.4487305946663354, 0.4487305946663354),  # 1
        (0.3923340318128373, 0.26295472525674995, 0.22025334692657814),  # 2
        (0.4536255693657713, 0.4682865838881645, 0.4452575836280415),  # 3
        (0.672454086143443, 0.4779993567370712, 0.35007702036667776),  # 4
        (0.5352967021800805, 0.5314880132137422, 0.547828897157147),  # 5
        (0.3267409463643222, 0.41484389522093523, 0.46695618025405883),  # 6
        (0.6926364358307354, 0.662149771557822, 0.6490556404776292),  # 7
        (0.24011281595607017, 0.1769201147939173, 0.17123964257174726),  # 8
        (0.21251877631977975, 0.23440739849813622, 0.2363959074824541),  # 9
        (0.3025230547246622, 0.30300693821061303, 0.32466943588225744),  # 10
        (0.7214971293922232, 0.7214971293922232, 0.7214971293922232),  # 11
        (0.20453672401964704, 0.19061953742573437, 0.1973630989492544),  # 12
        (0.38709726938081024, 0.27680750921869235, 0.24161576675737736),  # 13
        (0.39719792798156195, 0.39719792798156195, 0.39719792798156195),  # 14
    ]
    std = [
        (0.1334089197933497, 0.13091438558839882, 0.11854704285817017),  # 0
        (0.16192189716258867, 0.16192189716258867, 0.16192189716258867),  # 1
        (0.0527090063203568, 0.035927180158353854, 0.026535684323885065),  # 2
        (0.11774565267141425, 0.13039328961987165, 0.12533147519872007),  # 3
        (0.07714836895006975, 0.06278302787607731, 0.04349760909698915),  # 4
        (0.36582285759516936, 0.3661720233895615, 0.34943018535446296),  # 5
        (0.14995070226373788, 0.2117666336616603, 0.23554648659289779),  # 6
        (0.23612927993223184, 0.25644744015075704, 0.25718179933681784),  # 7
        (0.168789697373752, 0.07563237349131141, 0.043146545992581754),  # 8
        (0.15779873915363898, 0.18099161937329614, 0.15159372072430388),  # 9
        (0.15720102988319967, 0.1803989691876269, 0.15113407058442763),  # 10
        (0.13265686578689692, 0.13265686578689692, 0.13265686578689692),  # 11
        (0.2316392849251032, 0.21810285502082638, 0.19743939091294657),  # 12
        (0.20497542590257026, 0.14190994609091834, 0.11531548927488476),  # 13
        (0.3185215984033291, 0.3185215984033291, 0.3185215984033291),  # 14
    ]

    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    xp_path = xp_path + '/' + str(normal_class)
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print arguments
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %s' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print configuration
    logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
    logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name, data_path, normal_class)

    # Initialize DeepSVDD model and set neural network \phi
    deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
    deep_SVDD.set_network(net_name)
    # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
    if load_model:
        deep_SVDD.load_model(model_path=load_model, load_ae=True)
        logger.info('Loading model from %s.' % load_model)

    logger.info('Pretraining: %s' % pretrain)
    if pretrain:
        # Log pretraining details
        logger.info('Pretraining optimizer: %s' %
                    cfg.settings['ae_optimizer_name'])
        logger.info('Pretraining learning rate: %g' % cfg.settings['ae_lr'])
        logger.info('Pretraining epochs: %d' % cfg.settings['ae_n_epochs'])
        logger.info('Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
        logger.info('Pretraining batch size: %d' %
                    cfg.settings['ae_batch_size'])
        logger.info('Pretraining weight decay: %g' %
                    cfg.settings['ae_weight_decay'])

        # Pretrain model on dataset (via autoencoder)
        model_save_path = './models/' + dataset_name + '/' + str(
            normal_class) + '_' + str(
                ae_n_epochs) + '_' + ae_loss_type + '.pth'

        if not ae_test_only:
            deep_SVDD.pretrain(
                dataset,
                optimizer_name=cfg.settings['ae_optimizer_name'],
                lr=cfg.settings['ae_lr'],
                n_epochs=cfg.settings['ae_n_epochs'],
                lr_milestones=cfg.settings['ae_lr_milestone'],
                batch_size=cfg.settings['ae_batch_size'],
                weight_decay=cfg.settings['ae_weight_decay'],
                device=device,
                n_jobs_dataloader=n_jobs_dataloader,
                dataset_name=dataset_name,
                ae_loss_type=ae_loss_type,
                ae_only=ae_only,
                model_save_path=model_save_path)
        else:
            deep_SVDD.load_test(
                dataset,
                optimizer_name=cfg.settings['ae_optimizer_name'],
                lr=cfg.settings['ae_lr'],
                n_epochs=cfg.settings['ae_n_epochs'],
                lr_milestones=cfg.settings['ae_lr_milestone'],
                batch_size=cfg.settings['ae_batch_size'],
                weight_decay=cfg.settings['ae_weight_decay'],
                device=device,
                n_jobs_dataloader=n_jobs_dataloader,
                dataset_name=dataset_name,
                ae_loss_type=ae_loss_type,
                ae_only=ae_only,
                model_save_path=model_save_path)

        # Plot most anomalous and most normal (within-class) test samples
        exit(0)
        indices, labels, scores = zip(*deep_SVDD.results['ae_test_scores'])
        indices, labels, scores = np.array(indices), np.array(
            labels), np.array(scores)
        idx_sorted = indices[labels == 0][np.argsort(scores[
            labels == 0])]  # sorted from lowest to highest anomaly score

        if dataset_name in ('mnist', 'cifar10', 'object', 'texture'):

            if dataset_name == 'mnist':
                X_normals = dataset.test_set.test_data[idx_sorted[:32],
                                                       ...].unsqueeze(1)
                X_outliers = dataset.test_set.test_data[idx_sorted[-32:],
                                                        ...].unsqueeze(1)

            if dataset_name == 'cifar10':
                X_normals = torch.tensor(
                    np.transpose(
                        dataset.test_set.test_data[idx_sorted[:32], ...],
                        (0, 3, 1, 2)))
                X_outliers = torch.tensor(
                    np.transpose(
                        dataset.test_set.test_data[idx_sorted[-32:], ...],
                        (0, 3, 1, 2)))

            if dataset_name == 'object':
                # 22 3 256 256
                X_normals = torch.tensor(dataset.test_data[idx_sorted[:32],
                                                           ...])
                X_outliers = torch.tensor(dataset.test_data[idx_sorted[-32:],
                                                            ...])

                for i in range(3):
                    X_normals[:, i, :, :] *= std[normal_class][i]
                    X_normals[:, i, :, :] += mean[normal_class][i]
                    X_outliers[:, i, :, :] *= std[normal_class][i]
                    X_outliers[:, i, :, :] += mean[normal_class][i]

            #plot_images_grid(X_normals, export_img=xp_path + '/AE_normals', title='Most normal examples', padding=2)
            #plot_images_grid(X_outliers, export_img=xp_path + '/AE_outliers', title='Most anomalous examples', padding=2)
            if ae_only:
                exit(0)
    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    deep_SVDD.train(dataset,
                    optimizer_name=cfg.settings['optimizer_name'],
                    lr=cfg.settings['lr'],
                    n_epochs=cfg.settings['n_epochs'],
                    lr_milestones=cfg.settings['lr_milestone'],
                    batch_size=cfg.settings['batch_size'],
                    weight_decay=cfg.settings['weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader,
                    dataset_name=dataset_name)

    # Test model
    deep_SVDD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Plot most anomalous and most normal (within-class) test samples
    indices, labels, scores, _, _ = zip(*deep_SVDD.results['test_scores'])
    indices, labels, scores = np.array(indices), np.array(labels), np.array(
        scores)
    idx_sorted = indices[labels == 0][np.argsort(
        scores[labels == 0])]  # sorted from lowest to highest anomaly score

    if dataset_name in ('mnist', 'cifar10', 'object', 'texture'):

        if dataset_name == 'mnist':
            X_normals = dataset.test_set.test_data[idx_sorted[:32],
                                                   ...].unsqueeze(1)
            X_outliers = dataset.test_set.test_data[idx_sorted[-32:],
                                                    ...].unsqueeze(1)

        if dataset_name == 'cifar10':
            X_normals = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...],
                             (0, 3, 1, 2)))
            X_outliers = torch.tensor(
                np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...],
                             (0, 3, 1, 2)))

        if dataset_name == 'object':
            # 22 3 256 256
            X_normals = torch.tensor(dataset.test_data[idx_sorted[:32], ...])
            X_outliers = torch.tensor(dataset.test_data[idx_sorted[-32:], ...])
            for i in range(3):
                X_normals[:, i, :, :] *= std[normal_class][i]
                X_normals[:, i, :, :] += mean[normal_class][i]
                X_outliers[:, i, :, :] *= std[normal_class][i]
                X_outliers[:, i, :, :] += mean[normal_class][i]

        plot_images_grid(X_normals,
                         export_img=xp_path + '/normals',
                         title='Most normal examples',
                         padding=2)
        plot_images_grid(X_outliers,
                         export_img=xp_path + '/outliers',
                         title='Most anomalous examples',
                         padding=2)
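
The channel-wise un-normalization for the 'object' images appears twice in this example (after pretraining and after testing). A small helper with the same arithmetic, written here purely as a convenience sketch:

import torch


def denormalize(x, mean, std):
    """Undo per-channel normalization so the images can be plotted.

    `mean` and `std` are the 3-tuples selected via normal_class above.
    """
    out = x.clone().float()
    for i in range(out.shape[1]):  # iterate over channels
        out[:, i, :, :] = out[:, i, :, :] * std[i] + mean[i]
    return out

# e.g.:
# X_normals = denormalize(torch.tensor(dataset.test_data[idx_sorted[:32], ...]),
#                         mean[normal_class], std[normal_class])
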
Example #28
0
def main(dataset_name, net_name, load_config, load_model, objective, nu,
         device, seed, optimizer_name, lr, n_epochs, lr_milestone, batch_size,
         weight_decay, pretrain, ae_optimizer_name, ae_lr, ae_n_epochs,
         ae_lr_milestone, ae_batch_size, ae_weight_decay, n_jobs_dataloader,
         ratio, run_times):
    """
    Deep SVDD, a fully deep method for anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """
    class_num = 10
    if dataset_name == 'cifar100':
        class_num = 20
    for run_index in range(run_times):
        #for ratio in [0.05, 0.1, 0.15, 0.2, 0.25]:
        for i in range(class_num):
            normal_class = i
            class_name = get_class_name_from_index(normal_class, dataset_name)
            os.makedirs(RESULTS_DIR, exist_ok=True)
            # Get configuration
            cfg = Config(locals().copy())

            # Set up logging
            logging.basicConfig(level=logging.INFO)
            logger = logging.getLogger()
            logger.setLevel(logging.INFO)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

            xp_path = RESULTS_DIR

            log_file = xp_path + '/log.txt'
            file_handler = logging.FileHandler(log_file)
            file_handler.setLevel(logging.INFO)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

            # Print arguments
            logger.info('Log file is %s.' % log_file)
            logger.info('Export path is %s.' % xp_path)

            logger.info('Dataset: %s' % dataset_name)
            logger.info('Normal class: %d' % normal_class)
            logger.info('Network: %s' % net_name)

            # If specified, load experiment config from JSON-file
            if load_config:
                cfg.load_config(import_json=load_config)
                logger.info('Loaded configuration from %s.' % load_config)

            # Print configuration
            logger.info('Deep SVDD objective: %s' % cfg.settings['objective'])
            logger.info('Nu-parameter: %.2f' % cfg.settings['nu'])

            # Set seed
            cfg.settings['seed'] = run_index
            if cfg.settings['seed'] != -1:
                random.seed(cfg.settings['seed'])
                np.random.seed(cfg.settings['seed'])
                torch.manual_seed(cfg.settings['seed'])
                logger.info('Set seed to %d.' % cfg.settings['seed'])

            # Default device to 'cpu' if cuda is not available
            if not torch.cuda.is_available():
                device = 'cpu'
            logger.info('Computation device: %s' % device)
            logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

            # Load data
            dataset = load_dataset(dataset_name, normal_class, ratio)

            # Initialize DeepSVDD model and set neural network \phi
            deep_SVDD = DeepSVDD(cfg.settings['objective'], cfg.settings['nu'])
            deep_SVDD.set_network(net_name)
            # If specified, load Deep SVDD model (radius R, center c, network weights, and possibly autoencoder weights)
            if load_model:
                deep_SVDD.load_model(model_path=load_model, load_ae=True)
                logger.info('Loading model from %s.' % load_model)

            logger.info('Pretraining: %s' % pretrain)
            if pretrain:
                # Log pretraining details
                logger.info('Pretraining optimizer: %s' %
                            cfg.settings['ae_optimizer_name'])
                logger.info('Pretraining learning rate: %g' %
                            cfg.settings['ae_lr'])
                logger.info('Pretraining epochs: %d' %
                            cfg.settings['ae_n_epochs'])
                logger.info(
                    'Pretraining learning rate scheduler milestones: %s' %
                    (cfg.settings['ae_lr_milestone'], ))
                logger.info('Pretraining batch size: %d' %
                            cfg.settings['ae_batch_size'])
                logger.info('Pretraining weight decay: %g' %
                            cfg.settings['ae_weight_decay'])

                # Pretrain model on dataset (via autoencoder)
                deep_SVDD.pretrain(
                    dataset,
                    optimizer_name=cfg.settings['ae_optimizer_name'],
                    lr=cfg.settings['ae_lr'],
                    n_epochs=cfg.settings['ae_n_epochs'],
                    lr_milestones=cfg.settings['ae_lr_milestone'],
                    batch_size=cfg.settings['ae_batch_size'],
                    weight_decay=cfg.settings['ae_weight_decay'],
                    device=device,
                    n_jobs_dataloader=n_jobs_dataloader)

            # Log training details
            logger.info('Training optimizer: %s' %
                        cfg.settings['optimizer_name'])
            logger.info('Training learning rate: %g' % cfg.settings['lr'])
            logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
            logger.info('Training learning rate scheduler milestones: %s' %
                        (cfg.settings['lr_milestone'], ))
            logger.info('Training batch size: %d' % cfg.settings['batch_size'])
            logger.info('Training weight decay: %g' %
                        cfg.settings['weight_decay'])

            # Train model on dataset
            deep_SVDD.train(dataset,
                            optimizer_name=cfg.settings['optimizer_name'],
                            lr=cfg.settings['lr'],
                            n_epochs=cfg.settings['n_epochs'],
                            lr_milestones=cfg.settings['lr_milestone'],
                            batch_size=cfg.settings['batch_size'],
                            weight_decay=cfg.settings['weight_decay'],
                            device=device,
                            n_jobs_dataloader=n_jobs_dataloader)

            # Test model
            scores, labels = deep_SVDD.test(
                dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

            res_file_name = '{}_dsvdd-{}_{}_{}.npz'.format(
                dataset_name, ratio, class_name,
                datetime.now().strftime('%Y-%m-%d-%H%M'))
            res_file_path = os.path.join(RESULTS_DIR, dataset_name,
                                         res_file_name)
            os.makedirs(os.path.join(RESULTS_DIR, dataset_name), exist_ok=True)
            save_roc_pr_curve_data(scores, labels, res_file_path)

            # Plot most anomalous and most normal (within-class) test samples
            # indices, labels, scores = zip(*deep_SVDD.results['test_scores'])
            # indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
            # idx_sorted = indices[labels == 0][np.argsort(scores[labels == 0])]  # sorted from lowest to highest anomaly score
            #
            # if dataset_name in ('mnist', 'cifar10'):
            #
            #     if dataset_name == 'mnist':
            #         X_normals = dataset.test_set.test_data[idx_sorted[:32], ...].unsqueeze(1)
            #         X_outliers = dataset.test_set.test_data[idx_sorted[-32:], ...].unsqueeze(1)
            #
            #     if dataset_name == 'cifar10':
            #         X_normals = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[:32], ...], (0, 3, 1, 2)))
            #         X_outliers = torch.tensor(np.transpose(dataset.test_set.test_data[idx_sorted[-32:], ...], (0, 3, 1, 2)))
            #
            #     plot_images_grid(X_normals, export_img=xp_path + '/normals', title='Most normal examples', padding=2)
            #     plot_images_grid(X_outliers, export_img=xp_path + '/outliers', title='Most anomalous examples', padding=2)

            # Save results, model, and configuration
            logger.info('Finished training class {}.'.format(class_name))
    logger.info('Sending experiment-finished mail.')
    send_mailgun()
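
save_roc_pr_curve_data is not defined in this excerpt. A plausible sketch of such a helper, assuming it evaluates the anomaly scores against the binary labels and stores ROC and precision-recall curve data in the .npz file named above (the field names are assumptions):

import numpy as np
from sklearn.metrics import (auc, average_precision_score,
                             precision_recall_curve, roc_curve)


def save_roc_pr_curve_data(scores, labels, file_path):
    # Assumes higher score = more anomalous and label 1 = anomaly
    scores, labels = np.asarray(scores), np.asarray(labels)
    fpr, tpr, roc_thresholds = roc_curve(labels, scores)
    precision, recall, pr_thresholds = precision_recall_curve(labels, scores)
    np.savez_compressed(file_path,
                        fpr=fpr, tpr=tpr, roc_thresholds=roc_thresholds,
                        roc_auc=auc(fpr, tpr),
                        precision=precision, recall=recall,
                        pr_thresholds=pr_thresholds,
                        average_precision=average_precision_score(labels, scores))
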
Example #29
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         upper_bound, device, seed, optimizer_name, lr, n_epochs, lr_milestone,
         batch_size, weight_decay, num_threads, n_jobs_dataloader,
         normal_class):
    """
    Deep DR, a method for deep semi-supervised anomaly detection.

    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s' % log_file)
    logger.info('Data path is %s' % data_path)
    logger.info('Export path is %s' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Print model configuration
    logger.info('Pi-parameter: %.2f' % cfg.settings['upper_bound'])

    # Set seed
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    # Set the number of threads used for parallelizing CPU operations
    if num_threads > 0:
        torch.set_num_threads(num_threads)

    logger.info('Computation device: %s' % device)
    logger.info('Number of threads: %d' % num_threads)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           random_state=np.random.RandomState(
                               cfg.settings['seed']))

    if net_name == 'wrn1':
        if dataset_name in ['mnist', 'fmnist']:
            # Zero-pad the 28x28 images to 32x32 for the WRN input size
            dataset.train_set.data = np.pad(dataset.train_set.data,
                                            ((0, 0), (2, 2), (2, 2)),
                                            'constant')
            dataset.test_set.data = np.pad(dataset.test_set.data,
                                           ((0, 0), (2, 2), (2, 2)),
                                           'constant')

    # Initialize DeepSAD model and set neural network phi
    D3RE = D3REnnLSIF(cfg.settings['upper_bound'])
    D3RE.set_network(net_name, rep_dim=1)

    # If specified, load Deep DR model (center c, network weights, and possibly autoencoder weights)
    if load_model:
        D3RE.load_model(model_path=load_model,
                        load_ae=True,
                        map_location=device)
        logger.info('Loading model from %s.' % load_model)

    # Log training details
    logger.info('Training optimizer: %s' % cfg.settings['optimizer_name'])
    logger.info('Training learning rate: %g' % cfg.settings['lr'])
    logger.info('Training epochs: %d' % cfg.settings['n_epochs'])
    logger.info('Training learning rate scheduler milestones: %s' %
                (cfg.settings['lr_milestone'], ))
    logger.info('Training batch size: %d' % cfg.settings['batch_size'])
    logger.info('Training weight decay: %g' % cfg.settings['weight_decay'])

    # Train model on dataset
    D3RE.train(dataset,
               optimizer_name=cfg.settings['optimizer_name'],
               lr=cfg.settings['lr'],
               n_epochs=cfg.settings['n_epochs'],
               lr_milestones=cfg.settings['lr_milestone'],
               batch_size=cfg.settings['batch_size'],
               weight_decay=cfg.settings['weight_decay'],
               device=device,
               n_jobs_dataloader=n_jobs_dataloader)

    # Test model
    D3RE.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)

    # Save results, model, and configuration
    D3RE.save_results(export_json=xp_path + '/results.json')
    D3RE.save_model(export_model=xp_path + '/model.tar')
    cfg.save_config(export_json=xp_path + '/config.json')
Example #30
0
def main(dataset_name, net_name, xp_path, data_path, load_config, load_model,
         device, seed, tokenizer, clean_txt, embedding_size, pretrained_model,
         embedding_reduction, num_dimensions, flow_type, coupling_hidden_size,
         coupling_hidden_layers, coupling_num_flows, coupling_num_mixtures,
         coupling_dropout, coupling_input_dropout, max_seq_len,
         use_length_prior, use_time_embed, prior_dist_type, prior_dist_mu,
         prior_dist_sigma, prior_dist_start_x, prior_dist_stop_x, ad_score,
         n_attention_heads, attention_size, lambda_p, alpha_scheduler,
         optimizer_name, lr, n_epochs, lr_milestone, batch_size, weight_decay,
         n_jobs_dataloader, n_threads, normal_class):
    """
    :arg DATASET_NAME: Name of the dataset to load.
    :arg NET_NAME: Name of the neural network to use.
    :arg XP_PATH: Export path for logging the experiment.
    :arg DATA_PATH: Root path of data.
    """

    # Get configuration
    cfg = Config(locals().copy())

    # Set up logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    xp_path += '/text_{}_{}'.format(dataset_name, net_name)
    if not os.path.exists(xp_path):
        os.makedirs(xp_path)
    log_file = xp_path + '/log.txt'
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Print paths
    logger.info('Log file is %s.' % log_file)
    logger.info('Data path is %s.' % data_path)
    logger.info('Export path is %s.' % xp_path)

    # Print experimental setup
    logger.info('Dataset: %s' % dataset_name)
    logger.info('Normal class: %d' % normal_class)
    logger.info('Network: %s' % net_name)
    logger.info('Tokenizer: %s' % cfg.settings['tokenizer'])
    logger.info('Clean text in pre-processing: %s' % cfg.settings['clean_txt'])
    if cfg.settings['embedding_size'] is not None:
        logger.info('Word vector embedding size: %d' %
                    cfg.settings['embedding_size'])
    logger.info('Load pre-trained model: %s' %
                cfg.settings['pretrained_model'])

    # If specified, load experiment config from JSON-file
    if load_config:
        cfg.load_config(import_json=load_config)
        logger.info('Loaded configuration from %s.' % load_config)

    # Set seed for reproducibility
    if cfg.settings['seed'] != -1:
        random.seed(cfg.settings['seed'])
        np.random.seed(cfg.settings['seed'])
        torch.manual_seed(cfg.settings['seed'])
        torch.cuda.manual_seed(cfg.settings['seed'])
        torch.backends.cudnn.deterministic = True
        logger.info('Set seed to %d.' % cfg.settings['seed'])

    # Default device to 'cpu' if cuda is not available
    if not torch.cuda.is_available():
        device = 'cpu'
    logger.info('Computation device: %s' % device)
    logger.info('Number of dataloader workers: %d' % n_jobs_dataloader)
    if n_threads > 0:
        torch.set_num_threads(n_threads)
        logger.info(
            'Number of threads used for parallelizing CPU operations: %d' %
            n_threads)

    # Load data
    dataset = load_dataset(dataset_name,
                           data_path,
                           normal_class,
                           cfg.settings['tokenizer'],
                           clean_txt=cfg.settings['clean_txt'],
                           max_seq_len=cfg.settings['max_seq_len'])
    if net_name == 'CNF':
        # Initialize CNF model
        cnf = CNF()
        encoding_params = {
            "num_flows": 0,
            "hidden_layers": 2,
            "hidden_size": 128
        }
        cnf.set_network(
            net_name=net_name,
            dataset=dataset,
            pretrained_model=cfg.settings['pretrained_model'],
            embedding_size=cfg.settings['embedding_size'],
            num_dimensions=cfg.settings['num_dimensions'],
            encoding_params=encoding_params,
            coupling_hidden_size=cfg.settings['coupling_hidden_size'],
            coupling_hidden_layers=cfg.settings['coupling_hidden_layers'],
            coupling_num_flows=cfg.settings['coupling_num_flows'],
            coupling_num_mixtures=cfg.settings['coupling_num_mixtures'],
            coupling_dropout=cfg.settings['coupling_dropout'],
            coupling_input_dropout=cfg.settings['coupling_input_dropout'],
            max_seq_len=cfg.settings['max_seq_len'],
            use_time_embed=cfg.settings['use_time_embed'])

        # If specified, load model parameters from already trained model
        if load_model:
            cnf.load_model(import_path=load_model, device=device)
            logger.info('Loading model from %s.' % load_model)

        # Train model on dataset
        prior_dist_params = {
            "distribution_type": cfg.settings['prior_dist_type'],
            "mu": cfg.settings['prior_dist_mu'],
            "sigma": cfg.settings['prior_dist_sigma'],
            "start_x": cfg.settings['prior_dist_start_x'],
            "stop_x": cfg.settings['prior_dist_stop_x']
        }
        cnf.train(dataset,
                  optimizer_name=cfg.settings['optimizer_name'],
                  lr=cfg.settings['lr'],
                  n_epochs=cfg.settings['n_epochs'],
                  lr_milestones=cfg.settings['lr_milestone'],
                  batch_size=cfg.settings['batch_size'],
                  use_length_prior=cfg.settings['use_length_prior'],
                  prior_dist_params=prior_dist_params,
                  weight_decay=cfg.settings['weight_decay'],
                  device=device,
                  n_jobs_dataloader=n_jobs_dataloader)

        # Test model
        cnf.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)
    elif net_name == 'EmbeddingNF':
        # Initialize EmbeddingNF model and set word embedding
        enf = EmbeddingNF()
        enf.set_network(
            net_name=net_name,
            dataset=dataset,
            pretrained_model=cfg.settings['pretrained_model'],
            embedding_size=cfg.settings['embedding_size'],
            embedding_reduction=cfg.settings['embedding_reduction'],
            flow_type=cfg.settings['flow_type'],
            coupling_hidden_size=cfg.settings['coupling_hidden_size'],
            coupling_num_flows=cfg.settings['coupling_num_flows'],
            use_length_prior=cfg.settings['use_length_prior'],
            device=cfg.settings['device'])

        # If specified, load model parameters from already trained model
        if load_model:
            enf.load_model(import_path=load_model, device=device)
            logger.info('Loading model from %s.' % load_model)

        # Train model on dataset
        enf.train(dataset,
                  optimizer_name=cfg.settings['optimizer_name'],
                  lr=cfg.settings['lr'],
                  n_epochs=cfg.settings['n_epochs'],
                  lr_milestones=cfg.settings['lr_milestone'],
                  batch_size=cfg.settings['batch_size'],
                  weight_decay=cfg.settings['weight_decay'],
                  device=device,
                  n_jobs_dataloader=n_jobs_dataloader)

        # Test model
        enf.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)
    elif net_name == 'date_Net':
        os.environ["TOKENIZERS_PARALLELISM"] = 'false'
        masks_ = pkl.load(open(data_path + '/pseudo_labels128_p50.pkl', 'rb'))
        subset = [dataset.subset]
        tensorboard_dir = 'run'
        exp_prefix = 'tests'
        now = datetime.now()
        date_time = now.strftime("%m%d%Y_%H%M%S")
        run_name = f'{subset[0]}_{date_time}'

        train_args = {
            "fp16": False,
            "use_multiprocessing": False,
            "reprocess_input_data": False,
            "overwrite_output_dir": True,
            "num_train_epochs": 20,
            "save_eval_checkpoints": False,
            "save_model_every_epoch": False,
            "learning_rate": 1e-5,
            "warmup_steps": 1000,
            "train_batch_size": 16,  #was 32
            "eval_batch_size": 16,  #was 32
            "gradient_accumulation_steps": 1,
            "block_size": 128 + 2,
            "max_seq_length": 128 + 2,
            "dataset_type": "simple",
            "logging_steps": 500,
            "evaluate_during_training": True,
            "evaluate_during_training_steps": 500,  #was 500
            "evaluate_during_training_steps_anomaly": 500,  #was 500
            "anomaly_batch_size": 16,
            "evaluate_during_training_verbose": True,
            "use_cached_eval_features": True,
            "sliding_window": True,
            "vocab_size": 52000,
            "eval_anomalies": True,
            "random_generator": 1,
            "use_rtd_loss": True,
            "rtd_loss_weight": 50,
            "rmd_loss_weight": 100,
            "mlm_loss_weight": 1,
            "dump_histogram": 0,
            "eval_anomaly_after": 0,
            "train_just_generator": 0,
            "replace_tokens": 0,
            "extract_scores": 1,
            "subset_name": subset[0],
            "extract_repr": 0,
            # "vanilla_electra": {
            #     "no_masks": masks,
            # },
            # "vanilla_electra": False,
            "train_document": True,
            "tokenizer_name": "bert-base-uncased",
            "tensorboard_dir": f'{tensorboard_dir}/{exp_prefix}/{run_name}',
            "extract_reps": 0,
            "weight_decay": weight_decay,
            "optimizer": "AdamW",
            "scores_export_path": f"./token_scores/{run_name}/",
            "generator_config": {
                "embedding_size": 128,
                "hidden_size": 16,
                "num_hidden_layers": 1,
            },
            "discriminator_config": {
                "hidden_dropout_prob": 0.5,
                "attention_probs_dropout_prob": 0.5,
                "embedding_size": 128,
                "hidden_size": 256,
                "num_hidden_layers": 4,
            },
            "mlm_lr_ratio": 1,
        }

        train_file = f"{data_path}/{dataset_name}/train/{subset[0]}.txt"
        test_file = f"{data_path}/{dataset_name}/test/{subset[0]}.txt"

        outlier_file = f"{data_path}/{dataset_name}/test/{subset[0]}-outliers.txt"

    elif net_name == 'cvdd_Net':
        # Print CVDD configuration
        logger.info('Anomaly Score: %s' % cfg.settings['ad_score'])
        logger.info('Number of attention heads: %d' %
                    cfg.settings['n_attention_heads'])
        logger.info('Attention size: %d' % cfg.settings['attention_size'])
        logger.info('Orthogonality regularization hyperparameter: %.3f' %
                    cfg.settings['lambda_p'])
        logger.info('Temperature alpha annealing strategy: %s' %
                    cfg.settings['alpha_scheduler'])

        # Initialize CVDD model and set word embedding
        cvdd = CVDD(cfg.settings['ad_score'])
        cvdd.set_network(net_name=net_name,
                         dataset=dataset,
                         pretrained_model=cfg.settings['pretrained_model'],
                         embedding_size=cfg.settings['embedding_size'],
                         attention_size=cfg.settings['attention_size'],
                         n_attention_heads=cfg.settings['n_attention_heads'])

        # If specified, load model parameters from already trained model
        if load_model:
            cvdd.load_model(import_path=load_model, device=device)
            logger.info('Loading model from %s.' % load_model)

        # Train model on dataset
        cvdd.train(dataset,
                   optimizer_name=cfg.settings['optimizer_name'],
                   lr=cfg.settings['lr'],
                   n_epochs=cfg.settings['n_epochs'],
                   lr_milestones=cfg.settings['lr_milestone'],
                   batch_size=cfg.settings['batch_size'],
                   lambda_p=cfg.settings['lambda_p'],
                   alpha_scheduler=cfg.settings['alpha_scheduler'],
                   weight_decay=cfg.settings['weight_decay'],
                   device=device,
                   n_jobs_dataloader=n_jobs_dataloader)

        # Test model
        cvdd.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)
    elif net_name == 'cvdd_flow':
        # Initialize CVDD_Flow model and set word embedding
        cvdd_flow = CVDD_Flow(cfg.settings['ad_score'])
        cvdd_flow.set_network(
            net_name=net_name,
            dataset=dataset,
            pretrained_model=cfg.settings['pretrained_model'],
            embedding_size=cfg.settings['embedding_size'],
            attention_size=cfg.settings['attention_size'],
            n_attention_heads=cfg.settings['n_attention_heads'])
        # Train model on dataset
        cvdd_flow.train(dataset,
                        optimizer_name=cfg.settings['optimizer_name'],
                        lr=cfg.settings['lr'],
                        n_epochs=cfg.settings['n_epochs'],
                        lr_milestones=cfg.settings['lr_milestone'],
                        batch_size=cfg.settings['batch_size'],
                        lambda_p=cfg.settings['lambda_p'],
                        alpha_scheduler=cfg.settings['alpha_scheduler'],
                        weight_decay=cfg.settings['weight_decay'],
                        device=device,
                        n_jobs_dataloader=n_jobs_dataloader)

        # Test model
        cvdd_flow.test(dataset,
                       device=device,
                       n_jobs_dataloader=n_jobs_dataloader)