Example No. 1
    def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
        logger = logging.getLogger()
        logger.info('R: %s' % str(self.R))
        logger.info('c: %s' % str(self.c))

        # Set device for networks
        net1 = net1.to(self.device)
        net2 = net2.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net1.eval()
        net2.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                code, _ = net1(inputs.view(-1, 1, 9))  # reshape to (batch, 1, n_features=9)
                outputs = net2(code)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_fpr, self.test_tpr, _ = roc_curve(labels, scores)

        self.test_score = scores
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
    def _get_output(self, loader, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_output = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, idx, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)

                # Save triples of (idx, label, output) in a list
                idx_label_output += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        outputs.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, outputs = zip(*idx_label_output)
        labels = np.array(labels)
        outputs = np.array(outputs)

        return labels, outputs
Example No. 3
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                logger.debug('Unique semi-targets: %s' %
                             np.unique(semi_targets.data.cpu().numpy()))

                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing.')
Example No. 4
    def pretrain(self, deepSVDD, cfg, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting pretraining...')
        start_time = time.time()
        net.train()
        for epoch in range(self.pre_training_epochs):

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()

            for data in tqdm(train_loader):
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()
                # Update network parameters via backpropagation: forward + backward + optimize
                _, rec_images = net(inputs)
                loss = torch.mean(
                    torch.sum(torch.abs(rec_images - inputs),
                              dim=tuple(range(1, rec_images.dim()))))
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.pre_training_epochs, epoch_train_time,
                loss_epoch / n_batches))

        self.train_time = time.time() - start_time
        logger.info('Training time: %.3f' % self.train_time)

        logger.info('Finished training.')

        return net
Example No. 5
    def train(self, dataset: BaseADDataset, svm_net: BaseNet):
        """ 训练 svm 模型 """
        logger = logging.getLogger()

        # Set device for networks
        svm_net = svm_net.to(self.device)

        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)
        optimizer = optim.SGD(svm_net.parameters(),
                              lr=self.lr,
                              momentum=self.momentum)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=self.step_size,
                                              gamma=self.gamma)

        # Training
        logger.info('Starting svm_trainer training...')
        start_time = time.time()
        svm_net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, labels, _ = data
                inputs = inputs.to(self.device)

                # Zero the networks parameter gradients
                optimizer.zero_grad()

                # Update networks parameters via back propagation: forward + backward + optimize
                outputs = svm_net(inputs)

                # get loss
                loss = self.hinge_loss(outputs, labels)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.n_epochs, epoch_train_time,
                loss_epoch / n_batches))

        train_time = time.time() - start_time
        logger.info('svm_trainer training time: %.3f' % train_time)
        logger.info('Finished training svm_trainer.')

        return svm_net
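The `hinge_loss` helper called above is not shown in this snippet. A minimal sketch, assuming binary targets encoded as ±1, raw scores from `svm_net`, and the module-level `torch` import used throughout these snippets:

    def hinge_loss(self, outputs, labels):
        """Mean hinge loss max(0, 1 - y * f(x)) for labels in {-1, +1}."""
        # outputs: raw scores of shape (batch,) or (batch, 1)
        return torch.clamp(1 - labels.float() * outputs.squeeze(), min=0).mean()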
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute metrics
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        # AUC
        self.auc_roc = roc_auc_score(labels, scores)
        # PR-curve
        self.pr_curve = precision_recall_curve(labels, scores)
        precision, recall, thresholds = self.pr_curve
        self.auc_pr = auc(recall, precision)
        self.test_loss = epoch_loss / n_batches
    def t_sne(self, dataset: BaseADDataset, net: BaseNet, data_path, xp_path):
        logger = logging.getLogger()

        center = np.array(self.c.cpu()).reshape(1, 100)  # rep_dim of 100 assumed

        save_path = xp_path
        with open(os.path.join(data_path, 'test_label.pickle'), 'rb') as f:
            test_class = pickle.load(f)
        test_class = np.array(test_class)
        test_class = np.append(test_class, 2)  # 2: center

        # Set device for network and switch to evaluation mode
        net = net.to(self.device)
        net.eval()
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # t_sne
        logger.info('Starting t-SNE plot...')
        t_sne_array = np.empty((0, 100))
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                t_sne_array = np.append(t_sne_array,
                                        outputs.cpu().numpy(),
                                        axis=0)

        t_sne_array = np.append(t_sne_array, center, axis=0)

        tsne = TSNE(n_components=2, random_state=32)
        tsne_results = tsne.fit_transform(t_sne_array)
        plt.figure(figsize=(16, 10))

        normal_index = (test_class == 0)
        abnormal_index = (test_class == 1)
        plt.scatter(tsne_results[normal_index, 0],
                    tsne_results[normal_index, 1],
                    c='b',
                    label='normal',
                    s=1,
                    marker=',')
        plt.scatter(tsne_results[abnormal_index, 0],
                    tsne_results[abnormal_index, 1],
                    c='r',
                    label='abnormal',
                    s=1,
                    marker=',')
        plt.scatter(tsne_results[-1, 0],
                    tsne_results[-1, 1],
                    c='k',
                    label='center',
                    s=20,
                    marker='D')

        plt.legend()
        plt.savefig(os.path.join(save_path, 't_sne.png'))
Example No. 8
    def train(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting pretraining...')
        start_time = time.time()
        ae_net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = ae_net(inputs)
                scores = torch.sum((outputs.float() - inputs.float()) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                        .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

        pretrain_time = time.time() - start_time
        logger.info('Pretraining time: %.3f' % pretrain_time)
        logger.info('Finished pretraining.')

        return ae_net
Example No. 9
    def test(self, dataset: BaseADDataset, vae: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device
        vae = vae.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        vae.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(
                    self.device), idx.to(self.device)

                inputs = inputs.view(inputs.size(0), -1)

                rec = vae(inputs)
                likelihood = -binary_cross_entropy(rec, inputs)
                scores = -likelihood  # negative likelihood as anomaly score

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                # Overall loss
                elbo = likelihood - vae.kl_divergence
                loss = -torch.mean(elbo)

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing variational autoencoder.')
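`binary_cross_entropy` here must return a per-sample likelihood term (the scores are zipped per sample) and `vae.kl_divergence` the KL term cached during the forward pass; neither is shown. A minimal sketch of the reconstruction term, assuming inputs scaled to [0, 1]:

import torch
import torch.nn.functional as F

def binary_cross_entropy(rec, inputs):
    """Per-sample BCE summed over feature dimensions; returns shape (batch,)."""
    bce = F.binary_cross_entropy(rec, inputs, reduction='none')
    return torch.sum(bce, dim=tuple(range(1, rec.dim())))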
Example No. 10
    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set loss
        criterion = nn.MSELoss(reduction='none')

        # Set device for network
        ae_net = ae_net.to(self.device)
        criterion = criterion.to(self.device)

        # Testing
        logger.info('Testing autoencoder...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(
                    self.device), idx.to(self.device)

                rec = ae_net(inputs)
                rec_loss = criterion(rec, inputs)
                scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss = torch.mean(rec_loss)
                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing autoencoder.')
Example No. 11
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for networks
        net = net.to(self.device)

        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        optimizer = optim.RMSprop(net.parameters(), lr=self.lr, weight_decay=self.weight_decay, eps=self.epsilon,
                                  momentum=self.momentum)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting lstm_autoencoder training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the networks parameter gradients
                optimizer.zero_grad()

                # Update networks parameters via back propagation: forward + backward + optimize
                _, outputs = net(inputs.view(-1, 1, self.n_features))
                scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                        .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

        self.train_time = time.time() - start_time
        logger.info('lstm_autoencoder training time: %.3f' % self.train_time)
        logger.info('Finished training lstm_autoencoder.')

        return net
    def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        if not corner_cracks:
            _, test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        if not corner_cracks:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        else:
            logger.info('Test set AUC (corner): {:.2f}%'.format(100. *
                                                                self.test_auc))

        logger.info('Finished testing.')
Example No. 13
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                # Assumes batch_size == 1, so `labels` is a single-element tensor
                if labels == 0:
                    check_autoencoder_quality(inputs, test_image[i], outputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

        test_time = time.time() - start_time
        logger.info('Autoencoder testing time: %.3f' % test_time)
        logger.info('Finished testing autoencoder.')
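`check_autoencoder_quality` is an external helper that is not shown. A minimal sketch that saves an input next to its reconstruction, assuming grayscale images and treating the second argument as a hypothetical image identifier:

import matplotlib.pyplot as plt

def check_autoencoder_quality(inputs, image_id, outputs):
    """Save the first input and its reconstruction side by side."""
    fig, axes = plt.subplots(1, 2, figsize=(6, 3))
    axes[0].imshow(inputs[0].cpu().squeeze().numpy(), cmap='gray')
    axes[0].set_title('input')
    axes[1].imshow(outputs[0].cpu().squeeze().numpy(), cmap='gray')
    axes[1].set_title('reconstruction')
    for ax in axes:
        ax.axis('off')
    fig.savefig('ae_quality_{}.png'.format(image_id))  # image_id is a hypothetical name
    plt.close(fig)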
Example No. 14
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)
        logger.info('Number of test batches: {}'.format(len(test_loader)))
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist
                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
        return str(self.test_auc)
    def _test(self, loader, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)
        criterion = BCEWithLogitsLoss()

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)
                labels = labels.type_as(outputs)
                loss = criterion(outputs, labels.unsqueeze(1))

                scores = outputs.sigmoid()

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute metrics
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        test_loss = epoch_loss / n_batches

        return labels, scores, test_loss
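A caller would typically turn the arrays returned by `_test` into metrics. A sketch, assuming scikit-learn and hypothetical `trainer`, `net`, and `test_loader` objects:

from sklearn.metrics import roc_auc_score

labels, scores, test_loss = trainer._test(test_loader, net)  # hypothetical objects
print('AUC: {:.4f} | loss: {:.6f}'.format(roc_auc_score(labels, scores), test_loss))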
Example No. 16
    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get test data loader
        test_loader = dataset.test_set

        # Testing
        logger.info('Testing autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                if len(inputs) == 32:  # skip the final partial batch (fixed batch size of 32 assumed)
                    inputs = inputs.to(self.device)
                    inputs = inputs.unsqueeze(1)
                    outputs = ae_net(inputs.float())
                    scores = torch.sum((outputs.float() - inputs.float())**2,
                                       dim=tuple(range(1, outputs.dim())))
                    loss = torch.mean(scores)

                    # Save pairs of (label, score) in a list
                    idx_label_score += list(
                        zip(labels.cpu().data.numpy().tolist(),
                            scores.cpu().data.numpy().tolist()))

                    loss_epoch += loss.item()
                    n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
        test_auc = auc(fpr, tpr)
        logger.info('Test set AUC: {:.2f}%'.format(100. * test_auc))

        test_time = time.time() - start_time
        logger.info('Autoencoder testing time: %.3f' % test_time)
        logger.info('Finished testing autoencoder.')
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        test_loader = dataset.test_set

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(self.device)
                inputs = inputs.unsqueeze(1)
                outputs = net(inputs.float())
                dist = torch.sum((outputs.float() - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save pairs of (label, score) in a list
                idx_label_score += list(
                    zip(labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
        self.test_auc = auc(fpr, tpr)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
    def train_one_step(self, net: BaseNet, epoch: int):

        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()

        epoch_loss = 0.0
        n_batches = 0
        epoch_start_time = time.time()

        for data in self.train_loader:
            inputs, targets, _, _ = data
            inputs, targets = inputs.to(self.device), targets.to(self.device)

            # Zero the network parameter gradients
            self.optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)
            targets = targets.type_as(outputs)
            loss = self.criterion(outputs, targets.unsqueeze(1))
            loss.backward()
            self.optimizer.step()

            epoch_loss += loss.item()
            n_batches += 1
            self.scheduler.step()

        if epoch in self.lr_milestones:
            logger.info('  LR scheduler: new learning rate is %g' %
                        float(self.scheduler.get_lr()[0]))

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(
            f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
            f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        return {'train_loss': epoch_loss / n_batches}
Example No. 19
    def _test(self, loader, net: BaseNet):
        logger = logging.getLogger()
        epoch_loss = 0.0
        n_batches = 0

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, idx, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                    loss = self.R**2 + (1 / self.nu) * torch.mean(
                        torch.max(torch.zeros_like(scores), scores))
                else:
                    loss = torch.mean(dist)
                    scores = dist

                epoch_loss += loss.item()
                n_batches += 1
                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        test_loss = epoch_loss / n_batches

        return labels, scores, test_loss
Example No. 20
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                scores = self.lastlay(outputs)

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
Example No. 21
def test(dataset: BaseADDataset, ae_net: BaseNet):
    # Set device for network
    ae_net = ae_net.to(device)

    # Get test data loader

    letter, labels = dataset.loaders(batch_size=batch_size,
                                     num_workers=0,
                                     shuffle_test=False,
                                     shuffle_train=False)

    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()

    with torch.no_grad():
        for i, (data, label) in enumerate(zip(letter, labels), start=1):
            inputs, _ = data
            lab, _ = label
            inputs = inputs.to(device)
            lab = lab.to(device)
            # Forward pass only (no gradients needed)
            outputs = ae_net(inputs)
            plot_images_grid(inputs[0:16],
                             export_img='./log/test/input' + str(i),
                             title='Input ',
                             nrow=4,
                             padding=4)
            plot_images_grid(lab[0:16],
                             export_img='./log/test/label' + str(i),
                             title='Label ',
                             nrow=4,
                             padding=4)
            plot_images_grid(outputs[0:16],
                             export_img='./log/test/output' + str(i),
                             title='Output ',
                             nrow=4,
                             padding=4)
Example No. 22
    def apply_model(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get apply_model data loader
        _, _, apply_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Applying model
        logger.info('Starting Deep SVDD application.')
        start_time = time.time()
        idx_score = []
        net.eval()
        with torch.no_grad():
            for data in apply_loader:
                inputs, nolabels, idx = data  # nolabels are NaN
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save pairs of (idx, score) in a list
                idx_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.apply_time = time.time() - start_time
        logger.info('Deep SVDD application time: %.3f' % self.apply_time)

        ind, scores = zip(*idx_score)
        self.ind = np.array(ind)
        self.scores = np.array(scores)

        logger.info('Finished Deep SVDD application.')
    def __init__(self, dataset: BaseADDataset, network: BaseNet, k: int,
                 lr: float, n_epochs: int, batch_size: int, rep_dim: int,
                 K: int, weight_decay: float, device: str,
                 n_jobs_dataloader: int, w_rec: float, w_feat: float, cfg):

        super().__init__(lr, n_epochs, batch_size, rep_dim, K, weight_decay,
                         device, n_jobs_dataloader, w_rec, w_feat)
        self.ae_net = network.to(self.device)
        self.train_loader, self.test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        self.optimizer = optim.Adam(self.ae_net.parameters(),
                                    lr=self.lr,
                                    weight_decay=self.weight_decay)
        self.rec_loss = torch.nn.L1Loss()
        self.feat_loss = torch.nn.MSELoss()
        self.best_score = 0
        self.min_loss = 1000
        self.k = k
        self.cfg = cfg
        self.logger = logging.getLogger()
        self.memory = torch.randn(size=(len(self.train_loader.dataset),
                                        self.rep_dim)).to(self.device)
Example No. 24
    def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0):
        """
            dt_type:数据集的类型, 测试集 0 / 训练集 1
        """
        logger = logging.getLogger()

        # Set device for networks
        net = net.to(self.device)

        # Get test data loader
        if is_test == 0:  # test-set loader
            _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        if is_test == 1:  # training-set loader
            test_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing lstm_autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)

                # Get the LSTM test labels; label has shape (batch_size,)
                label = labels.numpy()

                if is_test == 0:
                    for i in range(len(label)):
                        self.test_label.append(label[i])
                if is_test == 1:
                    for i in range(len(label)):
                        self.train_label.append(label[i])

                code, outputs = net(inputs.view(-1, 1, self.n_features))
                code = code.detach().cpu().numpy()

                if is_test == 0:
                    for i in range(len(code)):
                        self.test_code.append(code[i])
                if is_test == 1:
                    for i in range(len(code)):
                        self.train_code.append(code[i])

                scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                            labels.cpu().data.numpy().tolist(),
                                            scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        self.test_time = time.time() - start_time
        logger.info('lstm_autoencoder testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        logger.debug('n_labels=%d, n_scores=%d' % (len(labels), len(scores)))

        """ 测试集 """
        if is_test == 0:
            self.test_auc = roc_auc_score(labels, scores)
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
            logger.info('Finished testing lstm_autoencoder.')
Example No. 25
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
        """
            训练集 获取正常数据簇 -- 中心点,半径
            测试集 Kmeans 对数据进行预测,超过簇半径为异常数据,否则正常数据
        """

        logger = logging.getLogger()

        # Set device for networks
        ae_net = ae_net.to(self.device)

        # Training set: flg == 1, test set: flg == 0
        if flg == 1:
            test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing ae...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                error = (outputs - inputs)**2
                loss = torch.mean(scores)

                # Save 4-tuples of (idx, label, score, error) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        error.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores, error = zip(*idx_label_score)
        labels = np.array(labels)  # shape: (n_samples,)
        scores = np.array(scores)  # shape: (n_samples,)
        error = np.array(error)    # per-sample elementwise squared errors

        if flg == 1:  # training set
            X = error
            self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
            self.center = self.kmeans.cluster_centers_.tolist()
            self.radius = self.get_radius(X)
            logger.info('KMeans centers: %s' % self.center)
            logger.info('Cluster radii: %s' % self.radius)
        else:  # test set
            Y = error
            pred_labels = []  # predicted labels
            pred_km = self.kmeans.predict(Y)
            logger.debug('KMeans predictions: shape %s' % str(pred_km.shape))
            for i in range(len(pred_km)):
                # dis: distance from the assigned cluster center to the point,
                # used as the classification criterion
                dis = self.manhattan_distance(self.center[pred_km[i]], Y[i])
                if dis > self.radius[pred_km[i]]:
                    pred_labels.append(1)
                else:
                    pred_labels.append(0)

            pred_labels = np.array(pred_labels)
            # Area under the ROC curve serves as the accuracy measure
            self.test_fpr, self.test_tpr, _ = roc_curve(labels, pred_labels)
            self.test_auc = auc(self.test_fpr, self.test_tpr)
            self.test_mcc = matthews_corrcoef(labels, pred_labels)
            _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                               pred_labels,
                                                               labels=[0, 1])
            self.test_f_score = f_score[1]

        logger.debug('n_labels=%d, n_scores=%d' % (len(labels), len(scores)))

        self.test_time = time.time() - start_time
        if flg == 0:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('ae testing time: %.3f' % self.test_time)
        logger.info('Finished testing ae.')
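The `get_radius` and `manhattan_distance` helpers used above are not shown. A minimal sketch consistent with the logic, assuming the module-level numpy import and taking the per-cluster radius as the largest L1 distance from a center to its assigned training points (a quantile would be a more outlier-robust choice):

    def manhattan_distance(self, center, point):
        """L1 distance between a cluster center and one data point."""
        return float(np.sum(np.abs(np.asarray(center) - np.asarray(point))))

    def get_radius(self, X):
        """Per-cluster radius: max L1 distance from each center to its members."""
        assignments = self.kmeans.predict(X)
        radius = []
        for k in range(self.clusters):
            members = X[assignments == k]
            dists = [self.manhattan_distance(self.center[k], m) for m in members]
            radius.append(max(dists) if dists else 0.0)
        return radius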
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Initialize hypersphere center c (if c not loaded)
        if self.c is None:
            logger.info('Initializing center c...')
            self.c = self.init_center_c(train_loader, net)
            logger.info('Center c initialized.')

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)
                # dist = torch.sum((outputs - self.c) ** 2, dim=1)

                ### NEW - get closest cluster center, take dist, sum/mean for loss
                centers = torch.transpose(self.c, 0, 1)
                dist = torch.zeros(outputs.shape[0], device=self.device)
                for i in range(outputs.shape[0]):
                    # Distance from each data point to its closest cluster center
                    dist[i] = torch.sum((centers - outputs[i])**2, dim=1).min()
                ###
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                    loss = self.R**2 + (1 / self.nu) * torch.mean(
                        torch.max(torch.zeros_like(scores), scores))
                else:
                    loss = torch.mean(dist)
                loss.backward()
                optimizer.step()

                # Update hypersphere radius R on mini-batch distances
                if (self.objective == 'soft-boundary') and (
                        epoch >= self.warm_up_n_epochs):
                    self.R.data = torch.tensor(get_radius(dist, self.nu),
                                               device=self.device)

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.n_epochs, epoch_train_time,
                loss_epoch / n_batches))

        self.train_time = time.time() - start_time
        logger.info('Training time: %.3f' % self.train_time)

        logger.info('Finished training.')

        return net
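`get_radius` in the soft-boundary branch follows the Deep SVDD reference code, where R is set to the (1 - nu)-quantile of the current batch distances. A sketch:

import numpy as np
import torch

def get_radius(dist: torch.Tensor, nu: float):
    """Solve for radius R via the (1 - nu)-quantile of sqrt(distances)."""
    return np.quantile(np.sqrt(dist.clone().data.cpu().numpy()), 1 - nu)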
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        output_data = []
        label_data = []
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
                # dist = torch.sum((outputs - self.c) ** 2, dim=1)

                ### NEW
                if (self.c.dim() == 1):  # naive deep_svdd
                    centers = self.c
                    dist = torch.sum((outputs - self.c)**2, dim=1)
                else:
                    centers = torch.transpose(self.c, 0, 1)
                    dist = torch.zeros(outputs.shape[0], device=self.device)
                    for i in range(outputs.shape[0]):
                        # Distance from each data point to its closest cluster center
                        dist[i] = torch.sum((centers - outputs[i])**2,
                                            dim=1).min()
                ###
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        # UMAP plot on the testing data (same UMAP model fit in training,
        # with anomaly_data=True) is disabled:
        # kmeans_centers = np.load('centers.npy')
        # output_data = torch.cat(output_data)
        # label_data = torch.cat(label_data).numpy()
        # self.latent_UMAP(output_data, label_data, kmeans_centers, anomaly_data=True)

        # UMAP Plot (on training data)
        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        output_data = []
        label_data = []
        with torch.no_grad():
            for data in train_loader:
                # get the inputs of the batch
                inputs, labels, _ = data  # labels are used only for the UMAP plot of the hyperspheres
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
        kmeans_centers = np.load('centers.npy')
        output_data = torch.cat(output_data)
        label_data = torch.cat(label_data).numpy()
        self.latent_UMAP(output_data,
                         label_data,
                         kmeans_centers,
                         anomaly_data=True)

        logger.info('Finished testing.')
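`latent_UMAP` is project-specific and not shown. A minimal sketch, assuming the umap-learn package and matplotlib, that embeds the latent codes together with the k-means centers:

    def latent_UMAP(self, output_data, label_data, centers, anomaly_data=False):
        """2-D UMAP of latent codes, colored by label, with centers overlaid."""
        import umap
        import matplotlib.pyplot as plt
        codes = output_data.cpu().numpy()
        embedding = umap.UMAP(n_components=2).fit_transform(
            np.concatenate([codes, centers], axis=0))
        z, c = embedding[:len(codes)], embedding[len(codes):]
        plt.figure(figsize=(8, 6))
        plt.scatter(z[:, 0], z[:, 1], c=label_data, s=2, cmap='coolwarm')
        plt.scatter(c[:, 0], c[:, 1], c='k', s=40, marker='D', label='centers')
        plt.legend()
        plt.savefig('umap_anomaly.png' if anomaly_data else 'umap_train.png')
        plt.close()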
Example No. 28
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Initialize hypersphere center c (if c not loaded)
        if self.c is None:
            logger.info('Initializing center c...')
            self.c = self.init_center_c(train_loader, net)
            logger.info('Center c initialized.')

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            epoch_loss = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, semi_targets, _ = data
                inputs, semi_targets = inputs.to(self.device), semi_targets.to(
                    self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                n_batches += 1

            # Take learning rate scheduler step after the optimizer updates
            # (stepping before optimizer.step() skips the first LR value)
            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_last_lr()[0]))

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(
                f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        logger.info('Training Time: {:.3f}s'.format(self.train_time))
        logger.info('Finished training.')

        return net
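
The torch.where line in this train method is the Deep SAD objective: unlabeled samples (semi-target 0) and labeled normals (+1) are pulled toward the center c, while labeled anomalies (-1) are pushed away through the inverse distance (dist + eps)**(-1). A minimal standalone sketch of the per-sample loss; the helper name and toy values are illustrative, not from the original code:

import torch

def deep_sad_loss(dist: torch.Tensor, semi_targets: torch.Tensor,
                  eta: float = 1.0, eps: float = 1e-6) -> torch.Tensor:
    # dist: squared distances ||phi(x) - c||^2, shape (batch,);
    # semi_targets: 0 = unlabeled, +1 = labeled normal, -1 = labeled anomaly.
    # Anomalies get the inverse distance eta / (dist + eps), so the loss
    # is minimized by mapping them far away from the center c.
    return torch.where(semi_targets == 0, dist,
                       eta * (dist + eps) ** semi_targets.float())

# toy check: an unlabeled point, a labeled normal, a labeled anomaly
losses = deep_sad_loss(torch.tensor([0.5, 0.5, 0.5]),
                       torch.tensor([0, 1, -1]))
print(losses)  # tensor([0.5000, 0.5000, 2.0000]) -- the anomaly costs ~1/0.5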
Example No. 29
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        self.train_loader = train_loader

        # Set device for network
        net = net.to(self.device)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            epoch_loss = 0.0
            n_batches = 0
            epoch_start_time = time.time()

            for data in train_loader:
                inputs, _, semi_targets, _ = data
                inputs, semi_targets = inputs.to(self.device), semi_targets.to(
                    self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)

                # semi_targets == 1 marks labeled positives; the rest are unlabeled
                positive, unlabeled = semi_targets, 1 - semi_targets
                # clamp the counts at 1 to avoid division by zero
                n_positive = torch.clamp(torch.sum(positive).float(), min=1.)
                n_unlabeled = torch.clamp(torch.sum(unlabeled).float(), min=1.)

                # logistic losses for scoring positive (gp) and negative (gu)
                gp = torch.t(torch.log(1 + torch.exp(-outputs)))
                gu = torch.t(torch.log(1 + torch.exp(outputs)))

                # unbiased PU risk: pi * E_p[l+] on the labeled positives, and
                # E_u[l-] - pi * E_p[l-] as an estimate of (1 - pi) * E_n[l-]
                loss_positive = self.pi * torch.sum(gp * positive) / n_positive
                loss_negative = torch.sum(
                    gu * unlabeled) / n_unlabeled - self.pi * torch.sum(
                        gu * positive) / n_positive

                loss = loss_positive + loss_negative
                """

                func = torch.t(torch.sigmoid(-outputs))

                loss_positive = -self.pi*torch.sum(func*positive)/n_positive
                loss_negative = torch.sum(1/(1-func*unlabeled))/n_unlabeled - self.pi*torch.sum(1/(1-func*positive))/n_positive

                loss = (loss_positive + loss_negative)**2

                """
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                n_batches += 1

            # Take learning rate scheduler step after the optimizer updates
            # (stepping before optimizer.step() skips the first LR value)
            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_last_lr()[0]))

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(
                f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        logger.info('Training Time: {:.3f}s'.format(self.train_time))
        logger.info('Finished training.')

        return net
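
The two loss terms in this train method form the unbiased PU (positive-unlabeled) risk estimator: pi * E_p[l+] on the labeled positives, plus E_u[l-] - pi * E_p[l-] as an estimate of the negative-class risk (1 - pi) * E_n[l-]. A minimal standalone sketch (pu_risk is a hypothetical helper, not the original method) that swaps log(1 + exp(.)) for F.softplus, which computes the same quantity without overflowing on large logits:

import torch
import torch.nn.functional as F

def pu_risk(outputs: torch.Tensor, positive: torch.Tensor,
            pi: float) -> torch.Tensor:
    # outputs: raw logits, shape (batch,); positive: 1 for labeled
    # positives, 0 for unlabeled; pi: assumed class prior of positives.
    unlabeled = 1 - positive
    n_pos = torch.clamp(positive.sum().float(), min=1.)
    n_unl = torch.clamp(unlabeled.sum().float(), min=1.)
    loss_pos = F.softplus(-outputs)  # logistic loss for the positive class
    loss_neg = F.softplus(outputs)   # logistic loss for the negative class
    risk_pos = pi * (loss_pos * positive).sum() / n_pos
    # E_u[l-] is taken over the positive/negative mixture; subtracting
    # pi * E_p[l-] leaves an unbiased estimate of (1 - pi) * E_n[l-]
    risk_neg = (loss_neg * unlabeled).sum() / n_unl \
        - pi * (loss_neg * positive).sum() / n_pos
    return risk_pos + risk_neg

# toy check: 4 logits, first two labeled positive, prior pi = 0.3
loss = pu_risk(torch.randn(4), torch.tensor([1, 1, 0, 0]), pi=0.3)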
Example No. 30
    def train(self, dataset: BaseADDataset, oe_dataset: BaseADDataset,
              net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        if oe_dataset is not None:
            num_workers = int(self.n_jobs_dataloader / 2)
        else:
            num_workers = self.n_jobs_dataloader

        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=num_workers)
        if oe_dataset is not None:
            if oe_dataset.shuffle:
                if len(dataset.train_set) > len(oe_dataset.train_set):
                    oe_sampler = RandomSampler(oe_dataset.train_set,
                                               replacement=True,
                                               num_samples=len(
                                                   dataset.train_set))
                    oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                           batch_size=self.batch_size,
                                           shuffle=False,
                                           sampler=oe_sampler,
                                           num_workers=num_workers,
                                           drop_last=True)
                else:
                    oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                           batch_size=self.batch_size,
                                           shuffle=True,
                                           num_workers=num_workers,
                                           drop_last=True)

            else:
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=False,
                                       num_workers=num_workers,
                                       drop_last=True)
            dataset_loader = zip(train_loader, oe_loader)
        else:
            dataset_loader = train_loader

        # Set loss
        if self.objective in ['bce', 'focal']:
            if self.objective == 'bce':
                criterion = nn.BCEWithLogitsLoss()
            elif self.objective == 'focal':
                criterion = FocalLoss(gamma=self.focal_gamma)
            criterion = criterion.to(self.device)

        # Set device
        net = net.to(self.device)

        # Set optimizer
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting training...')
        net.train()
        start_time = time.time()

        for epoch in range(self.n_epochs + 1):  # one extra pass to collect train scores
            epoch_loss = 0.0
            n_batches = 0
            idx_label_score = []
            epoch_start_time = time.time()

            # start at a random point in the outlier exposure dataset each epoch
            if (oe_dataset is not None) and (epoch < self.n_epochs):
                oe_loader.dataset.offset = np.random.randint(
                    len(oe_loader.dataset))
                if oe_loader.dataset.shuffle_idxs:
                    random.shuffle(oe_loader.dataset.idxs)
                dataset_loader = zip(train_loader, oe_loader)

            # in the last epoch, load only samples from the original training set to save train scores
            if epoch == self.n_epochs:
                dataset_loader = train_loader
                net.eval()

            for data in dataset_loader:
                if (oe_dataset is not None) and (epoch < self.n_epochs):
                    inputs = torch.cat((data[0][0], data[1][0]), 0)
                    labels = torch.cat((data[0][1], data[1][1]), 0)
                    semi_targets = torch.cat((data[0][2], data[1][2]), 0)
                    idx = torch.cat((data[0][3], data[1][3]), 0)
                else:
                    inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                # Zero the network parameter gradients
                if epoch < self.n_epochs:
                    optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)

                if self.objective == 'hsc':
                    if self.hsc_norm == 'l1':
                        dists = torch.norm(outputs, p=1, dim=1)
                    elif self.hsc_norm == 'l2':
                        dists = torch.norm(outputs, p=2, dim=1)
                    elif self.hsc_norm == 'l2_squared':
                        dists = torch.norm(outputs, p=2, dim=1)**2
                    elif self.hsc_norm == 'l2_squared_linear':
                        # pseudo-Huber radius: quadratic near zero, linear far out
                        dists = torch.sqrt(
                            torch.norm(outputs, p=2, dim=1)**2 + 1) - 1

                    scores = 1 - torch.exp(-dists)
                    losses = torch.where(semi_targets == 0, dists,
                                         -torch.log(scores + self.eps))
                    loss = torch.mean(losses)

                if self.objective == 'deepSAD':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                    scores = dists
                    losses = torch.where(
                        semi_targets == 0, dists,
                        ((dists + self.eps)**semi_targets.float()))
                    loss = torch.mean(losses)

                if self.objective in ['bce', 'focal']:
                    targets = torch.zeros(inputs.size(0))
                    targets[semi_targets == -1] = 1
                    targets = targets.view(-1, 1).to(self.device)

                    scores = torch.sigmoid(outputs)
                    loss = criterion(outputs, targets)

                if epoch < self.n_epochs:
                    loss.backward()
                    optimizer.step()

                # save train scores in last epoch
                if epoch == self.n_epochs:
                    idx_label_score += list(
                        zip(idx.cpu().data.numpy().tolist(),
                            labels.cpu().data.numpy().tolist(),
                            scores.flatten().cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

            # Take learning rate scheduler step
            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_last_lr()[0]))

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(
                f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        self.train_scores = idx_label_score

        # Log results
        logger.info('Train Time: {:.3f}s'.format(self.train_time))
        logger.info('Train Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Finished training.')

        return net
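
The 'hsc' branch of this last trainer is the hypersphere classifier objective: normal samples minimize their distance to the origin, while outlier-exposure samples (semi-target -1) maximize it through -log of the score 1 - exp(-dist). A minimal standalone sketch using the 'l2_squared_linear' norm; the helper name and defaults are illustrative, not from the original trainer:

import torch

def hsc_loss(outputs: torch.Tensor, semi_targets: torch.Tensor,
             eps: float = 1e-6) -> torch.Tensor:
    # outputs: latent codes, shape (batch, d);
    # semi_targets: 0 for normal training samples, -1 for known outliers.
    # pseudo-Huber radius: quadratic near the origin, linear far away
    dists = torch.sqrt(torch.norm(outputs, p=2, dim=1) ** 2 + 1) - 1
    scores = 1 - torch.exp(-dists)  # in [0, 1); 0 means "at the center"
    # normals minimize the distance; outliers maximize it via -log(score)
    losses = torch.where(semi_targets == 0, dists,
                         -torch.log(scores + eps))
    return losses.mean()

# toy check: two normal codes and one outlier-exposure code
loss = hsc_loss(torch.randn(3, 32), torch.tensor([0, 0, -1]))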