Пример #1
0
 def test_add_metric(self):
     """Check that a registered metric exists for both splits and gets filled.

     Registers ``cls_report`` on the wrapper, verifies that the
     ``train_``/``test_`` prefixed entries appear in ``wrapper.metrics``,
     then runs one training and one test pass and checks that each
     report's ``accuracy`` contains at least one non-zero entry.
     """
     wrapper = self.wrapper
     wrapper.add_metric('cls_report', lambda: ClassificationReport(2))

     # Registration alone must create one entry per split.
     for split in ('test', 'train'):
         assert split + '_cls_report' in wrapper.metrics

     # Positional arguments are presumably batch size 32, 2 epochs and
     # use_cuda=False -- confirm against the wrapper's signature.
     wrapper.train_on_dataset(self.dataset, self.optim, 32, 2, False)
     wrapper.test_on_dataset(self.dataset, 32, False)

     # After running, each split's report must hold some non-zero accuracy.
     for split in ('train', 'test'):
         accuracy = wrapper.metrics[split + '_cls_report'].value['accuracy']
         assert (accuracy != 0).any()
    def __init__(self, exp_dict):
        """Assemble the backbone, optimizer, wrapper, loop and calibrator.

        Args:
            exp_dict: experiment configuration mapping. Required keys read
                here: ``imagenet_pretraining``, ``num_classes``,
                ``batch_size``, ``learning_epoch``, ``lr``, ``heuristic``,
                ``query_size``, ``iterations``, ``dataset``, ``reg_factor``
                and ``mu``. Optional keys: ``calibrate`` (default ``False``)
                and ``shuffle_prop`` (default ``0.0``).
        """
        super().__init__()

        # VGG-16 whose final classifier layer is swapped for a fresh linear
        # head with ``num_classes`` outputs, then passed through patch_module.
        network = models.vgg16(pretrained=exp_dict["imagenet_pretraining"],
                               progress=True)
        head_inputs = network.classifier[-1].in_features
        network.classifier[-1] = torch.nn.Linear(head_inputs,
                                                 exp_dict["num_classes"])
        self.backbone = patch_module(network)

        # Snapshot the weights before the model is moved to the GPU.
        self.initial_weights = deepcopy(self.backbone.state_dict())
        self.backbone.cuda()

        # Training hyper-parameters.
        self.batch_size = exp_dict['batch_size']
        self.calibrate = exp_dict.get('calibrate', False)
        self.learning_epoch = exp_dict['learning_epoch']
        self.optimizer = torch.optim.SGD(
            self.backbone.parameters(),
            lr=exp_dict['lr'],
            momentum=0.9,
            nesterov=True,
            weight_decay=5e-4,
        )

        # Loss, acquisition heuristic and the wrapper that carries metrics.
        self.criterion = CrossEntropyLoss()
        self.heuristic = get_heuristic(
            exp_dict['heuristic'],
            shuffle_prop=exp_dict.get('shuffle_prop', 0.0))
        self.wrapper = ModelWrapper(self.backbone, criterion=self.criterion)
        self.wrapper.add_metric(
            'cls_report',
            lambda: ClassificationReport(exp_dict["num_classes"]))
        self.wrapper.add_metric('accuracy', lambda: Accuracy())

        # The loop is created without a dataset (first argument is None).
        # max_sample=-1 presumably disables pool sub-sampling -- confirm.
        self.loop = ActiveLearningLoop(
            None,
            self.wrapper.predict_on_dataset,
            heuristic=self.heuristic,
            ndata_to_label=exp_dict['query_size'],
            batch_size=self.batch_size,
            iterations=exp_dict['iterations'],
            use_cuda=True,
            max_sample=-1,
        )

        # Held-out splits and the calibrator built on top of the wrapper.
        dataset_name = exp_dict['dataset']
        self.calib_set = get_dataset('calib', dataset_name)
        self.valid_set = get_dataset('val', dataset_name)
        self.calibrator = DirichletCalibrator(
            self.wrapper,
            exp_dict["num_classes"],
            lr=0.001,
            reg_factor=exp_dict['reg_factor'],
            mu=exp_dict['mu'],
        )

        # Filled in later; nothing is attached at construction time.
        self.active_dataset = None
        self.active_dataset_settings = None
Пример #3
0
def test_classification_report():
    """Feed two batches to a 3-class report and check per-class accuracy."""
    report = ClassificationReport(num_classes=3)

    # Each entry is (prediction scores, target labels) for one update call.
    # NOTE(review): label 3 is outside range(3); the expected accuracies
    # below presumably depend on how ClassificationReport handles it --
    # confirm before "fixing" the label.
    batches = (
        ([[0.4, 0.5, 0.1], [0.1, 0.8, 0.1]], [2, 1]),
        ([[0.1, 0.5, 0.4], [0.8, 0.1, 0.1]], [1, 3]),
    )
    for scores, labels in batches:
        report.update(torch.FloatTensor(scores), torch.LongTensor(labels))

    expected_accuracy = [1.0, 0.666666666, 0.666666666]
    assert np.allclose(report.value['accuracy'], expected_accuracy)
Пример #4
0
def main():
    """Run the active-learning segmentation experiment.

    Parses the command-line arguments, builds a U-Net wrapped for
    MC-Dropout, then alternates between training from the initial weights,
    evaluating on the test set and asking the active-learning loop for its
    next step. The loop runs for at most ``args.al_step`` rounds and stops
    early when ``loop.step()`` returns a falsy value. One log dict per round
    is pretty-printed.
    """
    args = parse_args()
    hyperparams = vars(args)
    batch_size = args.batch_size
    use_cuda = torch.cuda.is_available()
    pprint(hyperparams)

    active_set, test_set = get_datasets(hyperparams['initial_pool'],
                                        hyperparams['data_path'])

    # Focal loss is the training criterion.
    criterion = FocalLoss(gamma=2, alpha=0.25)

    # U-Net with a ResNeXt-50 encoder, one output channel per Pascal VOC id.
    n_classes = len(pascal_voc_ids)
    network = smp.Unet(encoder_name='resnext50_32x4d',
                       encoder_depth=5,
                       encoder_weights='imagenet',
                       decoder_use_batchnorm=False,
                       classes=n_classes)
    # Add a Dropout layer so that MC-Dropout can be used.
    add_dropout(network, classes=n_classes, activation=None)

    # Keeps Dropout enabled at test time.
    network = MCDropoutModule(network)

    if use_cuda:
        network.cuda()

    optimizer = optim.SGD(network.parameters(),
                          lr=hyperparams["lr"],
                          momentum=0.9,
                          weight_decay=5e-4)

    # Snapshot of the starting weights, restored at every round below.
    initial_weights = deepcopy(network.state_dict())

    # Wrap the network so that loss and metrics are tracked.
    wrapper = ModelWrapper(network, criterion)
    wrapper.add_metric('cls_report',
                       lambda: ClassificationReport(n_classes))

    # Heuristic chosen on the command line, with the custom region
    # reduction function.
    heuristic = get_heuristic(hyperparams['heuristic'], reduction=mean_regions)

    # The loop receives the prediction generator and the heuristic.
    loop = ActiveLearningLoop(
        active_set,
        wrapper.predict_on_dataset_generator,
        heuristic=heuristic,
        ndata_to_label=hyperparams['n_data_to_label'],
        # Instead of predicting on the entire pool, only a subset is used.
        max_sample=1000,
        batch_size=batch_size,
        iterations=hyperparams["iterations"],
        use_cuda=use_cuda)

    history = []
    for al_round in tqdm(range(args.al_step)):
        # Following Gal et al. 2016, we reset the weights.
        wrapper.load_state_dict(initial_weights)
        # Train on the current labelled set before sampling; the fourth
        # argument is the `learning_epoch` hyperparameter.
        wrapper.train_on_dataset(active_set, optimizer, batch_size,
                                 hyperparams['learning_epoch'], use_cuda)

        # Validation, then let the loop take its next step.
        wrapper.test_on_dataset(test_set, batch_size, use_cuda)
        should_continue = loop.step()

        metrics = wrapper.metrics
        logs = {
            "val": metrics['test_loss'].value,
            "epoch": al_round,
            "train": metrics['train_loss'].value,
            "labeled_data": active_set._labelled,
            "Next Training set size": len(active_set),
            'cls_report': metrics['test_cls_report'].value,
        }
        pprint(logs)
        history.append(logs)

        if not should_continue:
            break