X_test = numpy.float32(X_test)
X_test /= 255.0
X_test *= 2.0

train_dataset = supervised_dataset.SupervisedDataset(X_train, y_train)
val_dataset = supervised_dataset.SupervisedDataset(X_val, y_val)
train_iterator = train_dataset.iterator(
    mode='random_uniform', batch_size=64, num_batches=31000)
val_iterator = val_dataset.iterator(
    mode='random_uniform', batch_size=64, num_batches=31000)

# Create object to local contrast normalize a batch.
# Note: Every batch must be normalized before use.
normer = util.Normer3(filter_size=5, num_channels=1)
module_list = [normer]
preprocessor = util.Preprocessor(module_list)

print('Training Model')
for x_batch, y_batch in train_iterator:
    x_batch = preprocessor.run(x_batch)
    monitor.start()
    log_prob, accuracy = model.train(x_batch, y_batch)
    monitor.stop(1 - accuracy)  # monitor takes error instead of accuracy

    if monitor.test:
        monitor.start()
        x_val_batch, y_val_batch = val_iterator.next()
        x_val_batch = preprocessor.run(x_val_batch)
        val_accuracy = model.eval(x_val_batch, y_val_batch)
        monitor.stop_test(1 - val_accuracy)
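
util.Normer3 and util.Preprocessor come from the project's own util module, so their internals are not shown here. As a rough sketch of what per-batch local contrast normalization does (the function name, the (num_images, height, width) shape, and the uniform filter are assumptions, not the project's implementation):

import numpy as np
from scipy.ndimage import uniform_filter

def local_contrast_normalize(batch, filter_size=5, eps=1e-4):
    # Subtract each pixel's local mean and divide by its local standard
    # deviation, both computed over a filter_size x filter_size neighbourhood.
    out = np.empty_like(batch)
    for i, img in enumerate(batch):
        local_mean = uniform_filter(img, size=filter_size)
        centered = img - local_mean
        local_std = np.sqrt(uniform_filter(centered ** 2, size=filter_size))
        out[i] = centered / np.maximum(local_std, eps)
    return out

The second snippet below follows the same training loop, but swaps the preprocessor for a fixed mean/std normalization and hides random patches in each training image.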
val_iterator = val_dataset.iterator(mode='random_uniform',
                                    batch_size=64,
                                    num_batches=31000)

# Create object to local contrast normalize a batch.
# Note: Every batch must be normalized before use.
normer = util.Normer3(filter_size=5, num_channels=1)
module_list = [normer]
preprocessor = util.Preprocessor(module_list)

print('Training Model')
for x_batch, y_batch in train_iterator:
    #x_batch = preprocessor.run(x_batch)
    x_batch = (x_batch - mean) / std

    # loop over batch
    for i in range(len(x_batch)):
        # hide patch for an image
        x_batch[i] = hide_patch(x_batch[i])

    monitor.start()
    log_prob, accuracy = model.train(x_batch, y_batch)
    monitor.stop(1 - accuracy)  # monitor takes error instead of accuracy

    if monitor.test:
        monitor.start()
        x_val_batch, y_val_batch = val_iterator.next()
        #x_val_batch = preprocessor.run(x_val_batch)
        x_val_batch = (x_val_batch - mean) / std
        val_accuracy = model.eval(x_val_batch, y_val_batch)
        monitor.stop_test(1 - val_accuracy)
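
hide_patch is not defined in the snippet above. A minimal sketch of the kind of patch-hiding augmentation the loop suggests, assuming channels-first images; grid_size, hide_prob, and fill_value are made-up parameters, not values from the original code:

import numpy as np

def hide_patch(img, grid_size=4, hide_prob=0.25, fill_value=0.0):
    # img: array of shape (channels, height, width). Returns a copy in which
    # each grid cell has been blanked out independently with probability
    # hide_prob.
    img = img.copy()
    _, height, width = img.shape
    cell_h, cell_w = height // grid_size, width // grid_size
    for row in range(grid_size):
        for col in range(grid_size):
            if np.random.rand() < hide_prob:
                img[:, row * cell_h:(row + 1) * cell_h,
                    col * cell_w:(col + 1) * cell_w] = fill_value
    return img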
Example #3
def learning(
    cfg: OmegaConf,
    training_data_loader: torch.utils.data.DataLoader,
    validation_data_loader: torch.utils.data.DataLoader,
    model: SupervisedModel,
) -> None:
    """
    Learning function including evaluation

    :param cfg: Hydra's config instance
    :param training_data_loader: Training data loader
    :param validation_data_loader: Validation data loader
    :param model: Model
    :return: None
    """

    local_rank = cfg["distributed"]["local_rank"]
    num_gpus = cfg["distributed"]["world_size"]
    epochs = cfg["parameter"]["epochs"]
    num_training_samples = len(training_data_loader.dataset.data)
    steps_per_epoch = int(
        num_training_samples /
        (cfg["experiment"]["batches"] * num_gpus))  # because the drop=True
    total_steps = cfg["parameter"]["epochs"] * steps_per_epoch
    warmup_steps = cfg["parameter"]["warmup_epochs"] * steps_per_epoch
    current_step = 0

    best_metric = np.finfo(np.float64).max

    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=calculate_initial_lr(cfg),
                                momentum=cfg["parameter"]["momentum"],
                                nesterov=False,
                                weight_decay=cfg["experiment"]["decay"])

    # https://github.com/google-research/simclr/blob/master/lars_optimizer.py#L26
    optimizer = LARC(optimizer=optimizer, trust_coefficient=0.001, clip=False)

    cos_lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer.optim,
        T_max=total_steps - warmup_steps,
    )

    for epoch in range(1, epochs + 1):
        # training
        model.train()
        training_data_loader.sampler.set_epoch(epoch)

        for data, targets in training_data_loader:
            # adjust learning rate by applying linear warmup
            if current_step <= warmup_steps:
                lr = calculate_lr(cfg, warmup_steps, current_step)
                for param_group in optimizer.param_groups:
                    param_group["lr"] = lr

            optimizer.zero_grad()
            data, targets = data.to(local_rank), targets.to(local_rank)
            unnormalized_features = model(data)
            loss = torch.nn.functional.cross_entropy(unnormalized_features,
                                                     targets)
            loss.backward()
            optimizer.step()

            # adjust learning rate by applying cosine annealing
            if current_step > warmup_steps:
                cos_lr_scheduler.step()

            current_step += 1

        if local_rank == 0:
            logger_line = "Epoch:{}/{} progress:{:.3f} loss:{:.3f}, lr:{:.7f}".format(
                epoch, epochs, epoch / epochs, loss.item(),
                optimizer.param_groups[0]["lr"])

        # run distributed validation at the end of every epoch
        sum_val_loss, num_val_corrects = validation(validation_data_loader,
                                                    model, local_rank)

        torch.distributed.barrier()
        torch.distributed.reduce(sum_val_loss, dst=0)
        torch.distributed.reduce(num_val_corrects, dst=0)

        num_val_samples = len(validation_data_loader.dataset)

        # logging and save checkpoint
        if local_rank == 0:

            validation_loss = sum_val_loss.item() / num_val_samples
            validation_acc = num_val_corrects.item() / num_val_samples

            logging.info(logger_line +
                         " val loss:{:.3f}, val acc:{:.2f}%".format(
                             validation_loss, validation_acc * 100.))

            if cfg["parameter"]["metric"] == "loss":
                metric = validation_loss
            else:
                metric = 1. - validation_acc

            if metric <= best_metric:
                if "save_fname" in locals():
                    if os.path.exists(save_fname):
                        os.remove(save_fname)

                save_fname = "epoch={}-{}".format(
                    epoch, cfg["experiment"]["output_model_name"])
                torch.save(model.state_dict(), save_fname)
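
calculate_initial_lr, calculate_lr, and validation are defined elsewhere in the module and are not shown above. The sketch below is one plausible reading, not the original code: the initial learning rate follows the common linear-scaling rule (base_lr is an assumed config key), the warmup ramps linearly up to that value, and validation returns GPU tensors so the caller can reduce them across processes with torch.distributed.reduce.

import torch

def calculate_initial_lr(cfg):
    # Linear scaling rule: scale an assumed base lr by global batch size / 256.
    global_batch = cfg["experiment"]["batches"] * cfg["distributed"]["world_size"]
    return cfg["parameter"]["base_lr"] * global_batch / 256.0

def calculate_lr(cfg, warmup_steps, current_step):
    # Linear warmup: ramp from zero to the initial lr over warmup_steps.
    return calculate_initial_lr(cfg) * current_step / max(1, warmup_steps)

def validation(validation_data_loader, model, local_rank):
    # Accumulate summed loss and correct-prediction counts as tensors on the
    # local GPU so they can be reduced with torch.distributed.reduce.
    model.eval()
    sum_loss = torch.zeros(1, device=local_rank)
    num_corrects = torch.zeros(1, device=local_rank)
    with torch.no_grad():
        for data, targets in validation_data_loader:
            data, targets = data.to(local_rank), targets.to(local_rank)
            logits = model(data)
            sum_loss += torch.nn.functional.cross_entropy(
                logits, targets, reduction="sum")
            num_corrects += (logits.argmax(dim=1) == targets).sum()
    return sum_loss, num_corrects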