示例#1
0
def test_TensorBoard():
    runner = Runner(
        model=TestModel,
        optimizer=TestOptimizer,
        criterion=TestCriterion,
        metrics=TestMetric,
        callbacks=pt_clb.TensorBoard(log_dir=TMP_PATH),
    )
    runner.fit(TestLoader, epochs=2)
示例#2
0
def test_tensorboar_CM():
    runner = Runner(
        model=TEST_MODEL,
        optimizer=TEST_OPTIMIZER,
        criterion=TEST_CRITERION,
        callbacks=[
            pt_clb.TensorBoardCM(),
            pt_clb.TensorBoard(log_dir=TMP_PATH)
        ],
    )
    runner.fit(TEST_LOADER, epochs=2)
示例#3
0
# We only test that callbacks don't crash NOT that they do what they should do
TMP_PATH = "/tmp/pt_tools2/"
os.makedirs(TMP_PATH, exist_ok=True)


@pytest.mark.parametrize(
    "callback",
    [
        pt_clb.Timer(),
        pt_clb.ReduceLROnPlateau(),
        pt_clb.CheckpointSaver(TMP_PATH, save_name="model.chpn"),
        pt_clb.CheckpointSaver(
            TMP_PATH, save_name="model.chpn", monitor=TEST_METRIC.name, mode="max"
        ),
        pt_clb.TensorBoard(log_dir=TMP_PATH),
        pt_clb.TensorBoardWithCM(log_dir=TMP_PATH),
        pt_clb.ConsoleLogger(),
        pt_clb.FileLogger(TMP_PATH),
        pt_clb.Mixup(0.2, NUM_CLASSES),
        pt_clb.Cutmix(1.0, NUM_CLASSES),
        pt_clb.ScheduledDropout(),
    ],
)
def test_callback(callback):
    runner = Runner(
        model=TEST_MODEL,
        optimizer=TEST_OPTIMIZER,
        criterion=TEST_CRITERION,
        metrics=TEST_METRIC,
        callbacks=callback,
示例#4
0
def main():
    # Get config for this run
    hparams = parse_args()

    # Setup logger
    config = {
        "handlers": [
            {
                "sink": sys.stdout,
                "format": "{time:[MM-DD HH:mm]} - {message}"
            },
            {
                "sink": f"{hparams.outdir}/logs.txt",
                "format": "{time:[MM-DD HH:mm]} - {message}"
            },
        ],
    }
    logger.configure(**config)
    logger.info(f"Parameters used for training: {hparams}")

    # Fix seeds for reprodusability
    pt.utils.misc.set_random_seed(hparams.seed)

    # Save config
    os.makedirs(hparams.outdir, exist_ok=True)
    yaml.dump(vars(hparams), open(hparams.outdir + "/config.yaml", "w"))

    # Get model
    model = Model(arch=hparams.arch,
                  model_params=hparams.model_params,
                  embedding_size=hparams.embedding_size,
                  pooling=hparams.pooling).cuda()

    # Get loss
    # loss = LOSS_FROM_NAME[hparams.criterion](in_features=hparams.embedding_size, **hparams.criterion_params).cuda()
    loss = LOSS_FROM_NAME["cross_entropy"].cuda()
    logger.info(f"Loss for this run is: {loss}")

    if hparams.resume:
        checkpoint = torch.load(
            hparams.resume, map_location=lambda storage, loc: storage.cuda())
        model.load_state_dict(checkpoint["state_dict"], strict=True)
        loss.load_state_dict(checkpoint["loss"], strict=True)

    if hparams.freeze_bn:
        freeze_batch_norm(model)

    # Get optimizer
    # optim_params = pt.utils.misc.filter_bn_from_wd(model)
    optim_params = list(loss.parameters()) + list(
        model.parameters())  # add loss params
    optimizer = optimizer_from_name(hparams.optim)(
        optim_params, lr=0, weight_decay=hparams.weight_decay, amsgrad=True)

    num_params = pt.utils.misc.count_parameters(model)[0]
    logger.info(f"Model size: {num_params / 1e6:.02f}M")
    # logger.info(model)

    # Scheduler is an advanced way of planning experiment
    sheduler = pt.fit_wrapper.callbacks.PhasesScheduler(hparams.phases)

    # Save logs
    TB_callback = pt_clb.TensorBoard(hparams.outdir, log_every=20)

    # Get dataloaders
    train_loader, val_loader, val_indexes = get_dataloaders(
        root=hparams.root,
        augmentation=hparams.augmentation,
        size=hparams.size,
        val_size=hparams.val_size,
        batch_size=hparams.batch_size,
        workers=hparams.workers,
    )

    # Load validation query / gallery split and resort it according to indexes from sampler
    df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
    df_val = df_val[df_val["is_train"].astype(np.bool) == False]
    val_is_query = df_val.is_query.values[val_indexes].astype(np.bool)

    logger.info(f"Start training")
    # Init runner
    runner = pt.fit_wrapper.Runner(
        model,
        optimizer,
        criterion=loss,
        callbacks=[
            # pt_clb.BatchMetrics([pt.metrics.Accuracy(topk=1)]),
            ContestMetricsCallback(
                is_query=val_is_query[:1280] if hparams.debug else val_is_query
            ),
            pt_clb.Timer(),
            pt_clb.ConsoleLogger(),
            pt_clb.FileLogger(),
            TB_callback,
            CheckpointSaver(hparams.outdir,
                            save_name="model.chpn",
                            monitor="target",
                            mode="max"),
            CheckpointSaver(hparams.outdir,
                            save_name="model_mapr.chpn",
                            monitor="mAP@R",
                            mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_loss.chpn"),
            sheduler,
            # EMA must go after other checkpoints
            pt_clb.ModelEma(model, hparams.ema_decay)
            if hparams.ema_decay else pt_clb.Callback(),
        ],
        use_fp16=hparams.
        use_fp16,  # use mixed precision by default.  # hparams.opt_level != "O0",
    )

    if hparams.head_warmup_epochs > 0:
        #Freeze model
        for p in model.parameters():
            p.requires_grad = False

        runner.fit(
            train_loader,
            # val_loader=val_loader,
            epochs=hparams.head_warmup_epochs,
            steps_per_epoch=20 if hparams.debug else None,
            # val_steps=20 if hparams.debug else None,
        )

        # Unfreeze model
        for p in model.parameters():
            p.requires_grad = True

        if hparams.freeze_bn:
            freeze_batch_norm(model)

        # Re-init to avoid nan's in loss
        optim_params = list(loss.parameters()) + list(model.parameters())

        optimizer = optimizer_from_name(hparams.optim)(
            optim_params,
            lr=0,
            weight_decay=hparams.weight_decay,
            amsgrad=True)

        runner.state.model = model
        runner.state.optimizer = optimizer
        runner.state.criterion = loss

    # Train
    runner.fit(
        train_loader,
        # val_loader=val_loader,
        start_epoch=hparams.head_warmup_epochs,
        epochs=sheduler.tot_epochs,
        steps_per_epoch=20 if hparams.debug else None,
        # val_steps=20 if hparams.debug else None,
    )

    logger.info(f"Loading best model")
    checkpoint = torch.load(os.path.join(hparams.outdir, f"model.chpn"))
    model.load_state_dict(checkpoint["state_dict"], strict=True)
    # runner.state.model = model
    # loss.load_state_dict(checkpoint["loss"], strict=True)

    # Evaluate
    _, [acc1, map10, target, mapR] = runner.evaluate(
        val_loader,
        steps=20 if hparams.debug else None,
    )

    logger.info(
        f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, Target {target:0.5f}, mAP@R {mapR:0.5f}"
    )

    # Save params used for training and final metrics into separate TensorBoard file
    metric_dict = {
        "hparam/Acc@1": acc1,
        "hparam/mAP@10": map10,
        "hparam/mAP@R": target,
        "hparam/Target": mapR,
    }

    # Convert all lists / dicts to avoid TB error
    hparams.phases
    hparams.phases = str(hparams.phases)
    hparams.model_params = str(hparams.model_params)
    hparams.criterion_params = str(hparams.criterion_params)

    with pt.utils.tensorboard.CorrectedSummaryWriter(hparams.outdir) as writer:
        writer.add_hparams(hparam_dict=vars(hparams), metric_dict=metric_dict)