Пример #1
0
def experiment(logdir, device) -> None:
    """Experiment function
    Args:
        logdir (Path): directory where should be placed logs
        device (str): device name to use
    """
    tb_dir = logdir / "tensorboard"
    main_metric = "score"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet18(
            pretrained=True,
            in_channels=3 + 2 * (history_n_frames + 1),
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    # model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = optim.AdamP(model.parameters(),
                            lr=0.001,
                            betas=(0.9, 0.999),
                            weight_decay=1e-2)

    load_checkpoint("./logs/checkpoint_17/train_25868.pth", model, optimizer)

    criterion = neg_multi_log_likelihood_batch
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, (valid_loader,
                       valid_gt_path) = get_loaders(train_batch_size=32,
                                                    valid_batch_size=32)

        valid_func = partial(
            valid_fn,
            loader=valid_loader,
            ground_truth_file=valid_gt_path,
            logdir=logdir,
            verbose=True,
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            try:
                train_metrics = train_fn(
                    model,
                    train_loader,
                    device,
                    criterion,
                    optimizer,
                    tensorboard_logger=tb,
                    logdir=logdir / f"epoch_{epoch}",
                    validation_fn=valid_func,
                )
                log_metrics(stage, train_metrics, tb, "train", epoch)
            except BaseException:
                train_metrics = {"message": "An exception occured!"}

            # valid_metrics = train_metrics
            valid_metrics = valid_fn(model, valid_loader, device,
                                     valid_gt_path, logdir)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={
                        "train": train_metrics,
                        "valid": valid_metrics
                    },
                ),
            )
Пример #2
0
def experiment(logdir, device) -> None:
    """Experiment function

    Args:
        logdir (Path): directory where should be placed logs
        device (str): device name to use
    """
    tb_dir = logdir / "tensorboard"
    main_metric = "loss"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet18(
            pretrained=True,
            in_channels=3 + 2 * (history_n_frames + 1),
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    # model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, valid_loader = get_loaders(train_batch_size=32,
                                                 valid_batch_size=32)

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(model, train_loader, device, criterion,
                                     optimizer)
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, criterion)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={
                        "train": train_metrics,
                        "valid": valid_metrics
                    },
                ),
            )

            scheduler.step()
Пример #3
0
        "filter_agents_threshold": 0.5,
    },
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    },
}

future_n_frames = cfg["model_params"]["future_num_frames"]
n_trajectories = 3
model = ModelWithConfidence(
    backbone=resnet18(
        pretrained=True,
        in_channels=6,
        num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
    ),
    future_num_frames=future_n_frames,
    num_trajectories=n_trajectories,
)

load_checkpoint(checkpoint_path, model)
model = model.eval()

device = torch.device("cuda:0")
model = model.to(device)

valid_mask = np.load(
    f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]