"filter_agents_threshold": 0.5, }, "train_data_loader": { "key": "scenes/train.zarr", "batch_size": 32, "shuffle": True, "num_workers": 4, }, } future_n_frames = cfg["model_params"]["future_num_frames"] n_trajectories = 3 model = ModelWithConfidence( backbone=resnet34( pretrained=True, in_channels=6, num_classes=2 * future_n_frames * n_trajectories + n_trajectories, ), future_num_frames=future_n_frames, num_trajectories=n_trajectories, ) load_checkpoint(checkpoint_path, model) model = model.eval() device = torch.device("cuda:0") model = model.to(device) valid_mask = np.load( f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]
def experiment(logdir, device) -> None:
    """Experiment function.

    Args:
        logdir (Path): directory where logs should be placed
        device (str): device name to use
    """
    tb_dir = logdir / "tensorboard"
    main_metric = "loss"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet34(
            pretrained=True,
            in_channels=6,
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    # model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 3
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, valid_loader = get_loaders(
            train_batch_size=128, valid_batch_size=128
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(
                model,
                train_loader,
                device,
                criterion,
                optimizer,
                tensorboard_logger=tb,
                logdir=logdir / f"{epoch}_epoch",
            )
            log_metrics(stage, train_metrics, tb, "train", epoch)

            # NOTE: validation is skipped here; train metrics are reused
            # for checkpoint selection.
            valid_metrics = train_metrics
            # valid_metrics = valid_fn(model, valid_loader, device, criterion)
            # log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={"train": train_metrics, "valid": valid_metrics},
                ),
            )

            scheduler.step()
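
# Hypothetical entry point, not in the original script: the call matches
# `experiment`'s signature above, but the log directory path is an assumption.
if __name__ == "__main__":
    from pathlib import Path

    experiment(logdir=Path("logs/lyft_confidence"), device="cuda:0")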