Exemplo n.º 1
0
def main():
    """Run one full training experiment configured by ``parse_args()``.

    The function sets up logging to stdout and ``<outdir>/logs.txt``, seeds
    the RNGs, dumps the run configuration to ``<outdir>/config.yaml``, builds
    the model, loss, optimizer, callbacks and dataloaders, optionally runs a
    warm-up phase with every model parameter frozen, trains for the number of
    epochs planned by the phase scheduler, reloads ``<outdir>/model.chpn``,
    evaluates it on the validation loader and finally writes the
    hyper-parameters together with the final metrics to TensorBoard.

    Side effects: creates files under ``hparams.outdir``, moves the model and
    the loss to CUDA, and mutates ``hparams`` (``phases``, ``model_params``
    and ``criterion_params`` are replaced by their string form at the end).
    """
    # Get config for this run
    hparams = parse_args()

    # Setup logger: the console and the log file share one line format
    log_format = "{time:[MM-DD HH:mm]} - {message}"
    config = {
        "handlers": [
            {"sink": sys.stdout, "format": log_format},
            {"sink": f"{hparams.outdir}/logs.txt", "format": log_format},
        ],
    }
    logger.configure(**config)
    logger.info(f"Parameters used for training: {hparams}")

    # Fix seeds for reproducibility
    pt.utils.misc.set_random_seed(hparams.seed)

    # Save config; the context manager guarantees the file is flushed and closed
    os.makedirs(hparams.outdir, exist_ok=True)
    with open(hparams.outdir + "/config.yaml", "w") as config_file:
        yaml.dump(vars(hparams), config_file)

    # Get model
    model = Model(
        arch=hparams.arch,
        model_params=hparams.model_params,
        embedding_size=hparams.embedding_size,
        pooling=hparams.pooling,
    ).cuda()

    # Get loss
    # NOTE(review): the criterion is hard-coded to "cross_entropy";
    # hparams.criterion / hparams.criterion_params are not used to build it.
    # loss = LOSS_FROM_NAME[hparams.criterion](in_features=hparams.embedding_size, **hparams.criterion_params).cuda()
    loss = LOSS_FROM_NAME["cross_entropy"].cuda()
    logger.info(f"Loss for this run is: {loss}")

    if hparams.resume:
        # Map every stored tensor onto the GPU while loading
        checkpoint = torch.load(
            hparams.resume, map_location=lambda storage, loc: storage.cuda())
        model.load_state_dict(checkpoint["state_dict"], strict=True)
        loss.load_state_dict(checkpoint["loss"], strict=True)

    if hparams.freeze_bn:
        freeze_batch_norm(model)

    # Get optimizer. The loss parameters are optimised together with the model.
    # lr=0 is only a placeholder value passed at construction time
    # (presumably the phase scheduler sets the real value - TODO confirm).
    # optim_params = pt.utils.misc.filter_bn_from_wd(model)
    optim_params = list(loss.parameters()) + list(model.parameters())
    optimizer = optimizer_from_name(hparams.optim)(
        optim_params, lr=0, weight_decay=hparams.weight_decay, amsgrad=True)

    num_params = pt.utils.misc.count_parameters(model)[0]
    logger.info(f"Model size: {num_params / 1e6:.02f}M")

    # Scheduler is an advanced way of planning experiment
    scheduler = pt.fit_wrapper.callbacks.PhasesScheduler(hparams.phases)

    # Save logs
    tb_callback = pt_clb.TensorBoard(hparams.outdir, log_every=20)

    # Get dataloaders
    train_loader, val_loader, val_indexes = get_dataloaders(
        root=hparams.root,
        augmentation=hparams.augmentation,
        size=hparams.size,
        val_size=hparams.val_size,
        batch_size=hparams.batch_size,
        workers=hparams.workers,
    )

    # Load validation query / gallery split and resort it according to indexes from sampler
    df_val = pd.read_csv(os.path.join(hparams.root, "train_val.csv"))
    # Builtin bool replaces the removed np.bool alias; ~ keeps the non-train rows
    df_val = df_val[~df_val["is_train"].astype(bool)]
    val_is_query = df_val.is_query.values[val_indexes].astype(bool)

    logger.info("Start training")
    # Init runner
    runner = pt.fit_wrapper.Runner(
        model,
        optimizer,
        criterion=loss,
        callbacks=[
            # pt_clb.BatchMetrics([pt.metrics.Accuracy(topk=1)]),
            ContestMetricsCallback(
                is_query=val_is_query[:1280] if hparams.debug else val_is_query
            ),
            pt_clb.Timer(),
            pt_clb.ConsoleLogger(),
            pt_clb.FileLogger(),
            tb_callback,
            CheckpointSaver(hparams.outdir,
                            save_name="model.chpn",
                            monitor="target",
                            mode="max"),
            CheckpointSaver(hparams.outdir,
                            save_name="model_mapr.chpn",
                            monitor="mAP@R",
                            mode="max"),
            CheckpointSaver(hparams.outdir, save_name="model_loss.chpn"),
            scheduler,
            # EMA must go after other checkpoints
            pt_clb.ModelEma(model, hparams.ema_decay)
            if hparams.ema_decay else pt_clb.Callback(),
        ],
        use_fp16=hparams.use_fp16,  # use mixed precision by default.
    )

    if hparams.head_warmup_epochs > 0:
        # Freeze model: only the loss parameters stay trainable during warm-up
        for p in model.parameters():
            p.requires_grad = False

        runner.fit(
            train_loader,
            # val_loader=val_loader,
            epochs=hparams.head_warmup_epochs,
            steps_per_epoch=20 if hparams.debug else None,
            # val_steps=20 if hparams.debug else None,
        )

        # Unfreeze model
        for p in model.parameters():
            p.requires_grad = True

        if hparams.freeze_bn:
            freeze_batch_norm(model)

        # Re-init the optimizer to avoid nan's in loss
        optim_params = list(loss.parameters()) + list(model.parameters())
        optimizer = optimizer_from_name(hparams.optim)(
            optim_params,
            lr=0,
            weight_decay=hparams.weight_decay,
            amsgrad=True)

        runner.state.model = model
        runner.state.optimizer = optimizer
        runner.state.criterion = loss

    # Train
    runner.fit(
        train_loader,
        # val_loader=val_loader,
        start_epoch=hparams.head_warmup_epochs,
        epochs=scheduler.tot_epochs,
        steps_per_epoch=20 if hparams.debug else None,
        # val_steps=20 if hparams.debug else None,
    )

    logger.info("Loading best model")
    checkpoint = torch.load(os.path.join(hparams.outdir, "model.chpn"))
    model.load_state_dict(checkpoint["state_dict"], strict=True)

    # Evaluate
    _, [acc1, map10, target, mapR] = runner.evaluate(
        val_loader,
        steps=20 if hparams.debug else None,
    )

    logger.info(
        f"Val: Acc@1 {acc1:0.5f}, mAP@10 {map10:0.5f}, Target {target:0.5f}, mAP@R {mapR:0.5f}"
    )

    # Save params used for training and final metrics into separate TensorBoard file.
    # Each key is paired with the metric of the same name (matches the log line above).
    metric_dict = {
        "hparam/Acc@1": acc1,
        "hparam/mAP@10": map10,
        "hparam/mAP@R": mapR,
        "hparam/Target": target,
    }

    # Convert all lists / dicts to avoid TB error
    hparams.phases = str(hparams.phases)
    hparams.model_params = str(hparams.model_params)
    hparams.criterion_params = str(hparams.criterion_params)

    with pt.utils.tensorboard.CorrectedSummaryWriter(hparams.outdir) as writer:
        writer.add_hparams(hparam_dict=vars(hparams), metric_dict=metric_dict)
Exemplo n.º 2
0
def main():
    """Train the group network on one small image group and plot the outcome.

    Loads up to ``GROUP_SIZE`` image / ground-truth pairs from the ``Coseg``
    dataset, precomputes features with a frozen, truncated VGG19, trains
    ``Model`` for ``EPOCHS`` full-batch steps (loss ``Ls`` from the start,
    loss ``Lc`` added from epoch ``LC_START_EPOCH``), and saves
    ``predictions.png`` and ``loss.png`` in the current working directory.

    Raises:
        RuntimeError: if the dataset yields no samples.
    """
    # Params
    DEVICE = 'cuda'
    GROUP_SIZE = 6
    EPOCHS = 800
    TBOARD = False  # If you have tensorboard running set it to true
    # [ PAPER ] suggests to activate group loss after 100 epochs
    LC_START_EPOCH = 100
    # [ PAPER ] suggests 0.1, but it does not work
    LC_WEIGHT = 1.

    # Load data: keep only the first GROUP_SIZE samples, already moved to DEVICE
    coseg = Coseg(
        img_set='images/',
        gt_set='ground_truth/',
        root_dir="data/042_reproducible/",
    )
    trloader = DataLoader(coseg, batch_size=1, shuffle=False, num_workers=1)
    imgs = []
    GTs = []
    for i, (In, GTn) in enumerate(trloader):
        if i == GROUP_SIZE:
            break
        imgs.append(In.to(DEVICE))
        GTs.append(GTn.to(DEVICE))
    if not imgs:
        # Fail early with a clear message instead of a ZeroDivisionError later
        raise RuntimeError("No images were loaded from the dataset")
    print("[ OK ] Data loaded")

    # Precompute features with a frozen VGG19 stripped of its last two children.
    # NOTE(review): models.vgg19() is called without any weights argument -
    # confirm that using an untrained feature extractor is intended.
    vgg19_original = models.vgg19()
    phi = nn.Sequential(*list(vgg19_original.children())[:-2])
    for param in phi.parameters():
        param.requires_grad = False
    phi = phi.to(DEVICE)
    features = precompute_features(imgs, GTs, phi)
    print("[ OK ] Feature precomputed")

    # Instantiate the model on the selected device
    groupnet = Model((1, 3, 224, 224)).to(DEVICE)
    print("[ OK ] Model instantiated")

    # Optimizer
    # [ PAPER ] suggests SGD with these parameters, but doesn't work
    #optimizer = optim.SGD(groupnet.parameters(), momentum=0.99,lr=0.00005, weight_decay=0.0005)
    optimizer = optim.Adam(groupnet.parameters(), lr=0.00002)

    # Train Loop
    losses = []
    masks = []  # stays empty only if EPOCHS == 0, so plotting below is still safe
    writer = SummaryWriter() if TBOARD else None
    for epoch in range(EPOCHS):
        group_loss_active = epoch >= LC_START_EPOCH

        optimizer.zero_grad()
        lss = 0
        lcs = 0

        masks = groupnet(imgs)
        for i, gt in enumerate(GTs):
            lss += Ls(masks[i], gt)
            if group_loss_active:
                lcs += Lc(i, imgs, masks, features, phi)

        lss /= len(imgs)
        if group_loss_active:
            lcs /= len(imgs)

        loss = lss + LC_WEIGHT * lcs
        # NOTE(review): retain_graph=True is kept from the original; the graph
        # is rebuilt every epoch, so it may be unnecessary - confirm before removing.
        loss.backward(retain_graph=True)
        optimizer.step()

        loss_value = loss.item()
        if writer is not None:
            writer.add_scalar("loss", loss_value, epoch)
            utils.tboard_imlist(masks, "masks", epoch, writer)
        losses.append(loss_value)
        print(f'[ ep {epoch} ] - Loss: {loss_value:.4f}')

    if writer is not None:
        writer.close()

    def as_numpy(tensor):
        """Detach *tensor*, move it to the CPU and convert it to a NumPy array."""
        return tensor.detach().cpu().numpy()

    # Plot results in the same folder: one column per image, rows are
    # input / ground truth / predicted mask.
    # squeeze=False keeps axs two-dimensional even when GROUP_SIZE == 1.
    _, axs = plt.subplots(nrows=3,
                          ncols=GROUP_SIZE,
                          figsize=(10, 5),
                          squeeze=False)
    for i, (img, gt, mask) in enumerate(zip(imgs, GTs, masks)):
        axs[0, i].imshow(as_numpy(img).squeeze(0).transpose(1, 2, 0))
        axs[0, i].axis('off')
        axs[1, i].imshow(as_numpy(gt).squeeze(0).squeeze(0))
        axs[1, i].axis('off')
        axs[2, i].imshow(as_numpy(mask).squeeze(0).squeeze(0))
        axs[2, i].axis('off')
    plt.savefig("predictions.png")
    plt.close()

    _, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 5))
    ax.plot(losses)
    # Mark the activation epoch only if training actually reached it; the
    # legend is drawn only when there is a labelled artist to show.
    if len(losses) > LC_START_EPOCH:
        ax.axvline(LC_START_EPOCH, c='r', ls='--', label="Activate Lc loss")
        ax.legend()
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    plt.savefig("loss.png")
    plt.close()
    print("[ OK ] Plot")
Exemplo n.º 3
0
def _predict_with_best_checkpoint(runner, model, loader, logdir):
    """Predict on *loader* with ``<logdir>/checkpoints/best.pth`` and return one array."""
    batches = runner.predict_loader(
        loader=loader,
        resume=f"{logdir}/checkpoints/best.pth",
        model=model,
    )
    return np.concatenate([batch.cpu().numpy() for batch in batches])


def run(X_seq_train, X_cont_train, y_train, X_seq_test, X_cont_test, timestamp,
        random_state, n_folds=5, num_epochs=30):
    """Cross-validate the model and average its test predictions over folds.

    For every fold returned by ``get_folds(n_folds, "stratified",
    random_state)`` a fresh ``Model`` is trained with ``CustomRunner``; the
    checkpoint ``best.pth`` of that fold is then used to predict the held-out
    part of the training data and the full test set.

    Args:
        X_seq_train: NumPy array of sequence inputs for training; its second
            dimension is passed to the model as ``in_channels``.
        X_cont_train: NumPy array of continuous features for training; its
            second dimension is passed as ``n_cont_features``.
        y_train: NumPy array of training targets (used with ``BCELoss``).
        X_seq_test: NumPy array of sequence inputs for the test set.
        X_cont_test: NumPy array of continuous features for the test set.
        timestamp: Tag used to build the per-fold log directory
            ``./logdir/{timestamp}_fold{i}``.
        random_state: Seed passed to ``seed_everything`` and ``get_folds``.
        n_folds: Number of folds; also the divisor used to average the test
            predictions, so both always stay in sync.
        num_epochs: Training epochs per fold; also used as ``T_max`` of the
            cosine learning-rate schedule.

    Returns:
        Tuple ``(oof_preds, test_preds, cv_scores)``: out-of-fold predictions
        for the training rows, test predictions averaged over the folds, and
        the list of per-fold average-precision scores.
    """
    seed_everything(random_state)

    oof_preds = np.zeros(len(X_seq_train))
    test_preds = np.zeros(len(X_seq_test))
    cv_scores = []

    # The test data is identical for every fold, so build its loader only once.
    test_dataset = TensorDataset(
        torch.from_numpy(X_seq_test).float(),
        torch.from_numpy(X_cont_test).float())
    test_loader = DataLoader(test_dataset, shuffle=False, batch_size=128)

    folds = get_folds(n_folds, "stratified", random_state)
    for i, (trn_idx, val_idx) in enumerate(folds.split(X_cont_train, y_train)):
        print(f"fold {i + 1}")
        train_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[trn_idx]).float(),
            torch.from_numpy(X_cont_train[trn_idx]).float(),
            torch.from_numpy(y_train[trn_idx]).float(),
        )
        valid_dataset = TensorDataset(
            torch.from_numpy(X_seq_train[val_idx]).float(),
            torch.from_numpy(X_cont_train[val_idx]).float(),
            torch.from_numpy(y_train[val_idx]).float(),
        )

        train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
        valid_loader = DataLoader(valid_dataset, shuffle=False, batch_size=128)
        loaders = {"train": train_loader, "valid": valid_loader}

        runner = CustomRunner(device="cuda")

        model = Model(
            in_channels=X_seq_train.shape[1],
            n_cont_features=X_cont_train.shape[1],
            hidden_channels=64,
            kernel_sizes=[3, 5, 7, 15, 21, 51, 101],
            out_dim=1,
        )
        criterion = torch.nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        # One cosine cycle spans the whole training run of this fold
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=num_epochs, eta_min=1e-6)

        logdir = f"./logdir/{timestamp}_fold{i}"
        runner.train(
            model=model,
            criterion=criterion,
            optimizer=optimizer,
            scheduler=scheduler,
            loaders=loaders,
            logdir=logdir,
            num_epochs=num_epochs,
            verbose=True,
        )

        # Out-of-fold predictions and score for this fold
        valid_pred = _predict_with_best_checkpoint(runner, model, valid_loader,
                                                   logdir)
        oof_preds[val_idx] = valid_pred
        score = average_precision_score(y_train[val_idx], valid_pred)
        cv_scores.append(score)
        print("score", score)

        # Each fold contributes an equal share to the averaged test prediction
        test_pred = _predict_with_best_checkpoint(runner, model, test_loader,
                                                  logdir)
        test_preds += test_pred / n_folds
    return oof_preds, test_preds, cv_scores