def save_metadata(filename,
                  model,
                  n_epoch,
                  dev_dataloader,
                  optimizer,
                  criterion,
                  val_dataloader,
                  scheduler=None):
    file_path = os.path.join(result_metadata_path, '{}.yaml'.format(filename))
    n_dev, bs_dev, _ = get_batch_info(dev_dataloader)

    metadata = OrderedDict()
    metadata['model'] = {'name': model.__class__.__name__}
    metadata['n_epoch'] = n_epoch
    metadata['train_dataset'] = {'n_obs': n_dev, 'batch_size': bs_dev}  # the "dev" loader is the training split in these examples
    metadata['optimizer'] = {
        'name': optimizer.__class__.__name__,
        'params': optimizer.defaults
    }
    metadata['criterion'] = {'name': criterion.__class__.__name__}
    if scheduler is not None:
        metadata['scheduler'] = {
            'name': scheduler.__class__.__name__,
            'params': scheduler.state_dict()
        }
    if val_dataloader:
        n_val, bs_val, _ = get_batch_info(val_dataloader)
        metadata['val_dataset'] = {'n_obs': n_val, 'batch_size': bs_val}
    write_yaml(metadata, file_path)
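Every example below calls a get_batch_info helper that isn't included. Judging from the call sites, it returns the observation count, the batch size, and the number of batches per epoch for a DataLoader; a minimal sketch under that assumption:

import math

def get_batch_info(dataloader):
    # Assumed helper (not shown in the source): summarize a DataLoader.
    n_obs = len(dataloader.dataset)
    batch_size = dataloader.batch_size
    batch_per_epoch = math.ceil(n_obs / batch_size)  # count the final partial batch
    return n_obs, batch_size, batch_per_epoch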
Example #2
def fit_model(
    model,
    n_epoch,
    dev_dataloader,
    optimizer,
    criterion,
    loss_fn,
    metric_fn,
    val_dataloader=None,
    checkpoint=False,
    model_fn="pytorch",
):
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
            t.set_postfix({"loss": loss.item(), "metric": metric.item()})
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
        if checkpoint:
            model_filename = "{}_{}".format(model_fn, idx_epoch)
            save_checkpoint(model, optimizer, model_filename)
    return model
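fit_model delegates the optimization step to a train_step helper that isn't shown; it presumably wraps the standard PyTorch zero-grad/backward/step cycle:

def train_step(optimizer, loss):
    # Assumed implementation: the canonical PyTorch update cycle.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()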
Example #3
def predict_model_full(model, test_dataloader):
    n_obs, batch_size, batch_per_epoch = get_batch_info(test_dataloader)
    target_list, x_min_list, y_min_list, width_list, height_list = ([], [], [],
                                                                    [], [])
    model = model.eval()
    t = tqdm(enumerate(test_dataloader), total=batch_per_epoch)
    for idx, data in t:
        img = data
        with torch.no_grad():  # inference only; torch.sigmoid replaces the deprecated F.sigmoid
            prediction = torch.sigmoid(model(img))
        prediction_array = prediction.cpu().numpy()
        target, x_min, y_min, width, height = [
            prediction_array[:, i] for i in range(5)
        ]
        img_h, img_w = 1024.0, 1024.0  # RSNA pneumonia CXRs are 1024 x 1024 pixels
        x_min, y_min, width, height = denormalize_bb(img_w, img_h, x_min,
                                                     y_min, width, height)
        target_list.extend(target.reshape(-1).tolist())
        x_min_list.extend(x_min.reshape(-1).tolist())
        y_min_list.extend(y_min.reshape(-1).tolist())
        width_list.extend(width.reshape(-1).tolist())
        height_list.extend(height.reshape(-1).tolist())
    return pd.DataFrame({
        "patientId": test_dataloader.dataset.patientId,
        "target": target_list,
        "x_min": x_min_list,
        "y_min": y_min_list,
        "width": width_list,
        "height": height_list,
    })
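denormalize_bb isn't defined here. Assuming the model predicts box coordinates normalized to [0, 1], one plausible implementation simply rescales them to pixel units:

def denormalize_bb(img_w, img_h, x_min, y_min, width, height):
    # Assumed helper: map [0, 1]-normalized boxes back to pixel coordinates.
    return x_min * img_w, y_min * img_h, width * img_w, height * img_h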
Example #4
def fit_model_full(
    model,
    n_epoch,
    dev_dataloader,
    optimizer,
    criterion,
    callbacks=None,
    val_dataloader=None,
):
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    total_lossr, label_lossr, bb_lossr = (
        LossRecorder(n_epoch, dev_batch_per_epoch),
        LossRecorder(n_epoch, dev_batch_per_epoch),
        LossRecorder(n_epoch, dev_batch_per_epoch),
    )
    lossr_list = [total_lossr, label_lossr, bb_lossr]
    # copy rather than extend in place, so the caller's list (or a shared default) is never mutated
    callbacks = list(callbacks or []) + lossr_list
    for cb in callbacks:
        cb.on_train_begin()

    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):

        model = model.train()

        for cb in callbacks:
            cb.on_epoch_begin(idx_epoch)

        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            for cb in callbacks:
                cb.on_batch_begin(idx_batch)

            loss, label_loss, bb_loss = calc_loss(model, criterion, data)
            train_step(optimizer, loss)
            smooth_loss, smooth_label_loss, smooth_bb_loss = record_loss(
                lossr_list,
                [loss.item(), label_loss.item(),
                 bb_loss.item()],
                train=True)
            t.set_postfix({
                "loss": smooth_loss,
                "label_loss": smooth_label_loss,
                "bb_loss": smooth_bb_loss,
            })

            for cb in callbacks:
                cb.on_batch_end(idx_batch)
        if val_dataloader is not None:
            val_loss, val_loss_label, val_loss_bb = validate_model(
                model, criterion, val_dataloader)
            record_loss(lossr_list, [val_loss, val_loss_label, val_loss_bb])

        for cb in callbacks:
            cb.on_epoch_end(idx_epoch)

    for cb in callbacks:
        cb.on_train_end()

    return model, callbacks
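The callbacks consumed here are assumed to expose the usual lifecycle hooks; a minimal no-op base class matching the calls in this variant of fit_model_full would be:

class Callback:
    # Assumed interface: every hook this fit_model_full variant invokes, as no-ops.
    def on_train_begin(self): pass
    def on_epoch_begin(self, idx_epoch): pass
    def on_batch_begin(self, idx_batch): pass
    def on_batch_end(self, idx_batch): pass
    def on_epoch_end(self, idx_epoch): pass
    def on_train_end(self): pass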
def lr_find(model,
            dataloader,
            criterion,
            loss_fn,
            metric_fn,
            min_lr=1e-8,
            max_lr=10.0):
    clone_model = copy.deepcopy(model)
    optimizer = optim.SGD(clone_model.parameters(), lr=min_lr)
    n_epoch = 1
    n_obs, batch_size, batch_per_epoch = get_batch_info(dataloader)
    lr_finder = LR_Finder(n_epoch, batch_per_epoch, min_lr, max_lr)
    loss_recorder = LossRecorder(n_epoch, batch_per_epoch, is_val=False)
    model, callbacks = fit_model_full(
        model=clone_model,
        n_epoch=n_epoch,
        dev_dataloader=dataloader,
        optimizer=optimizer,
        criterion=criterion,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        callbacks=[lr_finder, loss_recorder],
        val_dataloader=None,
    )
    train_loss = loss_recorder.smooth_batch_list
    # drop trailing points where the loss exploded so the plot stays readable
    while len(train_loss) > 1 and train_loss[-1] > (train_loss[-2] * 2.):
        logger.info("removing last train_loss...")
        train_loss.pop()
    # popping mutated smooth_batch_list in place, so trim the x axis to match
    sns.lineplot(x=lr_finder.lr_schedule[:len(train_loss)], y=train_loss)
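LR_Finder's lr_schedule isn't shown; for Smith's LR range test it is typically an exponential sweep from min_lr to max_lr, one step per batch, e.g.:

import numpy as np

def lr_range_schedule(min_lr, max_lr, n_steps):
    # Hypothetical schedule: multiply the LR by a constant factor each batch
    # so it moves from min_lr to max_lr over n_steps updates.
    return min_lr * (max_lr / min_lr) ** (np.arange(n_steps) / max(n_steps - 1, 1))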
def validate_model_full(model, criterion, val_dataloader):
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(val_dataloader)
    total_val_loss, total_val_loss_label, total_val_loss_bb = (
        np.zeros(val_batch_per_epoch),
        np.zeros(val_batch_per_epoch),
        np.zeros(val_batch_per_epoch),
    )
    model = model.eval()
    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():  # validation needs no gradient tracking
        for idx, data in t:
            val_loss, val_loss_label, val_loss_bb = calc_loss(model, criterion, data)
            val_loss, val_loss_label, val_loss_bb = (
                val_loss.item(),
                val_loss_label.item(),
                val_loss_bb.item(),
            )
            total_val_loss[idx], total_val_loss_label[idx], total_val_loss_bb[idx] = (
                val_loss,
                val_loss_label,
                val_loss_bb,
            )
            t.set_postfix(
                {"loss": val_loss, "loss_label": val_loss_label, "loss_bb": val_loss_bb}
            )
    return total_val_loss.mean(), total_val_loss_label.mean(), total_val_loss_bb.mean()
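calc_loss is shared by the full-model examples but never defined. One plausible shape, assuming the model returns a label logit plus box coordinates and that criterion scores the boxes (the real helper may weight the terms differently):

import torch.nn.functional as F

def calc_loss(model, criterion, data):
    # Sketch only: binary cross-entropy on the pneumonia label head plus the
    # provided criterion on the bounding-box head, summed into a total loss.
    img, label, bb = data
    pred_label, pred_bb = model(img)
    label_loss = F.binary_cross_entropy_with_logits(pred_label, label)
    bb_loss = criterion(pred_bb, bb)
    return label_loss + bb_loss, label_loss, bb_loss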
Example #7
def new_whale_threshold(low, high, step, model, predict_proba_fn,
                        val_dataloader):
    model = model.eval()
    row = []
    target_array_list = []
    pred_array_list = []
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(
        val_dataloader)
    thresh_range = np.arange(low, high + 1e-8, step)  # epsilon keeps the upper bound inclusive

    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            target, prediction = predict_proba_fn(model, data)
            target_array_list.append(target)
            pred_array_list.append(prediction)
    target_array = np.vstack(target_array_list)
    prediction_array = np.vstack(pred_array_list)

    for threshold in tqdm(thresh_range, total=len(thresh_range)):
        # column 0 is assumed to be the "new_whale" class: pin its score to the
        # candidate threshold, then take each image's top-5 predictions
        prediction_array[:, 0] = threshold
        prediction_indices = (-prediction_array).argsort()[:, :5]
        mapk_array = mapk(target_array, prediction_indices, 5)
        mapk_result = mapk_array.mean()
        row.append({'threshold': threshold, 'mapk': mapk_result})
    return pd.DataFrame(row)
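mapk isn't included either. With a single ground-truth label per image (the Humpback Whale setting), average precision at 5 reduces to the reciprocal rank of the true label within the top five; a sketch assuming targets arrive as a column of class indices:

import numpy as np

def mapk(target_array, prediction_indices, k=5):
    # Assumed metric: per-sample AP@k with one true label per row.
    true_labels = np.asarray(target_array).reshape(-1)
    ap = np.zeros(len(true_labels))
    for rank in range(k):
        hit = prediction_indices[:, rank] == true_labels
        ap[hit] = 1.0 / (rank + 1)  # argsort rows are distinct, so no double hits
    return ap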
Example #8
def predict_model(model, test_dataloader, pred_fn):
    n_obs, batch_size, batch_per_epoch = get_batch_info(test_dataloader)
    prediction_list = []
    model = model.eval()
    t = tqdm(enumerate(test_dataloader), total=batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            prediction = pred_fn(model, data)
            prediction_list.extend(prediction)
    return prediction_list
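pred_fn is injected by the caller; a hypothetical one for the bounding-box model might just run the batch and convert it to Python lists:

def pred_fn(model, data):
    # Hypothetical pred_fn: one (x_min, y_min, width, height) list per image.
    return model(data).cpu().numpy().tolist()

prediction_list = predict_model(model, test_dataloader, pred_fn)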
def validate_model(model, criterion, loss_fn, metric_fn, val_dataloader):
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(
        val_dataloader)
    total_loss = np.zeros(val_batch_per_epoch)
    total_metric = np.zeros(val_batch_per_epoch)
    model = model.eval()
    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            loss = loss_fn(model, criterion, data)
            metric = metric_fn(model, data)
            total_loss[idx] = loss.item()
            total_metric[idx] = metric.item()
    return total_loss.mean(), total_metric.mean()
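loss_fn and metric_fn follow an implicit contract: each takes the model and a raw batch. A sketch for the binary label model (the accuracy metric here is hypothetical):

import torch

def loss_fn(model, criterion, data):
    # Assumed contract: unpack an (input, target) batch, return the loss tensor.
    img, target = data
    return criterion(model(img), target)

def metric_fn(model, data):
    # Hypothetical metric: accuracy of the sigmoid output at a 0.5 threshold.
    img, target = data
    pred = (torch.sigmoid(model(img)) > 0.5).float()
    return (pred == target).float().mean()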
Example #10
def fit_model_full(
    model,
    n_epoch,
    dev_dataloader,
    optimizer,
    criterion,
    loss_fn,
    metric_fn,
    callbacks=None,
    val_dataloader=None,
):
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    callbacks = callbacks or []  # normalize the default without sharing a mutable list
    for cb in callbacks:
        cb.on_train_begin(model, optimizer)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        for cb in callbacks:
            cb.on_epoch_begin(idx_epoch, model, optimizer)
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            for cb in callbacks:
                cb.on_batch_begin(idx_batch, model, optimizer)
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
                t.set_postfix({"loss": loss.item(), "metric": metric.item()})
                for cb in callbacks:
                    cb.on_batch_end(idx_batch, model, optimizer,
                                    loss.item(), metric.item())
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
            for cb in callbacks:
                cb.on_epoch_end(idx_epoch, model, optimizer, val_loss,
                                val_metric)
        else:
            for cb in callbacks:
                cb.on_epoch_end(idx_epoch, model, optimizer)
    for cb in callbacks:
        cb.on_train_end(model, optimizer)
    return model
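This variant passes the model and optimizer into every hook, so callbacks can act on training state; a hypothetical best-model checkpointer under that interface:

import torch

class BestModelCheckpoint:
    # Hypothetical callback: save whenever validation loss improves.
    def __init__(self, path="best_model.pt"):
        self.path, self.best = path, float("inf")
    def on_train_begin(self, model, optimizer): pass
    def on_epoch_begin(self, idx_epoch, model, optimizer): pass
    def on_batch_begin(self, idx_batch, model, optimizer): pass
    def on_batch_end(self, idx_batch, model, optimizer, loss, metric): pass
    def on_epoch_end(self, idx_epoch, model, optimizer, val_loss=None, val_metric=None):
        if val_loss is not None and val_loss < self.best:
            self.best = val_loss
            torch.save(model.state_dict(), self.path)
    def on_train_end(self, model, optimizer): pass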
Example #11
def fit_model(model,
              n_epoch,
              dev_dataloader,
              optimizer,
              criterion,
              loss_fn,
              metric_fn,
              val_dataloader=None,
              checkpoint=False,
              model_filename="checkpoint",
              **kwargs):
    cur_time = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    os.makedirs(os.path.join(model_cp_path, cur_time), exist_ok=True)
    save_metadata(cur_time, model, n_epoch, dev_dataloader, optimizer,
                  criterion, val_dataloader)
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
            t.set_postfix({"loss": loss.item(), "metric": metric.item()})
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
        if checkpoint:
            filename = "{}_{}".format(model_filename, idx_epoch)
            save_checkpoint(model, optimizer, cur_time, filename)
    return model
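save_checkpoint here takes a run directory (cur_time) plus a filename; a sketch consistent with that call, assuming model_cp_path is the checkpoint root:

import os
import torch

def save_checkpoint(model, optimizer, dirname, filename):
    # Assumed helper: persist model and optimizer state under model_cp_path/dirname.
    path = os.path.join(model_cp_path, dirname, "{}.tar".format(filename))
    torch.save({
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    }, path)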
Example #12
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    bb_df = pd.read_csv(bb_repo)
    train_idx = np.arange(len(bb_df))
    dev_idx, val_idx = train_test_split(train_idx, test_size=0.20)
    dev_df = bb_df.iloc[dev_idx, :].reset_index(drop=True)
    val_df = bb_df.iloc[val_idx, :].reset_index(drop=True)

    bb_train_dataset = BBDataset(True, device, dev_df)
    bb_dev_dataset = BBDataset(True, device, dev_df)
    bb_val_dataset = BBDataset(True, device, val_df)
    bb_test_dataset = BBDataset(False, device)
    train_dataloader = DataLoader(bb_train_dataset, batch_size=32)
    dev_dataloader = DataLoader(bb_dev_dataset, batch_size=32, shuffle=True)
    val_dataloader = DataLoader(bb_val_dataset, batch_size=32)
    test_dataloader = DataLoader(bb_test_dataset, batch_size=32)

    preload_model = torchvision.models.resnet50(pretrained=True).to(device)
    header_model = Res50BBHead([1000], 0.5).to(device)
    model = ResPneuNet(preload_model, header_model)

    n_epoch = 5
    optimizer = optim.Adam(
        [
            {
                "params": model.preload_backbone.parameters(),
                "lr": 0.0001
            },
            {
                "params": model.header.parameters(),
                "lr": 0.001
            },
        ],
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    )
    criterion = nn.L1Loss().to(device)

    n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
    clr = CLR(n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
    callbacks = [clr]

    model = fit_model(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        val_dataloader,
        checkpoint=True,
        model_fn="bb",
    )

    prediction = predict_model(model, test_dataloader, pred_fn)
    string_prediction = [
        "{} {} {} {}".format(x[0], x[1], x[2], x[3]) for x in prediction
    ]
    patientid = test_dataloader.dataset.patientId
    pneu_bb = string_prediction
    bb_pred_df = pd.DataFrame({"name": patientid, "label": pneu_bb})
    bb_pred_df.to_csv(bb_predict_repo, index=False)
    save_checkpoint(model, optimizer, fname="bb")
Example #13
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    label_df = pd.read_csv(label_repo)
    train_idx = np.arange(len(label_df))
    dev_idx, val_idx = train_test_split(train_idx, test_size=0.20)
    dev_df = label_df.iloc[dev_idx, :].reset_index(drop=True)
    val_df = label_df.iloc[val_idx, :].reset_index(drop=True)

    label_train_dataset = LabelDataset(True, device, label_df)
    label_dev_dataset = LabelDataset(True, device, dev_df)
    label_val_dataset = LabelDataset(True, device, val_df)
    label_test_dataset = LabelDataset(False, device)
    train_dataloader = DataLoader(label_train_dataset, batch_size=32)
    dev_dataloader = DataLoader(label_dev_dataset, batch_size=32, shuffle=True)
    val_dataloader = DataLoader(label_val_dataset, batch_size=32)
    test_dataloader = DataLoader(label_test_dataset, batch_size=32)

    preload_model = torchvision.models.resnet50(pretrained=True).to(device)
    header_model = Res50ClassHead([1000], 0.5).to(device)
    model = ResPneuNet(preload_model, header_model)

    n_epoch = 5
    optimizer = optim.Adam(
        [
            {
                "params": model.preload_backbone.parameters(),
                "lr": 0.0001
            },
            {
                "params": model.header.parameters(),
                "lr": 0.001
            },
        ],
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    )
    criterion = nn.BCEWithLogitsLoss().to(device)
    n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
    clr = CLR(n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
    callbacks = [clr]

    model = fit_model(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        val_dataloader,
        checkpoint=True,
        model_fn="label",
    )

    prediction = predict_model(model, test_dataloader, pred_fn)
    patientid = test_dataloader.dataset.patientId
    pneu_prob = prediction
    pred_df = pd.DataFrame({"name": patientid, "prob": pneu_prob})
    pred_df.to_csv(label_predict_repo, index=False)