def save_metadata(filename, model, n_epoch, dev_dataloader, optimizer,
                  criterion, val_dataloader, scheduler=None):
    file_path = os.path.join(result_metadata_path, '{}.yaml'.format(filename))
    n_dev, bs_dev, _ = get_batch_info(dev_dataloader)
    metadata = OrderedDict()
    metadata['model'] = {'name': model.__class__.__name__}
    metadata['n_epoch'] = n_epoch
    metadata['train_dataset'] = {'n_obs': n_dev, 'batch_size': bs_dev}
    metadata['optimizer'] = {
        'name': optimizer.__class__.__name__,
        'params': optimizer.defaults
    }
    metadata['criterion'] = {'name': criterion.__class__.__name__}
    if scheduler:
        metadata['scheduler'] = {
            'name': scheduler.__class__.__name__,
            'params': scheduler.state_dict()
        }
    if val_dataloader:
        n_val, bs_val, _ = get_batch_info(val_dataloader)
        metadata['val_dataset'] = {'n_obs': n_val, 'batch_size': bs_val}
    write_yaml(metadata, file_path)
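# get_batch_info is imported from a shared utils module and not defined in
# this file. A minimal sketch of what it likely does, inferred from how its
# three return values are used throughout (observation count, batch size,
# batches per epoch):
import math

def get_batch_info(dataloader):
    """Return (n_obs, batch_size, n_batch_per_epoch) for a DataLoader."""
    n_obs = len(dataloader.dataset)
    batch_size = dataloader.batch_size
    n_batch_per_epoch = math.ceil(n_obs / batch_size)
    return n_obs, batch_size, n_batch_per_epoch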
def fit_model(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        val_dataloader=None,
        checkpoint=False,
        model_fn="pytorch",
):
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
            t.set_postfix({"loss": loss.item(), "metric": metric.item()})
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
        if checkpoint:
            model_filename = "{}_{}".format(model_fn, idx_epoch)
            save_checkpoint(model, optimizer, model_filename)
    return model
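# train_step is also defined elsewhere; it is almost certainly the standard
# optimizer cycle. A minimal sketch, assuming loss is the scalar tensor
# returned by loss_fn above:
def train_step(optimizer, loss):
    optimizer.zero_grad()  # clear gradients left over from the last batch
    loss.backward()        # backpropagate through the model
    optimizer.step()       # apply the parameter update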
def predict_model_full(model, test_dataloader):
    n_obs, batch_size, batch_per_epoch = get_batch_info(test_dataloader)
    target_list, x_min_list, y_min_list, width_list, height_list = (
        [], [], [], [], [])
    model = model.eval()
    t = tqdm(enumerate(test_dataloader), total=batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            img = data
            # torch.sigmoid: F.sigmoid is deprecated
            prediction = torch.sigmoid(model(img))
            prediction_array = prediction.cpu().numpy()
            target, x_min, y_min, width, height = [
                prediction_array[:, i] for i in range(5)
            ]
            img_h, img_w = 1024.0, 1024.0
            x_min, y_min, width, height = denormalize_bb(
                img_w, img_h, x_min, y_min, width, height)
            target_list.extend(target.reshape(-1).tolist())
            x_min_list.extend(x_min.reshape(-1).tolist())
            y_min_list.extend(y_min.reshape(-1).tolist())
            width_list.extend(width.reshape(-1).tolist())
            height_list.extend(height.reshape(-1).tolist())
    return pd.DataFrame({
        "patientId": test_dataloader.dataset.patientId,
        "target": target_list,
        "x_min": x_min_list,
        "y_min": y_min_list,
        "width": width_list,
        "height": height_list,
    })
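# denormalize_bb is assumed to invert whatever normalisation the dataset
# applied to the boxes. A minimal sketch, assuming the model predicts box
# coordinates as fractions of the 1024 x 1024 image used above:
def denormalize_bb(img_w, img_h, x_min, y_min, width, height):
    return x_min * img_w, y_min * img_h, width * img_w, height * img_h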
def fit_model_full(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        callbacks=None,
        val_dataloader=None,
):
    # Avoid the mutable-default pitfall: the original signature used
    # callbacks=[] and then extended that shared default list on every call.
    callbacks = list(callbacks) if callbacks is not None else []
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    total_lossr, label_lossr, bb_lossr = (
        LossRecorder(n_epoch, dev_batch_per_epoch),
        LossRecorder(n_epoch, dev_batch_per_epoch),
        LossRecorder(n_epoch, dev_batch_per_epoch),
    )
    lossr_list = [total_lossr, label_lossr, bb_lossr]
    callbacks.extend(lossr_list)
    for cb in callbacks:
        cb.on_train_begin()
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        model = model.train()
        for cb in callbacks:
            cb.on_epoch_begin(idx_epoch)
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            for cb in callbacks:
                cb.on_batch_begin(idx_batch)
            loss, label_loss, bb_loss = calc_loss(model, criterion, data)
            train_step(optimizer, loss)
            smooth_loss, smooth_label_loss, smooth_bb_loss = record_loss(
                lossr_list,
                [loss.item(), label_loss.item(), bb_loss.item()],
                train=True)
            t.set_postfix({
                "loss": smooth_loss,
                "label_loss": smooth_label_loss,
                "bb_loss": smooth_bb_loss,
            })
            for cb in callbacks:
                cb.on_batch_end(idx_batch)
        if val_dataloader is not None:
            # validate_model_full matches this three-argument call;
            # validate_model expects loss_fn and metric_fn as well.
            val_loss, val_loss_label, val_loss_bb = validate_model_full(
                model, criterion, val_dataloader)
            record_loss(lossr_list, [val_loss, val_loss_label, val_loss_bb])
        for cb in callbacks:
            cb.on_epoch_end(idx_epoch)
    for cb in callbacks:
        cb.on_train_end()
    return model, callbacks
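# calc_loss is shared with validate_model_full below but defined elsewhere.
# A heavily hedged sketch of the convention its three return values imply,
# assuming data is an (img, label, bb) triple and the model returns a label
# prediction plus box coordinates; names and loss weighting are hypothetical:
def calc_loss(model, criterion, data):
    img, label, bb = data
    label_pred, bb_pred = model(img)
    label_loss = criterion(label_pred, label)
    bb_loss = criterion(bb_pred, bb)
    loss = label_loss + bb_loss  # assumed equal weighting of the two terms
    return loss, label_loss, bb_loss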
def lr_find(model,
            dataloader,
            criterion,
            loss_fn,
            metric_fn,
            min_lr=1e-8,
            max_lr=10.0):
    clone_model = copy.deepcopy(model)
    optimizer = optim.SGD(clone_model.parameters(), lr=min_lr)
    n_epoch = 1
    n_obs, batch_size, batch_per_epoch = get_batch_info(dataloader)
    lr_finder = LR_Finder(n_epoch, batch_per_epoch, min_lr, max_lr)
    loss_recorder = LossRecorder(n_epoch, batch_per_epoch, is_val=False)
    # The fit_model_full variant taking loss_fn/metric_fn returns only the
    # model; lr_finder and loss_recorder are already in scope, so the old
    # "model, callbacks = ..." unpack would fail.
    model = fit_model_full(
        model=clone_model,
        n_epoch=n_epoch,
        dev_dataloader=dataloader,
        optimizer=optimizer,
        criterion=criterion,
        loss_fn=loss_fn,
        metric_fn=metric_fn,
        callbacks=[lr_finder, loss_recorder],
        val_dataloader=None,
    )
    # Drop the exploding tail of the loss curve; guard against popping past
    # the start of the list.
    train_loss = loss_recorder.smooth_batch_list
    while len(train_loss) > 1 and train_loss[-1] > (train_loss[-2] * 2.):
        logger.info("removing last train_loss...")
        train_loss.pop()
    # Trim the lr schedule to the same length so the plot axes line up.
    sns.lineplot(x=lr_finder.lr_schedule[:len(train_loss)], y=train_loss)
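# Typical usage (hypothetical values): run one throwaway epoch on a deep copy
# of the model while LR_Finder sweeps the learning rate from min_lr to
# max_lr, then pick a rate just before the plotted loss curve bottoms out.
# lr_find(model, dev_dataloader, criterion, loss_fn, metric_fn,
#         min_lr=1e-6, max_lr=1.0)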
def validate_model_full(model, criterion, val_dataloader):
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(
        val_dataloader)
    total_val_loss, total_val_loss_label, total_val_loss_bb = (
        np.zeros(val_batch_per_epoch),
        np.zeros(val_batch_per_epoch),
        np.zeros(val_batch_per_epoch),
    )
    model = model.eval()
    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            val_loss, val_loss_label, val_loss_bb = calc_loss(
                model, criterion, data)
            val_loss, val_loss_label, val_loss_bb = (
                val_loss.item(),
                val_loss_label.item(),
                val_loss_bb.item(),
            )
            total_val_loss[idx] = val_loss
            total_val_loss_label[idx] = val_loss_label
            total_val_loss_bb[idx] = val_loss_bb
            t.set_postfix({
                "loss": val_loss,
                "loss_label": val_loss_label,
                "loss_bb": val_loss_bb,
            })
    return (total_val_loss.mean(), total_val_loss_label.mean(),
            total_val_loss_bb.mean())
def new_whale_threshold(low, high, step, model, predict_proba_fn,
                        val_dataloader):
    model = model.eval()
    row = []
    target_array_list = []
    pred_array_list = []
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(
        val_dataloader)
    thresh_range = np.arange(low, high + 1e-8, step)
    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            target, prediction = predict_proba_fn(model, data)
            target_array_list.append(target)
            pred_array_list.append(prediction)
    target_array = np.vstack(target_array_list)
    prediction_array = np.vstack(pred_array_list)
    for threshold in tqdm(thresh_range, total=len(thresh_range)):
        prediction_array[:, 0] = threshold
        prediction_indices = (-prediction_array).argsort()[:, :5]
        mapk_array = mapk(target_array, prediction_indices, 5)
        mapk_result = mapk_array.mean()
        row.append({'threshold': threshold, 'mapk': mapk_result})
    return pd.DataFrame(row)
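# mapk is assumed to be the usual mean-average-precision@k used in Kaggle
# competitions, returning one AP@k score per row so the caller can .mean()
# it. A minimal single-label sketch, assuming column 0 of target_array holds
# the index of the true class:
def mapk(target_array, prediction_indices, k=5):
    """Return an array of AP@k scores, one per observation."""
    scores = np.zeros(len(target_array))
    for i, (true_label, preds) in enumerate(
            zip(target_array[:, 0], prediction_indices)):
        for rank, pred in enumerate(preds[:k]):
            if pred == true_label:
                scores[i] = 1.0 / (rank + 1)  # single correct label per row
                break
    return scores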
def predict_model(model, test_dataloader, pred_fn):
    n_obs, batch_size, batch_per_epoch = get_batch_info(test_dataloader)
    prediction_list = []
    model = model.eval()
    t = tqdm(enumerate(test_dataloader), total=batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            prediction = pred_fn(model, data)
            prediction_list.extend(prediction)
    return prediction_list
def validate_model(model, criterion, loss_fn, metric_fn, val_dataloader):
    n_val_obs, val_batch_size, val_batch_per_epoch = get_batch_info(
        val_dataloader)
    total_loss = np.zeros(val_batch_per_epoch)
    total_metric = np.zeros(val_batch_per_epoch)
    model = model.eval()
    t = tqdm(enumerate(val_dataloader), total=val_batch_per_epoch)
    with torch.no_grad():
        for idx, data in t:
            loss = loss_fn(model, criterion, data)
            metric = metric_fn(model, data)
            total_loss[idx] = loss.item()
            total_metric[idx] = metric.item()
    return total_loss.mean(), total_metric.mean()
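# loss_fn and metric_fn are injected by the training scripts and never
# defined in this file. A minimal sketch of the calling convention they
# appear to follow, assuming data is an (input, target) pair; both the names
# and the metric choice here are hypothetical:
def example_loss_fn(model, criterion, data):
    img, target = data
    return criterion(model(img), target)

def example_metric_fn(model, data):
    img, target = data
    # Hypothetical metric: mean absolute error between output and target.
    return (model(img) - target).abs().mean()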
def fit_model_full(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        callbacks=None,
        val_dataloader=None,
):
    # Plain for-loops instead of side-effect list comprehensions, and no
    # mutable default argument.
    callbacks = callbacks if callbacks is not None else []
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    for cb in callbacks:
        cb.on_train_begin(model, optimizer)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        for cb in callbacks:
            cb.on_epoch_begin(idx_epoch, model, optimizer)
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            for cb in callbacks:
                cb.on_batch_begin(idx_batch, model, optimizer)
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
            t.set_postfix({"loss": loss.item(), "metric": metric.item()})
            for cb in callbacks:
                cb.on_batch_end(idx_batch, model, optimizer, loss.item(),
                                metric.item())
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
            for cb in callbacks:
                cb.on_epoch_end(idx_epoch, model, optimizer, val_loss,
                                val_metric)
        else:
            for cb in callbacks:
                cb.on_epoch_end(idx_epoch, model, optimizer)
    for cb in callbacks:
        cb.on_train_end(model, optimizer)
    return model
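# The callback classes (CLR, LR_Finder, LossRecorder) are defined elsewhere.
# A minimal sketch of the base interface this training loop expects, with
# every hook an optional no-op, inferred from the calls above:
class Callback:
    def on_train_begin(self, model, optimizer):
        pass

    def on_epoch_begin(self, idx_epoch, model, optimizer):
        pass

    def on_batch_begin(self, idx_batch, model, optimizer):
        pass

    def on_batch_end(self, idx_batch, model, optimizer, loss, metric):
        pass

    def on_epoch_end(self, idx_epoch, model, optimizer,
                     val_loss=None, val_metric=None):
        pass

    def on_train_end(self, model, optimizer):
        pass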
def fit_model(model,
              n_epoch,
              dev_dataloader,
              optimizer,
              criterion,
              loss_fn,
              metric_fn,
              val_dataloader=None,
              checkpoint=False,
              model_filename="checkpoint",
              **kwargs):
    cur_time = datetime.datetime.now().strftime('%Y%m%d-%H%M')
    if not os.path.exists(os.path.join(model_cp_path, cur_time)):
        os.mkdir(os.path.join(model_cp_path, cur_time))
    save_metadata(cur_time, model, n_epoch, dev_dataloader, optimizer,
                  criterion, val_dataloader)
    n_dev_obs, dev_batch_size, dev_batch_per_epoch = get_batch_info(
        dev_dataloader)
    for idx_epoch in tqdm(range(n_epoch), total=n_epoch):
        t = tqdm(enumerate(dev_dataloader), total=dev_batch_per_epoch)
        for idx_batch, data in t:
            model = model.train()
            loss = loss_fn(model, criterion, data)
            train_step(optimizer, loss)
            with torch.no_grad():
                model = model.eval()
                metric = metric_fn(model, data)
            t.set_postfix({"loss": loss.item(), "metric": metric.item()})
        if val_dataloader is not None:
            val_loss, val_metric = validate_model(model, criterion, loss_fn,
                                                  metric_fn, val_dataloader)
            print(" val_loss : {}, val_metric : {}".format(
                val_loss, val_metric))
        if checkpoint:
            filename = "{}_{}".format(model_filename, idx_epoch)
            save_checkpoint(model, optimizer, cur_time, filename)
    return model
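# save_checkpoint lives in a utils module. A minimal sketch of the likely
# behaviour for the four-argument form used above, assuming model_cp_path is
# the checkpoint root directory created by fit_model:
def save_checkpoint(model, optimizer, dirname, filename):
    state = {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    }
    torch.save(state, os.path.join(model_cp_path, dirname,
                                   "{}.pth".format(filename)))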
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    bb_df = pd.read_csv(bb_repo)
    train_idx = np.arange(len(bb_df))
    dev_idx, val_idx = train_test_split(train_idx, test_size=0.20)
    dev_df = bb_df.iloc[dev_idx, :].reset_index(drop=True)
    val_df = bb_df.iloc[val_idx, :].reset_index(drop=True)
    bb_train_dataset = BBDataset(True, device, dev_df)
    bb_dev_dataset = BBDataset(True, device, dev_df)
    bb_val_dataset = BBDataset(True, device, val_df)
    bb_test_dataset = BBDataset(False, device)
    train_dataloader = DataLoader(bb_train_dataset, batch_size=32)
    dev_dataloader = DataLoader(bb_dev_dataset, batch_size=32, shuffle=True)
    val_dataloader = DataLoader(bb_val_dataset, batch_size=32)
    test_dataloader = DataLoader(bb_test_dataset, batch_size=32)
    preload_model = torchvision.models.resnet50(pretrained=True).to(device)
    header_model = Res50BBHead([1000], 0.5).to(device)
    model = ResPneuNet(preload_model, header_model)
    n_epoch = 5
    optimizer = optim.Adam(
        [
            {"params": model.preload_backbone.parameters(), "lr": 0.0001},
            {"params": model.header.parameters(), "lr": 0.001},
        ],
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    )
    criterion = nn.L1Loss().to(device)
    n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
    clr = CLR(n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
    # NOTE: this CLR callback is built but never passed anywhere; fit_model
    # takes no callbacks argument, so it is currently unused.
    callbacks = [clr]
    model = fit_model(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        val_dataloader,
        checkpoint=True,
        model_fn="bb",
    )
    prediction = predict_model(model, test_dataloader, pred_fn)
    string_prediction = [
        "{} {} {} {}".format(x[0], x[1], x[2], x[3]) for x in prediction
    ]
    patientid = test_dataloader.dataset.patientId
    pneu_bb = string_prediction
    bb_pred_df = pd.DataFrame({"name": patientid, "label": pneu_bb})
    bb_pred_df.to_csv(bb_predict_repo, index=False)
    save_checkpoint(model, optimizer, fname="bb")
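# pred_fn is supplied by the caller and not defined in this script. A
# hedged sketch for the bounding-box case, assuming data is the input image
# batch and the head emits four box values per sample; the exact output
# shape and any denormalisation step are assumptions:
def pred_fn(model, data):
    output = model(data)
    return output.cpu().numpy().tolist()  # one [x, y, w, h] list per sample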
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    label_df = pd.read_csv(label_repo)
    train_idx = np.arange(len(label_df))
    dev_idx, val_idx = train_test_split(train_idx, test_size=0.20)
    dev_df = label_df.iloc[dev_idx, :].reset_index(drop=True)
    val_df = label_df.iloc[val_idx, :].reset_index(drop=True)
    label_train_dataset = LabelDataset(True, device, label_df)
    label_dev_dataset = LabelDataset(True, device, dev_df)
    label_val_dataset = LabelDataset(True, device, val_df)
    label_test_dataset = LabelDataset(False, device)
    train_dataloader = DataLoader(label_train_dataset, batch_size=32)
    dev_dataloader = DataLoader(label_dev_dataset, batch_size=32,
                                shuffle=True)
    val_dataloader = DataLoader(label_val_dataset, batch_size=32)
    test_dataloader = DataLoader(label_test_dataset, batch_size=32)
    preload_model = torchvision.models.resnet50(pretrained=True).to(device)
    header_model = Res50ClassHead([1000], 0.5).to(device)
    model = ResPneuNet(preload_model, header_model)
    n_epoch = 5
    optimizer = optim.Adam(
        [
            {"params": model.preload_backbone.parameters(), "lr": 0.0001},
            {"params": model.header.parameters(), "lr": 0.001},
        ],
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=0,
        amsgrad=False,
    )
    criterion = nn.BCEWithLogitsLoss().to(device)
    n_obs, batch_size, n_batch_per_epoch = get_batch_info(dev_dataloader)
    clr = CLR(n_epoch, n_batch_per_epoch, 0.1, 1., 0.95, 0.85, 2)
    callbacks = [clr]  # NOTE: unused; fit_model takes no callbacks argument
    model = fit_model(
        model,
        n_epoch,
        dev_dataloader,
        optimizer,
        criterion,
        loss_fn,
        metric_fn,
        val_dataloader,
        checkpoint=True,
        model_fn="label",
    )
    prediction = predict_model(model, test_dataloader, pred_fn)
    patientid = test_dataloader.dataset.patientId
    pneu_prob = prediction
    # Renamed from label_df to avoid shadowing the input frame read above.
    label_pred_df = pd.DataFrame({"name": patientid, "prob": pneu_prob})
    label_pred_df.to_csv(label_predict_repo, index=False)
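# Assuming each main() above lives in its own script module, the standard
# entry-point guard applies:
if __name__ == "__main__":
    main()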