def main():
    """Predict on the test CSV and write a long-format submission file.

    Reads ``TEST_PATH``, collapses it to one row per image, runs ``predict``
    with the checkpoint at ``model_path`` and writes
    ``../output/{EXP_ID}_sub_st2.csv`` with ``ID`` and ``Label`` columns.
    """
    with timer('load data'):
        frame = pd.read_csv(TEST_PATH)
        # The raw ``ID`` is split on "_" into three parts; the second and
        # third parts become the Image and Diagnosis columns.
        frame[['ID', 'Image', 'Diagnosis']] = frame['ID'].str.split('_', expand=True)
        frame = frame[['Image', 'Diagnosis', 'Label']].drop_duplicates()
        frame = frame.pivot(index='Image', columns='Diagnosis', values='Label').reset_index()
        frame['Image'] = 'ID_' + frame['Image']
        frame = frame[["Image"]]
        ids = frame["Image"].values
        gc.collect()

    with timer('preprocessing'):
        steps = [
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1),
        ]
        test_augmentation = Compose(steps)

        dataset = RSNADatasetTest(
            frame, img_size, IMAGE_PATH,
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            conc_type="concat_all",
            conc_type2="concat_prepost",
            n_tta=N_TTA,
        )
        # shuffle=False keeps the prediction rows in the same order as ``ids``.
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=16, pin_memory=True)
        del frame, dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                       pretrained="imagenet", pool_type="avg")
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        net = torch.nn.DataParallel(net)

    with timer('predict'):
        probs = predict(net, loader, device, n_tta=N_TTA)
        # Keep predictions strictly inside (0, 1).
        probs = np.clip(probs, 1e-6, 1 - 1e-6)

    with timer('sub'):
        submission = pd.DataFrame(probs, columns=TARGET_COLUMNS)
        submission["ID"] = ids
        # unstack() yields one row per (target column, image); reset_index()
        # exposes the target column name as ``level_0``.
        submission = submission.set_index("ID").unstack().reset_index()
        submission["ID"] = submission["ID"] + "_" + submission["level_0"]
        submission = submission.rename(columns={0: "Label"}).drop("level_0", axis=1)
        LOGGER.info(submission.head())
        submission.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
def main():
    """Predict on rows of the training CSV and write a long-format CSV.

    Reads ``TRAIN_PATH``, drops the image ``ID_6431af929`` and re-points
    neighbours that referenced it, runs ``predict`` with the checkpoint at
    ``model_path`` and writes ``../output/{EXP_ID}_train.csv``.
    """
    excluded_id = "ID_6431af929"

    with timer('load data'):
        # NOTE(review): only the first 10 rows are read here; this looks like
        # a debugging limit -- confirm before relying on the output.
        frame = pd.read_csv(TRAIN_PATH)[:10]
        frame = frame[frame.Image != excluded_id].reset_index(drop=True)

        # Rows whose previous/next neighbour is the excluded image fall back
        # to their own image id for that neighbour.
        pre_hit = frame.pre_SOPInstanceUID == excluded_id
        frame.loc[pre_hit, "pre1_SOPInstanceUID"] = frame.loc[pre_hit, "Image"]
        post_hit = frame.post_SOPInstanceUID == excluded_id
        frame.loc[post_hit, "post1_SOPInstanceUID"] = frame.loc[post_hit, "Image"]

        frame = frame[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID"]]
        ids = frame["Image"].values
        gc.collect()

    with timer('preprocessing'):
        steps = [
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1),
        ]
        test_augmentation = Compose(steps)

        dataset = RSNADatasetTest(
            frame, img_size, IMAGE_PATH,
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            three_window=True,
            rescaling=False,
            n_tta=N_TTA,
        )
        # shuffle=False keeps the prediction rows in the same order as ``ids``.
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=16, pin_memory=True)
        del frame, dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                       pretrained="imagenet", pool_type="avg")
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        net = torch.nn.DataParallel(net)

    with timer('predict'):
        probs = predict(net, loader, device, n_tta=N_TTA)
        # Keep predictions strictly inside (0, 1).
        probs = np.clip(probs, 1e-6, 1 - 1e-6)

    with timer('sub'):
        submission = pd.DataFrame(probs, columns=TARGET_COLUMNS)
        submission["ID"] = ids
        # unstack() yields one row per (target column, image); reset_index()
        # exposes the target column name as ``level_0``.
        submission = submission.set_index("ID").unstack().reset_index()
        submission["ID"] = submission["ID"] + "_" + submission["level_0"]
        submission = submission.rename(columns={0: "Label"}).drop("level_0", axis=1)
        LOGGER.info(submission.head())
        submission.to_csv("../output/{}_train.csv".format(EXP_ID), index=False)
def main():
    """Train the CNN on the full training CSV and checkpoint every epoch.

    Reads ``TRAIN_PATH``, drops the image ``ID_6431af929`` and re-points
    neighbour columns that referenced it, builds an augmented training
    loader, then trains for ``epochs`` epochs. After each epoch the weights
    are written to ``models/{EXP_ID}_ep{epoch}.pth``.

    If ``model_path`` is not ``None`` the model is initialised from that
    checkpoint before training.
    """
    excluded_id = "ID_6431af929"

    with timer('load data'):
        df = pd.read_csv(TRAIN_PATH)
        df = df[df.Image != excluded_id].reset_index(drop=True)

        # Rows whose neighbour is the excluded image fall back to a nearer
        # slice. The order matters: the pre2/post2 fix-ups read the
        # pre1/post1 columns after those have been patched.
        pre_hit = df.pre_SOPInstanceUID == excluded_id
        df.loc[pre_hit, "pre1_SOPInstanceUID"] = df.loc[pre_hit, "Image"]
        post_hit = df.post_SOPInstanceUID == excluded_id
        df.loc[post_hit, "post1_SOPInstanceUID"] = df.loc[post_hit, "Image"]
        prepre_hit = df.prepre_SOPInstanceUID == excluded_id
        df.loc[prepre_hit, "pre2_SOPInstanceUID"] = df.loc[prepre_hit, "pre1_SOPInstanceUID"]
        postpost_hit = df.postpost_SOPInstanceUID == excluded_id
        df.loc[postpost_hit, "post2_SOPInstanceUID"] = df.loc[postpost_hit, "post1_SOPInstanceUID"]

        y = df[TARGET_COLUMNS].values
        df = df[["Image", "pre1_SOPInstanceUID", "post1_SOPInstanceUID",
                 "pre2_SOPInstanceUID", "post2_SOPInstanceUID"]]
        gc.collect()

    with timer('preprocessing'):
        train_augmentation = Compose([
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
            ], p=0.5),
            Rotate(limit=30, border_mode=0, p=0.7),
            Resize(img_size, img_size, p=1),
        ])

        train_dataset = RSNADataset(df, y, img_size, IMAGE_PATH, id_colname=ID_COLUMNS,
                                    transforms=train_augmentation, black_crop=False,
                                    subdural_window=True, user_window=2)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  num_workers=8, pin_memory=True)
        del df, train_dataset
        gc.collect()

    with timer('create model'):
        model = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                         pretrained="imagenet", pool_type="avg")
        if model_path is not None:
            model.load_state_dict(torch.load(model_path))
        model.to(device)

        # The first target column is weighted 2x, the others 1x. The weight
        # is created on ``device`` (instead of a hard-coded ``.cuda()``) so
        # it always lives where the model was moved to.
        class_weight = torch.tensor([2, 1, 1, 1, 1, 1], dtype=torch.float32, device=device)
        criterion = torch.nn.BCEWithLogitsLoss(weight=class_weight)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, eps=1e-4)
        model = torch.nn.DataParallel(model)

    with timer('train'):
        for epoch in range(1, epochs + 1):
            # One-off learning-rate decay: from epoch 5 on, lr is 10x smaller.
            if epoch == 5:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = param_group['lr'] * 0.1

            # Re-seed with an epoch-dependent seed.
            seed_torch(SEED + epoch)

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            # Save the wrapped module's weights so the checkpoint keys do not
            # carry the DataParallel "module." prefix.
            torch.save(model.module.state_dict(), 'models/{}_ep{}.pth'.format(EXP_ID, epoch))
def main():
    """Predict on the test CSV and post-process with neighbouring-slice ids.

    Reads ``TEST_PATH``, fills missing previous/next slice ids with the
    row's own ``SOPInstanceUID``, runs ``predict`` with the checkpoint at
    ``model_path``, passes the result through ``postprocess_multitarget``
    and writes ``../output/{EXP_ID}_sub_st2.csv``.
    """
    with timer('load data'):
        frame = pd.read_csv(TEST_PATH)
        # A row without a next/previous slice falls back to its own id.
        # ("post" is filled before "pre", as in the original ordering.)
        for neighbour_col in ("post_SOPInstanceUID", "pre_SOPInstanceUID"):
            frame[neighbour_col] = frame[neighbour_col].fillna(frame["SOPInstanceUID"])
        frame = frame[["Image", "pre_SOPInstanceUID", "post_SOPInstanceUID"]]
        ids = frame["Image"].values
        pre_ids = frame["pre_SOPInstanceUID"].values
        pos_ids = frame["post_SOPInstanceUID"].values
        gc.collect()

    with timer('preprocessing'):
        steps = [
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1),
        ]
        test_augmentation = Compose(steps)

        dataset = RSNADatasetTest(
            frame, img_size, IMAGE_PATH,
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            n_tta=N_TTA,
        )
        # shuffle=False keeps the prediction rows in the same order as ``ids``.
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=16, pin_memory=True)
        del frame, dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                       pretrained="imagenet", pool_type="avg")
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        net = torch.nn.DataParallel(net)

    with timer('predict'):
        probs = predict(net, loader, device, n_tta=N_TTA)
        # Keep predictions strictly inside (0, 1).
        probs = np.clip(probs, 1e-6, 1 - 1e-6)

    with timer('sub'):
        submission = pd.DataFrame(probs, columns=TARGET_COLUMNS)
        # Attach the image id and its neighbours' ids for the post-processing step.
        submission["ID"] = ids
        submission["PRE_ID"] = pre_ids
        submission["POST_ID"] = pos_ids
        submission = postprocess_multitarget(submission)
        LOGGER.info(submission.head())
        submission.to_csv("../output/{}_sub_st2.csv".format(EXP_ID), index=False)
def main():
    """Predict on external ``.dcm`` files found under ``../input_ext``.

    Globs ``../input_ext/*/*/*/*.dcm``, runs ``predict_external`` with the
    checkpoint at ``model_path`` and writes the per-file predictions, the
    ``is_dicom`` values returned by ``predict_external`` and the file paths
    to ``../input_ext/{EXP_ID}_externalv2.csv``.
    """
    with timer('load data'):
        dcm_paths = glob.glob("../input_ext/*/*/*/*.dcm")
        frame = pd.DataFrame({"Image": dcm_paths})
        frame = frame[["Image"]]
        ids = frame["Image"].values
        gc.collect()

    with timer('preprocessing'):
        steps = [
            CenterCrop(512 - 50, 512 - 50, p=1.0),
            Resize(img_size, img_size, p=1),
        ]
        test_augmentation = Compose(steps)

        dataset = RSNADatasetTest(
            frame, img_size, IMAGE_PATH,
            id_colname=ID_COLUMNS,
            transforms=test_augmentation,
            black_crop=False,
            subdural_window=True,
            n_tta=N_TTA,
            img_type="",
            external=True,
        )
        # Data is loaded in the main process here (num_workers=0);
        # shuffle=False keeps rows aligned with ``ids``.
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=0, pin_memory=True)
        del frame, dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                       pretrained="imagenet", pool_type="avg")
        net.load_state_dict(torch.load(model_path))
        net.to(device)
        net = torch.nn.DataParallel(net)

    with timer('predict'):
        probs, is_dicoms = predict_external(net, loader, device, n_tta=N_TTA)
        # Keep predictions strictly inside (0, 1).
        probs = np.clip(probs, 1e-6, 1 - 1e-6)

    with timer('sub'):
        result = pd.DataFrame(probs, columns=TARGET_COLUMNS)
        result["is_dicom"] = is_dicoms.reshape(-1)
        result["Image"] = ids.reshape(-1)
        LOGGER.info(result.head())
        result.to_csv("../input_ext/{}_externalv2.csv".format(EXP_ID), index=False)
def main():
    """Train one cross-validation fold, keep the best checkpoint, then predict.

    Reads ``TRAIN_PATH``, selects fold ``fold_id`` of a 5-fold stratified
    split, trains for ``epochs`` epochs and saves the weights and validation
    predictions whenever the validation ROC-AUC improves. Finally reloads
    the best weights from ``save_path``, predicts on ``TEST_PATH`` and
    writes ``results.csv``.
    """
    with timer('load data'):
        frame = pd.read_csv(TRAIN_PATH)
        # Both location columns are divided by 100 before targets are extracted.
        for loc_col in ("loc_x", "loc_y"):
            frame[loc_col] = frame[loc_col] / 100
        y = frame[TARGET_COLUMNS].values
        frame = frame[[ID_COLUMNS]]
        gc.collect()

    with timer("split data"):
        splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
        # Walk the folds until ``fold_id`` is reached; if it is never matched,
        # the indices of the last fold remain in effect.
        for n_fold, (train_index, val_index) in enumerate(splitter.split(frame, y)):
            if n_fold == fold_id:
                break
        train_df = frame.loc[train_index]
        val_df = frame.loc[val_index]
        y_train = y[train_index]
        y_val = y[val_index]

    with timer('preprocessing'):
        train_augmentation = Compose([
            HorizontalFlip(p=0.5),
            OneOf([
                ElasticTransform(p=0.5, alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
                GridDistortion(p=0.5),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5),
            ], p=0.5),
            RandomBrightnessContrast(p=0.5),
            ShiftScaleRotate(rotate_limit=20, p=0.5),
            Resize(img_size, img_size, p=1),
        ])
        val_augmentation = Compose([
            Resize(img_size, img_size, p=1),
        ])

        train_dataset = KDDataset(train_df, y_train, img_size, IMAGE_PATH,
                                  id_colname=ID_COLUMNS, transforms=train_augmentation)
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                  num_workers=2, pin_memory=True)

        val_dataset = KDDataset(val_df, y_val, img_size, IMAGE_PATH,
                                id_colname=ID_COLUMNS, transforms=val_augmentation)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False,
                                num_workers=2, pin_memory=True)
        del frame, train_dataset, val_dataset
        gc.collect()

    with timer('create model'):
        net = CnnModel(num_classes=N_CLASSES, encoder="se_resnext50_32x4d",
                       pretrained="../input/pytorch-pretrained-models/se_resnext50_32x4d-a260b3a4.pth",
                       pool_type="avg")
        if model_path is not None:
            net.load_state_dict(torch.load(model_path))
        net.to(device)

        criterion = torch.nn.BCEWithLogitsLoss()
        optimizer = torch.optim.Adam(net.parameters(), lr=1e-4, eps=1e-4)
        # model, optimizer = amp.initialize(model, optimizer, opt_level="O1", verbosity=0)

    with timer('train'):
        best_score = 0
        for epoch in range(1, epochs + 1):
            # Re-seed with an epoch-dependent seed.
            seed_torch(SEED + epoch)

            # One-off learning-rate decay three epochs before the last one.
            if epoch == epochs - 3:
                for group in optimizer.param_groups:
                    group['lr'] = group['lr'] * 0.1

            LOGGER.info("Starting {} epoch...".format(epoch))
            tr_loss = train_one_epoch(net, train_loader, criterion, optimizer, device, N_CLASSES)
            LOGGER.info('Mean train loss: {}'.format(round(tr_loss, 5)))

            y_pred, target, val_loss = validate(net, val_loader, criterion, device, N_CLASSES)
            score = roc_auc_score(target, y_pred)
            LOGGER.info('Mean val loss: {}'.format(round(val_loss, 5)))
            LOGGER.info('val score: {}'.format(round(score, 5)))

            # Keep only the best epoch's validation predictions and weights.
            if score > best_score:
                best_score = score
                np.save("y_pred.npy", y_pred)
                torch.save(net.state_dict(), save_path)

        # Written once after training, from the last validation pass.
        np.save("target.npy", target)

    with timer('predict'):
        test_df = pd.read_csv(TEST_PATH)
        test_ids = test_df["id"].values

        test_augmentation = Compose([
            Resize(img_size, img_size, p=1),
        ])
        test_dataset = KDDatasetTest(test_df, img_size, TEST_IMAGE_PATH, id_colname=ID_COLUMNS,
                                     transforms=test_augmentation, n_tta=2)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                 num_workers=2, pin_memory=True)

        # Predict with the best checkpoint rather than the last-epoch weights.
        net.load_state_dict(torch.load(save_path))

        pred = predict(net, test_loader, device, N_CLASSES, n_tta=2)
        print(pred.shape)
        results = pd.DataFrame({"id": test_ids, "is_star": pred.reshape(-1)})
        results.to_csv("results.csv", index=False)