def build_rle_dict(mask_dict):
    # Encode each predicted mask as an RLE string at the 350x525 submission size.
    encoded_pixels = []
    for name, mask in tqdm(mask_dict.items()):
        if mask.shape != (350, 525):
            mask = cv2.resize(mask, dsize=(525, 350),
                              interpolation=cv2.INTER_LINEAR)
        predict, num_predict = post_process(mask, 0.7, 10000)
        if num_predict == 0:
            encoded_pixels.append("")
        else:
            r = mask2rle(predict)
            encoded_pixels.append(r)
    return encoded_pixels
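# build_rle_dict relies on two helpers defined elsewhere in this repo. Below is
# a minimal sketch of what they are assumed to do, inferred from the call sites
# in this file; the repo's own definitions may differ in detail.
import cv2
import numpy as np


def mask2rle(img):
    # Standard Kaggle run-length encoding: column-major scan, 1-indexed start
    # positions, emitted as 'start length start length ...' pairs.
    pixels = img.T.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)


def post_process(probability, threshold, min_size, height=None, width=None):
    # Threshold a probability map, then keep only connected components larger
    # than min_size pixels; returns the cleaned binary mask and the number of
    # surviving components. The optional height/width arguments (passed by the
    # ensemble variants below) resize the map before thresholding.
    if height is not None and width is not None:
        probability = cv2.resize(probability, (width, height),
                                 interpolation=cv2.INTER_LINEAR)
    mask = (probability > threshold).astype(np.uint8)
    num_components, components = cv2.connectedComponents(mask)
    predictions = np.zeros(probability.shape, np.float32)
    num = 0
    for c in range(1, num_components):
        p = (components == c)
        if p.sum() > min_size:
            predictions[p] = 1
            num += 1
    return predictions, num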
def postprocess_shape(file, mode):
    # Reshape the predicted masks in a submission CSV (e.g. into convex hulls)
    # and re-encode them into a new submission file.
    sub = pd.read_csv(file)
    name = file.split('/')[-1].split('.')[0]
    # mode: choose from 'rect', 'min', 'convex' and 'approx'
    model_class_names = ['Fish', 'Flower', 'Gravel', 'Sugar']
    min_size = [25000, 15000, 22500, 10000]
    img_label_list = []
    enc_pixels_list = []
    test_imgs = os.listdir('../../dados/test_images/')
    for test_img_i, test_img in enumerate(tqdm(test_imgs)):
        # load the already-resized image (ryches' dataset) once per image
        path = os.path.join('../../dados/test_images/', test_img)
        img = cv2.imread(path).astype(np.float32)
        img = img / 255.
        img = np_resize(img, (350, 525))
        img2 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        for class_i, class_name in enumerate(model_class_names):
            img_label_list.append(f'{test_img}_{class_name}')
            mask = make_mask(sub, test_img + '_' + class_name, shape=(350, 525))
            if True:  # e.g. `if class_name in ('Flower', 'Sugar')` to post-process only certain classes
                mask = draw_convex_hull(mask.astype(np.uint8), mode=mode)
            mask[img2 <= 2 / 255.] = 0  # zero out the black border regions
            mask = post_process_minsize(mask, min_size[class_i])
            if mask.sum() == 0:
                enc_pixels_list.append(np.nan)
            else:
                mask = np.where(mask > 0.5, 1.0, 0.0)
                enc_pixels_list.append(mask2rle(mask))
    name = name + f'_{mode}.csv'
    submission_df = pd.DataFrame({'Image_Label': img_label_list,
                                  'EncodedPixels': enc_pixels_list})
    submission_df.to_csv(name, index=False)
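# draw_convex_hull and post_process_minsize are assumed helpers; plausible
# sketches based on how they are called above, not the repo's exact code
# (the contour API below follows OpenCV 4.x).
def draw_convex_hull(mask, mode='convex'):
    # Replace each connected region by a simplified shape built from its contour.
    img = np.zeros(mask.shape)
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    for c in contours:
        if mode == 'rect':  # axis-aligned bounding rectangle
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(img, (x, y), (x + w, y + h), 1, -1)
        elif mode == 'min':  # rotated minimum-area rectangle
            box = cv2.boxPoints(cv2.minAreaRect(c)).astype(np.int32)
            cv2.drawContours(img, [box], 0, 1, -1)
        elif mode == 'approx':  # simplified polygon
            approx = cv2.approxPolyDP(c, 0.02 * cv2.arcLength(c, True), True)
            cv2.drawContours(img, [approx], 0, 1, -1)
        else:  # 'convex': convex hull of the contour
            hull = cv2.convexHull(c)
            cv2.drawContours(img, [hull], 0, 1, -1)
    return img


def post_process_minsize(mask, min_size):
    # Drop connected components smaller than min_size pixels.
    num_components, components = cv2.connectedComponents(mask.astype(np.uint8))
    out = np.zeros(mask.shape)
    for c in range(1, num_components):
        p = (components == c)
        if p.sum() > min_size:
            out[p] = 1
    return out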
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=config.model.pretrained,
        classes=config.data.num_classes,
        activation=None,
    )

    # if a classifier already filtered out empty images, segment only the rest
    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    model.to(config.device)
    model.eval()
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])

    # use per-class min sizes from the validation threshold search, if present
    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            for fname, preds in zip(batch_fnames, batch_preds):
                if config.data.num_classes == 4:
                    for cls in range(preds.shape[0]):
                        mask = preds[cls, :, :]
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls + 1}"
                        predictions.append([name, rle])
                else:  # num_classes == 5: channel 0 is background, skip it
                    for cls in range(1, 5):
                        mask = preds[cls, :, :]
                        # min_sizes holds one entry per foreground class
                        mask, num = post_process(mask,
                                                 config.test.best_threshold,
                                                 min_sizes[cls - 1])
                        rle = mask2rle(mask)
                        name = fname + f"_{cls}"
                        predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions, columns=['ImageId_ClassId', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
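# predict_batch is another assumed helper. A minimal sketch with optional
# horizontal-flip TTA follows; whether the repo's version returns sigmoid
# probabilities (as here) or raw logits is not visible in this file, and its
# TTA set may be richer.
import torch


def predict_batch(model, images, tta=False):
    preds = torch.sigmoid(model(images))
    if tta:
        # predict on the horizontally flipped batch, flip the result back,
        # and average with the plain prediction
        flipped = torch.sigmoid(model(torch.flip(images, dims=[3])))
        preds = (preds + torch.flip(flipped, dims=[3])) / 2
    return preds.cpu().numpy()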
def ensemble():
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # parameters and configs
    # ------------------------------------------------------------------------------------------------------------
    config_paths320 = [
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold0.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold1.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold2.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold3.yml',
        'config/seg/017_efnet_b3_Unet_img320_cutout5_aug_fold4.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold0.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold1.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold2.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold3.yml',
        'config/seg/030_efnet_b0_Unet_bs16_half_cosine_fold4.yml',
    ]
    config_paths384 = [
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold0.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold1.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold2.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold3.yml',
        'config/seg/032_efnet_b3_Unet_img384_RandomSizedCrop_half_cosine_fold4.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold0.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold1.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold2.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold3.yml',
        'config/seg/048_resnet34_FPN_img384_mixup_fold4.yml',
    ]
    # per-class thresholds for image-level labels and pixel masks, plus
    # minimum component sizes and the blend weights for the two resolutions
    LABEL_THRESHOLDS = [0.68, 0.69, 0.69, 0.67]
    MASK_THRESHOLDS = [0.31, 0.36, 0.31, 0.34]
    MIN_SIZES = [7500, 10000, 7500, 7500]
    WEIGHTS = [0.5, 0.5]

    # ------------------------------------------------------------------------------------------------------------
    # models and loaders
    # ------------------------------------------------------------------------------------------------------------
    config = load_config('config/base_config.yml')

    def get_model_and_loader(config_paths):
        config = load_config(config_paths[0])
        models = [load_model(c) for c in config_paths]
        model = MultiSegModels(models)
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
        return model, testloader

    model320, loader320 = get_model_and_loader(config_paths320)
    model384, loader384 = get_model_and_loader(config_paths384)

    predictions = []
    with torch.no_grad():
        for (batch_fnames320, batch_images320), \
                (batch_fnames384, batch_images384) in tqdm(
                    zip(loader320, loader384), total=len(loader320)):
            batch_images320 = batch_images320.to(config.device)
            batch_images384 = batch_images384.to(config.device)
            batch_preds320 = predict_batch(model320, batch_images320,
                                           tta=config.test.tta)
            batch_preds384 = predict_batch(model384, batch_images384,
                                           tta=config.test.tta)

            # bring both resolutions to the submission size, then blend
            batch_preds320 = resize_batch_images(batch_preds320,
                                                 SUB_HEIGHT, SUB_WIDTH)
            batch_preds384 = resize_batch_images(batch_preds384,
                                                 SUB_HEIGHT, SUB_WIDTH)
            batch_preds = batch_preds320 * WEIGHTS[0] + \
                batch_preds384 * WEIGHTS[1]

            # image-level class scores: global max over each sigmoid mask
            batch_labels320 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds320)),
                1).view(batch_preds320.shape[0], -1)
            batch_labels384 = torch.nn.functional.adaptive_max_pool2d(
                torch.sigmoid(torch.Tensor(batch_preds384)),
                1).view(batch_preds384.shape[0], -1)
            batch_labels = batch_labels320 * WEIGHTS[0] + \
                batch_labels384 * WEIGHTS[1]

            for fname, preds, labels in zip(batch_fnames320, batch_preds,
                                            batch_labels):
                for cls in range(4):
                    if labels[cls] <= LABEL_THRESHOLDS[cls]:
                        # below the label threshold: emit an empty mask
                        pred = np.zeros(preds[cls, :, :].shape)
                    else:
                        pred, _ = post_process(preds[cls, :, :],
                                               MASK_THRESHOLDS[cls],
                                               MIN_SIZES[cls],
                                               height=SUB_HEIGHT,
                                               width=SUB_WIDTH)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['Image_Label', 'EncodedPixels'])
    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission, sub_df,
                         on='Image_Label', how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['Image_Label', 'EncodedPixels']]
    df_merged.to_csv("submission.csv", index=False)

    # also write to the environment-specific working directory
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/'
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/'
    else:
        config.work_dir = '.'
    df_merged.to_csv(config.work_dir + '/submission.csv', index=False)
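# Module-level names assumed by ensemble() above and by the run_seg variants
# below. The submission size and class names follow the competition's 350x525
# masks and four cloud classes used elsewhere in this file; resize_batch_images
# is a sketch inferred from its call sites.
SUB_HEIGHT, SUB_WIDTH = 350, 525
INV_CLASSES = {0: 'Fish', 1: 'Flower', 2: 'Gravel', 3: 'Sugar'}


def resize_batch_images(batch, height, width):
    # Resize an (N, C, H, W) batch of per-class probability maps to the
    # submission resolution, one 2-D map at a time.
    return np.stack([
        np.stack([cv2.resize(chan, (width, height)) for chan in sample])
        for sample in batch
    ])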
def run_seg(config_file_seg):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config = load_config(config_file_seg)
    if 'COLAB_GPU' in os.environ:
        config.work_dir = '/content/drive/My Drive/kaggle_cloud/' + config.work_dir
    elif 'KAGGLE_WORKING_DIR' in os.environ:
        config.work_dir = '/kaggle/working/' + config.work_dir

    # if a classifier already filtered out empty images, segment only the rest
    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            img_size=(config.data.height, config.data.width),
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    model = load_model(config_file_seg)

    # use per-class min sizes from the validation threshold search, if present
    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    pred, _ = post_process(
                        preds[cls, :, :],
                        config.test.best_threshold,
                        min_sizes[cls],
                        height=config.transforms.test.Resize.height,
                        width=config.transforms.test.Resize.width)
                    # resize to the submission size, then re-binarize since
                    # interpolation produces fractional values
                    pred = cv2.resize(pred, (SUB_WIDTH, SUB_HEIGHT))
                    pred = (pred > 0.5).astype(int)
                    rle = mask2rle(pred)
                    cls_name = INV_CLASSES[cls]
                    name = fname + f"_{cls_name}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    df = pd.DataFrame(predictions, columns=['Image_Label', 'EncodedPixels'])
    df.to_csv(config.work_dir + "/submission.csv", index=False)
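# load_model and MultiSegModels are assumed helpers; the sketches below mirror
# the explicit model-building and checkpoint-loading code in the first run_seg
# above, and are not necessarily the repo's exact implementations.
def load_model(config_path):
    config = load_config(config_path)
    model = getattr(smp, config.model.arch)(
        encoder_name=config.model.encoder,
        encoder_weights=None,  # weights come from the checkpoint below
        classes=config.data.num_classes,
        activation=None,
    )
    checkpoint = load_checkpoint(f"{config.work_dir}/checkpoints/best.pth")
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(config.device)
    model.eval()
    return model


class MultiSegModels(torch.nn.Module):
    # Average the raw outputs of an ensemble of segmentation models.
    def __init__(self, models):
        super().__init__()
        self.models = torch.nn.ModuleList(models)

    def forward(self, x):
        return torch.stack([m(x) for m in self.models]).mean(dim=0)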
def run_seg(config_dir):
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    # ------------------------------------------------------------------------------------------------------------
    # 2. segmentation inference
    # ------------------------------------------------------------------------------------------------------------
    config_root = Path(config_dir) / 'seg'
    config_paths = [config_root / p for p in os.listdir(config_root)]
    base_config_paths = [
        Path(config_dir) / p for p in os.listdir(config_dir) if 'yml' in p
    ]
    config = load_config(base_config_paths[0])

    # ensemble all fold models found under <config_dir>/seg
    models = [load_model(c) for c in config_paths]
    model = MultiSegModels(models)

    if os.path.exists('cls_preds.csv'):
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path='cls_preds.csv',
            phase='filtered_test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))
    else:
        testloader = make_loader(
            data_folder=config.data.test_dir,
            df_path=config.data.sample_submission_path,
            phase='test',
            batch_size=config.test.batch_size,
            num_workers=config.num_workers,
            transforms=get_transforms(config.transforms.test))

    if os.path.exists(config.work_dir + '/threshold_search.json'):
        with open(config.work_dir + '/threshold_search.json') as json_file:
            data = json.load(json_file)
        df = pd.DataFrame(data)
        min_sizes = list(df.T.idxmax().values.astype(int))
        print('load best threshold from validation:', min_sizes)
    else:
        min_sizes = config.test.min_size
        print('load default threshold:', min_sizes)

    predictions = []
    with torch.no_grad():
        for i, (batch_fnames, batch_images) in enumerate(tqdm(testloader)):
            batch_images = batch_images.to(config.device)
            batch_preds = predict_batch(model, batch_images,
                                        tta=config.test.tta)
            for fname, preds in zip(batch_fnames, batch_preds):
                for cls in range(preds.shape[0]):
                    mask = preds[cls, :, :]
                    mask, num = post_process(mask,
                                             config.test.best_threshold,
                                             min_sizes[cls])
                    rle = mask2rle(mask)
                    name = fname + f"_{cls + 1}"
                    predictions.append([name, rle])

    # ------------------------------------------------------------------------------------------------------------
    # submission
    # ------------------------------------------------------------------------------------------------------------
    sub_df = pd.DataFrame(predictions,
                          columns=['ImageId_ClassId', 'EncodedPixels'])
    sample_submission = pd.read_csv(config.data.sample_submission_path)
    df_merged = pd.merge(sample_submission, sub_df,
                         on='ImageId_ClassId', how='left')
    df_merged.fillna('', inplace=True)
    df_merged['EncodedPixels'] = df_merged['EncodedPixels_y']
    df_merged = df_merged[['ImageId_ClassId', 'EncodedPixels']]
    df_merged.to_csv("submission.csv", index=False)
    df_merged.to_csv(KAGGLE_WORK_DIR + "/submission.csv", index=False)
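# KAGGLE_WORK_DIR is referenced above but not defined in this file; presumably
# a module-level constant along these lines (the value is an assumption based
# on the '/kaggle/working/' path used by ensemble()).
KAGGLE_WORK_DIR = '/kaggle/working'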