def _validate_augmentation_path(self):
    # Load the augmentation pipeline if a .yml config exists; otherwise disable augmentation
    if self.augmentation_path.is_file() and self.augmentation_path.suffix == '.yml':
        self.transforms = A.load(self.augmentation_path, data_format='yaml')
    else:
        self.transforms = None
        warnings.warn(f'{self.augmentation_path} is not a .yml file. No augmentations will be applied')
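A YAML pipeline file like the one this loader expects can be produced with albumentations' own serialization API. A minimal round-trip sketch (an illustration, not part of the source; 'augs.yml' is a placeholder path, and the YAML format requires PyYAML):

import albumentations as A

pipeline = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])
A.save(pipeline, 'augs.yml', data_format='yaml')   # serialize the pipeline to YAML
restored = A.load('augs.yml', data_format='yaml')  # rebuild an equivalent pipeline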
Example #2
def make_data(data_folder: str,
              mode: str,
              transform: dict,
              num_workers: int,
              batch_size: int,
              fold: str = None,
              folds_path: str = None,
              positive_ratio_range: Tuple[float, float] = (0.3, 0.8),
              epochs: int = 50):
    img_filenames, mask_filenames, non_emptiness = make_filenames(
        data_folder=data_folder, mode=mode, fold=fold, folds_path=folds_path)
    _transform = A.load(transform[mode], 'yaml')
    _transform.transforms = _transform.transforms + [ToTensor()]
    dataset = PneumothoraxDataset(img_filenames=img_filenames,
                                  mask_filenames=mask_filenames,
                                  transform=_transform,
                                  non_emptiness=non_emptiness)

    sampler = EmptySampler(data_source=dataset,
                           positive_ratio_range=positive_ratio_range,
                           epochs=epochs)
    loader = make_data_loader(dataset=dataset,
                              sampler=sampler if mode == 'train' else None,
                              batch_size=batch_size,
                              num_workers=num_workers)
    return loader
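make_data assumes that `transform` maps each mode to a YAML file created with A.save. A self-contained sketch of that convention (placeholder file names; ToTensorV2 from albumentations.pytorch stands in for the ToTensor used above):

import albumentations as A
from albumentations.pytorch import ToTensorV2

A.save(A.Compose([A.HorizontalFlip(p=0.5)]), 'train.yaml', data_format='yaml')
transform = {'train': 'train.yaml'}

_transform = A.load(transform['train'], data_format='yaml')
_transform.transforms = [*_transform.transforms, ToTensorV2()]  # append after loading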
Example #3
def augment_data(save_dir):
    """
    A special that implemnets the data augmentation pipeline.
    :param save_dir: Where to save the augmented data?
    :return:
    """

    seed = 1337
    random.seed(seed)
    start_time = time.time()
    print(f"====== Augmenting data. Seed set at {seed} ======")

    data_file = h5py.File(os.path.join(save_dir, 'data_file.h5'), 'r')
    data_shape = data_file['data/data'].shape

    data_aug = np.zeros(shape=data_shape, dtype=np.float32)

    n_samples = data_shape[0]
    img_channels, img_height, img_width, img_depth = data_shape[1:5]

    try:
        aug = alb.load(os.path.join(save_dir, 'aug_pipeline_1.json'))
    except FileNotFoundError:
        print("Pipeline not found. Generating One ...")
        aug = Compose([
            OneOf([VerticalFlip(p=1), HorizontalFlip(p=1)], p=1),
            OneOf([
                ElasticTransform(p=1, sigma=6, alpha_affine=4, alpha=75),
                GridDistortion(p=1),
                OpticalDistortion(p=1, distort_limit=2, shift_limit=0.5)
            ],
                  p=0.8)
        ])

        alb.save(aug, os.path.join(save_dir, 'aug_pipeline_1.json'))

    for data_idx in np.arange(n_samples):
        img = data_file['data/data'][data_idx, ...]
        img = img.reshape(img_channels, img_height, img_width, -1)
        img_aug = aug(image=img[0,
                                ...])['image'].reshape(img_channels,
                                                       img_height, img_width,
                                                       img_depth, -1)

        data_aug[data_idx, ...] = img_aug

        del img_aug
        del img

    data_file.close()

    with h5py.File(os.path.join(save_dir, 'data_aug.h5'), 'w') as file:
        file.create_dataset('data/data', data=data_aug, dtype=np.float32)

    print(
        f"====== Finished augmentation. Time taken: {time.time() - start_time}s ======"
    )
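The load-or-create pattern above, reduced to its essentials (a sketch with a placeholder path 'aug.json'; JSON is albumentations' default serialization format):

import numpy as np
import albumentations as A

try:
    aug = A.load('aug.json')  # data_format defaults to 'json'
except FileNotFoundError:
    aug = A.Compose([A.HorizontalFlip(p=0.5)])
    A.save(aug, 'aug.json')

image = np.zeros((64, 64, 3), dtype=np.uint8)  # dummy H x W x C image
augmented = aug(image=image)['image']          # pipelines take named targets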
Example #4
def load_albu_transform(path: Union[str, Path],
                        data_format: str = "yaml") -> Transform:
    """
    :param path: path to augmentation config
    :param data_format: config format
    :return: Albumentations transform
    """
    albu_transform = albu.load(path, data_format=data_format)
    return albu_transform
Example #5
def test_serialization_v2():
    current_directory = os.path.dirname(os.path.abspath(__file__))
    files_directory = os.path.join(current_directory, "files")
    transform = A.load(os.path.join(files_directory, "transform_serialization_v2.json"))
    with open(os.path.join(files_directory, "output_v0.4.6.json")) as f:
        output_0_4_6 = json.load(f)
    np.random.seed(42)
    image = np.random.randint(low=0, high=255, size=(256, 256, 3), dtype=np.uint8)
    random.seed(42)
    transformed_image = transform(image=image)["image"]
    assert transformed_image.numpy().tolist() == output_0_4_6
Example #6
def test_augmentations_serialization_to_file_with_custom_parameters(
    augmentation_cls, params, p, seed, image, mask, always_apply, data_format
):
    with patch("builtins.open", OpenMock()):
        aug = augmentation_cls(p=p, always_apply=always_apply, **params)
        filepath = "serialized.{}".format(data_format)
        A.save(aug, filepath, data_format=data_format)
        deserialized_aug = A.load(filepath, data_format=data_format)
        set_seed(seed)
        aug_data = aug(image=image, mask=mask)
        set_seed(seed)
        deserialized_aug_data = deserialized_aug(image=image, mask=mask)
        assert np.array_equal(aug_data["image"], deserialized_aug_data["image"])
        assert np.array_equal(aug_data["mask"], deserialized_aug_data["mask"])
Example #7
def predict(dcm_path, cfg):
    image = pydicom.read_file(dcm_path).pixel_array
    image = resize(image, (cfg['IMAGE_SIZE'], cfg['IMAGE_SIZE']))
    image = (image * 255).astype('uint8')
    image = np.dstack([image] * 3)

    fn = dcm_path[:dcm_path.rfind('.')]
    cv2.imwrite(fn + '.png', image)
    print(f'DCM file is transformed to PNG in {fn}.png')

    # model = AlbuNet(pretrained=False).to(cfg['DEVICE'])
    module = importlib.import_module(cfg['MODEL']['PY'])
    model_class = getattr(module, cfg['MODEL']['CLASS'])
    model = model_class(**cfg['MODEL'].get('ARGS', {})).to(cfg['DEVICE'])  # {} avoids unpacking None

    transform = albu.load(cfg['TRANSFORMS'])

    to_tensor = ToTensor()
    sample = transform(image=image)
    sample = to_tensor(**sample)
    image = sample['image'].unsqueeze(0).to(cfg['DEVICE'])

    checkpoints_list = build_checkpoints_list(cfg)
    mask = 0
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        print(checkpoint_path)
        model.load_state_dict(
            torch.load(checkpoint_path,
                       map_location=torch.device(cfg['DEVICE'])))
        model.eval()

        preds = model(image)
        curr_masks = torch.sigmoid(preds)
        curr_masks = curr_masks.squeeze(1).cpu().detach().numpy()
        mask = (mask * pred_idx + curr_masks) / (pred_idx + 1)
    # return (mask.squeeze(0) * 255).astype('uint8')

    area_threshold = cfg['AREA_THRESHOLD']
    top_score_threshold = cfg['TOP_SCORE_THRESHOLD']
    bottom_score_threshold = cfg['BOTTOM_SCORE_THRESHOLD']
    if cfg['USELEAK']:
        leak_score_threshold = cfg['LEAK_SCORE_THRESHOLD']
    else:
        leak_score_threshold = bottom_score_threshold

    return apply_thresholds(mask.squeeze(0), 1, area_threshold,
                            top_score_threshold, bottom_score_threshold,
                            leak_score_threshold)
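The checkpoint loop keeps a running mean: after the k-th checkpoint, `mask` holds the average of the first k predictions. A pure-Python sketch of that identity (placeholder scores):

preds = [0.2, 0.4, 0.9]  # placeholder per-checkpoint predictions
mean = 0.0
for k, p in enumerate(preds):
    mean = (mean * k + p) / (k + 1)  # incremental mean, as in the loop above
assert abs(mean - sum(preds) / len(preds)) < 1e-12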
Example #8
def get_augmentation(save_path=None, load_path=None):
        if load_path:
            return A.load(load_path)
        else:
            aug_seq1 = A.OneOf([
                A.Rotate(limit=(-90, 90), p=1.0),
                A.Flip(p=1.0),
                A.OpticalDistortion(always_apply=False, p=1.0, distort_limit=(-0.3, 0.3), 
                                    shift_limit=(-0.05, 0.05), interpolation=3, 
                                    border_mode=3, value=(0, 0, 0), mask_value=None),
            ], p=1.0)
            aug_seq2 = A.OneOf([
                # A.ChannelDropout(always_apply=False, p=1.0, channel_drop_range=(1, 1), fill_value=0),
                A.RGBShift(r_shift_limit=15, g_shift_limit=15,
                           b_shift_limit=15, p=1.0),
                A.RandomBrightnessContrast(always_apply=False, p=1.0, brightness_limit=(
                    -0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True)
            ], p=1.0)
            aug_seq3 = A.OneOf([
                A.GaussNoise(always_apply=False, p=1.0, var_limit=(10, 50)),
                A.ISONoise(always_apply=False, p=1.0, intensity=(
                    0.1, 1.0), color_shift=(0.01, 0.3)),
                A.MultiplicativeNoise(always_apply=False, p=1.0, multiplier=(
                    0.8, 1.6), per_channel=True, elementwise=True),
            ], p=1.0)
            aug_seq4 = A.OneOf([
                A.Equalize(always_apply=False, p=1.0,
                           mode='pil', by_channels=True),
                A.InvertImg(always_apply=False, p=1.0),
                A.MotionBlur(always_apply=False, p=1.0, blur_limit=(3, 7)),
                A.RandomFog(always_apply=False, p=1.0, 
                            fog_coef_lower=0.01, fog_coef_upper=0.2, alpha_coef=0.2)
            ], p=1.0)
            aug_seq = A.Compose([
                # A.Resize(self.img_size, self.img_size),
                # aug_seq1,
                aug_seq2,
                aug_seq3,
                aug_seq4,
                # A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                # A.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            ])
            # aug_path = '/home/jitesh/prj/classification/test/bolt/aug/aug_seq.json'
            if save_path:
                A.save(aug_seq, save_path)
            # loaded_transform = A.load(aug_path)
            return aug_seq
Example #9
def aug_flip_and_rotate(load_path=None):
    if load_path:
        return A.load(load_path)
    else:
        aug_seq = A.Compose([
            A.Rotate(limit=(-90, 90), p=0.5),
            A.Flip(p=0.5),
            A.OpticalDistortion(distort_limit=0.05,
                                shift_limit=0.05,
                                interpolation=cv2.INTER_LINEAR,
                                border_mode=cv2.BORDER_REFLECT_101,
                                value=None,
                                mask_value=None,
                                always_apply=False,
                                p=0.5)
        ])
        return aug_seq
Example #10
def main():
    args = argparser()
    config_path = Path(args.cfg.strip("/"))
    experiment_folder = config_path.parents[0]
    inference_config = load_yaml(config_path)
    print(inference_config)
    
    batch_size = inference_config['BATCH_SIZE']
    device = inference_config['DEVICE']
    
    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(**inference_config['MODEL'].get('ARGS', {})).to(device)
    model.eval()

    num_workers = inference_config['NUM_WORKERS']
    transform = albu.load(inference_config['TEST_TRANSFORMS']) 
    dataset_folder = inference_config['DATA_DIRECTORY'] 
    dataset = PneumothoraxDataset(
        data_folder=dataset_folder, mode='test', 
        transform=transform,
    )
    dataloader = DataLoader(
        dataset=dataset, batch_size=batch_size, 
        num_workers=num_workers, shuffle=False
    )

    use_flip = inference_config['FLIP']
    checkpoints_list = build_checkpoints_list(inference_config)
  
    mask_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
        print(checkpoint_path)
        model.load_state_dict(torch.load(checkpoint_path))
        model.eval()
        current_mask_dict = inference_model(model, dataloader, device, use_flip)
        for name, mask in current_mask_dict.items():
            mask_dict[name] = (mask_dict[name] * pred_idx + mask) / (pred_idx + 1)

    if 'RESULT_FOLDER' in inference_config:
        result_path = Path(inference_config['RESULT_FOLDER'], inference_config['RESULT'])
    else:
        result_path = Path(experiment_folder, inference_config['RESULT'])

    with open(result_path, 'wb') as handle:
        pickle.dump(mask_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
Example #11
def test_serialization_v2_conversion_without_totensor():
    current_directory = os.path.dirname(os.path.abspath(__file__))
    files_directory = os.path.join(current_directory, "files")
    transform_1_1_0 = A.load(
        os.path.join(files_directory,
                     "transform_v1.1.0_without_totensor.json"))
    with open(
            os.path.join(files_directory,
                         "output_v1.1.0_without_totensor.json")) as f:
        output_1_1_0 = json.load(f)
    np.random.seed(42)
    image = np.random.randint(low=0,
                              high=255,
                              size=(256, 256, 3),
                              dtype=np.uint8)
    random.seed(42)
    transformed_image = transform_1_1_0(image=image)["image"]
    assert transformed_image.tolist() == output_1_1_0
Example #12
def main():
    args = parse_args()
    config = get_config(args.config)
    transform = A.load(config['data_params']['transform']['train'], 'yaml')
    transform.transforms = transform.transforms[:-1]
    img_filenames, mask_filenames, non_emptiness = make_filenames(
        data_folder=config['data_params']['data_folder'],
        mode='train',
        fold='0',
        folds_path=config['data_params']['folds_path'])
    dataset = PneumothoraxDataset(img_filenames,
                                  mask_filenames,
                                  transform=transform,
                                  non_emptiness=non_emptiness)
    for sample in dataset:
        image = sample['image']
        mask = sample['mask']
        plt.figure(figsize=(20, 10))
        plt.imshow(np.hstack([image, np.stack([mask, mask, mask], axis=-1)]))
        plt.show()
Example #13
if __name__ == '__main__':
    args = argparser()
    config_path = Path(args['config'].strip('/'))
    experiment_folder = config_path.parents[0]
    inference_config = load_yaml(config_path)

    batch_size = inference_config['BATCH_SIZE']
    device = inference_config['DEVICE']

    module = importlib.import_module(inference_config['MODEL']['PY'])
    model_class = getattr(module, inference_config['MODEL']['CLASS'])
    model = model_class(
        **inference_config['MODEL'].get('ARGS', {})).to(device)

    num_workers = inference_config['WORKERS']
    transform = albu.load(inference_config['TEST_TRANSFORMS'])
    dataset_folder = inference_config['DATA_DIRECTORY']
    dataset = PneumothoraxDataset(data_folder=dataset_folder,
                                  mode='test',
                                  transform=transform)
    dataloader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             num_workers=num_workers,
                                             shuffle=False)

    use_flip = inference_config['FLIP']
    checkpoints_list = build_checkpoints_list(inference_config)
    result_path = Path(experiment_folder, inference_config['RESULT'])

    mask_dict = defaultdict(int)
    for pred_idx, checkpoint_path in enumerate(checkpoints_list):
Example #14
    A.InvertImg(always_apply=False, p=1.0),
    A.MotionBlur(always_apply=False, p=1.0, blur_limit=(3, 7)),
    A.OpticalDistortion(always_apply=False, p=1.0, distort_limit=(-0.3, 0.3), shift_limit=(-0.05, 0.05), interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None),
    A.RandomFog(always_apply=False, p=1.0, fog_coef_lower=0.1, fog_coef_upper=0.45, alpha_coef=0.5)
    ], p=1.0)
aug_seq = A.Compose([
    A.Resize(IMG_SIZE, IMG_SIZE),
    aug_seq1,
    aug_seq2,
    aug_seq3,
    aug_seq4,
    A.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
aug_path = '/home/jitesh/prj/classification/test/bolt/aug/aug_seq.json'
A.save(aug_seq, aug_path)
loaded_transform = A.load(aug_path)
# In[6]:

class BoltDataset(Dataset):
    def __init__(self, file_list, dir, mode='train', transform=None, test_label: int = 1):
        self.file_list = file_list
        self.dir = dir
        self.mode = mode
        # self.transform = transform
        self.test_label = test_label
        if self.mode == 'train':
            # print(self.file_list)
            # if 'b00' in self.file_list[0]:
            if 'b10' in self.file_list[0]:
                self.label = 0
            else:
Example #15
import cv2
import numpy as np
import os
import pandas as pd
from torch.utils.data import Dataset
import albumentations as albu
from albumentations.pytorch.transforms import ToTensor

transform_path = "../configures/train_transforms_complex_512.json"
transform = albu.load(transform_path)


class SIIMDataset(Dataset):
    def __init__(self, folder, img_size=512):
        self.root = folder
        self.to_tensor = ToTensor()

        df = pd.read_csv('../dataset/train_folds_5.csv')
        self.image_name_list = df[df['exist_labels'] == 1]['fname'].to_list()
        self.img_size = img_size

        print("number of sample: ", self.__len__())

    def __getitem__(self, idx):
        image_id = self.image_name_list[idx]

        size = self.img_size

        image_path = os.path.join(self.root, 'train', image_id)
        mask_path = os.path.join(self.root, 'mask2', image_id)
        image = cv2.imread(image_path)
Example #16
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    # config_folder = Path('experiments/albunet_public/01_train_config_part0.yaml'.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']

    usefolds = map(str, train_config['FOLD']['USEFOLDS'])
    # local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    binarizer_module = importlib.import_module(
        train_config['MASK_BINARIZER']['PY'])
    binarizer_class = getattr(binarizer_module,
                              train_config['MASK_BINARIZER']['CLASS'])
    binarizer_fn = binarizer_class(**train_config['MASK_BINARIZER']['ARGS'])

    eval_module = importlib.import_module(
        train_config['EVALUATION_METRIC']['PY'])
    eval_fn = getattr(eval_module, train_config['EVALUATION_METRIC']['CLASS'])
    eval_fn = functools.partial(eval_fn,
                                **train_config['EVALUATION_METRIC']['ARGS'])

    for fold_id in usefolds:
        main_logger.info('Starting training of fold {}...'.format(fold_id))

        train_dataset = BodyMorpDataset(data_folder=dataset_folder,
                                        mode='train',
                                        transform=train_transform,
                                        fold_index=fold_id,
                                        folds_distr_path=folds_distr_path)
        train_sampler = PartDataSampler(folds_distr_path, fold_id,
                                        non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = BodyMorpDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader, binarizer_fn,
                   eval_fn)
Example #17
def main():
    args = argparser()
    config_folder = Path(args.train_cfg.strip("/"))
    experiment_folder = config_folder.parents[0]

    train_config = load_yaml(config_folder)

    log_dir = Path(experiment_folder, train_config['LOGGER_DIR'])
    log_dir.mkdir(exist_ok=True, parents=True)

    main_logger = init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            map(str, train_config["DEVICE_LIST"]))

    pipeline_name = train_config['PIPELINE_NAME']
    dataset_folder = train_config['DATA_DIRECTORY']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    valid_transform = albu.load(train_config['VALID_TRANSFORMS'])

    non_empty_mask_proba = train_config.get('NON_EMPTY_MASK_PROBA', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']

    usefolds = map(str, train_config['FOLD']['USEFOLDS'])
    local_metric_fn, global_metric_fn = init_eval_fns(train_config)

    for fold_id in usefolds:
        main_logger.info('Starting training of fold {}...'.format(fold_id))

        train_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='train',
            transform=train_transform,
            fold_index=fold_id,
            folds_distr_path=folds_distr_path,
        )
        train_sampler = PneumoSampler(folds_distr_path, fold_id,
                                      non_empty_mask_proba)
        if use_sampler:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          sampler=train_sampler)
        else:
            train_dataloader = DataLoader(dataset=train_dataset,
                                          batch_size=batch_size,
                                          num_workers=num_workers,
                                          shuffle=True)

        valid_dataset = PneumothoraxDataset(
            data_folder=dataset_folder,
            mode='val',
            transform=valid_transform,
            fold_index=str(fold_id),
            folds_distr_path=folds_distr_path,
        )
        valid_dataloader = DataLoader(dataset=valid_dataset,
                                      batch_size=batch_size,
                                      num_workers=num_workers,
                                      shuffle=False)

        train_fold(train_config, experiment_folder, pipeline_name, log_dir,
                   fold_id, train_dataloader, valid_dataloader,
                   local_metric_fn, global_metric_fn)
Example #18
def create_examples(anotations_dir, desired_labels, test_split,
                    albumentations_transforms, base_path):
    print()
    print("Settings and Creating Augmented Examples...")
    if albumentations_transforms is not None:
        albumentations_transforms = albu.load(albumentations_transforms)

    desired_labels = list(set(desired_labels))
    examples = []

    USE_ALL_LABELS = False
    if not desired_labels:
        USE_ALL_LABELS = True

    def bounding_box(points):
        x_coordinates, y_coordinates = zip(*points)
        return min(x_coordinates), min(y_coordinates), max(x_coordinates), max(
            y_coordinates)

    print()
    print('Creating examples')
    base_path = FLAGS.base_path
    files = glob.glob(os.path.join(anotations_dir, "*json"))
    for classification in tqdm(files):
        classification = json.load(open(classification))
        img_path = classification['imagePath']
        img_binary = os.path.join(base_path, img_path)
        img = Image.open(img_binary, "r")
        #width, height = img.size
        width = classification["imageWidth"]
        height = classification["imageHeight"]
        width, height = float(width), float(height)
        filename = classification['imagePath']
        temp_img_name = f'temp_img.{img.format.lower()}'
        img.save(temp_img_name)
        with tf.io.gfile.GFile(temp_img_name, 'rb') as gfile:
            encoded_img = gfile.read()
        os.remove(temp_img_name)
        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []

        classes_text = []
        classes = []

        labels = classification['shapes']
        # perhaps a loop to iterate over the list of objects
        objetos = labels
        for itens in objetos:
            label = itens["label"]
            if USE_ALL_LABELS:
                if label not in desired_labels:
                    desired_labels.append(label)
            if label in desired_labels:
                coords = itens["points"]
                x1, y1, x2, y2 = bounding_box(coords)
                x1 = x1 / width  #Normalize BBox
                x2 = x2 / width  #Normalize BBox
                y1 = y1 / height  #Normalize BBox
                y2 = y2 / height  #Normalize BBox
                xmins.append(float(x1))
                xmaxs.append(float(x2))
                ymins.append(float(y1))
                ymaxs.append(float(y2))
                class_index = desired_labels.index(label) + 1
                classes_text.append(str.encode(label))
                classes.append(class_index)
                example = dict()
                example['img_height'] = int(height)
                example['img_width'] = int(width)
                example['filename'] = str.encode(filename)
                example['encoded_img'] = encoded_img
                example['img_format'] = str.encode(img.format.lower())
                example['xmins'] = xmins
                example['xmaxs'] = xmaxs
                example['ymins'] = ymins
                example['ymaxs'] = ymaxs
                example['classes_text'] = classes_text
                example['classes'] = classes
                examples.append(example)
        if albumentations_transforms is not None:
            np_img = np.asarray(img)
            # Prepare bbox in Albumentations format: [x_min, y_min, x_max, y_max]
            bboxes = []
            for bbox in list(
                    zip(example['xmins'], example['ymins'], example['xmaxs'],
                        example['ymaxs'])):

                bboxes.append(bbox)

            annotations = {
                'image': np_img,
                'bboxes': bboxes,
                'classes_text': example['classes_text']
            }
            augmented_annotations = albumentations_transforms(**annotations)

            # Create new Example
            augmented_img = augmented_annotations['image']
            img = Image.fromarray(np.uint8(augmented_img))
            width, height = img.size
            width, height = float(width), float(height)

            splitted_filename = filename.split('.')
            splitted_filename[-2] = splitted_filename[-2] + '-augmented'
            filename = '.'.join(splitted_filename)

            img.save(temp_img_name)
            img = Image.open(temp_img_name)
            with tf.io.gfile.GFile(temp_img_name, 'rb') as gfile:
                encoded_img = gfile.read()
            os.remove(temp_img_name)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            for bbox in augmented_annotations['bboxes']:
                # Albumentations format: [x_min, y_min, x_max, y_max]
                xs = [bbox[0], bbox[2]]
                ys = [bbox[1], bbox[3]]
                xmins.append(min(xs))
                xmaxs.append(max(xs))
                ymins.append(min(ys))
                ymaxs.append(max(ys))

            classes_text = augmented_annotations['classes_text']
            classes = [
                desired_labels.index(label.decode('utf-8')) + 1
                for label in classes_text
            ]

            example = dict()
            example['img_height'] = int(height)
            example['img_width'] = int(width)
            example['filename'] = str.encode(filename)
            example['encoded_img'] = encoded_img
            example['img_format'] = str.encode(img.format.lower())
            example['xmins'] = xmins
            example['xmaxs'] = xmaxs
            example['ymins'] = ymins
            example['ymaxs'] = ymaxs
            example['classes_text'] = classes_text
            example['classes'] = classes
            examples.append(example)

    random.shuffle(examples)
    split = int(len(examples) * (1 - test_split))

    train_examples = examples[:split]
    test_examples = examples[split:]

    random.shuffle(train_examples)
    random.shuffle(test_examples)

    class_id_set = set()
    for example in examples:
        for class_id in zip(example['classes'], example['classes_text']):
            class_id_set.add(class_id)

    class_id_set = sorted(class_id_set)

    category_index = {k: v.decode("utf-8") for k, v in class_id_set}
    category_index = json.dumps(category_index)
    with open('category_index.json', 'w') as f:
        f.write(category_index)

    print()
    print(f'TOTAL EXAMPLES : {len(examples)}')
    print(f'TRAIN EXAMPLES : {len(train_examples)}')
    print(f'TEST EXAMPLES  : {len(test_examples)}')
    print(f'TOTAL CLASSES  : {len(class_id_set)}')
    for class_id in class_id_set:
        print(f'    - {class_id[1].decode("utf-8")} ({class_id[0]})')

    return train_examples, test_examples
Example #19
def make_transforms(transform: Dict[str, str], mode: str):
    _transform = albu.load(transform[mode], 'yaml')
    # Append the tensor conversion to the deserialized pipeline as a plain list
    _transform.transforms = [*_transform.transforms, ToTensorV2()]
    return _transform
Example #20
    log_dir.mkdir(parents=True, exist_ok=True)

    main_logger = helpers.init_logger(log_dir, 'train_main.log')

    seed = train_config['SEED']
    helpers.init_seed(seed)
    main_logger.info(train_config)

    if "DEVICE_LIST" in train_config:
        os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
            map(str, train_config['DEVICE_LIST']))

    pipeline_name = train_config['PIPELINE_NAME']
    dataset_folder = train_config['DATA_DIRECTORY']

    train_transform = albu.load(train_config['TRAIN_TRANSFORMS'])
    val_transform = albu.load(train_config['VAL_TRANSFORMS'])

    # NOTE: the semantics of non_empty_mask_prob / use_sampler are not fully verified
    non_empty_mask_prob = train_config.get('NON_EMPTY_MASK_PROB', 0)
    use_sampler = train_config['USE_SAMPLER']

    dataset_folder = train_config['DATA_DIRECTORY']
    folds_distr_path = train_config['FOLD']['FILE']

    num_workers = train_config['WORKERS']
    batch_size = train_config['BATCH_SIZE']
    n_folds = train_config['FOLD']['NUMBER']

    usefolds = map(str, train_config['FOLD']['USEFOLDS'])
Example #21
def main(cfg: DictConfig):

    cwd = Path(get_original_cwd())

    # overwrite config if continue training from checkpoint
    resume_cfg = None
    if "resume" in cfg:
        cfg_path = cwd / cfg.resume / ".hydra/config.yaml"
        print(f"Continue from: {cfg.resume}")
        # Overwrite everything except device
        # TODO config merger (perhaps continue training with the same optimizer but other lrs?)
        resume_cfg = OmegaConf.load(cfg_path)
        cfg.model = resume_cfg.model
        if cfg.train.num_epochs == 0:
            cfg.data.scale_factor = resume_cfg.data.scale_factor
        OmegaConf.save(cfg, ".hydra/config.yaml")

    print(OmegaConf.to_yaml(cfg))

    device = set_device_id(cfg.device)
    set_seed(cfg.seed, device=device)

    # Augmentations
    if cfg.data.aug == "auto":
        transforms = albu.load(cwd / "autoalbument/autoconfig.json")
    else:
        transforms = D.get_training_augmentations()

    if OmegaConf.is_missing(cfg.model, "convert_bottleneck"):
        cfg.model.convert_bottleneck = (0, 0, 0)

    # Model
    print(f"Setup model {cfg.model.arch} {cfg.model.encoder_name} "
          f"convert_bn={cfg.model.convert_bn} "
          f"convert_bottleneck={cfg.model.convert_bottleneck} ")
    model = get_segmentation_model(
        arch=cfg.model.arch,
        encoder_name=cfg.model.encoder_name,
        encoder_weights=cfg.model.encoder_weights,
        classes=1,
        convert_bn=cfg.model.convert_bn,
        convert_bottleneck=cfg.model.convert_bottleneck,
        # decoder_attention_type="scse",  # TODO to config
    )
    model = model.to(device)
    model.train()
    print(model)

    # Optimization
    # Reduce LR for pretrained encoder
    layerwise_params = {
        "encoder*":
        dict(lr=cfg.optim.lr_encoder, weight_decay=cfg.optim.wd_encoder)
    }
    model_params = cutils.process_model_params(
        model, layerwise_params=layerwise_params)

    # Select optimizer
    optimizer = get_optimizer(
        name=cfg.optim.name,
        model_params=model_params,
        lr=cfg.optim.lr,
        wd=cfg.optim.wd,
        lookahead=cfg.optim.lookahead,
    )

    criterion = {
        "dice": DiceLoss(),
        # "dice": SoftDiceLoss(mode="binary", smooth=1e-7),
        "iou": IoULoss(),
        "bce": nn.BCEWithLogitsLoss(),
        "lovasz": LovaszLossBinary(),
        "focal_tversky": FocalTverskyLoss(eps=1e-7, alpha=0.7, gamma=0.75),
    }

    # Load states if resuming training
    if "resume" in cfg:
        checkpoint_path = (cwd / cfg.resume / cfg.train.logdir /
                           "checkpoints/best_full.pth")
        if checkpoint_path.exists():
            print(f"\nLoading checkpoint {str(checkpoint_path)}")
            checkpoint = cutils.load_checkpoint(checkpoint_path)
            cutils.unpack_checkpoint(
                checkpoint=checkpoint,
                model=model,
                optimizer=optimizer
                if resume_cfg.optim.name == cfg.optim.name else None,
                criterion=criterion,
            )
        else:
            raise ValueError("Nothing to resume, checkpoint missing")

    # We may want to only validate a resumed run; in that case the training routine is skipped
    best_th = 0.5

    stats = None
    if cfg.data.stats:
        print(f"Use statistics from file: {cfg.data.stats}")
        stats = cwd / cfg.data.stats

    if cfg.train.num_epochs is not None:
        callbacks = [
            # Each criterion is calculated separately.
            CriterionCallback(input_key="mask",
                              prefix="loss_dice",
                              criterion_key="dice"),
            CriterionCallback(input_key="mask",
                              prefix="loss_iou",
                              criterion_key="iou"),
            CriterionCallback(input_key="mask",
                              prefix="loss_bce",
                              criterion_key="bce"),
            CriterionCallback(input_key="mask",
                              prefix="loss_lovasz",
                              criterion_key="lovasz"),
            CriterionCallback(
                input_key="mask",
                prefix="loss_focal_tversky",
                criterion_key="focal_tversky",
            ),
            # And only then we aggregate everything into one loss.
            MetricAggregationCallback(
                prefix="loss",
                mode="weighted_sum",  # can be "sum", "weighted_sum" or "mean"
                # because we want weighted sum, we need to add scale for each loss
                metrics={
                    "loss_dice": cfg.loss.dice,
                    "loss_iou": cfg.loss.iou,
                    "loss_bce": cfg.loss.bce,
                    "loss_lovasz": cfg.loss.lovasz,
                    "loss_focal_tversky": cfg.loss.focal_tversky,
                },
            ),
            # metrics
            DiceCallback(input_key="mask"),
            IouCallback(input_key="mask"),
            # gradient accumulation
            OptimizerCallback(accumulation_steps=cfg.optim.accumulate),
            # early stopping
            SchedulerCallback(reduced_metric="loss_dice",
                              mode=cfg.scheduler.mode),
            EarlyStoppingCallback(**cfg.scheduler.early_stopping,
                                  minimize=False),
            # TODO WandbLogger works poorly with multistage right now
            WandbLogger(project=cfg.project, config=dict(cfg)),
            # CheckpointCallback(save_n_best=cfg.checkpoint.save_n_best),
        ]

        # Training
        runner = SupervisedRunner(device=device,
                                  input_key="image",
                                  input_target_key="mask")

        # TODO Scheduler does not work now, every stage restarts from base lr
        scheduler_warm_restart = optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[1, 2],
            gamma=10,
        )

        for i, (size, num_epochs) in enumerate(
                zip(cfg.data.sizes, cfg.train.num_epochs)):
            scale = size / 1024
            print(
                f"Training stage {i}, scale {scale}, size {size}, epochs {num_epochs}"
            )

            # Datasets
            (
                train_ds,
                valid_ds,
                train_images,
                val_images,
            ) = D.get_train_valid_datasets_from_path(
                # path=(cwd / cfg.data.path),
                path=(cwd / f"data/hubmap-{size}x{size}/"),
                train_ids=cfg.data.train_ids,
                valid_ids=cfg.data.valid_ids,
                seed=cfg.seed,
                valid_split=cfg.data.valid_split,
                mean=cfg.data.mean,
                std=cfg.data.std,
                transforms=transforms,
                stats=stats,
            )

            train_bs = int(cfg.loader.train_bs / (scale**2))
            valid_bs = int(cfg.loader.valid_bs / (scale**2))
            print(
                f"train: {len(train_ds)}; bs {train_bs}",
                f"valid: {len(valid_ds)}, bs {valid_bs}",
            )

            # Data loaders
            data_loaders = D.get_data_loaders(
                train_ds=train_ds,
                valid_ds=valid_ds,
                train_bs=train_bs,
                valid_bs=valid_bs,
                num_workers=cfg.loader.num_workers,
            )

            # Select scheduler
            scheduler = get_scheduler(
                name=cfg.scheduler.type,
                optimizer=optimizer,
                num_epochs=num_epochs * (len(data_loaders["train"]) if
                                         cfg.scheduler.mode == "batch" else 1),
                eta_min=scheduler_warm_restart.get_last_lr()[0] /
                cfg.scheduler.eta_min_factor,
                plateau=cfg.scheduler.plateau,
            )

            runner.train(
                model=model,
                criterion=criterion,
                optimizer=optimizer,
                scheduler=scheduler,
                callbacks=callbacks,
                logdir=cfg.train.logdir,
                loaders=data_loaders,
                num_epochs=num_epochs,
                verbose=True,
                main_metric=cfg.train.main_metric,
                load_best_on_end=True,
                minimize_metric=False,
                check=cfg.check,
                fp16=dict(amp=cfg.amp),
            )

            # Set new initial LR for optimizer after restart
            scheduler_warm_restart.step()
            print(
                f"New LR for warm restart {scheduler_warm_restart.get_last_lr()[0]}"
            )

            # Find optimal threshold for dice score
            model.eval()
            best_th, dices = find_dice_threshold(model, data_loaders["valid"])
            print("Best dice threshold", best_th, np.max(dices[1]))
            np.save(f"dices_{size}.npy", dices)
    else:
        print("Validation only")
        # Datasets
        size = cfg.data.sizes[-1]
        train_ds, valid_ds = D.get_train_valid_datasets_from_path(
            # path=(cwd / cfg.data.path),
            path=(cwd / f"data/hubmap-{size}x{size}/"),
            train_ids=cfg.data.train_ids,
            valid_ids=cfg.data.valid_ids,
            seed=cfg.seed,
            valid_split=cfg.data.valid_split,
            mean=cfg.data.mean,
            std=cfg.data.std,
            transforms=transforms,
            stats=stats,
        )

        train_bs = int(cfg.loader.train_bs / (cfg.data.scale_factor**2))
        valid_bs = int(cfg.loader.valid_bs / (cfg.data.scale_factor**2))
        print(
            f"train: {len(train_ds)}; bs {train_bs}",
            f"valid: {len(valid_ds)}, bs {valid_bs}",
        )

        # Data loaders
        data_loaders = D.get_data_loaders(
            train_ds=train_ds,
            valid_ds=valid_ds,
            train_bs=train_bs,
            valid_bs=valid_bs,
            num_workers=cfg.loader.num_workers,
        )

        # Find optimal threshold for dice score
        model.eval()
        best_th, dices = find_dice_threshold(model, data_loaders["valid"])
        print("Best dice threshold", best_th, np.max(dices[1]))
        np.save(f"dices_val.npy", dices)

    #
    # # Load best checkpoint
    # checkpoint_path = Path(cfg.train.logdir) / "checkpoints/best.pth"
    # if checkpoint_path.exists():
    #     print(f"\nLoading checkpoint {str(checkpoint_path)}")
    #     state_dict = torch.load(checkpoint_path, map_location=torch.device("cpu"))[
    #         "model_state_dict"
    #     ]
    #     model.load_state_dict(state_dict)
    #     del state_dict
    # model = model.to(device)
    # Load config for updating with threshold and metric
    # (otherwise loading does not work)
    cfg = OmegaConf.load(".hydra/config.yaml")
    cfg.threshold = float(best_th)

    # Evaluate on full-size image if valid_ids is non-empty
    df_train = pd.read_csv(cwd / "data/train.csv")
    df_train = {
        r["id"]: r["encoding"]
        for r in df_train.to_dict(orient="record")
    }
    dices = []
    unique_ids = sorted(
        set(
            str(p).split("/")[-1].split("_")[0]
            for p in (cwd / cfg.data.path / "train").iterdir()))
    size = cfg.data.sizes[-1]
    scale = size / 1024
    for image_id in cfg.data.valid_ids:
        image_name = unique_ids[image_id]
        print(f"\nValidate for {image_name}")

        rle_pred, shape = inference_one(
            image_path=(cwd / f"data/train/{image_name}.tiff"),
            target_path=Path("."),
            cfg=cfg,
            model=model,
            scale_factor=scale,
            tile_size=cfg.data.tile_size,
            tile_step=cfg.data.tile_step,
            threshold=best_th,
            save_raw=False,
            tta_mode=None,
            weight="pyramid",
            device=device,
            filter_crops="tissue",
            stats=stats,
        )

        print("Predict", shape)
        pred = rle_decode(rle_pred["predicted"], shape)
        mask = rle_decode(df_train[image_name], shape)
        assert pred.shape == mask.shape, f"pred {pred.shape}, mask {mask.shape}"
        assert pred.shape == shape, f"pred {pred.shape}, expected {shape}"

        dices.append(
            dice(
                torch.from_numpy(pred).type(torch.uint8),
                torch.from_numpy(mask).type(torch.uint8),
                threshold=None,
                activation="none",
            ))
    print("Full image dice:", np.mean(dices))
    OmegaConf.save(cfg, ".hydra/config.yaml")
    return
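The validation loop reports a Dice score through catalyst's `dice`; for reference, a common definition on binary masks (a sketch, assumed to agree with the metric above up to smoothing):

import numpy as np

def dice_score(pred: np.ndarray, target: np.ndarray, eps: float = 1e-7) -> float:
    # Dice = 2 * |P & T| / (|P| + |T|) on {0, 1} masks
    intersection = np.logical_and(pred, target).sum()
    return float((2 * intersection + eps) / (pred.sum() + target.sum() + eps))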