Exemplo n.º 1
0
def main(args: argparse.Namespace) -> None:
    """Run the augmentation over every entry found directly under the root folder.

    Each entry of ``args.root_dir`` gets a same-named folder created under
    ``args.dest_dir``. The ``*.png`` file names are listed from the first
    entry only and each name is handed to ``process_name`` through ``mmap_``.

    Args:
        args: Parsed command line; reads ``root_dir``, ``dest_dir`` and
            ``n_aug`` (the whole namespace is also forwarded to
            ``process_name``).
    """
    print(
        f'>>> Starting data augmentation (original + {args.n_aug} new images)')

    source_root: str = args.root_dir
    target_root: str = args.dest_dir

    folders: List[Path] = list(Path(source_root).glob("*"))
    dest_folders: List[Path] = [
        Path(target_root, source.name) for source in folders
    ]
    print(
        f"Will augment data from {len(folders)} folders ({map_(str, folders)})"
    )

    # Make sure every output folder exists before any worker writes to it.
    for target in dest_folders:
        target.mkdir(parents=True, exist_ok=True)

    # Only the first folder is listed; presumably all folders share the same
    # file names -- TODO confirm. An empty root raises IndexError here.
    reference_folder: Path = folders[0]
    names: List[str] = map_(lambda p: str(p.name),
                            reference_folder.glob("*.png"))

    worker = partial(process_name,
                     folders=folders,
                     dest_folders=dest_folders,
                     n_aug=args.n_aug,
                     args=args)
    mmap_(worker, names)
Exemplo n.º 2
0
def main():
    """Apply ``remap`` to every ``*.png`` file of a folder given on the command line.

    Expects exactly two command-line arguments: ``argv[1]`` is the folder to
    scan and ``argv[2]`` is a Python expression whose evaluated value is bound
    as the first argument of ``remap``. Each file path (as a string) is then
    passed to that partial through ``mmap_``.
    """
    # NOTE(review): assert is stripped under ``python -O``; an explicit
    # argument check would be more robust for validating the command line.
    assert len(argv) == 3

    folder = Path(argv[1])
    # SECURITY NOTE(review): eval() executes arbitrary code taken from the
    # command line. If the expected value is a plain literal (e.g. a dict),
    # ast.literal_eval would be a safer choice -- confirm against remap().
    changes = eval(argv[2])
    remap_ = partial(remap, changes)

    # Lazy iterator of path strings, consumed once by mmap_.
    targets: Iterable[str] = map(str, folder.glob("*.png"))
    mmap_(remap_, targets)
Exemplo n.º 3
0
def main(args: Namespace) -> None:
    """Weaken every ground-truth image matching a pattern and report sizes.

    Files are looked up in ``args.base_folder / args.GT_subfolder`` with the
    glob pattern ``args.regex``. Each ``(path, name)`` pair is passed to
    ``weaken_img`` (with the strategy named by ``args.strategy``) through
    ``mmap_``; the results are unpacked into original and new sizes.

    Args:
        args: Parsed command line; reads ``base_folder``, ``GT_subfolder``,
            ``regex``, ``verbose`` and ``strategy``.
    """
    gt_dir = Path(args.base_folder, args.GT_subfolder)
    inputs: List[Path] = list(gt_dir.glob(args.regex))
    names: List[str] = [img_path.name for img_path in inputs]
    print(f"Found {len(names)} images to weaken")
    if args.verbose:
        pprint(names[:10])

    # SECURITY NOTE(review): eval() turns the command-line string into a
    # callable and will execute arbitrary code; only use with trusted input.
    strategy: Callable = eval(args.strategy)
    weaken: Callable = partial(weaken_img, strategy=strategy)

    # Each result is unpacked as an (original size, new size) pair.
    per_image = mmap_(weaken, zip(inputs, names))
    orig_sizes, new_sizes = map_(np.asarray, zip(*per_image))
    assert len(orig_sizes) == len(new_sizes) == len(names)

    try:
        smallest_positive = orig_sizes[orig_sizes > 0].min()
        print("Orig sizes: (min, mean, max)", smallest_positive,
              orig_sizes.mean(), orig_sizes.max())
        print(
            f"Annotated {new_sizes.sum()} pixels for {len(new_sizes)} images")
    except ValueError:
        # Reductions on an empty selection raise ValueError; the summary is
        # best-effort and is skipped in that case.
        pass
Exemplo n.º 4
0
def main(args: argparse.Namespace):
    """Slice paired image / ground-truth volumes into train and val sets.

    Recursively collects ``*.nii.gz`` files under ``args.source_dir``
    (skipping paths containing ``_4d``), pairs every image with its ``_gt``
    file by sorted order, holds out ``args.retain`` random pairs for
    validation, and processes each pair with ``save_slices`` through
    ``mmap_``. Size statistics are printed and plotted for each split.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retain``, ``n_augment`` and ``shape``.

    Raises:
        AssertionError: if the source folder is missing, the destination
            already exists, or the files cannot be paired.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, avoid the temporal ones
    nii_paths: List[Path] = [p for p in src_path.rglob('*.nii.gz') if "_4d" not in str(p)]
    assert len(nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # We sort now, but also id matching is checked while iterating later on
    img_nii_paths: List[Path] = sorted(p for p in nii_paths if "_gt" not in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in nii_paths if "_gt" in str(p))
    assert len(img_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    validation_paths: List[Tuple[Path, Path]] = random.sample(paths, args.retain)
    # Build the set once so the training filter is O(n) instead of O(n^2).
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, Path]] = [p for p in paths if p not in validation_set]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    # Augmentation is applied to the training split only.
    for mode, _paths, n_augment in zip(["train", "val"], [training_paths, validation_paths], [args.n_augment, 0]):
        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices, dest_dir=dest_dir, shape=args.shape, n_augment=n_augment)
        all_sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))

        # Each result is a 4-tuple: per-slice sizes (px, mm2) and volume sizes (px, mm3).
        all_slices_sizes_px, all_slices_sizes_mm2, all_volume_size_px, all_volume_size_mm3 = zip(*all_sizes)

        flat_sizes_px = flatten_(all_slices_sizes_px)
        flat_sizes_mm2 = flatten_(all_slices_sizes_mm2)
        print("px", len(flat_sizes_px), min(flat_sizes_px), max(flat_sizes_px))
        print('\t', "px 5/95", np.percentile(flat_sizes_px, 5), np.percentile(flat_sizes_px, 95))
        print('\t', "mm2", f"{min(flat_sizes_mm2):.02f}", f"{max(flat_sizes_mm2):.02f}")

        # NOTE(review): the figure is built but neither shown nor saved here.
        _, axes = plt.subplots(nrows=2, ncols=2)
        axes = axes.flatten()

        axes[0].set_title("Slice surface (pixel)")
        axes[0].boxplot(all_slices_sizes_px, whis=[0, 100])

        axes[1].set_title("Slice surface (mm2)")
        axes[1].boxplot(all_slices_sizes_mm2, whis=[0, 100])

        # A split with a single volume would give 0 bins, which the histogram
        # rejects; always keep at least one bin.
        n_bins = max(1, len(all_volume_size_px) // 2)

        axes[2].set_title("LV volume (pixel)")
        axes[2].hist(all_volume_size_px, bins=n_bins)

        axes[3].set_title("LV volume (mm3)")
        axes[3].hist(all_volume_size_mm3, bins=n_bins)
Exemplo n.º 5
0
def main(args: argparse.Namespace):
    """Split paired ``.nii`` volumes into train/val and process each patient.

    Every ``*.nii`` file under ``args.source_dir`` is collected; files whose
    path contains ``_Labels`` are treated as ground truth, the others as
    images, and both lists are paired by sorted order. ``args.retain``
    random pairs form the validation split. Each pair is handed to
    ``process_patient`` through ``mmap_`` and statistics over the strictly
    positive returned sizes are printed.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retain``, ``shape`` and ``crop``.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # The caller is expected to have cleaned up beforehand.
    assert src_path.exists()
    assert not dest_path.exists()

    nii_paths: List[Path] = list(src_path.rglob('*.nii'))
    assert len(
        nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # Sorting aligns images with their labels; ids are presumably re-checked
    # downstream while iterating -- TODO confirm in process_patient.
    img_nii_paths: List[Path] = sorted(
        filter(lambda p: "_Labels" not in str(p), nii_paths))
    gt_nii_paths: List[Path] = sorted(
        filter(lambda p: "_Labels" in str(p), nii_paths))
    assert len(img_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    validation_paths: List[Tuple[Path, Path]] = random.sample(
        paths, args.retain)
    training_paths: List[Tuple[Path, Path]] = [
        pair for pair in paths if pair not in validation_paths
    ]
    assert set(validation_paths).isdisjoint(set(training_paths))
    assert len(paths) == (len(validation_paths) + len(training_paths))

    splits = (("train", training_paths), ("val", validation_paths))
    for mode, split_pairs in splits:
        img_paths, gt_paths = zip(*split_pairs)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        worker = partial(process_patient,
                         dest_dir=dest_dir,
                         shape=args.shape,
                         cr=args.crop)
        sizess = mmap_(uc_(worker), zip(img_paths, gt_paths))

        # Statistics are computed over the strictly positive sizes only.
        all_sizes = np.array(flatten_(sizess))
        all_pos = all_sizes[all_sizes > 0]

        summary = (
            f"sizes: min={np.min(all_pos)}, 5th={np.percentile(all_pos, 5):0.02f}, median={np.median(all_pos):0.0f}, "
            f"mean={np.mean(all_pos):0.02f}, 95th={np.percentile(all_pos, 95):0.02f}, max={np.max(all_pos)}"
        )
        print(summary)
Exemplo n.º 6
0
def main(args) -> None:
    """Generate the synthetic train and val image sets.

    For each of the ``train`` and ``val`` splits, creates the ``gt`` and
    ``img`` folders under ``args.dest`` and calls ``gen_img`` once per image
    index through ``mmap_``.

    Args:
        args: Parsed command line; reads ``wh`` (width, height pair), ``r``,
            ``dest`` and ``n`` (one image count per split, in train/val
            order).
    """
    W, H = args.wh  # Tuple[int, int]
    r: int = args.r

    for folder, n_img in zip(["train", "val"], args.n):
        gt_folder: Path = Path(args.dest, folder, 'gt')
        img_folder: Path = Path(args.dest, folder, 'img')

        gt_folder.mkdir(parents=True, exist_ok=True)
        img_folder.mkdir(parents=True, exist_ok=True)

        # Forward the requested height: the width used to be passed for both
        # dimensions, silently ignoring H for non-square sizes.
        gen_fn = partial(gen_img,
                         W=W,
                         H=H,
                         r=r,
                         gt_folder=gt_folder,
                         img_folder=img_folder)

        mmap_(gen_fn, range(n_img))
def main(args: argparse.Namespace):
    """Slice paired ``.mhd`` image / segmentation volumes into train and val.

    Recursively collects ``*.mhd`` files under ``args.source_dir``, pairs
    each image with its ``_segmentation`` file by sorted order, holds out
    ``args.retain`` random pairs for validation, and processes every pair
    with ``save_slices`` through ``mmap_``. The 2d and 3d size statistics
    returned by the workers are printed for each split.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retain``, ``n_augment`` and ``shape``.

    Raises:
        AssertionError: if the source folder is missing, the destination
            already exists, or the files cannot be paired.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the .mhd headers (images and segmentations alike)
    mhd_paths: List[Path] = list(src_path.rglob('*.mhd'))
    assert len(
        mhd_paths) % 2 == 0, "Uneven number of .mhd, one+ pair is broken"

    # We sort now, but also id matching is checked while iterating later on
    img_nii_paths: List[Path] = sorted(p for p in mhd_paths
                                       if "_segmentation" not in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in mhd_paths
                                      if "_segmentation" in str(p))
    assert len(img_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    validation_paths: List[Tuple[Path,
                                 Path]] = random.sample(paths, args.retain)
    # Build the set once so the training filter is O(n) instead of O(n^2).
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, Path]] = [
        p for p in paths if p not in validation_set
    ]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    # Augmentation is applied to the training split only.
    for mode, _paths, n_augment in zip(["train", "val"],
                                       [training_paths, validation_paths],
                                       [args.n_augment, 0]):
        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       n_augment=n_augment)
        sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))
        # Each result is a (3d size, min 2d size, max 2d size) triple.
        sizes_3d, sizes_2d_min, sizes_2d_max = map_(np.asarray, zip(*sizes))

        print("2d sizes: ", sizes_2d_min.min(), sizes_2d_max.max())
        print("3d sizes: ", sizes_3d.min(), sizes_3d.mean(), sizes_3d.max())
Exemplo n.º 8
0
def main(args: argparse.Namespace):
    """Slice six-file cases (five modalities + ground truth) into train and val.

    Recursively collects ``*.nii`` files under ``args.source_dir`` (skipping
    paths containing ``_4D``), groups them by the markers ``CT.``,
    ``CT_CBF``, ``CT_CBV``, ``CT_MTT``, ``CT_Tmax`` and ``OT``, and aligns
    the six sorted lists into per-case tuples. ``args.retain`` random cases
    are held out for validation. Every case is processed by ``save_slices``
    through ``mmap_``; the returned dictionaries are merged and pickled to
    ``spacing.pkl`` inside each split folder.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retain``, ``n_augment`` and ``shape``.

    Raises:
        AssertionError: if the source folder is missing, the destination
            already exists, or the files cannot be grouped evenly.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, avoid the temporal ones
    all_paths: List[Path] = list(src_path.rglob('*.nii'))
    nii_paths: List[Path] = [p for p in all_paths if "_4D" not in str(p)]
    assert len(nii_paths) % 6 == 0, "Number of .nii not multiple of 6, some pairs are broken"

    # We sort now, but also id matching is checked while iterating later on
    CT_nii_paths: List[Path] = sorted(p for p in nii_paths if "CT." in str(p))
    CBF_nii_paths: List[Path] = sorted(p for p in nii_paths if "CT_CBF" in str(p))
    CBV_nii_paths: List[Path] = sorted(p for p in nii_paths if "CT_CBV" in str(p))
    MTT_nii_paths: List[Path] = sorted(p for p in nii_paths if "CT_MTT" in str(p))
    Tmax_nii_paths: List[Path] = sorted(p for p in nii_paths if "CT_Tmax" in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in nii_paths if "OT" in str(p))
    assert len(CT_nii_paths) == len(CBF_nii_paths) == len(CBV_nii_paths) == len(MTT_nii_paths) \
        == len(Tmax_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, ...]] = list(zip(CT_nii_paths, CBF_nii_paths, CBV_nii_paths, MTT_nii_paths,
                                             Tmax_nii_paths, gt_nii_paths))

    print(f"Found {len(CT_nii_paths)} pairs in total")
    pprint(paths[:2])

    validation_paths: List[Tuple[Path, ...]] = random.sample(paths, args.retain)
    # Build the set once so the training filter is O(n) instead of O(n^2).
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, ...]] = [p for p in paths if p not in validation_set]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    # Augmentation is applied to the training split only.
    for mode, _paths, n_augment in zip(["train", "val"], [training_paths, validation_paths], [args.n_augment, 0]):
        # Transpose the per-case tuples into one sequence per file kind.
        six_paths = list(zip(*_paths))

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(six_paths[0])} pairs to {dest_dir}")
        assert len(set(map_(len, six_paths))) == 1

        pfun = partial(save_slices, dest_dir=dest_dir, shape=args.shape, n_augment=n_augment)
        space_dicts = mmap_(uc_(pfun), zip(*six_paths))

        # Merge the per-case dictionaries; later cases win on duplicate keys.
        final_dict = {k: v for space_dict in space_dicts for k, v in space_dict.items()}

        spacing_path = Path(dest_dir, "spacing.pkl")
        with open(spacing_path, 'wb') as f:
            pickle.dump(final_dict, f, pickle.HIGHEST_PROTOCOL)
        # Report the path itself rather than the repr of the file object.
        print(f"Saved spacing dictionnary to {spacing_path}")
Exemplo n.º 9
0
def main(args: argparse.Namespace):
    """Slice every patient of the train and val splits.

    The split ids come from ``get_splits(args.id_list, args.retains,
    args.fold)``. Each id is handed to ``slice_patient`` through ``mmap_``,
    writing under ``<dest_dir>/train`` or ``<dest_dir>/val``. Only the
    training split receives ``args.n_augment``; validation uses 0.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``id_list``, ``retains``, ``fold``, ``shape`` and ``n_augment``.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # The caller is expected to have cleaned up beforehand.
    assert src_path.exists()
    assert not dest_path.exists()

    splits = get_splits(args.id_list, args.retains, args.fold)
    training_ids, validation_ids = splits

    for mode, split_ids in (("train", training_ids), ("val", validation_ids)):
        dest_mode: Path = Path(dest_path, mode)
        print(f"Slicing {len(split_ids)} pairs to {dest_mode}")

        if mode == "train":
            n_augment = args.n_augment
        else:
            n_augment = 0

        worker: Callable = partial(slice_patient,
                                   dest_path=dest_mode,
                                   source_path=src_path,
                                   shape=tuple(args.shape),
                                   n_augment=n_augment)
        mmap_(worker, split_ids)
Exemplo n.º 10
0
def main(args: argparse.Namespace):
    """Slice FLAIR / T1 / ground-truth triplets into train and val sets.

    Recursively collects ``*.nii.gz`` files under ``args.source_dir``
    (skipping paths containing ``_4D``), groups them by the markers
    ``FLAIR``, ``T1`` and ``wmh.nii``, and aligns the three sorted lists into
    per-case tuples. ``args.retain`` random cases are held out for
    validation. Every case is processed by ``save_slices`` through
    ``mmap_``; the positive/negative counts are summed and reported, and the
    returned dictionaries are merged and pickled to ``spacing.pkl`` inside
    each split folder.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retain``, ``n_augment``, ``shape`` and ``discard_negatives``.

    Raises:
        AssertionError: if the source folder is missing, the destination
            already exists, or the files cannot be grouped evenly.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, avoid the temporal ones
    all_paths: List[Path] = list(src_path.rglob('*.nii.gz'))
    nii_paths: List[Path] = [p for p in all_paths if "_4D" not in str(p)]
    # Three files per case here, so the message must say 3 (it used to say 6).
    assert len(nii_paths) % 3 == 0, "Number of .nii not multiple of 3, some triplets are broken"

    # We sort now, but also id matching is checked while iterating later on
    flair_nii_paths: List[Path] = sorted(p for p in nii_paths if "FLAIR" in str(p))
    t1_nii_paths: List[Path] = sorted(p for p in nii_paths if "T1" in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in nii_paths if "wmh.nii" in str(p))
    assert len(flair_nii_paths) == len(t1_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, ...]] = list(zip(flair_nii_paths, t1_nii_paths, gt_nii_paths))

    print(f"Found {len(flair_nii_paths)} pairs in total")
    pprint(paths[:2])

    validation_paths: List[Tuple[Path, ...]] = random.sample(paths, args.retain)
    # Build the set once so the training filter is O(n) instead of O(n^2).
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, ...]] = [p for p in paths if p not in validation_set]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    # Augmentation is applied to the training split only.
    for mode, _paths, n_augment in zip(["train", "val"], [training_paths, validation_paths], [args.n_augment, 0]):
        # Transpose the per-case tuples into one sequence per file kind.
        three_paths = list(zip(*_paths))

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(three_paths[0])} pairs to {dest_dir}")
        assert len(set(map_(len, three_paths))) == 1

        pfun = partial(save_slices, dest_dir=dest_dir, shape=args.shape, n_augment=n_augment,
                       discard_negatives=args.discard_negatives)
        sizes = mmap_(uc_(pfun), zip(*three_paths))
        # Each result is a (negative count, positive count, spacing dict) triple.
        all_neg, all_pos, space_dicts = zip(*sizes)
        neg, pos = sum(all_neg), sum(all_pos)
        # A split without any negative must not abort the run before the
        # spacing file is written.
        if neg:
            ratio = pos / neg
        else:
            ratio = float("inf") if pos else float("nan")
        print(f"Ratio between pos/neg: {ratio} ({pos}/{neg})")

        # Merge the per-case dictionaries; later cases win on duplicate keys.
        final_dict = {k: v for space_dict in space_dicts for k, v in space_dict.items()}

        spacing_path = Path(dest_dir, "spacing.pkl")
        with open(spacing_path, 'wb') as f:
            pickle.dump(final_dict, f, pickle.HIGHEST_PROTOCOL)
        # Report the path itself rather than the repr of the file object.
        print(f"Saved spacing dictionnary to {spacing_path}")
Exemplo n.º 11
0
def main(args: argparse.Namespace):
    """Slice paired ``.mhd`` volumes into train/val/test splits for one fold.

    Recursively collects ``*.mhd`` files under ``args.source_dir``, pairs
    each image with its ``_segmentation`` file by sorted order (exactly 50
    pairs are required), and derives a patient id for every image with
    ``get_p_id``. The ids are shuffled, then a validation window of
    ``args.retains`` ids and a test window of ``args.retains_test`` ids are
    taken at an offset that depends on ``args.fold``; the remaining ids form
    the training split. Every pair is processed by ``save_slices`` through
    ``mmap_`` and the returned size statistics are printed per split.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retains``, ``retains_test``, ``fold``, ``n_augment`` and
            ``shape``.

    Raises:
        AssertionError: if any of the sanity checks on the inputs or on the
            resulting splits fails.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the .mhd file names found under the source directory
    nii_paths: list[Path] = [p for p in src_path.rglob('*.mhd')]
    assert len(
        nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # We sort now, but also id matching is checked while iterating later on
    img_nii_paths: list[Path] = sorted(p for p in nii_paths
                                       if "_segmentation" not in str(p))
    gt_nii_paths: list[Path] = sorted(p for p in nii_paths
                                      if "_segmentation" in str(p))
    assert len(img_nii_paths) == len(
        gt_nii_paths) == 50  # Hardcode that value for sanity test
    paths: list[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    pids: list[str] = sorted(set(map_(get_p_id, img_nii_paths)))
    # Sanity test: the check below requires as many distinct patient ids as
    # images, i.e. exactly one scan per patient in this dataset
    assert len(pids) == len(img_nii_paths), (len(pids), len(img_nii_paths))

    random.shuffle(
        pids
    )  # Shuffle before to avoid any problem if the patients are sorted in any way
    # NOTE(review): the shuffle depends on the global random state; the folds
    # are only consistent across runs if it is seeded elsewhere -- confirm.
    fold_size: int = args.retains + args.retains_test
    offset: int = args.fold * fold_size
    # offset by (fold_size) at the beginning
    validation_slice = slice(offset, offset + args.retains)
    # offset by (fold_size + val_retains) at the beginning)
    test_slice = slice(offset + args.retains,
                       offset + args.retains + args.retains_test)

    validation_pids: list[str] = pids[validation_slice]
    test_pids: list[str] = pids[test_slice]
    # Everything outside the validation and test windows is used for training
    training_pids: list[str] = [
        pid for pid in pids
        if (pid not in validation_pids) and (pid not in test_pids)
    ]

    # The three id sets must partition the full list of patient ids
    assert len(validation_pids) == args.retains
    assert (len(validation_pids) + len(training_pids) +
            len(test_pids)) == len(pids)
    assert set(validation_pids).union(training_pids).union(test_pids) == set(
        pids)
    assert set(validation_pids).isdisjoint(training_pids)
    assert set(validation_pids).isdisjoint(test_pids)
    assert set(test_pids).isdisjoint(training_pids)

    # assert len(test_pids) == args.retains_test

    # Map the patient-level split back onto the (image, ground truth) pairs
    validation_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in validation_pids
    ]
    test_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in test_pids
    ]
    training_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in training_pids
    ]

    # redundant sanity, but you never know
    assert set(validation_paths).isdisjoint(set(training_paths))
    assert set(validation_paths).isdisjoint(set(test_paths))
    assert set(test_paths).isdisjoint(set(training_paths))
    assert len(paths) == (len(validation_paths) + len(training_paths) +
                          len(test_paths))
    assert len(validation_paths) == args.retains
    assert len(test_paths) == args.retains_test
    assert len(training_paths) == (len(paths) - fold_size)

    # Augmentation is requested for the training split only
    for mode, _paths, n_augment in zip(
        ["train", "val", "test"],
        [training_paths, validation_paths, test_paths],
        [args.n_augment, 0, 0]):
        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       n_augment=n_augment)
        sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))
        # Sequential alternative to mmap_, kept for debugging:
        # sizes = []
        # for paths in tqdm(list(zip(img_paths, gt_paths)), ncols=50):
        #     sizes.append(uc_(pfun)(paths))
        # Each result is unpacked as a (3d size, min 2d size, max 2d size) triple
        sizes_3d, sizes_2d_min, sizes_2d_max = map_(np.asarray, zip(*sizes))

        print("2d sizes: ", sizes_2d_min.min(), sizes_2d_max.max())
        print("3d sizes: ", sizes_3d.min(), sizes_3d.mean(), sizes_3d.max())
def main(args: argparse.Namespace):
    """Slice an already split (train/val) dataset of image / label pairs.

    Reads ``<source_dir>/train`` and ``<source_dir>/val``. In each, the
    ``*.nii.gz`` files (skipping paths containing ``_4D``) are grouped into
    images (``imagesTr``) and labels (``labelsTr``) and paired by sorted
    order. Every pair is processed by ``save_slices`` through ``mmap_``; the
    positive/negative counts are summed and reported, and the returned
    dictionaries are merged and pickled to ``spacing.pkl`` inside each split
    folder of ``args.dest_dir``.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``n_augment``, ``shape`` and ``discard_negatives``.

    Raises:
        AssertionError: if an input folder is missing, the destination
            already exists, or the files cannot be paired.
    """
    train_path: Path = Path(args.source_dir, 'train')
    val_path: Path = Path(args.source_dir, 'val')
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert train_path.exists() and val_path.exists()
    assert not dest_path.exists()

    # Get all the file names, avoid the temporal ones in the training directory
    all_paths_train: List[Path] = list(train_path.rglob('*.nii.gz'))
    nii_paths_train: List[Path] = [
        p for p in all_paths_train if "_4D" not in str(p)
    ]
    # The check is on pairs, so the message must say 2 (it used to say 6).
    assert len(
        nii_paths_train
    ) % 2 == 0, "Number of .nii not multiple of 2, some pairs GT/CT are broken"

    # Get all the file names, avoid the temporal ones in the validation directory
    all_paths_val: List[Path] = list(val_path.rglob('*.nii.gz'))
    nii_paths_val: List[Path] = [
        p for p in all_paths_val if "_4D" not in str(p)
    ]
    assert len(
        nii_paths_val
    ) % 2 == 0, "Number of .nii not multiple of 2, some pairs GT/CT are broken"

    # For training
    IMG_nii_paths_train: List[Path] = sorted(p for p in nii_paths_train
                                             if "imagesTr" in str(p))
    gt_nii_paths_train: List[Path] = sorted(p for p in nii_paths_train
                                            if "labelsTr" in str(p))
    assert len(IMG_nii_paths_train) == len(gt_nii_paths_train)
    paths_train: List[Tuple[Path, ...]] = list(
        zip(IMG_nii_paths_train, gt_nii_paths_train))

    # For validation
    IMG_nii_paths_val: List[Path] = sorted(p for p in nii_paths_val
                                           if "imagesTr" in str(p))
    gt_nii_paths_val: List[Path] = sorted(p for p in nii_paths_val
                                          if "labelsTr" in str(p))
    assert len(IMG_nii_paths_val) == len(gt_nii_paths_val)
    paths_val: List[Tuple[Path,
                          ...]] = list(zip(IMG_nii_paths_val,
                                           gt_nii_paths_val))

    print(f"Found training {len(IMG_nii_paths_train)} pairs in total")
    pprint(paths_train[:2])

    # This count is for the validation folder; it used to be labelled "training".
    print(f"Found validation {len(IMG_nii_paths_val)} pairs in total")
    pprint(paths_val[:2])

    validation_paths: List[Tuple[Path, ...]] = list(paths_val)
    training_paths: List[Tuple[Path, ...]] = list(paths_train)
    assert set(validation_paths).isdisjoint(set(training_paths))

    # Augmentation is applied to the training split only.
    for mode, _paths, n_augment in zip(["train", "val"],
                                       [training_paths, validation_paths],
                                       [args.n_augment, 0]):
        # Transpose the per-case tuples into one sequence per file kind.
        split_columns = list(zip(*_paths))

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(split_columns[0])} pairs to {dest_dir}")
        assert len(set(map_(len, split_columns))) == 1

        pfun = partial(save_slices,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       n_augment=n_augment,
                       discard_negatives=args.discard_negatives)
        sizes = mmap_(uc_(pfun), zip(*split_columns))
        # Each result is a (negative count, positive count, spacing dict) triple.
        all_neg, all_pos, space_dicts = zip(*sizes)
        neg, pos = sum(all_neg), sum(all_pos)
        # A split without any negative must not abort the run before the
        # spacing file is written.
        if neg:
            ratio = pos / neg
        else:
            ratio = float("inf") if pos else float("nan")
        print(f"Ratio between pos/neg: {ratio} ({pos}/{neg})")

        # Merge the per-case dictionaries; later cases win on duplicate keys.
        final_dict = {
            k: v
            for space_dict in space_dicts for k, v in space_dict.items()
        }

        spacing_path = Path(dest_dir, "spacing.pkl")
        with open(spacing_path, 'wb') as f:
            pickle.dump(final_dict, f, pickle.HIGHEST_PROTOCOL)
        # Report the path itself rather than the repr of the file object.
        print(f"Saved spacing dictionnary to {spacing_path}")
Exemplo n.º 13
0
def main(args: argparse.Namespace):
    """Slice paired ``.nii.gz`` volumes into train/val/test splits for one fold.

    Recursively collects ``*.nii.gz`` files under ``args.source_dir``
    (skipping paths containing ``_4d``), pairs each image with its ``_gt``
    file by sorted order (exactly 200 pairs are required), and derives a
    patient id for every image with ``get_p_id``; two images per patient are
    expected. The ids are shuffled, then a validation window of
    ``args.retains`` ids and a test window of ``args.retains_test`` ids are
    taken at an offset that depends on ``args.fold``; the remaining ids form
    the training split. Every pair is processed by ``save_slices`` through
    ``mmap_``; size statistics are printed and plotted per split.

    Args:
        args: Parsed command line; reads ``source_dir``, ``dest_dir``,
            ``retains``, ``retains_test``, ``fold``, ``n_augment`` and
            ``shape``.

    Raises:
        AssertionError: if any of the sanity checks on the inputs or on the
            resulting splits fails.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, avoid the temporal ones
    nii_paths: list[Path] = [
        p for p in src_path.rglob('*.nii.gz') if "_4d" not in str(p)
    ]
    assert len(
        nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # We sort now, but also id matching is checked while iterating later on
    img_nii_paths: list[Path] = sorted(p for p in nii_paths
                                       if "_gt" not in str(p))
    gt_nii_paths: list[Path] = sorted(p for p in nii_paths if "_gt" in str(p))
    assert len(img_nii_paths) == len(
        gt_nii_paths) == 200  # Hardcode that value for sanity test
    paths: list[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    pids: list[str] = sorted(set(map_(get_p_id, img_nii_paths)))
    # Sanity test: there are two scans per patient: we don't want to mix them up
    assert len(pids) == (len(img_nii_paths) // 2), (len(pids),
                                                    len(img_nii_paths))

    random.shuffle(
        pids
    )  # Shuffle before to avoid any problem if the patients are sorted in any way
    # NOTE(review): the shuffle depends on the global random state; the folds
    # are only consistent across runs if it is seeded elsewhere -- confirm.
    fold_size: int = args.retains + args.retains_test
    offset: int = args.fold * fold_size
    # offset by (fold_size) at the beginning
    validation_slice = slice(offset, offset + args.retains)
    # offset by (fold_size + val_retains) at the beginning)
    test_slice = slice(offset + args.retains,
                       offset + args.retains + args.retains_test)

    validation_pids: list[str] = pids[validation_slice]
    test_pids: list[str] = pids[test_slice]
    # Everything outside the validation and test windows is used for training
    training_pids: list[str] = [
        pid for pid in pids
        if (pid not in validation_pids) and (pid not in test_pids)
    ]

    # The three id sets must partition the full list of patient ids
    assert len(validation_pids) == args.retains
    assert (len(validation_pids) + len(training_pids) +
            len(test_pids)) == len(pids)
    assert set(validation_pids).union(training_pids).union(test_pids) == set(
        pids)
    assert set(validation_pids).isdisjoint(training_pids)
    assert set(validation_pids).isdisjoint(test_pids)
    assert set(test_pids).isdisjoint(training_pids)

    # assert len(test_pids) == args.retains_test

    # Map the patient-level split back onto the (image, ground truth) pairs
    validation_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in validation_pids
    ]
    test_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in test_pids
    ]
    training_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in training_pids
    ]

    # redundant sanity, but you never know
    # (the factors of 2 below reflect the two scans per patient)
    assert set(validation_paths).isdisjoint(set(training_paths))
    assert set(validation_paths).isdisjoint(set(test_paths))
    assert set(test_paths).isdisjoint(set(training_paths))
    assert len(paths) == (len(validation_paths) + len(training_paths) +
                          len(test_paths))
    assert len(validation_paths) == 2 * args.retains
    assert len(test_paths) == 2 * args.retains_test
    assert len(training_paths) == (len(paths) - 2 * fold_size)

    # Augmentation is requested for the training split only
    for mode, _paths, n_augment in zip(
        ["train", "val", "test"],
        [training_paths, validation_paths, test_paths],
        [args.n_augment, 0, 0]):
        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       n_augment=n_augment)
        all_sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))
        # Sequential alternative to mmap_, kept for debugging:
        # for paths in tqdm(list(zip(img_paths, gt_paths)), ncols=50):
        #     uc_(pfun)(paths)

        # Each result is unpacked as a 4-tuple: per-slice sizes (px, mm2) and
        # volume sizes (px, mm3)
        all_slices_sizes_px, all_slices_sizes_mm2, all_volume_size_px, all_volume_size_mm3 = zip(
            *all_sizes)

        flat_sizes_px = flatten_(all_slices_sizes_px)
        flat_sizes_mm2 = flatten_(all_slices_sizes_mm2)
        print("px", len(flat_sizes_px), min(flat_sizes_px), max(flat_sizes_px))
        print('\t', "px 5/95", np.percentile(flat_sizes_px, 5),
              np.percentile(flat_sizes_px, 95))
        print('\t', "mm2", f"{min(flat_sizes_mm2):.02f}",
              f"{max(flat_sizes_mm2):.02f}")

        # NOTE(review): the figure is built but neither shown nor saved in
        # this function -- confirm whether that happens elsewhere.
        _, axes = plt.subplots(nrows=2, ncols=2)
        axes = axes.flatten()

        axes[0].set_title("Slice surface (pixel)")
        axes[0].boxplot(all_slices_sizes_px, whis=[0, 100])

        axes[1].set_title("Slice surface (mm2)")
        axes[1].boxplot(all_slices_sizes_mm2, whis=[0, 100])

        # One histogram bin per two volumes of the split
        axes[2].set_title("LV volume (pixel)")
        axes[2].hist(all_volume_size_px, bins=len(all_volume_size_px) // 2)

        axes[3].set_title("LV volume (mm3)")
        axes[3].hist(all_volume_size_mm3, bins=len(all_volume_size_px) // 2)