Exemplo n.º 1
0
def main(args: argparse.Namespace):
    """Split paired volumes into train/val sets and slice each set.

    Reads ``source_dir``, ``dest_dir``, ``retain``, ``shape`` and
    ``n_augment`` from ``args``.

    Every ``*.nii.gz`` file found recursively under ``source_dir`` is
    collected, except those whose path contains ``_4d``. A file is treated as
    a ground truth when its path contains ``_gt`` and as an image otherwise;
    the two sorted lists are then paired positionally. ``retain`` pairs are
    drawn at random for validation and the remaining ones form the training
    set. Only the training set is processed with ``args.n_augment``; the
    validation set uses ``0``.

    For each non-empty split, the pairs are handed to ``save_slices`` with
    ``dest_dir/<mode>`` as destination, and the sizes it returns are printed
    and drawn on a 2x2 figure. Empty splits (e.g. ``retain == 0``) are
    skipped instead of crashing while unpacking the pairs.

    Raises:
        AssertionError: if ``source_dir`` does not exist, ``dest_dir`` already
            exists, the number of files is odd, or the image and ground-truth
            lists differ in length.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, skipping the temporal ones (any path with "_4d")
    nii_paths: List[Path] = [p for p in src_path.rglob('*.nii.gz') if "_4d" not in str(p)]
    assert len(nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # Pairing relies on both lists sorting in the same order. Per the original
    # note, id matching is checked again later while iterating (not visible
    # in this function -- presumably inside save_slices).
    img_nii_paths: List[Path] = sorted(p for p in nii_paths if "_gt" not in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in nii_paths if "_gt" in str(p))
    assert len(img_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    validation_paths: List[Tuple[Path, Path]] = random.sample(paths, args.retain)
    # Set lookup instead of scanning the validation list once per pair
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, Path]] = [p for p in paths if p not in validation_set]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    for mode, _paths, n_augment in zip(["train", "val"], [training_paths, validation_paths], [args.n_augment, 0]):
        if not _paths:
            # zip(*[]) yields nothing, so the unpacking below would raise
            print(f"No pairs for {mode}, skipping")
            continue

        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices, dest_dir=dest_dir, shape=args.shape, n_augment=n_augment)
        # NOTE(review): mmap_ presumably maps over the pairs (possibly in
        # parallel) and uc_ presumably unpacks each pair into positional
        # arguments -- both are defined elsewhere, confirm there.
        all_sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))

        # One 4-tuple per pair -> four sequences with one entry per pair
        all_slices_sizes_px, all_slices_sizes_mm2, all_volume_size_px, all_volume_size_mm3 = zip(*all_sizes)

        flat_sizes_px = flatten_(all_slices_sizes_px)
        flat_sizes_mm2 = flatten_(all_slices_sizes_mm2)
        print("px", len(flat_sizes_px), min(flat_sizes_px), max(flat_sizes_px))
        print('\t', "px 5/95", np.percentile(flat_sizes_px, 5), np.percentile(flat_sizes_px, 95))
        print('\t', "mm2", f"{min(flat_sizes_mm2):.02f}", f"{max(flat_sizes_mm2):.02f}")

        _, axes = plt.subplots(nrows=2, ncols=2)
        axes = axes.flatten()

        axes[0].set_title("Slice surface (pixel)")
        axes[0].boxplot(all_slices_sizes_px, whis=[0, 100])

        axes[1].set_title("Slice surface (mm2)")
        axes[1].boxplot(all_slices_sizes_mm2, whis=[0, 100])

        # A single-volume split would otherwise ask for 0 bins
        n_bins = max(1, len(all_volume_size_px) // 2)

        axes[2].set_title("LV volume (pixel)")
        axes[2].hist(all_volume_size_px, bins=n_bins)

        axes[3].set_title("LV volume (mm3)")
        axes[3].hist(all_volume_size_mm3, bins=n_bins)
Exemplo n.º 2
0
def main(args: argparse.Namespace):
    """Split paired volumes into train/val sets and process each patient.

    Reads ``source_dir``, ``dest_dir``, ``retain``, ``shape`` and ``crop``
    from ``args``.

    Every ``*.nii`` file found recursively under ``source_dir`` is collected.
    A file is treated as a ground truth when its path contains ``_Labels``
    and as an image otherwise; the two sorted lists are then paired
    positionally. ``retain`` pairs are drawn at random for validation and the
    remaining ones form the training set.

    For each non-empty split, the pairs are handed to ``process_patient``
    with ``dest_dir/<mode>`` as destination, and summary statistics of the
    strictly positive sizes it returns are printed. Empty splits (e.g.
    ``retain == 0``) are skipped instead of crashing while unpacking the
    pairs, and a split without any positive size prints a short notice
    instead of failing on an empty array.

    Raises:
        AssertionError: if ``source_dir`` does not exist, ``dest_dir`` already
            exists, the number of files is odd, or the image and ground-truth
            lists differ in length.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names (no filtering is applied in this variant)
    nii_paths: List[Path] = list(src_path.rglob('*.nii'))
    assert len(
        nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # Pairing relies on both lists sorting in the same order. Per the original
    # note, id matching is checked again later while iterating (not visible
    # in this function -- presumably inside process_patient).
    img_nii_paths: List[Path] = sorted(p for p in nii_paths
                                       if "_Labels" not in str(p))
    gt_nii_paths: List[Path] = sorted(p for p in nii_paths
                                      if "_Labels" in str(p))
    assert len(img_nii_paths) == len(gt_nii_paths)
    paths: List[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    validation_paths: List[Tuple[Path,
                                 Path]] = random.sample(paths, args.retain)
    # Set lookup instead of scanning the validation list once per pair
    validation_set = set(validation_paths)
    training_paths: List[Tuple[Path, Path]] = [
        p for p in paths if p not in validation_set
    ]
    assert validation_set.isdisjoint(training_paths)
    assert len(paths) == (len(validation_paths) + len(training_paths))

    for mode, _paths in zip(["train", "val"],
                            [training_paths, validation_paths]):
        if not _paths:
            # zip(*[]) yields nothing, so the unpacking below would raise
            print(f"No pairs for {mode}, skipping")
            continue

        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(process_patient,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       cr=args.crop)
        # NOTE(review): mmap_ presumably maps over the pairs (possibly in
        # parallel) and uc_ presumably unpacks each pair into positional
        # arguments -- both are defined elsewhere, confirm there.
        per_patient_sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))

        all_sizes = np.array(flatten_(per_patient_sizes))
        # Only strictly positive sizes take part in the statistics
        all_pos = all_sizes[all_sizes > 0]

        if all_pos.size == 0:
            # np.min / np.percentile raise on an empty array
            print("sizes: no positive size found")
        else:
            print(
                f"sizes: min={np.min(all_pos)}, 5th={np.percentile(all_pos, 5):0.02f}, median={np.median(all_pos):0.0f}, "
                f"mean={np.mean(all_pos):0.02f}, 95th={np.percentile(all_pos, 95):0.02f}, max={np.max(all_pos)}"
            )
Exemplo n.º 3
0
def main(args: argparse.Namespace):
    """Split patients into train/val/test folds and slice each split.

    Reads ``source_dir``, ``dest_dir``, ``retains``, ``retains_test``,
    ``fold``, ``shape`` and ``n_augment`` from ``args``.

    Every ``*.nii.gz`` file found recursively under ``source_dir`` is
    collected, except those whose path contains ``_4d``. A file is treated as
    a ground truth when its path contains ``_gt`` and as an image otherwise;
    the two sorted lists are then paired positionally. Exactly 200 pairs are
    expected (hard-coded sanity check).

    The split is done on patient ids (as returned by ``get_p_id``) so that
    the two scans of one patient always land in the same split. After
    shuffling the ids, fold ``k`` uses
    ``pids[k * fold_size:(k + 1) * fold_size]`` with
    ``fold_size = retains + retains_test``: the first ``retains`` ids go to
    validation, the next ``retains_test`` to test, and every other id to
    training. Only the training set is processed with ``args.n_augment``;
    the other two use ``0``.

    For each non-empty split, the pairs are handed to ``save_slices`` with
    ``dest_dir/<mode>`` as destination, and the sizes it returns are printed
    and drawn on a 2x2 figure. Empty splits (e.g. ``retains_test == 0``) are
    skipped instead of crashing while unpacking the pairs.

    Raises:
        AssertionError: if any of the sanity checks on the directories, the
            number of files, the patient ids or the split sizes fails.
    """
    src_path: Path = Path(args.source_dir)
    dest_path: Path = Path(args.dest_dir)

    # Assume the cleaning up is done before calling the script
    assert src_path.exists()
    assert not dest_path.exists()

    # Get all the file names, skipping the temporal ones (any path with "_4d")
    nii_paths: list[Path] = [
        p for p in src_path.rglob('*.nii.gz') if "_4d" not in str(p)
    ]
    assert len(
        nii_paths) % 2 == 0, "Uneven number of .nii, one+ pair is broken"

    # Pairing relies on both lists sorting in the same order. Per the original
    # note, id matching is checked again later while iterating (not visible
    # in this function -- presumably inside save_slices).
    img_nii_paths: list[Path] = sorted(p for p in nii_paths
                                       if "_gt" not in str(p))
    gt_nii_paths: list[Path] = sorted(p for p in nii_paths if "_gt" in str(p))
    assert len(img_nii_paths) == len(
        gt_nii_paths) == 200  # Hardcode that value for sanity test
    paths: list[Tuple[Path, Path]] = list(zip(img_nii_paths, gt_nii_paths))

    print(f"Found {len(img_nii_paths)} pairs in total")
    pprint(paths[:5])

    pids: list[str] = sorted(set(map_(get_p_id, img_nii_paths)))
    # Sanity test: there are two scans per patient, and we don't want to
    # mix them up across splits
    assert len(pids) == (len(img_nii_paths) // 2), (len(pids),
                                                    len(img_nii_paths))

    # Shuffle first to avoid any bias if the patients are sorted in some way
    random.shuffle(pids)
    fold_size: int = args.retains + args.retains_test
    offset: int = args.fold * fold_size
    # Validation: the first `retains` ids of the fold
    validation_slice = slice(offset, offset + args.retains)
    # Test: the `retains_test` ids right after the validation ones
    test_slice = slice(offset + args.retains, offset + fold_size)

    validation_pids: list[str] = pids[validation_slice]
    test_pids: list[str] = pids[test_slice]
    # Sets give constant-time membership for the filters below
    validation_set = set(validation_pids)
    test_set = set(test_pids)
    training_pids: list[str] = [
        pid for pid in pids
        if (pid not in validation_set) and (pid not in test_set)
    ]
    training_set = set(training_pids)

    assert len(validation_pids) == args.retains
    assert (len(validation_pids) + len(training_pids) +
            len(test_pids)) == len(pids)
    assert validation_set.union(training_set).union(test_set) == set(pids)
    assert validation_set.isdisjoint(training_set)
    assert validation_set.isdisjoint(test_set)
    assert test_set.isdisjoint(training_set)

    # The size of the test split is enforced on the paths further down
    validation_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in validation_set
    ]
    test_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in test_set
    ]
    training_paths: list[Tuple[Path, Path]] = [
        p for p in paths if get_p_id(p[0]) in training_set
    ]

    # redundant sanity, but you never know
    assert set(validation_paths).isdisjoint(set(training_paths))
    assert set(validation_paths).isdisjoint(set(test_paths))
    assert set(test_paths).isdisjoint(set(training_paths))
    assert len(paths) == (len(validation_paths) + len(training_paths) +
                          len(test_paths))
    assert len(validation_paths) == 2 * args.retains
    assert len(test_paths) == 2 * args.retains_test
    assert len(training_paths) == (len(paths) - 2 * fold_size)

    for mode, _paths, n_augment in zip(
        ["train", "val", "test"],
        [training_paths, validation_paths, test_paths],
        [args.n_augment, 0, 0]):
        if not _paths:
            # zip(*[]) yields nothing, so the unpacking below would raise
            print(f"No pairs for {mode}, skipping")
            continue

        img_paths, gt_paths = zip(*_paths)  # type: Tuple[Any, Any]

        dest_dir = Path(dest_path, mode)
        print(f"Slicing {len(img_paths)} pairs to {dest_dir}")
        assert len(img_paths) == len(gt_paths)

        pfun = partial(save_slices,
                       dest_dir=dest_dir,
                       shape=args.shape,
                       n_augment=n_augment)
        # NOTE(review): mmap_ presumably maps over the pairs (possibly in
        # parallel) and uc_ presumably unpacks each pair into positional
        # arguments -- both are defined elsewhere, confirm there.
        all_sizes = mmap_(uc_(pfun), zip(img_paths, gt_paths))

        # One 4-tuple per pair -> four sequences with one entry per pair
        all_slices_sizes_px, all_slices_sizes_mm2, all_volume_size_px, all_volume_size_mm3 = zip(
            *all_sizes)

        flat_sizes_px = flatten_(all_slices_sizes_px)
        flat_sizes_mm2 = flatten_(all_slices_sizes_mm2)
        print("px", len(flat_sizes_px), min(flat_sizes_px), max(flat_sizes_px))
        print('\t', "px 5/95", np.percentile(flat_sizes_px, 5),
              np.percentile(flat_sizes_px, 95))
        print('\t', "mm2", f"{min(flat_sizes_mm2):.02f}",
              f"{max(flat_sizes_mm2):.02f}")

        _, axes = plt.subplots(nrows=2, ncols=2)
        axes = axes.flatten()

        axes[0].set_title("Slice surface (pixel)")
        axes[0].boxplot(all_slices_sizes_px, whis=[0, 100])

        axes[1].set_title("Slice surface (mm2)")
        axes[1].boxplot(all_slices_sizes_mm2, whis=[0, 100])

        # Guard against asking for 0 bins on a very small split
        n_bins = max(1, len(all_volume_size_px) // 2)

        axes[2].set_title("LV volume (pixel)")
        axes[2].hist(all_volume_size_px, bins=n_bins)

        axes[3].set_title("LV volume (mm3)")
        axes[3].hist(all_volume_size_mm3, bins=n_bins)