Example #1
def convert_raw(task, overwrite, ov):
    """Convert a raw task into a single-foreground-class (FG) variant."""
    task_name_full = get_task(task, name=True)
    task_num, task_name = task_name_full[4:].split('_', 1)  # strip the "Task" prefix
    new_task_name_full = f"Task{task_num}FG_{task_name}"

    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])
    print(cfg.pretty())

    source_splitted_dir = Path(cfg["host"]["splitted_4d_output_dir"])
    target_splitted_dir = Path(str(source_splitted_dir).replace(task_name_full, new_task_name_full))
    if target_splitted_dir.is_dir() and overwrite:
        shutil.rmtree(target_splitted_dir)
    target_splitted_dir.mkdir(parents=True)  # fails if the dir exists and overwrite was not set

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(target_splitted_dir.parent / "convert_cls2fg.log", level="DEBUG")

    # update dataset_info
    source_data_info = Path(cfg["host"]["data_dir"])
    data_info = load_dataset_info(source_data_info)
    data_info["labels"] = {"0": "fg"}  # collapse all classes into a single foreground class
    data_info["task"] = new_task_name_full
    save_json(data_info, target_splitted_dir.parent / "dataset.json", indent=4)

    for postfix in ["Tr", "Ts"]:
        source_image_dir = source_splitted_dir / f"images{postfix}"
        source_label_dir = source_splitted_dir / f"labels{postfix}"

        if not source_image_dir.is_dir():
            logger.info(f"{source_image_dir} is not a dir. Skipping it.")
            continue

        # copy images and labels
        shutil.copytree(source_image_dir, target_splitted_dir / f"images{postfix}")
        shutil.copytree(source_label_dir, target_splitted_dir / f"labels{postfix}")

        # remap properties file to foreground class
        target_label_dir = target_splitted_dir / f"labels{postfix}"
        for f in target_label_dir.glob("*.json"):
            props = load_json(f)
            props["instances"] = {key: 0 for key in props["instances"].keys()}
            save_json(props, f)
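
A minimal usage sketch (not part of the original snippet; the task name is reused from Example #5 and the override is hypothetical):

# hypothetical task name and Hydra-style override
convert_raw("Task003_Liver", overwrite=True, ov=["host.data_dir=/data"])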
Example #2
def _full_check(
    case_paths: List[Path],
    mask_info_path: Optional[Path] = None,
) -> None:
    """
    Performas itk and instance chekcs on provided paths

    Args:
        case_paths: paths to all itk images to check properties
            if label is provided it needs to be at the last position
        mask_info_path: optionally check label properties. If None, no
            check of label properties will be performed.

    Raises:
        ValueError: Inconsistent instances in label info and label image

    See also:
        :func:`_check_itk_params`
    """
    img_itk_seq = [load_sitk(cp) for cp in case_paths]
    _check_itk_params(img_itk_seq, case_paths)

    if mask_info_path is not None:
        mask_itk = img_itk_seq[-1]
        mask_info = load_json(mask_info_path)
        info_instances = list(map(int, mask_info["instances"].keys()))
        mask_instances = np.unique(sitk.GetArrayViewFromImage(mask_itk))
        mask_instances = mask_instances[mask_instances > 0]

        for mi in mask_instances:
            if mi not in info_instances:
                raise ValueError(f"Found instance ID {mi} in mask which is "
                                f"not present in info {info_instances} in {mask_info_path}")
        if len(info_instances) != len(mask_instances):
            raise ValueError("Found instances in info which are not present in mask: "
                            f"mask: {mask_instances} info {info_instances} in {mask_info_path}")
Example #3
def nnunet_dataset_json(nnunet_task: str):
    if (p := os.getenv("nnUNet_raw_data_base")) is not None:
        search_dir = Path(p) / "nnUNet_raw_data" / nnunet_task
        logger.info(f"Looking for dataset.json in {search_dir}")
        if (fp := search_dir / "dataset.json").is_file():
            return load_json(fp)
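
A usage sketch (task name reused from Example #5; this variant returns None when nnUNet_raw_data_base is unset or no dataset.json is found):

# requires nnUNet_raw_data_base to be set in the environment
meta = nnunet_dataset_json("Task003_Liver")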
Example #4
    if save_seg:
        segmentation = {"pred_seg": np.argmax(probs, axis=0)}
        save_pickle(segmentation, segmentation_target)


def nnunet_dataset_json(nnunet_task: str):
    # check both candidate locations and fall through if dataset.json is missing
    if (p := os.getenv("nnUNet_raw_data_base")) is not None:
        search_dir = Path(p) / "nnUNet_raw_data" / nnunet_task
        logger.info(f"Looking for dataset.json in {search_dir}")
        if (fp := search_dir / "dataset.json").is_file():
            return load_json(fp)
    if (p := os.getenv("nnUNet_preprocessed")) is not None:
        search_dir = Path(p) / nnunet_task
        logger.info(f"Looking for dataset.json in {search_dir}")
        if (fp := search_dir / "dataset.json").is_file():
            return load_json(fp)
    raise ValueError("Was not able to find nnunet dataset.json")


def copy_and_ensemble(cid, nnunet_dirs, nnunet_prediction_dir, fold):
    """Average the per-fold softmax predictions of several trainings for one case."""
    logger.info(f"Copy and ensemble: {cid}")
    # load the softmax prediction of the given fold from every training dir
    case = [
        np.load(_nnunet_dir / f"fold_{fold}" / "validation_raw" /
                f"{cid}.npz")["softmax"] for _nnunet_dir in nnunet_dirs
    ]
    assert len(case) == len(nnunet_dirs)
    case_ensemble = np.mean(case, axis=0)
    assert case_ensemble.shape == case[0].shape

    # store under the same "softmax" key that the per-fold files use
    np.savez_compressed(nnunet_prediction_dir / f"{cid}.npz",
                        softmax=case_ensemble)
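
A usage sketch (hypothetical directories and case ID):

# hypothetical result directories of two nnU-Net trainings
copy_and_ensemble(
    cid="case_0000",
    nnunet_dirs=[Path("/results/run_a"), Path("/results/run_b")],
    nnunet_prediction_dir=Path("/results/ensemble"),
    fold=0,
)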
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'tasks',
        type=str,
        nargs='+',
        help="One or multiple of: Task003_Liver, Task007_Pancreas, "
        "Task008_HepaticVessel, Task010_Colon",
    )
    args = parser.parse_args()
    tasks = args.tasks

    decathlon_props = {
        "Task003_Liver": {
            "seg2det_stuff": [1],  # liver
            "seg2det_things": [2],  # cancer
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {"1": "liver"},
        },
        "Task007_Pancreas": {
            "seg2det_stuff": [1],  # pancreas
            "seg2det_things": [2],  # cancer
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {"1": "pancreas"},
        },
        "Task008_HepaticVessel": {
            "seg2det_stuff": [1],  # vessel
            "seg2det_things": [2],  # tumour
            "min_size": 3.,
            "labels": {"0": "tumour"},
            "labels_stuff": {"1": "vessel"},
        },
        "Task010_Colon": {
            "seg2det_stuff": [],
            "seg2det_things": [1],  # cancer
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {},
        },
    }

    basedir = Path(os.getenv('det_data'))
    for task in tasks:
        task_data_dir = basedir / task

        logger.remove()
        logger.add(sys.stdout, level="INFO")
        logger.add(task_data_dir / "prepare.log", level="DEBUG")
        logger.info(f"Preparing task: {task}")

        source_raw_dir = task_data_dir / "raw"
        source_data_dir = source_raw_dir / "imagesTr"
        source_labels_dir = source_raw_dir / "labelsTr"
        splitted_dir = task_data_dir / "raw_splitted"

        if not source_data_dir.is_dir():
            raise ValueError(f"Expected training images at {source_data_dir}")
        if not source_labels_dir.is_dir():
            raise ValueError(
                f"Expected training labels at {source_labels_dir}")
        if not (p := source_raw_dir / "dataset.json").is_file():
            raise ValueError(f"Expected dataset json to be located at {p}")

        target_data_dir = splitted_dir / "imagesTr"
        target_label_dir = splitted_dir / "labelsTr"
        target_data_dir.mkdir(parents=True, exist_ok=True)
        target_label_dir.mkdir(parents=True, exist_ok=True)

        # prepare meta
        original_meta = load_json(source_raw_dir / "dataset.json")

        dataset_info = {
            "task": task,
            "name": original_meta["name"],
            "target_class": None,
            "test_labels": True,
            "modalities": original_meta["modality"],
            "dim": 3,
            "info": {
                "original_labels": original_meta["labels"],
                "original_numTraining": original_meta["numTraining"],
            },
        }
        dataset_info.update(decathlon_props[task])
        save_json(dataset_info, task_data_dir / "dataset.json")

        # prepare data and labels
        case_ids = get_case_ids_from_dir(source_data_dir,
                                         remove_modality=False)
        case_ids = sorted(c for c in case_ids if c)
        logger.info(f"Found {len(case_ids)} cases for preparation.")

        for cid in maybe_verbose_iterable(case_ids):
            process_case(
                cid,
                source_data_dir,
                source_labels_dir,
                target_data_dir,
                target_label_dir,
            )

        # with Pool(processes=6) as p:
        #     p.starmap(process_case, zip(case_ids,
        #                                 repeat(source_data_dir),
        #                                 repeat(source_labels_dir),
        #                                 repeat(target_data_dir),
        #                                 repeat(target_label_dir),
        #                                 ))

        # create an artificial test split
        create_test_split(
            splitted_dir=splitted_dir,
            num_modalities=1,
            test_size=0.3,
            random_state=0,
            shuffle=True,
        )
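
An invocation sketch (the script name is hypothetical; det_data must point at the base data directory and each task needs its decathlon download under <det_data>/<task>/raw):

# hypothetical script name
# python prepare_decathlon.py Task003_Liver Task010_Colon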
Example #6
def check_data_and_label_splitted(
    task_name: str,
    test: bool = False,
    labels: bool = True,
    full_check: bool = True,
):
    """
    Perform checks of data and label in raw splitted format

    Args:
        task_name: name of task to check
        test: check test data
        labels: check labels
        full_check: By default, a full check is performed, which needs to
            load all files. If disabled, a computationally light check is
            performed instead

    Raises:
        ValueError: if not all raw splitted files were found
        ValueError: missing label info file
        ValueError: instances in label info file need to start at 1
        ValueError: instances in label info file need to be consecutive
    """
    print("Start data and label check.")
    cfg = load_dataset_info(get_task(task_name))

    splitted_paths = get_paths_from_splitted_dir(
        num_modalities=len(cfg["modalities"]),
        splitted_4d_output_dir=Path(os.getenv('det_data')) / task_name / "raw_splitted",
        labels=labels,
        test=test,
    )

    for case_paths in maybe_verbose_iterable(splitted_paths):
        # check all files exist
        for cp in case_paths:
            if not Path(cp).is_file():
                raise ValueError(f"Expected {cp} to be a raw splitted "
                                 "data path but it does not exist.")

        if labels:
            # check label info (json files)
            mask_path = case_paths[-1]
            mask_info_path = mask_path.parent / f"{mask_path.stem.split('.')[0]}.json"
            if not Path(mask_info_path).is_file():
                raise ValueError(f"Expected {mask_info_path} to be a raw splitted "
                                "mask info path but it does not exist.")
            mask_info = load_json(mask_info_path)
            if mask_info["instances"]:
                mask_info_instances = list(map(int, mask_info["instances"].keys()))

                if (min_id := min(mask_info_instances)) != 1:
                    raise ValueError(f"Instance IDs need to start at 1, found {min_id} in {mask_info_path}")

                for i in range(1, len(mask_info_instances) + 1):
                    if i not in mask_info_instances:
                        raise ValueError(f"Exptected {i} to be an Instance ID in "
                                        f"{mask_info_path} but only found {mask_info_instances}")
        else:
            mask_info_path = None

        if full_check:
            _full_check(case_paths, mask_info_path)
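
A usage sketch (task name reused from Example #5): run the computationally light structural check first, then the full check, which loads every image:

# light check: file existence and label-info consistency only
check_data_and_label_splitted("Task003_Liver", full_check=False)
# full check: additionally loads all images and verifies ITK params and instances
check_data_and_label_splitted("Task003_Liver", full_check=True)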