def convert_raw(task, overwrite, ov):
    """
    Create a foreground-only copy of a raw splitted task

    The new task is named ``Task<num>FG_<name>``. Images and labels are
    copied unchanged, while every instance inside the label property
    files (json) is remapped to class ``0`` and the dataset info is
    rewritten with the single label ``{"0": "fg"}``.

    Args:
        task: task identifier, forwarded to ``get_task`` and ``compose``
        overwrite: remove an already existing target directory first
        ov: overrides forwarded to ``compose``. ``None`` is treated as
            an empty list.

    Raises:
        FileExistsError: target directory already exists and
            ``overwrite`` is not set (``mkdir`` is called without
            ``exist_ok``)
    """
    old_task_name_full = get_task(task, name=True)
    # drop the leading "Task" (4 chars) and split number from name
    number, short_name = old_task_name_full[4:].split('_', 1)
    new_task_name_full = f"Task{number}FG_{short_name}"

    overrides = [] if ov is None else ov
    cfg = compose(task, "config.yaml", overrides=overrides)
    print(cfg.pretty())

    source_splitted_dir = Path(cfg["host"]["splitted_4d_output_dir"])
    target_splitted_dir = Path(
        str(source_splitted_dir).replace(old_task_name_full, new_task_name_full)
    )
    if target_splitted_dir.is_dir() and overwrite:
        shutil.rmtree(target_splitted_dir)
    target_splitted_dir.mkdir(parents=True)
    target_root = target_splitted_dir.parent

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(target_root / "convert_cls2fg.log", level="DEBUG")

    # update dataset_info
    data_info = load_dataset_info(Path(cfg["host"]["data_dir"]))
    # pop before re-inserting: this moves "labels" to the end of the dict
    # and therefore keeps the key order of the written json
    data_info.pop("labels")
    data_info["labels"] = {"0": "fg"}
    data_info["task"] = new_task_name_full
    save_json(data_info, target_root / "dataset.json", indent=4)

    for postfix in ("Tr", "Ts"):
        image_folder = f"images{postfix}"
        label_folder = f"labels{postfix}"

        source_image_dir = source_splitted_dir / image_folder
        if not source_image_dir.is_dir():
            logger.info(f"{source_image_dir} is not a dir. Skipping it.")
            continue

        # copy images and labels
        target_label_dir = target_splitted_dir / label_folder
        shutil.copytree(source_image_dir, target_splitted_dir / image_folder)
        shutil.copytree(source_splitted_dir / label_folder, target_label_dir)

        # remap properties file to foreground class
        # (collect the files first because they are rewritten in place)
        for props_file in list(target_label_dir.glob("*.json")):
            props = load_json(props_file)
            props["instances"] = dict.fromkeys(props["instances"], 0)
            save_json(props, props_file)
def _full_check(
        case_paths: List[Path],
        mask_info_path: Optional[Path] = None,
        ) -> None:
    """
    Performs itk and instance checks on the provided paths

    Args:
        case_paths: paths to all itk images to check properties.
            If a label is provided it needs to be at the last position.
        mask_info_path: optionally check label properties. If None, no
            check of label properties will be performed.

    Raises:
        ValueError: Inconsistent instances in label info and label image

    See also:
        :func:`_check_itk_params`
    """
    img_itk_seq = [load_sitk(cp) for cp in case_paths]
    _check_itk_params(img_itk_seq, case_paths)

    if mask_info_path is None:
        return

    # the label image is expected at the last position
    mask_itk = img_itk_seq[-1]
    mask_info = load_json(mask_info_path)
    info_instances = [int(key) for key in mask_info["instances"].keys()]

    mask_values = np.unique(sitk.GetArrayViewFromImage(mask_itk))
    # only values larger than zero are treated as instance IDs
    mask_instances = mask_values[mask_values > 0]

    # every ID inside the mask needs an entry inside the info file
    for mi in mask_instances:
        if mi in info_instances:
            continue
        raise ValueError(f"Found instance ID {mi} in mask which is "
                         f"not present in info {info_instances} in {mask_info_path}")

    # ... and the info file must not list additional IDs
    if len(info_instances) != len(mask_instances):
        raise ValueError("Found instances in info which are not present in mask: "
                         f"mask: {mask_instances} info {info_instances} in {mask_info_path}")
def nnunet_dataset_json(nnunet_task: str):
    """
    Load the dataset.json of an nnunet task from the raw data directory

    The file is searched at
    ``$nnUNet_raw_data_base/nnUNet_raw_data/<nnunet_task>/dataset.json``.

    Args:
        nnunet_task: name of the nnunet task directory

    Returns:
        result of ``load_json`` for the dataset.json. ``None`` if the
        environment variable ``nnUNet_raw_data_base`` is not set or the
        file does not exist.
    """
    raw_base = os.getenv("nnUNet_raw_data_base")
    if raw_base is None:
        return None

    search_dir = Path(raw_base) / "nnUNet_raw_data" / nnunet_task
    logger.info(f"Looking for dataset.json in {search_dir}")

    json_path = search_dir / "dataset.json"
    if not json_path.is_file():
        return None
    return load_json(json_path)
if save_seg: segmentation = {"pred_seg": np.argmax(probs, axis=0)} save_pickle(segmentation, segmentation_target) def nnunet_dataset_json(nnunet_task: str): if (p := os.getenv("nnUNet_raw_data_base")) is not None: search_dir = Path(p) / "nnUNet_raw_data" / nnunet_task logger.info(f"Looking for dataset.json in {search_dir}") if (fp := search_dir / "dataset.json").is_file(): return load_json(fp) elif (p := os.getenv("nnUNet_preprocessed")) is not None: search_dir = Path(p) / nnunet_task logger.info(f"Looking for dataset.json in {search_dir}") if (fp := search_dir / "dataset.json").is_file(): return load_json(fp) else: raise ValueError("Was not able to find nnunet dataset.json") def copy_and_ensemble(cid, nnunet_dirs, nnunet_prediction_dir): logger.info(f"Copy and ensemble: {cid}") case = [ np.load(_nnunet_dir / f"fold_{fold}" / "validation_raw" / f"{cid}.npz")["softmax"] for _nnunet_dir in nnunet_dirs ] assert len(case) == len(nnunet_dirs) case_ensemble = np.mean(case, axis=0) assert case_ensemble.shape == case[0].shape np.savez_compressed(nnunet_prediction_dir / f"{cid}.npz",
def main():
    """
    Prepare one or multiple decathlon tasks given on the command line

    For every task the data is read from ``$det_data/<task>/raw``
    (``imagesTr``, ``labelsTr`` and ``dataset.json``), a new
    ``dataset.json`` is written to ``$det_data/<task>``, each case is
    passed to ``process_case`` which receives the ``raw_splitted``
    target directories, and finally ``create_test_split`` is called on
    ``raw_splitted`` with a test size of 0.3.

    Raises:
        SystemExit: a requested task is not one of the supported tasks
            (reported via ``parser.error`` before any work is done)
        ValueError: the environment variable ``det_data`` is not set
        ValueError: training images, training labels or the dataset
            json were not found inside the raw directory
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'tasks',
        type=str,
        nargs='+',
        help="One or multiple of: Task003_Liver, Task007_Pancreas, "
        "Task008_HepaticVessel, Task010_Colon",
    )
    args = parser.parse_args()
    tasks = args.tasks

    decathlon_props = {
        "Task003_Liver": {
            "seg2det_stuff": [1, ],  # liver
            "seg2det_things": [2, ],  # cancer
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {"1": "liver"},
        },
        "Task007_Pancreas": {
            "seg2det_stuff": [1, ],  # pancreas
            "seg2det_things": [2, ],
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {"1": "pancreas"},
        },
        "Task008_HepaticVessel": {
            "seg2det_stuff": [1, ],  # vessel
            "seg2det_things": [2, ],
            "min_size": 3.,
            "labels": {"0": "tumour"},
            "labels_stuff": {"1": "vessel"},
        },
        "Task010_Colon": {
            "seg2det_stuff": [],
            "seg2det_things": [1, ],
            "min_size": 3.,
            "labels": {"0": "cancer"},
            "labels_stuff": {},
        },
    }

    # fail before any log file or directory is created: an unsupported
    # task would otherwise only be detected at the property lookup below
    unknown_tasks = [t for t in tasks if t not in decathlon_props]
    if unknown_tasks:
        parser.error(
            f"Unsupported task(s) {unknown_tasks}, "
            f"supported tasks are {sorted(decathlon_props)}"
        )

    # Path(None) raises a cryptic TypeError, give a clear message instead
    det_data = os.getenv('det_data')
    if det_data is None:
        raise ValueError("Environment variable det_data needs to be set "
                         "to the base directory of the data.")
    basedir = Path(det_data)

    for task in tasks:
        task_data_dir = basedir / task

        logger.remove()
        logger.add(sys.stdout, level="INFO")
        logger.add(task_data_dir / "prepare.log", level="DEBUG")
        logger.info(f"Preparing task: {task}")

        source_raw_dir = task_data_dir / "raw"
        source_data_dir = source_raw_dir / "imagesTr"
        source_labels_dir = source_raw_dir / "labelsTr"
        splitted_dir = task_data_dir / "raw_splitted"

        if not source_data_dir.is_dir():
            raise ValueError(f"Exptected training images at {source_data_dir}")
        if not source_labels_dir.is_dir():
            raise ValueError(
                f"Exptected training labels at {source_labels_dir}")
        if not (p := source_raw_dir / "dataset.json").is_file():
            raise ValueError(f"Expected dataset json to be located at {p}")

        target_data_dir = splitted_dir / "imagesTr"
        target_label_dir = splitted_dir / "labelsTr"
        target_data_dir.mkdir(parents=True, exist_ok=True)
        target_label_dir.mkdir(parents=True, exist_ok=True)

        # prepare meta
        original_meta = load_json(source_raw_dir / "dataset.json")

        dataset_info = {
            "task": task,
            "name": original_meta["name"],
            "target_class": None,
            "test_labels": True,
            "modalities": original_meta["modality"],
            "dim": 3,
            "info": {
                "original_labels": original_meta["labels"],
                "original_numTraining": original_meta["numTraining"],
            },
        }
        # add the task specific properties (labels, seg2det mapping, ...)
        dataset_info.update(decathlon_props[task])
        save_json(dataset_info, task_data_dir / "dataset.json")

        # prepare data and labels
        case_ids = get_case_ids_from_dir(source_data_dir, remove_modality=False)
        # drop empty case ids
        case_ids = sorted(c for c in case_ids if c)
        logger.info(f"Found {len(case_ids)} for preparation.")

        for cid in maybe_verbose_iterable(case_ids):
            process_case(
                cid,
                source_data_dir,
                source_labels_dir,
                target_data_dir,
                target_label_dir,
            )

        # create an artificial test split
        create_test_split(
            splitted_dir=splitted_dir,
            num_modalities=1,
            test_size=0.3,
            random_state=0,
            shuffle=True,
        )
def check_data_and_label_splitted(
        task_name: str,
        test: bool = False,
        labels: bool = True,
        full_check: bool = True,
        ):
    """
    Perform checks of data and label in raw splitted format

    Args:
        task_name: name of task to check
        test: check test data
        labels: check labels
        full_check: Per default a full check will be performed which
            needs to load all files. If this is disabled, a
            computationally light check will be performed

    Raises:
        ValueError: if not all raw splitted files were found
        ValueError: missing label info file
        ValueError: instances in label info file need to start at 1
        ValueError: instances in label info file need to be consecutive
    """
    print("Start data and label check.")
    cfg = load_dataset_info(get_task(task_name))

    splitted_paths = get_paths_from_splitted_dir(
        num_modalities=len(cfg["modalities"]),
        splitted_4d_output_dir=Path(os.getenv('det_data')) / task_name / "raw_splitted",
        labels=labels,
        test=test,
    )

    for case_paths in maybe_verbose_iterable(splitted_paths):
        # check all files exist
        for cp in case_paths:
            if not Path(cp).is_file():
                raise ValueError(f"Expected {cp} to be a raw splitted "
                                 "data path but it does not exist.")

        if labels:
            # check label info (json files)
            # the label is located at the last position of the case paths
            mask_path = case_paths[-1]
            # cut everything after the first dot of the stem so that
            # double suffixes are removed completely
            mask_info_path = mask_path.parent / f"{mask_path.stem.split('.')[0]}.json"
            if not Path(mask_info_path).is_file():
                raise ValueError(f"Expected {mask_info_path} to be a raw splitted "
                                 "mask info path but it does not exist.")

            mask_info = load_json(mask_info_path)
            # cases without any instances skip the ID checks
            if mask_info["instances"]:
                mask_info_instances = list(map(int, mask_info["instances"].keys()))

                # bind the smallest ID itself (not the result of the
                # comparison) so that the message reports the found ID
                if (j := min(mask_info_instances)) != 1:
                    raise ValueError(f"Instance IDs need to start at 1, found {j} in {mask_info_path}")

                # IDs need to be consecutive: 1, 2, ..., number of instances
                known_ids = set(mask_info_instances)
                for i in range(1, len(mask_info_instances) + 1):
                    if i not in known_ids:
                        raise ValueError(f"Exptected {i} to be an Instance ID in "
                                         f"{mask_info_path} but only found {mask_info_instances}")
        else:
            mask_info_path = None

        if full_check:
            _full_check(case_paths, mask_info_path)