def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"
    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"

    splits_file_dir = task_data_dir / "preprocessed"
    splits_file_dir.mkdir(parents=True, exist_ok=True)
    splits_file = splits_file_dir / "splits_final.pkl"

    case_ids = sorted(
        get_case_ids_from_dir(target_label_dir, remove_modality=False))
    # strip the trailing suffix so that all scans of one patient share the
    # same group id for the grouped split
    case_ids_pat = [c if c.isdigit() else c[:-1] for c in case_ids]
    case_ids_pat_unique = list(set(case_ids_pat))
    print(f"Found {len(case_ids_pat_unique)} unique patient ids.")

    splits = []
    kfold = GroupKFold(n_splits=5)
    for i, (train_idx, test_idx) in enumerate(
            kfold.split(case_ids, groups=case_ids_pat)):
        train_keys = np.array(case_ids)[train_idx]
        test_keys = np.array(case_ids)[test_idx]

        splits.append(OrderedDict())
        splits[-1]['train'] = train_keys
        splits[-1]['val'] = test_keys
        print(f"Generated split: {splits[-1]}")

    save_pickle(splits, splits_file)
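
# Hedged sketch, not part of the original script: the case ids below are
# hypothetical examples. It illustrates why the split above is grouped by
# patient: ids that only differ in their trailing suffix map to the same
# group, so GroupKFold never places scans of one patient in both train and
# val of the same fold.
def _demo_grouped_split():
    from collections import OrderedDict
    import numpy as np
    from sklearn.model_selection import GroupKFold

    case_ids = ["0001", "0002A", "0002B", "0003", "0004A",
                "0004B", "0005", "0006", "0007", "0008"]  # hypothetical ids
    # "0002A"/"0002B" -> group "0002"
    groups = [c if c.isdigit() else c[:-1] for c in case_ids]

    for train_idx, val_idx in GroupKFold(n_splits=5).split(case_ids, groups=groups):
        train_groups = set(np.array(groups)[train_idx])
        val_groups = set(np.array(groups)[val_idx])
        assert not train_groups & val_groups  # no patient on both sides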
def import_nnunet_boxes(
    # settings
    nnunet_prediction_dir: Pathlike,
    save_dir: Pathlike,
    boxes_gt_dir: Pathlike,
    classes: Sequence[str],
    stuff: Optional[Sequence[int]] = None,
    num_workers: int = 6,
):
    nnunet_prediction_dir = Path(nnunet_prediction_dir)
    assert nnunet_prediction_dir.is_dir(), f"{nnunet_prediction_dir} is not a dir"
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    summary = []

    # create sweep dir
    sweep_dir = Path(nnunet_prediction_dir)
    postprocessing_settings = {}

    # optimize min num voxels
    logger.info("Looking for optimal min voxel size")
    min_num_voxel_settings = [0, 5, 10, 15, 20]
    scores = []
    for min_num_voxel in min_num_voxel_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_min_voxel{min_num_voxel}"
        sweep_prediction.mkdir(parents=True)

        # import with settings
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            min_num_voxel=min_num_voxel,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
        )

        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Min voxel {min_num_voxel}": _scores[TARGET_METRIC]})
        logger.info(f"Min voxel {min_num_voxel} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)
    idx = int(np.argmax(scores))
    postprocessing_settings["min_num_voxel"] = min_num_voxel_settings[idx]
    logger.info(
        f"Found min num voxel {min_num_voxel_settings[idx]} with score {scores[idx]}"
    )

    # optimize score threshold
    logger.info("Looking for optimal min probability threshold")
    min_threshold_settings = [None, 0.1, 0.2, 0.3, 0.4, 0.5]
    scores = []
    for min_threshold in min_threshold_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_min_threshold_{min_threshold}"
        sweep_prediction.mkdir(parents=True)

        # import with settings (keep the previously selected min_num_voxel)
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            min_threshold=min_threshold,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
            **postprocessing_settings,
        )

        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Min score {min_threshold}": _scores[TARGET_METRIC]})
        logger.info(f"Min score {min_threshold} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)
    idx = int(np.argmax(scores))
    postprocessing_settings["min_threshold"] = min_threshold_settings[idx]
    logger.info(
        f"Found min threshold {min_threshold_settings[idx]} with score {scores[idx]}"
    )

    # optimize probability aggregation
    logger.info("Looking for best probability aggregation")
    aggregation_settings = ["max", "median", "mean", "percentile95"]
    scores = []
    for aggregation in aggregation_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_aggregation_{aggregation}"
        sweep_prediction.mkdir(parents=True)

        # import with settings (keep previously selected parameters)
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            aggregation=aggregation,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
            **postprocessing_settings,
        )

        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Aggregation {aggregation}": _scores[TARGET_METRIC]})
        logger.info(f"Aggregation {aggregation} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)
    idx = int(np.argmax(scores))
    postprocessing_settings["aggregation"] = aggregation_settings[idx]
    logger.info(
        f"Found aggregation {aggregation_settings[idx]} with score {scores[idx]}"
    )

    save_pickle(postprocessing_settings, save_dir / "postprocessing.pkl")
    save_json(summary, save_dir / "summary.json")
    return postprocessing_settings
def import_single_case(
    logits_source: Path,
    logits_target_dir: Optional[Path],
    aggregation: str,
    min_num_voxel: int,
    min_threshold: Optional[float],
    save_seg: bool = True,
    save_iseg: bool = True,
    stuff: Optional[Sequence[int]] = None,
):
    """
    Process a single case

    Args:
        logits_source: path to nnunet prediction
        logits_target_dir: path to dir where result should be saved
        aggregation: aggregation method for probabilities
        min_num_voxel: minimum number of voxels an instance needs to be kept
        min_threshold: minimum probability an instance needs to be kept
        save_seg: save semantic segmentation
        save_iseg: save instance segmentation
        stuff: stuff classes to remove
    """
    assert logits_source.is_file(), \
        f"Logits source needs to be a file, found {logits_source}"
    assert logits_target_dir.is_dir(), \
        f"Logits target dir needs to be a dir, found {logits_target_dir}"

    case_name = logits_source.stem
    logger.info(f"Processing {case_name}")

    properties_file = logits_source.parent / f"{case_name}.pkl"
    probs = np.load(str(logits_source))["softmax"]

    if properties_file.is_file():
        properties_dict = load_pickle(properties_file)
        bbox = properties_dict.get('crop_bbox')
        shape_original_before_cropping = properties_dict.get(
            'original_size_of_raw_data')
        if bbox is not None:
            # undo nnU-Net's foreground cropping: paste the predicted
            # probabilities back into the original image grid
            tmp = np.zeros((probs.shape[0], *shape_original_before_cropping))
            for c in range(3):
                bbox[c][1] = np.min((bbox[c][0] + probs.shape[c + 1],
                                     shape_original_before_cropping[c]))
            tmp[:, bbox[0][0]:bbox[0][1],
                bbox[1][0]:bbox[1][1],
                bbox[2][0]:bbox[2][1]] = probs
            probs = tmp

    res = instance_results_from_seg(
        probs,
        aggregation=aggregation,
        min_num_voxel=min_num_voxel,
        min_threshold=min_threshold,
        stuff=stuff,
    )

    detection_target = logits_target_dir / f"{case_name}_boxes.pkl"
    segmentation_target = logits_target_dir / f"{case_name}_segmentation.pkl"
    instances_target = logits_target_dir / f"{case_name}_instances.pkl"

    boxes = {
        key: res[key]
        for key in ["pred_boxes", "pred_labels", "pred_scores"]
    }
    save_pickle(boxes, detection_target)

    if save_iseg:
        instances = {
            key: res[key]
            for key in ["pred_instances", "pred_labels", "pred_scores"]
        }
        save_pickle(instances, instances_target)

    if save_seg:
        segmentation = {"pred_seg": np.argmax(probs, axis=0)}
        save_pickle(segmentation, segmentation_target)
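
# Hedged sketch, not part of the original script: synthetic shapes and a
# hypothetical crop_bbox. It shows the crop restoration above in isolation:
# the cropped softmax volume is pasted back into a zero array of the original
# image size, so boxes and segmentations live in the original image grid.
def _demo_restore_crop():
    import numpy as np

    probs = np.random.rand(2, 10, 12, 14)        # (classes, z, y, x) after cropping
    original_shape = (20, 24, 28)                 # hypothetical original_size_of_raw_data
    bbox = [[3, None], [5, None], [6, None]]      # hypothetical crop_bbox lower bounds

    tmp = np.zeros((probs.shape[0], *original_shape))
    for c in range(3):
        # clip the upper bound to the original image size
        bbox[c][1] = min(bbox[c][0] + probs.shape[c + 1], original_shape[c])
    tmp[:, bbox[0][0]:bbox[0][1],
        bbox[1][0]:bbox[1][1],
        bbox[2][0]:bbox[2][1]] = probs
    return tmp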
            zip(
                case_ids,
                repeat(nnunet_dirs),
                repeat(nnunet_prediction_dir),
            ))
        else:
            for cid in case_ids:
                copy_and_ensemble(cid, nnunet_dirs, nnunet_prediction_dir)

        if simple:
            postprocessing_settings = {
                "aggregation": "max",
                "min_num_voxel": 5,
                "min_threshold": None,
            }
            save_pickle(postprocessing_settings,
                        nndet_unet_dir / "postprocessing.pkl")
        else:
            postprocessing_settings = import_nnunet_boxes(
                nnunet_prediction_dir=nnunet_prediction_dir,
                save_dir=nndet_unet_dir,
                boxes_gt_dir=Path(os.getenv("det_data")) / task / "preprocessed" / "labelsTr",
                classes=list(cfg["data"]["labels"].keys()),
                stuff=stuff,
                num_workers=num_workers,
            )

        save_pickle({}, nndet_unet_dir / "plan.pkl")
        target_dir = nndet_unet_dir / "val_predictions"
    else:
        case_ids = [
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'task',
        type=str,
        help="Task id e.g. Task12_LIDC OR 12 OR LIDC",
    )
    parser.add_argument(
        'model',
        type=str,
        help="model name, e.g. RetinaUNetV0",
    )
    parser.add_argument(
        '-o', '--overwrites',
        type=str,
        nargs='+',
        required=False,
        help="overwrites for config file. Only needed in case of box eval",
    )
    parser.add_argument(
        '-c', '--consolidate',
        type=str,
        default="export",
        required=False,
        help=("Determines how to consolidate predictions: 'export' or 'copy'. "
              "'copy' will copy the predictions of each fold into the directory for evaluation. "
              "'export' will use the updated parameters after consolidation to update the "
              "predictions and export them. This is only supported if one of the "
              "sweep settings is active! Default: export"),
    )
    parser.add_argument(
        '--num_folds',
        type=int,
        default=5,
        required=False,
        help="Number of folds. Default: 5",
    )
    parser.add_argument(
        '--no_model',
        action="store_false",
        help="Deactivate if consolidating nnUNet results",
    )
    parser.add_argument(
        '--sweep_boxes',
        action="store_true",
        help="Sweep for best parameters for bounding box based models",
    )
    parser.add_argument(
        '--sweep_instances',
        action="store_true",
        help="Sweep for best parameters for instance segmentation based models",
    )
    parser.add_argument(
        '--ckpt',
        type=str,
        default="last",
        required=False,
        help="Define identifier of checkpoint for consolidation. "
             "Use this with care!",
    )
    args = parser.parse_args()

    model = args.model
    task = args.task
    ov = args.overwrites
    consolidate = args.consolidate
    num_folds = args.num_folds
    do_model_consolidation = args.no_model
    sweep_boxes = args.sweep_boxes
    sweep_instances = args.sweep_instances
    ckpt = args.ckpt

    if consolidate == "export" and not (sweep_boxes or sweep_instances):
        raise ValueError(
            "Export needs a new parameter sweep! Activate one of the sweep "
            "arguments or change to copy mode")

    task_dir = Path(os.getenv("det_models")) / get_task(
        task, name=True, models=True)
    model_dir = task_dir / model
    if not model_dir.is_dir():
        raise ValueError(f"{model_dir} does not exist")
    target_dir = model_dir / "consolidated"

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    logger.add(Path(target_dir) / "consolidate.log", level="DEBUG")
    logger.info(f"Looking for models in {model_dir}")

    training_dirs = [
        get_latest_model(model_dir, fold) for fold in range(num_folds)
    ]
    logger.info(f"Found training dirs: {training_dirs}")

    # model consolidation
    if do_model_consolidation:
        logger.info("Consolidate models")
        if ckpt != "last":
            logger.warning(
                f"Found ckpt overwrite {ckpt}, this is not the default, "
                "this can drastically influence the performance!")
        consolidate_models(training_dirs, target_dir, ckpt)

    # consolidate predictions
    logger.info("Consolidate predictions")
    consolidate_predictions(
        source_dirs=training_dirs,
        target_dir=target_dir,
        consolidate=consolidate,
    )
    shutil.copy2(training_dirs[0] / "plan.pkl", target_dir)
    shutil.copy2(training_dirs[0] / "config.yaml", target_dir)

    # invoke new parameter sweeps
    cfg = OmegaConf.load(str(target_dir / "config.yaml"))
    ov = ov if ov is not None else []
    ov.append("host.parent_data=${env:det_data}")
    ov.append("host.parent_results=${env:det_models}")
    cfg.merge_with_dotlist(ov)

    preprocessed_output_dir = Path(cfg["host"]["preprocessed_output_dir"])
    plan = load_pickle(target_dir / "plan.pkl")
    gt_dir = preprocessed_output_dir / plan["data_identifier"] / "labelsTr"

    if sweep_boxes:
        logger.info("Sweeping box predictions")
        module = MODULE_REGISTRY[cfg["module"]]
        ensembler_cls = module.get_ensembler_cls(
            key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
        sweeper = BoxSweeper(
            classes=[item for _, item in cfg["data"]["labels"].items()],
            pred_dir=target_dir / "sweep_predictions",
            gt_dir=gt_dir,
            target_metric=cfg["trainer_cfg"].get(
                "eval_score_key", "mAP_IoU_0.10_0.50_0.05_MaxDet_100"),
            ensembler_cls=ensembler_cls,
            save_dir=target_dir / "sweep",
        )
        inference_plan = sweeper.run_postprocessing_sweep()
    elif sweep_instances:
        raise NotImplementedError

    plan = load_pickle(target_dir / "plan.pkl")
    if consolidate != 'copy':
        plan["inference_plan"] = inference_plan
        save_pickle(plan, target_dir / "plan_inference.pkl")

        for restore in [True, False]:
            export_dir = target_dir / "val_predictions" if restore else \
                target_dir / "val_predictions_preprocessed"
            extract_results(
                source_dir=target_dir / "sweep_predictions",
                target_dir=export_dir,
                ensembler_cls=ensembler_cls,
                restore=restore,
                **inference_plan,
            )
    else:
        logger.warning("Plan used from fold 0, not updated with consolidation")
        save_pickle(plan, target_dir / "plan_inference.pkl")
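
# Hedged usage sketch: the script name below is a placeholder, and the task
# and model names are examples taken from the argparse help strings above.
# Consolidating five folds and re-sweeping the box postprocessing before
# export could look like this:
#
#   python consolidate.py Task019FG_ADAM RetinaUNetV0 --sweep_boxes
#
# With `-c copy` no sweep is required, but the plan from fold 0 is reused
# without the consolidated inference parameters.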