Example #1
def main():
    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task019FG_ADAM"

    target_label_dir = task_data_dir / "raw_splitted" / "labelsTr"
    splits_file_dir = task_data_dir / "preprocessed"
    splits_file_dir.mkdir(parents=True, exist_ok=True)
    splits_file = splits_file_dir / "splits_final.pkl"

    case_ids = sorted(
        get_case_ids_from_dir(target_label_dir, remove_modality=False))
    case_ids_pat = [c if c.isdigit() else c[:-1] for c in case_ids]
    case_ids_pat_unique = list(set(case_ids_pat))
    print(f"Found {len(case_ids_pat_unique)} unique patient ids.")

    splits = []
    kfold = GroupKFold(n_splits=5)
    for i, (train_idx,
            test_idx) in enumerate(kfold.split(case_ids, groups=case_ids_pat)):
        train_keys = np.array(case_ids)[train_idx]
        test_keys = np.array(case_ids)[test_idx]

        splits.append(OrderedDict())
        splits[-1]['train'] = train_keys
        splits[-1]['val'] = test_keys
        print(f"Generated split: {splits[-1]}")
    save_pickle(splits, splits_file)
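
The splits file written above can be loaded back for a quick sanity check. A minimal sketch, assuming save_pickle wraps the standard pickle module; the path below mirrors the one constructed in the example.

import os
import pickle
from pathlib import Path

splits_file = (Path(os.getenv("det_data")) / "Task019FG_ADAM"
               / "preprocessed" / "splits_final.pkl")
with open(splits_file, "rb") as f:
    splits = pickle.load(f)  # list of 5 OrderedDicts with 'train'/'val' keys

print(f"Folds: {len(splits)}")
print(f"Fold 0: {len(splits[0]['train'])} train / {len(splits[0]['val'])} val cases")
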
Example #2
def import_nnunet_boxes(
    # settings
    nnunet_prediction_dir: Pathlike,
    save_dir: Pathlike,
    boxes_gt_dir: Pathlike,
    classes: Sequence[str],
    stuff: Optional[Sequence[int]] = None,
    num_workers: int = 6,
):
    assert nnunet_prediction_dir.is_dir(), \
        f"{nnunet_prediction_dir} is not a dir"
    save_dir = Path(save_dir)
    save_dir.mkdir(parents=True, exist_ok=True)
    summary = []

    # create sweep dir
    sweep_dir = Path(nnunet_prediction_dir)
    postprocessing_settings = {}

    # optimize min num voxels
    logger.info("Looking for optimal min voxel size")
    min_num_voxel_settings = [0, 5, 10, 15, 20]
    scores = []
    for min_num_voxel in min_num_voxel_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_min_voxel{min_num_voxel}"
        sweep_prediction.mkdir(parents=True)

        # import with settings
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            min_num_voxel=min_num_voxel,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
        )

        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Min voxel {min_num_voxel}": _scores[TARGET_METRIC]})
        logger.info(f"Min voxel {min_num_voxel} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)

    idx = int(np.argmax(scores))
    postprocessing_settings["min_num_voxel"] = min_num_voxel_settings[idx]
    logger.info(
        f"Found min num voxel {min_num_voxel_settings[idx]} with score {scores[idx]}"
    )

    # optimize score threshold
    logger.info("Looking for optimal min probability threshold")
    min_threshold_settings = [None, 0.1, 0.2, 0.3, 0.4, 0.5]
    scores = []
    for min_threshold in min_threshold_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_min_threshold_{min_threshold}"
        sweep_prediction.mkdir(parents=True)

        # import with settings
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            min_threshold=min_threshold,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
            **postprocessing_settings,
        )

        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Min score {min_threshold}": _scores[TARGET_METRIC]})
        logger.info(f"Min score {min_threshold} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)

    idx = int(np.argmax(scores))
    postprocessing_settings["min_threshold"] = min_threshold_settings[idx]
    logger.info(
        f"Found min threshold {min_threshold_settings[idx]} with score {scores[idx]}"
    )

    logger.info("Looking for best probability aggregation")
    aggregation_settings = ["max", "median", "mean", "percentile95"]
    scores = []
    for aggregation in aggregation_settings:
        # create temp dir
        sweep_prediction = sweep_dir / f"sweep_aggregation_{aggregation}"
        sweep_prediction.mkdir(parents=True)

        # import with settings
        import_dir(
            nnunet_prediction_dir=nnunet_prediction_dir,
            target_dir=sweep_prediction,
            aggregation=aggregation,
            save_seg=False,
            save_iseg=False,
            stuff=stuff,
            num_workers=num_workers,
            **postprocessing_settings,
        )
        # evaluate
        _scores, _ = evaluate_box_dir(
            pred_dir=sweep_prediction,
            gt_dir=boxes_gt_dir,
            classes=classes,
            save_dir=None,
        )
        scores.append(_scores[TARGET_METRIC])
        summary.append({f"Aggreagtion {aggregation}": _scores[TARGET_METRIC]})
        logger.info(f"Aggreagtion {aggregation} :: {_scores[TARGET_METRIC]}")
        shutil.rmtree(sweep_prediction)

    idx = int(np.argmax(scores))
    postprocessing_settings["aggregation"] = aggreagtion_settings[idx]
    logger.info(
        f"Found aggregation {aggreagtion_settings[idx]} with score {scores[idx]}"
    )

    save_pickle(postprocessing_settings, save_dir / "postprocessing.pkl")
    save_json(summary, save_dir / "summary.json")
    return postprocessing_settings
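
A hypothetical invocation of import_nnunet_boxes as defined above; all paths and the class list are placeholders rather than values from the original project.

from pathlib import Path

postprocessing = import_nnunet_boxes(
    nnunet_prediction_dir=Path("/data/Task019FG_ADAM/nnunet_predictions"),  # placeholder
    save_dir=Path("/data/Task019FG_ADAM/nndet_unet"),                       # placeholder
    boxes_gt_dir=Path("/data/Task019FG_ADAM/preprocessed/labelsTr"),        # placeholder
    classes=["aneurysm"],  # illustrative class list
    stuff=None,
    num_workers=6,
)
print(postprocessing)  # e.g. {'min_num_voxel': ..., 'min_threshold': ..., 'aggregation': ...}
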
Example #3
def import_single_case(
    logits_source: Path,
    logits_target_dir: Optional[Path],
    aggregation: str,
    min_num_voxel: int,
    min_threshold: Optional[float],
    save_seg: bool = True,
    save_iseg: bool = True,
    stuff: Optional[Sequence[int]] = None,
):
    """
    Process a single case

    Args:
        logits_source: path to nnunet prediction
        logits_target_dir: path to dir where result should be saved
        aggregation: aggregation method for probabilities
        min_num_voxel: minimum number of voxels a predicted instance needs
            to cover in order to be kept
        min_threshold: minimum probability for a predicted instance to be
            kept (None disables the threshold)
        save_seg: save semantic segmentation
        save_iseg: save instance segmentation
        stuff: stuff classes to remove
    """
    assert logits_source.is_file(), \
        f"Logits source needs to be a file, found {logits_source}"
    assert logits_target_dir.is_dir(), \
        f"Logits target dir needs to be a dir, found {logits_target_dir}"

    case_name = logits_source.stem
    logger.info(f"Processing {case_name}")
    properties_file = logits_source.parent / f"{case_name}.pkl"
    probs = np.load(str(logits_source))["softmax"]

    if properties_file.is_file():
        properties_dict = load_pickle(properties_file)
        bbox = properties_dict.get('crop_bbox')
        shape_original_before_cropping = properties_dict.get(
            'original_size_of_raw_data')

        if bbox is not None:
            tmp = np.zeros((probs.shape[0], *shape_original_before_cropping))
            for c in range(3):
                bbox[c][1] = np.min((bbox[c][0] + probs.shape[c + 1],
                                     shape_original_before_cropping[c]))

            tmp[:, bbox[0][0]:bbox[0][1], bbox[1][0]:bbox[1][1],
                bbox[2][0]:bbox[2][1]] = probs
            probs = tmp

    res = instance_results_from_seg(
        probs,
        aggregation=aggregation,
        min_num_voxel=min_num_voxel,
        min_threshold=min_threshold,
        stuff=stuff,
    )

    detection_target = logits_target_dir / f"{case_name}_boxes.pkl"
    segmentation_target = logits_target_dir / f"{case_name}_segmentation.pkl"
    instances_target = logits_target_dir / f"{case_name}_instances.pkl"

    boxes = {
        key: res[key]
        for key in ["pred_boxes", "pred_labels", "pred_scores"]
    }
    save_pickle(boxes, detection_target)
    if save_iseg:
        instances = {
            key: res[key]
            for key in ["pred_instances", "pred_labels", "pred_scores"]
        }
        save_pickle(instances, instances_target)
    if save_seg:
        segmentation = {"pred_seg": np.argmax(probs, axis=0)}
        save_pickle(segmentation, segmentation_target)
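
A sketch of how import_single_case could be called for one prediction; the .npz path and the output directory are placeholders and must match the layout the function expects (a "softmax" array in the .npz and an existing target directory).

from pathlib import Path

import_single_case(
    logits_source=Path("/data/predictions/case_000.npz"),  # placeholder .npz containing a "softmax" array
    logits_target_dir=Path("/data/boxes"),                 # placeholder, must already exist
    aggregation="max",
    min_num_voxel=5,
    min_threshold=None,
    save_seg=False,
    save_iseg=False,
    stuff=None,
)
# writes case_000_boxes.pkl (plus *_segmentation.pkl / *_instances.pkl when enabled)
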
Example #4
                        zip(
                            case_ids,
                            repeat(nnunet_dirs),
                            repeat(nnunet_prediction_dir),
                        ))
            else:
                for cid in case_ids:
                    copy_and_ensemble(cid, nnunet_dirs, nnunet_prediction_dir)

        if simple:
            postprocessing_settings = {
                "aggregation": "max",
                "min_num_voxel": 5,
                "min_threshold": None,
            }
            save_pickle(postprocessing_settings,
                        nndet_unet_dir / "postprocessing.pkl")
        else:
            postprocessing_settings = import_nnunet_boxes(
                nnunet_prediction_dir=nnunet_prediction_dir,
                save_dir=nndet_unet_dir,
                boxes_gt_dir=Path(os.getenv("det_data")) / task /
                "preprocessed" / "labelsTr",
                classes=list(cfg["data"]["labels"].keys()),
                stuff=stuff,
                num_workers=num_workers,
            )

        save_pickle({}, nndet_unet_dir / "plan.pkl")
        target_dir = nndet_unet_dir / "val_predictions"
    else:
        case_ids = [
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'task',
        type=str,
        help="Task id e.g. Task12_LIDC OR 12 OR LIDC",
    )
    parser.add_argument(
        'model',
        type=str,
        help="model name, e.g. RetinaUNetV0",
    )
    parser.add_argument(
        '-o',
        '--overwrites',
        type=str,
        nargs='+',
        required=False,
        help="overwrites for config file. Only needed in case of box eval",
    )
    parser.add_argument(
        '-c',
        '--consolidate',
        type=str,
        default="export",
        required=False,
        help=(
            "Determines how to consolidate predictions: 'export' or 'copy'. "
            "'copy' will copy the predictions of each fold into the directory "
            "for evaluation. 'export' will use the updated parameters after "
            "consolidation to update the predictions and export them. This is "
            "only supported if one of the sweep settings is active! "
            "Default: export"),
    )
    parser.add_argument(
        '--num_folds',
        type=int,
        default=5,
        required=False,
        help="Number of folds. Default: 5",
    )
    parser.add_argument(
        '--no_model',
        action="store_false",
        help="Deactivate if consolidating nnUNet results",
    )
    parser.add_argument(
        '--sweep_boxes',
        action="store_true",
        help="Sweep for best parameters for bounding box based models",
    )
    parser.add_argument(
        '--sweep_instances',
        action="store_true",
        help="Sweep for best parameters for instance segmentation based models",
    )
    parser.add_argument(
        '--ckpt',
        type=str,
        default="last",
        required=False,
        help="Define identifier of checkpoint for consolidation. "
        "Use this with care!")

    args = parser.parse_args()
    model = args.model
    task = args.task
    ov = args.overwrites

    consolidate = args.consolidate
    num_folds = args.num_folds
    do_model_consolidation = args.no_model

    sweep_boxes = args.sweep_boxes
    sweep_instances = args.sweep_instances
    ckpt = args.ckpt

    if consolidate == "export" and not (sweep_boxes or sweep_instances):
        raise ValueError(
            "Export needs new parameter sweep! Actiate one of the sweep "
            "arguments or change to copy mode")

    task_dir = Path(os.getenv("det_models")) / get_task(
        task, name=True, models=True)
    model_dir = task_dir / model
    if not model_dir.is_dir():
        raise ValueError(f"{model_dir} does not exist")
    target_dir = model_dir / "consolidated"

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    logger.add(Path(target_dir) / "consolidate.log", level="DEBUG")

    logger.info(f"looking for models in {model_dir}")
    training_dirs = [
        get_latest_model(model_dir, fold) for fold in range(num_folds)
    ]
    logger.info(f"Found training dirs: {training_dirs}")

    # model consolidation
    if do_model_consolidation:
        logger.info("Consolidate models")
        if ckpt != "last":
            logger.warning(
                f"Found ckpt overwrite {ckpt}, this is not the default, "
                "this can drastically influence the performance!")
        consolidate_models(training_dirs, target_dir, ckpt)

    # consolidate predictions
    logger.info("Consolidate predictions")
    consolidate_predictions(
        source_dirs=training_dirs,
        target_dir=target_dir,
        consolidate=consolidate,
    )

    shutil.copy2(training_dirs[0] / "plan.pkl", target_dir)
    shutil.copy2(training_dirs[0] / "config.yaml", target_dir)

    # invoke new parameter sweeps
    cfg = OmegaConf.load(str(target_dir / "config.yaml"))
    ov = ov if ov is not None else []
    ov.append("host.parent_data=${env:det_data}")
    ov.append("host.parent_results=${env:det_models}")
    if ov is not None:
        cfg.merge_with_dotlist(ov)

    preprocessed_output_dir = Path(cfg["host"]["preprocessed_output_dir"])
    plan = load_pickle(target_dir / "plan.pkl")
    gt_dir = preprocessed_output_dir / plan["data_identifier"] / "labelsTr"

    if sweep_boxes:
        logger.info("Sweeping box predictions")
        module = MODULE_REGISTRY[cfg["module"]]
        ensembler_cls = module.get_ensembler_cls(
            key="boxes",
            dim=plan["network_dim"])  # TODO: make this configurable

        sweeper = BoxSweeper(
            classes=[item for _, item in cfg["data"]["labels"].items()],
            pred_dir=target_dir / "sweep_predictions",
            gt_dir=gt_dir,
            target_metric=cfg["trainer_cfg"].get(
                "eval_score_key", "mAP_IoU_0.10_0.50_0.05_MaxDet_100"),
            ensembler_cls=ensembler_cls,
            save_dir=target_dir / "sweep",
        )
        inference_plan = sweeper.run_postprocessing_sweep()
    elif sweep_instances:
        raise NotImplementedError

    plan = load_pickle(target_dir / "plan.pkl")
    if consolidate != 'copy':
        plan["inference_plan"] = inference_plan
        save_pickle(plan, target_dir / "plan_inference.pkl")

        for restore in [True, False]:
            export_dir = target_dir / "val_predictions" if restore else \
                target_dir / "val_predictions_preprocessed"
            extract_results(
                source_dir=target_dir / "sweep_predictions",
                target_dir=export_dir,
                ensembler_cls=ensembler_cls,
                restore=restore,
                **inference_plan,
            )
    else:
        logger.warning("Plan used from fold 0, not updated with consolidation")
        save_pickle(plan, target_dir / "plan_inference.pkl")