Example #1
    def __prepare_tensor_dataset(self):
        tensor_dataset_path = os.path.join(
            self.metaconf["ws_path"], "tensor_datasets",
            self.dataset_params.tensor_dataset_name)
        # Compare the current config against the stored snapshot; if they
        # differ, rebuild the tensor dataset.
        current_config_snapshot_exists = H.config_snapshot(
            "dataset_params",
            self.dataset_params.params,
            "src/data/aux/.dataset_config_snapshot.json",
        )
        if not current_config_snapshot_exists:
            H.makedirs(tensor_dataset_path)
            _tqdm_kwargs = {
                "desc": "Preparing TensorDataset",
                "total": len(self.generic_dataset)
            }
            for i, sample in tqdm(enumerate(self.generic_dataset),
                                  **_tqdm_kwargs):
                # One folder per texture class, one .pt file per nodule.
                f_folder_path = os.path.join(tensor_dataset_path,
                                             f"{sample['texture']}")
                H.makedirs(f_folder_path)
                f_path = os.path.join(f_folder_path, f"nodule_{i}.pt")
                save_nodules = {
                    "nodule": sample["nodule"],
                    "texture": sample["texture"]
                }
                torch.save(save_nodules, f_path)
        return tensor_dataset_path
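
All three examples gate the expensive rebuild on a config_snapshot helper (H.config_snapshot in Example #1), which is not part of this listing. A minimal sketch of what such a helper could look like, assuming it stores the last-seen params as JSON and reports whether they are unchanged; the body below is an assumption, not the project's implementation:

import json
import os


def config_snapshot(name, params, snapshot_path):
    # Sketch: return True if `params` (a JSON-serializable dict) matches
    # the snapshot stored on disk under the key `name`. Otherwise (first
    # run or changed config) rewrite the snapshot and return False,
    # signalling the caller to rebuild its cached artifacts.
    snapshot = {}
    if os.path.exists(snapshot_path):
        with open(snapshot_path) as f:
            snapshot = json.load(f)
    if snapshot.get(name) == params:
        return True
    snapshot[name] = params
    snapshot_dir = os.path.dirname(snapshot_path)
    if snapshot_dir:
        os.makedirs(snapshot_dir, exist_ok=True)
    with open(snapshot_path, "w") as f:
        json.dump(snapshot, f, indent=2)
    return False

The key design point is that a missing or stale snapshot returns False, which the callers above treat as "cache is invalid, rebuild and re-persist".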
Example #2
    def __prepare_nodules_annotations(self):
        """Search through pylidc database for annotations, make clusters
        of anns corresponged to same nodules and forms list of clusters.
        """

        # Prepare or load annotations clustered for each nodule
        lidc_ann_config = {
            "annotations_number_required": self.annotations_number_required
        }
        ann_snapshot_exists = config_snapshot(
            "lidc_ann", lidc_ann_config,
            "./src/data/aux/.lidcann_config_snapshot.json")
        ann_pickle_exists = os.path.exists(self.cluster_list_pickle_path)
        if not ann_pickle_exists or not ann_snapshot_exists:
            cluster_list = []

            for series in tqdm(pylidc.query(pylidc.Scan).all(),
                               desc="Preparing LIDC annotations list"):
                clusters = series.cluster_annotations(verbose=False)
                # Keep only nodules annotated by at least
                # `annotations_number_required` radiologists, for robustness.
                clusters = [
                    c for c in clusters
                    if len(c) >= self.annotations_number_required
                ]
                cluster_list.extend(clusters)
            # Dump cluster_list for future use
            logger.info("pickling LIDC annotation list for future use")
            with open(self.cluster_list_pickle_path, "wb") as f:
                pickle.dump(cluster_list, f)
        else:
            with open(self.cluster_list_pickle_path, "rb") as f:
                cluster_list = pickle.load(f)
        return cluster_list
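
For context on what Example #2 iterates over: pylidc groups per-radiologist annotations of the same physical nodule via Scan.cluster_annotations(). A minimal standalone sketch, assuming pylidc is installed and its annotation database is available; the printout is illustrative only:

import pylidc

# Take one scan and cluster its annotations by physical nodule.
scan = pylidc.query(pylidc.Scan).first()
clusters = scan.cluster_annotations(verbose=False)

for nodule_anns in clusters:
    # Each cluster is a list of pylidc.Annotation objects, one per
    # radiologist who marked this nodule.
    diams = [ann.diameter for ann in nodule_anns]
    print(f"{len(nodule_anns)} annotations, "
          f"mean diameter {sum(diams) / len(diams):.1f} mm")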
Example #3
    def __prepare_nodule_list(self,
                              cluster_list: List[List[pylidc.Annotation]]):
        lidc_nodule_config = {
            "diam_interval": self.diam_interval,
            "extract_size_mm": self.extract_size_mm,
            "mask_dilation_iters": self.mask_dilation_iters,
        }
        nodule_pickle_exists = os.path.exists(self.nodule_list_pickle_path)
        snapshot_exists = config_snapshot(
            "lidc_nodule", lidc_nodule_config,
            "./src/data/aux/.lidcnod_config_snapshot.json")
        if not nodule_pickle_exists or not snapshot_exists:
            nodule_list = []
            _tqdm_kwargs = {
                "desc": "Preparing LIDC nodule list",
                "total": len(cluster_list)
            }
            for i, cluster in tqdm(enumerate(cluster_list), **_tqdm_kwargs):
                # Check if all annotations belong to the same scan
                if len(np.unique([ann.scan.id for ann in cluster])) != 1:
                    logger.warning(
                        "annotations are not from the same scan; skipping")
                    continue

                nodule_diam = np.mean([ann.diameter for ann in cluster])
                texture_scores = [ann.texture for ann in cluster]
                # Skip nodules outside the diameter interval and nodules
                # with ambiguous texture scores.
                if (nodule_diam < self.diam_interval[0]
                        or nodule_diam >= self.diam_interval[1]
                        or not_valid_score(texture_scores)):
                    continue

                # Pad the consensus bbox to the requested extract size, but
                # never below the largest single-annotation bbox (in mm).
                minsize = max(max(ann.bbox_dims(pad=None)) for ann in cluster)
                pad_mm = max(float(self.extract_size_mm), minsize)
                nodule_mask, nodule_bbox = consensus(cluster,
                                                     clevel=0.8,
                                                     pad=pad_mm,
                                                     ret_masks=False)
                dilated_nodule_mask = binary_dilation(
                    nodule_mask, iterations=self.mask_dilation_iters)
                nodule_coords = np.mean([ann.centroid for ann in cluster],
                                        axis=0)
                nodule_texture = mode(texture_scores).mode.item()

                nodule = LIDCNodule(
                    pylidc_scan=cluster[0].scan,
                    bbox=nodule_bbox,
                    mask=dilated_nodule_mask,
                    centroid=nodule_coords,
                    diameter=nodule_diam,
                    texture=nodule_texture,
                )
                nodule_list.append(nodule)

            logger.info("pickling LIDC nodule list for future use")
            with open(self.nodule_list_pickle_path, "wb") as f:
                pickle.dump(nodule_list, f)
        else:
            with open(self.nodule_list_pickle_path, "rb") as f:
                nodule_list = pickle.load(f)
        return nodule_list
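
LIDCNodule itself does not appear in this listing. Judging by the keyword arguments in Example #3, it could be as small as a dataclass; the field types below are inferred from the constructor call, not taken from the source:

from dataclasses import dataclass

import numpy as np
import pylidc


@dataclass
class LIDCNodule:
    # Sketch; fields inferred from the constructor call in Example #3.
    pylidc_scan: pylidc.Scan  # scan the nodule belongs to
    bbox: tuple               # consensus bounding box (one slice per axis)
    mask: np.ndarray          # dilated boolean consensus mask
    centroid: np.ndarray      # mean annotation centroid (voxel coords)
    diameter: float           # mean annotation diameter, mm
    texture: int              # modal texture score (1 = ground-glass, 5 = solid)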