Example #1
    def test_set_data(self):
        data_list1 = list(range(10))

        transform = Compose([
            Lambda(func=lambda x: np.array([x * 10])),
            RandLambda(func=lambda x: x + 1)
        ])

        dataset = CacheDataset(
            data=data_list1,
            transform=transform,
            cache_rate=1.0,
            num_workers=4,
            progress=True,
            copy_cache=not sys.platform == "linux",
        )

        num_workers = 2 if sys.platform == "linux" else 0
        dataloader = DataLoader(dataset=dataset,
                                num_workers=num_workers,
                                batch_size=1)
        for i, d in enumerate(dataloader):
            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)
        # simulate another epoch, the cache content should not be modified
        for i, d in enumerate(dataloader):
            np.testing.assert_allclose([[data_list1[i] * 10 + 1]], d)

        # update the datalist and fill the cache content
        data_list2 = list(range(-10, 0))
        dataset.set_data(data=data_list2)
        # rerun with updated cache content
        for i, d in enumerate(dataloader):
            np.testing.assert_allclose([[data_list2[i] * 10 + 1]], d)
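A minimal sketch of the same caching pattern outside the test harness (the datalist and transform values here are illustrative, not taken from the example above): deterministic transforms at the head of the Compose run once when the cache is built, the trailing random transform runs on every fetch, and set_data replaces the cached content in place.

    import numpy as np
    from monai.data import CacheDataset, DataLoader
    from monai.transforms import Compose, Lambda, RandLambda

    items = list(range(4))
    transform = Compose([
        Lambda(func=lambda x: np.array([x * 10])),   # deterministic: executed once, result cached
        RandLambda(func=lambda x: x + 1),            # random: re-applied on every __getitem__
    ])
    ds = CacheDataset(data=items, transform=transform, cache_rate=1.0, num_workers=0)
    for batch in DataLoader(ds, batch_size=1, num_workers=0):
        print(batch)
    ds.set_data(data=list(range(-4, 0)))  # refresh the cache with a new datalist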
Example #2
    def __init__(
        self,
        root_dir: str,
        task: str,
        section: str,
        transform: Union[Sequence[Callable], Callable] = (),
        download: bool = False,
        seed: int = 0,
        val_frac: float = 0.2,
        cache_num: int = sys.maxsize,
        cache_rate: float = 1.0,
        num_workers: int = 0,
    ) -> None:
        if not os.path.isdir(root_dir):
            raise ValueError("Root directory root_dir must be a directory.")
        self.section = section
        self.val_frac = val_frac
        self.set_random_state(seed=seed)
        if task not in self.resource:
            raise ValueError(
                f"Unsupported task: {task}, available options are: {list(self.resource.keys())}."
            )
        dataset_dir = os.path.join(root_dir, task)
        tarfile_name = f"{dataset_dir}.tar"
        if download:
            download_and_extract(self.resource[task], tarfile_name, root_dir,
                                 self.md5[task])

        if not os.path.exists(dataset_dir):
            raise RuntimeError(
                f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
            )
        self.indices: np.ndarray = np.array([])
        data = self._generate_data_list(dataset_dir)
        # as `release` key has typo in Task04 config file, ignore it.
        property_keys = [
            "name",
            "description",
            "reference",
            "licence",
            "tensorImageSize",
            "modality",
            "labels",
            "numTraining",
            "numTest",
        ]
        self._properties = load_decathlon_properties(
            os.path.join(dataset_dir, "dataset.json"), property_keys)
        if transform == ():
            transform = LoadImaged(["image", "label"])
        CacheDataset.__init__(self,
                              data,
                              transform,
                              cache_num=cache_num,
                              cache_rate=cache_rate,
                              num_workers=num_workers)
Example #3
    def __init__(
        self,
        root_dir: PathLike,
        section: str,
        transform: Union[Sequence[Callable], Callable] = (),
        download: bool = False,
        seed: int = 0,
        val_frac: float = 0.1,
        test_frac: float = 0.1,
        cache_num: int = sys.maxsize,
        cache_rate: float = 1.0,
        num_workers: Optional[int] = 1,
        progress: bool = True,
        copy_cache: bool = True,
        as_contiguous: bool = True,
    ) -> None:
        root_dir = Path(root_dir)
        if not root_dir.is_dir():
            raise ValueError("Root directory root_dir must be a directory.")
        self.section = section
        self.val_frac = val_frac
        self.test_frac = test_frac
        self.set_random_state(seed=seed)
        tarfile_name = root_dir / self.compressed_file_name
        dataset_dir = root_dir / self.dataset_folder_name
        self.num_class = 0
        if download:
            download_and_extract(
                url=self.resource,
                filepath=tarfile_name,
                output_dir=root_dir,
                hash_val=self.md5,
                hash_type="md5",
                progress=progress,
            )

        if not dataset_dir.is_dir():
            raise RuntimeError(
                f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
            )
        data = self._generate_data_list(dataset_dir)
        if transform == ():
            transform = LoadImaged("image")
        CacheDataset.__init__(
            self,
            data=data,
            transform=transform,
            cache_num=cache_num,
            cache_rate=cache_rate,
            num_workers=num_workers,
            progress=progress,
            copy_cache=copy_cache,
            as_contiguous=as_contiguous,
        )
Example #4
    def test_hash_as_key(self, transform, expected_shape):
        test_image = nib.Nifti1Image(
            np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
        with tempfile.TemporaryDirectory() as tempdir:
            test_data = []
            for i in ["1", "2", "2", "3", "3"]:
                for k in ["image", "label", "extra"]:
                    nib.save(test_image, os.path.join(tempdir,
                                                      f"{k}{i}.nii.gz"))
                test_data.append({
                    k: os.path.join(tempdir, f"{k}{i}.nii.gz")
                    for k in ["image", "label", "extra"]
                })

            dataset = CacheDataset(data=test_data,
                                   transform=transform,
                                   cache_num=4,
                                   num_workers=2,
                                   hash_as_key=True)
            self.assertEqual(len(dataset), 5)
            # ensure no duplicated cache content
            self.assertEqual(len(dataset._cache), 3)
            self.assertEqual(dataset.cache_num, 3)
            data1 = dataset[0]
            data2 = dataset[1]
            data3 = dataset[-1]
            # test slice indices
            data4 = dataset[0:-1]
            self.assertEqual(len(data4), 4)

            if transform is None:
                self.assertEqual(data1["image"],
                                 os.path.join(tempdir, "image1.nii.gz"))
                self.assertEqual(data2["label"],
                                 os.path.join(tempdir, "label2.nii.gz"))
                self.assertEqual(data3["image"],
                                 os.path.join(tempdir, "image3.nii.gz"))
            else:
                self.assertTupleEqual(data1["image"].shape, expected_shape)
                self.assertTupleEqual(data2["label"].shape, expected_shape)
                self.assertTupleEqual(data3["image"].shape, expected_shape)
                for d in data4:
                    self.assertTupleEqual(d["image"].shape, expected_shape)

            test_data2 = test_data[:3]
            dataset.set_data(data=test_data2)
            self.assertEqual(len(dataset), 3)
            # ensure no duplicated cache content
            self.assertEqual(len(dataset._cache), 2)
            self.assertEqual(dataset.cache_num, 2)
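A minimal sketch of the deduplication behaviour exercised above (the toy datalist is illustrative, and it assumes a MONAI version where the transform argument may be omitted, as in Example #5): with hash_as_key=True, items that hash to the same value share a single cache entry, so the cache can hold fewer entries than the dataset has items.

    from monai.data import CacheDataset

    datalist = [1, 2, 2, 3, 3]  # duplicates collapse to three cache keys
    ds = CacheDataset(data=datalist, cache_num=4, hash_as_key=True)
    print(len(ds), ds.cache_num)  # 5 items in the dataset, 3 entries in the cache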
Example #5
    def test_shape(self, transform, expected_shape):
        test_image = nib.Nifti1Image(
            np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
        with tempfile.TemporaryDirectory() as tempdir:
            test_data = []
            for i in ["1", "2"]:
                for k in ["image", "label", "extra"]:
                    nib.save(test_image, os.path.join(tempdir,
                                                      f"{k}{i}.nii.gz"))
                test_data.append({
                    k: os.path.join(tempdir, f"{k}{i}.nii.gz")
                    for k in ["image", "label", "extra"]
                })

            dataset = CacheDataset(data=test_data,
                                   transform=transform,
                                   cache_rate=0.5,
                                   as_contiguous=True)
            data1 = dataset[0]
            data2 = dataset[1]
            data3 = dataset[0:-1]
            data4 = dataset[-1]
            self.assertEqual(len(data3), 1)

            if transform is None:
                # Check without providing transform
                dataset2 = CacheDataset(data=test_data,
                                        cache_rate=0.5,
                                        as_contiguous=True)
                for k in ["image", "label", "extra"]:
                    self.assertEqual(dataset[0][k], dataset2[0][k])

        if transform is None:
            self.assertEqual(data1["image"],
                             os.path.join(tempdir, "image1.nii.gz"))
            self.assertEqual(data2["label"],
                             os.path.join(tempdir, "label2.nii.gz"))
            self.assertEqual(data4["image"],
                             os.path.join(tempdir, "image2.nii.gz"))
        else:
            self.assertTupleEqual(data1["image"].shape, expected_shape)
            self.assertTupleEqual(data1["label"].shape, expected_shape)
            self.assertTupleEqual(data1["extra"].shape, expected_shape)
            self.assertTupleEqual(data2["image"].shape, expected_shape)
            self.assertTupleEqual(data2["label"].shape, expected_shape)
            self.assertTupleEqual(data2["extra"].shape, expected_shape)
            for d in data3:
                self.assertTupleEqual(d["image"].shape, expected_shape)
Example #6
 def test_shape(self, expected_shape):
     test_image = nib.Nifti1Image(
         np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
     tempdir = tempfile.mkdtemp()
     nib.save(test_image, os.path.join(tempdir, 'test_image1.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_label1.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_extra1.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_image2.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_label2.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_extra2.nii.gz'))
     test_data = [{
         'image': os.path.join(tempdir, 'test_image1.nii.gz'),
         'label': os.path.join(tempdir, 'test_label1.nii.gz'),
         'extra': os.path.join(tempdir, 'test_extra1.nii.gz')
     }, {
         'image': os.path.join(tempdir, 'test_image2.nii.gz'),
         'label': os.path.join(tempdir, 'test_label2.nii.gz'),
         'extra': os.path.join(tempdir, 'test_extra2.nii.gz')
     }]
     dataset = CacheDataset(
         data=test_data,
         transform=Compose([LoadNiftid(keys=['image', 'label', 'extra'])]),
         cache_rate=0.5)
     data1 = dataset[0]
     data2 = dataset[1]
     shutil.rmtree(tempdir)
     self.assertTupleEqual(data1['image'].shape, expected_shape)
     self.assertTupleEqual(data1['label'].shape, expected_shape)
     self.assertTupleEqual(data1['extra'].shape, expected_shape)
     self.assertTupleEqual(data2['image'].shape, expected_shape)
     self.assertTupleEqual(data2['label'].shape, expected_shape)
     self.assertTupleEqual(data2['extra'].shape, expected_shape)
Example #7
    def _get_loader(self, folders):
        images = []
        segs = []
        for folder in folders:
            images += glob(os.path.join(folder, "*_im.nii.gz"))
            segs += glob(os.path.join(folder, "*_seg.nii.gz"))
        images = sorted(images, key=os.path.basename)
        segs = sorted(segs, key=os.path.basename)

        files = [{"img": img, "seg": seg} for img, seg in zip(images, segs)]

        transforms = Compose([
            LoadImaged(keys=["img", "seg"]),
            AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
            ScaleIntensityd(keys="img"),
            ToTensord(keys=["img", "seg"]),
        ])

        ds = CacheDataset(data=files, transform=transforms)
        loader = DataLoader(ds,
                            batch_size=1,
                            num_workers=4,
                            collate_fn=list_data_collate)

        return loader
Example #8
 def test_shape(self, expected_shape):
     test_image = nib.Nifti1Image(
         np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
     tempdir = tempfile.mkdtemp()
     nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_image2.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_label2.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_extra2.nii.gz"))
     test_data = [
         {
             "image": os.path.join(tempdir, "test_image1.nii.gz"),
             "label": os.path.join(tempdir, "test_label1.nii.gz"),
             "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
         },
         {
             "image": os.path.join(tempdir, "test_image2.nii.gz"),
             "label": os.path.join(tempdir, "test_label2.nii.gz"),
             "extra": os.path.join(tempdir, "test_extra2.nii.gz"),
         },
     ]
     dataset = CacheDataset(
         data=test_data,
         transform=Compose([LoadNiftid(keys=["image", "label", "extra"])]),
         cache_rate=0.5)
     data1 = dataset[0]
     data2 = dataset[1]
     shutil.rmtree(tempdir)
     self.assertTupleEqual(data1["image"].shape, expected_shape)
     self.assertTupleEqual(data1["label"].shape, expected_shape)
     self.assertTupleEqual(data1["extra"].shape, expected_shape)
     self.assertTupleEqual(data2["image"].shape, expected_shape)
     self.assertTupleEqual(data2["label"].shape, expected_shape)
     self.assertTupleEqual(data2["extra"].shape, expected_shape)
Example #9
    def test_decollation(self, batch_size=2, num_workers=2):

        im = create_test_image_2d(100, 101)[0]
        data = [{
            "image": make_nifti_image(im) if has_nib else im
        } for _ in range(6)]

        transforms = Compose([
            AddChanneld("image"),
            SpatialPadd("image", 150),
            RandFlipd("image", prob=1.0, spatial_axis=1),
            ToTensord("image"),
        ])
        # If nibabel present, read from disk
        if has_nib:
            transforms = Compose([LoadImaged("image"), transforms])

        dataset = CacheDataset(data, transforms, progress=False)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)

        for b, batch_data in enumerate(loader):
            decollated_1 = decollate_batch(batch_data)
            decollated_2 = Decollated()(batch_data)

            for decollated in [decollated_1, decollated_2]:
                for i, d in enumerate(decollated):
                    self.check_match(dataset[b * batch_size + i], d)
Example #10
    def test_pad_collation(self, t_type, collate_method, transform):

        if t_type == dict:
            dataset = CacheDataset(self.dict_data, transform, progress=False)
        else:
            dataset = _Dataset(self.list_data, self.list_labels, transform)

        # Default collation should raise an error
        loader_fail = DataLoader(dataset, batch_size=10)
        with self.assertRaises(RuntimeError):
            for _ in loader_fail:
                pass

        # Padded collation shouldn't
        loader = DataLoader(dataset, batch_size=10, collate_fn=collate_method)
        # check collation in forward direction
        for data in loader:
            if t_type == dict:
                shapes = []
                decollated_data = decollate_batch(data)
                for d in decollated_data:
                    output = PadListDataCollate.inverse(d)
                    shapes.append(output["image"].shape)
                self.assertTrue(
                    len(set(shapes)) > 1
                )  # inverted shapes must be different because of random xforms
Example #11
    def test_collation(self, _, transform, collate_fn, ndim):
        data = self.data_3d if ndim == 3 else self.data_2d
        if collate_fn:
            modified_transform = transform
        else:
            modified_transform = Compose(
                [transform,
                 ResizeWithPadOrCropd(KEYS, 100),
                 ToTensord(KEYS)])

        # num workers = 0 for mac or gpu transforms
        num_workers = 0 if sys.platform != "linux" or torch.cuda.is_available() else 2

        dataset = CacheDataset(data,
                               transform=modified_transform,
                               progress=False)
        loader = DataLoader(dataset,
                            num_workers,
                            batch_size=self.batch_size,
                            collate_fn=collate_fn)

        for item in loader:
            np.testing.assert_array_equal(
                item["image_transforms"][0]["do_transforms"],
                item["label_transforms"][0]["do_transforms"])
Example #12
    def test_decollation(self, *transforms):

        batch_size = 2
        num_workers = 2

        t_compose = Compose(
            [AddChanneld(KEYS),
             Compose(transforms),
             ToTensord(KEYS)])
        # If nibabel present, read from disk
        if has_nib:
            t_compose = Compose([LoadImaged("image"), t_compose])

        dataset = CacheDataset(self.data, t_compose, progress=False)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)

        for b, batch_data in enumerate(loader):
            decollated_1 = decollate_batch(batch_data)
            decollated_2 = Decollated()(batch_data)

            for decollated in [decollated_1, decollated_2]:
                for i, d in enumerate(decollated):
                    self.check_match(dataset[b * batch_size + i], d)
Example #13
 def test_values(self):
     datalist = [
         {
             "image": "spleen_19.nii.gz",
             "label": "spleen_label_19.nii.gz"
         },
         {
             "image": "spleen_31.nii.gz",
             "label": "spleen_label_31.nii.gz"
         },
     ]
     transform = Compose([
         DataStatsd(keys=["image", "label"],
                    data_shape=False,
                    value_range=False,
                    data_value=True),
         SimulateDelayd(keys=["image", "label"], delay_time=0.1),
     ])
     dataset = CacheDataset(data=datalist,
                            transform=transform,
                            cache_rate=0.5,
                            cache_num=1)
     dataloader = DataLoader(dataset=dataset, batch_size=2, num_workers=2)
     for d in dataloader:
         self.assertEqual(d["image"][0], "spleen_19.nii.gz")
         self.assertEqual(d["image"][1], "spleen_31.nii.gz")
         self.assertEqual(d["label"][0], "spleen_label_19.nii.gz")
         self.assertEqual(d["label"][1], "spleen_label_31.nii.gz")
Example #14
    def test_epistemic_scoring(self):
        input_size = (20, 20, 20)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        keys = ["image", "label"]
        num_training_ims = 10
        train_data = self.get_data(num_training_ims, input_size)
        test_data = self.get_data(1, input_size)

        transforms = Compose([
            AddChanneld(keys),
            CropForegroundd(keys, source_key="image"),
            DivisiblePadd(keys, 4),
        ])

        infer_transforms = Compose([
            AddChannel(),
            CropForeground(),
            DivisiblePad(4),
        ])

        train_ds = CacheDataset(train_data, transforms)
        # output might be different size, so pad so that they match
        train_loader = DataLoader(train_ds,
                                  batch_size=2,
                                  collate_fn=pad_list_data_collate)

        model = UNet(3, 1, 1, channels=(6, 6), strides=(2, 2)).to(device)
        loss_function = DiceLoss(sigmoid=True)
        optimizer = torch.optim.Adam(model.parameters(), 1e-3)

        num_epochs = 10
        for _ in trange(num_epochs):
            epoch_loss = 0

            for batch_data in train_loader:
                inputs, labels = batch_data["image"].to(
                    device), batch_data["label"].to(device)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = loss_function(outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()

            epoch_loss /= len(train_loader)

        entropy_score = EpistemicScoring(model=model,
                                         transforms=infer_transforms,
                                         roi_size=[20, 20, 20],
                                         num_samples=10)
        # Call Individual Infer from Epistemic Scoring
        ip_stack = [test_data["image"], test_data["image"], test_data["image"]]
        ip_stack = np.array(ip_stack)
        score_3d = entropy_score.entropy_3d_volume(ip_stack)
        score_3d_sum = np.sum(score_3d)
        # Call Entropy Metric from Epistemic Scoring
        self.assertEqual(score_3d.shape, input_size)
        self.assertIsInstance(score_3d_sum, np.float32)
        self.assertGreater(score_3d_sum, 3.0)
Example #15
    def test_decollation_dict(self, *transforms):
        t_compose = Compose([AddChanneld(KEYS), Compose(transforms), ToTensord(KEYS)])
        # If nibabel present, read from disk
        if has_nib:
            t_compose = Compose([LoadImaged("image"), t_compose])

        dataset = CacheDataset(self.data_dict, t_compose, progress=False)
        self.check_decollate(dataset=dataset)
Example #16
    def test_inverse_inferred_seg(self):

        test_data = []
        for _ in range(20):
            image, label = create_test_image_2d(100, 101)
            test_data.append({
                "image": image,
                "label": label.astype(np.float32)
            })

        batch_size = 10
        # num workers = 0 for mac
        num_workers = 2 if sys.platform != "darwin" else 0
        transforms = Compose([
            AddChanneld(KEYS),
            SpatialPadd(KEYS, (150, 153)),
            CenterSpatialCropd(KEYS, (110, 99))
        ])
        num_invertible_transforms = sum(1 for i in transforms.transforms
                                        if isinstance(i, InvertibleTransform))

        dataset = CacheDataset(test_data, transform=transforms, progress=False)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = UNet(
            dimensions=2,
            in_channels=1,
            out_channels=1,
            channels=(2, 4),
            strides=(2, ),
        ).to(device)

        data = first(loader)
        labels = data["label"].to(device)
        segs = model(labels).detach().cpu()
        label_transform_key = "label" + InverseKeys.KEY_SUFFIX.value
        segs_dict = {
            "label": segs,
            label_transform_key: data[label_transform_key]
        }

        segs_dict_decollated = decollate_batch(segs_dict)

        # inverse of individual segmentation
        seg_dict = first(segs_dict_decollated)
        with allow_missing_keys_mode(transforms):
            inv_seg = transforms.inverse(seg_dict)["label"]
        self.assertEqual(len(data["label_transforms"]),
                         num_invertible_transforms)
        self.assertEqual(len(seg_dict["label_transforms"]),
                         num_invertible_transforms)
        self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)
Example #17
    def test_inverse_inferred_seg(self, extra_transform):

        test_data = []
        for _ in range(20):
            image, label = create_test_image_2d(100, 101)
            test_data.append({
                "image": image,
                "label": label.astype(np.float32)
            })

        batch_size = 10
        # num workers = 0 for mac
        num_workers = 2 if sys.platform == "linux" else 0
        transforms = Compose([
            AddChanneld(KEYS),
            SpatialPadd(KEYS, (150, 153)), extra_transform
        ])

        dataset = CacheDataset(test_data, transform=transforms, progress=False)
        loader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = UNet(spatial_dims=2,
                     in_channels=1,
                     out_channels=1,
                     channels=(2, 4),
                     strides=(1, )).to(device)

        data = first(loader)
        self.assertEqual(data["image"].shape[0], batch_size * NUM_SAMPLES)

        labels = data["label"].to(device)
        self.assertIsInstance(labels, MetaTensor)
        segs = model(labels).detach().cpu()
        segs_decollated = decollate_batch(segs)
        self.assertIsInstance(segs_decollated[0], MetaTensor)
        # inverse of individual segmentation
        seg_metatensor = first(segs_decollated)
        # test to convert interpolation mode for 1 data of model output batch
        convert_applied_interp_mode(seg_metatensor.applied_operations,
                                    mode="nearest",
                                    align_corners=None)

        # manually invert the last crop samples
        xform = seg_metatensor.applied_operations.pop(-1)
        shape_before_extra_xform = xform["orig_size"]
        resizer = ResizeWithPadOrCrop(spatial_size=shape_before_extra_xform)
        with resizer.trace_transform(False):
            seg_metatensor = resizer(seg_metatensor)

        with allow_missing_keys_mode(transforms):
            inv_seg = transforms.inverse({"label": seg_metatensor})["label"]
        self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)
Example #18
    def __init__(
        self,
        root_dir: str,
        section: str,
        transform: Union[Sequence[Callable], Callable] = (),
        download: bool = False,
        seed: int = 0,
        val_frac: float = 0.1,
        test_frac: float = 0.1,
        cache_num: int = sys.maxsize,
        cache_rate: float = 1.0,
        num_workers: int = 0,
    ) -> None:
        if not os.path.isdir(root_dir):
            raise ValueError("Root directory root_dir must be a directory.")
        self.section = section
        self.val_frac = val_frac
        self.test_frac = test_frac
        self.set_random_state(seed=seed)
        tarfile_name = os.path.join(root_dir, self.compressed_file_name)
        dataset_dir = os.path.join(root_dir, self.dataset_folder_name)
        self.num_class = 0
        if download:
            download_and_extract(self.resource, tarfile_name, root_dir,
                                 self.md5)

        if not os.path.exists(dataset_dir):
            raise RuntimeError(
                f"Cannot find dataset directory: {dataset_dir}, please use download=True to download it."
            )
        data = self._generate_data_list(dataset_dir)
        if transform == ():
            transform = LoadImaged("image")
        CacheDataset.__init__(self,
                              data,
                              transform,
                              cache_num=cache_num,
                              cache_rate=cache_rate,
                              num_workers=num_workers)
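Examples #2, #3, and #18 all follow the same pattern: validate the root directory, optionally download and extract the archive, build a datalist, then delegate caching to CacheDataset.__init__. A hedged sketch of that pattern in a custom dataset (the class name and datalist layout are hypothetical, not from the examples above):

    import sys
    from monai.data import CacheDataset
    from monai.transforms import LoadImaged

    class MyFolderDataset(CacheDataset):
        """Hypothetical subclass: expects a list of dicts with an "image" key."""

        def __init__(self, data_list, transform=(), cache_num=sys.maxsize,
                     cache_rate=1.0, num_workers=0):
            if transform == ():
                transform = LoadImaged("image")  # default loader, as in Examples #3 and #18
            super().__init__(data=data_list, transform=transform, cache_num=cache_num,
                             cache_rate=cache_rate, num_workers=num_workers)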
Example #19
 def _get_predictions_iterator(self, segs):
     files = [{"seg": seg} for seg in segs]
     transforms = Compose([
         LoadImaged(keys=["seg"]),
         AsChannelFirstd(keys=["seg"], channel_dim=-1),
         ToTensord(keys=["seg"]),
     ])
     ds = CacheDataset(data=files, transform=transforms)
     loader = DataLoader(ds,
                         batch_size=1,
                         num_workers=4,
                         collate_fn=list_data_collate)
     for data in loader:
         yield (data["seg"], data["seg_meta_dict"])
Example #20
 def test_value(self):
     device = "cuda:0"
     data = [{"img": torch.tensor(i)} for i in range(4)]
     dataset = CacheDataset(data=data,
                            transform=ToDeviced(keys="img",
                                                device=device,
                                                non_blocking=True),
                            cache_rate=1.0)
     dataloader = ThreadDataLoader(dataset=dataset,
                                   num_workers=0,
                                   batch_size=1)
     for i, d in enumerate(dataloader):
         torch.testing.assert_allclose(d["img"],
                                       torch.tensor([i], device=device))
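A hedged variant of the device-caching idea above with a CPU fallback (Example #20 hard-codes "cuda:0"; the fallback is an assumption): because ToDeviced runs while the cache is built, the cached tensors already live on the target device, and ThreadDataLoader with num_workers=0 serves them from the main thread without copying them again each epoch.

    import torch
    from monai.data import CacheDataset, ThreadDataLoader
    from monai.transforms import ToDeviced

    device = "cuda:0" if torch.cuda.is_available() else "cpu"  # assumption: fall back to CPU
    data = [{"img": torch.tensor(float(i))} for i in range(4)]
    ds = CacheDataset(data=data,
                      transform=ToDeviced(keys="img", device=device),
                      cache_rate=1.0)
    for batch in ThreadDataLoader(ds, batch_size=1, num_workers=0):
        print(batch["img"].device)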
Example #21
    def test_collation(self, _, transform, collate_fn):

        if collate_fn:
            modified_transform = transform
        else:
            modified_transform = Compose([transform, ResizeWithPadOrCropd(KEYS, [100, 100, 100])])

        # num workers = 0 for mac
        num_workers = 2 if sys.platform != "darwin" else 0

        dataset = CacheDataset(self.data, transform=modified_transform, progress=False)
        loader = DataLoader(dataset, num_workers, batch_size=self.batch_size, collate_fn=collate_fn)

        for _ in loader:
            pass
Example #22
    def test_duplicate_transforms(self):
        im, _ = create_test_image_2d(128, 128, num_seg_classes=1, channel_dim=0)
        data = [{"img": im} for _ in range(2)]

        # at least 1 deterministic followed by at least 1 random
        transform = Compose([Spacingd("img", pixdim=(1, 1)), RandAffined("img", prob=1.0)])

        # cachedataset and data loader w persistent_workers
        train_ds = CacheDataset(data, transform, cache_num=1)
        train_loader = DataLoader(train_ds, num_workers=2, persistent_workers=True)

        b1 = next(iter(train_loader))
        b2 = next(iter(train_loader))

        self.assertEqual(len(b1["img_transforms"]), len(b2["img_transforms"]))
Example #23
    def test_duplicate_transforms(self):
        data = [{"img": create_test_image_2d(128, 128, num_seg_classes=1, channel_dim=0)[0]} for _ in range(2)]

        # at least 1 deterministic followed by at least 1 random
        transform = Compose([Spacingd("img", pixdim=(1, 1)), RandAffined("img", prob=1.0)])

        # cachedataset and data loader w persistent_workers
        train_ds = CacheDataset(data, transform, cache_num=1)
        # num_workers > 1 may fail randomly with 21.09 on A100 test node
        # https://github.com/Project-MONAI/MONAI/issues/3283
        train_loader = DataLoader(train_ds, num_workers=1, persistent_workers=True)

        b1 = next(iter(train_loader))
        b2 = next(iter(train_loader))

        self.assertEqual(len(b1["img"].applied_operations), len(b2["img"].applied_operations))
Example #24
    def test_shape(self, transform, expected_shape):
        test_image = nib.Nifti1Image(
            np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
        with tempfile.TemporaryDirectory() as tempdir:
            nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
            nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
            nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
            nib.save(test_image, os.path.join(tempdir, "test_image2.nii.gz"))
            nib.save(test_image, os.path.join(tempdir, "test_label2.nii.gz"))
            nib.save(test_image, os.path.join(tempdir, "test_extra2.nii.gz"))
            test_data = [
                {
                    "image": os.path.join(tempdir, "test_image1.nii.gz"),
                    "label": os.path.join(tempdir, "test_label1.nii.gz"),
                    "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
                },
                {
                    "image": os.path.join(tempdir, "test_image2.nii.gz"),
                    "label": os.path.join(tempdir, "test_label2.nii.gz"),
                    "extra": os.path.join(tempdir, "test_extra2.nii.gz"),
                },
            ]
            dataset = CacheDataset(data=test_data,
                                   transform=transform,
                                   cache_rate=0.5)
            data1 = dataset[0]
            data2 = dataset[1]
            data3 = dataset[0:-1]
            data4 = dataset[-1]
            self.assertEqual(len(data3), 1)

        if transform is None:
            self.assertEqual(data1["image"],
                             os.path.join(tempdir, "test_image1.nii.gz"))
            self.assertEqual(data2["label"],
                             os.path.join(tempdir, "test_label2.nii.gz"))
            self.assertEqual(data4["image"],
                             os.path.join(tempdir, "test_image2.nii.gz"))
        else:
            self.assertTupleEqual(data1["image"].shape, expected_shape)
            self.assertTupleEqual(data1["label"].shape, expected_shape)
            self.assertTupleEqual(data1["extra"].shape, expected_shape)
            self.assertTupleEqual(data2["image"].shape, expected_shape)
            self.assertTupleEqual(data2["label"].shape, expected_shape)
            self.assertTupleEqual(data2["extra"].shape, expected_shape)
            for d in data3:
                self.assertTupleEqual(d["image"].shape, expected_shape)
Example #25
    def test_transforms(self, case_id):
        set_determinism(2022)
        config = ConfigParser()
        config.read_config(TEST_CASES)
        config["input_keys"] = keys
        test_case = config.get_parsed_content(id=case_id, instantiate=True)  # transform instance

        dataset = CacheDataset(self.files, transform=test_case)
        loader = DataLoader(dataset, batch_size=3, shuffle=True)
        for x in loader:
            self.assertIsInstance(x[keys[0]], MetaTensor)
            self.assertIsInstance(x[keys[1]], MetaTensor)
            out = decollate_batch(x)  # decollate every batch should work

        # test forward patches
        loaded = out[0]
        self.assertEqual(len(loaded), len(keys))
        img, seg = loaded[keys[0]], loaded[keys[1]]
        expected = config.get_parsed_content(id=f"{case_id}_answer", instantiate=True)  # expected results
        self.assertEqual(expected["load_shape"], list(x[keys[0]].shape))
        assert_allclose(expected["affine"], img.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        assert_allclose(expected["affine"], seg.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        test_cls = [type(x).__name__ for x in test_case.transforms]
        tracked_cls = [x[TraceKeys.CLASS_NAME] for x in img.applied_operations]
        self.assertTrue(len(tracked_cls) <= len(test_cls))  # the number of tracked items should not exceed the number of compose items
        with tempfile.TemporaryDirectory() as tempdir:  # test writer
            SaveImageD(keys, resample=False, output_dir=tempdir, output_postfix=case_id)(loaded)

        # test inverse
        inv = InvertD(keys, orig_keys=keys, transform=test_case, nearest_interp=True)
        out = inv(loaded)
        img, seg = out[keys[0]], out[keys[1]]
        assert_allclose(expected["inv_affine"], img.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        assert_allclose(expected["inv_affine"], seg.affine, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        self.assertFalse(img.applied_operations)
        self.assertFalse(seg.applied_operations)
        assert_allclose(expected["inv_shape"], img.shape, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        assert_allclose(expected["inv_shape"], seg.shape, type_test=False, atol=TINY_DIFF, rtol=TINY_DIFF)
        with tempfile.TemporaryDirectory() as tempdir:  # test writer
            SaveImageD(keys, resample=False, output_dir=tempdir, output_postfix=case_id)(out)
            seg_file = os.path.join(tempdir, key_1, f"{key_1}_{case_id}.nii.gz")
            segout = nib.load(seg_file).get_fdata()
            segin = nib.load(FILE_PATH_1).get_fdata()
            ndiff = np.sum(np.abs(segout - segin) > 0)
            total = np.prod(segout.shape)
        self.assertTrue(ndiff / total < 0.4, f"{ndiff / total}")
Example #26
    def test_inverse_inferred_seg(self, extra_transform):

        test_data = []
        for _ in range(20):
            image, label = create_test_image_2d(100, 101)
            test_data.append({"image": image, "label": label.astype(np.float32)})

        batch_size = 10
        # num workers = 0 for mac
        num_workers = 2 if sys.platform == "linux" else 0
        transforms = Compose([AddChanneld(KEYS), SpatialPadd(KEYS, (150, 153)), extra_transform])
        num_invertible_transforms = sum(1 for i in transforms.transforms if isinstance(i, InvertibleTransform))

        dataset = CacheDataset(test_data, transform=transforms, progress=False)
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

        device = "cuda" if torch.cuda.is_available() else "cpu"
        model = UNet(spatial_dims=2, in_channels=1, out_channels=1, channels=(2, 4), strides=(2,)).to(device)

        data = first(loader)
        self.assertEqual(len(data["label_transforms"]), num_invertible_transforms)
        self.assertEqual(data["image"].shape[0], batch_size * NUM_SAMPLES)

        labels = data["label"].to(device)
        segs = model(labels).detach().cpu()
        label_transform_key = "label" + InverseKeys.KEY_SUFFIX
        segs_dict = {"label": segs, label_transform_key: data[label_transform_key]}

        segs_dict_decollated = decollate_batch(segs_dict)
        # inverse of individual segmentation
        seg_dict = first(segs_dict_decollated)
        # test to convert interpolation mode for 1 data of model output batch
        convert_inverse_interp_mode(seg_dict, mode="nearest", align_corners=None)

        with allow_missing_keys_mode(transforms):
            inv_seg = transforms.inverse(seg_dict)["label"]
        self.assertEqual(len(data["label_transforms"]), num_invertible_transforms)
        self.assertEqual(len(seg_dict["label_transforms"]), num_invertible_transforms)
        self.assertEqual(inv_seg.shape[1:], test_data[0]["label"].shape)

        # Inverse of batch
        batch_inverter = BatchInverseTransform(transforms, loader, collate_fn=no_collation, detach=True)
        with allow_missing_keys_mode(transforms):
            inv_batch = batch_inverter(segs_dict)
        self.assertEqual(inv_batch[0]["label"].shape[1:], test_data[0]["label"].shape)
Example #27
 def test_shape(self, num_workers, dataset_size, transform):
     test_image = nib.Nifti1Image(np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
     tempdir = tempfile.mkdtemp()
     nib.save(test_image, os.path.join(tempdir, "test_image1.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_label1.nii.gz"))
     nib.save(test_image, os.path.join(tempdir, "test_extra1.nii.gz"))
     test_data = [
         {
             "image": os.path.join(tempdir, "test_image1.nii.gz"),
             "label": os.path.join(tempdir, "test_label1.nii.gz"),
             "extra": os.path.join(tempdir, "test_extra1.nii.gz"),
         }
     ] * dataset_size
     dataset = CacheDataset(data=test_data, transform=transform, cache_rate=1, num_workers=num_workers,)
     shutil.rmtree(tempdir)
     self.assertEqual(len(dataset._cache), dataset.cache_num)
     for i in range(dataset.cache_num):
         self.assertIsNotNone(dataset._cache[i])
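For reference, the cache size asserted above follows CacheDataset's sizing rule, cache_num = min(cache_num, int(len(data) * cache_rate), len(data)); a quick sketch with an illustrative datalist (assuming a MONAI version where the transform argument may be omitted, as in Example #5):

    from monai.data import CacheDataset

    ds = CacheDataset(data=list(range(10)), cache_rate=0.5)
    print(ds.cache_num, len(ds._cache))  # min(maxsize, int(10 * 0.5), 10) -> 5 cached items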
Example #28
    def test_pad_collation(self, t_type, transform):

        if t_type == dict:
            dataset = CacheDataset(self.dict_data, transform, progress=False)
        else:
            dataset = _Dataset(self.list_data, self.list_labels, transform)

        # Default collation should raise an error
        loader_fail = DataLoader(dataset, batch_size=10)
        with self.assertRaises(RuntimeError):
            for _ in loader_fail:
                pass

        # Padded collation shouldn't
        loader = DataLoader(dataset,
                            batch_size=2,
                            collate_fn=pad_list_data_collate)
        for _ in loader:
            pass
Example #29
    def test_pad_collation(self, t_type, collate_method, transform):

        if t_type == dict:
            dataset = CacheDataset(self.dict_data, transform, progress=False)
        else:
            dataset = _Dataset(self.list_data, self.list_labels, transform)

        # Default collation should raise an error
        loader_fail = DataLoader(dataset, batch_size=10)
        with self.assertRaises(RuntimeError):
            for _ in loader_fail:
                pass

        # Padded collation shouldn't
        loader = DataLoader(dataset, batch_size=10, collate_fn=collate_method)
        # check collation in forward direction
        for data in loader:
            if t_type == dict:
                decollated_data = decollate_batch(data)
                for d in decollated_data:
                    PadListDataCollate.inverse(d)
Example #30
 def test_shape(self, num_workers, dataset_size):
     test_image = nib.Nifti1Image(
         np.random.randint(0, 2, size=[128, 128, 128]), np.eye(4))
     tempdir = tempfile.mkdtemp()
     nib.save(test_image, os.path.join(tempdir, 'test_image1.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_label1.nii.gz'))
     nib.save(test_image, os.path.join(tempdir, 'test_extra1.nii.gz'))
     test_data = [{
         'image': os.path.join(tempdir, 'test_image1.nii.gz'),
         'label': os.path.join(tempdir, 'test_label1.nii.gz'),
         'extra': os.path.join(tempdir, 'test_extra1.nii.gz')
     }] * dataset_size
     dataset = CacheDataset(
         data=test_data,
         transform=Compose([LoadNiftid(keys=['image', 'label', 'extra'])]),
         cache_rate=1,
         num_workers=num_workers)
     shutil.rmtree(tempdir)
     self.assertEqual(len(dataset._cache), dataset.cache_num)
     for i in range(dataset.cache_num):
         self.assertIsNotNone(dataset._cache[i])