Exemplo n.º 1
0
    def __init__(self,
                 array: np.ndarray,
                 bounding_box: BoundingBox = None,
                 iteration_size: BoundingBox = BoundingBox(
                     Vector(0, 0, 0), Vector(128, 128, 32)),
                 stride: Vector = Vector(64, 64, 16)):
        """
        Builds a volume from array data and configures how it is iterated

        :param array: Either a Numpy array that is stored through
            ``_setArray`` or a BoundingBox that is handed to ``createArray``
        :param bounding_box: The bounding box encompassing the volume; it is
            forwarded unchanged (including ``None``) to ``setBoundingBox``
        :param iteration_size: The bounding box of each data sample in the
            dataset iterable
        :param stride: The stride displacement of each data sample in the
            dataset iterable
        :raises ValueError: If ``array`` is neither an ndarray nor a
            BoundingBox
        """
        # Reject unsupported inputs before touching any instance state
        if not isinstance(array, (np.ndarray, BoundingBox)):
            raise ValueError("array must be an ndarray or a BoundingBox")

        if isinstance(array, np.ndarray):
            self._setArray(array)
        else:
            self.createArray(array)

        # The bounding box is set before the iteration parameters are
        # configured, as in the original call order
        self.setBoundingBox(bounding_box)
        self.setIteration(iteration_size=iteration_size, stride=stride)
        super().__init__()
Exemplo n.º 2
0
    def setIteration(self, iteration_size: BoundingBox, stride: Vector):
        """
        Validates and stores the iteration parameters, derives the number of
        samples along each axis and resets the iteration index to zero

        :param iteration_size: The size of each data sample in the volume
        :param stride: The displacement of each iteration
        :raises ValueError: If an argument has the wrong type or if
            iteration_size is not a subset of the volume-sized box anchored
            at the origin
        """
        if not isinstance(iteration_size, BoundingBox):
            template = ("iteration_size must have type BoundingBox"
                        " instead it has type {}")
            raise ValueError(template.format(type(iteration_size)))

        if not isinstance(stride, Vector):
            raise ValueError("stride must have type Vector")

        volume_box = BoundingBox(Vector(0, 0, 0),
                                 self.getBoundingBox().getSize())
        if not iteration_size.isSubset(volume_box):
            raise ValueError("iteration_size must be smaller than volume size")

        self.setIterationSize(iteration_size)
        self.setStride(stride)

        def samples_along_axis(volume_len, sample_len, step):
            # Rounds to the nearest integer (this is not a true ceiling);
            # Python 3 rounds exact halves to the even neighbour
            return int(round((volume_len - sample_len) / step + 1))

        self.element_vec = Vector(*(
            samples_along_axis(volume_len, sample_len, step)
            for volume_len, sample_len, step in zip(
                self.getBoundingBox().getSize().getComponents(),
                self.iteration_size.getSize().getComponents(),
                self.stride.getComponents())))

        self.index = 0
Exemplo n.º 3
0
    def test_tiff_dataset(self):
        """
        Checks that TiffVolume opens a TIFF stack, reports the expected
        number of samples and returns samples that match a reference file
        and survive a write/read round trip
        """
        # Test that TiffVolume opens a TIFF stack
        testDataset = TiffVolume(os.path.join(IMAGE_PATH, "inputs.tif"),
                                 BoundingBox(Vector(0, 0, 0),
                                             Vector(1024, 512, 50)),
                                 iteration_size=BoundingBox(
                                     Vector(0, 0, 0), Vector(128, 128, 20)),
                                 stride=Vector(128, 128, 20))

        # Entering through ``with`` guarantees that __exit__ runs even when
        # an assertion below fails
        with testDataset:
            # Test that TiffVolume has the correct length
            self.assertEqual(64, len(testDataset),
                             "TIFF dataset size does not match correct size")

            # Test that TiffVolume outputs the correct samples.
            # ``.all()`` has to be called: the bare bound method ``.all`` is
            # always truthy, which made the assertion pass unconditionally.
            expected = tif.imread(os.path.join(IMAGE_PATH, "test_sample.tif"))
            self.assertTrue((expected == testDataset[10].getArray()).all(),
                            "TIFF dataset value does not match correct value")

            # Test that TiffVolume can read and write consistent samples
            written_path = os.path.join(IMAGE_PATH, "test_write.tif")
            tif.imsave(written_path, testDataset[10].getArray())
            written = tif.imread(written_path)
            self.assertTrue((written == testDataset[10].getArray()).all(),
                            "TIFF dataset output does not match written output")
Exemplo n.º 4
0
    def test_checkpoint(self):
        """
        Trains while writing checkpoints, then starts a second training run
        from the checkpoint written at iteration 5
        """
        # makedirs tolerates an already existing directory and also creates
        # missing parents, unlike the isdir()/mkdir() pair
        os.makedirs('./tests/checkpoints', exist_ok=True)

        net = RSUNet()
        inputs_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "inputs.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
        labels_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "labels.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

        # ``with`` pairs every __enter__ with an __exit__, also on failure
        with inputs_dataset, labels_dataset:
            trainer = Trainer(net,
                              inputs_dataset,
                              labels_dataset,
                              max_epochs=10,
                              gpu_device=1)
            trainer = CheckpointWriter(trainer,
                                       checkpoint_dir='./tests/checkpoints',
                                       checkpoint_period=5)
            trainer.run_training()

            # Second run: resume from the checkpoint produced above
            trainer = Trainer(
                net,
                inputs_dataset,
                labels_dataset,
                max_epochs=10,
                checkpoint='./tests/checkpoints/iteration_5.ckpt',
                gpu_device=1)
            trainer.run_training()
Exemplo n.º 5
0
    def setIteration(self, iteration_size: BoundingBox, stride: Vector):
        """
        Checks and stores the iteration parameters, computes how many
        samples lie along each axis and resets the iteration index to zero

        :param iteration_size: The bounding box describing one data sample
        :param stride: The displacement between consecutive samples
        :raises ValueError: If an argument has the wrong type or if
            iteration_size is not a subset of the volume-sized box anchored
            at the origin
        """
        if not isinstance(iteration_size, BoundingBox):
            type_message = ("iteration_size must have type BoundingBox"
                            " instead it has type {}")
            raise ValueError(type_message.format(type(iteration_size)))

        if not isinstance(stride, Vector):
            raise ValueError("stride must have type Vector")

        volume_box = BoundingBox(Vector(0, 0, 0),
                                 self.getBoundingBox().getSize())
        if not iteration_size.isSubset(volume_box):
            size_message = ("iteration_size must be smaller than volume size "
                            "instead the iteration size is {} "
                            "and the volume size is {}")
            raise ValueError(size_message.format(
                iteration_size.getSize(), self.getBoundingBox().getSize()))

        self.setIterationSize(iteration_size)
        self.setStride(stride)

        def count_samples(volume_len, sample_len, step):
            # Nearest-integer rounding rather than a true ceiling; Python 3
            # rounds exact halves to the even neighbour
            return int(round((volume_len - sample_len) / step + 1))

        per_axis = zip(self.getBoundingBox().getSize().getComponents(),
                       self.iteration_size.getSize().getComponents(),
                       self.stride.getComponents())
        self.element_vec = Vector(
            *(count_samples(*lengths) for lengths in per_axis))

        self.index = 0
Exemplo n.º 6
0
    def get(self, bounding_box: BoundingBox) -> Data:
        """
        Requests a data sample from the volume. Any part of the request that
        lies outside the volume is filled by ``np.pad`` with
        ``mode="constant"``. If the request does not overlap the volume at
        all, the method raises a ValueError.

        :param bounding_box: The bounding box of the request data sample
        :return: The data sample requested, carrying the requested bounding
            box
        :raises ValueError: If bounding_box is disjoint from the volume
        """
        if bounding_box.isDisjoint(self.getBoundingBox()):
            error_string = ("Bounding box must be inside dataset " +
                            "dimensions instead bounding box is {} while " +
                            "the dataset dimensions are {}")
            error_string = error_string.format(bounding_box,
                                               self.getBoundingBox())
            raise ValueError(error_string)

        sub_bounding_box = bounding_box.intersect(self.getBoundingBox())
        array = self.getArray(sub_bounding_box)

        requested_start, requested_end = bounding_box.getEdges()
        available_start, available_end = sub_bounding_box.getEdges()

        # The intersection never starts before the request, so the leading
        # pad is (available - requested); the previous operand order gave a
        # negative width, which np.pad rejects.
        # NOTE(review): assumes Vector subtraction is component-wise - confirm
        before_pad = available_start - requested_start
        after_pad = requested_end - available_end

        if before_pad != Vector(0, 0, 0) or after_pad != Vector(0, 0, 0):
            pad_size = tuple(
                zip(before_pad.getNumpyDim(), after_pad.getNumpyDim()))
            array = np.pad(array, pad_width=pad_size, mode="constant")

        return Data(array, bounding_box)
Exemplo n.º 7
0
 def __init__(self,
              bounding_box: BoundingBox = None,
              iteration_size: BoundingBox = BoundingBox(
                  Vector(0, 0, 0), Vector(128, 128, 32)),
              stride: Vector = Vector(64, 64, 16)):
     """
     Sets the bounding box and the iteration parameters of the volume

     :param bounding_box: The bounding box of the volume, forwarded to
         ``setBoundingBox``
     :param iteration_size: The bounding box of each data sample, forwarded
         to ``setIteration``
     :param stride: The displacement between data samples, forwarded to
         ``setIteration``
     """
     # The bounding box is set before the iteration parameters
     self.setBoundingBox(bounding_box)
     self.setIteration(iteration_size, stride)
     # Starts out as None; it is assigned elsewhere (not visible here)
     self.valid_data = None
Exemplo n.º 8
0
 def test_cpu_training(self):
     """
     Runs one training epoch without requesting a GPU device
     """
     net = RSUNet()
     inputs_dataset = TiffVolume(
         os.path.join(IMAGE_PATH, "inputs.tif"),
         BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
     labels_dataset = TiffVolume(
         os.path.join(IMAGE_PATH, "labels.tif"),
         BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

     # ``with`` pairs every __enter__ with an __exit__, also when training
     # raises
     with inputs_dataset, labels_dataset:
         trainer = Trainer(net, inputs_dataset, labels_dataset, max_epochs=1)
         trainer.run_training()
Exemplo n.º 9
0
    def test_json_spec(self):
        """
        Opens a volume from a JSON specification and compares one requested
        region with a stored reference image
        """
        # Tests the JSON volume specification
        json_spec = JsonSpec()
        pooled_volume = json_spec.open(
            os.path.join(IMAGE_PATH, "inputs_spec.json"))

        output = pooled_volume.get(
            BoundingBox(Vector(0, 0, 40), Vector(128, 128, 60)))

        expected = tif.imread(
            os.path.join(IMAGE_PATH, "test_pooled_volume.tif"))
        # ``.all()`` has to be called: the bare bound method ``.all`` is
        # always truthy, so the comparison was never actually checked
        self.assertTrue((expected == output.getArray()).all(),
                        "JsonSpec output does not match test case")
Exemplo n.º 10
0
    def get(self, bounding_box: BoundingBox) -> Data:
        """
        Assembles the requested region from every pooled volume returned by
        ``_queryBoundingBox``

        Volumes already on the stack are read first; the remaining ones are
        pushed onto the stack and then read. Each piece is written into a
        zero-initialised uint16 array covering the requested box.

        :param bounding_box: The bounding box of the requested data sample
        :return: The assembled data sample with the requested bounding box
        """
        indexes = self._queryBoundingBox(bounding_box)

        stack_volumes = [volume for i, volume in self.stack if i in indexes]
        stack_disjoint = list(set(indexes) - {i for i, _ in self.stack})

        data = []

        for volume in stack_volumes:
            sub_bbox = bounding_box.intersect(volume.getBoundingBox())
            data.append(volume.get(sub_bbox))

        for index in stack_disjoint:
            volume = self.volumes[index]
            # Called for its effect on the stack; the return value is unused
            self._pushStack(index, volume)

            sub_bbox = bounding_box.intersect(volume.getBoundingBox())
            data.append(volume.get(sub_bbox))

        shape = bounding_box.getNumpyDim()
        # Allocate directly as uint16 instead of casting a float64 array
        array = Array(np.zeros(shape, dtype=np.uint16),
                      bounding_box=bounding_box,
                      iteration_size=BoundingBox(Vector(0, 0, 0),
                                                 bounding_box.getSize()),
                      stride=bounding_box.getSize())

        # Plain loop: the calls are made for their side effect only
        for item in data:
            array.set(item)

        return Data(array.getArray(), bounding_box)
Exemplo n.º 11
0
    def __init__(self,
                 tiff_file,
                 bounding_box: BoundingBox,
                 iteration_size: BoundingBox = BoundingBox(
                     Vector(0, 0, 0), Vector(128, 128, 32)),
                 stride: Vector = Vector(64, 64, 16)):
        """
        Registers the TIFF source and configures the corresponding
        three-dimensional volume dataset

        :param tiff_file: Either a TIFF stack file or a directory
            containing TIFF files; it is passed to ``setFile``
        :param bounding_box: The bounding box of the volume
        :param iteration_size: The bounding box of each data sample in the
            dataset iterable
        :param stride: The displacement between data samples
        """
        # Store the TIFF source first, then let the parent class set the
        # bounding box and iteration parameters
        self.setFile(tiff_file)
        super().__init__(bounding_box, iteration_size, stride)
Exemplo n.º 12
0
    def test_stitcher(self):
        """
        Blends every sample of a TIFF dataset into an output array and
        checks one sample of the result against the input
        """
        # Stitch a test TIFF dataset
        inputDataset = TiffVolume(
            os.path.join(IMAGE_PATH, "inputs.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
        outputDataset = Array(
            np.zeros(inputDataset.getBoundingBox().getNumpyDim()))

        # ``with`` guarantees that __exit__ runs even if an assertion fails
        with inputDataset:
            for data in inputDataset:
                outputDataset.blend(data)

            # ``.all()`` has to be called: the bare bound method ``.all`` is
            # always truthy, so the comparison was never actually checked
            matches = (inputDataset[20].getArray() ==
                       outputDataset[20].getArray())
            self.assertTrue(matches.all(),
                            "Blending output does not match input")

            tif.imsave(os.path.join(IMAGE_PATH, "test_stitch.tif"),
                       outputDataset[100].getArray().astype(np.uint16))
Exemplo n.º 13
0
 def test_loss(self):
     """
     Runs training with SimplePointBCEWithLogitsLoss as the criterion
     """
     net = RSUNet()
     inputs_dataset = TiffVolume(
         os.path.join(IMAGE_PATH, "inputs.tif"),
         BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
     labels_dataset = TiffVolume(
         os.path.join(IMAGE_PATH, "labels.tif"),
         BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

     # ``with`` pairs every __enter__ with an __exit__, also when training
     # raises
     with inputs_dataset, labels_dataset:
         trainer = Trainer(net,
                           inputs_dataset,
                           labels_dataset,
                           max_epochs=10,
                           gpu_device=1,
                           criterion=SimplePointBCEWithLogitsLoss())
         trainer.run_training()
Exemplo n.º 14
0
    def __init__(self,
                 hdf5_file,
                 dataset,
                 bounding_box: BoundingBox,
                 iteration_size: BoundingBox = BoundingBox(
                     Vector(0, 0, 0), Vector(128, 128, 20)),
                 stride: Vector = Vector(64, 64, 10)):
        """
        Registers a HDF5 dataset and configures the corresponding
        three-dimensional volume dataset

        :param hdf5_file: A HDF5 file path; it is passed to ``setFile``
        :param dataset: A HDF5 dataset name; it is passed to ``setDataset``
        :param bounding_box: The bounding box of the volume
        :param iteration_size: The bounding box of each data sample in the
            dataset iterable
        :param stride: The displacement between data samples
        """
        # Store the file and dataset first, then let the parent class set
        # the bounding box and iteration parameters
        self.setFile(hdf5_file)
        self.setDataset(dataset)
        super().__init__(bounding_box, iteration_size, stride)
Exemplo n.º 15
0
    def __init__(self,
                 volumes=None,
                 stack_size: int = 5,
                 iteration_size: BoundingBox = BoundingBox(
                     Vector(0, 0, 0), Vector(128, 128, 32)),
                 stride: Vector = Vector(64, 64, 16)):
        """
        Creates a pool of volumes with a stack of the given size

        :param volumes: The initial volumes of the pool; when omitted the
            pool starts with an empty list
        :param stack_size: The size handed to ``setStack``
        :param iteration_size: The bounding box of each data sample
        :param stride: The displacement between data samples
        """
        # The changed flag is raised only when initial volumes were supplied
        has_volumes = volumes is not None
        self.volumes = volumes if has_volumes else []
        self.volumes_changed = has_volumes

        self.volume_list = []
        self.setStack(stack_size)

        self.setIteration(iteration_size, stride)

        self.valid_data = None
Exemplo n.º 16
0
    def setBoundingBox(self,
                       bounding_box: BoundingBox = None,
                       displacement: Vector = None):
        """
        Stores the bounding box of the volume. Without an explicit bounding
        box, a box anchored at the origin and sized from the array shape is
        used.

        :param bounding_box: The bounding box of the volume
        :param displacement: An optional displacement that is added to the
            bounding box
        """
        if bounding_box is not None:
            self.bounding_box = bounding_box
        else:
            # The Numpy shape is reversed before it becomes the box size
            # (presumably (z, y, x) -> (x, y, z); confirm against Vector)
            extent = Vector(*reversed(self.getArray().shape))
            self.bounding_box = BoundingBox(Vector(0, 0, 0), extent)

        if displacement is None:
            return

        self.bounding_box = self.bounding_box + displacement
Exemplo n.º 17
0
    def test_prediction(self):
        """
        Runs a prediction from the checkpoint of iteration 10 and writes
        the output volume to a TIFF file
        """
        # makedirs tolerates an already existing directory and also creates
        # missing parents, unlike the isdir()/mkdir() pair
        os.makedirs('./tests/checkpoints', exist_ok=True)

        net = RSUNet()

        checkpoint = './tests/checkpoints/iteration_10.ckpt'
        inputs_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "inputs.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

        # ``with`` guarantees that __exit__ runs even if prediction raises
        with inputs_dataset:
            predictor = Predictor(net, checkpoint, gpu_device=1)

            output_volume = Array(
                np.zeros(inputs_dataset.getBoundingBox().getNumpyDim()))

            predictor.run(inputs_dataset, output_volume, batch_size=5)

            tif.imsave(os.path.join(IMAGE_PATH, "test_prediction.tif"),
                       output_volume.getArray().astype(np.float32))
Exemplo n.º 18
0
    def _indexToBoundingBox(self, idx):
        """
        Converts a flat sample index into the bounding box of that sample

        :param idx: The flat index of the sample
        :return: The iteration-size box displaced by the stride multiplied
            by the unravelled per-axis index
        :raises StopIteration: If idx is not smaller than ``len(self)``; the
            iteration index is reset to zero first
        """
        if idx >= len(self):
            self.index = 0
            raise StopIteration

        # The shape is passed positionally: the ``dims`` keyword was renamed
        # to ``shape`` in NumPy 1.16 and later removed
        element_vec = np.unravel_index(idx,
                                       self.element_vec.getComponents())

        element_vec = Vector(*element_vec)
        bounding_box = self.iteration_size + self.stride * element_vec

        return bounding_box
Exemplo n.º 19
0
    def test_loss_writer(self):
        """
        Runs one training epoch wrapped in a LossWriter, starting without an
        existing experiment directory
        """
        # Remove output of a previous run; nothing to do if it is absent
        # (previously the directory was created only to be deleted again)
        experiment_dir = './tests/test_experiment'
        if os.path.isdir(experiment_dir):
            shutil.rmtree(experiment_dir)

        net = RSUNet()
        inputs_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "inputs.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
        labels_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "labels.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

        # ``with`` pairs every __enter__ with an __exit__, also on failure
        with inputs_dataset, labels_dataset:
            trainer = Trainer(net,
                              inputs_dataset,
                              labels_dataset,
                              max_epochs=1,
                              gpu_device=1)
            trainer = LossWriter(trainer, './tests/', "test_experiment")
            trainer.run_training()
Exemplo n.º 20
0
    def test_memory_free(self):
        """
        Checks that the process memory grows while a TiffVolume is open and
        returns to within 20% of the initial value after the ``with`` block
        """
        process = Process(getpid())
        initial_memory = process.memory_info().rss

        start = time.perf_counter()
        # NOTE(review): the volume is deliberately kept bound to ``v``; the
        # object therefore stays referenced after the block, so the final
        # measurement presumably reflects what __exit__ released - confirm
        with TiffVolume(os.path.join(IMAGE_PATH, "inputs.tif"),
                        BoundingBox(Vector(0, 0, 0), Vector(1024, 512,
                                                            50))) as v:
            volume_memory = process.memory_info().rss
        end = time.perf_counter()
        print("Load time: {} secs".format(end - start))

        final_memory = process.memory_info().rss

        self.assertAlmostEqual(initial_memory,
                               final_memory,
                               delta=initial_memory * 0.2,
                               msg=("memory leakage: final memory usage is " +
                                    "larger than the initial memory usage"))
        # The assertion requires initial < volume, so the failure message
        # must report that the volume memory is not greater
        self.assertLess(initial_memory,
                        volume_memory,
                        msg=("volume loading error: volume memory usage is " +
                             "not greater than the initial memory usage"))
Exemplo n.º 21
0
    def openVolume(self, volume_spec):
        """
        Opens a volume from a volume specification

        A filename ending in ``.tif`` yields a TiffVolume; a filename ending
        in ``.hdf5`` yields a PooledVolume holding one Hdf5Volume per entry
        of ``volume_spec["datasets"]``.

        :param volume_spec: A dictionary specifying the volume's parameters

        :return: The volume corresponding to the volume dataset
        :raises ValueError: If the file type is unsupported or a required
            key is missing from the specification
        """
        try:
            filename = os.path.abspath(volume_spec["filename"])

            if filename.endswith(".tif"):
                edges = volume_spec["bounding_box"]
                bounding_box = BoundingBox(Vector(*edges[0]),
                                           Vector(*edges[1]))
                volume = TiffVolume(filename, bounding_box)

                return volume

            elif filename.endswith(".hdf5"):
                pooled_volume = PooledVolume()
                for dataset in volume_spec["datasets"]:
                    edges = dataset["bounding_box"]
                    bounding_box = BoundingBox(Vector(*edges[0]),
                                               Vector(*edges[1]))
                    # NOTE(review): the whole spec entry is passed as the
                    # dataset argument - confirm Hdf5Volume expects that
                    volume = Hdf5Volume(filename, dataset, bounding_box)
                    pooled_volume.add(volume)

                return pooled_volume

            else:
                # Report the offending filename; the previous code formatted
                # an undefined name and raised NameError instead
                error_string = "{} is an unsupported filetype".format(
                    filename)
                raise ValueError(error_string)

        except KeyError as err:
            error_string = "given volume_spec is corrupt"
            raise ValueError(error_string) from err
Exemplo n.º 22
0
    def test_torch_dataset(self):
        """
        Builds an AlignedVolume from an input and a label TIFF volume and
        writes sample 10 of each to a TIFF file
        """
        input_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "inputs.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
        label_dataset = TiffVolume(
            os.path.join(IMAGE_PATH, "labels.tif"),
            BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

        # ``with`` pairs every __enter__ with an __exit__, also on failure
        with input_dataset, label_dataset:
            training_dataset = AlignedVolume(
                (input_dataset, label_dataset),
                iteration_size=BoundingBox(Vector(0, 0, 0),
                                           Vector(128, 128, 20)),
                stride=Vector(128, 128, 20))

            tif.imsave(os.path.join(IMAGE_PATH, "test_input.tif"),
                       training_dataset[10][0].getArray())
            tif.imsave(os.path.join(IMAGE_PATH, "test_label.tif"),
                       training_dataset[10][1].getArray() * 255)
Exemplo n.º 23
0
    def test_hdf5_volume(self):
        """
        Pools two HDF5 volumes stacked along the third axis and compares a
        region spanning both with a stored reference image
        """
        pooled_volume = PooledVolume(stack_size=5)
        pooled_volume.add(
            Hdf5Volume(os.path.join(IMAGE_PATH, "inputs.h5"), "input-1",
                       BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50))))
        pooled_volume.add(
            Hdf5Volume(os.path.join(IMAGE_PATH, "inputs.h5"), "input-2",
                       BoundingBox(Vector(0, 0, 50), Vector(1024, 512, 100))))
        output = pooled_volume.get(
            BoundingBox(Vector(0, 0, 40), Vector(128, 128, 60)))

        expected = tif.imread(
            os.path.join(IMAGE_PATH, "test_pooled_volume.tif"))
        # ``.all()`` has to be called: the bare bound method ``.all`` is
        # always truthy, so the comparison was never actually checked
        self.assertTrue((expected == output.getArray()).all(),
                        "PooledVolume output does not match test case")
Exemplo n.º 24
0
    def _indexToBoundingBox(self, idx: int) -> BoundingBox:
        """
        Converts a flat index over all pooled volumes into the bounding box
        of the corresponding sample

        :param idx: The flat index of the sample
        :return: The sample's bounding box, offset by the first edge of the
            bounding box of the volume that contains it
        :raises StopIteration: If idx is not smaller than ``len(self)``; the
            iteration index is reset to zero first
        """
        if self.volumes_changed:
            # Called only for its side effect
            # (presumably refreshes ``volume_index``; confirm in __len__)
            len(self)

        if idx >= len(self):
            self.index = 0
            raise StopIteration

        # Last volume whose starting flat index does not exceed idx
        index = max(i for i, start in enumerate(self.volume_index)
                    if start <= idx)
        volume = self.volumes[index]
        _idx = idx - self.volume_index[index]

        # The shape is passed positionally: the ``dims`` keyword was renamed
        # to ``shape`` in NumPy 1.16 and later removed
        element_vec = np.unravel_index(_idx,
                                       volume.element_vec.getComponents())

        element_vec = Vector(*element_vec)
        bounding_box = volume.iteration_size+volume.stride*element_vec \
                       + volume.getBoundingBox().getEdges()[0]

        return bounding_box
Exemplo n.º 25
0
    def augment(self, bounding_box):
        """
        Fetches an enlarged region from the parent and applies ``drop`` with
        a random, even number of dropped slices at a random location

        :param bounding_box: The bounding box of the augmented output
        :return: A tuple of the augmented raw data and the augmented label
            data, both carrying the requested bounding box
        """
        # Random draws keep their order: slice count first, then location
        dropped_slices = 2 * random.randrange(1, self.max_slices // 2)
        second_extent = bounding_box.getSize()[1]
        location = random.randrange(dropped_slices,
                                    second_extent - dropped_slices)

        # Grow the upper edge by the dropped slices along the second
        # component
        lower_edge, upper_edge = bounding_box.getEdges()
        initial_bounding_box = BoundingBox(
            lower_edge, upper_edge + Vector(0, dropped_slices, 0))

        raw, label = self.getParent().get(initial_bounding_box)

        augmented_raw, augmented_label = self.drop(
            raw, label, dropped_slices=dropped_slices, location=location)

        # Wrap the results with the originally requested bounding box
        return (Data(augmented_raw, bounding_box),
                Data(augmented_label, bounding_box))
Exemplo n.º 26
0
    def augment(self, bounding_box):
        """
        Fetches an enlarged region from the parent and applies ``stitch``
        with a random error at a random location

        :param bounding_box: The bounding box of the augmented output
        :return: A tuple of the augmented raw data and the augmented label
            data, both carrying the requested bounding box
        """
        # Get error and location
        error = random.randrange(2, self.max_error)
        x_len = bounding_box.getSize()[0]
        location = random.randrange(10, x_len - 10)

        # Get initial bounding box. A new Vector is built instead of using
        # ``+=`` so that the caller's bounding box cannot be modified in
        # place should Vector implement in-place addition.
        edge1, edge2 = bounding_box.getEdges()
        enlarged_edge2 = edge2 + Vector(20, error, 0)
        initial_bounding_box = BoundingBox(edge1, enlarged_edge2)

        # Get data; copies keep the parent's arrays untouched
        raw_data, label_data = self.getParent().get(initial_bounding_box)
        raw, label = (raw_data.getArray().copy(), label_data.getArray().copy())
        augmented_raw, augmented_label = self.stitch(raw,
                                                     label,
                                                     location=location,
                                                     error=error)

        # Convert to the data format
        augmented_raw_data = Data(augmented_raw, bounding_box)
        augmented_label_data = Data(augmented_label, bounding_box)

        return (augmented_raw_data, augmented_label_data)
Exemplo n.º 27
0
 def setIterationSize(self, iteration_size):
     """
     Stores the iteration size as a bounding box anchored at the origin

     Only the size of ``iteration_size`` is kept; the stored box always
     starts at ``Vector(0, 0, 0)``.

     :param iteration_size: An object providing ``getSize()``
     """
     self.iteration_size = BoundingBox(Vector(0, 0, 0),
                                       iteration_size.getSize())