def test_tiff_dataset(self):
    """
    Checks that TiffVolume opens a TIFF stack, reports the expected number
    of samples and yields samples that match the stored reference and
    survive a write/read round trip.
    """
    # Test that TiffVolume opens a TIFF stack
    testDataset = TiffVolume(os.path.join(IMAGE_PATH, "inputs.tif"),
                             BoundingBox(Vector(0, 0, 0),
                                         Vector(1024, 512, 50)),
                             iteration_size=BoundingBox(
                                 Vector(0, 0, 0), Vector(128, 128, 20)),
                             stride=Vector(128, 128, 20))

    # Enter the volume through a with-statement so that it is also exited,
    # even when one of the assertions below fails.
    with testDataset:
        # Test that TiffVolume has the correct length
        self.assertEqual(64, len(testDataset),
                         "TIFF dataset size does not match correct size")

        # Test that TiffVolume outputs the correct samples.
        # NOTE: .all must be *called*; passing the bound method itself to
        # assertTrue is always truthy and would never fail.
        expected = tif.imread(os.path.join(IMAGE_PATH, "test_sample.tif"))
        self.assertTrue((expected == testDataset[10].getArray()).all(),
                        "TIFF dataset value does not match correct value")

        # Test that TiffVolume can read and write consistent samples
        write_path = os.path.join(IMAGE_PATH, "test_write.tif")
        tif.imsave(write_path, testDataset[10].getArray())
        self.assertTrue(
            (tif.imread(write_path) == testDataset[10].getArray()).all(),
            "TIFF dataset output does not match written output")
def setIteration(self, iteration_size: BoundingBox, stride: Vector):
    """
    Sets the parameters for iterating through the dataset and resets the
    iteration index to zero.

    The number of samples along each axis is computed as
    ``int(round((volume - sample) / stride + 1))`` and stored in
    ``self.element_vec``.

    :param iteration_size: The bounding box of each data sample; it must
    fit inside a box of the volume's size anchored at the origin
    :param stride: The displacement between consecutive samples
    :raises ValueError: if an argument has the wrong type or the iteration
    size does not fit inside the volume
    """
    # Validate argument types first
    if not isinstance(iteration_size, BoundingBox):
        message = ("iteration_size must have type BoundingBox" +
                   " instead it has type {}")
        raise ValueError(message.format(type(iteration_size)))
    if not isinstance(stride, Vector):
        raise ValueError("stride must have type Vector")

    # The sample must fit inside the volume once both sit at the origin
    volume_at_origin = BoundingBox(Vector(0, 0, 0),
                                   self.getBoundingBox().getSize())
    if not iteration_size.isSubset(volume_at_origin):
        raise ValueError(
            "iteration_size must be smaller than volume size " +
            "instead the iteration size is {} ".format(
                iteration_size.getSize()) +
            "and the volume size is {}".format(
                self.getBoundingBox().getSize()))

    self.setIterationSize(iteration_size)
    self.setStride(stride)

    def count_samples(volume_len, sample_len, step):
        # Rounded to the nearest integer with round(), not a true ceiling
        return int(round((volume_len - sample_len) / step + 1))

    per_axis = zip(self.getBoundingBox().getSize().getComponents(),
                   self.iteration_size.getSize().getComponents(),
                   self.stride.getComponents())
    self.element_vec = Vector(
        *(count_samples(volume_len, sample_len, step)
          for volume_len, sample_len, step in per_axis))
    self.index = 0
def get(self, bounding_box: BoundingBox) -> Data:
    """
    Requests a data sample from the volume. A request that only partially
    overlaps the volume is filled with constant (zero) padding outside of
    the volume. A request that does not overlap the volume at all raises a
    ValueError.

    :param bounding_box: The bounding box of the request data sample
    :return: The data sample requested
    :raises ValueError: if the bounding box is disjoint from the volume
    """
    if bounding_box.isDisjoint(self.getBoundingBox()):
        error_string = ("Bounding box must be inside dataset " +
                        "dimensions instead bounding box is {} while " +
                        "the dataset dimensions are {}")
        error_string = error_string.format(bounding_box,
                                           self.getBoundingBox())
        raise ValueError(error_string)

    # Only the part of the request that lies inside the volume can be read
    sub_bounding_box = bounding_box.intersect(self.getBoundingBox())
    array = self.getArray(sub_bounding_box)

    request_edges = bounding_box.getEdges()
    sub_edges = sub_bounding_box.getEdges()

    # The intersection never starts before the request nor ends after it,
    # so both differences below are non-negative, as np.pad requires.
    # (The lower difference used to be taken the other way round, which
    # produced negative pad widths.)
    before_pad = sub_edges[0] - request_edges[0]
    after_pad = request_edges[1] - sub_edges[1]

    if before_pad != Vector(0, 0, 0) or after_pad != Vector(0, 0, 0):
        pad_size = tuple(
            zip(before_pad.getNumpyDim(), after_pad.getNumpyDim()))
        array = np.pad(array, pad_width=pad_size, mode="constant")

    return Data(array, bounding_box)
def test_checkpoint(self):
    """
    Trains a network while writing checkpoints to ./tests/checkpoints and
    then starts a second training run from the iteration_5 checkpoint.
    """
    os.makedirs('./tests/checkpoints', exist_ok=True)

    net = RSUNet()

    inputs_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    labels_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "labels.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # Enter both volumes through a with-statement so they are exited again
    # when training finishes or fails.
    with inputs_dataset, labels_dataset:
        trainer = Trainer(net, inputs_dataset, labels_dataset,
                          max_epochs=10, gpu_device=1)
        trainer = CheckpointWriter(trainer,
                                   checkpoint_dir='./tests/checkpoints',
                                   checkpoint_period=5)
        trainer.run_training()

        # Resume from the checkpoint written by the first run
        trainer = Trainer(net, inputs_dataset, labels_dataset,
                          max_epochs=10,
                          checkpoint='./tests/checkpoints/iteration_5.ckpt',
                          gpu_device=1)
        trainer.run_training()
def setIteration(self, iteration_size: BoundingBox, stride: Vector):
    """
    Configures how the dataset is traversed and rewinds the iteration
    index to zero.

    :param iteration_size: The size of each data sample in the volume
    :param stride: The displacement of each iteration
    :raises ValueError: if an argument has the wrong type or the iteration
    size does not fit inside the volume
    """
    if not isinstance(iteration_size, BoundingBox):
        template = ("iteration_size must have type BoundingBox" +
                    " instead it has type {}")
        raise ValueError(template.format(type(iteration_size)))

    if not isinstance(stride, Vector):
        raise ValueError("stride must have type Vector")

    # Compare against the volume translated to the origin
    origin_volume = BoundingBox(Vector(0, 0, 0),
                                self.getBoundingBox().getSize())
    fits_in_volume = iteration_size.isSubset(origin_volume)
    if not fits_in_volume:
        raise ValueError("iteration_size must be smaller than volume size")

    self.setIterationSize(iteration_size)
    self.setStride(stride)

    volume_dims = self.getBoundingBox().getSize().getComponents()
    sample_dims = self.iteration_size.getSize().getComponents()
    strides = self.stride.getComponents()

    # Samples per axis: (volume - sample) / stride + 1, rounded with round()
    counts = []
    for volume_len, sample_len, step in zip(volume_dims, sample_dims,
                                            strides):
        counts.append(int(round((volume_len - sample_len) / step + 1)))

    self.element_vec = Vector(*counts)
    self.index = 0
def test_cpu_training(self):
    """
    Runs a single training epoch without requesting a GPU device.
    """
    net = RSUNet()

    inputs_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    labels_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "labels.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # The with-statement guarantees the volumes are exited after training
    with inputs_dataset, labels_dataset:
        trainer = Trainer(net, inputs_dataset, labels_dataset,
                          max_epochs=1)
        trainer.run_training()
def test_hdf5_volume(self):
    """
    Pools two HDF5 datasets that are adjacent along Z and checks that a
    request spanning both matches the stored reference sample.
    """
    pooled_volume = PooledVolume(stack_size=5)
    pooled_volume.add(
        Hdf5Volume(os.path.join(IMAGE_PATH, "inputs.h5"), "input-1",
                   BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50))))
    pooled_volume.add(
        Hdf5Volume(os.path.join(IMAGE_PATH, "inputs.h5"), "input-2",
                   BoundingBox(Vector(0, 0, 50), Vector(1024, 512, 100))))

    # Z range 40-60 straddles the boundary between the two datasets
    output = pooled_volume.get(
        BoundingBox(Vector(0, 0, 40), Vector(128, 128, 60)))

    expected = tif.imread(
        os.path.join(IMAGE_PATH, "test_pooled_volume.tif"))
    # .all must be called; the bound method itself is always truthy
    self.assertTrue((expected == output.getArray()).all(),
                    "PooledVolume output does not match test case")
def __init__(self, array: np.ndarray, bounding_box: BoundingBox = None,
             iteration_size: BoundingBox = BoundingBox(
                 Vector(0, 0, 0), Vector(128, 128, 32)),
             stride: Vector = Vector(64, 64, 16)):
    """
    Initializes a volume with a bounding box and iteration parameters

    :param array: A Numpy array to wrap, or a BoundingBox that is handed
    to createArray instead
    :param bounding_box: The bounding box encompassing the volume
    :param iteration_size: The bounding box of each data sample in the
    dataset iterable
    :param stride: The stride displacement of each data sample in the
    dataset iterable. The displacement proceeds first from X then to Y
    then to Z.
    :raises ValueError: if array is neither an ndarray nor a BoundingBox
    """
    # Reject unsupported inputs up front
    if not isinstance(array, (np.ndarray, BoundingBox)):
        raise ValueError("array must be an ndarray or a BoundingBox")

    if isinstance(array, np.ndarray):
        self._setArray(array)
    else:
        self.createArray(array)

    self.setBoundingBox(bounding_box)
    self.setIteration(iteration_size=iteration_size, stride=stride)
    super().__init__()
def __init__(self, bounding_box: BoundingBox = None,
             iteration_size: BoundingBox = BoundingBox(
                 Vector(0, 0, 0), Vector(128, 128, 32)),
             stride: Vector = Vector(64, 64, 16)):
    """
    Initializes the volume's bounding box and iteration parameters and
    marks the volume as holding no valid data yet.

    :param bounding_box: The bounding box encompassing the volume
    :param iteration_size: The bounding box of each data sample
    :param stride: The displacement between consecutive data samples
    """
    self.setBoundingBox(bounding_box)
    self.setIteration(iteration_size, stride)

    # Nothing has been loaded at construction time
    self.valid_data = None
def _queryBoundingBox(self, bounding_box: BoundingBox) -> list:
    """
    Finds the indexes of the pooled volumes that overlap a bounding box.

    Up to eight candidates are requested from ``self.edge1_list`` for the
    lower edge of the bounding box; candidates reported at infinite
    distance are dropped, as are volumes whose bounding box is disjoint
    from the request.

    :param bounding_box: The bounding box to look up
    :return: A list of indexes into ``self.volumes``
    :raises IndexError: if no volume overlaps the bounding box
    """
    if self.volumes_changed:
        self._rebuildIndexes()

    edge1 = [bounding_box.getEdges()[0].getComponents()]
    distances, indexes = self.edge1_list.query(edge1, k=8)

    # NOTE(review): an infinite distance presumably marks a missing
    # neighbour when fewer than k volumes are indexed; confirm against the
    # type of edge1_list.
    candidates = [
        index for index, dist in zip(indexes[0], distances[0])
        if dist < float('Inf')
    ]

    # Build a real list: a lazy filter object is always truthy, so the
    # emptiness check below would otherwise never trigger.
    overlapping = [
        index for index in candidates
        if not bounding_box.isDisjoint(
            self.volumes[index].getBoundingBox())
    ]

    if not overlapping:
        raise IndexError("bounding_box is not present in any indexes")

    return overlapping
def test_loss(self):
    """
    Runs training with SimplePointBCEWithLogitsLoss as the criterion.
    """
    net = RSUNet()

    inputs_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    labels_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "labels.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # The with-statement guarantees the volumes are exited after training
    with inputs_dataset, labels_dataset:
        trainer = Trainer(net, inputs_dataset, labels_dataset,
                          max_epochs=10, gpu_device=1,
                          criterion=SimplePointBCEWithLogitsLoss())
        trainer.run_training()
def test_torch_dataset(self):
    """
    Builds an AlignedVolume from the input and label TIFF volumes and
    writes sample 10 of each to disk for inspection.
    """
    input_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    label_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "labels.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # The with-statement guarantees the volumes are exited afterwards
    with input_dataset, label_dataset:
        training_dataset = AlignedVolume(
            (input_dataset, label_dataset),
            iteration_size=BoundingBox(Vector(0, 0, 0),
                                       Vector(128, 128, 20)),
            stride=Vector(128, 128, 20))

        tif.imsave(os.path.join(IMAGE_PATH, "test_input.tif"),
                   training_dataset[10][0].getArray())
        # Labels are scaled by 255 before being written
        tif.imsave(os.path.join(IMAGE_PATH, "test_label.tif"),
                   training_dataset[10][1].getArray() * 255)
def test_loss_writer(self):
    """
    Runs one training epoch wrapped in a LossWriter that writes to the
    "test_experiment" experiment under ./tests/.
    """
    # Start from a clean slate: drop any experiment directory left over
    # from a previous run.
    if os.path.isdir('./tests/test_experiment'):
        shutil.rmtree('./tests/test_experiment')

    net = RSUNet()

    inputs_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    labels_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "labels.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # The with-statement guarantees the volumes are exited after training
    with inputs_dataset, labels_dataset:
        trainer = Trainer(net, inputs_dataset, labels_dataset,
                          max_epochs=1, gpu_device=1)
        trainer = LossWriter(trainer, './tests/', "test_experiment")
        trainer.run_training()
def get(self, bounding_box: BoundingBox) -> Data:
    """
    Requests a data sample that may span several pooled volumes.

    Every volume overlapping the request contributes the part of the
    request it covers. Volumes that are not yet on the stack are pushed
    onto it first. The pieces are written into a zero-initialised uint16
    array covering the whole request.

    :param bounding_box: The bounding box of the requested data sample
    :return: The assembled data sample
    """
    indexes = self._queryBoundingBox(bounding_box)

    # Split the matches into volumes already on the stack and those that
    # still have to be pushed. Both are computed before the stack changes.
    stack_volumes = [volume for i, volume in self.stack if i in indexes]
    stack_disjoint = list(set(indexes) - {i for i, _ in self.stack})

    data = []
    for volume in stack_volumes:
        sub_bbox = bounding_box.intersect(volume.getBoundingBox())
        data.append(volume.get(sub_bbox))

    for index in stack_disjoint:
        volume = self.volumes[index]
        self._pushStack(index, volume)
        sub_bbox = bounding_box.intersect(volume.getBoundingBox())
        data.append(volume.get(sub_bbox))

    # Allocate the output directly as uint16 instead of casting a float64
    # temporary.
    shape = bounding_box.getNumpyDim()
    array = Array(np.zeros(shape, dtype=np.uint16),
                  bounding_box=bounding_box,
                  iteration_size=BoundingBox(Vector(0, 0, 0),
                                             bounding_box.getSize()),
                  stride=bounding_box.getSize())

    for item in data:
        array.set(item)

    return Data(array.getArray(), bounding_box)
def test_json_spec(self):
    """
    Opens a volume through a JSON specification and checks that a request
    matches the stored reference sample.
    """
    # Tests the JSON volume specification
    json_spec = JsonSpec()
    pooled_volume = json_spec.open(
        os.path.join(IMAGE_PATH, "inputs_spec.json"))

    output = pooled_volume.get(
        BoundingBox(Vector(0, 0, 40), Vector(128, 128, 60)))

    expected = tif.imread(
        os.path.join(IMAGE_PATH, "test_pooled_volume.tif"))
    # .all must be called; the bound method itself is always truthy
    self.assertTrue((expected == output.getArray()).all(),
                    "JsonSpec output does not match test case")
def openVolume(self, volume_spec):
    """
    Opens a volume from a volume specification

    :param volume_spec: A dictionary specifying the volume's parameters.
    It must contain "filename"; TIFF files additionally need
    "bounding_box" and HDF5 files need "datasets", a list of entries that
    each carry a "bounding_box"
    :return: A TiffVolume for TIFF files or a PooledVolume holding one
    Hdf5Volume per dataset for HDF5 files
    :raises ValueError: if the file type is unsupported or a required key
    is missing from the specification
    """
    try:
        filename = os.path.abspath(volume_spec["filename"])

        if filename.endswith((".tif", ".tiff")):
            edges = volume_spec["bounding_box"]
            bounding_box = BoundingBox(Vector(*edges[0]),
                                       Vector(*edges[1]))
            return TiffVolume(filename, bounding_box)

        if filename.endswith((".hdf5", ".h5")):
            pooled_volume = PooledVolume()
            for dataset in volume_spec["datasets"]:
                edges = dataset["bounding_box"]
                bounding_box = BoundingBox(Vector(*edges[0]),
                                           Vector(*edges[1]))
                # NOTE(review): the whole dataset entry is passed as the
                # dataset argument; Hdf5Volume presumably expects a
                # dataset name, so confirm which key holds it.
                volume = Hdf5Volume(filename, dataset, bounding_box)
                pooled_volume.add(volume)
            return pooled_volume

        # Report the offending file; the name used here previously was
        # undefined and raised NameError instead of ValueError.
        error_string = "{} is an unsupported filetype".format(filename)
        raise ValueError(error_string)
    except KeyError as error:
        error_string = "given volume_spec is corrupt"
        raise ValueError(error_string) from error
def __init__(self, tiff_file, bounding_box: BoundingBox,
             iteration_size: BoundingBox = BoundingBox(
                 Vector(0, 0, 0), Vector(128, 128, 32)),
             stride: Vector = Vector(64, 64, 16)):
    """
    Creates a three-dimensional volume dataset backed by a TIFF stack file
    or a directory of TIFF files

    :param tiff_file: Either a TIFF stack file or a directory containing
    TIFF files
    :param bounding_box: The bounding box encompassing the volume
    :param iteration_size: The bounding box of each data sample
    :param stride: The displacement between consecutive data samples
    """
    # Record the TIFF source before the base class sets up the bounding
    # box and the iteration parameters
    self.setFile(tiff_file)
    super().__init__(bounding_box, iteration_size, stride)
def test_stitcher(self):
    """
    Blends every sample of a TIFF volume into an empty array and checks
    that a sample read back from the blended array matches the input.
    """
    # Stitch a test TIFF dataset
    inputDataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))
    outputDataset = Array(
        np.zeros(inputDataset.getBoundingBox().getNumpyDim()))

    # The with-statement guarantees the input volume is exited afterwards
    with inputDataset:
        for data in inputDataset:
            outputDataset.blend(data)

        # .all must be called; the bound method itself is always truthy
        self.assertTrue(
            (inputDataset[20].getArray() ==
             outputDataset[20].getArray()).all(),
            "Blending output does not match input")

        tif.imsave(os.path.join(IMAGE_PATH, "test_stitch.tif"),
                   outputDataset[100].getArray().astype(np.uint16))
def __init__(self, hdf5_file, dataset, bounding_box: BoundingBox,
             iteration_size: BoundingBox = BoundingBox(
                 Vector(0, 0, 0), Vector(128, 128, 20)),
             stride: Vector = Vector(64, 64, 10)):
    """
    Creates a three-dimensional volume dataset backed by a dataset inside
    a HDF5 file

    :param hdf5_file: A HDF5 file path
    :param dataset: A HDF5 dataset name
    :param bounding_box: The bounding box encompassing the volume
    :param iteration_size: The bounding box of each data sample
    :param stride: The displacement between consecutive data samples
    """
    # Record where the data lives before the base class sets up the
    # bounding box and the iteration parameters
    self.setFile(hdf5_file)
    self.setDataset(dataset)
    super().__init__(bounding_box, iteration_size, stride)
def __init__(self, volumes=None, stack_size: int = 5,
             iteration_size: BoundingBox = BoundingBox(
                 Vector(0, 0, 0), Vector(128, 128, 32)),
             stride: Vector = Vector(64, 64, 16)):
    """
    Initializes a pool of volumes

    :param volumes: Optional initial collection of volumes; when given,
    the pool is flagged as changed
    :param stack_size: The size passed to setStack
    :param iteration_size: The bounding box of each data sample
    :param stride: The displacement between consecutive data samples
    """
    volumes_given = volumes is not None

    # A pre-populated pool is flagged as changed; an empty one is not
    self.volumes = volumes if volumes_given else []
    self.volumes_changed = volumes_given

    self.volume_list = []
    self.setStack(stack_size)
    self.setIteration(iteration_size, stride)
    self.valid_data = None
def setBoundingBox(self, bounding_box: BoundingBox = None,
                   displacement: Vector = None):
    """
    Sets the bounding box of the volume. Without an explicit bounding box,
    one is built from the origin to the reversed shape of the volume's
    array.

    :param bounding_box: The bounding box of the volume
    :param displacement: The displacement of the bounding box from the
    origin
    """
    if bounding_box is None:
        # The array shape is reversed to obtain the Vector components
        origin = Vector(0, 0, 0)
        extent = Vector(*self.getArray().shape[::-1])
        bounding_box = BoundingBox(origin, extent)

    self.bounding_box = bounding_box

    if displacement is None:
        return
    self.bounding_box = self.bounding_box + displacement
def test_prediction(self):
    """
    Runs a Predictor over the input TIFF volume using the iteration_10
    checkpoint and writes the prediction to disk as float32.
    """
    os.makedirs('./tests/checkpoints', exist_ok=True)

    net = RSUNet()

    # NOTE(review): this checkpoint is presumably produced by an earlier
    # training test; confirm the required test ordering.
    checkpoint = './tests/checkpoints/iteration_10.ckpt'

    inputs_dataset = TiffVolume(
        os.path.join(IMAGE_PATH, "inputs.tif"),
        BoundingBox(Vector(0, 0, 0), Vector(1024, 512, 50)))

    # The with-statement guarantees the volume is exited afterwards
    with inputs_dataset:
        predictor = Predictor(net, checkpoint, gpu_device=1)

        output_volume = Array(
            np.zeros(inputs_dataset.getBoundingBox().getNumpyDim()))

        predictor.run(inputs_dataset, output_volume, batch_size=5)

        tif.imsave(os.path.join(IMAGE_PATH, "test_prediction.tif"),
                   output_volume.getArray().astype(np.float32))
def getArray(self, bounding_box: BoundingBox = None) -> np.ndarray:
    """
    Retrieves the array contents of the volume, or the subsection covered
    by a bounding box when one is provided.

    :param bounding_box: The bounding box of a subsection of the volume
    :return: The whole array, or the requested slice of it
    :raises ValueError: if the bounding box is not a subset of the
    volume's bounding box
    """
    if bounding_box is None:
        return self.array

    volume_bounding_box = self.getBoundingBox()
    if not bounding_box.isSubset(volume_bounding_box):
        raise ValueError("The bounding box must be a subset" +
                         " of the volume")

    # Shift the request so it is relative to the volume's lower edge
    volume_origin = volume_bounding_box.getEdges()[0]
    lower, upper = (bounding_box - volume_origin).getEdges()

    x1, y1, z1 = lower.getComponents()
    x2, y2, z2 = upper.getComponents()

    # The array is indexed in (z, y, x) order
    return self.array[z1:z2, y1:y2, x1:x2]
def test_memory_free(self):
    """
    Checks that memory rises while a TiffVolume is open and returns to
    within 20% of the initial usage once the volume has been exited.
    Also prints the time spent in the with-block.
    """
    process = Process(getpid())
    initial_memory = process.memory_info().rss

    start = time.perf_counter()
    with TiffVolume(os.path.join(IMAGE_PATH, "inputs.tif"),
                    BoundingBox(Vector(0, 0, 0),
                                Vector(1024, 512, 50))) as v:
        # Sample the resident set size while the volume is open
        volume_memory = process.memory_info().rss
    end = time.perf_counter()
    print("Load time: {} secs".format(end - start))

    final_memory = process.memory_info().rss

    self.assertAlmostEqual(initial_memory, final_memory,
                           delta=initial_memory * 0.2,
                           msg=("memory leakage: final memory usage is " +
                                "larger than the initial memory usage"))
    # This fails when the open volume did NOT use more memory than the
    # baseline, so the message states exactly that.
    self.assertLess(initial_memory, volume_memory,
                    msg=("volume loading error: volume memory usage is " +
                         "not greater than the initial memory usage"))
def augment(self, bounding_box):
    """
    Produces a raw/label sample pair with a random run of slices dropped.

    An even number of slices and a drop location are drawn at random. The
    requested bounding box is enlarged along Y by that number of slices
    before the parent is queried, and the result is passed through
    self.drop.

    :param bounding_box: The bounding box of the augmented sample
    :return: A tuple of (raw data, label data) tagged with bounding_box
    """
    # Draw the (even) number of dropped slices, then where to drop them
    num_dropped = 2 * random.randrange(1, self.max_slices // 2)
    drop_location = random.randrange(
        num_dropped, bounding_box.getSize()[1] - num_dropped)

    # Enlarge the request along Y to make up for the dropped slices
    lower_edge, upper_edge = bounding_box.getEdges()
    enlarged_bounding_box = BoundingBox(
        lower_edge, upper_edge + Vector(0, num_dropped, 0))

    # Fetch the source data from the parent
    raw, label = self.getParent().get(enlarged_bounding_box)

    # Apply the augmentation
    dropped_raw, dropped_label = self.drop(raw, label,
                                           dropped_slices=num_dropped,
                                           location=drop_location)

    # Wrap the results back into the data format
    return (Data(dropped_raw, bounding_box),
            Data(dropped_label, bounding_box))
def augment(self, bounding_box):
    """
    Produces a raw/label sample pair with a random stitching error.

    An error magnitude and a location along X are drawn at random. The
    requested bounding box is enlarged by 20 along X and by the error
    along Y before the parent is queried, and copies of the returned
    arrays are passed through self.stitch.

    :param bounding_box: The bounding box of the augmented sample
    :return: A tuple of (raw data, label data) tagged with bounding_box
    """
    # Get error and location
    error = random.randrange(2, self.max_error)
    x_len = bounding_box.getSize()[0]
    location = random.randrange(10, x_len - 10)

    # Get initial bounding box. Build a new edge instead of using "+=" so
    # that the caller's bounding box can never be modified in place.
    edge1, edge2 = bounding_box.getEdges()
    edge2 = edge2 + Vector(20, error, 0)
    initial_bounding_box = BoundingBox(edge1, edge2)

    # Get data; work on copies so the parent's arrays stay untouched
    raw_data, label_data = self.getParent().get(initial_bounding_box)
    raw = raw_data.getArray().copy()
    label = label_data.getArray().copy()

    augmented_raw, augmented_label = self.stitch(raw, label,
                                                 location=location,
                                                 error=error)

    # Convert to the data format
    augmented_raw_data = Data(augmented_raw, bounding_box)
    augmented_label_data = Data(augmented_label, bounding_box)

    return (augmented_raw_data, augmented_label_data)
def setIterationSize(self, iteration_size):
    """
    Stores the iteration size as a bounding box anchored at the origin.
    Only the size of the given bounding box is kept.

    :param iteration_size: The bounding box of each data sample
    """
    origin = Vector(0, 0, 0)
    sample_size = iteration_size.getSize()
    self.iteration_size = BoundingBox(origin, sample_size)