def create(
    cls,
    *,
    outer_path: Path,
    inner_path: PurePosixPath,
    filesystem: JsonableFilesystem,
    attributes: N5DatasetAttributes,
) -> "N5DatasetSink":
    """Materialize the n5 container on `filesystem` and return a sink over it.

    Writes the container-level and dataset-level attributes.json files and
    pre-creates every tile parent directory.
    """
    full_path = outer_path.joinpath(inner_path.as_posix().lstrip("/"))
    filesystem.makedirs(full_path.as_posix(), recreate=True)

    # Container root carries the n5 format version marker.
    root_attrs_path = outer_path.joinpath("attributes.json")
    with filesystem.openbin(root_attrs_path.as_posix(), "w") as f:
        f.write(json.dumps({"n5": "2.0.0"}).encode("utf8"))

    # Dataset directory carries the serialized dataset attributes.
    dataset_attrs_path = full_path.joinpath("attributes.json")
    with filesystem.openbin(dataset_attrs_path.as_posix(), "w") as f:
        f.write(json.dumps(attributes.to_json_data()).encode("utf-8"))

    # Pre-create every tile parent directory up front so that concurrent
    # tile writers never race on directory creation.
    created_dirs: Set[Path] = set()
    for tile in attributes.interval.split(attributes.blockSize):
        tile_parent = full_path / attributes.get_tile_path(tile).parent
        if tile_parent and tile_parent not in created_dirs:
            filesystem.makedirs(tile_parent.as_posix())
            created_dirs.add(tile_parent)

    return N5DatasetSink(path=full_path, filesystem=filesystem, attributes=attributes)
def test_n5_attributes():
    """Round-trip N5DatasetAttributes through its json form and back."""
    attributes = N5DatasetAttributes(
        dimensions=Shape5D(x=100, y=200),
        blockSize=Shape5D(x=10, y=20),
        c_axiskeys="yx",
        dataType=np.dtype("uint16").newbyteorder(">"),
        compression=GzipCompressor(level=3),
    )

    roundtripped = N5DatasetAttributes.from_json_data(attributes.to_json_data())
    assert roundtripped == attributes
    # The serialized "axes" come out reversed relative to c_axiskeys ("yx" -> ("x", "y")).
    assert attributes.to_json_data()["axes"] == ("x", "y")
def test_distributed_n5_datasink():
    """Write a datasource through several shared sink writers, then read it back."""
    tmp_path = create_tmp_dir(prefix="test_distributed_n5_datasink")
    filesystem = OsFs(tmp_path.as_posix())
    outer_path = PurePosixPath("test_distributed_n5_datasink.n5")
    inner_path = PurePosixPath("/data")
    full_path = PurePosixPath("test_distributed_n5_datasink.n5/data")

    attributes = N5DatasetAttributes(
        dimensions=datasource.shape,
        blockSize=datasource.tile_shape,
        c_axiskeys=data.axiskeys,  # FIXME: double check this
        dataType=datasource.dtype,
        compression=RawCompressor(),
    )
    sink = N5DatasetSink(
        outer_path=outer_path,
        inner_path=inner_path,
        filesystem=filesystem,
        attributes=attributes,
    )

    writer = sink.create()
    assert not isinstance(writer, Exception)

    # Simulate distributed writing: the same writer handle shared four ways,
    # tiles assigned round-robin.
    writers = [writer] * 4
    for idx, piece in enumerate(DataRoi(datasource).default_split()):
        writers[idx % len(writers)].write(piece.retrieve())

    n5ds = N5DataSource(filesystem=filesystem, path=full_path)
    assert n5ds.retrieve() == data
def test_distributed_n5_datasink(tmp_path: Path, data: Array5D, datasource: DataSource):
    """Create a sink, re-open it three more times, and write tiles round-robin."""
    filesystem = OsFs(tmp_path.as_posix())
    outer_path = Path("test_distributed_n5_datasink.n5")
    inner_path = PurePosixPath("/data")
    full_path = Path("test_distributed_n5_datasink.n5/data")

    attributes = N5DatasetAttributes(
        dimensions=datasource.shape,
        blockSize=datasource.tile_shape,
        axiskeys=datasource.axiskeys,
        dataType=datasource.dtype,
        compression=RawCompressor(),
    )

    # One sink creates the dataset; the other three attach to the same files,
    # mimicking independent distributed workers.
    creator = N5DatasetSink.create(
        outer_path=outer_path,
        inner_path=inner_path,
        filesystem=filesystem,
        attributes=attributes,
    )
    sinks = [creator] + [
        N5DatasetSink.open(path=full_path, filesystem=filesystem) for _ in range(3)
    ]

    for idx, piece in enumerate(DataRoi(datasource).default_split()):
        sinks[idx % len(sinks)].write(piece.retrieve())

    n5ds = N5DataSource(filesystem=filesystem, path=full_path)
    assert n5ds.retrieve() == data
def open(cls, *, path: Path, filesystem: JsonableFilesystem) -> "N5DatasetSink":
    """Attach a sink to an already-created n5 dataset by reading its attributes.json."""
    attrs_path = path.joinpath("attributes.json")
    with filesystem.openbin(attrs_path.as_posix(), "r") as f:
        raw_attributes = f.read().decode("utf8")
    attributes = N5DatasetAttributes.from_json_data(json.loads(raw_attributes))
    return N5DatasetSink(filesystem=filesystem, path=path, attributes=attributes)
def __init__(self, path: Path, *, location: Optional[Point5D] = None, filesystem: JsonableFilesystem):
    """Load dataset metadata from attributes.json and initialize the base class.

    `location`, when given, overrides the location stored in the attributes.
    """
    self.path = path
    self.filesystem = filesystem

    attrs_path = path.joinpath("attributes.json")
    with self.filesystem.openbin(attrs_path.as_posix(), "r") as f:
        raw_attributes = f.read().decode("utf8")
    self.attributes = N5DatasetAttributes.from_json_data(
        json.loads(raw_attributes), location_override=location
    )

    super().__init__(
        tile_shape=self.attributes.blockSize,
        interval=self.attributes.interval,
        dtype=self.attributes.dataType,
        axiskeys=self.attributes.axiskeys,
    )
def test_n5_datasink(tmp_path: Path, data: Array5D, datasource: DataSource):
    """Write translated tiles through a sink and check location and data survive."""
    offset = Point5D.zero(x=7, y=13)
    sink = N5DatasetSink.create(
        filesystem=OsFs(tmp_path.as_posix()),
        outer_path=Path("test_n5_datasink.n5"),
        inner_path=PurePosixPath("/data"),
        attributes=N5DatasetAttributes(
            dimensions=datasource.shape,
            blockSize=Shape5D(x=10, y=10),
            axiskeys=datasource.axiskeys,
            dataType=datasource.dtype,
            compression=RawCompressor(),
            location=Point5D.zero(x=7, y=13),
        ),
    )

    for tile in DataRoi(datasource).split(sink.tile_shape):
        sink.write(tile.retrieve().translated(offset))

    n5ds = N5DataSource(filesystem=sink.filesystem, path=sink.path)
    saved_data = n5ds.retrieve()
    assert saved_data.location == Point5D.zero(x=7, y=13)
    assert saved_data == data
def create_n5(
    array: Array5D,
    *,
    axiskeys: Optional[str] = None,
    chunk_size: Shape5D,
    compression: N5Compressor = RawCompressor(),
):
    """Write `array` to a fresh temporary n5 dataset and return its path.

    The dataset is written under a unique "<tempfile>.n5" container with a
    single "/data" inner dataset, one write per `chunk_size` tile.

    Args:
        array: the data to serialize.
        axiskeys: axis order to store; defaults to `array.axiskeys`.
        chunk_size: n5 block size; also the tile granularity of the writes.
        compression: n5 compressor to use (the shared default instance is
            presumed stateless/immutable — NOTE(review): confirm).

    Returns:
        The container path as a posix string.
    """
    import os

    # Bug fix: tempfile.mkstemp() returns (fd, name). The original code took
    # only the name, leaking the open file descriptor and leaving an unused
    # empty placeholder file behind. Close the fd and remove the placeholder;
    # only the derived "<name>.n5" path is actually used.
    fd, stem = tempfile.mkstemp()
    os.close(fd)
    os.remove(stem)
    path = Path(stem + ".n5")

    sink = N5DatasetSink.create(
        outer_path=path,
        inner_path=PurePosixPath("/data"),
        filesystem=OsFs("/"),
        attributes=N5DatasetAttributes(
            dimensions=array.shape,
            blockSize=chunk_size,
            axiskeys=axiskeys or array.axiskeys,
            dataType=array.dtype,
            compression=compression,
        ),
    )
    for tile in array.split(chunk_size):
        sink.write(tile)
    return path.as_posix()
def __init__(
    self,
    *,
    filesystem: JsonableFilesystem,
    path: PurePosixPath,
    location: Optional[Point5D] = None,
    spatial_resolution: Optional[Tuple[int, int, int]] = None,
):
    """Read the dataset's attributes.json and initialize the base datasource.

    `location`, when given, overrides the location stored in the attributes.
    """
    attrs_path = path.joinpath("attributes.json")
    with filesystem.openbin(attrs_path.as_posix(), "r") as f:
        raw_attributes = f.read().decode("utf8")
    self.attributes = N5DatasetAttributes.from_json_data(
        json.loads(raw_attributes), location_override=location
    )

    super().__init__(
        c_axiskeys_on_disk=self.attributes.c_axiskeys,
        filesystem=filesystem,
        path=path,
        tile_shape=self.attributes.blockSize,
        interval=self.attributes.interval,
        dtype=self.attributes.dataType,
        spatial_resolution=spatial_resolution,
    )
def test_n5_datasink():
    """Write translated tiles through a sink writer and verify the round trip."""
    tmp_path = create_tmp_dir(prefix="test_n5_datasink")
    offset = Point5D.zero(x=7, y=13)
    sink = N5DatasetSink(
        filesystem=OsFs(tmp_path.as_posix()),
        outer_path=PurePosixPath("test_n5_datasink.n5"),
        inner_path=PurePosixPath("/data"),
        attributes=N5DatasetAttributes(
            dimensions=datasource.shape,
            blockSize=Shape5D(x=10, y=10),
            c_axiskeys=data.axiskeys,  # FIXME: double check this
            dataType=datasource.dtype,
            compression=RawCompressor(),
            location=Point5D.zero(x=7, y=13),
        ),
    )

    writer = sink.create()
    assert not isinstance(writer, Exception)

    for tile in DataRoi(datasource).split(sink.tile_shape):
        writer.write(tile.retrieve().translated(offset))

    n5ds = N5DataSource(filesystem=sink.filesystem, path=sink.full_path)
    saved_data = n5ds.retrieve()
    assert saved_data.location == Point5D.zero(x=7, y=13)
    assert saved_data == data