def test_bucket_read_write():
    """Round-trip test: export a sample datasource to the Precomputed Chunks
    format on the test bucket filesystem, read it back, and verify the voxel
    data is identical.
    """
    source = get_sample_c_cells_datasource()
    fs = get_test_output_bucket_fs()
    info_path = PurePosixPath("c_cells_1.precomputed")

    scale = PrecomputedChunksScale(
        key=PurePosixPath("exported_data"),
        size=(source.shape.x, source.shape.y, source.shape.z),
        chunk_sizes=(
            (source.tile_shape.x, source.tile_shape.y, source.tile_shape.z),
        ),
        encoding=RawEncoder(),
        voxel_offset=(source.location.x, source.location.y, source.location.z),
        resolution=source.spatial_resolution,
    )
    sink = PrecomputedChunksScaleSink(
        info_dir=info_path,
        filesystem=fs,
        num_channels=source.shape.c,
        scale=scale,
        dtype=source.dtype,
    )

    writer = sink.create()
    assert not isinstance(writer, Exception)
    # Creating the sink must materialize the "info" file on the bucket...
    assert fs.exists(info_path.joinpath("info").as_posix())
    # ...and nothing else (sanity check that exists() is not a stub).
    assert not fs.exists(info_path.joinpath("i_dont_exist").as_posix())

    # Write each tile in a worker process; this also exercises pickling of
    # the sink writer across process boundaries.
    with ProcessPoolExecutor() as executor:
        _ = list(
            executor.map(
                partial(_write_data, sink_writer=writer),
                source.roi.get_datasource_tiles(),
            )
        )

    readback = PrecomputedChunksDataSource(
        path=info_path,
        filesystem=fs,
        resolution=source.spatial_resolution,
    )
    assert np.all(
        readback.retrieve().raw("yxc") == source.retrieve().raw("yxc")
    )
def create_precomputed_chunks_sink(
    *,
    shape: Shape5D,
    dtype: "np.dtype[Any]",
    chunk_size: Shape5D,
    fs: "JsonableFilesystem | None" = None,
) -> FsDataSink:
    """Build a single-scale Precomputed Chunks sink for test output.

    The sink writes under a fresh ``<uuid4>.precomputed`` info directory on
    *fs*, falling back to the default test output filesystem when *fs* is
    not given.
    """
    scale = PrecomputedChunksScale(
        key=PurePosixPath("some_data"),
        size=(shape.x, shape.y, shape.z),
        resolution=(1, 1, 1),
        voxel_offset=(0, 0, 0),
        chunk_sizes=((chunk_size.x, chunk_size.y, chunk_size.z),),
        encoding=RawEncoder(),
    )
    return PrecomputedChunksScaleSink(
        filesystem=fs or get_test_output_osfs(),
        info_dir=PurePosixPath(f"{uuid.uuid4()}.precomputed"),
        dtype=dtype,
        num_channels=shape.c,
        scale=scale,
    )
assert not isinstance(classifier, Exception) # we will output to neuroglancer's Precomputed Chunks format # https://github.com/google/neuroglancer/tree/master/src/neuroglancer/datasource/precomputed output_interval: Interval5D = classifier.get_expected_roi(data_source.roi) predictions_data_sink = PrecomputedChunksScaleSink( filesystem=OsFs("/tmp"), dtype=np.dtype("float32"), info_dir=PurePosixPath("my_exported_data"), num_channels=classifier.num_classes, scale=PrecomputedChunksScale( key=PurePosixPath("1_1_1"), size=(output_interval.shape.x, output_interval.shape.y, output_interval.shape.z), resolution=(1,1,1), voxel_offset=(output_interval.start.x, output_interval.start.y, output_interval.start.z), chunk_sizes=( (data_source.tile_shape.x, data_source.tile_shape.y, data_source.tile_shape.z), ), encoding=RawEncoder() ) ) #creates info file on disk plus the "my_exported_data" dir, making us ready to write sink_writer = predictions_data_sink.create() assert not isinstance(sink_writer, Exception) # predict on independent tiles. You could run this with e.g. concurrent.futures.Executor for lazy_tile in data_source.roi.get_datasource_tiles(): predictions: Array5D = classifier(lazy_tile) #if you need the raw numpy array, call .e.g predictions.raw("yx") #predictions.as_uint8().show_channels() sink_writer.write(predictions)