def setup(self):
    """Sets up the beam bundle."""
    # pylint: disable=g-import-not-at-top, import-outside-toplevel
    import tensorstore as ts

    self._ds_in = ts.open(self._input_spec).result()
    self._shape = self._ds_in.domain.shape
    self._dtype = self._ds_in.dtype.numpy_dtype
    self._ds_out = ts.open(self._output_spec).result()
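# Illustrative assumption (not from the source): `self._input_spec` and
# `self._output_spec` are ordinary JSON-like TensorStore spec dicts, e.g. a
# local zarr array:
example_input_spec = {
    "driver": "zarr",
    "kvstore": {"driver": "file", "path": "/tmp/input.zarr"},
}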
def check_vol(box_zyx, scale):
    # raw volume handle
    spec = dict(config["tensorstore"]["spec"])
    spec['scale_index'] = scale
    context = ts.Context(config["tensorstore"]["context"])
    store = ts.open(spec, read=True, write=False, context=context).result()
    store_box = np.array([
        store.spec().domain.inclusive_min[:3][::-1],
        store.spec().domain.exclusive_max[:3][::-1]
    ])

    # Just verify that the 'service' wrapper is consistent with the low-level handle
    assert service.dtype == store.dtype.numpy_dtype
    assert (service.bounding_box_zyx // (2**scale) == store_box).all(), \
        f"{service.bounding_box_zyx.tolist()} != {store_box.tolist()}"

    if scale == 0:
        # Service INSERTS geometry into config if necessary
        assert config["geometry"]["bounding-box"] == store_box[:, ::-1].tolist()

    store_subvol = store[box_to_slicing(*box_zyx[:, ::-1])].read(
        order='F').result().transpose()
    assert store_subvol.any(), "Volume from raw API is all zeros; this is a bad test"

    subvol = service.get_subvolume(box_zyx, scale)
    assert subvol.any(), "Volume from service is all zeros"
    assert (subvol.shape == (box_zyx[1] - box_zyx[0])).all()
    assert (subvol == store_subvol).all()
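# `box_to_slicing` is used above but not defined in this snippet; it is
# assumed to behave like this minimal sketch, converting a (start, stop)
# coordinate pair into a tuple of slice objects:
def box_to_slicing(start, stop):
    return tuple(slice(a, b) for a, b in zip(start, stop))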
def slice(startstr, sizestr, format):
    """Retrieve slice with specified format."""
    try:
        start_arr = startstr.split("_")
        start = [int(start_arr[0]), int(start_arr[1]), int(start_arr[2])]
        size_arr = sizestr.split("_")
        size = [int(size_arr[0]), int(size_arr[1]), int(size_arr[2])]
        location = request.args.get("location")

        if 1 not in size:
            return Response("one dimension must be size 1", 400)

        location_arr = location.split('/')
        bucket = location_arr[0]
        path = '/'.join(location_arr[1:])

        cache_key = f"{location}_{startstr}_{sizestr}_{format}"
        try:
            resp = SLICE_CACHE.get(cache_key)
        except Exception:
            # Cache miss: fetch the requested slice from the precomputed volume.
            dataset = ts.open({
                'driver': 'neuroglancer_precomputed',
                'kvstore': {
                    'driver': 'gcs',
                    'bucket': bucket,
                },
                'path': path,
                'recheck_cached_data': 'open'
            }).result()
            dataset = dataset[ts.d['channel'][0]]

            x, y, z = start
            sx, sy, sz = size
            data = dataset[x:x + sx, y:y + sy, z:z + sz].read(order='F').result()

            # write 2D image to jpeg or png
            data = np.squeeze(data)
            imgByteArr = io.BytesIO()
            im = Image.fromarray(data.transpose((1, 0)))
            im.save(imgByteArr, format=format)
            resp = imgByteArr.getvalue()
            SLICE_CACHE.put(cache_key, resp)

        r = make_response(resp)
        r.headers.set('Content-Type', f"image/{format}")
        return r
    except Exception:
        return Response(traceback.format_exc(), 400)
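# Hypothetical client call for the `slice` handler above. The route pattern is
# an assumption (the decorator is not shown); the "x_y_z" encoding and the
# `location` query argument come from the handler itself.
import requests

resp = requests.get(
    "http://localhost:8080/slice/4096_4096_0/512_512_1/jpeg",
    params={"location": "my-bucket/my-volume"},  # placeholder bucket/path
)
resp.raise_for_status()
with open("plane.jpeg", "wb") as f:
    f.write(resp.content)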
async def test_memory_n5_cache_open():
    # Await the open future directly rather than blocking on `.result()`
    # inside an async test.
    await ts.open({
        "context": {
            "cache_pool": {
                "total_bytes_limit": 1000000
            }
        },
        "driver": "n5",
        "kvstore": {
            "driver": "memory",
        },
        "metadata": {
            "compression": {
                "type": "gzip"
            },
            "dataType": "uint32",
            "dimensions": [1000, 20000],
            "blockSize": [10, 10],
        },
        "create": True,
        "delete_existing": True,
    })
def _open_labels(self):
    labels_file = self.labels_file
    if self.tensorstore:
        # labels file should be the spec dict for tensorstore
        labels = ts.open(labels_file, create=False, open=True).result()
        if not self.gt_file:
            # we need to apply the slice and so need to construct
            # the correct tuple of int / slices
            labels = labels[self.time_index]
    else:
        labels = zarr.open(labels_file, mode='r+')
        if not self.gt_file:
            labels = labels[self.time_index]
    return labels
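# Illustrative assumption (not from the source): in the tensorstore branch,
# `labels_file` holds a spec dict rather than a filesystem path, e.g. a zarr
# array on disk (the else branch pairs naturally with the zarr driver):
labels_spec_example = {
    "driver": "zarr",
    "kvstore": {"driver": "file", "path": "/data/labels.zarr"},
}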
async def async_deserialize(mesh, mesh_axes, tensorstore_spec, global_shape=None):
    t = await ts.open(ts.Spec(tensorstore_spec), open=True)
    shape = t.shape if global_shape is None else global_shape
    new_shard_shape = gda.get_shard_shape(shape, mesh, mesh_axes)

    async def cb(index):
        out = np.zeros(new_shard_shape, dtype=t.dtype.numpy_dtype)
        requested_domain = ts.IndexTransform(input_shape=shape)[index].domain
        restricted_domain = t.domain.intersect(requested_domain)
        # Shift the local buffer so its origin lines up with the requested
        # region, then copy only the overlapping portion from the store.
        await ts.array(out)[ts.d[:].translate_to[requested_domain.origin]
                            ][restricted_domain].write(t[restricted_domain])
        return out

    return await create_async_gda_from_callback(shape, mesh, mesh_axes, cb)
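# Standalone sketch (an illustration, not from the source) of the
# domain-intersection idiom used in `cb` above: fill a local buffer with only
# the overlapping region of a stored array.
import numpy as np
import tensorstore as ts

store = ts.open(
    {"driver": "zarr", "kvstore": {"driver": "memory"}},
    create=True, dtype=ts.uint32, shape=[8, 8],
).result()
store[...] = np.arange(64, dtype=np.uint32).reshape(8, 8)

out = np.zeros((4, 4), dtype=np.uint32)
requested = ts.IndexTransform(input_shape=[8, 8])[2:6, 2:6].domain
restricted = store.domain.intersect(requested)
# Translate the buffer's origin to the requested region, then copy across.
ts.array(out)[ts.d[:].translate_to[requested.origin]][restricted].write(
    store[restricted]).result()
print(out)  # the 4x4 block of `store` starting at (2, 2)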
def prepare_tensorstore_from_pyramid(
    pyr: Sequence[DataArray],
    level_names: Sequence[str],
    jpeg_quality: int,
    output_chunks: Sequence[int],
    root_container_path: Path,
):
    store_arrays = []
    # sharding = {'@type': 'neuroglancer_uint64_sharded_v1',
    #             'preshift_bits': 9,
    #             'hash': 'identity',
    #             'minishard_index_encoding': 'gzip',
    #             'minishard_bits': 6,
    #             'shard_bits': 15}
    for p, ln in zip(pyr, level_names):
        res = [abs(float(p.coords[k][1] - p.coords[k][0])) for k in p.dims]
        spec: Dict[str, Any] = {
            "driver": "neuroglancer_precomputed",
            "kvstore": {
                "driver": "file",
                "path": str(Path(root_container_path).parent),
            },
            "path": root_container_path.parts[-1],
            "scale_metadata": {
                "size": p.shape,
                "resolution": res,
                "encoding": "jpeg",
                "jpeg_quality": jpeg_quality,
                # 'sharding': sharding,
                "chunk_size": output_chunks,
                "key": ln,
                "voxel_offset": (0, 0, 0),
            },
            "multiscale_metadata": {
                "data_type": p.dtype.name,
                "num_channels": 1,
                "type": "image",
            },
        }
        try:
            # First try to open an existing scale.
            ts.open(spec=spec, open=True).result()
        except ValueError:
            try:
                # The scale doesn't exist yet; create it.
                ts.open(spec=spec, create=True).result()
            except ValueError:
                # A scale exists but is incompatible with the spec; recreate it.
                ts.open(spec=spec, create=True, delete_existing=True).result()
        nicer_array = NicerTensorStore(spec=spec, open_kwargs={"write": True})
        store_arrays.append(nicer_array)
    return store_arrays
async def async_serialize(gda_inp: gda.GlobalDeviceArray, tensorstore_spec,
                          commit_future=None):
    # 'metadata' may not be present at the top level (for example, if we are
    # using a 'cast' driver).
    if not _spec_has_metadata(tensorstore_spec):
        tensorstore_spec['metadata'] = _get_metadata(gda_inp)

    if jax.process_index() == 0:
        open_future = ts.open(
            ts.Spec(tensorstore_spec), create=True, open=True, context=TS_CONTEXT)
        # Asynchronous case.
        if commit_future is not None:
            assert isinstance(commit_future, list)
            commit_future.append(open_future)
        else:
            await open_future

    # `ts.open` runs twice for process 0 because for the first time, we just get
    # the future to be awaited upon in the background thread. The second one runs
    # with `assume_metadata=True` which does no I/O operation and returns the
    # tensorstore object.
    # For every process other than `0`, we open with `assume_metadata=True`.
    t = await ts.open(
        ts.Spec(tensorstore_spec), open=True, assume_metadata=True,
        context=TS_CONTEXT)

    async def _write_array(shard):
        if shard.replica_id == 0:
            write_future = t[shard.index].write(shard.data)
            if commit_future is not None:
                assert isinstance(commit_future, list)
                commit_future.append(write_future.commit)
                await write_future.copy
            else:
                await write_future.commit

    future_write_state = jax.tree_util.tree_map(_write_array,
                                                gda_inp.local_shards)
    return await asyncio.gather(*future_write_state)
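# `_spec_has_metadata` and `_get_metadata` are referenced above but not shown.
# A plausible sketch of the former (an assumption, not the source's code):
# it must recurse into nested driver specs, since 'metadata' may live below a
# wrapper driver such as 'cast'.
def _spec_has_metadata(tree):
    if not isinstance(tree, dict):
        return False
    return 'metadata' in tree or any(
        _spec_has_metadata(subtree) for subtree in tree.values())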
def volume3d_ng(location, bbox, size=132, seed=None, array=None, cloudrun=None,
                sample_array=False, sample_class=False):
    """Returns a dataset based on a generator that will produce an infinite
    number of 3D volumes from ng precomputed, sampled randomly from the given
    bounding box or from a provided list of ROIs.

    Note: only uint8blk volumes are supported.

    Args:
        location (str): directory for precomputed volume (assumes google bucket directories)
        bbox (tuple of tuples): ((x,y,z), (x2,y2,z2)) start and stop location for samples
        size (int): size of each dimension
        seed (int): random seed for repeatable sampling
        array (list): list of rois
        sample_array (bool): if True, sample randomly from `array` instead of
            iterating it in order
        sample_class (bool): if True, treat `array` as a list of roi lists and
            randomly pick a class before picking an roi
        cloudrun (str): location of cloud run service to retrieve volumes
            (if not provided, use the locally installed tensorstore library)

    Returns:
        tf.dataset containing uint8 3D tensors
    """
    if cloudrun is None:
        try:
            import tensorstore as ts
        except ImportError:
            raise Exception("tensorstore not installed")

    def generator():
        if array is not None and not sample_array:
            for start in array:
                yield start
        else:
            # make repeatable if a seed is set
            if seed is not None:
                tf.random.set_seed(seed)
            while True:
                curr_bbox = bbox
                if array is not None:
                    tarray = array
                    if sample_class:
                        cspot = tf.random.uniform(shape=[], minval=0,
                                                  maxval=len(array),
                                                  dtype=tf.int64, seed=seed)
                        tarray = array[cspot]
                    spot = tf.random.uniform(shape=[], minval=0,
                                             maxval=len(tarray),
                                             dtype=tf.int64, seed=seed)
                    curr_bbox = tarray[spot]

                # get random starting point from bbox (x1,y1,z1) (x2,y2,z2)
                xstart = tf.random.uniform(shape=[], minval=curr_bbox[0][0],
                                           maxval=curr_bbox[1][0],
                                           dtype=tf.int64, seed=seed)
                ystart = tf.random.uniform(shape=[], minval=curr_bbox[0][1],
                                           maxval=curr_bbox[1][1],
                                           dtype=tf.int64, seed=seed)
                zstart = tf.random.uniform(shape=[], minval=curr_bbox[0][2],
                                           maxval=curr_bbox[1][2],
                                           dtype=tf.int64, seed=seed)
                yield (xstart, ystart, zstart)

    location_arr = location.split('/')
    bucket = location_arr[0]
    path = '/'.join(location_arr[1:])

    if cloudrun is None:
        # reuse tensorstore object
        dataset = ts.open({
            'driver': 'neuroglancer_precomputed',
            'kvstore': {
                'driver': 'gcs',
                'bucket': bucket,
            },
            'path': path,
            'recheck_cached_data': 'open',
            'scale_index': 0
        }).result()
        dataset = dataset[ts.d['channel'][0]]
    else:
        import requests
        token = subprocess.check_output(["gcloud auth print-identity-token"],
                                        shell=True).decode()
        headers = {}
        headers["Authorization"] = f"Bearer {token[:-1]}"  # strip trailing newline
        headers["Content-type"] = "application/json"

    #@tf.function
    def mapper(xstart, ystart, zstart):
        #xstart = xstart.numpy()
        #ystart = ystart.numpy()
        #zstart = zstart.numpy()
        if cloudrun is None:
            # read from tensorstore
            data = dataset[xstart:(xstart + size), ystart:(ystart + size),
                           zstart:(zstart + size)].read().result()
            return tf.convert_to_tensor(data, dtype=tf.uint8)
        else:
            # read from cloud run function
            config = {
                "location": location,
                "size": [int(size), int(size), int(size)],
                "start": [int(xstart), int(ystart), int(zstart)]
            }
            res = requests.post(cloudrun + "/volume", data=json.dumps(config),
                                headers=headers)
            if res.status_code != 200:
                # refetch token if obsolete
                token = subprocess.check_output(
                    ["gcloud auth print-identity-token"], shell=True).decode()
                headers["Authorization"] = f"Bearer {token[:-1]}"
                res = requests.post(cloudrun + "/volume",
                                    data=json.dumps(config), headers=headers)
                if res.status_code != 200:
                    raise RuntimeError("cloud run failed")
            data = np.frombuffer(res.content, dtype=np.uint8)
            data = data.reshape((size, size, size))
            #data = data.transpose((2,1,0))
            return tf.convert_to_tensor(data, dtype=tf.uint8)

    #@tf.function
    def wrapper_mapper(x, y, z):
        tensor = tf.py_function(func=mapper, inp=(x, y, z), Tout=tf.uint8)
        tensor.set_shape((size, size, size))
        return tensor

    return tf.data.Dataset.from_generator(
        generator,
        output_types=(tf.int64, tf.int64, tf.int64)
    ).map(
        wrapper_mapper,
        num_parallel_calls=AUTOTUNE
    )  # ideally set to some concurrency that matches the number of parallel http calls possible
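# Hypothetical usage of volume3d_ng above; the bucket path and bbox are
# placeholders, not taken from the source. Builds a pipeline of random 64^3
# uint8 volumes and pulls two of them.
ds = volume3d_ng("my-bucket/my-volume",
                 bbox=((0, 0, 0), (1024, 1024, 1024)),
                 size=64, seed=42)
for vol in ds.take(2):
    print(vol.shape, vol.dtype)  # (64, 64, 64) tf.uint8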
def volume():
    """Retrieve volume from tensorstore.

    See fetch_subvolume() function below for client-side example usage.
    """
    try:
        config_file = request.get_json()

        # Strip gs:// prefix
        location = config_file["location"]  # bucket plus path within it
        if location.startswith('gs://'):
            location = location[len('gs://'):]

        start = config_file["start"]  # in XYZ order
        size = config_file["size"]  # in XYZ order
        scale_index = config_file.get("scale_index", 0)
        use_jpeg = config_file.get("jpeg", False)

        location_arr = location.split('/')
        bucket = location_arr[0]
        path = '/'.join(location_arr[1:])

        # Open the volume (cached data is rechecked at open time).
        dataset = ts.open({
            'driver': 'neuroglancer_precomputed',
            'kvstore': {
                'driver': 'gcs',
                'bucket': bucket,
            },
            'path': path,
            'recheck_cached_data': 'open',
            'scale_index': scale_index
        }).result()
        dataset = dataset[ts.d['channel'][0]]

        # +--------------------------------------------------------------------+
        # | A quick guide to 3D array index semantics and memory order choices  |
        # +--------------------------------------------------------------------+
        #
        # +-----------------+--------------+-----------------------------------------------+
        # | Index semantics | Memory order | Notes                                         |
        # +-----------------+--------------+-----------------------------------------------+
        # | a[Z,Y,X]        | C            | Standard for Python users. Prefer this.       |
        # |                 |              |                                               |
        # | a[X,Y,Z]        | F            | Identical memory layout to the above          |
        # |                 |              | (the RAM contents are identical to the above) |
        # |                 |              | but due to the reverse index meaning, this    |
        # |                 |              | is likely to introduce confusion and/or       |
        # |                 |              | accidental inefficiencies when you pass this  |
        # |                 |              | array to library functions which expect a     |
        # |                 |              | standard C-order array.                       |
        # |                 |              |                                               |
        # | a[Z,Y,X]        | F            | Never do this.                                |
        # |                 |              |                                               |
        # | a[X,Y,Z]        | C            | Never do this.                                |
        # +-----------------+--------------+-----------------------------------------------+
        #
        # Unfortunately, TensorStore.read() always returns an [X,Y,Z]-indexed array,
        # but it does permit you to specify the memory ordering.
        # Therefore, we request F-order, the only sane choice.
        # The buffer we'll return to the caller can be interpreted as either F/XYZ or C/ZYX.
        # (That's their business.)
        x, y, z = start
        sx, sy, sz = size
        data = dataset[x:x + sx, y:y + sy, z:z + sz].read(order='F').result()

        if not use_jpeg or sz > 1:
            r = make_response(data.tobytes(order='F'))
            r.headers.set('Content-Type', 'application/octet-stream')
            return r
        else:
            # write 2D image to JPEG
            data = np.squeeze(data)
            imgByteArr = io.BytesIO()
            im = Image.fromarray(data.transpose((1, 0)))
            im.save(imgByteArr, format="JPEG")
            r = make_response(imgByteArr.getvalue())
            r.headers.set('Content-Type', 'image/jpeg')
            return r
    except Exception:
        return Response(traceback.format_exc(), 400)
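# The `fetch_subvolume` helper mentioned in the docstring is not shown here;
# this is a hedged sketch of what such a client could look like. The server
# URL is a placeholder, and uint8 data is an assumption (the endpoint returns
# whatever dtype the stored volume has).
import json
import numpy as np
import requests

def fetch_subvolume(server, location, start_xyz, size_xyz):
    payload = {"location": location,
               "start": list(start_xyz),  # XYZ order
               "size": list(size_xyz)}    # XYZ order
    res = requests.post(f"{server}/volume", data=json.dumps(payload),
                        headers={"Content-type": "application/json"})
    res.raise_for_status()
    sx, sy, sz = size_xyz
    # Same bytes, two valid views: interpret the F/XYZ buffer as C/ZYX,
    # per the memory-order table in the handler above.
    return np.frombuffer(res.content, dtype=np.uint8).reshape((sz, sy, sx))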
def __setitem__(self, slices, values):
    ts.open(spec=self.spec,
            **self.open_kwargs).result()[ts.d["channel"][0]][slices] = values
    return None
def __getitem__(self, slices):
    return ts.open(spec=self.spec, **self.open_kwargs).result()[slices]
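# Hedged usage sketch for the NicerTensorStore wrapper these methods belong to
# (constructed in prepare_tensorstore_from_pyramid above): each access re-opens
# the store from `self.spec`, so reads and writes look like plain array
# indexing. The spec variable and uint8 dtype here are assumptions.
arr = NicerTensorStore(spec=spec, open_kwargs={"write": True})
arr[0:64, 0:64, 0:64] = np.zeros((64, 64, 64), dtype="uint8")
print(arr[0:64, 0:64, 0:64].read().result().shape)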