def add_group_metadata(
    zarr_root: Group,
    image: Optional[omero.gateway.ImageWrapper],
    axes: List[str],
    resolutions: int = 1,
) -> None:
    """Attach OMERO rendering metadata and OME-Zarr multiscales metadata.

    Writes an "omero" attribute (channels + rendering defaults) onto the
    root group when *image* is given, then always writes the "multiscales"
    attribute describing ``resolutions`` pyramid levels named "0", "1", ...

    :param zarr_root: root zarr group to receive the attributes
    :param image: source OMERO image, or None to skip the "omero" block
    :param axes: axis names for the multiscales entry (e.g. ["t","c","z","y","x"])
    :param resolutions: number of resolution levels (dataset paths)
    """
    if image:
        try:
            image_data = {
                "id": 1,
                "channels": [channelMarshal(c) for c in image.getChannels()],
                "rdefs": {
                    # ternary instead of the legacy ``and/or`` idiom
                    "model": "greyscale" if image.isGreyscaleRenderingModel() else "color",
                    "defaultZ": image._re.getDefaultZ(),
                    "defaultT": image._re.getDefaultT(),
                },
                "version": VERSION,
            }
            zarr_root.attrs["omero"] = image_data
        finally:
            # Always release the server-side rendering engine, even if
            # channel marshalling or the attribute write raises.
            image._closeRE()
    multiscales = [
        {
            "version": "0.3",
            "datasets": [{"path": str(r)} for r in range(resolutions)],
            "axes": axes,
        }
    ]
    zarr_root.attrs["multiscales"] = multiscales
def test_group_init_errors_1(self):
    """Constructing a Group over a store with no group metadata must fail."""
    store, chunk_store = self.create_store()
    # nothing has called init_group(), so construction is expected to raise
    with pytest.raises(ValueError):
        Group(store, chunk_store=chunk_store)
    closer = getattr(store, 'close', None)
    if closer is not None:
        closer()
def load(store: StoreLike):
    """Load data from an array or group into memory.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.

    Returns
    -------
    out
        A numpy array when the store holds an array; a dict-like lazy
        loader mapping array names to numpy arrays when it holds a group.

    See Also
    --------
    save, savez

    Notes
    -----
    Group members are not read eagerly: each array is pulled into memory
    only when it is first requested from the returned loader.
    """
    # normalize the polymorphic store argument first
    _store = normalize_store_arg(store)
    if contains_array(_store, path=None):
        return Array(store=_store, path=None)[...]
    if contains_group(_store, path=None):
        return LazyLoader(Group(store=_store, path=None))
    # NOTE(review): falls through to an implicit None when the store holds
    # neither an array nor a group — preserved from the original.
def test_group_init_errors_2(self):
    """A pre-existing array at the path must block Group construction."""
    store, chunk_store = self.create_store()
    # occupy the slot the group would use with an array first
    init_array(store, shape=1000, chunks=100, chunk_store=chunk_store)
    pytest.raises(ValueError, Group, store, chunk_store=chunk_store)
    store.close()
def create_group(self, store=None, path=None, read_only=False,
                 chunk_store=None, synchronizer=None):
    """Initialize and return a Group; sub-classes may override this hook."""
    if store is None:
        # fall back to the store pair supplied by the concrete test class
        store, chunk_store = self.create_store()
    init_group(store, path=path, chunk_store=chunk_store)
    return Group(store, path=path, read_only=read_only,
                 chunk_store=chunk_store, synchronizer=synchronizer)
def test_group_init_from_dict(chunk_dict):
    """Plain dicts passed as stores are wrapped, not used directly."""
    store = dict()
    chunk_store = dict() if chunk_dict else None
    init_group(store, path=None, chunk_store=chunk_store)
    g = Group(store, path=None, read_only=False, chunk_store=chunk_store)
    # the raw dict must have been wrapped in a KVStore
    assert store is not g.store
    assert isinstance(g.store, KVStore)
    if chunk_store is None:
        # without a separate chunk store both attributes share one wrapper
        assert g.store is g.chunk_store
    else:
        assert chunk_store is not g.chunk_store
def add_multiscales_metadata(
    zarr_root: Group,
    axes: List[str],
    resolutions: int = 1,
) -> None:
    """Write OME-Zarr "multiscales" metadata onto the root group.

    :param zarr_root: zarr group whose attrs receive the metadata
    :param axes: axis names for the image data
    :param resolutions: number of pyramid levels; dataset paths are "0".."n-1"
    """
    datasets = [{"path": str(level)} for level in range(resolutions)]
    zarr_root.attrs["multiscales"] = [
        {
            "version": "0.3",
            "datasets": datasets,
            "axes": axes,
        }
    ]
def create_group(self, store=None, path=None, read_only=False,
                 chunk_store=None, synchronizer=None):
    """Create a Group guarded by a ThreadSynchronizer.

    NOTE: the caller-supplied ``synchronizer`` argument is deliberately
    ignored and replaced with a fresh ThreadSynchronizer.
    """
    if store is None:
        store, chunk_store = self.create_store()
    init_group(store, path=path, chunk_store=chunk_store)
    return Group(store, path=path, read_only=read_only,
                 chunk_store=chunk_store,
                 synchronizer=ThreadSynchronizer())
def add_omero_metadata(zarr_root: Group, image: omero.gateway.ImageWrapper) -> None:
    """Write OMERO channel and rendering metadata to the root group's attrs.

    Marshals the image's channels and rendering defaults into the "omero"
    attribute. The server-side rendering engine opened on *image* is always
    released, even if marshalling fails.

    :param zarr_root: zarr group whose attrs receive the "omero" block
    :param image: source OMERO image wrapper (must have an active ``_re``)
    """
    try:
        image_data = {
            "id": 1,
            "channels": [channelMarshal(c) for c in image.getChannels()],
            "rdefs": {
                # ternary instead of the legacy ``and/or`` idiom
                "model": "greyscale" if image.isGreyscaleRenderingModel() else "color",
                "defaultZ": image._re.getDefaultZ(),
                "defaultT": image._re.getDefaultT(),
            },
            "version": VERSION,
        }
        zarr_root.attrs["omero"] = image_data
    finally:
        # Fix: the original skipped _closeRE() when marshalling raised,
        # leaking the rendering engine on the server.
        image._closeRE()
def create_group(self, store=None, path=None, read_only=False,
                 chunk_store=None, synchronizer=None):
    """Create a Group guarded by a ProcessSynchronizer in a temp directory.

    NOTE: the caller-supplied ``synchronizer`` argument is deliberately
    ignored; synchronizer state lives in a temp dir removed at exit.
    """
    if store is None:
        store, chunk_store = self.create_store()
    init_group(store, path=path, chunk_store=chunk_store)
    sync_dir = tempfile.mkdtemp()
    atexit.register(atexit_rmtree, sync_dir)
    return Group(store, path=path, read_only=read_only,
                 synchronizer=ProcessSynchronizer(sync_dir),
                 chunk_store=chunk_store)
def load(store: StoreLike, zarr_version=None, path=None):
    """Load data from an array or group into memory.

    Parameters
    ----------
    store : MutableMapping or string
        Store or path to directory in file system or name of zip file.
    zarr_version : {2, 3, None}, optional
        The zarr protocol version to use when loading. The default value of
        None will attempt to infer the version from `store` if possible,
        otherwise it will fall back to 2.
    path : str or None, optional
        The path within the store from which to load.

    Returns
    -------
    out
        A numpy array when the store holds an array; a dict-like lazy
        loader mapping array names to numpy arrays when it holds a group.

    See Also
    --------
    save, savez

    Notes
    -----
    Group members are not read eagerly: each array is pulled into memory
    only when it is first requested from the returned loader.
    """
    # normalize the polymorphic store argument, then validate/normalize path
    _store = normalize_store_arg(store, zarr_version=zarr_version)
    path = _check_and_update_path(_store, path)
    if contains_array(_store, path=path):
        return Array(store=_store, path=path)[...]
    if contains_group(_store, path=path):
        return LazyLoader(Group(store=_store, path=path))
    # NOTE(review): falls through to an implicit None when the store holds
    # neither an array nor a group — preserved from the original.
def conv_chrom(fname: str, num_samples: int, root: Group, chrom: int) -> None:
    """Convert one chromosome of a TPED-style file into zarr arrays.

    Creates group ``chromosome-<chrom>`` under *root* containing a
    ``positions`` array and a ``calls`` array of shape
    ``(num_positions, num_samples)``.

    :param fname: path to the TPED-style input file
    :param num_samples: number of samples (two allele columns each)
    :param root: zarr group to create the chromosome group under
    :param chrom: chromosome number to extract
    """
    # We need to open each time, so that seeks are different for parallel
    # executions. Fix: the original never closed the handle.
    with open(fname) as tfam:
        chrom_group = root.create_group(f'chromosome-{chrom}')

        # First pass: collect physical positions for this chromosome only.
        positions = []
        place_stream_start(tfam, chrom)
        for line in tfam:
            tokens = line.rstrip().split(' ')
            if int(tokens[0]) != chrom:
                break
            positions.append(int(tokens[3]))
        chrom_group.array('positions', positions)

        all_calls = chrom_group.zeros(
            'calls', shape=(len(positions), num_samples),
            dtype='B')  # , compressor='none')  # chunk this???
        # comment on dtype

        # Second pass: encode the genotype calls row by row.
        place_stream_start(tfam, chrom)
        for count, line in enumerate(tfam):
            if count == len(positions):
                break
            tokens = line.rstrip().split(' ')
            calls = tokens[4:]
            # Fixes: the original used set(calls[4:]) which double-sliced
            # (calls already excludes the 4 leading columns, so the first
            # two samples' alleles were dropped), and subtracted the int 0
            # from a set of strings — the TPED missing marker is '0'.
            # sorted() makes allele order deterministic across runs.
            alleles = sorted(set(calls) - {'0'})
            sample_calls = np.empty(shape=num_samples, dtype='B')
            for sample in range(num_samples):
                a1, a2 = calls[2 * sample:2 * sample + 2]
                try:
                    sample_calls[sample] = encode_alleles(a1, a2, alleles)
                except Exception:
                    # report the failing coordinates before re-raising
                    print(chrom, count, sample, num_samples, len(positions))
                    raise
            all_calls[count, :] = sample_calls
            if count % 1000 == 0:
                print(chrom, count)
def test_group_init_errors_1(self):
    """Group construction must fail before init_group() has run."""
    store, chunk_store = self.create_store()
    # group metadata was never written, so this has to raise
    assert_raises(ValueError, Group, store, chunk_store=chunk_store)
def test_group_init_errors_2(self):
    """An array occupying the path makes Group construction raise KeyError."""
    store, chunk_store = self.create_store()
    # the array blocks the slot the group would use
    init_array(store, shape=1000, chunks=100, chunk_store=chunk_store)
    assert_raises(KeyError, Group, store, chunk_store=chunk_store)
def add_toplevel_metadata(zarr_root: Group) -> None:
    """Record the creating tool and its version on the root group's attrs."""
    creator = {
        "name": "omero-zarr",
        "version": __version__,
    }
    zarr_root.attrs["_creator"] = creator
def add_image(image: omero.gateway.ImageWrapper, parent: Group,
              cache_dir: Optional[str] = None) -> int:
    """
    Adds the image pixel data as array to the given parent zarr group.
    Optionally caches the pixel data in the given cache_dir directory.
    Returns the number of resolution levels generated for the image.
    """
    if cache_dir is not None:
        cache = True
        # mode=511 is 0o777 — cache dir is world-writable
        os.makedirs(os.path.join(cache_dir, str(image.id)), mode=511, exist_ok=True)
    else:
        cache = False
        cache_dir = ""
    size_c = image.getSizeC()
    size_z = image.getSizeZ()
    size_x = image.getSizeX()
    size_y = image.getSizeY()
    size_t = image.getSizeT()
    d_type = image.getPixelsType()
    # Build the list of (z, c, t) planes that must be fetched from the
    # server; cached planes are skipped here and loaded from disk below.
    zct_list = []
    for t in range(size_t):
        for c in range(size_c):
            for z in range(size_z):
                if cache:
                    # We only want to load from server if not cached locally
                    filename = os.path.join(
                        cache_dir,
                        str(image.id),
                        f"{z:03d}-{c:03d}-{t:03d}.npy",
                    )
                    if not os.path.exists(filename):
                        zct_list.append((z, c, t))
                else:
                    zct_list.append((z, c, t))
    pixels = image.getPrimaryPixels()

    def planeGen() -> np.ndarray:
        # Lazy generator over the server-fetched planes; the second loop
        # below consumes it with next() exactly once per uncached plane,
        # in the same zct order used to build zct_list.
        planes = pixels.getPlanes(zct_list)
        yield from planes

    planes = planeGen()

    # Target size for smallest multiresolution
    TARGET_SIZE = 96
    # Halve the longest side until it fits TARGET_SIZE; each halving adds
    # one pyramid level.
    level_count = 1
    longest = max(size_x, size_y)
    while longest > TARGET_SIZE:
        longest = longest // 2
        level_count += 1

    # One zarr array per resolution level, created lazily on the first plane.
    field_groups = []
    for t in range(size_t):
        for c in range(size_c):
            for z in range(size_z):
                if cache:
                    filename = os.path.join(
                        cache_dir,
                        str(image.id),
                        f"{z:03d}-{c:03d}-{t:03d}.npy",
                    )
                    if os.path.exists(filename):
                        plane = numpy.load(filename)
                    else:
                        # fetch from server, then persist to the cache
                        plane = next(planes)
                        numpy.save(filename, plane)
                else:
                    plane = next(planes)
                for level in range(level_count):
                    # NOTE: size_y/size_x are rebound to the current plane's
                    # shape, shrinking as the plane is downsampled per level.
                    size_y = plane.shape[0]
                    size_x = plane.shape[1]
                    # If on first plane, create a new group for this resolution level
                    if t == 0 and c == 0 and z == 0:
                        field_groups.append(
                            parent.create(
                                str(level),
                                shape=(size_t, size_c, size_z, size_y, size_x),
                                chunks=(1, 1, 1, size_y, size_x),
                                dtype=d_type,
                            ))
                    # field_group = field_groups[level]
                    field_groups[level][t, c, z, :, :] = plane
                    if (level + 1) < level_count:
                        # resize for next level...
                        plane = cv2.resize(
                            plane,
                            dsize=(size_x // 2, size_y // 2),
                            interpolation=cv2.INTER_NEAREST,
                        )
    return level_count
def add_raw_image(
    *,
    planes: Iterator[np.ndarray],
    size_z: int,
    size_c: int,
    size_t: int,
    d_type: np.dtype,
    parent: Group,
    level_count: int,
    cache_dir: Optional[str] = None,
    cache_file_name_func: Optional[Callable[[int, int, int], str]] = None,
) -> Tuple[int, List[str]]:
    """Adds the raw image pixel data as array to the given parent zarr group.
    Optionally caches the pixel data in the given cache_dir directory.
    Returns the number of resolution levels generated for the image.

    planes: Generator returning planes in order of zct (whatever order
    OMERO returns in its plane generator). Each plane must be a numpy
    array with shape (size_y, sizex), or None to skip the plane.

    cache_file_name_func must be supplied whenever cache_dir is given; it
    maps (z, c, t) to the cache file path for that plane.
    """
    if cache_dir is not None:
        cache = True
    else:
        cache = False
        cache_dir = ""
    # Only dimensions with more than one element appear in the output
    # arrays' shape and in the returned axis names.
    dims = [dim for dim in [size_t, size_c, size_z] if dim != 1]
    axes = []
    if size_t > 1:
        axes.append("t")
    if size_c > 1:
        axes.append("c")
    if size_z > 1:
        axes.append("z")
    # One zarr array per resolution level, created lazily when a level is
    # first reached (planes may be None-skipped, hence len() check below).
    field_groups: List[Array] = []
    for t in range(size_t):
        for c in range(size_c):
            for z in range(size_z):
                if cache:
                    assert cache_file_name_func
                    filename = cache_file_name_func(z, c, t)
                    if os.path.exists(filename):
                        plane = numpy.load(filename)
                    else:
                        # consume from the generator, then persist to cache
                        plane = next(planes)
                        os.makedirs(os.path.dirname(filename), mode=511, exist_ok=True)
                        numpy.save(filename, plane)
                else:
                    plane = next(planes)
                # None means "skip this plane" per the docstring contract
                if plane is None:
                    continue
                for level in range(level_count):
                    # size_y/size_x track the current (possibly downsampled)
                    # plane, shrinking as levels progress.
                    size_y = plane.shape[0]
                    size_x = plane.shape[1]
                    # If on first plane, create a new group for this resolution level
                    if len(field_groups) <= level:
                        field_groups.append(
                            parent.create(
                                str(level),
                                shape=tuple(dims + [size_y, size_x]),
                                chunks=tuple([1] * len(dims) + [size_y, size_x]),
                                dtype=d_type,
                            ))
                    # Index only the non-singleton dimensions, matching the
                    # shape built from ``dims`` above.
                    indices = []
                    if size_t > 1:
                        indices.append(t)
                    if size_c > 1:
                        indices.append(c)
                    if size_z > 1:
                        indices.append(z)
                    field_groups[level][tuple(indices)] = plane
                    if (level + 1) < level_count:
                        # resize for next level...
                        plane = cv2.resize(
                            plane,
                            dsize=(size_x // 2, size_y // 2),
                            interpolation=cv2.INTER_NEAREST,
                        )
    return (level_count, axes + ["y", "x"])