import h5py
from h5py import Group
import zarr
import dask.array as da

# Variations, ZARR_VARIATION_FIELD_MAPPING, DEF_CHUNK_SIZE and
# DEFAULT_VARIATION_NUM_IN_CHUNK are assumed to be defined elsewhere in
# this module or package.


def load_hdf5(path, fields=None):
    if fields is None:
        fields = []
    store = h5py.File(str(path), mode='r')

    # The sample list is small, so keep it in a single chunk.
    samples = store['samples']
    variations = Variations(samples=da.from_array(samples,
                                                  chunks=samples.shape))

    metadata = {}
    for group_name, group in store.items():
        if not isinstance(group, Group):
            continue
        for array_name, dataset in group.items():
            hdf5_path = f'{group_name}/{array_name}'
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[hdf5_path]
            except KeyError:
                # Ignore datasets that do not map to a known variation field,
                # mirroring the behaviour of load_zarr below.
                continue
            if fields and field not in fields:
                continue
            if dataset.attrs:
                metadata[field] = dict(dataset.attrs.items())
            # Chunk along the variations axis only; keep the remaining
            # dimensions whole.
            chunks = (DEF_CHUNK_SIZE,) + dataset.shape[1:]
            variations[field] = da.from_array(dataset, chunks=chunks)
    variations.metadata = metadata
    return variations
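# Example usage (a sketch: the file path and the '/calls/GT' field name
# below are hypothetical, since the real field names depend on
# ZARR_VARIATION_FIELD_MAPPING, and Variations is assumed to support
# item access matching the assignments above):
#
#     variations = load_hdf5('variations.h5', fields=['/calls/GT'])
#     gts = variations['/calls/GT']    # lazy dask array
#     first = gts[:100].compute()      # materialize the first 100 variations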
def load_zarr(path, num_vars_per_chunk=DEFAULT_VARIATION_NUM_IN_CHUNK):
    z_object = zarr.open_group(str(path), mode='r')
    variations = Variations(samples=da.from_zarr(z_object.samples))

    metadata = {}
    for group_name, group in z_object.groups():
        for array_name, array in group.arrays():
            zarr_field = f'{group_name}/{array_name}'
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[zarr_field]
            except KeyError:
                # Ignore arrays that do not map to a known variation field.
                continue
            if array.attrs:
                metadata[field] = dict(array.attrs.items())
            # Chunk along the variations axis only; keep the remaining
            # dimensions whole.
            chunks = (num_vars_per_chunk,) + array.shape[1:]
            variations[field] = da.from_zarr(array, chunks=chunks)
    variations.metadata = metadata
    return variations
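# Example usage (a sketch; the path is hypothetical). Loading is lazy:
# only metadata and array shapes are read here, and data is pulled chunk
# by chunk when a dask computation runs:
#
#     variations = load_zarr('variations.zarr', num_vars_per_chunk=10_000)
#     print(variations.metadata)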
def load_hdf5(path):
    store = h5py.File(str(path), mode='r')

    # The sample list is small, so keep it in a single chunk.
    samples = store['samples']
    variations = Variations(samples=da.from_array(samples,
                                                  chunks=samples.shape))

    metadata = {}
    for group_name, group in store.items():
        if not isinstance(group, Group):
            continue
        for array_name, dataset in group.items():
            hdf5_path = f'{group_name}/{array_name}'
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[hdf5_path]
            except KeyError:
                # Ignore datasets that do not map to a known variation field.
                continue
            if dataset.attrs:
                metadata[field] = dict(dataset.attrs.items())
            # Hard-coded chunk size of 600 variations per chunk; the other
            # dimensions are kept whole.
            chunks = (600,) + dataset.shape[1:]
            variations[field] = da.from_array(dataset, chunks=chunks)
    variations.metadata = metadata
    return variations
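# Chunking sketch: for a genotype dataset of shape (n_vars, n_samples,
# ploidy), the chunks tuple built above splits only the variations axis.
# For example, with dask alone:
#
#     import numpy as np
#     gts = da.from_array(np.zeros((2000, 100, 2)), chunks=(600, 100, 2))
#     # gts.chunks == ((600, 600, 600, 200), (100,), (2,))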