Exemplo n.º 1
0
def load_hdf5(path, fields=None):
    """Load an HDF5 file into a ``Variations`` object backed by dask arrays.

    The top-level ``samples`` dataset is loaded as a single chunk. Every
    dataset found inside a top-level ``Group`` is looked up in
    ``ZARR_VARIATION_FIELD_MAPPING`` (keyed by ``'<group>/<dataset>'``) and
    stored in the result under the mapped field name.

    Args:
        path: Location of the HDF5 file; it is converted with ``str``
            before being opened read-only.
        fields: Optional collection of mapped field names to load. When it
            is ``None`` or empty, every mapped dataset is loaded.

    Returns:
        The populated ``Variations``. Its ``metadata`` attribute maps each
        loaded field that has HDF5 attributes to a dict of those attributes.

    Datasets whose ``'<group>/<dataset>'`` name has no entry in the mapping
    are skipped, matching what ``load_zarr`` does for unmapped arrays.
    """
    # NOTE(review): the file handle is not closed here. The dask arrays wrap
    # the h5py datasets directly, so presumably it has to stay open for as
    # long as the returned object is used -- confirm before adding a close().
    store = h5py.File(str(path), mode='r')
    samples = store['samples']
    variations = Variations(
        samples=da.from_array(samples, chunks=samples.shape))
    metadata = {}
    for group_name, group in store.items():
        # Top-level entries that are not groups (e.g. 'samples') are not
        # variation fields.
        if not isinstance(group, Group):
            continue
        for array_name, dataset in group.items():
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[
                    f'{group_name}/{array_name}']
            except KeyError:
                # No known field for this dataset: ignore it instead of
                # aborting the whole load.
                continue
            if fields and field not in fields:
                continue
            if dataset.attrs:
                metadata[field] = dict(dataset.attrs.items())

            # Chunk only along the first axis; the remaining axes are kept
            # whole. Assumes the dataset has at least one dimension.
            chunks = (DEF_CHUNK_SIZE,) + tuple(dataset.shape[1:])
            variations[field] = da.from_array(dataset, chunks=chunks)

    variations.metadata = metadata
    return variations
Exemplo n.º 2
0
def load_zarr(path, num_vars_per_chunk=DEFAULT_VARIATION_NUM_IN_CHUNK):
    """Open a zarr group read-only and expose it as a ``Variations`` object.

    Args:
        path: Location of the zarr store; converted with ``str`` before
            being opened.
        num_vars_per_chunk: Chunk length used along the first axis of every
            loaded array. The remaining axes are kept whole.

    Returns:
        A ``Variations`` holding the ``samples`` array plus one dask array
        per array whose ``'<group>/<array>'`` name appears in
        ``ZARR_VARIATION_FIELD_MAPPING``. Its ``metadata`` attribute maps
        each loaded field that has zarr attributes to a dict of them.
    """
    root = zarr.open_group(str(path), mode='r')
    loaded = Variations(samples=da.from_zarr(root.samples))
    attrs_by_field = {}

    for group_name, group in root.groups():
        for array_name, array in group.arrays():
            zarr_key = f'{group_name}/{array_name}'
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[zarr_key]
            except KeyError:
                # Arrays without a known field mapping are ignored.
                continue

            if array.attrs:
                attrs_by_field[field] = dict(array.attrs.items())

            chunk_shape = (num_vars_per_chunk, *array.shape[1:])
            loaded[field] = da.from_zarr(array, chunks=chunk_shape)

    loaded.metadata = attrs_by_field
    return loaded
Exemplo n.º 3
0
def load_hdf5(path, *, chunk_size=600):
    """Load an HDF5 file into a ``Variations`` object backed by dask arrays.

    The top-level ``samples`` dataset is loaded as a single chunk. Every
    dataset found inside a top-level ``Group`` is looked up in
    ``ZARR_VARIATION_FIELD_MAPPING`` (keyed by ``'<group>/<dataset>'``) and
    stored in the result under the mapped field name.

    Args:
        path: Location of the HDF5 file; it is converted with ``str``
            before being opened read-only.
        chunk_size: Chunk length along the first axis of every loaded
            dataset (keyword-only). Defaults to 600, the value that used to
            be hard-coded.

    Returns:
        The populated ``Variations``. Its ``metadata`` attribute maps each
        loaded field that has HDF5 attributes to a dict of those attributes.

    Datasets whose ``'<group>/<dataset>'`` name has no entry in the mapping
    are skipped rather than raising ``KeyError``.
    """
    # NOTE(review): the file handle is not closed here. The dask arrays wrap
    # the h5py datasets directly, so presumably it has to stay open for as
    # long as the returned object is used -- confirm before adding a close().
    store = h5py.File(str(path), mode='r')
    samples = store['samples']
    variations = Variations(samples=da.from_array(samples,
                                                  chunks=samples.shape))
    metadata = {}
    for group_name, group in store.items():
        # Top-level entries that are not groups (e.g. 'samples') are not
        # variation fields.
        if not isinstance(group, Group):
            continue
        for array_name, dataset in group.items():
            try:
                field = ZARR_VARIATION_FIELD_MAPPING[
                    f'{group_name}/{array_name}']
            except KeyError:
                # No known field for this dataset: ignore it instead of
                # aborting the whole load.
                continue
            if dataset.attrs:
                metadata[field] = dict(dataset.attrs.items())
            # Chunk only along the first axis and keep every other axis
            # whole. Building the tuple from shape[1:] gives a chunk spec of
            # the right length for any rank, not just up to three
            # dimensions. Assumes the dataset has at least one dimension.
            chunks = (chunk_size,) + tuple(dataset.shape[1:])
            variations[field] = da.from_array(dataset, chunks=chunks)

    variations.metadata = metadata
    return variations