def _add_array(file: tables.File, where: tables.Group, name: str,
               *lists: HomList) -> None:
    """
    Add a homogeneous array to a tables file, filled from a set of lists.

    Each list in *lists is copied into a column of the resulting
    tables.CArray using the same ordering as *lists. Columns shorter than
    the longest list are padded with -1, and the array always has at least
    one row, so a set of empty lists yields a single row of -1.

    Parameters
    ----------
    file : tables.File
        File in which the array is created.
    where : tables.Group
        Group under which the array is created.
    name : str
        Name of the new array node.
    lists : list of lists, each containing the same scalar data type
        (e.g. float, int). The dtype inferred from the first list is used
        for the whole array.

    Raises
    ------
    ValueError
        If no lists are given.

    """
    # Fail early with a clear message; otherwise max() below raises an
    # opaque "arg is an empty sequence" ValueError.
    if not lists:
        raise ValueError(
            "Cannot create array '{}': at least one list is required"
            .format(name)
        )

    arrays = [np.array(ll) for ll in lists]

    # One column per list, one row per element of the longest list. The
    # row count is clamped to 1 so the array is never zero-length
    # (NOTE(review): presumably because a CArray cannot have an empty
    # dimension -- confirm).
    n_rows = max(1, max(a.size for a in arrays))
    nda = np.empty((n_rows, len(arrays)), dtype=arrays[0].dtype)
    nda.fill(-1)  # -1 marks padding cells past the end of shorter lists
    for i, a in enumerate(arrays):
        nda[:a.size, i] = a

    ca: tables.CArray = file.create_carray(
        where,
        name,
        tables.Atom.from_dtype(nda.dtype),
        nda.shape,
        filters=compression_filter,
    )
    ca[...] = nda[...]
def write_coordinates(array_src: CoordinateArraySource, h5file: tables.File,
                      batchsize: int) -> None:
    """
    Copy a coordinate source into a "coordinates" array of an HDF5 file.

    The source is entered as a context manager for the duration of the
    write. A compressed CArray named "coordinates" is created at the file
    root with one Float64 atom per row, the source's column names are
    stored through _make_str_vlarray under "coordinates_columns", and the
    source's missing value is recorded as the ``missing`` attribute of the
    array. Data is then copied batch by batch.

    Parameters
    ----------
    array_src : CoordinateArraySource
        Source of the coordinates; called with a slice to read a batch.
    h5file : tables.File
        Open file that receives the new nodes.
    batchsize : int
        Passed to batch_slices together with the number of rows
        (presumably the number of rows per batch -- confirm).

    """
    with array_src:
        src_shape = array_src.shape
        compression = tables.Filters(complevel=1, complib="blosc:lz4")
        # Each row of the CArray holds one atom of width shape[1].
        row_atom = tables.Float64Atom(shape=(src_shape[1], ))
        coords = h5file.create_carray(
            h5file.root,
            name="coordinates",
            atom=row_atom,
            shape=src_shape[0:1],
            filters=compression,
        )
        _make_str_vlarray(h5file, "coordinates_columns", array_src.columns)
        coords.attrs.missing = array_src.missing

        for batch in batch_slices(batchsize, src_shape[0]):
            coords[batch.start:batch.stop] = array_src(batch)
def _write_source(src: ArraySource, hfile: tables.File, atom: tables.Atom,
                  name: str, transform: Worker, n_workers: int,
                  batchrows: Optional[int] = None) -> None:
    """
    Create a compressed CArray for a source and hand it to _write.

    The array is created at the root of hfile under the given name, using
    the supplied atom and every dimension of the source's shape except the
    last. The source's missing value is stored as the ``missing``
    attribute of the array.

    Parameters
    ----------
    src : ArraySource
        Source providing ``shape``, ``missing`` and ``native``.
    hfile : tables.File
        Open file that receives the new array.
    atom : tables.Atom
        Atom describing each element of the array.
    name : str
        Name of the new array node.
    transform : Worker
        Forwarded unchanged to _write.
    n_workers : int
        Forwarded unchanged to _write.
    batchrows : int, optional
        Rows per batch; when None (or otherwise falsy) ``src.native`` is
        used instead.

    """
    compression = tables.Filters(complevel=1, complib="blosc:lz4")
    target = hfile.create_carray(
        hfile.root,
        name=name,
        atom=atom,
        shape=src.shape[0:-1],
        filters=compression,
    )
    target.attrs.missing = src.missing

    # Fall back to the source's own batch size when none was requested.
    rows_per_batch = batchrows or src.native
    log.info("Writing {} to HDF5 in {}-row batches".format(name, rows_per_batch))
    _write(src, target, rows_per_batch, n_workers, transform)