def __init__(self, path, shape, dtype, axis=0):
    """Open *path* for writing and emit the NPY header for an appendable array.

    Parameters
    ----------
    path : str or path-like
        Destination file, opened in binary-write mode; the handle is kept
        on ``self.fp`` for subsequent chunk writes.
    shape : tuple
        Logical shape recorded in the header.
    dtype : dtype-like
        Anything ``np.dtype`` accepts.
    axis : int, optional
        Must be 0 — only concatenation along the first axis is supported
        right now.

    Raises
    ------
    ValueError
        If ``axis`` is not 0.
    """
    # Validate with a real exception rather than `assert`, which is
    # stripped when Python runs with -O.
    if axis != 0:
        raise ValueError(
            'only concatenation along the first axis is supported right now')
    # Only C order is supported at the moment.
    self.shape = shape
    self.dtype = np.dtype(dtype)
    header = _npy_header(self.shape, self.dtype)
    version = None
    _check_version(version)
    self.fp = open(path, 'wb')
    try:
        _write_array_header(self.fp, header, version)
    except Exception:
        # Don't leak the file handle if writing the header fails.
        self.fp.close()
        raise
def make_header(dtype):
    """Build a version-3.0 NPY header for an appendable file.

    Ref: https://numpy.org/devdocs/reference/generated/numpy.lib.format.html
    We're doing version 3. Only difference is the zero shape, since we're
    going to deduce the array size from the filesize.

    Parameters
    ----------
    dtype : np.dtype
        Element dtype to record; must not contain Python objects.

    Returns
    -------
    bytes
        The complete header (magic, version, padded header text).

    Raises
    ------
    ValueError
        If the dtype contains Python objects.
    """
    # Raise instead of `assert`: assertions disappear under -O.
    if dtype.hasobject:
        raise ValueError(
            'Arrays with objects in get pickled, so can\'t be appended to')
    bs = BytesIO()
    npformat._write_array_header(bs, {
        # BUG FIX: dtype.descr wraps simple dtypes as [('', '<f8')], which
        # readers interpret as a *structured* dtype. dtype_to_descr() returns
        # the plain type string (e.g. '<f8') for non-structured dtypes —
        # this is what numpy's own header_data_from_array_1_0 uses.
        'descr': npformat.dtype_to_descr(dtype),
        'fortran_order': False,
        'shape': (0, ),
    }, version=(3, 0))
    return bs.getvalue()
def save_large_array(fp, array, axis=0, desc=None):
    """Save a large, potentially memmapped array, into a NPY file, chunk by
    chunk to avoid loading it entirely in memory.

    Parameters
    ----------
    fp : binary file object
        Destination, already open for writing.
    array : np.ndarray or np.memmap
        Array to serialize.
    axis : int, optional
        Must be 0.  # TODO: support other axes
    desc : str, optional
        Label for the tqdm progress bar.

    Raises
    ------
    ValueError
        If ``axis`` is not 0.
    """
    # Real exception instead of `assert` (assert is stripped under -O).
    if axis != 0:
        raise ValueError('only axis=0 is supported')  # TODO: support other axes
    version = None
    _check_version(version)
    # NOTE(review): the header records the array's own fortran_order, but
    # the chunks below are serialized with tobytes() (C order). Presumably
    # callers only pass C-ordered arrays — confirm.
    _write_array_header(fp, header_data_from_array_1_0(array), version)
    n = array.shape[axis]
    if n == 0:
        return
    # Split into ~100 chunks; ceiling division guarantees chunk_size >= 1
    # for any n >= 1, so no extra assert is needed.
    chunk_size = int(ceil(n / 100))
    for start in tqdm(range(0, n, chunk_size), desc=desc):
        chunk = array[start:start + chunk_size, ...]
        fp.write(chunk.tobytes())