import dask.array as da
import numpy as np
from dask.array.reductions import partial_reduce


def concatenate_row_chunks(array, group_every=4):
    """
    When averaging, the output arrays are substantially smaller, which
    can affect disk I/O since many small operations are submitted.
    This operation concatenates row chunks together so that more rows
    are submitted at once.

    Parameters
    ----------
    array : :class:`dask.array.Array`
        dask array to average.
        First dimension must correspond to the MS 'row' dimension.
    group_every : int
        Number of adjacent dask array chunks to group together.
        Defaults to 4.

    Returns
    -------
    :class:`dask.array.Array`
        Array with adjacent row chunks grouped together.
    """
    # Single chunk already
    if len(array.chunks[0]) == 1:
        return array

    # Restrict the number of chunks to group to the
    # actual number of chunks in the array
    group_every = min(len(array.chunks[0]), group_every)

    data = partial_reduce(_safe_concatenate, array,
                          split_every={0: group_every},
                          reduced_meta=None, keepdims=True)

    # NOTE(sjperkins)
    # partial_reduce sets the number of rows in each chunk
    # to 1, which is untrue. Correctly set the row chunks to nan,
    # steal the graph and recreate the array
    row_chunks = tuple(np.nan for _ in data.chunks[0])
    chunks = (row_chunks,) + data.chunks[1:]
    graph = data.__dask_graph__()

    return da.Array(graph, data.name, chunks, dtype=data.dtype)
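
# NOTE: `_safe_concatenate` is referenced above but defined elsewhere in the
# module; its body is not shown here. The definition below is a hypothetical
# stand-in, assuming the helper only guards `np.concatenate` against being
# handed a bare ndarray rather than a sequence of ndarrays by `partial_reduce`.
def _safe_concatenate(*args):
    # A single ndarray (rather than a list of ndarrays) cannot be
    # unpacked into np.concatenate -- return it unchanged
    if len(args) == 1 and isinstance(args[0], np.ndarray):
        return args[0]

    return np.concatenate(*args)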
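
# Usage sketch with illustrative values only: group 10 small row chunks of an
# averaged array into 3 larger chunks before submitting them for writing.
if __name__ == "__main__":
    # 10 row chunks of 100 rows each, e.g. the output of an averaging step
    averaged = da.ones((1000, 64, 4), chunks=(100, 64, 4))

    grouped = concatenate_row_chunks(averaged, group_every=4)

    # 10 row chunks grouped 4 at a time -> 3 output chunks whose row counts
    # are unknown (nan) until computed
    assert len(grouped.chunks[0]) == 3
    assert grouped.compute().shape == (1000, 64, 4)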