# Imports assumed for these tests: the standard dask-ms test-suite
# dependencies. Exact module paths may differ between dask-ms/dask versions.
import dask
import dask.array as da
import numpy as np
import pyrap.tables as pt
from numpy.testing import assert_array_equal
from dask.utils import key_split

from daskms import xds_from_ms, xds_to_table
from daskms.constants import DASKMS_PARTITION_KEY
from daskms.experimental.arrow import xds_to_parquet
from daskms.query import orderby_clause, where_clause
from daskms.table_proxy import TableProxy, taql_factory
from daskms.utils import table_path_split


def test_ms_update(ms, group_cols, index_cols, select_cols):
    # Zero everything to be sure
    with TableProxy(pt.table, ms, readonly=False,
                    lockoptions='auto', ack=False) as T:
        nrows = T.nrows().result()
        T.putcol("STATE_ID", np.full(nrows, 0, dtype=np.int32)).result()
        data = np.zeros_like(T.getcol("DATA").result())
        data_dtype = data.dtype
        T.putcol("DATA", data).result()

    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    written_states = []
    written_data = []
    writes = []

    # Write out STATE_ID and DATA
    for i, ds in enumerate(xds):
        dims = ds.dims
        chunks = ds.chunks

        state = da.arange(i, i + dims["row"], chunks=chunks["row"])
        state = state.astype(np.int32)
        written_states.append(state)

        data = da.arange(i, i + dims["row"] * dims["chan"] * dims["corr"])
        data = data.reshape(dims["row"], dims["chan"], dims["corr"])
        data = data.rechunk((chunks["row"], chunks["chan"], chunks["corr"]))
        data = data.astype(data_dtype)
        written_data.append(data)

        nds = ds.assign(STATE_ID=(("row",), state),
                        DATA=(("row", "chan", "corr"), data))

        write = xds_to_table(nds, ms, ["STATE_ID", "DATA"])
        writes.append(write)

    # Do all writes in parallel
    dask.compute(writes)

    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    # Check that STATE_ID and DATA have been correctly written
    it = enumerate(zip(xds, written_states, written_data))

    for i, (ds, state, data) in it:
        assert_array_equal(ds.STATE_ID.data, state)
        assert_array_equal(ds.DATA.data, data)
def test_ms_read(ms, group_cols, index_cols, select_cols):
    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    order = orderby_clause(index_cols)
    np_column_data = []

    with TableProxy(pt.table, ms, lockoptions='auto', ack=False) as T:
        for ds in xds:
            assert "ROWID" in ds.coords
            group_col_values = [ds.attrs[a] for a in group_cols]
            where = where_clause(group_cols, group_col_values)
            # TAQL substitutes $1 with the first table in `tables`
            query = f"SELECT * FROM $1 {where} {order}"

            with TableProxy(taql_factory, query, tables=[T]) as Q:
                column_data = {c: Q.getcol(c).result() for c in select_cols}

            np_column_data.append(column_data)

    # Drop the table reference
    del T

    for ds, column_data in zip(xds, np_column_data):
        for c in select_cols:
            dask_data = ds.data_vars[c].data.compute()
            assert_array_equal(column_data[c], dask_data)
def test_column_promotion(ms):
    """ Test singleton columns promoted to lists """
    xds = xds_from_ms(ms, group_cols="SCAN_NUMBER", columns=("DATA",))

    for ds in xds:
        assert "DATA" in ds.data_vars
        assert "SCAN_NUMBER" in ds.attrs
        assert ds.attrs[DASKMS_PARTITION_KEY] == (("SCAN_NUMBER", "int32"),)
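# Illustrative sketch (not part of the original suite): DASKMS_PARTITION_KEY
# records the (column, dtype) pairs a dataset was partitioned on, so the
# grouping values can be recovered generically from the attributes asserted
# above.
def partition_values(ds):
    """Map each partitioning column to its value for this dataset."""
    return {col: ds.attrs[col] for col, _ in ds.attrs[DASKMS_PARTITION_KEY]}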
def test_multireadwrite(ms, group_cols, index_cols):
    xds = xds_from_ms(ms, group_cols=group_cols, index_cols=index_cols)

    nds = [ds.copy() for ds in xds]

    # Write everything except ROWID back to the MS
    writes = [xds_to_table(sds, ms,
                           [k for k in sds.data_vars.keys() if k != "ROWID"])
              for sds in nds]

    da.compute(writes)
def test_read_array_names(ms):
    _, short_name, _ = table_path_split(ms)
    datasets = xds_from_ms(ms)

    for ds in datasets:
        for k, v in ds.data_vars.items():
            # Read arrays are named read~COLUMN~[FIELD_ID,DATA_DESC_ID]
            product = ("~[" + str(ds.FIELD_ID) + "," +
                       str(ds.DATA_DESC_ID) + "]")
            prefix = "".join(("read~", k, product))
            assert key_split(v.data.name) == prefix
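# For reference (behaviour of dask's key_split, assumed unchanged here): the
# full array name carries a trailing token which key_split strips, e.g.
#     key_split("read~DATA~[0,0]-1a2b3c...")  ->  "read~DATA~[0,0]"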
def test_write_array_names(ms, tmp_path):
    _, short_name, _ = table_path_split(ms)
    datasets = xds_from_ms(ms)
    out_table = str(tmp_path / short_name)

    writes = xds_to_table(datasets, out_table, "ALL")

    for ds in writes:
        for k, v in ds.data_vars.items():
            prefix = "".join(("write~", k))
            assert key_split(v.data.name) == prefix
def parquet_ms(ms, tmp_path_factory, request):
    # NOTE: this relies on fixture parametrization (request.param supplies
    # the row chunking), so a @pytest.fixture(params=...) decorator is
    # assumed to be applied where this fixture is registered.
    parquet_store = tmp_path_factory.mktemp("parquet") / "test.parquet"

    # Chunk in row so we can probe chunk behaviour on reads.
    xdsl = xds_from_ms(ms, chunks={"row": request.param})

    # Write to parquet.
    writes = xds_to_parquet(xdsl, parquet_store)
    dask.compute(writes)

    return parquet_store
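# Illustrative companion test (a sketch, not from the original suite): read
# the store back and check that the data round-trips. Assumes
# xds_from_parquet, the reader counterpart of xds_to_parquet in
# daskms.experimental.arrow, and that dataset ordering is preserved.
def test_parquet_roundtrip(ms, parquet_ms):
    from daskms.experimental.arrow import xds_from_parquet

    original = xds_from_ms(ms)
    roundtrip = xds_from_parquet(parquet_ms)

    for ods, rds in zip(original, roundtrip):
        assert_array_equal(ods.DATA.data, rds.DATA.data)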
def test_row_query(ms, index_cols):
    T = TableProxy(pt.table, ms, readonly=True,
                   lockoptions='auto', ack=False)

    # Get the expected row ordering by lexically
    # sorting the indexing columns
    cols = [(name, T.getcol(name).result()) for name in index_cols]
    expected_rows = np.lexsort(tuple(c for n, c in reversed(cols)))

    del T

    xds = xds_from_ms(ms, columns=index_cols,
                      group_cols="__row__",
                      index_cols=index_cols,
                      chunks={"row": 2})

    actual_rows = da.concatenate([ds.ROWID.data for ds in xds])
    assert_array_equal(actual_rows, expected_rows)
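# Note on the reversal above: np.lexsort treats its *last* key as the
# primary sort key, so reversing the column order makes the first index
# column primary. A tiny self-contained illustration:
#
#     a = np.array([1, 1, 0])   # primary key
#     b = np.array([2, 1, 3])   # secondary key
#     np.lexsort((b, a))        # -> array([2, 1, 0])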
def _proc_map_fn(args):
    import dask.threaded as dt

    # No dask pools are spun up in the subprocess
    with dt.pools_lock:
        assert dt.default_pool is None
        assert len(dt.pools) == 0

    try:
        ms, i = args
        xds = xds_from_ms(ms, columns=["STATE_ID"], group_cols=["FIELD_ID"])
        xds[i] = xds[i].assign(STATE_ID=(("row",), xds[i].STATE_ID.data + i))
        write = xds_to_table(xds[i], ms, ["STATE_ID"])
        dask.compute(write)
    except Exception as e:
        # Log in the subprocess, then re-raise so the parent process sees
        # the failure instead of a silent success.
        print(str(e))
        raise

    return True
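# Illustrative driver (a sketch, not from the original suite): _proc_map_fn
# is shaped for one (ms, i) pair per worker process. A "spawn" context is
# assumed so that no dask state is inherited from the parent, which is what
# the pool assertions above verify. Assumes the test MS has at least four
# FIELD_ID groups.
def test_multiprocess_table_update(ms):
    from multiprocessing import get_context

    with get_context("spawn").Pool(4) as pool:
        assert all(pool.map(_proc_map_fn, [(ms, i) for i in range(4)]))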