Example #1
def test_ms_update(ms, group_cols, index_cols, select_cols):
    # Zero everything to be sure
    with TableProxy(pt.table,
                    ms,
                    readonly=False,
                    lockoptions='auto',
                    ack=False) as T:
        nrows = T.nrows().result()
        T.putcol("STATE_ID", np.full(nrows, 0, dtype=np.int32)).result()
        data = np.zeros_like(T.getcol("DATA").result())
        data_dtype = data.dtype
        T.putcol("DATA", data).result()

    xds = xds_from_ms(ms,
                      columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    written_states = []
    written_data = []
    writes = []

    # Write out STATE_ID and DATA
    for i, ds in enumerate(xds):
        dims = ds.dims
        chunks = ds.chunks
        state = da.arange(i, i + dims["row"], chunks=chunks["row"])
        state = state.astype(np.int32)
        written_states.append(state)

        data = da.arange(i, i + dims["row"] * dims["chan"] * dims["corr"])
        data = data.reshape(dims["row"], dims["chan"], dims["corr"])
        data = data.rechunk((chunks["row"], chunks["chan"], chunks["corr"]))
        data = data.astype(data_dtype)
        written_data.append(data)

        nds = ds.assign(STATE_ID=(("row", ), state),
                        DATA=(("row", "chan", "corr"), data))

        write = xds_to_table(nds, ms, ["STATE_ID", "DATA"])
        writes.append(write)

    # Do all writes in parallel
    dask.compute(writes)

    xds = xds_from_ms(ms,
                      columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    # Check that state and data have been correctly written
    it = enumerate(zip(xds, written_states, written_data))
    for i, (ds, state, data) in it:
        assert_array_equal(ds.STATE_ID.data, state)
        assert_array_equal(ds.DATA.data, data)
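All of the snippets on this page are excerpted from a larger pytest suite, so their imports and fixtures (ms, group_cols, index_cols, select_cols, tmp_path, tmp_path_factory, request, ...) are defined elsewhere. A plausible preamble they assume is sketched below; the daskms submodule paths are a best guess and may differ between dask-ms versions.

import dask
import dask.array as da
import numpy as np
import pyrap.tables as pt  # python-casacore
from numpy.testing import assert_array_equal

from daskms import xds_from_ms, xds_to_table
from daskms.constants import DASKMS_PARTITION_KEY        # assumed location
from daskms.table_proxy import TableProxy, taql_factory  # assumed location

# Helpers such as orderby_clause, where_clause, table_path_split, key_split
# and xds_to_parquet are also used in the examples below; their import paths
# vary between versions and are not reproduced here.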
Example #2
def test_ms_read(ms, group_cols, index_cols, select_cols):
    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    order = orderby_clause(index_cols)
    np_column_data = []

    with TableProxy(pt.table, ms, lockoptions='auto', ack=False) as T:
        for ds in xds:
            assert "ROWID" in ds.coords
            # Re-read this dataset's group directly via TAQL for comparison
            group_col_values = [ds.attrs[a] for a in group_cols]
            where = where_clause(group_cols, group_col_values)
            query = f"SELECT * FROM $1 {where} {order}"

            with TableProxy(taql_factory, query, tables=[T]) as Q:
                column_data = {c: Q.getcol(c).result() for c in select_cols}
                np_column_data.append(column_data)

    del T

    for ds, column_data in zip(xds, np_column_data):
        for c in select_cols:
            dask_data = ds.data_vars[c].data.compute()
            assert_array_equal(column_data[c], dask_data)
Example #3
def test_column_promotion(ms):
    """ Test singleton columns promoted to lists """
    xds = xds_from_ms(ms, group_cols="SCAN_NUMBER", columns=("DATA", ))

    for ds in xds:
        assert "DATA" in ds.data_vars
        assert list(ds.attrs.keys()) == ["SCAN_NUMBER"]
Example #4
def test_column_promotion(ms):
    """ Test singleton columns promoted to lists """
    xds = xds_from_ms(ms, group_cols="SCAN_NUMBER", columns=("DATA",))

    for ds in xds:
        assert "DATA" in ds.data_vars
        assert "SCAN_NUMBER" in ds.attrs
        assert ds.attrs[DASKMS_PARTITION_KEY] == (("SCAN_NUMBER", "int32"),)
Example #5
def test_multireadwrite(ms, group_cols, index_cols):
    xds = xds_from_ms(ms, group_cols=group_cols, index_cols=index_cols)

    nds = [ds.copy() for ds in xds]
    writes = [xds_to_table(sds, ms,
                           [k for k in sds.data_vars.keys() if k != "ROWID"])
              for sds in nds]

    da.compute(writes)
Example #6
def test_read_array_names(ms):
    _, short_name, _ = table_path_split(ms)
    datasets = xds_from_ms(ms)

    for ds in datasets:
        for k, v in ds.data_vars.items():
            product = ("~[" + str(ds.FIELD_ID) +
                       "," + str(ds.DATA_DESC_ID) + "]")
            prefix = "".join(("read~", k, product))
            assert key_split(v.data.name) == prefix
Example #7
def _proc_map_fn(args):
    try:
        ms, i = args
        xds = xds_from_ms(ms, columns=["STATE_ID"], group_cols=["FIELD_ID"])
        xds[i] = xds[i].assign(STATE_ID=(("row", ), xds[i].STATE_ID.data + i))
        write = xds_to_table(xds[i], ms, ["STATE_ID"])
        write.compute(scheduler='sync')
    except Exception as e:
        print(str(e))

    return True
Example #8
def test_write_array_names(ms, tmp_path):
    _, short_name, _ = table_path_split(ms)
    datasets = xds_from_ms(ms)

    out_table = str(tmp_path / short_name)

    writes = xds_to_table(datasets, out_table, "ALL")

    for ds in writes:
        for k, v in ds.data_vars.items():
            prefix = "".join(("write~", k))
            assert key_split(v.data.name) == prefix
Example #9
# Pytest fixture (decorator not shown): request.param supplies the row chunk size.
def parquet_ms(ms, tmp_path_factory, request):
    parquet_store = tmp_path_factory.mktemp("parquet") / "test.parquet"

    # Chunk in row so we can probe chunk behaviour on reads.
    xdsl = xds_from_ms(ms, chunks={"row": request.param})

    writes = xds_to_parquet(xdsl, parquet_store)

    dask.compute(writes)  # Write to parquet.

    return parquet_store
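The fixture above only writes the Measurement Set out to Parquet. A test consuming it would typically read the store back; a minimal sketch is shown below, assuming xds_from_parquet lives alongside xds_to_parquet in dask-ms's experimental Arrow module (the import path and the comparison column are assumptions, not part of the original suite).

def test_parquet_roundtrip(ms, parquet_ms):
    from daskms.experimental.arrow import xds_from_parquet  # assumed path

    # Read the original MS and the Parquet store written by the fixture,
    # then compare a column dataset-by-dataset.
    ms_xdsl = xds_from_ms(ms)
    pq_xdsl = xds_from_parquet(parquet_ms)

    for ms_ds, pq_ds in zip(ms_xdsl, pq_xdsl):
        assert_array_equal(ms_ds.DATA.data, pq_ds.DATA.data)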
Example #10
def test_row_query(ms, index_cols):
    T = TableProxy(pt.table, ms, readonly=True, lockoptions='auto', ack=False)

    # Get the expected row ordering by lexically
    # sorting the indexing columns
    cols = [(name, T.getcol(name).result()) for name in index_cols]
    expected_rows = np.lexsort(tuple(c for n, c in reversed(cols)))

    del T

    xds = xds_from_ms(ms, columns=index_cols,
                      group_cols="__row__",
                      index_cols=index_cols,
                      chunks={"row": 2})

    actual_rows = da.concatenate([ds.ROWID.data for ds in xds])
    assert_array_equal(actual_rows, expected_rows)
Example #11
def _proc_map_fn(args):
    import dask.threaded as dt

    # No dask pools are spun up
    with dt.pools_lock:
        assert dt.default_pool is None
        assert len(dt.pools) == 0

    try:
        ms, i = args
        xds = xds_from_ms(ms, columns=["STATE_ID"], group_cols=["FIELD_ID"])
        xds[i] = xds[i].assign(STATE_ID=(("row",), xds[i].STATE_ID.data + i))
        write = xds_to_table(xds[i], ms, ["STATE_ID"])
        dask.compute(write)
    except Exception as e:
        print(str(e))

    return True
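Both _proc_map_fn variants (Examples #7 and #11) are meant to be mapped over (ms, i) tuples in separate worker processes. A hedged sketch of a driver for them follows; the helper name, pool size and spawn context are illustrative assumptions rather than code from the original suite.

import multiprocessing as mp

def run_state_updates(ms, n_groups=4):
    # Fan the per-FIELD_ID STATE_ID updates out across worker processes.
    # A "spawn" context avoids inheriting dask/casacore state from the parent.
    ctx = mp.get_context("spawn")

    with ctx.Pool(processes=n_groups) as pool:
        results = pool.map(_proc_map_fn, [(ms, i) for i in range(n_groups)])

    assert all(results)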