Пример #1
0
    def datasets(self):
        """
        Build the dataset(s) for this table, picking a strategy from
        ``self.group_cols``.

        * ``group_cols`` is exactly ``["__row__"]``: the row ordering is
          chunked one row at a time and one dataset is created per row,
          each receiving its sorted row id as ``exemplar_row``.
        * ``group_cols`` is empty: a single dataset is created from the
          row ordering, chunked with ``self.chunks[0]``.
        * otherwise: datasets are delegated to ``self._group_datasets``
          using the group column values, the ``__firstrow__`` column and
          the per-group row orderings.

        Returns
        -------
        list or tuple
            The datasets on their own when neither ``self.table_keywords``
            nor ``self.column_keywords`` is ``True``. Otherwise a tuple
            starting with the datasets, followed by the table keywords
            and/or the column keywords (in that order), including only
            the extras that were requested.
        """
        proxy = self._table_proxy()
        n_group_cols = len(self.group_cols)

        if n_group_cols == 1 and self.group_cols[0] == "__row__":
            # Row grouping: force a chunk size of one along "row" so that
            # every block of the ordering holds exactly one row.
            taql = ordering_taql(proxy, self.index_cols, self.taql_where)
            per_row_chunks = dict(self.chunks[0], row=1)
            sorted_rows, row_runs = row_ordering(taql, self.index_cols,
                                                 per_row_chunks)

            row_blocks = sorted_rows.blocks
            run_blocks = row_runs.blocks

            # The sorted row ids double as the exemplar rows. They are
            # computed eagerly here so each dataset can carry a concrete
            # value.
            exemplars = sorted_rows.compute()

            datasets = []
            for block_idx, exemplar in enumerate(exemplars):
                block_pair = (row_blocks[block_idx], run_blocks[block_idx])
                datasets.append(
                    self._single_dataset(block_pair, exemplar_row=exemplar))
        elif n_group_cols == 0:
            # No grouping: one dataset spanning the whole row ordering
            taql = ordering_taql(proxy, self.index_cols, self.taql_where)
            row_order = row_ordering(taql, self.index_cols, self.chunks[0])
            datasets = [self._single_dataset(row_order)]
        else:
            # Grouping on one or more columns
            taql = group_ordering_taql(proxy, self.group_cols,
                                       self.index_cols, self.taql_where)
            group_orders = group_row_ordering(taql, self.group_cols,
                                              self.index_cols, self.chunks)

            group_values = []
            for col in self.group_cols:
                group_values.append(taql.getcol(col).result())

            first_rows = taql.getcol("__firstrow__").result()
            assert len(group_orders) == len(first_rows)

            datasets = self._group_datasets(group_values, first_rows,
                                            group_orders)

        # Collect the datasets plus any requested keyword extras
        outputs = [datasets]

        if self.table_keywords is True:
            outputs.append(proxy.getkeywords().result())

        if self.column_keywords is True:
            column_kw_future = proxy.submit(_col_keyword_getter, READLOCK)
            outputs.append(column_kw_future.result())

        # Nothing extra was requested: hand back the datasets unwrapped
        if len(outputs) == 1:
            return datasets

        return tuple(outputs)
Пример #2
0
def test_row_ordering_no_group(ms, index_cols, chunks):
    """Check the row ordering produced when no grouping is requested.

    ``ms``, ``index_cols`` and ``chunks`` are supplied by the caller
    (presumably pytest fixtures/parametrisation -- not visible here).
    The expectations below are written for a 10-row table whose ordering
    on ``index_cols`` is the exact reverse of the stored row order.
    """
    order_taql = ordering_taql(table_proxy(ms), index_cols)
    # NOTE(review): assert_liveness is defined elsewhere; its arguments
    # look like expected counts of live objects (presumably table proxies
    # and executors) -- confirm against its definition.
    assert_liveness(2, 1)
    orders = row_ordering(order_taql, index_cols, chunks)
    # Creating the ordering is expected to leave the counts unchanged
    assert_liveness(2, 1)

    # Normalise the requested row chunks against a length-10 dimension so
    # they can be compared with the chunks of the output array
    expected_chunks = da.core.normalize_chunks(chunks['row'], (10, ))

    # The first element of the ordering must be chunked as requested
    assert orders[0].chunks == expected_chunks

    # Evaluate the row ids and check that they come out in reverse order
    rowids = dask.compute(orders[0])[0]
    assert_array_equal(rowids, [9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

    # Drop this test's references to the ordering and to the object
    # returned by ordering_taql; the counts are then expected to be zero
    del orders, order_taql
    assert_liveness(0, 0)