Пример #1
0
    def partition_lossless(A, stable=True):
        """
        Group the columns of A so that component-wise equal columns share a group.

        :param A:  Matrix (either sparse or dense) whose columns are analyzed to produce a partition
                        (typically a workload matrix or measurement matrix; queries are rows here,
                        even if they are flattened k-dimensional queries)
        :param stable: when True, the group ids produced by numpy.unique are first passed through
                        _replace(inverse, index) and only then canonically ordered; when False they
                        are canonically ordered directly
                        (NOTE(review): presumably this ties group labels to the position of each
                        group's first column -- confirm against _replace)
        :return:   1D partition vector encoding groups
        """
        # Random-projection trick: project every column onto one random vector so that
        # whole columns can be compared through a single scalar each.
        # See http://www.ryanhmckenna.com/2017/01/efficiently-remove-duplicate-rows-from.html
        # (An earlier idea, viewing each row as one void-typed item as described at
        # http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array,
        # could not be made to work on columns.)
        weights = numpy.random.rand(A.shape[0])
        projected = A.T.dot(weights)

        # first_seen: index of the first occurrence of each unique projected value
        # group_ids:  for every column, the index of its unique value (acts as a group id)
        _unique_values, first_seen, group_ids = numpy.unique(
            projected, return_index=True, return_inverse=True)

        if stable:
            group_ids = _replace(group_ids, first_seen)

        return support.canonical_ordering(group_ids, canonical_order=True)
Пример #2
0
def cells_to_mapping(cells, domain):
    """
    Turn a sequence of rectangular cells into a flattened partition vector.

    :param cells: iterable of ((up, left), (low, right)) corner pairs; both corners are
                  inclusive, and each cell is labelled with its position in the iteration
    :param domain: pair (n, m) giving the number of rows and columns of the grid
    :return: the n x m label grid after support.canonical_ordering, flattened to 1D

    Note: the grid is allocated with np.empty, so any position not covered by a cell
    keeps an uninitialized value; later cells overwrite earlier ones where they overlap.
    """
    n_rows, n_cols = domain
    label_grid = np.empty([n_rows, n_cols], dtype=int)

    for group_id, (upper_left, lower_right) in enumerate(cells):
        top, first_col = upper_left
        bottom, last_col = lower_right
        # fill the rectangle one row at a time (both bounds inclusive)
        for r in range(top, bottom + 1):
            label_grid[r, first_col:last_col + 1] = group_id

    return support.canonical_ordering(label_grid).flatten()
Пример #3
0
def partition_grid(domain_shape, grid_shape, canonical_order=False):
    """
    Partition a domain into equally sized rectangular blocks.

    :param domain_shape: a shape tuple describing the domain, e.g (6,6) (in 2D)
    :param grid_shape: a shape tuple describing cells to be grouped, e.g. (2,3) to form
        groups of 2 rows and 3 cols
        note: in 1D both of the above params can simply be integers
    :param canonical_order: when True, the result is additionally passed through
        support.canonical_ordering before being returned
    :return: a partition array of shape domain_shape in which grouped cells are assigned
             some unique 'group id' values; no guarantee on order of the group ids,
             only that they are unique
    """

    # 1D convenience: promote bare integers to one-element shape tuples
    if isinstance(grid_shape, int):
        grid_shape = (grid_shape, )
    if isinstance(domain_shape, int):
        domain_shape = (domain_shape, )

    # the remainders of domain extent / block extent must add up to zero
    assert sum(
        divmod(extent, block)[1]
        for (extent, block) in zip(domain_shape, grid_shape)
    ) == 0, "Domain size along each dimension should be a multiple of size of block"

    def block_id(*idx):
        """
        Called (through numpy.vectorize) with one index tuple of the domain at a time.
        The index is divided by grid_shape in each dimension, which identifies the block
        the cell belongs to; general_pairing then turns that into a single integer.
        """
        position = np.array(idx)
        block_extent = np.array(grid_shape)
        block_coords = util.old_div(position, block_extent)  # broadcasting integer division
        return general_pairing(block_coords)

    # numpy.fromfunction supplies the index grids for domain_shape; the vectorized
    # wrapper feeds them to block_id one index tuple (e.g. (i,j)) at a time
    partition_array = np.fromfunction(np.vectorize(block_id),
                                      domain_shape,
                                      dtype=int)

    if not canonical_order:
        return partition_array

    return support.canonical_ordering(partition_array)
Пример #4
0
    def test_canonical_ordering(self):
        # Canonically ordering the fixture mapping must reproduce this exact labelling.
        expected = np.array([0, 1, 2, 3, 4, 0, 3, 2, 1, 4])

        actual = support.canonical_ordering(self.mapping)

        np.testing.assert_array_equal(actual, expected)