def quotient_similarity(m, partition, agg='sum', diag_value=None, check=False, n_cpu=DEFAULT_CPUS):
    '''Build the quotient similarity matrix induced by a partition of the rows

    The partition is applied twice through `merge_row_partition`: once on `m`
    and once on the transpose of that intermediate result, so both rows and
    columns end up aggregated by block.

    Params:
    - m: A symmetric 2D array (can be sparse) containing the similarity matrix
    - partition: A list-of-lists partitioning the rows/columns of `m`
    - agg: Either a string naming a matrix method listed in
      `MATRIX_METHOD_STR` (documented as 'sum', 'min', 'max', 'mean',
      'getnnz'), or a function that takes the matrix and one block of
      indices from the partition and aggregates the values across the rows
    - diag_value: When not None, value written on the diagonal of the result
    - check: Logical. Should the function check that `partition` is a valid
      partition of the rows of `m`? Default is False
    - n_cpu: Passed through unchanged to `merge_row_partition`

    Returns a similarity matrix reduced to the dimension induced by the
    partition

    Raises ValueError when `agg` is neither a callable nor a recognised
    method name, or when `check` is set and `partition` fails `is_partition`
    '''
    if callable(agg):
        f_agg = agg
    elif isinstance(agg, str) and agg in MATRIX_METHOD_STR:
        def f_agg(mat, block):
            # Call the named method on the selected rows, collapsing them
            # along axis 0, and wrap the outcome as a CSR matrix.
            return csr_matrix(getattr(mat[block, :], agg)(axis=0))
    else:
        # Covers both an unknown method name and an unsupported type.
        raise ValueError(MATRIX_METHOD_STR_ERR)

    if check and not is_partition(partition, start=0, end=m.shape[0] - 1):
        raise ValueError('Please provide a proper partition')

    # First pass merges the rows; transposing lets the second pass merge
    # what were originally the columns.
    rows_merged = merge_row_partition(m, partition, f_agg, n_cpu)
    result = merge_row_partition(rows_merged.T, partition, f_agg, n_cpu)

    if diag_value is not None:
        result.setdiag(diag_value)
        # NOTE(review): the flattened source does not show whether this call
        # sat inside or after the `diag_value` branch -- confirm placement.
        result.eliminate_zeros()
    return result
def test_not_partition2():
    '''Blocks [[0], [1, 2, 3], [4, 6]] must be rejected by `is_partition`

    Index 5 does not appear in any block -- presumably the reason for the
    rejection.
    '''
    blocks = [[0], [1, 2, 3], [4, 6]]
    accepted = is_partition(blocks)
    assert not accepted
def test_not_partition1():
    '''Blocks [[0], [1, 2], [4, 5]] must be rejected by `is_partition`

    Index 3 does not appear in any block -- presumably the reason for the
    rejection.
    '''
    blocks = [[0], [1, 2], [4, 5]]
    accepted = is_partition(blocks)
    assert not accepted
def test_not_partition():
    '''Blocks covering 10..15 must be rejected for start=10, end=16

    Index 16 does not appear in any block -- presumably `end` is inclusive
    and that is the reason for the rejection.
    '''
    blocks = [[10], [11, 12, 13], [14, 15]]
    accepted = is_partition(blocks, start=10, end=16)
    assert not accepted
def test_is_partition_with_start():
    '''Blocks covering 10..15 must be accepted when start=10 is given

    This test has its own name because another `test_is_partition` is
    defined later in the module. Sharing that name would rebind it at import
    time, and this case would never be collected or run.
    '''
    blocks = [[10], [11, 12, 13], [14, 15]]
    assert is_partition(blocks, start=10)
def test_is_partition():
    '''Blocks covering 0..5 must be accepted with the default bounds'''
    blocks = [[0], [1, 2, 3], [4, 5]]
    accepted = is_partition(blocks)
    assert accepted