def test_sparsify_or_densify(array, dense_threshold, expect_sparse):
    """
    Check that sparsify_or_densify returns a sparse result exactly when
    `expect_sparse` is true, for both a dense and a sparse representation
    of the same boolean matrix.
    """
    def check(matrix):
        output = sparsify_or_densify(matrix, dense_threshold)
        assert scipy.sparse.issparse(output) == expect_sparse

    # test with dense input
    dense_input = numpy.array(array, dtype=numpy.bool_)
    check(dense_input)

    # test with sparse input
    check(scipy.sparse.csc_matrix(dense_input))
def dwwc_sequential(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Compute the degree-weighted walk count (DWWC), in which nodes may be
    repeated within a path, by multiplying the degree-weighted adjacency
    matrices of the metapath's metaedges from left to right.

    Parameters
    ----------
    graph : hetio.hetnet.Graph
    metapath : hetio.hetnet.MetaPath
    damping : float
    dense_threshold : float (0 <= dense_threshold <= 1)
        density threshold at which a sparse matrix is automatically
        converted to a dense one.
    dtype : dtype object

    Returns
    -------
    tuple
        (row names of the first metaedge, column names of the last metaedge,
        DWWC matrix)
    """
    product = None
    source_names = None
    for metaedge in metapath:
        edge_rows, cols, weighted = hetmatpy.matrix.metaedge_to_adjacency_matrix(
            graph, metaedge, dense_threshold=dense_threshold, dtype=dtype)
        weighted = _degree_weight(weighted, damping, dtype=dtype)
        if product is not None:
            # Extend the running product and re-evaluate its storage format.
            product = product @ weighted
            product = sparsify_or_densify(product, dense_threshold)
            continue
        # First metaedge: it supplies the row labels and seeds the product.
        source_names = edge_rows
        product = weighted
    return source_names, cols, product
def _dwpc_approx(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Compute an approximation of DWPC. Only removes the diagonal for the first
    repeated node, and any disjoint repetitions that follow the last occurrence
    of the first repeating node.

    The metapath is split at every occurrence of the first repeated metanode.
    Each segment between two consecutive occurrences is evaluated with `dwpc`
    and the segment matrices are multiplied together in metapath order. Any
    edges before the first occurrence are evaluated with `dwwc`, and any edges
    after the last occurrence with `dwpc`.

    Examples
    --------
    GiGbCrC -> Identical output to DWPC
    GiGbCbGiG -> Approximation

    Parameters
    ----------
    graph : hetio.hetnet.Graph
    metapath : hetio.hetnet.MetaPath
        must provide `get_nodes()` and support slicing by edge position.
    damping : float
    dense_threshold : float
        forwarded to `dwpc`/`dwwc` and used for the final
        `sparsify_or_densify` call.
    dtype : dtype object

    Returns
    -------
    tuple
        (row names, column names, approximate DWPC matrix)

    Raises
    ------
    IndexError
        if no metanode occurs more than once in `metapath`.
    """
    dwpc_matrix = None
    row_names = None
    # Find the first repeated metanode and where it occurs
    nodes = metapath.get_nodes()
    repeated_nodes = [node for i, node in enumerate(nodes) if node in nodes[i + 1:]]
    first_repeat = repeated_nodes[0]
    repeated_indices = [i for i, v in enumerate(nodes) if v == first_repeat]
    # One segment per pair of consecutive occurrences of the repeated metanode
    for start, stop in zip(repeated_indices, repeated_indices[1:]):
        rows, cols, segment_matrix = dwpc(
            graph, metapath[start:stop], damping=damping,
            dense_threshold=dense_threshold, dtype=dtype)
        if dwpc_matrix is None:
            row_names = rows
            dwpc_matrix = segment_matrix
        else:
            # Chain the segments; overwriting here would silently discard
            # every segment except the last one.
            dwpc_matrix = dwpc_matrix @ segment_matrix
    # Add head and tail segments, if applicable
    if repeated_indices[0] != 0:
        row_names, _, head_seg = dwwc(
            graph, metapath[0:repeated_indices[0]], damping=damping,
            dense_threshold=dense_threshold, dtype=dtype)
        dwpc_matrix = head_seg @ dwpc_matrix
    if nodes[repeated_indices[-1]] != nodes[-1]:
        _, cols, tail_seg = dwpc(
            graph, metapath[repeated_indices[-1]:], damping=damping,
            dense_threshold=dense_threshold, dtype=dtype)
        dwpc_matrix = dwpc_matrix @ tail_seg
    dwpc_matrix = sparsify_or_densify(dwpc_matrix, dense_threshold)
    return row_names, cols, dwpc_matrix
def dwwc_chain(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Compute DWWC with an optimized matrix chain multiplication order, as
    numpy.multi_dot does, while still allowing sparse matrices.

    The ordering logic is modified from numpy.linalg.linalg._multi_dot
    (https://git.io/vh31f) which is released under a 3-Clause BSD License
    (https://git.io/vhCDC).

    Returns
    -------
    tuple
        (source node identifiers, target node identifiers, DWWC matrix)
    """
    metapath = graph.metagraph.get_metapath(metapath)
    # Node count of every metanode along the path; these are the matrix
    # dimensions handed to the ordering step.
    node_counts = [graph.count_nodes(metanode) for metanode in metapath.get_nodes()]
    row_ids = hetmatpy.matrix.get_node_identifiers(graph, metapath.source())
    columns_ids = hetmatpy.matrix.get_node_identifiers(graph, metapath.target())
    ordering = _dimensions_to_ordering(node_counts)
    last_edge = len(metapath) - 1
    chained = _multi_dot(
        metapath, ordering, 0, last_edge, graph, damping, dense_threshold, dtype)
    return row_ids, columns_ids, sparsify_or_densify(chained, dense_threshold)
def dwwc_recursive(graph, metapath, damping=0.5, dense_threshold=0, dtype=numpy.float64):
    """
    Recursive DWWC implementation to take better advantage of caching.

    The degree-weighted adjacency matrix of the first metaedge is multiplied
    by the DWWC of the remaining metapath, which is obtained through `dwwc`
    with `dwwc_method=dwwc_recursive`.
    """
    rows, cols, first_edge = hetmatpy.matrix.metaedge_to_adjacency_matrix(
        graph, metapath[0], dense_threshold=dense_threshold, dtype=dtype)
    first_edge = _degree_weight(first_edge, damping, dtype=dtype)

    # A single-edge metapath is just its weighted adjacency matrix.
    dwwc_matrix = first_edge
    if len(metapath) > 1:
        _, cols, remainder = dwwc(
            graph, metapath[1:], damping=damping, dense_threshold=dense_threshold,
            dtype=dtype, dwwc_method=dwwc_recursive)
        dwwc_matrix = first_edge @ remainder

    return rows, cols, sparsify_or_densify(dwwc_matrix, dense_threshold)
def wrapper(*args, **kwargs):
    """
    Call `user_function` with path-count caching when the graph supports it.

    The call arguments are bound against `signature` with defaults applied,
    and the 'metapath' argument is replaced by the object returned from
    `graph.metagraph.get_metapath`. `signature`, `user_function` and `metric`
    come from the enclosing scope, which is not visible here.

    When `graph` is a `hetmatpy.hetmat.HetMat` with a truthy
    `path_counts_cache`, the cache is queried with the metapath, metric and
    damping. A hit is converted with `sparsify_or_densify` and cast to the
    requested dtype. Otherwise `user_function` computes the result.

    Returns
    -------
    tuple
        (row_names, col_names, matrix)
    """
    bound_args = signature.bind(*args, **kwargs)
    bound_args.apply_defaults()
    arguments = bound_args.arguments
    graph = arguments['graph']
    metapath = graph.metagraph.get_metapath(arguments['metapath'])
    arguments['metapath'] = metapath
    damping = arguments['damping']
    cached_result = None
    start = time.perf_counter()
    supports_cache = isinstance(
        graph, hetmatpy.hetmat.HetMat) and graph.path_counts_cache
    if supports_cache:
        cache_key = {
            'metapath': metapath,
            'metric': metric,
            'damping': damping,
        }
        cached_result = graph.path_counts_cache.get(**cache_key)
    # Use one if/else so exactly one branch always binds the result names;
    # a falsy non-None cache value is treated as a miss and recomputed.
    if cached_result:
        row_names, col_names, matrix = cached_result
        matrix = sparsify_or_densify(matrix, arguments['dense_threshold'])
        matrix = matrix.astype(arguments['dtype'])
    else:
        if arguments['dwwc_method'] is None:
            # import default_dwwc_method here to avoid circular dependencies
            from hetmatpy.degree_weight import default_dwwc_method
            arguments['dwwc_method'] = default_dwwc_method
        row_names, col_names, matrix = user_function(**arguments)
    if supports_cache:
        # NOTE(review): this also runs on a cache hit, in which case `runtime`
        # measures only the lookup and conversion - confirm that is intended.
        runtime = time.perf_counter() - start
        graph.path_counts_cache.set(**cache_key, matrix=matrix, runtime=runtime)
    return row_names, col_names, matrix