def test_unload_app(sess, arrow_property_graph_lpa_u2i):
    """Exercise unloading of an app DAG node in three scenarios.

    Covers a single unload, running the same unload op twice, and invoking
    the app again after it has been unloaded.
    """

    def fresh_app_node():
        # Every scenario starts from its own, newly constructed app node.
        assets = AppAssets(algo="lpau2i", context="labeled_vertex_property")
        return AppDAGNode(arrow_property_graph_lpa_u2i, assets)

    # case 1
    # a single unload evaluates to None
    unload_op = fresh_app_node().unload()
    assert sess.run(unload_op) is None

    # case 2
    # unload app twice
    unload_op = fresh_app_node().unload()
    assert sess.run(unload_op) is None
    assert sess.run(unload_op) is None

    # case 3
    # load app after unload
    app_node = fresh_app_node()
    unload_op = app_node.unload()
    assert sess.run(unload_op) is None
    ctx = app_node(max_round=10)
    selected = ctx.to_numpy("r:v0.label_0")
    # Only checks that evaluating the query after an unload does not raise.
    sess.run(selected)
def clustering(graph):
    """Compute the local clustering coefficient of every vertex.

    The local clustering coefficient of a node is the fraction of pairs of
    the node's neighbors that are adjacent to each other.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned the computed clustering value,
            will be evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.clustering(pg)
        >>> sess.close()
    """
    # Directed graphs run the "clustering" app; undirected ones run "lcc".
    algo_name = "clustering" if graph.is_directed() else "lcc"
    return AppAssets(algo=algo_name, context="vertex_data")(graph)
def voterank(graph, num_of_nodes=0):
    """Evaluate VoteRank on a graph.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        num_of_nodes (unsigned long int, optional): Number of ranked nodes to
            extract. Default all nodes.

    Returns:
        list: Ordered list of computed seeds. Vertices whose result is 0 are
            left out; the rest are ordered by ascending result value.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.voterank(pg, num_of_nodes=10)
        >>> sess.close()
    """
    node_limit = int(num_of_nodes)
    ctx = AppAssets(algo="voterank", context="vertex_data")(graph, node_limit)
    frame = ctx.to_dataframe({"id": "v.id", "result": "r"})
    # Keep only vertices with a non-zero result, then order them by it.
    voted = frame[frame["result"] != 0]
    ranked = voted.sort_values(by=["result"])
    return ranked["id"].tolist()
def bfs_edges(G, source, reverse=False, depth_limit=None):
    """Run the ``bfs_generic`` app from ``source`` and return its edge output.

    Args:
        G: Graph object exposing ``project_to_simple()``.
        source: Start vertex handed to the app.
        reverse: Accepted for signature compatibility; currently ignored.
        depth_limit: Forwarded to the app unchanged.

    Returns:
        list: The ``"r"`` tensor of the resulting context (``axis=0``),
            converted with ``tolist()``.
    """
    # FIXME: reverse not support.
    simple_graph = G.project_to_simple()
    bfs_app = AppAssets(algo="bfs_generic")
    ctx = bfs_app(simple_graph, source, depth_limit, format="edges")
    edge_tensor = ctx.to_numpy("r", axis=0)
    return edge_tensor.tolist()
def degree_assortativity_coefficient(graph, x="out", y="in", weight=None):
    """Compute degree assortativity of graph.

    Assortativity measures the similarity of connections
    in the graph with respect to the node degree.

    Parameters
    ----------
    graph (:class:`graphscope.Graph`): A simple graph.

    x: string ('in','out')
       The degree type for source node (directed graphs only).

    y: string ('in','out')
       The degree type for target node (directed graphs only).

    weight: string or None, optional (default=None)
       Only whether this is ``None`` is consulted here: any non-``None``
       value makes the app run with ``weighted=True``.

    Returns
    -------
    r : float
       Assortativity of graph by degree.

    Examples

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_modern_graph
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_modern_graph(sess)
        >>> g.schema
        >>> c = graphscope.degree_assortativity_coefficient(g, weight="weight")
        >>> sess.close()

    Notes
    -----
    This computes Eq. (21) in Ref. [1]_ , where e is the joint
    probability distribution (mixing matrix) of the degrees.  If G is
    directed than the matrix e is the joint probability of the
    user-specified degree type for the source and target.

    References
    ----------
    .. [1] M. E. J. Newman, Mixing patterns in networks,
       Physical Review E, 67 026126, 2003
    .. [2] Foster, J.G., Foster, D.V., Grassberger, P. & Paczuski, M.
       Edge direction and the structure of networks, PNAS 107, 10815-20 (2010).
    """
    use_weight = weight is not None
    app = AppAssets(algo="degree_assortativity_coefficient", context="tensor")
    ctx = app(
        graph,
        source_degree_type=x,
        target_degree_type=y,
        weighted=use_weight,
    )
    # The coefficient is the first entry of the "r" tensor.
    coefficients = ctx.to_numpy("r", axis=0)
    return coefficients[0]
def test_create_app():
    """Check that builtin app assets can be constructed by algorithm name."""
    # builtin-ldbc compatible graph: arrow_projected dynamic_projected
    # builtin-property compatible graph: arrow_property, append_only
    algo_names = (
        "property_sssp",  # builtin-property app on property graph
        "sssp",  # builtin app on arrow projected graph
        "sssp_has_path",  # on dynamic projected graph
    )
    # Construction alone is the check; nothing is asserted on the instances.
    apps = [AppAssets(algo=name) for name in algo_names]
def _node_boundary(G, nbunch1, nbunch2=None):
    """Return the result of the ``node_boundary`` app as a set.

    Args:
        G: Graph the app is run on.
        nbunch1: Iterable of nodes, sent to the app as a JSON list.
        nbunch2: Optional iterable of nodes, sent as a JSON list. When it is
            ``None`` an empty string is sent instead.

    Returns:
        set: Entries of the ``"r"`` tensor of the resulting context.
    """
    first_json = json.dumps(list(nbunch1))
    # An omitted second bunch is encoded as the empty string, not as "[]".
    second_json = "" if nbunch2 is None else json.dumps(list(nbunch2))
    boundary_app = AppAssets(algo="node_boundary", context="tensor")
    ctx = boundary_app(G, first_json, second_json)
    return set(ctx.to_numpy("r", axis=0).tolist())
def _boundary(G, nbunch1, nbunch2=None):
    """Yield the edges reported by the ``edge_boundary`` app as 2-tuples.

    Args:
        G: Graph the app is run on.
        nbunch1: Iterable of nodes, sent to the app as a JSON list.
        nbunch2: Optional iterable of nodes, sent as a JSON list. Only
            ``None`` means "not given" (an empty string is sent); an
            explicitly empty iterable is sent as ``"[]"``.

    Yields:
        tuple: ``(e[0], e[1])`` for each row ``e`` of the ``"r"`` tensor of
            the resulting context.
    """
    n1json = json.dumps(list(nbunch1))
    # Compare against None instead of relying on truthiness: an explicitly
    # empty nbunch2 must not be mistaken for an omitted one, and array-like
    # inputs cannot be truth-tested at all.
    # NOTE(review): how the backend interprets "[]" is not visible here -
    # confirm it is handled as an empty node set.
    n2json = "" if nbunch2 is None else json.dumps(list(nbunch2))
    ctx = AppAssets(algo="edge_boundary", context="tensor")(G, n1json, n2json)
    for edge in ctx.to_numpy("r", axis=0).tolist():
        yield (edge[0], edge[1])
def degree_centrality(graph, centrality_type="both"):
    """Compute the degree centrality of every vertex.

    The degree centrality values are normalized by dividing by the maximum
    possible degree in a simple graph n-1 where n is the number of nodes in G.

    Args:
        graph (:class:`Graph`): A simple graph.
        centrality_type (str, optional): Available options are in/out/both.
            Defaults to "both".

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with the computed degree
            centrality, evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.degree_centrality(pg, centrality_type="both")
        >>> sess.close()
    """
    # The option is always handed to the app as a plain string.
    kind = str(centrality_type)
    app = AppAssets(algo="degree_centrality", context="vertex_data")
    return app(graph, kind)
def eigenvector_centrality(graph, tolerance=1e-06, max_round=100, weight=None):
    """Compute the eigenvector centrality for the `graph`.

    See more about eigenvector centrality here:
    https://networkx.org/documentation/networkx-1.10/reference/generated/networkx.algorithms.centrality.eigenvector_centrality.html

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        tolerance (float, optional): Defaults to 1e-06.
        max_round (int, optional): Defaults to 100.
        weight (str, optional): The edge data key corresponding to the edge
            weight. Note that property under multiple labels should have the
            consistent index. Defaults to None.
            NOTE(review): not referenced in this function body - presumably
            consumed by a wrapper outside this view; confirm.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with its eigenvector
            centrality, evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.eigenvector_centrality(pg, tolerance=1e-06, max_round=10)
        >>> sess.close()
    """
    # Normalise the numeric arguments before they are handed to the app.
    tol = float(tolerance)
    rounds = int(max_round)
    app = AppAssets(algo="eigenvector_centrality", context="vertex_data")
    return app(graph, tol, rounds)
def avg_clustering(graph):
    """Compute the average clustering coefficient for the directed graph.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.

    Returns:
        The context produced by the ``avg_clustering`` app (``"tensor"``
        context). The coefficient itself is read from it as shown in the
        example below.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.avg_clustering(pg)
        >>> print(c.to_numpy("r", axis=0)[0])
        >>> sess.close()
    """
    app = AppAssets(algo="avg_clustering", context="tensor")
    return app(graph)
def pagerank(graph, delta=0.85, max_round=10):
    """Evaluate PageRank on a graph.

    Args:
        graph (Graph): A projected simple graph.
        delta (float, optional): Damping factor. Defaults to 0.85.
        max_round (int, optional): Maximum number of rounds. Defaults to 10.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            the pagerank value.

    Examples:

    .. code:: python

        import graphscope as gs
        sess = gs.session()
        g = sess.g()
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel')
        r = gs.pagerank(pg, delta=0.85, max_round=10)
        sess.close()
    """
    # Normalise the numeric arguments before they are handed to the app.
    damping = float(delta)
    rounds = int(max_round)
    app = AppAssets(algo="pagerank")
    return app(graph, damping, rounds)
def bfs(graph, src=0):
    """Breadth first search from the src on projected simple graph.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        src (optional): Source vertex of breadth first search. The type should
            be consistent with the id type of the `graph`, that is, it's `int`
            or `str` depending on the `oid_type` is `int64_t` or `string` of
            the `graph`. Defaults to 0.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex with a distance from the source, will
            be evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.bfs(pg, src=6)
        >>> sess.close()
    """
    # src is forwarded untouched so it keeps the graph's own id type.
    app = AppAssets(algo="bfs", context="vertex_data")
    return app(graph, src)
def single_source_dijkstra_path_length(G, source, weight=None):
    """Find shortest weighted path lengths in G from a source node.

    Compute the shortest path length between source and all other
    reachable nodes for a weighted graph.

    Parameters
    ----------
    G : networkx graph

    source : node label
       Starting node for path

    weight : string
       the edge weights will be accessed via the
       edge attribute with this key (that is, the weight of the edge
       joining `u` to `v` will be ``G.edges[u, v][weight]``).
       NOTE(review): not referenced in this function body - presumably
       consumed by a wrapper outside this view; confirm.

    Returns
    -------
    length : dataframe
        Dataframe by node to shortest path length from source.

    Examples
    --------
    >>> G = nx.path_graph(5)
    >>> length = nx.single_source_dijkstra_path_length(G, 0)

    Notes
    -----
    Edge weight attributes must be numerical.
    Distances are calculated as sums of weighted edges traversed.
    """
    app = AppAssets(algo="sssp_projected", context="vertex_data")
    return app(G, source)
def k_shell(graph, k: int):
    """Mark the vertices that belong to the k-shell of the graph.

    The k-shell is the subgraph induced by nodes with core number k.
    That is, nodes in the k-core that are not in the (k+1)-core.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        k (int): The order of the k_shell.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with a boolean:
            1 if the vertex satisfies k-shell, otherwise 0.
            Evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.k_shell(pg, k=3)
        >>> sess.close()
    """
    # The order is always handed to the app as a plain int.
    order = int(k)
    app = AppAssets(algo="kshell", context="vertex_data")
    return app(graph, order)
def k_core(graph, k: int):
    """Mark the vertices that belong to the k-core of the graph.

    K-cores of the graph are connected components that are left after
    all vertices of degree less than `k` have been removed.

    Args:
        graph (:class:`Graph`): A projected simple graph.
        k (int): The `k` for k-core. Converted with ``int()`` before it is
            handed to the app.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            a boolean: 1 if the vertex satisfies k-core, otherwise 0.

    Raises:
        TypeError, ValueError: If ``k`` cannot be converted to ``int``.

    Examples:

    .. code:: python

        import graphscope as gs
        sess = gs.session()
        g = sess.g()
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel')
        r = gs.k_core(pg, k=3)
        sess.close()
    """
    # Normalise k the same way k_shell does, so int-like inputs (numpy
    # integers, numeric strings) reach the app as a plain Python int.
    k = int(k)
    return AppAssets(algo="kcore")(graph, k=k)
def eigenvector_centrality(graph, tolerance=1e-06, max_round=100):
    """Compute the eigenvector centrality for the `graph`.

    See more about eigenvector centrality here:
    https://networkx.org/documentation/networkx-1.10/reference/generated/networkx.algorithms.centrality.eigenvector_centrality.html

    Args:
        graph (:class:`Graph`): A projected simple graph.
        tolerance (float, optional): Defaults to 1e-06.
        max_round (int, optional): Defaults to 100.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            its eigenvector centrality.

    Examples:

    .. code:: python

        import graphscope as gs
        sess = gs.session()
        g = gs.Graph(sess)
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel')
        r = gs.eigenvector_centrality(pg)
        sess.close()
    """
    # Normalise the numeric arguments before they are handed to the app.
    tol = float(tolerance)
    rounds = int(max_round)
    app = AppAssets(algo="eigenvector_centrality")
    return app(graph, tol, rounds)
def triangles(graph):
    """Evaluate triangle counting of the graph G.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with the triangle counting
            result, evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.triangles(pg)
        >>> sess.close()
    """
    app = AppAssets(algo="triangles", context="vertex_data")
    return app(graph)
def k_shell(graph, k: int):
    """Mark the vertices that belong to the k-shell of the graph.

    The k-shell is the subgraph induced by nodes with core number k.
    That is, nodes in the k-core that are not in the (k+1)-core.

    Args:
        graph (:class:`Graph`): A projected simple graph.
        k (int): The `k` for k-shell.

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            a boolean: 1 if the vertex satisfies k-shell, otherwise 0.

    Examples:

    .. code:: python

        import graphscope as gs
        s = gs.session()
        g = s.load_from('The parameters for loading a graph...')
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel')
        r = gs.k_shell(pg, k=3)
        s.close()
    """
    # The order is always handed to the app as a plain int.
    order = int(k)
    app = AppAssets(algo="kshell")
    return app(graph, order)
def degree_centrality(graph, centrality_type="both"):
    """Compute the degree centrality of every vertex.

    The degree centrality values are normalized by dividing by the maximum
    possible degree in a simple graph n-1 where n is the number of nodes in G.

    Args:
        graph (:class:`Graph`): A projected simple graph.
        centrality_type (str, optional): Available options are in/out/both.
            Defaults to "both".

    Returns:
        :class:`VertexDataContext`: A context with each vertex assigned with
            the computed degree centrality.

    Examples:

    .. code:: python

        import graphscope as gs
        sess = gs.session()
        g = gs.Graph(sess)
        pg = g.project_to_simple(v_label="vlabel", e_label="elabel")
        r = gs.degree_centrality(pg, centrality_type="both")
        sess.close()
    """
    # The option is always handed to the app as a plain string.
    kind = str(centrality_type)
    app = AppAssets(algo="degree_centrality")
    return app(graph, kind)
def hits(graph, tolerance=0.01, max_round=100, normalized=True):
    """Compute HITS on `graph`.

    Hyperlink-Induced Topic Search (HITS; also known as hubs and authorities)
    is a link analysis algorithm that rates Web pages. See more here:
    https://en.wikipedia.org/wiki/HITS_algorithm

    Args:
        graph (:class:`Graph`): A projected simple graph.
        tolerance (float, optional): Defaults to 0.01.
        max_round (int, optional): Defaults to 100.
        normalized (bool, optional): Whether to normalize the result to 0-1.
            Defaults to True.

    Returns:
        :class:`VertexPropertyContext`: A context with each vertex assigned
            with the HITS value.

    Examples:

    .. code:: python

        import graphscope as gs
        sess = gs.session()
        g = sess.g()
        pg = g.project_to_simple(v_label='vlabel', e_label='elabel')
        r = gs.hits(pg)
        sess.close()
    """
    # Coerce every option to its plain builtin type before calling the app.
    tol = float(tolerance)
    rounds = int(max_round)
    normalize = bool(normalized)
    app = AppAssets(algo="hits")
    return app(graph, tol, rounds, normalize)
def pagerank(graph, delta=0.85, max_round=10):
    """Evaluate PageRank on a graph.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        delta (float, optional): Damping factor. Defaults to 0.85.
        max_round (int, optional): Maximum number of rounds. Defaults to 10.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with the pagerank value,
            evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.pagerank(pg, delta=0.85, max_round=10)
        >>> sess.close()
    """
    # Normalise the numeric arguments before they are handed to the app.
    damping = float(delta)
    rounds = int(max_round)
    app = AppAssets(algo="pagerank", context="vertex_data")
    return app(graph, damping, rounds)
def pagerank_nx(graph, alpha=0.85, max_iter=100, tol=1e-06):
    """Evaluate pagerank on a graph, following the NetworkX implementation.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        alpha (float, optional): Damping factor. Defaults to 0.85.
        max_iter (int, optional): Maximum number of iteration.
            Defaults to 100.
        tol (float, optional): Error tolerance used to check convergence in
            power method solver. Defaults to 1e-06.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with the pagerank value,
            evaluated in eager mode.

    Raises:
        TypeError, ValueError: If an argument cannot be converted to the
            numeric type listed above.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.pagerank_nx(pg, alpha=0.85, max_iter=10, tol=1e-06)
        >>> sess.close()
    """
    alpha = float(alpha)
    max_iter = int(max_iter)
    # tol gets the same normalisation as alpha/max_iter, so an int-like or
    # numpy tolerance reaches the app as a plain Python float.
    tol = float(tol)
    return AppAssets(algo="pagerank_nx", context="vertex_data")(
        graph, alpha, max_iter, tol
    )
def sssp(graph, src=0, weight=None):
    """Compute single source shortest path length on the `graph`.

    Note that the `sssp` algorithm requires an numerical property on the edge.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        src (optional): The source vertex. The type should be consistent
            with the id type of the `graph`, that is, it's `int` or `str`
            depending on the `oid_type` is `int64_t` or `string` of the
            `graph`. Defaults to 0.
        weight (str, optional): The edge data key corresponding to the edge
            weight. Note that property under multiple labels should have the
            consistent index. Defaults to None.
            NOTE(review): not referenced in this function body - presumably
            consumed by a wrapper outside this view; confirm.

    Returns:
        :class:`graphscope.framework.context.VertexDataContextDAGNode`:
            A context with each vertex assigned with the shortest distance
            from the `src`, evaluated in eager mode.

    Examples:

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_p2p_network
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_p2p_network(sess)
        >>> # project to a simple graph (if needed)
        >>> pg = g.project(vertices={"host": ["id"]}, edges={"connect": ["dist"]})
        >>> c = graphscope.sssp(pg, src=6)
        >>> sess.close()
    """
    # src is forwarded untouched so it keeps the graph's own id type.
    app = AppAssets(algo="sssp", context="vertex_data")
    return app(graph, src)
def test_errors_on_create_app(arrow_property_graph, arrow_project_graph):
    """Check that mismatched or unknown apps raise ``CompilationError``."""
    # builtin-property app is incompatible with projected graph
    with pytest.raises(graphscope.CompilationError):
        AppAssets(algo="property_sssp")(arrow_project_graph, 4)

    # builtin app is incompatible with property graph
    with pytest.raises(graphscope.CompilationError):
        AppAssets(algo="sssp")(arrow_property_graph, 4)

    # algo not exist
    missing_algo_message = "Algorithm does not exist in the gar resource"
    with pytest.raises(graphscope.CompilationError, match=missing_algo_message):
        AppAssets(algo="invalid")(arrow_property_graph, 4)
def shortest_path(G, source=None, target=None, weight=None):
    """Run the ``sssp_path`` app from ``source`` on a simple projection of G.

    Args:
        G: Graph object exposing ``project_to_simple(e_prop=...)``.
        source: Start vertex handed to the app.
        target: Accepted for signature compatibility; currently ignored.
        weight: Edge property used for the projection. When ``None`` the
            projection uses ``"weight"`` and the app is called with
            ``weight=False``; otherwise the app is called with
            ``weight=True``.

    Returns:
        The context returned by the ``sssp_path`` app.
    """
    # FIXME: target and method not support.
    weighted = weight is not None
    # Without an explicit key, the projection falls back to "weight".
    edge_prop = weight if weighted else "weight"
    simple_graph = G.project_to_simple(e_prop=edge_prop)
    path_app = AppAssets(algo="sssp_path")
    return path_app(simple_graph, source, weight=weighted)
def test_simple_context_to_vineyard_tensor(simple_context, p2p_project_directed_graph):
    """Check that contexts can be exported as vineyard tensors."""
    # Both the vertex id selector and the result selector must give a value.
    for selector in ("v.id", "r"):
        exported = simple_context.to_vineyard_tensor(selector)
        assert exported is not None

    path_app = AppAssets(algo="sssp_has_path")
    path_ctx = path_app(p2p_project_directed_graph, source=6, target=3728)
    assert path_ctx.to_vineyard_tensor(axis=0) is not None
def property_sssp(graph, src=0):
    """Compute single source shortest path on graph G.

    Args:
        graph (Graph): a property graph.
        src (int, optional): the source. Defaults to 0.

    Returns:
        A context with each vertex assigned with the shortest distance from
        the src.
    """
    app = AppAssets(algo="property_sssp")
    return app(graph, src)
def numeric_assortativity_coefficient(graph, attribute):
    """Compute assortativity for numerical node attributes.

    Assortativity measures the similarity of connections
    in the graph with respect to the given numeric attribute.

    Args:
        graph (:class:`graphscope.Graph`): A simple graph.
        attribute (str): Node attribute key.
            NOTE(review): not referenced in this function body - presumably
            consumed by a wrapper outside this view; confirm.

    Returns:
        r (float): Assortativity of graph for given attribute

    Examples
    --------

    .. code:: python

        >>> import graphscope
        >>> from graphscope.dataset import load_modern_graph
        >>> sess = graphscope.session(cluster_type="hosts", mode="eager")
        >>> g = load_modern_graph(sess)
        >>> g.schema
        >>> c = graphscope.numeric_assortativity_coefficient(g, attribute="name")
        >>> sess.close()

    Notes
    -----
    This computes Eq. (21) in Ref. [1]_ , for the mixing matrix
    of the specified attribute.

    References
    ----------
    .. [1] M. E. J. Newman, Mixing patterns in networks
           Physical Review E, 67 026126, 2003
    """
    app = AppAssets(algo="attribute_assortativity_coefficient", context="tensor")
    # NOTE(review): the literal True presumably selects the numeric variant of
    # the attribute assortativity app - confirm against the app's signature.
    ctx = app(graph, True)
    coefficients = ctx.to_numpy("r", axis=0)
    return coefficients[0]
def _betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    """Run a betweenness centrality app on ``G``.

    Args:
        G: Graph the app is run on.
        k: Not referenced in this function body.
        normalized: Forwarded to the app as ``normalized``.
        weight: Only selects the app: ``None`` picks
            ``"betweenness_centrality"``, anything else picks
            ``"betweenness_centrality_generic"``.
        endpoints: Forwarded to the app as ``endpoints``.
        seed: Not referenced in this function body.

    Returns:
        The ``"vertex_data"`` context returned by the selected app.
    """
    algorithm = (
        "betweenness_centrality"
        if weight is None
        else "betweenness_centrality_generic"
    )
    app = AppAssets(algo=algorithm, context="vertex_data")
    return app(G, normalized=normalized, endpoints=endpoints)