Exemplo n.º 1
0
def test_error_on_parameters_not_correct(arrow_project_graph):
    """Check that ``pagerank`` raises on malformed positional arguments.

    Each case lists the exception type that is expected, the pattern its
    message must match, and the positional arguments passed after the graph.
    """
    failing_calls = (
        # "name=value" string where the second positional argument goes.
        (ValueError,
         "could not convert string to float",
         ("delta=0.85", 10)),
        # "name=value" string where the third positional argument goes.
        (ValueError,
         r"invalid literal for int\(\) with base 10",
         (0.85, "max_round=10")),
        # Too many positional arguments.
        (TypeError,
         "takes from 1 to 3 positional arguments but 6 were given",
         (0.85, 10, 100, 1000, 10000)),
    )
    for expected_error, message_pattern, extra_args in failing_calls:
        with pytest.raises(expected_error, match=message_pattern):
            pagerank(arrow_project_graph, *extra_args)
Exemplo n.º 2
0
def test_add_column_string_oid(p2p_property_graph_string,
                               p2p_project_directed_graph_string):
    """Check that ``add_column`` adds a "pagerank" property to "person".

    PageRank is run on the projected graph fixture and its result is added to
    the property graph fixture; the returned graph must expose the new
    property while the input graph did not have it beforehand.
    """

    def person_property_names(graph):
        """Return the names of the "person" vertex properties of ``graph``."""
        return [
            prop.name for prop in graph.schema.get_vertex_properties("person")
        ]

    property_graph = p2p_property_graph_string
    projected_graph = p2p_project_directed_graph_string

    assert "pagerank" not in person_property_names(property_graph)

    pagerank_ctx = graphscope.pagerank(projected_graph)
    augmented_graph = property_graph.add_column(pagerank_ctx,
                                                selector={"pagerank": "r"})

    assert "pagerank" in person_property_names(augmented_graph)
Exemplo n.º 3
0
def test_demo_distribute(gs_session_distributed, data_dir,
                         modern_graph_data_dir):
    """Exercise subgraph extraction and analytics on the distributed session.

    For the LDBC graph, the person/"knows" subgraph is extracted and its
    vertex and edge counts are compared with counts queried on the full
    graph; PageRank and triangle counting are then run on its simple
    projection and both results are added to the subgraph as columns.  For
    the modern graph only the vertex count of the subgraph is compared.
    """
    session = gs_session_distributed
    knows_traversal = 'g.V().hasLabel("person").outE("knows")'

    def first_of(client, query):
        """Submit ``query`` through ``client`` and return its first result."""
        return client.execute(query).all().result()[0]

    # ---- LDBC graph: interactive engine ----
    ldbc_graph = load_ldbc(session, data_dir)
    ldbc_client = session.gremlin(ldbc_graph)
    sub_graph = ldbc_client.subgraph(knows_traversal)
    expected_vertices = first_of(
        ldbc_client, knows_traversal + ".bothV().dedup().count()")
    expected_edges = first_of(ldbc_client, knows_traversal + ".count()")

    sub_client = session.gremlin(sub_graph)
    actual_vertices = first_of(sub_client, "g.V().count()")
    actual_edges = first_of(sub_client, "g.E().count()")
    assert expected_vertices == actual_vertices
    assert expected_edges == actual_edges

    # ---- LDBC graph: analytical engine ----
    # Project the extracted subgraph down to a simple graph.
    simple_graph = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")
    pagerank_ctx = graphscope.pagerank(simple_graph, delta=0.8)
    triangles_ctx = graphscope.triangles(simple_graph)

    # Add the PageRank and triangle-counting results as new columns.
    # FIXME: Add column to sub_graph
    sub_graph.add_column(pagerank_ctx, {"Ranking": "r"})
    sub_graph.add_column(triangles_ctx, {"TC": "r"})

    # ---- Modern graph: subgraph extraction only ----
    modern_graph = load_modern_graph(session, modern_graph_data_dir)
    modern_client = session.gremlin(modern_graph)
    modern_sub_graph = modern_client.subgraph(knows_traversal)
    expected_vertices = first_of(
        modern_client, knows_traversal + ".bothV().dedup().count()")

    modern_sub_client = session.gremlin(modern_sub_graph)
    actual_vertices = first_of(modern_sub_client, "g.V().count()")
    assert expected_vertices == actual_vertices
Exemplo n.º 4
0
def test_demo(gs_session, data_dir):
    """Run the LDBC demo pipeline on the session provided by the fixture.

    Loads the LDBC graph, extracts the person/"knows" subgraph through the
    interactive client, runs PageRank and triangle counting on its simple
    projection and adds both results to the subgraph as columns.
    """
    ldbc_graph = load_ldbc(gs_session, data_dir)

    # Interactive engine: extract the subgraph with a Gremlin traversal.
    gremlin_client = gs_session.gremlin(ldbc_graph)
    knows_subgraph = gremlin_client.subgraph(
        'g.V().hasLabel("person").outE("knows")')

    # Analytical engine: the apps run on a simple projection of the subgraph.
    simple_graph = knows_subgraph.project_to_simple(v_label="person",
                                                    e_label="knows")
    pagerank_ctx = graphscope.pagerank(simple_graph, delta=0.8)
    triangles_ctx = graphscope.triangles(simple_graph)

    # Add the PageRank and triangle-counting results as new columns.
    # FIXME: Add column to sub_graph
    for column_name, app_ctx in (("Ranking", pagerank_ctx),
                                 ("TC", triangles_ctx)):
        knows_subgraph.add_column(app_ctx, {column_name: "r"})
Exemplo n.º 5
0
def test_demo(data_dir):
    """Run the LDBC demo pipeline on a session created by the test itself.

    A single-worker session is created with the CI images and the k8s
    resource settings listed below.  The test then loads the LDBC graph from
    ``data_dir``, extracts the person/"knows" subgraph through the
    interactive client, runs PageRank and triangle counting on its simple
    projection and adds both results to the subgraph as columns.

    The session is closed in a ``finally`` clause so that it is released even
    when one of the steps raises; otherwise a failing run would leave the
    session open.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_etcd_cpu=2,
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )
    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")'
        )

        # Analytical engine
        # project the extracted subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # GNN engine: nothing is exercised for it in this test.
    finally:
        # Always release the session, including when a step above failed.
        sess.close()
Exemplo n.º 6
0
def test_run_app_on_directed_graph(
    p2p_project_directed_graph,
    sssp_result,
    pagerank_result,
    hits_result,
    bfs_result,
    clustering_result,
    dc_result,
    ev_result,
    katz_result,
):
    """Run the built-in apps on the directed p2p projection and check results.

    Covers sssp, pagerank, hits, bfs, is_simple_path, clustering,
    degree_centrality and eigenvector_centrality against the expected-result
    fixtures, and checks that louvain rejects the directed graph.
    katz_centrality is only invoked; ``katz_result`` is not compared in this
    body.
    """
    graph = p2p_project_directed_graph
    double_max = 1.7976931348623157e308

    def sample_range():
        """Return a fresh ``vertex_range`` mapping with begin=1 and end=4."""
        return {"begin": 1, "end": 4}

    def by_node(ctx, column="r", source="r", **frame_kwargs):
        """Return ``ctx`` as a (node, ``column``) dataframe sorted by node."""
        frame = ctx.to_dataframe({"node": "v.id", column: source},
                                 **frame_kwargs)
        return frame.sort_values(by=["node"])

    def max_to_inf(values):
        """Replace entries equal to ``double_max`` with ``inf`` in place."""
        values[values == double_max] = float("inf")
        return values

    # ---- sssp: source given by keyword, then positionally ----
    sssp_kw_ctx = sssp(graph, src=6)
    sssp_kw = max_to_inf(by_node(sssp_kw_ctx).to_numpy(dtype=float))
    assert np.allclose(sssp_kw, sssp_result["directed"])

    sssp_pos_ctx = sssp(graph, 6)
    sssp_pos = max_to_inf(by_node(sssp_pos_ctx).to_numpy(dtype=float))
    assert np.allclose(sssp_pos, sssp_result["directed"])

    sssp_slice = by_node(sssp_pos_ctx, vertex_range=sample_range()).to_numpy()
    assert np.allclose(
        sssp_slice,
        [[1.0, 260.0], [2.0, 229.0], [3.0, 310.0]],
    )
    sssp_values = sorted(sssp_kw_ctx.to_numpy("r", vertex_range=sample_range()))
    assert np.allclose(sssp_values, [229.0, 260.0, 310.0])

    # A very large source id must still produce a result object.
    sssp_far_ctx = sssp(graph, 100000000)
    assert sssp_far_ctx is not None

    # ---- pagerank ----
    pagerank_ctx = pagerank(graph, delta=0.85, max_round=10)
    pagerank_scores = by_node(pagerank_ctx).to_numpy(dtype=float)
    assert np.allclose(pagerank_scores, pagerank_result["directed"])

    # ---- hits: hub and authority are read from the same context ----
    hits_ctx = hits(graph, tolerance=0.001)
    hub_scores = by_node(hits_ctx, "hub", "r.hub").to_numpy(dtype=float)
    auth_scores = by_node(hits_ctx, "auth", "r.auth").to_numpy(dtype=float)
    assert np.allclose(hub_scores, hits_result["hub"])
    assert np.allclose(auth_scores, hits_result["auth"])

    # ---- bfs: source given by keyword, then positionally ----
    bfs_kw_ctx = bfs(graph, src=6)
    bfs_kw = by_node(bfs_kw_ctx).to_numpy(dtype=int)
    assert np.all(bfs_kw == bfs_result["directed"])

    bfs_pos_ctx = bfs(graph, 6)
    bfs_pos = by_node(bfs_pos_ctx).to_numpy(dtype=int)
    assert np.all(bfs_pos == bfs_result["directed"])

    bfs_slice = by_node(bfs_pos_ctx, vertex_range=sample_range()).to_numpy()
    assert np.all(bfs_slice == [[1, 5], [2, 5], [3, 6]])
    bfs_values = sorted(bfs_pos_ctx.to_numpy("r", vertex_range=sample_range()))
    assert np.all(bfs_values == [5, 5, 6])

    # ---- simple_path ----
    assert is_simple_path(graph, [1, 10])

    # ---- louvain is expected to reject this graph ----
    with pytest.raises(InvalidArgumentError,
                       match="Louvain not support directed graph."):
        louvain(graph)

    # ---- clustering ----
    clustering_ctx = clustering(graph)
    clustering_scores = by_node(clustering_ctx).to_numpy(dtype=float)
    assert np.allclose(clustering_scores, clustering_result["directed"])

    # ---- degree_centrality ----
    degree_ctx = degree_centrality(graph)
    degree_scores = by_node(degree_ctx).to_numpy(dtype=float)
    assert np.allclose(degree_scores, dc_result["directed"])

    # ---- eigenvector_centrality ----
    eigenvector_ctx = eigenvector_centrality(graph)
    eigenvector_scores = by_node(eigenvector_ctx).to_numpy(dtype=float)
    assert np.allclose(eigenvector_scores, ev_result["directed"])

    # ---- katz_centrality: invoked only, result is not checked here ----
    katz_ctx = katz_centrality(graph)
Exemplo n.º 7
0
def test_app_on_undirected_graph(
    p2p_project_undirected_graph,
    sssp_result,
    pagerank_result,
    bfs_result,
    wcc_result,
    lpa_result,
    triangles_result,
    kshell_result,
):
    """Run the built-in apps on the undirected p2p projection and check results.

    Covers sssp, pagerank (keyword, positional, string and default
    arguments), bfs, wcc, lpa, k_shell and triangles against the
    expected-result fixtures, invokes louvain without checking its output,
    and asserts ``is_simple_path`` for the path ``[1, 10]``.
    """
    graph = p2p_project_undirected_graph
    double_max = 1.7976931348623157e308

    def sample_range():
        """Return a fresh ``vertex_range`` mapping with begin=1 and end=4."""
        return {"begin": 1, "end": 4}

    def by_node(ctx, **frame_kwargs):
        """Return ``ctx`` as a (node, r) dataframe sorted by node."""
        frame = ctx.to_dataframe({"node": "v.id", "r": "r"}, **frame_kwargs)
        return frame.sort_values(by=["node"])

    def range_rows(ctx):
        """Return the node-sorted (node, r) rows of the sample vertex range."""
        return by_node(ctx, vertex_range=sample_range()).to_numpy()

    def range_values(ctx):
        """Return the raw "r" values of the sample vertex range."""
        return ctx.to_numpy("r", vertex_range=sample_range())

    # ---- sssp ----
    sssp_ctx = sssp(graph, src=6)
    sssp_rows = by_node(sssp_ctx).to_numpy(dtype=float)
    # replace limit<double>::max with inf
    sssp_rows[sssp_rows == double_max] = float("inf")
    assert np.allclose(sssp_rows, sssp_result["undirected"])
    assert np.allclose(
        range_rows(sssp_ctx),
        [[1.0, 31.0], [2.0, 39.0], [3.0, 78.0]],
    )
    assert np.allclose(
        sorted(range_values(sssp_ctx)),
        [31.0, 39.0, 78.0],
    )

    # ---- pagerank: keyword arguments ----
    pagerank_kw_ctx = pagerank(graph, delta=0.85, max_round=10)
    pagerank_kw = by_node(pagerank_kw_ctx).to_numpy(dtype=float)
    assert np.allclose(pagerank_kw, pagerank_result["undirected"])

    # ---- pagerank: positional arguments ----
    pagerank_pos_ctx = pagerank(graph, 0.85, 10)
    pagerank_pos = by_node(pagerank_pos_ctx).to_numpy(dtype=float)
    assert np.allclose(pagerank_pos, pagerank_result["undirected"])

    # NOTE: the original test keeps a swapped-argument call
    # (pagerank(graph, 10, 0.85)) disabled; it stays disabled here.

    # ---- pagerank: numeric strings ----
    pagerank_str_ctx = pagerank(graph, "0.85", "10")
    pagerank_str = by_node(pagerank_str_ctx).to_numpy(dtype=float)
    assert np.allclose(pagerank_str, pagerank_result["undirected"])

    # ---- pagerank: default arguments ----
    pagerank_default_ctx = pagerank(graph)
    pagerank_default = by_node(pagerank_default_ctx).to_numpy(dtype=float)
    assert np.allclose(pagerank_default, pagerank_result["undirected"])
    assert np.allclose(
        range_rows(pagerank_default_ctx),
        [
            [1.0, 6.153724343761569e-05],
            [2.0, 9.280361872165397e-05],
            [3.0, 1.643246086005906e-05],
        ],
    )
    assert np.allclose(
        sorted(range_values(pagerank_default_ctx)),
        sorted([
            6.153724343761569e-05, 9.280361872165397e-05, 1.643246086005906e-05
        ]),
    )

    # ---- bfs ----
    bfs_ctx = bfs(graph, src=6)
    bfs_rows = by_node(bfs_ctx).to_numpy(dtype=int)
    assert np.all(bfs_rows == bfs_result["undirected"])
    assert np.all(range_rows(bfs_ctx) == [[1, 1], [2, 2], [3, 2]])
    assert np.all(sorted(range_values(bfs_ctx)) == [1, 2, 2])

    # ---- wcc (range values are compared unsorted) ----
    wcc_ctx = wcc(graph)
    wcc_rows = by_node(wcc_ctx).to_numpy(dtype=int)
    assert np.all(wcc_rows == wcc_result)
    assert np.all(range_rows(wcc_ctx) == [[1, 1], [2, 1], [3, 1]])
    assert np.all(range_values(wcc_ctx) == [1, 1, 1])

    # ---- lpa ----
    lpa_ctx = lpa(graph, max_round=10)
    lpa_rows = by_node(lpa_ctx).to_numpy(dtype=int)
    assert np.all(lpa_rows == lpa_result)
    assert np.all(range_rows(lpa_ctx) == [[1, 1], [2, 2], [3, 2]])
    assert np.all(sorted(range_values(lpa_ctx)) == [1, 2, 2])

    # ---- kshell (range values are compared unsorted) ----
    scratch_ctx = k_shell(graph, k=3)
    kshell_rows = by_node(scratch_ctx).to_numpy(dtype=int)
    assert np.all(kshell_rows == kshell_result)
    assert np.all(range_rows(scratch_ctx) == [[1, 0], [2, 0], [3, 0]])
    assert np.all(range_values(scratch_ctx) == [0, 0, 0])

    # ---- triangles ----
    triangles_ctx = triangles(graph)
    triangle_rows = by_node(triangles_ctx).to_numpy(dtype=float)
    assert np.allclose(triangle_rows, triangles_result["undirected"])

    # ---- louvain: invoked only, its output is not checked ----
    # Rebinds ``scratch_ctx`` on purpose: the original test reused a single
    # name for the k_shell and louvain contexts, and keeping that preserves
    # the point at which the k_shell context loses its last reference.
    scratch_ctx = louvain(graph, min_progress=50, progress_tries=2)

    # ---- simple_path ----
    assert is_simple_path(graph, [1, 10])
Exemplo n.º 8
0
def test_demo_distribute(data_dir, modern_graph_data_dir):
    """Exercise subgraph extraction and analytics on a self-created session.

    A single-worker session is created with the CI images and the k8s
    resource settings listed below.  For the LDBC graph, the person/"knows"
    subgraph is extracted and its vertex and edge counts are compared with
    counts queried on the full graph; PageRank and triangle counting are then
    run on its simple projection and both results are added to the subgraph
    as columns.  For the modern graph only the vertex count of the subgraph
    is compared.

    The session is closed in a ``finally`` clause so that it is released even
    when an assertion or any other step fails; otherwise a failing run would
    leave the session open.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )
    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")')
        person_count = (interactive.execute(
            'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
        ).all().result()[0])
        knows_count = (interactive.execute(
            'g.V().hasLabel("person").outE("knows").count()'
        ).all().result()[0])
        interactive2 = sess.gremlin(sub_graph)
        sub_person_count = interactive2.execute(
            "g.V().count()").all().result()[0]
        sub_knows_count = interactive2.execute(
            "g.E().count()").all().result()[0]
        # The subgraph must contain exactly the vertices and edges that the
        # same traversal reaches on the full graph.
        assert person_count == sub_person_count
        assert knows_count == sub_knows_count

        # Analytical engine
        # project the extracted subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # test subgraph on modern graph
        mgraph = load_modern_graph(sess, modern_graph_data_dir)

        # Interactive engine
        minteractive = sess.gremlin(mgraph)
        msub_graph = minteractive.subgraph(
            'g.V().hasLabel("person").outE("knows")')
        person_count = (minteractive.execute(
            'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
        ).all().result()[0])
        msub_interactive = sess.gremlin(msub_graph)
        sub_person_count = msub_interactive.execute(
            "g.V().count()").all().result()[0]
        assert person_count == sub_person_count

        # GNN engine: nothing is exercised for it in this test.
    finally:
        # Always release the session, including when a step above failed.
        sess.close()