示例#1
0
def test_demo(data_dir):
    """Exercise the interactive and analytical engines on the LDBC graph.

    A one-worker session is created (using the image returned by
    ``get_gs_image_on_ci_env`` when it reports a CI environment), a
    "person knows" subgraph is extracted through Gremlin, PageRank and
    triangle counting run on its simple projection, and both results are
    handed to ``add_column``.

    The session is closed in a ``finally`` clause so that a failing step
    does not leave it open.

    Args:
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
    """
    image, ci = get_gs_image_on_ci_env()

    # Only pass the image explicitly on CI; otherwise rely on the default.
    session_kwargs = {"show_log": True, "num_workers": 1}
    if ci:
        session_kwargs["k8s_gs_image"] = image
    sess = graphscope.session(**session_kwargs)

    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")')

        # Analytical engine
        # project the subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # GNN engine: not exercised in this test.
    finally:
        # Always release the session, even when a step above raised.
        sess.close()
示例#2
0
def test_demo_distribute(data_dir, modern_graph_data_dir):
    """Run the demo pipeline on a two-worker session and check subgraphs.

    For the LDBC graph, the vertex and edge counts of the extracted
    "person knows" subgraph are compared with the counts obtained by running
    the equivalent Gremlin queries on the original graph. PageRank and
    triangle counting are then run on the simple projection and passed to
    ``add_column``. The vertex-count check is repeated for the modern graph.

    The session is closed in a ``finally`` clause so that a failed assertion
    or engine error does not leave it open.

    Args:
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
        modern_graph_data_dir: Location of the modern graph data, forwarded
            to ``load_modern_graph``.
    """
    image, ci = get_gs_image_on_ci_env()

    # Only pass the image explicitly on CI; otherwise rely on the default.
    session_kwargs = {"show_log": True, "num_workers": 2}
    if ci:
        session_kwargs["k8s_gs_image"] = image
    sess = graphscope.session(**session_kwargs)

    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")')
        person_count = (
            interactive.execute(
                'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
            )
            .all()
            .result()[0]
        )
        knows_count = (
            interactive.execute(
                'g.V().hasLabel("person").outE("knows").count()')
            .all()
            .result()[0]
        )

        # Query the extracted subgraph through its own interactive instance.
        interactive2 = sess.gremlin(sub_graph)
        sub_person_count = (
            interactive2.execute("g.V().count()").all().result()[0])
        sub_knows_count = (
            interactive2.execute("g.E().count()").all().result()[0])
        assert person_count == sub_person_count
        assert knows_count == sub_knows_count

        # Analytical engine
        # project the subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # test subgraph on modern graph
        mgraph = load_modern_graph(sess, modern_graph_data_dir)

        # Interactive engine
        minteractive = sess.gremlin(mgraph)
        msub_graph = minteractive.subgraph(
            'g.V().hasLabel("person").outE("knows")')
        person_count = (
            minteractive.execute(
                'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
            )
            .all()
            .result()[0]
        )
        msub_interactive = sess.gremlin(msub_graph)
        sub_person_count = (
            msub_interactive.execute("g.V().count()").all().result()[0])
        assert person_count == sub_person_count

        # GNN engine: not exercised in this test.
    finally:
        # Always release the session, even when a step above raised.
        sess.close()
示例#3
0
def test_demo(data_dir):
    """Exercise the interactive and analytical engines on the LDBC graph.

    A one-worker session is created with explicit Kubernetes image and
    resource settings, a "person knows" subgraph is extracted through
    Gremlin, PageRank and triangle counting run on its simple projection,
    and both results are handed to ``add_column``.

    The session is closed in a ``finally`` clause so that a failing step
    does not leave it open.

    Args:
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_vineyard_shared_mem="2Gi",
    )

    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")')

        # Analytical engine
        # project the subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(v_label="person",
                                               e_label="knows")

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # GNN engine: not exercised in this test.
    finally:
        # Always release the session, even when a step above raised.
        sess.close()
示例#4
0
def test_demo_with_default_session(ogbn_small_script):
    """Run the ogbn_mag demo end to end through the module-level API.

    The graph from ``load_ogbn_mag`` is queried with ``ogbn_small_script``,
    a subgraph of papers and their ``cites`` edges is extracted, k-core and
    triangle counting results are added to it as columns, and the resulting
    graph is passed to ``graphscope.graphlearn`` and then to ``train``.

    Args:
        ogbn_small_script: Gremlin script executed on the loaded graph.
    """
    ogbn_graph = load_ogbn_mag()

    # Interactive engine
    gremlin = graphscope.gremlin(ogbn_graph)
    papers = gremlin.execute(ogbn_small_script).one()  # noqa: F841

    cited_papers = gremlin.subgraph(
        "g.timeout(1000000).V().has('year', inside(2014, 2020)).outE('cites')")

    projected = cited_papers.project(vertices={"paper": []},
                                     edges={"cites": []})

    kcore_ctx = graphscope.k_core(projected, k=5)
    triangle_ctx = graphscope.triangles(projected)

    cited_papers = cited_papers.add_column(kcore_ctx, {"kcore": "r"})
    cited_papers = cited_papers.add_column(triangle_ctx, {"tc": "r"})

    # GLE on ogbn_mag_small graph
    # 128 "feat_<i>" columns followed by the two columns added above.
    paper_features = ["feat_" + str(idx) for idx in range(128)]
    paper_features += ["kcore", "tc"]

    node_spec = [("paper", paper_features)]
    edge_spec = [("paper", "cites", "paper")]
    label_spec = [
        ("train", "paper", 100, (0, 75)),
        ("val", "paper", 100, (75, 85)),
        ("test", "paper", 100, (85, 100)),
    ]
    learn_graph = graphscope.graphlearn(
        cited_papers,
        nodes=node_spec,
        edges=edge_spec,
        gen_labels=label_spec,
    )

    # hyperparameters config.
    hyper_params = {
        "class_num": 349,  # output dimension
        "features_num": 130,  # 128 dimension + kcore + triangle count
        "batch_size": 500,
        "val_batch_size": 100,
        "test_batch_size": 100,
        "categorical_attrs_desc": "",
        "hidden_dim": 256,
        "in_drop_rate": 0.5,
        "hops_num": 2,
        "neighs_num": [5, 10],
        "full_graph_mode": False,
        "agg_type": "gcn",  # mean, sum
        "learning_algo": "adam",
        "learning_rate": 0.01,
        "weight_decay": 0.0005,
        "epoch": 2,
        "node_type": "paper",
        "edge_type": "cites",
    }

    train(hyper_params, learn_graph)
示例#5
0
def test_demo_distribute(gs_session_distributed, data_dir,
                         modern_graph_data_dir):
    """Check subgraph extraction and analytics on a provided session.

    For the LDBC graph, the vertex and edge counts of the extracted
    "person knows" subgraph are compared with the counts obtained by running
    the equivalent Gremlin queries on the original graph. PageRank and
    triangle counting are then run on the simple projection and passed to
    ``add_column``. The vertex-count check is repeated for the modern graph.

    Args:
        gs_session_distributed: Session used for loading graphs and creating
            interactive instances.
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
        modern_graph_data_dir: Location of the modern graph data, forwarded
            to ``load_modern_graph``.
    """
    session = gs_session_distributed
    ldbc_graph = load_ldbc(session, data_dir)

    # Interactive engine
    ldbc_gremlin = session.gremlin(ldbc_graph)
    knows_subgraph = ldbc_gremlin.subgraph(
        'g.V().hasLabel("person").outE("knows")')

    endpoint_query = ldbc_gremlin.execute(
        'g.V().hasLabel("person").outE("knows").bothV().dedup().count()')
    expected_vertices = endpoint_query.all().result()[0]

    edge_query = ldbc_gremlin.execute(
        'g.V().hasLabel("person").outE("knows").count()')
    expected_edges = edge_query.all().result()[0]

    # Count what actually ended up in the extracted subgraph.
    subgraph_gremlin = session.gremlin(knows_subgraph)
    actual_vertices = (
        subgraph_gremlin.execute("g.V().count()").all().result()[0])
    actual_edges = (
        subgraph_gremlin.execute("g.E().count()").all().result()[0])
    assert expected_vertices == actual_vertices
    assert expected_edges == actual_edges

    # Analytical engine
    # project the subgraph to a simple graph.
    simple_graph = knows_subgraph.project_to_simple(v_label="person",
                                                    e_label="knows")

    pagerank_ctx = graphscope.pagerank(simple_graph, delta=0.8)
    triangle_ctx = graphscope.triangles(simple_graph)

    # add the PageRank and triangle-counting results as new columns to the
    # property graph
    # FIXME: Add column to sub_graph
    knows_subgraph.add_column(pagerank_ctx, {"Ranking": "r"})
    knows_subgraph.add_column(triangle_ctx, {"TC": "r"})

    # test subgraph on modern graph
    modern_graph = load_modern_graph(session, modern_graph_data_dir)

    # Interactive engine
    modern_gremlin = session.gremlin(modern_graph)
    modern_subgraph = modern_gremlin.subgraph(
        'g.V().hasLabel("person").outE("knows")')

    modern_endpoint_query = modern_gremlin.execute(
        'g.V().hasLabel("person").outE("knows").bothV().dedup().count()')
    expected_vertices = modern_endpoint_query.all().result()[0]

    modern_subgraph_gremlin = session.gremlin(modern_subgraph)
    actual_vertices = (
        modern_subgraph_gremlin.execute("g.V().count()").all().result()[0])
    assert expected_vertices == actual_vertices
示例#6
0
def test_demo(gs_session, data_dir):
    """Exercise the interactive and analytical engines on a provided session.

    A "person knows" subgraph of the LDBC graph is extracted through
    Gremlin, PageRank and triangle counting run on its simple projection,
    and both results are handed to ``add_column``.

    Args:
        gs_session: Session used to load the graph and create the
            interactive instance.
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
    """
    ldbc_graph = load_ldbc(gs_session, data_dir)

    # Interactive engine
    gremlin = gs_session.gremlin(ldbc_graph)
    knows_subgraph = gremlin.subgraph(
        'g.V().hasLabel("person").outE("knows")')

    # Analytical engine
    # project the subgraph to a simple graph.
    simple_graph = knows_subgraph.project_to_simple(v_label="person",
                                                    e_label="knows")

    pagerank_ctx = graphscope.pagerank(simple_graph, delta=0.8)
    triangle_ctx = graphscope.triangles(simple_graph)

    # add the PageRank and triangle-counting results as new columns to the
    # property graph
    # FIXME: Add column to sub_graph
    knows_subgraph.add_column(pagerank_ctx, {"Ranking": "r"})
    knows_subgraph.add_column(triangle_ctx, {"TC": "r"})
示例#7
0
def test_app_on_undirected_graph(
    p2p_project_undirected_graph,
    sssp_result,
    pagerank_result,
    bfs_result,
    wcc_result,
    lpa_result,
    triangles_result,
    kshell_result,
):
    """Run the built-in algorithms on the undirected p2p graph and verify them.

    Each algorithm's full result table is compared with the matching
    expected-result argument. For most algorithms, the rows and raw values
    selected with ``vertex_range={"begin": 1, "end": 4}`` are also compared
    with hard-coded values for nodes 1, 2 and 3.

    Args:
        p2p_project_undirected_graph: Graph every algorithm is run on.
        sssp_result: Expected SSSP tables; the ``"undirected"`` entry is used.
        pagerank_result: Expected PageRank tables; ``"undirected"`` is used.
        bfs_result: Expected BFS tables; ``"undirected"`` is used.
        wcc_result: Expected WCC table.
        lpa_result: Expected LPA table.
        triangles_result: Expected triangle tables; ``"undirected"`` is used.
        kshell_result: Expected k-shell table.
    """
    graph = p2p_project_undirected_graph

    def full_table(ctx, dtype):
        """Return all (node, r) rows of ``ctx`` ordered by node."""
        frame = ctx.to_dataframe({"node": "v.id", "r": "r"})
        return frame.sort_values(by=["node"]).to_numpy(dtype=dtype)

    def head_table(ctx):
        """Return the (node, r) rows of the 1..4 vertex range, ordered."""
        frame = ctx.to_dataframe(
            {"node": "v.id", "r": "r"},
            vertex_range={"begin": 1, "end": 4},
        )
        return frame.sort_values(by=["node"]).to_numpy()

    def head_values(ctx):
        """Return the raw ``r`` values of the 1..4 vertex range."""
        return ctx.to_numpy("r", vertex_range={"begin": 1, "end": 4})

    # sssp
    sssp_ctx = sssp(graph, src=6)
    sssp_table = full_table(sssp_ctx, float)
    # replace limit<double>::max with inf
    sssp_table[sssp_table == 1.7976931348623157e308] = float("inf")
    assert np.allclose(sssp_table, sssp_result["undirected"])
    assert np.allclose(
        head_table(sssp_ctx),
        [[1.0, 31.0], [2.0, 39.0], [3.0, 78.0]],
    )
    assert np.allclose(sorted(head_values(sssp_ctx)), [31.0, 39.0, 78.0])

    # pagerank (only work on undirected graph)
    expected_pagerank = pagerank_result["undirected"]

    # keyword arguments
    pr_keyword_ctx = pagerank(graph, delta=0.85, max_round=10)
    assert np.allclose(full_table(pr_keyword_ctx, float), expected_pagerank)

    # positional arguments
    pr_positional_ctx = pagerank(graph, 0.85, 10)
    assert np.allclose(full_table(pr_positional_ctx, float),
                       expected_pagerank)

    # r4 = pagerank(arrow_project_graph, 10, 0.85) # check max_round=10
    # assert r4 is not None

    # string arguments
    pr_string_ctx = pagerank(graph, "0.85", "10")
    assert np.allclose(full_table(pr_string_ctx, float), expected_pagerank)

    # default arguments
    pr_default_ctx = pagerank(graph)
    assert np.allclose(full_table(pr_default_ctx, float), expected_pagerank)
    assert np.allclose(
        head_table(pr_default_ctx),
        [
            [1.0, 6.153724343761569e-05],
            [2.0, 9.280361872165397e-05],
            [3.0, 1.643246086005906e-05],
        ],
    )
    assert np.allclose(
        sorted(head_values(pr_default_ctx)),
        sorted([
            6.153724343761569e-05, 9.280361872165397e-05, 1.643246086005906e-05
        ]),
    )

    # bfs
    bfs_ctx = bfs(graph, src=6)
    bfs_table = full_table(bfs_ctx, int)
    assert np.all(bfs_table == bfs_result["undirected"])
    assert np.all(head_table(bfs_ctx) == [[1, 1], [2, 2], [3, 2]])
    assert np.all(sorted(head_values(bfs_ctx)) == [1, 2, 2])

    # wcc
    wcc_ctx = wcc(graph)
    wcc_table = full_table(wcc_ctx, int)
    assert np.all(wcc_table == wcc_result)
    assert np.all(head_table(wcc_ctx) == [[1, 1], [2, 1], [3, 1]])
    assert np.all(head_values(wcc_ctx) == [1, 1, 1])

    # lpa
    lpa_ctx = lpa(graph, max_round=10)
    lpa_table = full_table(lpa_ctx, int)
    assert np.all(lpa_table == lpa_result)
    assert np.all(head_table(lpa_ctx) == [[1, 1], [2, 2], [3, 2]])
    assert np.all(sorted(head_values(lpa_ctx)) == [1, 2, 2])

    # kshell
    kshell_ctx = k_shell(graph, k=3)
    kshell_table = full_table(kshell_ctx, int)
    assert np.all(kshell_table == kshell_result)
    assert np.all(head_table(kshell_ctx) == [[1, 0], [2, 0], [3, 0]])
    assert np.all(head_values(kshell_ctx) == [0, 0, 0])

    # triangles
    triangles_ctx = triangles(graph)
    triangles_table = full_table(triangles_ctx, float)
    assert np.allclose(triangles_table, triangles_result["undirected"])

    # louvain: only checked to run; its result is not inspected.
    louvain_ctx = louvain(  # noqa: F841
        graph,
        min_progress=50,
        progress_tries=2,
    )

    # simple_path
    assert is_simple_path(graph, [1, 10])
示例#8
0
def test_demo_distribute(data_dir, modern_graph_data_dir):
    """Run the demo pipeline on a Kubernetes session and check subgraphs.

    A session is created with explicit image, resource and volume settings.
    For the LDBC graph, the vertex and edge counts of the extracted
    "person knows" subgraph are compared with the counts obtained by running
    the equivalent Gremlin queries on the original graph. PageRank and
    triangle counting are then run on the simple projection and passed to
    ``add_column``. The vertex-count check is repeated for the modern graph.

    The session is closed in a ``finally`` clause so that a failed assertion
    or engine error does not leave it open.

    Args:
        data_dir: Location of the LDBC data, forwarded to ``load_ldbc``.
        modern_graph_data_dir: Location of the modern graph data, forwarded
            to ``load_modern_graph``.
    """
    gs_image, gie_manager_image = get_gs_image_on_ci_env()
    sess = graphscope.session(
        num_workers=1,
        k8s_gs_image=gs_image,
        k8s_gie_graph_manager_image=gie_manager_image,
        k8s_coordinator_cpu=0.5,
        k8s_coordinator_mem="2500Mi",
        k8s_vineyard_cpu=0.1,
        k8s_vineyard_mem="512Mi",
        k8s_engine_cpu=0.1,
        k8s_engine_mem="1500Mi",
        k8s_etcd_cpu=2,
        k8s_vineyard_shared_mem="2Gi",
        k8s_volumes=get_k8s_volumes(),
    )

    try:
        graph = load_ldbc(sess, data_dir)

        # Interactive engine
        interactive = sess.gremlin(graph)
        sub_graph = interactive.subgraph(
            'g.V().hasLabel("person").outE("knows")'
        )
        person_count = (
            interactive.execute(
                'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
            )
            .all()
            .result()[0]
        )
        knows_count = (
            interactive.execute(
                'g.V().hasLabel("person").outE("knows").count()'
            )
            .all()
            .result()[0]
        )

        # Query the extracted subgraph through its own interactive instance.
        interactive2 = sess.gremlin(sub_graph)
        sub_person_count = (
            interactive2.execute("g.V().count()").all().result()[0]
        )
        sub_knows_count = (
            interactive2.execute("g.E().count()").all().result()[0]
        )
        assert person_count == sub_person_count
        assert knows_count == sub_knows_count

        # Analytical engine
        # project the subgraph to a simple graph.
        simple_g = sub_graph.project_to_simple(
            v_label="person", e_label="knows"
        )

        pr_result = graphscope.pagerank(simple_g, delta=0.8)
        tc_result = graphscope.triangles(simple_g)

        # add the PageRank and triangle-counting results as new columns to
        # the property graph
        # FIXME: Add column to sub_graph
        sub_graph.add_column(pr_result, {"Ranking": "r"})
        sub_graph.add_column(tc_result, {"TC": "r"})

        # test subgraph on modern graph
        mgraph = load_modern_graph(sess, modern_graph_data_dir)

        # Interactive engine
        minteractive = sess.gremlin(mgraph)
        msub_graph = minteractive.subgraph(
            'g.V().hasLabel("person").outE("knows")'
        )
        person_count = (
            minteractive.execute(
                'g.V().hasLabel("person").outE("knows").bothV().dedup().count()'
            )
            .all()
            .result()[0]
        )
        msub_interactive = sess.gremlin(msub_graph)
        sub_person_count = (
            msub_interactive.execute("g.V().count()").all().result()[0]
        )
        assert person_count == sub_person_count

        # GNN engine: not exercised in this test.
    finally:
        # Always release the session, even when a step above raised.
        sess.close()
示例#9
0
def triangles(G, nodes=None):
    """Run ``graphscope.triangles`` on the simple projection of ``G``.

    Args:
        G: Graph object providing ``project_to_simple()``.
        nodes: Accepted for signature compatibility but currently ignored.

    Returns:
        The result of ``to_dataframe`` on the triangle context, with a
        ``node`` column selected from ``v.id`` and a ``result`` column
        selected from ``r``.
    """
    # FIXME: nodes not support.
    selector = {"node": "v.id", "result": "r"}
    return graphscope.triangles(G.project_to_simple()).to_dataframe(selector)
示例#10
0
 def _triangles(G):
     """Return the result of ``graphscope.triangles`` for graph ``G``."""
     triangle_ctx = graphscope.triangles(G)
     return triangle_ctx