示例#1
0
文件: bfs.py 项目: pamvc/katana
def main():
    """Command-line entry point of the BFS example.

    Parses the options, builds a ``PropertyGraph`` from the ``input``
    argument, runs ``cython_bfs`` (with ``--cython``) or ``bfs_sync_pg`` from
    ``--startNode``, prints the value of the output property at
    ``--reportNode`` and, unless ``--noverify`` is given, runs the matching
    verifier.
    """
    import argparse
    from galois.shmem import setActiveThreads

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--cython", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    # Pick the implementation once, then call it with the shared arguments.
    run_bfs = cython_bfs if args.cython else bfs_sync_pg
    run_bfs(graph, args.startNode, args.propertyName)

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))

    if args.noverify:
        return

    # The verifiers take a property index; the last schema entry is used,
    # presumably because the run above appended its output there.
    last_property_id = len(graph.node_schema()) - 1
    check = cython_verify_bfs if args.cython else verify_bfs
    check(graph, args.startNode, last_property_id)
示例#2
0
def cc_push_topo(graph: PropertyGraph, property_name):
    """Compute component ids with the push-style loop and store them.

    Both component arrays are filled in by ``initialize_cc_push_operator``.
    ``cc_push_topo_operator`` is then applied to every node index, round
    after round, until a round ends without the ``changed`` reducer being
    set. The current component array is finally added to ``graph`` as the
    node property ``property_name``.
    """
    print("Executing Push algo\n")
    node_count = graph.num_nodes()

    timer = StatTimer("CC: Property Graph Numba: " + property_name)
    timer.start()

    # Component id assignment of the current and of the previous round.
    components = np.empty((node_count,), dtype=np.uint32)
    previous = np.empty((node_count,), dtype=np.uint32)

    do_all(
        range(node_count),
        initialize_cc_push_operator(graph, components, previous),
        steal=True,
        loop_name="initialize_cc_push",
    )

    # Seed the flag so that the first round always runs.
    changed = GReduceLogicalOr()
    changed.update(True)
    while True:
        if not changed.reduce():
            break
        changed.reset()
        do_all(
            range(node_count),
            cc_push_topo_operator(graph, changed, components, previous),
            steal=True,
            loop_name="cc_push_topo",
        )

    timer.stop()

    # Publish the component assignment as a new node property.
    graph.add_node_property(pyarrow.table({property_name: components}))
示例#3
0
文件: bfs.py 项目: pamvc/katana
def bfs_sync_pg(graph: PropertyGraph, source, property_name):
    """Run a level-by-level BFS from ``source`` and store the distances.

    Two bags hold the current and the next frontier. Every round swaps them,
    clears the next bag, increments the level number and applies
    ``bfs_sync_operator_pg`` to the current frontier. The loop stops as soon
    as a round leaves the next bag empty. The distance array is then added
    to ``graph`` as the node property ``property_name``.
    """
    level = 0

    frontier = InsertBag[np.uint64]()
    upcoming = InsertBag[np.uint64]()

    timer = StatTimer("BFS Property Graph Numba: " + property_name)
    timer.start()

    distance = np.empty((len(graph),), dtype=np.uint32)
    initialize(graph, source, distance)

    # The first frontier consists of the source node only.
    upcoming.push(source)
    while True:
        if upcoming.empty():
            break
        frontier.swap(upcoming)
        upcoming.clear()
        level += 1
        do_all(
            frontier,
            bfs_sync_operator_pg(graph, upcoming, level, distance),
            steal=True,
            loop_name="bfs_sync_pg",
        )

    timer.stop()

    graph.add_node_property(pyarrow.table({property_name: distance}))
示例#4
0
def verify_sssp(graph: PropertyGraph, _source_i: int, property_id: int):
    """Print sanity figures for the distance property ``property_id``.

    ``not_visited_operator`` and ``max_dist_operator`` are each applied to
    every index of the property. A warning line is printed when the
    not-visited accumulator ends up positive, and the reduced maximum is
    always printed. ``_source_i`` is not used.
    """
    distances = graph.get_node_property(property_id)
    unvisited = GAccumulator[int](0)
    farthest = GReduceMax[int]()
    entry_count = len(distances)

    do_all(
        range(entry_count),
        not_visited_operator(graph.num_nodes(), unvisited, distances),
        loop_name="not_visited_op",
    )

    if unvisited.reduce() > 0:
        print(
            unvisited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(entry_count),
        max_dist_operator(graph.num_nodes(), farthest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", farthest.reduce())
示例#5
0
def main():
    """Command-line entry point of the SSSP example.

    Parses the options (``--edgeWeightProperty`` is mandatory), builds a
    ``PropertyGraph`` from the ``input`` argument, calls ``sssp`` from
    ``--startNode``, prints the value of the output property at
    ``--reportNode`` and, unless ``--noverify`` is given, calls
    ``verify_sssp``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--startNode", type=int, default=0)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--edgeWeightProperty", type=str, required=True)
    cli.add_argument("--shift", type=int, default=6)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    sssp(
        graph,
        args.startNode,
        args.edgeWeightProperty,
        args.shift,
        args.propertyName,
    )

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))

    if args.noverify:
        return

    # The verifier takes a property index; the last schema entry is used,
    # presumably because the run above appended its output there.
    last_property_id = len(graph.node_schema()) - 1
    verify_sssp(graph, args.startNode, last_property_id)
示例#6
0
def main():
    """Command-line entry point of the connected-components example.

    Parses the options, builds a ``PropertyGraph`` from the ``input``
    argument and runs ``cc_push_topo`` when ``--algoType`` is ``push`` and
    ``cc_pull_topo`` for any other value. It then prints the value of the
    output property at ``--reportNode`` and, unless ``--noverify`` is given,
    calls ``verify_cc``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--algoType", type=str, default="push")
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    # Anything other than "push" selects the pull variant.
    run_cc = cc_push_topo if args.algoType == "push" else cc_pull_topo
    run_cc(graph, args.propertyName)

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))

    if args.noverify:
        return

    # The verifier takes a property index; the last schema entry is used,
    # presumably because the run above appended its output there.
    last_property_id = len(graph.node_schema()) - 1
    verify_cc(graph, last_property_id)
示例#7
0
def cc_pull_topo_operator(graph: PropertyGraph, changed, comp_current: np.ndarray, nid):
    """Lower the component id of ``nid`` to the smallest one among its neighbours.

    Each edge of ``nid`` is visited in turn; whenever the edge destination
    holds a smaller component id it is copied into ``comp_current[nid]`` and
    ``changed.update(True)`` is called to record the update.
    """
    for edge in graph.edges(nid):
        neighbour = graph.get_edge_dst(edge)
        # Nothing to pull when our own id is already at least as small.
        if comp_current[nid] <= comp_current[neighbour]:
            continue
        comp_current[nid] = comp_current[neighbour]
        changed.update(True)
示例#8
0
def compute_pagerank_pull_residual_operator(graph: PropertyGraph, delta, residual, nid):
    """Accumulate the positive ``delta`` values of the neighbours of ``nid``.

    The deltas of all edge destinations of ``nid`` that are greater than
    zero are added up. ``residual[nid]`` is overwritten with that total only
    when the total itself is greater than zero.
    """
    total = 0
    for edge in graph.edges(nid):
        contribution = delta[graph.get_edge_dst(edge)]
        if contribution > 0:
            total += contribution

    if total > 0:
        residual[nid] = total
示例#9
0
def compute_async_kcore_operator(graph: PropertyGraph, current_degree,
                                 k_core_num, nid, ctx):
    """Propagate the removal of node ``nid`` to its neighbours.

    The degree entry of every edge destination of ``nid`` is reduced by one
    through ``atomic_sub``. A destination whose value returned by
    ``atomic_sub`` equals ``k_core_num`` is pushed onto ``ctx`` as newly
    removed work.
    """
    for edge in graph.edges(nid):
        neighbour = graph.get_edge_dst(edge)
        degree_before = atomic_sub(current_degree, neighbour, 1)
        if degree_before != k_core_num:
            continue
        # This decrement is the one that makes the neighbour drop out.
        ctx.push(neighbour)
示例#10
0
def cc_push_topo_operator(graph: PropertyGraph, changed, comp_current: np.ndarray, comp_old: np.ndarray, nid):
    """Push the component id of ``nid`` to its neighbours if it got smaller.

    Nothing happens unless ``comp_current[nid]`` is below ``comp_old[nid]``.
    Otherwise the old value is brought up to date, ``changed.update(True)``
    records the update, and ``atomic_min`` is applied to the entry of every
    edge destination with the current id of ``nid``.
    """
    if comp_old[nid] <= comp_current[nid]:
        return

    comp_old[nid] = comp_current[nid]
    changed.update(True)
    for edge in graph.edges(nid):
        # The id of ``nid`` is re-read for every edge, as in the original.
        atomic_min(comp_current, graph.get_edge_dst(edge), comp_current[nid])
示例#11
0
def bfs_sync_operator_pg(
    graph: PropertyGraph, next_level: InsertBag[np.uint64], next_level_number: int, distance: np.ndarray, nid,
):
    """Visit the neighbours of ``nid`` during one BFS round.

    An edge destination whose distance entry still equals
    ``graph.num_nodes()`` is treated as not yet reached: its distance is set
    to ``next_level_number`` and it is pushed onto ``next_level``.
    """
    unreached = graph.num_nodes()

    for edge in graph.edges(nid):
        neighbour = graph.get_edge_dst(edge)
        if distance[neighbour] != unreached:
            continue
        distance[neighbour] = next_level_number
        next_level.push(neighbour)
示例#12
0
def sssp_operator(g: PropertyGraph, dists: np.ndarray, edge_weights, item,
                  ctx: UserContext):
    """Relax the outgoing edges of the node named by the work item.

    The item is skipped when the recorded distance of ``item.src`` is
    already below ``item.dist``. Otherwise ``atomic_min`` is applied to the
    entry of every edge destination with the distance through this edge,
    and the destination is pushed onto ``ctx`` with that distance whenever
    it is smaller than the value ``atomic_min`` returned.
    """
    origin = item.src
    if dists[origin] < item.dist:
        return

    for edge in g.edges(origin):
        target = g.get_edge_dst(edge)
        candidate = edge_weights[edge] + dists[origin]
        previous = atomic_min(dists, target, candidate)
        if candidate < previous:
            ctx.push((target, candidate))
示例#13
0
def test_assert_valid(property_graph: PropertyGraph):
    """``bfs_assert_valid`` must raise for properties that are no BFS result.

    Checked twice: with the existing ``workFrom`` property, and with a copy
    of a real BFS result whose first entry was overwritten.
    """
    with raises(AssertionError):
        bfs_assert_valid(property_graph, "workFrom")

    source, output_name = 0, "NewProp"
    bfs(property_graph, source, output_name)

    # Corrupt a copy of the result and store it under another name.
    tampered = property_graph.get_node_property(output_name).to_numpy().copy()
    tampered[0] = 100
    property_graph.add_node_property(table({"Prop2": tampered}))

    with raises(AssertionError):
        bfs_assert_valid(property_graph, "Prop2")
示例#14
0
def pagerank_pull_sync_residual(graph: PropertyGraph, maxIterations, tolerance, property_name):
    """Run the residual-based pull PageRank loop and store the ranks.

    Four per-node arrays (rank, out-degree, delta, residual) are allocated
    and initialised. Each round then applies the delta operator followed by
    the residual operator to every node index. The loop ends after
    ``maxIterations`` rounds or as soon as a round leaves the ``changed``
    reducer unset. ``rank`` is finally added to ``graph`` as the node
    property ``property_name``.
    """
    num_nodes = graph.num_nodes()
    policy = AllocationPolicy.INTERLEAVED

    rank = LargeArray[float](num_nodes, policy)
    nout = LargeArray[np.uint64](num_nodes, policy)
    delta = LargeArray[float](num_nodes, policy)
    residual = LargeArray[float](num_nodes, policy)

    # Set up the starting values of all four arrays.
    do_all(
        range(num_nodes),
        initialize_residual_operator(
            rank.as_numpy(),
            nout.as_numpy(),
            delta.as_numpy(),
            residual.as_numpy(),
        ),
        steal=True,
        loop_name="initialize_pagerank_pull_residual",
    )

    # Fill in the out-degree of every node.
    do_all(
        range(num_nodes),
        compute_out_deg_operator(graph, nout.as_numpy()),
        steal=True,
        loop_name="Compute_out_degree",
    )

    print("Out-degree of 0: ", nout[0])

    changed = GReduceLogicalOr(True)
    rounds = 0
    timer = StatTimer("Pagerank: Property Graph Numba: " + property_name)
    timer.start()
    while rounds < maxIterations and changed.reduce():
        print("Iter: ", rounds, "\n")
        changed.reset()
        rounds += 1

        # First half of the round: the delta operator.
        do_all(
            range(num_nodes),
            compute_pagerank_pull_delta_operator(
                rank.as_numpy(),
                nout.as_numpy(),
                delta.as_numpy(),
                residual.as_numpy(),
                tolerance,
                changed,
            ),
            steal=True,
            loop_name="pagerank_delta",
        )

        # Second half of the round: the residual operator.
        do_all(
            range(num_nodes),
            compute_pagerank_pull_residual_operator(
                graph, delta.as_numpy(), residual.as_numpy()
            ),
            steal=True,
            loop_name="pagerank",
        )

    timer.stop()

    # Publish the ranks as a new node property.
    graph.add_node_property(pyarrow.table({property_name: rank}))
示例#15
0
def test_bfs(property_graph: PropertyGraph):
    """BFS from node 0 appends the output property and passes ``verify_bfs``.

    The output property must be the last entry of the node schema and must
    hold 0 at the start node.
    """
    source, output_name = 0, "NewProp"

    bfs(property_graph, source, output_name)

    schema: Schema = property_graph.node_schema()
    last_index = len(schema) - 1
    assert schema.names[last_index] == output_name

    source_distance = property_graph.get_node_property(output_name)[source]
    assert source_distance.as_py() == 0

    # Cross-check with the numba implementation of the verifier.
    verify_bfs(property_graph, source, last_index)
示例#16
0
文件: bfs.py 项目: pamvc/katana
def initialize(graph: PropertyGraph, source: int, distance: np.ndarray):
    """Reset ``distance`` for a fresh traversal starting at ``source``.

    The entry of ``source`` becomes 0 and the entry of every other node
    index below ``graph.num_nodes()`` becomes ``distance_infinity``.
    """
    for node in range(graph.num_nodes()):
        distance[node] = 0 if node == source else distance_infinity
示例#17
0
def create_distance_array(g: PropertyGraph, source, length_property):
    """Build the initial distance array for a shortest-path run.

    The array has ``len(g)`` entries and the dtype that
    ``dtype_of_pyarrow_array`` reports for the edge property
    ``length_property``. Every entry is set to an "infinite" sentinel,
    except the entry of ``source``, which is set to 0.

    Args:
        g: graph supplying the length and the edge property.
        source: index of the start node.
        length_property: edge property whose dtype the array takes.

    Returns:
        The freshly allocated and initialised array.
    """
    a = np.empty(len(g),
                 dtype=dtype_of_pyarrow_array(
                     g.get_edge_property(length_property)))

    # The sentinel used to be the uint64 maximum whatever the array dtype
    # was. For narrower unsigned dtypes that only worked through silent
    # wraparound (which current NumPy rejects), and for signed dtypes it
    # overflowed. Clamping to the dtype's own maximum yields the same stored
    # value in the cases that used to work and a valid one in the others.
    inf_distance = np.iinfo(np.uint64).max
    if np.issubdtype(a.dtype, np.integer):
        inf_distance = min(inf_distance, np.iinfo(a.dtype).max)

    a[:] = inf_distance
    a[source] = 0
    return a
示例#18
0
def verify_cc(graph: PropertyGraph, property_id: int):
    """Print the component count derived from node property ``property_id``.

    ``verify_cc_operator`` is applied to every index of the property and the
    reduced value of its accumulator is printed.
    """
    components = graph.get_node_property(property_id)
    num_components = GAccumulator[int](0)

    do_all(
        range(len(components)),
        verify_cc_operator(num_components, components),
        loop_name="num_components",
    )

    print("Number of components are : ", num_components.reduce())
示例#19
0
def test_load_garbage_file():
    """Loading a file that only contains ``b"Test"`` must raise ``TsubaError``.

    The temporary file is created with ``delete=False`` so that it can be
    closed before the load attempt; it is removed again in ``finally``.
    """
    scratch = NamedTemporaryFile(delete=False)
    try:
        # Leaving the ``with`` block closes the file before it is loaded.
        with scratch:
            scratch.write(b"Test")

        with pytest.raises(TsubaError):
            PropertyGraph(scratch.name)
    finally:
        os.unlink(scratch.name)
示例#20
0
def sssp(graph: PropertyGraph, source, length_property, shift, property_name):
    """Run single-source shortest paths from ``source`` and store the result.

    The distance array comes from ``create_distance_array``. The work list
    starts with the single request ``(source, 0)`` and is processed by
    ``for_each`` with ``sssp_operator``, ordered through
    ``obim_indexer(shift)``. The elapsed time is printed and the distances
    are added to ``graph`` as the node property ``property_name``.
    """
    dists = create_distance_array(graph, source, length_property)

    seed_requests = InsertBag[UpdateRequest]()
    seed_requests.push((source, 0))

    stopwatch = StatTimer("Total SSSP")
    stopwatch.start()

    operator = sssp_operator(
        graph, dists, graph.get_edge_property(length_property)
    )
    ordering = OrderedByIntegerMetric(obim_indexer(shift))
    for_each(
        seed_requests,
        operator,
        worklist=ordering,
        disable_conflict_detection=True,
        loop_name="SSSP",
    )

    stopwatch.stop()
    print("Elapsed time: ", stopwatch.get(), "milliseconds.")

    graph.add_node_property(pyarrow.table({property_name: dists}))
示例#21
0
def test_sssp(property_graph: PropertyGraph):
    """SSSP from node 0 with ``workFrom`` weights yields a valid result.

    Checks that the output property is the last schema entry, that the
    start node holds 0, that ``sssp_assert_valid`` accepts the result, and
    that the reported maximum distance is 4294967295.0. The result is
    finally passed to ``verify_sssp``.
    """
    source = 0
    weights = "workFrom"
    output_name = "NewProp"

    sssp(property_graph, source, weights, output_name)

    schema: Schema = property_graph.node_schema()
    last_index = len(schema) - 1
    assert schema.names[last_index] == output_name

    source_distance = property_graph.get_node_property(output_name)[source]
    assert source_distance.as_py() == 0

    sssp_assert_valid(property_graph, source, weights, output_name)

    summary = SsspStatistics(property_graph, output_name)
    assert summary.max_distance == 4294967295.0

    # Cross-check with the numba implementation of the verifier.
    verify_sssp(property_graph, source, last_index)
示例#22
0
def test_bfs(property_graph: PropertyGraph):
    """BFS from node 0 yields a valid result with maximum distance 7.

    Checks that the output property is the last schema entry, that the
    start node holds 0, that ``bfs_assert_valid`` accepts the result, and
    that the statistics report the expected source node and maximum
    distance. The result is finally passed to ``verify_bfs``.
    """
    source, output_name = 0, "NewProp"

    bfs(property_graph, source, output_name)

    schema: Schema = property_graph.node_schema()
    last_index = len(schema) - 1
    assert schema.names[last_index] == output_name

    source_distance = property_graph.get_node_property(output_name)[source]
    assert source_distance.as_py() == 0

    bfs_assert_valid(property_graph, output_name)

    summary = BfsStatistics(property_graph, output_name)
    assert summary.source_node == source
    assert summary.max_distance == 7

    # Cross-check with the numba implementation of the verifier as well.
    verify_bfs(property_graph, source, last_index)
示例#23
0
def verify_kcore(graph: PropertyGraph, property_name: str, k_core_num: int):
    """Print the node count that the sanity check reports for the k-core.

    ``sanity_check_operator`` is applied to every index of the node property
    ``property_name`` and the reduced value of its accumulator is printed
    together with ``k_core_num``.
    """
    degrees = graph.get_node_property(property_name)
    alive_nodes = GAccumulator[float](0)

    do_all(
        range(len(degrees)),
        sanity_check_operator(alive_nodes, degrees, k_core_num),
        steal=True,
        loop_name="sanity_check_operator",
    )

    survivors = alive_nodes.reduce()
    print("Number of nodes in the", k_core_num, "-core is", survivors)
示例#24
0
def kcore_async(graph: PropertyGraph, k_core_num, property_name):
    """Run the asynchronous k-core computation and store the degrees.

    Three passes follow each other: ``compute_degree_count_operator`` fills
    the degree array, ``setup_initial_worklist_operator`` fills the initial
    work list, and ``for_each`` processes that work list with
    ``compute_async_kcore_operator``. The degree array is then added to
    ``graph`` as the node property ``property_name``.
    """
    node_count = graph.num_nodes()
    seeds = InsertBag[np.uint64]()
    degrees = LargeArray[np.uint64](node_count, AllocationPolicy.INTERLEAVED)

    timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    timer.start()

    # Pass 1: record the degree of every node.
    do_all(
        range(node_count),
        compute_degree_count_operator(graph, degrees.as_numpy()),
        steal=True,
    )

    # Pass 2: collect the initial work items.
    do_all(
        range(node_count),
        setup_initial_worklist_operator(seeds, degrees.as_numpy(), k_core_num),
        steal=True,
    )

    # Pass 3: process the work list until it runs dry.
    for_each(
        seeds,
        compute_async_kcore_operator(graph, degrees.as_numpy(), k_core_num),
        steal=True,
        disable_conflict_detection=True,
    )

    timer.stop()

    # Publish the final degree array as a new node property.
    graph.add_node_property(pyarrow.table({property_name: degrees}))
示例#25
0
def main():
    """Command-line entry point of the k-core example.

    Parses the options, builds a ``PropertyGraph`` from the ``input``
    argument, calls ``kcore_async`` with ``--kcore``, prints the value of
    the output property at ``--reportNode`` and, unless ``--noverify`` is
    given, calls ``verify_kcore``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--kcore", "-k", type=int, default=100)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    kcore_async(graph, args.kcore, args.propertyName)

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))

    if args.noverify:
        return

    verify_kcore(graph, args.propertyName, args.kcore)
示例#26
0
def main():
    """Command-line entry point of the Jaccard example.

    Parses the options, builds a ``PropertyGraph`` from the ``input``
    argument, times a call to ``jaccard`` for ``--baseNode`` and prints the
    value of the output property at ``--reportNode``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--baseNode", type=int, default=0)
    cli.add_argument("--reportNode", type=int, default=1)
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    # Only the jaccard call itself is timed.
    stopwatch = StatTimer("Jaccard (Property Graph) Numba")
    stopwatch.start()
    jaccard(graph, args.baseNode, args.propertyName)
    stopwatch.stop()

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))
示例#27
0
def main():
    """Command-line entry point of the PageRank example.

    Parses the options, builds a ``PropertyGraph`` from the ``input``
    argument, calls ``pagerank_pull_sync_residual``, prints the value of the
    output property at ``--reportNode`` and, unless ``--noverify`` is given,
    calls ``verify_pr`` with ``--printTopN``.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument("--propertyName", type=str, default="NewProperty")
    cli.add_argument("--maxIterations", type=int, default=100)
    cli.add_argument("--tolerance", type=float, default=1.0e-3)
    cli.add_argument("--noverify", action="store_true", default=False)
    cli.add_argument("--printTopN", type=int, default=10)
    cli.add_argument("--threads", "-t", type=int, default=1)
    cli.add_argument("--reportNode", type=int, default=0)
    cli.add_argument("input", type=str)
    args = cli.parse_args()

    print("Using threads:", setActiveThreads(args.threads))

    graph = PropertyGraph(args.input)

    pagerank_pull_sync_residual(
        graph,
        args.maxIterations,
        args.tolerance,
        args.propertyName,
    )

    reported = graph.get_node_property(args.propertyName)[args.reportNode]
    print("Node {}: {}".format(args.reportNode, reported))

    if args.noverify:
        return

    verify_pr(graph, args.propertyName, args.printTopN)
示例#28
0
def test_bfs(property_graph: PropertyGraph):
    """``bfs_sync_pg`` from node 0 yields a valid result with maximum distance 7.

    The last node property is passed to ``verify_bfs``, the statistics must
    report the expected source node and maximum distance, and
    ``bfs_assert_valid`` must accept the result.
    """
    source, output_name = 0, "NewProp"

    bfs_sync_pg(property_graph, source, output_name)

    # The verifier takes the index of the last node property.
    last_index = len(property_graph.node_schema()) - 1
    verify_bfs(property_graph, source, last_index)

    summary = BfsStatistics(property_graph, output_name)
    assert summary.source_node == source
    assert summary.max_distance == 7

    bfs_assert_valid(property_graph, output_name)
示例#29
0
def verify_pr(graph: PropertyGraph, property_name: str, topn: int):
    """Print sanity figures for a computed rank property.

    ``sanity_check_operator`` is applied to every index of the node property
    ``property_name``; the reduced maximum, minimum and sum are printed.
    When ``topn`` is positive, the ``topn`` largest values are listed with
    their indices, largest first.

    Args:
        graph: graph holding the property.
        property_name: name of the node property to inspect.
        topn: number of highest-valued entries to list; nothing is listed
            when it is not positive.
    """
    chunk_array = graph.get_node_property(property_name)
    sum_rank = GAccumulator[float](0)
    max_rank = GReduceMax[float]()
    min_rank = GReduceMin[float]()

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(sum_rank, max_rank, min_rank, chunk_array),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Max rank is ", max_rank.reduce())
    print("Min rank is ", min_rank.reduce())
    print("rank sum is ", sum_rank.reduce())

    # Print top N ranked nodes
    if topn > 0:
        # ``np.float`` was only an alias of the builtin ``float`` and has
        # been removed from NumPy; ``np.float64`` is the same dtype.
        np_array = np.array(chunk_array, dtype=np.float64)
        # argsort is ascending: take the last ``topn`` and reverse them.
        arr = np_array.argsort()[-topn:][::-1]
        for i in arr:
            print(np_array[i], " : ", i, "\n")
示例#30
0
def compute_degree_count_operator(graph: PropertyGraph, current_degree, nid):
    """
    Store the number of edges of node ``nid`` in ``current_degree[nid]``.

    The original note states that the graph is symmetric, so this out-edge
    count also serves as the in-edge count.
    """
    incident_edges = graph.edges(nid)
    current_degree[nid] = len(incident_edges)