Example #1
def main():
    import argparse

    import katana.local
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)

    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    bfs_sync_pg(graph, args.startNode, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        newPropertyID = graph.loaded_node_schema()[-1].name
        verify_bfs(graph, args.startNode, newPropertyID)
Example #2
def cc_push_topo(graph: Graph, property_name):
    print("Executing Push algo\n")
    num_nodes = graph.num_nodes()

    timer = StatTimer("CC: Property Graph Numba: " + property_name)
    timer.start()
    # Stores the component id assignment
    comp_current = np.empty((num_nodes, ), dtype=np.uint32)
    comp_old = np.empty((num_nodes, ), dtype=np.uint32)

    # Initialize
    do_all(
        range(num_nodes),
        initialize_cc_push_operator(graph, comp_current, comp_old),
        steal=True,
        loop_name="initialize_cc_push",
    )

    # Execute while component ids are updated
    changed = ReduceLogicalOr()
    changed.update(True)
    while changed.reduce():
        changed.reset()
        do_all(
            range(num_nodes),
            cc_push_topo_operator(graph, changed, comp_current, comp_old),
            steal=True,
            loop_name="cc_push_topo",
        )

    timer.stop()
    # Add the component assignment as a new property to the property graph
    graph.add_node_property(pyarrow.table({property_name: comp_current}))
Example #3
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--baseNode", type=int, default=0)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    g = Graph(args.input)

    timer = StatTimer("Jaccard (Property Graph) Numba")
    timer.start()
    jaccard(g, args.baseNode, args.propertyName)
    timer.stop()
    del timer

    print("Node {}: {}".format(args.reportNode, g.get_node_property(args.propertyName)[args.reportNode]))
Example #4
def bfs_sync_pg(graph: Graph, source, property_name):
    next_level_number = 0

    curr_level = InsertBag[np.uint64]()
    next_level = InsertBag[np.uint64]()

    timer = StatTimer("BFS Property Graph Numba: " + property_name)
    timer.start()
    distance = np.empty((graph.num_nodes(), ), dtype=np.uint32)
    initialize(graph, source, distance)
    next_level.push(source)
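    # Process the graph level by level: swap in the current frontier, then
    # expand it in parallel until no new nodes are discovered.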
    while not next_level.empty():
        curr_level.swap(next_level)
        next_level.clear()
        next_level_number += 1
        do_all(
            curr_level,
            bfs_sync_operator_pg(graph, next_level, next_level_number,
                                 distance),
            steal=True,
            loop_name="bfs_sync_pg",
        )
    timer.stop()

    graph.add_node_property(pyarrow.table({property_name: distance}))
Example #5
def test_connected_components():
    graph = Graph(get_rdg_dataset("rmat10_symmetric"))

    # Graph is already symmetric. Last bool argument (True)
    # indicates that.
    connected_components(graph, "output_sym", True)

    stats_sym = ConnectedComponentsStatistics(graph, "output_sym")

    assert stats_sym.total_components == 69
    assert stats_sym.total_non_trivial_components == 1
    assert stats_sym.largest_component_size == 956
    assert stats_sym.largest_component_ratio == approx(0.933594)

    connected_components_assert_valid(graph, "output_sym")

    # Graph is not symmetric. Last bool argument (False)
    # indicates that. Connected components routine will create
    # undirected view for computation.
    graph = Graph(get_rdg_dataset("rmat10"))

    connected_components(graph, "output", False)

    stats = ConnectedComponentsStatistics(graph, "output")

    assert stats.total_components == stats_sym.total_components
    assert stats.total_non_trivial_components == stats_sym.total_non_trivial_components
    assert stats.largest_component_size == stats_sym.largest_component_size
    assert stats.largest_component_ratio == stats_sym.largest_component_ratio
Example #6
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--edgeWeightProperty", type=str, required=True)
    parser.add_argument("--shift", type=int, default=6)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    sssp(graph, args.startNode, args.edgeWeightProperty, args.shift,
         args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.loaded_node_schema())
        newPropertyID = numNodeProperties - 1
        verify_sssp(graph, args.startNode, newPropertyID)
Example #7
def kcore_async(graph: Graph, k_core_num, property_name):
    num_nodes = graph.num_nodes()
    initial_worklist = InsertBag[np.uint64]()
    current_degree = NUMAArray[np.uint64](num_nodes, AllocationPolicy.INTERLEAVED)

    timer = StatTimer("Kcore: Property Graph Numba: " + property_name)
    timer.start()

    # Initialize
    do_all(
        range(num_nodes), compute_degree_count_operator(graph, current_degree.as_numpy()), steal=True,
    )

    # Setup initial worklist
    do_all(
        range(num_nodes),
        setup_initial_worklist_operator(initial_worklist, current_degree.as_numpy(), k_core_num),
        steal=True,
    )

    # Compute k-core
    for_each(
        initial_worklist,
        compute_async_kcore_operator(graph, current_degree.as_numpy(), k_core_num),
        steal=True,
        disable_conflict_detection=True,
    )

    timer.stop()
    # Add the ranks as a new property to the property graph
    graph.add_node_property(pyarrow.table({property_name: current_degree}))
Example #8
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--kcore", "-k", type=int, default=100)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    kcore_async(graph, args.kcore, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_kcore(graph, args.propertyName, args.kcore)
Example #9
def count_in_and_out_degree(graph: Graph, nout, nin, nid):
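    # Per-node operator: count this node's out-degree and atomically
    # increment the in-degree counter of each edge destination.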
    out_degree = 0
    for edge in graph.edge_ids(nid):
        out_degree += 1
        dst = graph.get_edge_dest(edge)
        atomic_add(nin, dst, 1)
    nout[nid] = out_degree
Example #10
def calculate_degree(graph: Graph,
                     in_degree_property,
                     out_degree_property,
                     weight_property=None):
    """
    Calculate the (potentially weighted) in and out degrees of a graph.
    The function will modify the given graph by adding two new node properties,
    one for the in degree and one for the out degree. Nothing is returned.
    Parameters:
        graph: a Graph
        in_degree_property: the property name for the in degree
        out_degree_property: the property name for the out degree
        weight_property: an edge property to use in calculating the weighted degree
    """
    num_nodes = graph.num_nodes()
    nout = NUMAArray[np.uint64](num_nodes, AllocationPolicy.INTERLEAVED)
    nin = NUMAArray[np.uint64](num_nodes, AllocationPolicy.INTERLEAVED)

    do_all(range(num_nodes), initialize_in_degree(nin.as_numpy()), steal=False)

    # are we calculating weighted degree?
    if not weight_property:
        count_operator = count_in_and_out_degree(graph, nout.as_numpy(),
                                                 nin.as_numpy())
    else:
        count_operator = count_weighted_in_and_out_degree(
            graph, nout.as_numpy(), nin.as_numpy(),
            graph.get_edge_property(weight_property))
    do_all(range(num_nodes), count_operator, steal=True)

    graph.add_node_property(
        pyarrow.table({
            in_degree_property: nin,
            out_degree_property: nout
        }))
Example #11
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--algoType", type=str, default="push")
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    if args.algoType == "push":
        cc_push_topo(graph, args.propertyName)
    else:
        cc_pull_topo(graph, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        numNodeProperties = len(graph.loaded_node_schema())
        newPropertyID = numNodeProperties - 1
        verify_cc(graph, newPropertyID)
Example #12
def main():
    import argparse

    import katana.local

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--maxIterations", type=int, default=100)
    parser.add_argument("--tolerance", type=float, default=1.0e-3)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--printTopN", type=int, default=10)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    pagerank_pull_sync_residual(graph, args.maxIterations, args.tolerance,
                                args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_pr(graph, args.propertyName, args.printTopN)
Example #13
def cc_pull_topo_operator(graph: Graph, changed, comp_current: np.ndarray, nid):
    for ii in graph.out_edge_ids_for_node(nid):
        dst = graph.out_edge_dst(ii)
        # Pull the minimum component from your neighbors
        if comp_current[nid] > comp_current[dst]:
            comp_current[nid] = comp_current[dst]
            # Indicates that update happened
            changed.update(True)
Example #14
def test_sort_nodes_by_degree(graph: Graph):
    sort_nodes_by_degree(graph)
    assert len(graph.edge_ids(0)) == 103
    last_node_n_edges = 103
    for n in range(1, NODES_TO_SAMPLE):
        v = len(graph.edge_ids(n))
        assert v <= last_node_n_edges
        last_node_n_edges = v
Example #15
def compute_async_kcore_operator(graph: Graph, current_degree, k_core_num, nid, ctx):
    # Decrement degree of all the neighbors of dead node
    for ii in graph.out_edge_ids_for_node(nid):
        dst = graph.out_edge_dst(ii)
        old_degree = atomic_sub(current_degree, dst, 1)
        # Add new dead nodes to the worklist
        if old_degree == k_core_num:
            ctx.push(dst)
Example #16
def count_weighted_in_and_out_degree(graph: Graph, nout, nin, weight_array, nid):
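    # Per-node operator: accumulate edge weights into the weighted out-degree and
    # atomically add each edge's weight to the destination's weighted in-degree.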
    out_degree = 0
    for edge in graph.out_edge_ids(nid):
        weight = weight_array[edge]
        out_degree += weight
        dst = graph.out_edge_dst(edge)
        atomic_add(nin, dst, weight)
    nout[nid] = out_degree
Example #17
def create_distance_array(g: Graph, source, length_property):
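    # Allocate a per-node distance array whose dtype matches the edge-length
    # property; every distance starts at "infinity" except the source, which is 0.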
    a = np.empty(g.num_nodes(),
                 dtype=dtype_of_pyarrow_array(
                     g.get_edge_property(length_property)))
    # TODO(amp): Remove / 4
    infinity = dtype_info(a.dtype).max / 4
    a[:] = infinity
    a[source] = 0
    return a
Example #18
def test_local_clustering_coefficient():
    graph = Graph(get_input("propertygraphs/rmat15_cleaned_symmetric"))

    local_clustering_coefficient(graph, "output")
    out = graph.get_node_property("output")

    assert out[-1].as_py() == 0
    assert not np.any(np.isnan(out))
Example #19
def compute_pagerank_pull_residual_operator(graph: Graph, delta, residual, nid):
    total = 0
    for ii in graph.out_edge_ids_for_node(nid):
        dst = graph.out_edge_dst(ii)
        if delta[dst] > 0:
            total += delta[dst]

    if total > 0:
        residual[nid] = total
Example #20
def test_load_graphml_write():
    input_file = Path(get_misc_dataset("graph-convert/movies.graphml"))
    pg = from_graphml(input_file)
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        del pg
        graph = Graph(tmpdir)
        assert graph.path == f"file://{tmpdir}"
    assert graph.get_node_property("name")[1].as_py() == "Keanu Reeves"
Example #21
def cc_push_topo_operator(graph: Graph, changed, comp_current: np.ndarray, comp_old: np.ndarray, nid):
    if comp_old[nid] > comp_current[nid]:
        comp_old[nid] = comp_current[nid]
        # Indicates that update happened
        changed.update(True)
        for ii in graph.out_edge_ids_for_node(nid):
            dst = graph.out_edge_dst(ii)
            new_comp = comp_current[nid]
            # Push the minimum component to your neighbors
            atomic_min(comp_current, dst, new_comp)
Example #22
def sssp_operator(g: Graph, dists: np.ndarray, edge_weights, item,
                  ctx: UserContext):
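    # Skip stale work items: a shorter distance to item.src has already been recorded.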
    if dists[item.src] < item.dist:
        return
    for ii in g.edge_ids(item.src):
        dst = g.get_edge_dest(ii)
        edge_length = edge_weights[ii]
        new_distance = edge_length + dists[item.src]
        old_distance = atomic_min(dists, dst, new_distance)
        if new_distance < old_distance:
            ctx.push((dst, new_distance))
Example #23
def test_load_graphml_write():
    input_file = Path(
        os.environ["KATANA_SOURCE_DIR"]
    ) / "tools" / "graph-convert" / "test-inputs" / "movies.graphml"
    pg = from_graphml(input_file)
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        del pg
        graph = Graph(tmpdir)
        assert graph.path == f"file://{tmpdir}"
    assert graph.get_node_property(0)[1].as_py() == "Keanu Reeves"
Example #24
def bfs_sync_operator_pg(
    graph: Graph,
    next_level: InsertBag[np.uint64],
    next_level_number: int,
    distance: np.ndarray,
    nid,
):
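    # Visit unreached out-neighbors: record their BFS level and push them onto the next frontier.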
    for ii in graph.out_edge_ids(nid):
        dst = graph.out_edge_dst(ii)
        if distance[dst] == distance_infinity:
            distance[dst] = next_level_number
            next_level.push(dst)
Example #25
def sum_degree_operator(
    graph: Graph,
    source_degree,
    sum_source: ReduceSum[np.uint64],
    destination_degree,
    sum_destination: ReduceSum[np.uint64],
    nid,
):
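    # For each outgoing edge, add the source node's degree and the destination
    # node's degree to their respective sum reducers.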
    for edge in graph.out_edge_ids_for_node(nid):
        sum_source.update(source_degree[nid])
        dst = graph.out_edge_dst(edge)
        sum_destination.update(destination_degree[dst])
Example #26
def pagerank_pull_sync_residual(graph: Graph, maxIterations, tolerance, property_name):
    num_nodes = graph.num_nodes()

    rank = NUMAArray[float](num_nodes, AllocationPolicy.INTERLEAVED)
    nout = NUMAArray[np.uint64](num_nodes, AllocationPolicy.INTERLEAVED)
    delta = NUMAArray[float](num_nodes, AllocationPolicy.INTERLEAVED)
    residual = NUMAArray[float](num_nodes, AllocationPolicy.INTERLEAVED)

    # Initialize
    do_all(
        range(num_nodes),
        initialize_residual_operator(rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(),),
        steal=True,
        loop_name="initialize_pagerank_pull_residual",
    )

    # Compute out-degree for each node
    do_all(
        range(num_nodes), compute_out_deg_operator(graph, nout.as_numpy()), steal=True, loop_name="Compute_out_degree",
    )

    print("Out-degree of 0: ", nout[0])

    changed = ReduceOr(True)
    iterations = 0
    timer = StatTimer("Pagerank: Property Graph Numba: " + property_name)
    timer.start()
    while iterations < maxIterations and changed.reduce():
        print("Iter: ", iterations, "\n")
        changed.reset()
        iterations += 1
        do_all(
            range(num_nodes),
            compute_pagerank_pull_delta_operator(
                rank.as_numpy(), nout.as_numpy(), delta.as_numpy(), residual.as_numpy(), tolerance, changed,
            ),
            steal=True,
            loop_name="pagerank_delta",
        )

        do_all(
            range(num_nodes),
            compute_pagerank_pull_residual_operator(graph, delta.as_numpy(), residual.as_numpy()),
            steal=True,
            loop_name="pagerank",
        )

    timer.stop()
    # Add the ranks as a new property to the property graph
    graph.add_node_property(pyarrow.table({property_name: rank}))
Example #27
def test_assert_valid(graph: Graph):
    property_name = "NewProp"
    start_node = 0

    with raises(AssertionError):
        bfs_assert_valid(graph, start_node, "workFrom")

    bfs(graph, start_node, property_name)

    v = graph.get_node_property(property_name).to_numpy().copy()
    v[0] = 100
    graph.add_node_property(table({"Prop2": v}))

    with raises(AssertionError):
        bfs_assert_valid(graph, start_node, "Prop2")
Example #28
def verify_sssp(graph: Graph, _source_i: int, property_id: int):
    prop_array = graph.get_node_property(property_id)
    not_visited = ReduceSum[int](0)
    max_dist = ReduceMax[int]()
    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(prop_array)).max / 4

    do_all(
        range(len(prop_array)),
        not_visited_operator(infinity, not_visited, prop_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(
            not_visited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(prop_array)),
        max_dist_operator(infinity, max_dist, prop_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", max_dist.reduce())
Example #29
def verify_bfs(graph: Graph, _source_i: int, property_id):
    chunk_array = graph.get_node_property(property_id)
    not_visited = ReduceSum[int](0)
    max_dist = ReduceMax[int]()

    do_all(
        range(len(chunk_array)),
        not_visited_operator(not_visited, chunk_array),
        loop_name="not_visited_op",
    )

    if not_visited.reduce() > 0:
        print(
            not_visited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        range(len(chunk_array)),
        max_dist_operator(max_dist, chunk_array),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", max_dist.reduce())
Example #30
def initialize(graph: Graph, source: int, distance: np.ndarray):
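    # Distance of the source is 0; every other node starts at "infinity".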
    num_nodes = graph.num_nodes()
    for n in range(num_nodes):
        if n == source:
            distance[n] = 0
        else:
            distance[n] = distance_infinity