Example #1
0
def degree_assortativity_coefficient(
    graph: Graph,
    source_degree_type: DegreeType = DegreeType.OUT,
    destination_degree_type: DegreeType = DegreeType.IN,
    weight=None,
):
    """
    Compute and return the degree assortativity coefficient of a graph.

    Parameters:
       * graph: the Graph to be analyzed
       * source_degree_type: which degree to consider for the source node of an
            edge; expected values are DegreeType.IN or DegreeType.OUT
       * destination_degree_type: which degree to consider for the destination
            node of an edge; expected values are DegreeType.IN or DegreeType.OUT
       * weight (optional): edge property to use if using weighted degrees
    """
    # Materialize both degree tables under temporary property names, then pick
    # out the ones requested for the source and destination ends of each edge.
    in_prop = "temp_DegreeType.IN"
    out_prop = "temp_DegreeType.OUT"
    calculate_degree(graph, in_prop, out_prop, weight)
    src_degrees = graph.get_node_property("temp_" + str(source_degree_type))
    dst_degrees = graph.get_node_property("temp_" + str(destination_degree_type))

    try:
        # Mean degrees are taken with respect to the number of edges,
        # not the number of nodes.
        edge_count = graph.num_edges()
        src_mean, dst_mean = average_degree(graph, edge_count, src_degrees,
                                            dst_degrees)

        # Accumulate the numerator (product of deviations from the means) and
        # the two denominator factors (squared deviations from the means).
        dev_product = ReduceSum[float](0)
        src_dev_sq = ReduceSum[float](0)
        dst_dev_sq = ReduceSum[float](0)
        do_all(
            range(graph.num_nodes()),
            degree_assortativity_coefficient_operator(
                graph,
                src_degrees,
                src_mean,
                dst_degrees,
                dst_mean,
                dev_product,
                src_dev_sq,
                dst_dev_sq,
            ),
            steal=True,
            loop_name="degree assortativity coefficient calculation",
        )
        return dev_product.reduce() / sqrt(
            src_dev_sq.reduce() * dst_dev_sq.reduce())
    finally:
        # Always drop the temporary degree properties, even on error.
        graph.remove_node_property(in_prop)
        graph.remove_node_property(out_prop)
Example #2
0
File: jaccard.py  Project: witchel/katana
def main():
    """Command-line driver: run Jaccard similarity on a graph and report one node's score."""
    import argparse

    import katana.local
    # Imported locally like in the sibling bfs.py driver; `set_active_threads`
    # is called below but is not otherwise imported inside this function.
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--baseNode", type=int, default=0)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    g = Graph(args.input)

    # Time only the jaccard computation itself.
    timer = StatTimer("Jaccard (Property Graph) Numba")
    timer.start()
    jaccard(g, args.baseNode, args.propertyName)
    timer.stop()
    del timer

    print("Node {}: {}".format(args.reportNode, g.get_node_property(args.propertyName)[args.reportNode]))
Example #3
0
File: bfs.py  Project: aneeshdurg/katana
def main():
    """Command-line driver: run synchronous BFS and optionally verify the result."""
    import argparse

    import katana.local
    from katana import set_active_threads

    katana.local.initialize()

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--startNode", type=int, default=0)
    arg_parser.add_argument("--propertyName", type=str, default="NewProperty")
    arg_parser.add_argument("--reportNode", type=int, default=1)
    arg_parser.add_argument("--noverify", action="store_true", default=False)
    arg_parser.add_argument("--threads", "-t", type=int, default=1)
    arg_parser.add_argument("input", type=str)

    args = arg_parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    bfs_sync_pg(graph, args.startNode, args.propertyName)

    distances = graph.get_node_property(args.propertyName)
    print("Node {}: {}".format(args.reportNode, distances[args.reportNode]))

    if not args.noverify:
        # Verify against the most recently loaded node property.
        newPropertyID = graph.loaded_node_schema()[-1].name
        verify_bfs(graph, args.startNode, newPropertyID)
Example #4
0
File: bfs.py  Project: aneeshdurg/katana
def verify_bfs(graph: Graph, _source_i: int, property_id):
    """Sanity-check a BFS result: report unvisited nodes and the maximum distance."""
    distances = graph.get_node_property(property_id)
    unvisited = ReduceSum[int](0)
    farthest = ReduceMax[int]()
    node_range = range(len(distances))

    do_all(
        node_range,
        not_visited_operator(unvisited, distances),
        loop_name="not_visited_op",
    )

    if unvisited.reduce() > 0:
        # Unvisited nodes are only legal when the graph is not strongly connected.
        print(
            unvisited.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        node_range,
        max_dist_operator(farthest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("BFS Max distance:", farthest.reduce())
Example #5
0
File: sssp.py  Project: bowu/katana
def verify_sssp(graph: Graph, _source_i: int, property_id: int):
    """Sanity-check an SSSP result: report unreached nodes and the maximum distance."""
    distances = graph.get_node_property(property_id)
    unreached = ReduceSum[int](0)
    farthest = ReduceMax[int]()
    # TODO(amp): Remove / 4
    infinity = dtype_info(dtype_of_pyarrow_array(distances)).max / 4
    node_range = range(len(distances))

    do_all(
        node_range,
        not_visited_operator(infinity, unreached, distances),
        loop_name="not_visited_op",
    )

    if unreached.reduce() > 0:
        # Unreached nodes are only legal when the graph is not strongly connected.
        print(
            unreached.reduce(),
            " unvisited nodes; this is an error if graph is strongly connected",
        )

    do_all(
        node_range,
        max_dist_operator(infinity, farthest, distances),
        steal=True,
        loop_name="max_dist_operator",
    )

    print("Max distance:", farthest.reduce())
Example #6
0
File: sssp.py  Project: bowu/katana
def main():
    """Command-line driver: run SSSP and optionally verify the result."""
    import argparse

    import katana.local
    # Imported locally like in the sibling bfs.py driver; `set_active_threads`
    # is called below but is not otherwise imported inside this function.
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--startNode", type=int, default=0)
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--edgeWeightProperty", type=str, required=True)
    parser.add_argument("--shift", type=int, default=6)
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    sssp(graph, args.startNode, args.edgeWeightProperty, args.shift,
         args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        # Verify against the most recently added node property.
        numNodeProperties = len(graph.loaded_node_schema())
        newPropertyID = numNodeProperties - 1
        verify_sssp(graph, args.startNode, newPropertyID)
Example #7
0
def main():
    """Command-line driver: run asynchronous k-core and optionally verify the result."""
    import argparse

    import katana.local
    # Imported locally like in the sibling bfs.py driver; `set_active_threads`
    # is called below but is not otherwise imported inside this function.
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--kcore", "-k", type=int, default=100)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    kcore_async(graph, args.kcore, args.propertyName)

    print("Node {}: {}".format(args.reportNode, graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_kcore(graph, args.propertyName, args.kcore)
Example #8
0
def main():
    """Command-line driver: run connected components (push or pull) and optionally verify."""
    import argparse

    import katana.local
    # Imported locally like in the sibling bfs.py driver; `set_active_threads`
    # is called below but is not otherwise imported inside this function.
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--algoType", type=str, default="push")
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--reportNode", type=int, default=1)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    # Anything other than "push" falls back to the pull-based variant.
    if args.algoType == "push":
        cc_push_topo(graph, args.propertyName)
    else:
        cc_pull_topo(graph, args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        # Verify against the most recently added node property.
        numNodeProperties = len(graph.loaded_node_schema())
        newPropertyID = numNodeProperties - 1
        verify_cc(graph, newPropertyID)
Example #9
0
def main():
    """Command-line driver: run pull-based PageRank and optionally verify the result."""
    import argparse

    import katana.local
    # Imported locally like in the sibling bfs.py driver; `set_active_threads`
    # is called below but is not otherwise imported inside this function.
    from katana import set_active_threads

    katana.local.initialize()

    parser = argparse.ArgumentParser()
    parser.add_argument("--propertyName", type=str, default="NewProperty")
    parser.add_argument("--maxIterations", type=int, default=100)
    parser.add_argument("--tolerance", type=float, default=1.0e-3)
    parser.add_argument("--noverify", action="store_true", default=False)
    parser.add_argument("--printTopN", type=int, default=10)
    parser.add_argument("--threads", "-t", type=int, default=1)
    parser.add_argument("--reportNode", type=int, default=0)
    parser.add_argument("input", type=str)
    args = parser.parse_args()

    print("Using threads:", set_active_threads(args.threads))

    graph = Graph(args.input)

    pagerank_pull_sync_residual(graph, args.maxIterations, args.tolerance,
                                args.propertyName)

    print("Node {}: {}".format(
        args.reportNode,
        graph.get_node_property(args.propertyName)[args.reportNode]))

    if not args.noverify:
        verify_pr(graph, args.propertyName, args.printTopN)
Example #10
0
def test_local_clustering_coefficient():
    """Smoke-test local clustering coefficient on a symmetric RMAT input."""
    graph = Graph(get_input("propertygraphs/rmat15_cleaned_symmetric"))

    local_clustering_coefficient(graph, "output")
    # Removed a stray no-op annotation statement (`graph: Graph`) that had no
    # runtime effect and sat after the variable was already in use.
    out = graph.get_node_property("output")

    # Last node's coefficient is expected to be 0, and no entry may be NaN.
    assert out[-1].as_py() == 0
    assert not np.any(np.isnan(out))
Example #11
0
def verify_cc(graph: Graph, property_id: int):
    """Report the number of connected components recorded in the given node property."""
    component_ids = graph.get_node_property(property_id)
    component_total = ReduceSum[int](0)

    do_all(
        range(len(component_ids)),
        verify_cc_operator(component_total, component_ids),
        loop_name="num_components",
    )

    print("Number of components are : ", component_total.reduce())
Example #12
0
def test_load_graphml_write():
    """Round-trip a GraphML input: import, write to disk, reload, and check a property."""
    input_file = Path(get_misc_dataset("graph-convert/movies.graphml"))
    pg = from_graphml(input_file)
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        # Drop the in-memory import so only the on-disk copy backs `graph`.
        del pg
        graph = Graph(tmpdir)
        assert graph.path == f"file://{tmpdir}"
    # NOTE(review): this assert runs after the TemporaryDirectory is cleaned
    # up; it assumes the property data is already resident in memory rather
    # than lazily read from the (now deleted) directory — confirm.
    assert graph.get_node_property("name")[1].as_py() == "Keanu Reeves"
Example #13
0
def test_load_graphml_write():
    """Round-trip a GraphML test input: import, write to disk, reload, and check a property."""
    # Test input lives in the source tree; KATANA_SOURCE_DIR must be set.
    input_file = Path(
        os.environ["KATANA_SOURCE_DIR"]
    ) / "tools" / "graph-convert" / "test-inputs" / "movies.graphml"
    pg = from_graphml(input_file)
    with TemporaryDirectory() as tmpdir:
        pg.write(tmpdir)
        # Drop the in-memory import so only the on-disk copy backs `graph`.
        del pg
        graph = Graph(tmpdir)
        assert graph.path == f"file://{tmpdir}"
    # NOTE(review): this assert runs after the TemporaryDirectory is cleaned
    # up; it assumes the property data is already resident in memory rather
    # than lazily read from the (now deleted) directory — confirm.
    assert graph.get_node_property(0)[1].as_py() == "Keanu Reeves"
Example #14
0
def verify_kcore(graph: Graph, property_name: str, k_core_num: int):
    """Check output sanity: count and print the nodes surviving in the k-core."""
    core_flags = graph.get_node_property(property_name)
    alive_total = ReduceSum[float](0)

    do_all(
        range(len(core_flags)),
        sanity_check_operator(alive_total, core_flags, k_core_num),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Number of nodes in the", k_core_num, "-core is", alive_total.reduce())
Example #15
0
def test_jaccard_sorted(graph: Graph):
    """Jaccard with the sorted-edge plan should produce the known reference scores."""
    # The sorted plan requires destination-sorted edge lists.
    sort_all_edges_by_dest(graph)

    prop = "NewProp"
    base_node = 0

    jaccard(graph, base_node, prop, JaccardPlan.sorted())
    jaccard_assert_valid(graph, base_node, prop)

    scores: np.ndarray = graph.get_node_property(prop).to_numpy()
    # A node is always fully similar to itself; the spot-checked nodes share
    # no neighbors with the base node.
    assert scores[base_node] == 1
    assert scores[1917] == approx(0.0)
    assert scores[2812] == approx(0.0)
Example #16
0
def test_assert_valid(graph: Graph):
    """bfs_assert_valid must reject both an unrelated property and a corrupted BFS result."""
    prop = "NewProp"
    source = 0

    # An arbitrary pre-existing property is not a valid BFS labelling.
    with raises(AssertionError):
        bfs_assert_valid(graph, source, "workFrom")

    bfs(graph, source, prop)

    # Corrupt the source node's distance and store it under a new name.
    corrupted = graph.get_node_property(prop).to_numpy().copy()
    corrupted[0] = 100
    graph.add_node_property(table({"Prop2": corrupted}))

    with raises(AssertionError):
        bfs_assert_valid(graph, source, "Prop2")
Example #17
0
def test_busy_wait(graph: Graph):
    """Run the full BFS pipeline with busy-waiting enabled, restoring the default afterwards."""
    set_busy_wait()
    try:
        property_name = "NewProp"
        start_node = 0

        bfs(graph, start_node, property_name)

        # BFS appends its result as the newest node property.
        node_schema: Schema = graph.loaded_node_schema()
        num_node_properties = len(node_schema)
        new_property_id = num_node_properties - 1
        assert node_schema.names[new_property_id] == property_name

        # The start node is at distance 0 from itself.
        assert graph.get_node_property(property_name)[start_node].as_py() == 0

        bfs_assert_valid(graph, start_node, property_name)

        BfsStatistics(graph, property_name)

        # Verify with numba implementation of verifier as well
        verify_bfs(graph, start_node, new_property_id)
    finally:
        # Restore non-busy waiting even when an assertion above fails, so the
        # setting does not leak into later tests in the session.
        set_busy_wait(0)
Example #18
0
def test_sssp(graph: Graph):
    """End-to-end SSSP: run, check schema placement, validate, and cross-check the verifier."""
    prop = "NewProp"
    weight_prop = "workFrom"
    source = 0

    sssp(graph, source, weight_prop, prop)

    # SSSP appends its result as the newest node property.
    schema: Schema = graph.loaded_node_schema()
    latest_property_id = len(schema) - 1
    assert schema.names[latest_property_id] == prop

    # The source node is at distance 0 from itself.
    assert graph.get_node_property(prop)[source].as_py() == 0

    sssp_assert_valid(graph, source, weight_prop, prop)

    stats = SsspStatistics(graph, prop)
    print(stats)
    assert stats.max_distance == 0.0

    # Verify with numba implementation of verifier
    verify_sssp(graph, source, latest_property_id)
Example #19
0
def verify_pr(graph: Graph, property_name: str, topn: int):
    """Check output sanity: print rank extrema and sum, then the top-N ranked nodes."""
    chunk_array = graph.get_node_property(property_name)
    sum_rank = ReduceSum[float](0)
    max_rank = ReduceMax[float]()
    min_rank = ReduceMin[float]()

    do_all(
        range(len(chunk_array)),
        sanity_check_operator(sum_rank, max_rank, min_rank, chunk_array),
        steal=True,
        loop_name="sanity_check_operator",
    )

    print("Max rank is ", max_rank.reduce())
    print("Min rank is ", min_rank.reduce())
    print("rank sum is ", sum_rank.reduce())

    # Print top N ranked nodes
    if topn > 0:
        # `np.float` (a deprecated alias for the builtin float) was removed in
        # NumPy 1.24; use the builtin directly so this runs on current NumPy.
        np_array = np.array(chunk_array, dtype=float)
        arr = np_array.argsort()[-topn:][::-1]
        for i in arr:
            print(np_array[i], " : ", i, "\n")
Example #20
0
def test_jaccard(graph: Graph):
    """Jaccard with the default plan: schema placement, validity, statistics, and scores."""
    prop = "NewProp"
    base_node = 0

    jaccard(graph, base_node, prop)

    # Jaccard appends its result as the newest node property.
    schema: Schema = graph.loaded_node_schema()
    latest_property_id = len(schema) - 1
    assert schema.names[latest_property_id] == prop

    jaccard_assert_valid(graph, base_node, prop)

    stats = JaccardStatistics(graph, base_node, prop)
    assert stats.max_similarity == approx(1)
    assert stats.min_similarity == approx(0)
    assert stats.average_similarity == approx(0.000552534)

    scores: np.ndarray = graph.get_node_property(prop).to_numpy()
    # A node is always fully similar to itself; the spot-checked nodes share
    # no neighbors with the base node.
    assert scores[base_node] == 1
    assert scores[1917] == approx(0.0)
    assert scores[2812] == approx(0.0)