Exemplo n.º 1
0
def get_trivial_graph(n_node_features=0):
    """Generate a trivial graph for internal purposes.

    The graph has three nodes (0, 1, 2), the three edges 0 -> 1, 1 -> 2 and
    2 -> 0 (start_node -> end_node), and the label 0.

    Args:
        n_node_features: length of the all-zero feature vector stored for each
            node in a "features" column. No column is added when this is not
            positive.

    Returns:
        Graph built from the node and edge dataframes.
    """
    nodes = pd.DataFrame([0, 1, 2])
    if n_node_features > 0:
        # Build one list per node: `3 * [[...]]` would put the *same* list
        # object in every row, so an in-place change to one node's features
        # would silently change the other nodes as well.
        nodes["features"] = [
            n_node_features * [0.0] for _ in range(len(nodes))
        ]
    edges = pd.DataFrame({"start_node": [0, 1, 2], "end_node": [1, 2, 0]})
    return Graph(nodes, edges, 0)
Exemplo n.º 2
0
def make(folder="./datasets",
         add_features=False,
         write_to_file=True,
         n_graphs=5):
    """Make a collection of small graphs that test robustness of hcga.

    Six kinds of graph are generated, ``n_graphs`` copies of each:
    one-, two- and three-node grid graphs, an undirected graph without
    edges, a directed graph without edges, and a directed path on 10 nodes.
    Every graph is stored as a Graph with a random label drawn from {0, 1}.

    Args:
        folder: directory handed to save_dataset when write_to_file is True.
        add_features: if True, add_dummy_node_features is applied to every
            generated graph before conversion.
        write_to_file: if True, the collection is saved under the name
            "TESTDATA".
        n_graphs: number of copies of each kind of graph.

    Returns:
        GraphCollection holding the generated graphs.
    """
    graphs = []

    # one, two and three node graphs
    small_graphs = (
        (1, "one-node graph"),
        (2, "two-node graph"),
        (3, "three-node graph"),
    )
    for _ in range(n_graphs):
        for size, description in small_graphs:
            # A new dim list is created for every call, as in the original.
            graphs.append(
                _add_graph_desc(nx.grid_graph([size]).copy(), description))

    # no edges
    G = nx.Graph()
    G.add_node(0)
    G.add_node(1, weight=2)
    G.add_node(2, weight=3)
    graphs.extend(
        _add_graph_desc(G.copy(), "graph without edges")
        for _ in range(n_graphs))

    # directed graph no weights (only nodes are added, no edges)
    G = nx.DiGraph()
    G.add_nodes_from(range(100, 110))
    graphs.extend(
        _add_graph_desc(G.copy(), "directed graph with no weights")
        for _ in range(n_graphs))

    # directed graph weighted (edges of a 10-node path; no weight is set here)
    G = nx.DiGraph()
    H = nx.path_graph(10)
    G.add_nodes_from(H)
    G.add_edges_from(H.edges)
    graphs.extend(
        _add_graph_desc(G.copy(), "directed graph weighted")
        for _ in range(n_graphs))

    # adding features to all
    if add_features:
        graphs = [add_dummy_node_features(graph) for graph in graphs]

    # NOTE(review): only node ids and edge endpoints are copied below, so
    # attributes stored on the networkx graphs (node weights, whatever
    # add_dummy_node_features attaches) do not appear to reach the Graph
    # objects -- confirm whether that is intended.
    graphs_coll = GraphCollection()
    for graph in graphs:
        nodes = pd.DataFrame(list(graph.nodes))
        # Build the edge table in one call instead of growing it cell by cell
        # with .loc, which copies the frame on every new row. dtype=object
        # matches the dtype of an empty frame created with only `columns`,
        # which is what the previous row-wise construction started from.
        edges = pd.DataFrame(
            list(graph.edges),
            columns=["start_node", "end_node"],
            dtype=object,
        )
        graphs_coll.add_graph(Graph(nodes, edges, np.random.randint(0, 2)))

    if write_to_file:
        save_dataset(graphs_coll, "TESTDATA", folder=folder)

    return graphs_coll
Exemplo n.º 3
0
def extract_benchmark_graphs(datadir, dataname):  # pylint: disable=too-many-locals
    """Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets

    Indices start with 1 in the files; the values read from
    ``_graph_indicator.txt`` and ``_A.txt`` are shifted down by one.

    Files read, all named ``<datadir>/<dataname><suffix>``:
        ``_graph_indicator.txt``: graph id of every node (required).
        ``_graph_labels.txt``: one label per graph (required).
        ``_A.txt``: comma separated ``start_node, end_node`` pairs (required).
        ``_node_labels.txt``: optional; one-hot encoded into a "labels" column.
        ``_node_attributes.txt``: optional; stored row-wise in an
            "attributes" column.

    Args:
        datadir: directory containing the dataset files.
        dataname: dataset name, used as the common file-name prefix.

    Returns:
        GraphCollection with one Graph per graph id, added in increasing
        graph-id order.
    """
    prefix = str(Path(datadir) / dataname)

    # Builtin int replaces the np.int alias, which was removed from NumPy.
    with open(prefix + "_graph_indicator.txt") as f:
        nodes_df = pd.read_csv(f, dtype=int, header=None) - 1
    nodes_df.columns = ["graph_id"]

    with open(prefix + "_graph_labels.txt") as f:
        graph_labels = pd.read_csv(f, header=None)

    # Read the (potentially large) edge list in chunks and join them once;
    # DataFrame.append no longer exists in pandas 2 and re-copied the
    # accumulated frame for every chunk.
    with open(prefix + "_A.txt") as f:
        edge_chunks = [
            chunk - 1
            for chunk in pd.read_csv(
                f, sep=",", dtype=int, header=None, chunksize=10 ** 6
            )
        ]
    edges_df = pd.concat(edge_chunks)
    edges_df.columns = ["start_node", "end_node"]
    # An edge is assigned to the graph that owns its start node.
    edges_df["graph_id"] = nodes_df["graph_id"][
        edges_df["start_node"].to_list()
    ].to_list()

    columns = []
    if Path(prefix + "_node_labels.txt").exists():
        with open(prefix + "_node_labels.txt") as f:
            nodes_df["labels_value"] = pd.read_csv(f, header=None)
        # One one-hot float vector per node.
        nodes_df["labels"] = list(
            pd.get_dummies(nodes_df["labels_value"]).to_numpy(dtype=float)
        )
        columns.append("labels")

    if Path(prefix + "_node_attributes.txt").exists():
        with open(prefix + "_node_attributes.txt") as f:
            nodes_df["attributes"] = list(pd.read_csv(f, header=None).to_numpy())
        columns.append("attributes")

    # sorted() makes the order of graphs in the collection deterministic.
    graph_ids = sorted(set(nodes_df["graph_id"]))
    graphs = GraphCollection()
    for graph_id in graph_ids:
        nodes = nodes_df.loc[nodes_df["graph_id"] == graph_id][columns]
        edges = edges_df.loc[edges_df["graph_id"] == graph_id][
            ["start_node", "end_node"]
        ]
        graphs.add_graph(Graph(nodes, edges, int(graph_labels.loc[graph_id][0])))

    return graphs
Exemplo n.º 4
0
 def _add_graph(label):
     """Add stochastic block model graphs carrying ``label`` to ``graphs``.

     ``n_graphs``, ``sizes``, ``probs`` and ``graphs`` are taken from the
     enclosing scope, which is not part of this excerpt.
     """
     for _ in range(n_graphs):
         sbm = nx.stochastic_block_model(sizes, probs)
         node_list = list(sbm.nodes)
         edge_list = list(sbm.edges)
         graphs.add_graph(Graph(node_list, edge_list, label))