def read_graph(graph_file, dataset_name, directed=False, weighted=False):
    """
    Reads the input network in networkx.

    The input is first treated as an EPGM graph. If that fails for any reason
    (including the ``NotImplementedError`` raised for weighted EPGM input), it
    is read as an edgelist file with integer node ids instead. If the loaded
    graph is not connected (weakly connected for directed graphs), only its
    largest connected component is returned.

    :param graph_file: The directory where graph in EPGM format is stored, or
        the path of an edgelist file
    :param dataset_name: The name of the graph selected out of all the graph
        heads in EPGM file; if None, the label of the 1st graph head is used
    :param directed: Passed to ``EPGM.to_nx`` for EPGM input; for edgelist
        input the graph is converted to an undirected graph when False
    :param weighted: If True, edge weights are read from the edgelist file as
        floats (not implemented for EPGM input); otherwise every edge gets
        weight 1
    :return: The graph in networkx format
    """
    try:  # assume graph_file points to an EPGM graph
        G_epgm = EPGM(graph_file)
        graphs = G_epgm.G["graphs"]
        if dataset_name is None:
            # if dataset_name is not given, use the name of the 1st graph head
            dataset_name = graphs[0]["meta"]["label"]
            print(
                "WARNING: dataset name not specified, using dataset '{}' in the 1st graph head"
                .format(dataset_name))

        # If several graph heads share the label, the last one wins.
        graph_id = None
        for graph_head in graphs:
            if graph_head["meta"]["label"] == dataset_name:
                graph_id = graph_head["id"]

        g = G_epgm.to_nx(graph_id, directed)
        if weighted:
            # Caught below, which triggers the edgelist fallback.
            raise NotImplementedError

        # This is the correct way to set the edge weight in a MultiGraph.
        edge_weights = {e: 1 for e in g.edges(keys=True)}
        nx.set_edge_attributes(g, name="weight", values=edge_weights)
    except Exception:
        # otherwise, assume graph_file points to an edgelist file.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of silently starting the fallback.
        if weighted:
            g = nx.read_edgelist(
                graph_file,
                nodetype=int,
                data=(("weight", float),),
                create_using=nx.DiGraph(),
            )
        else:
            g = nx.read_edgelist(
                graph_file, nodetype=int, create_using=nx.DiGraph())
            for u, v in g.edges():
                g[u][v]["weight"] = 1

        if not directed:
            g = g.to_undirected()

    # nx.is_connected / nx.connected_components are not implemented for
    # directed graphs, so use the weakly-connected variants in that case.
    if g.is_directed():
        is_connected = nx.is_weakly_connected
        components = nx.weakly_connected_components
    else:
        is_connected = nx.is_connected
        components = nx.connected_components

    # Connectivity is undefined for a graph without nodes (networkx raises),
    # so the check is skipped for an empty graph.
    if g.number_of_nodes() > 0 and not is_connected(g):
        print("Graph is not connected")
        # take the largest connected component as the data
        g_ccs = (g.subgraph(c).copy() for c in components(g))
        g = max(g_ccs, key=len)
        print("Largest subgraph statistics: {} nodes, {} edges".format(
            g.number_of_nodes(), g.number_of_edges()))

    print("Graph statistics: {} nodes, {} edges".format(
        g.number_of_nodes(), g.number_of_edges()))
    return g
def from_epgm(epgm_location, dataset_name=None, directed=False):
    """
    Imports a graph stored in EPGM format to a NetworkX object

    Args:
        epgm_location (str): The directory containing the EPGM data
        dataset_name (str), optional: The name of the dataset to import. If
            not given (or empty), the label of the 1st graph head is used and
            a RuntimeWarning is issued.
        directed (bool): If True, load as a directed graph, otherwise
            load as an undirected graph

    Returns:
        A NetworkX graph containing the data for the EPGM-stored graph.

    Raises:
        ValueError: If no graph head in the EPGM data has the label
            ``dataset_name``.
    """
    G_epgm = EPGM(epgm_location)
    graphs = G_epgm.G["graphs"]

    # if dataset_name is not given, use the name of the 1st graph head
    if not dataset_name:
        dataset_name = graphs[0]["meta"]["label"]
        warnings.warn(
            "dataset name not specified, using dataset '{}' in the 1st graph head".format(
                dataset_name
            ),
            RuntimeWarning,
            stacklevel=2,
        )

    # Select graph using dataset_name
    matching_ids = [
        graph_head["id"]
        for graph_head in graphs
        if graph_head["meta"]["label"] == dataset_name
    ]
    if not matching_ids:
        # Fail with a clear message instead of an unbound `graph_id` below.
        raise ValueError(
            "dataset '{}' not found in EPGM graph heads; available labels: {}".format(
                dataset_name, [graph_head["meta"]["label"] for graph_head in graphs]
            )
        )
    # If several graph heads share the label, the last one wins.
    graph_id = matching_ids[-1]

    # Convert the selected EPGM graph to a NetworkX graph
    Gnx = G_epgm.to_nx(graph_id, directed=directed)

    print(
        "Graph statistics: {} nodes, {} edges".format(
            Gnx.number_of_nodes(), Gnx.number_of_edges()
        )
    )
    return Gnx
def read_graph(graph_file, dataset_name, is_directed=False, is_weighted=False):
    """
    Reads the input network in networkx.

    A path whose last dot-separated component is ``gpickle`` is loaded as a
    pickled networkx graph. Any other input is first treated as an EPGM
    graph; if that fails for any reason (including the ``NotImplementedError``
    raised for weighted EPGM input), it is read as an edgelist file with
    integer node ids instead.

    Args:
        graph_file: The directory where graph in EPGM format is stored, or the
            path of a ``.gpickle`` or edgelist file.
        dataset_name: The name of the graph selected out of all the graph
            heads in EPGM file. If None, the label of the 1st graph head is
            used.
        is_directed: Passed to ``EPGM.to_nx`` for EPGM input; for gpickle and
            edgelist input the graph is converted to an undirected graph when
            False.
        is_weighted: If True, edge weights are read from the edgelist file as
            floats (not implemented for EPGM input). Otherwise every edge gets
            weight 1. Ignored for gpickle input, where weights are always set
            to 1.

    Returns:
        The graph in networkx format
    """
    if graph_file.split('.')[-1] == 'gpickle':
        # NOTE(security): unpickling executes arbitrary code from the file;
        # only load gpickle files from trusted sources.
        read_gpickle = getattr(nx, "read_gpickle", None)
        if read_gpickle is not None:
            g = read_gpickle(graph_file)
        else:
            # nx.read_gpickle was removed in NetworkX 3.0; plain pickle is the
            # documented replacement.
            import pickle

            with open(graph_file, "rb") as gpickle_file:
                g = pickle.load(gpickle_file)

        for u, v in g.edges():
            g[u][v]["weight"] = 1  # {'weight': 1}
        if not is_directed:
            g = g.to_undirected()
        return g

    try:  # assume graph_file points to an EPGM graph
        G_epgm = EPGM(graph_file)
        graphs = G_epgm.G["graphs"]
        if dataset_name is None:
            # if dataset_name is not given, use the name of the 1st graph head
            dataset_name = graphs[0]["meta"]["label"]
            print(
                "WARNING: dataset name not specified, using dataset '{}' in the 1st graph head"
                .format(dataset_name))

        # If several graph heads share the label, the last one wins.
        graph_id = None
        for graph_head in graphs:
            if graph_head["meta"]["label"] == dataset_name:
                graph_id = graph_head["id"]

        g = G_epgm.to_nx(graph_id, is_directed)
        if is_weighted:
            # Caught below, which triggers the edgelist fallback.
            raise NotImplementedError

        # This is the correct way to set the edge weight in a MultiGraph.
        edge_weights = {e: 1 for e in g.edges(keys=True)}
        nx.set_edge_attributes(g, name="weight", values=edge_weights)
    except Exception:
        # otherwise, assume graph_file points to an edgelist file.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of silently starting the fallback.
        if is_weighted:
            g = nx.read_edgelist(
                graph_file,
                nodetype=int,
                data=(("weight", float),),
                create_using=nx.DiGraph(),
            )
        else:
            g = nx.read_edgelist(
                graph_file, nodetype=int, create_using=nx.DiGraph())
            for u, v in g.edges():
                g[u][v]["weight"] = 1  # {'weight': 1}

        if not is_directed:
            g = g.to_undirected()

    print("Graph statistics: {} nodes, {} edges".format(
        g.number_of_nodes(), g.number_of_edges()))
    return g