def test_selfloops_attr(self):
    """Self-loop edges report their attributes through the ``data`` argument."""
    graph = self.K3.copy()
    graph.add_edge(0, 0)
    graph.add_edge(1, 1, weight=2)

    # data=True yields the full attribute dict of every self-loop.
    loops_with_dicts = nx.selfloop_edges(graph, data=True)
    assert_edges_equal(loops_with_dicts,
                       [(0, 0, {}), (1, 1, {'weight': 2})])

    # data='weight' yields only that attribute; the loop added without a
    # weight is expected to report None.
    loops_with_weight = nx.selfloop_edges(graph, data='weight')
    assert_edges_equal(loops_with_weight, [(0, 0, None), (1, 1, 2)])
def test_selfloops():
    """Exercise the self-loop helpers on each of the four base graph classes."""
    base_graphs = (nx.Graph(), nx.DiGraph(), nx.MultiGraph(),
                   nx.MultiDiGraph())
    for base in base_graphs:
        G = nx.complete_graph(3, create_using=base)

        # A single attribute-free self-loop on node 0.
        G.add_edge(0, 0)
        assert_nodes_equal(nx.nodes_with_selfloops(G), [0])
        assert_edges_equal(nx.selfloop_edges(G), [(0, 0)])
        assert_edges_equal(nx.selfloop_edges(G, data=True), [(0, 0, {})])
        assert_equal(nx.number_of_selfloops(G), 1)

        # A second self-loop that carries an attribute.
        G.add_edge(1, 1, weight=2)
        expected_dicts = [(0, 0, {}), (1, 1, {'weight': 2})]
        expected_weights = [(0, 0, None), (1, 1, 2)]
        assert_edges_equal(nx.selfloop_edges(G, data=True), expected_dicts)
        assert_edges_equal(nx.selfloop_edges(G, data='weight'),
                           expected_weights)
def test_configuration():
    """Run the augmentation checks on seeded configuration-model graphs."""
    for seed in (2718183590, 2470619828, 1694705158, 3001036531,
                 2401251497):
        degrees = nx.random_powerlaw_tree_sequence(20, seed=seed, tries=5000)
        model = nx.configuration_model(degrees, seed=seed)
        G = nx.Graph(model)
        # The checks are run on a graph without self-loops.
        G.remove_edges_from(nx.selfloop_edges(G))
        _check_augmentations(G)
def delete_lowval_edges(graph, weight_threshold, remove_self_loops=True):
    """Return a copy of ``graph`` without its low-weight edges.

    Parameters
    ----------
    graph : networkx graph
        Input graph. It is copied; the original is never modified.
    weight_threshold : number
        Edges whose ``"weight"`` attribute is strictly below this value are
        removed from the copy.
    remove_self_loops : bool, optional
        When true (the default), all self-loop edges are removed first,
        whatever their weight.

    Returns
    -------
    networkx graph
        The pruned copy of ``graph``.

    Notes
    -----
    Edges that carry no ``"weight"`` attribute are left untouched.
    """
    lowedge_graph = graph.copy()

    if remove_self_loops:
        # Snapshot the self-loops before mutating the graph.
        selfloop_list = list(nx.selfloop_edges(lowedge_graph))
        lowedge_graph.remove_edges_from(selfloop_list)
        logging.info("Deleted %d self-looping edges", len(selfloop_list))

    # Iterate the attribute mapping itself: its keys are exactly the edge
    # identifiers of the edges that have a weight, so no per-edge lookup
    # can fail with KeyError.
    weight_dict = nx.get_edge_attributes(lowedge_graph, "weight")
    edge_dellist = [
        edge for edge, weight in weight_dict.items()
        if weight < weight_threshold
    ]
    lowedge_graph.remove_edges_from(edge_dellist)

    # The denominator is the edge count of the *original* graph.
    logging.info(
        "Deleted %d/%d low valued edges",
        len(edge_dellist),
        graph.number_of_edges(),
    )
    return lowedge_graph
def test_configuration_directed():
    """Run the edge-connectivity checks on a seeded directed configuration graph."""
    # Alternative seeds kept for reference:
    # 671221681, 2403749451, 124433910, 672335939, 1193127215
    for seed in [67]:
        degrees = nx.random_powerlaw_tree_sequence(20, seed=seed, tries=5000)
        model = nx.configuration_model(degrees, seed=seed)
        G = nx.DiGraph(model)
        # The checks are run on a graph without self-loops.
        G.remove_edges_from(nx.selfloop_edges(G))
        _check_edge_connectivity(G)
def without_selfloops(G):
    """Return a copy of G with all self-loop edges removed.

    G itself is left unchanged. A warning is logged when at least one
    self-loop is found.
    """
    stripped = G.copy()
    loop_count = nx.number_of_selfloops(G)
    if not loop_count:
        # Nothing to remove: hand back the plain copy.
        return stripped

    message = "Network contains {} self-loops. Removing...".format(loop_count)
    log.warning(message)
    # The loops are enumerated on G and removed from the copy.
    stripped.remove_edges_from(nx.selfloop_edges(G))
    return stripped
def test_selfloops(self):
    """Self-loops are reported correctly and survive every removal path."""
    graph = self.K3.copy()

    # Reporting helpers with a single self-loop on node 0.
    graph.add_edge(0, 0)
    assert_nodes_equal(nx.nodes_with_selfloops(graph), [0])
    assert_edges_equal(nx.selfloop_edges(graph), [(0, 0)])
    assert_equal(nx.number_of_selfloops(graph), 1)

    # Removing the loop as a single edge.
    graph.remove_edge(0, 0)

    # Removing the loop through the bulk edge API.
    graph.add_edge(0, 0)
    graph.remove_edges_from([(0, 0)])

    # Removing a node that owns a loop.
    graph.add_edge(1, 1)
    graph.remove_node(1)

    # Removing several loop-owning nodes at once.
    graph.add_edge(0, 0)
    graph.add_edge(1, 1)
    graph.remove_nodes_from([0, 1])
def _relabel_inplace(G, mapping):
    """Relabel the nodes of ``G`` in place following ``mapping`` and return ``G``.

    When old and new labels overlap, the nodes are processed in the reverse
    of a topological order of the ``old -> new`` relation. If no such order
    exists, ``nx.NetworkXUnfeasible`` is raised. A ``KeyError`` is raised
    when a node to be relabeled is not in the graph.
    """
    sources = set(mapping.keys())
    targets = set(mapping.values())

    if sources & targets:
        # The label sets overlap: look for a relabeling order that works.
        dependency = nx.DiGraph(list(mapping.items()))
        # Identity entries (x -> x) would show up as self-loops.
        dependency.remove_edges_from(nx.selfloop_edges(dependency))
        try:
            order = reversed(list(nx.topological_sort(dependency)))
        except nx.NetworkXUnfeasible:
            raise nx.NetworkXUnfeasible('The node label sets are overlapping '
                                        'and no ordering can resolve the '
                                        'mapping. Use copy=True.')
    else:
        # Disjoint label sets: any order is fine.
        order = sources

    has_keys = G.is_multigraph()
    has_direction = G.is_directed()

    for old in order:
        try:
            new = mapping[old]
        except KeyError:
            # Only reachable for labels that appear solely as targets.
            continue
        if new == old:
            continue

        try:
            G.add_node(new, **G.nodes[old])
        except KeyError:
            raise KeyError("Node %s is not in the graph" % old)

        if has_keys:
            rewired = [
                (new, new if old == nbr else nbr, key, attrs)
                for (_, nbr, key, attrs)
                in G.edges(old, data=True, keys=True)
            ]
            if has_direction:
                rewired += [
                    (new if old == nbr else nbr, new, key, attrs)
                    for (nbr, _, key, attrs)
                    in G.in_edges(old, data=True, keys=True)
                ]
        else:
            rewired = [
                (new, new if old == nbr else nbr, attrs)
                for (_, nbr, attrs) in G.edges(old, data=True)
            ]
            if has_direction:
                rewired += [
                    (new if old == nbr else nbr, new, attrs)
                    for (nbr, _, attrs) in G.in_edges(old, data=True)
                ]

        # Dropping the old node removes its edges; re-add them on the new one.
        G.remove_node(old)
        G.add_edges_from(rewired)
    return G
def core_topological_sort(vg_en_tn_prdct, threshold=1):
    """Build a thresholded digraph from the merit values and sort its core.

    The first element of ``inverse_distance_intrinsic_merit(vg_en_tn_prdct)``
    is walked row by row. An edge ``(rowframe, columnframe)`` is added
    whenever the maximum of a cell exceeds ``threshold``. Self-loops are
    removed, the k-core of the undirected version is taken, and
    ``nx.topological_sort`` is applied to it.

    :param vg_en_tn_prdct: input handed to ``inverse_distance_intrinsic_merit``.
    :param threshold: a cell creates an edge when ``max(cell) > threshold``.
    :return: the result of ``nx.topological_sort`` on the core graph.
    """
    invdistmerit = inverse_distance_intrinsic_merit(vg_en_tn_prdct)
    vg_en_tn_prdct_nxg = nx.DiGraph()
    rowframe = 0
    # NOTE(review): columnframe is never reset at the start of a row, so it
    # keeps counting across rows. Confirm whether a per-row reset was
    # intended; the original behaviour is preserved here.
    columnframe = 0
    for row in invdistmerit[0]:
        for column in row:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("column: %s" % (column,))
            if max(column) > threshold:
                vg_en_tn_prdct_nxg.add_edge(rowframe, columnframe)
            columnframe = columnframe + 1
        rowframe = rowframe + 1
    vg_en_tn_prdct_nxg.remove_edges_from(
        nx.selfloop_edges(vg_en_tn_prdct_nxg))
    video_core = nx.k_core(vg_en_tn_prdct_nxg.to_undirected())
    # NOTE(review): video_core comes from to_undirected(), while
    # topological sorting is defined for directed graphs. Verify against
    # the installed networkx version that this call behaves as intended.
    topsorted_video_core = nx.topological_sort(video_core)
    print("Topological Sorted Core Summary of the Video - Edges: %s"
          % (topsorted_video_core,))
    return topsorted_video_core
def number_of_selfloops(G):
    """Returns the number of selfloop edges.

    A selfloop edge has the same node at both ends.

    Returns
    -------
    nloops : int
        The number of selfloops.

    See Also
    --------
    nodes_with_selfloops, selfloop_edges

    Examples
    --------
    >>> G = nx.Graph()   # or DiGraph, MultiGraph, MultiDiGraph, etc
    >>> G.add_edge(1, 1)
    >>> G.add_edge(1, 2)
    >>> nx.number_of_selfloops(G)
    1
    """
    # Count by consuming the edge iterator one item at a time.
    nloops = 0
    for _ in nx.selfloop_edges(G):
        nloops += 1
    return nloops
def girvan_newman(G, most_valuable_edge=None):
    """Finds communities in a graph using the Girvan–Newman method.

    Parameters
    ----------
    G : NetworkX graph

    most_valuable_edge : function
        Function that takes a graph as input and outputs an edge. The
        edge returned by this function will be recomputed and removed at
        each iteration of the algorithm.

        If not specified, the edge with the highest
        :func:`networkx.edge_betweenness_centrality` will be used.

    Returns
    -------
    iterator
        Iterator over tuples of sets of nodes in `G`. Each set of node
        is a community, each tuple is a sequence of communities at a
        particular level of the algorithm.

    Examples
    --------
    To get the first pair of communities::

        >>> G = nx.path_graph(10)
        >>> comp = girvan_newman(G)
        >>> tuple(sorted(c) for c in next(comp))
        ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

    To get only the first *k* tuples of communities, use
    :func:`itertools.islice`::

        >>> import itertools
        >>> G = nx.path_graph(8)
        >>> k = 2
        >>> comp = girvan_newman(G)
        >>> for communities in itertools.islice(comp, k):
        ...     print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
        ...
        ([0, 1, 2, 3], [4, 5, 6, 7])
        ([0, 1], [2, 3], [4, 5, 6, 7])

    To stop getting tuples of communities once the number of communities
    is greater than *k*, use :func:`itertools.takewhile`::

        >>> import itertools
        >>> G = nx.path_graph(8)
        >>> k = 4
        >>> comp = girvan_newman(G)
        >>> limited = itertools.takewhile(lambda c: len(c) <= k, comp)
        >>> for communities in limited:
        ...     print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
        ...
        ([0, 1, 2, 3], [4, 5, 6, 7])
        ([0, 1], [2, 3], [4, 5, 6, 7])
        ([0, 1], [2, 3], [4, 5], [6, 7])

    To just choose an edge to remove based on the weight::

        >>> from operator import itemgetter
        >>> G = nx.path_graph(10)
        >>> edges = G.edges()
        >>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, 'weight')
        >>> def heaviest(G):
        ...     u, v, w = max(G.edges(data='weight'), key=itemgetter(2))
        ...     return (u, v)
        ...
        >>> comp = girvan_newman(G, most_valuable_edge=heaviest)
        >>> tuple(sorted(c) for c in next(comp))
        ([0, 1, 2, 3, 4, 5, 6, 7, 8], [9])

    To utilize edge weights when choosing an edge with, for example, the
    highest betweenness centrality::

        >>> from networkx import edge_betweenness_centrality as betweenness
        >>> def most_central_edge(G):
        ...     centrality = betweenness(G, weight='weight')
        ...     return max(centrality, key=centrality.get)
        ...
        >>> G = nx.path_graph(10)
        >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
        >>> tuple(sorted(c) for c in next(comp))
        ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])

    To specify a different ranking algorithm for edges, use the
    `most_valuable_edge` keyword argument::

        >>> from networkx import edge_betweenness_centrality
        >>> from random import random
        >>> def most_central_edge(G):
        ...     centrality = edge_betweenness_centrality(G)
        ...     max_cent = max(centrality.values())
        ...     # Scale the centrality values so they are between 0 and 1.
        ...     centrality = {e: c / max_cent for e, c in centrality.items()}
        ...     # Add some random noise.
        ...     centrality = {e: c + random() for e, c in centrality.items()}
        ...     return max(centrality, key=centrality.get)
        ...
        >>> G = nx.path_graph(10)
        >>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)

    Notes
    -----
    The Girvan–Newman algorithm detects communities by progressively
    removing edges from the original graph. The algorithm removes the
    "most valuable" edge, traditionally the edge with the highest
    betweenness centrality, at each step. As the graph breaks down into
    pieces, the tightly knit community structure is exposed and the
    result can be depicted as a dendrogram.

    """
    # An edgeless graph has nothing to remove: its connected components
    # are the only level of the hierarchy.
    if G.number_of_edges() == 0:
        yield tuple(nx.connected_components(G))
        return

    def _highest_betweenness_edge(graph):
        """Returns the edge with the highest betweenness centrality
        in `graph`.

        """
        # The caller only passes graphs that still have edges, so the
        # dictionary is never empty.
        scores = nx.edge_betweenness_centrality(graph)
        return max(scores, key=scores.get)

    # Fall back to edge betweenness when no ranking function is given.
    if most_valuable_edge is None:
        most_valuable_edge = _highest_betweenness_edge

    # Work on an undirected copy; the copy keeps the edge weight data.
    working = G.copy().to_undirected()
    # Self-loops are dropped up front: removing them has no effect on the
    # connected components of the graph.
    working.remove_edges_from(nx.selfloop_edges(working))
    while working.number_of_edges() > 0:
        yield _without_most_central_edges(working, most_valuable_edge)
def graphStats(G, stats=('nodes', 'edges', 'isolates', 'loops', 'density', 'transitivity'), makeString=True, sentenceString=False):
    """Returns a string or tuple containing statistics about the graph _G_.

    **graphStats()** gives 6 different statistics: number of nodes, number of edges, number of isolates, number of loops, density and transitivity. The ones wanted can be given to _stats_. By default a string giving each stat on a different line is returned; it can also produce a sentence containing all the requested statistics, or the raw values can be accessed instead by setting _makeString_ to `False`.

    # Parameters

    _G_ : `networkx Graph`

    > The graph for the statistics to be determined of

    _stats_ : `optional [list or tuple [str]]`

    > Default `('nodes', 'edges', 'isolates', 'loops', 'density', 'transitivity')`, a list or tuple containing any number or combination of the strings:

    > `"nodes"`, `"edges"`, `"isolates"`, `"loops"`, `"density"` and `"transitivity"`

    > At least one occurrence of the corresponding string causes the statistic to be provided in the string output, always in the order listed above. For the non-string (tuple) output the returned tuple has the same length as the input and each output is at the same index as the string that requested it, e.g.

    > `_stats_ = ("edges", "loops", "edges")`

    > The return is a tuple with 3 elements, the first and last of which are the number of edges and the second is the number of loops

    _makeString_ : `optional [bool]`

    > Default `True`, if `True` a string is returned, if `False` a tuple

    _sentenceString_ : `optional [bool]`

    > Default `False`, if `True` the returned string is a sentence, otherwise each value has a separate line.

    # Returns

    `str or tuple [float and int]`

    > The type is determined by _makeString_ and the layout by _stats_

    # Raises

    `RuntimeError`

    > If _stats_ contains a string that is not one of the six names above
    """
    # One row per statistic, in output order:
    # (name, value getter, sentence template, per-line template)
    statTable = (
        ('nodes', lambda: len(G.nodes()),
         "{:G} nodes", "Nodes: {:G}"),
        ('edges', lambda: len(G.edges()),
         "{:G} edges", "Edges: {:G}"),
        ('isolates', lambda: len(list(nx.isolates(G))),
         "{:G} isolates", "Isolates: {:G}"),
        ('loops', lambda: len(list(nx.selfloop_edges(G))),
         "{:G} self loops", "Self loops: {:G}"),
        ('density', lambda: nx.density(G),
         "a density of {:G}", "Density: {:G}"),
        ('transitivity', lambda: nx.transitivity(G),
         "a transitivity of {:G}", "Transitivity: {:G}"),
    )

    validNames = [row[0] for row in statTable]
    for sts in stats:
        if sts not in validNames:
            raise RuntimeError('"{}" is not a valid stat.'.format(sts))

    rawValues = {}
    textParts = []
    for name, getValue, sentenceFmt, lineFmt in statTable:
        if name not in stats:
            continue
        # Each requested statistic is computed once.
        value = getValue()
        if not makeString:
            rawValues[name] = value
        elif sentenceString:
            textParts.append(sentenceFmt.format(value))
        else:
            textParts.append(lineFmt.format(value))

    if not makeString:
        # Raw output mirrors the order (and repetitions) of the request.
        return tuple(rawValues[sts] for sts in stats)

    if not sentenceString:
        return '\n'.join(textParts)

    retString = "The graph has "
    if len(textParts) < 1:
        return retString
    if len(textParts) == 1:
        return retString + textParts[0]
    return retString + ', '.join(textParts[:-1]) + ' and ' + textParts[-1]
def upload_file():
    """Handle the network upload form and render the predicted links.

    On a POST request with an accepted file, the upload is saved under
    ``app.config['UPLOAD_FOLDER']`` and read as a network. Its self-loops
    are dropped and ``CommonNeighboursGF`` predictions are computed on an
    integer-relabeled copy. For every predicted pair a score is accumulated
    over the pair's common neighbours, and ``generatedGraph.html`` is
    rendered with the results. Requests without a usable file are
    redirected back with a flash message. Any other request renders
    ``downloads.html``.
    """
    if request.method == 'POST':
        if 'file' not in request.files:
            flash('No file part')
            return redirect(request.url)
        file = request.files['file']
        if file.filename == '':
            flash('No selected file')
            return redirect(request.url)
        if file and allowed_file(file.filename):
            filename = secure_filename(file.filename)
            # Build the destination path once and reuse it.
            uploadPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            file.save(uploadPath)
            initialGraphJson = formatNetwork2(uploadPath)
            G = linkpred.read_network(uploadPath)

            # Work on a copy without self-loops.
            H = G.copy()
            num_loops = nx.number_of_selfloops(G)
            if num_loops:
                H.remove_edges_from(nx.selfloop_edges(G))

            CommonNeighbours = mypred.predictors.CommonNeighboursGF(
                H, excluded=H.edges())
            CommonNeighbours_results = CommonNeighbours.predict()
            top = CommonNeighbours_results.top()

            sentenceunsorted = []
            newLinks = []
            jsonDict = []

            # Same network with integer node ids; the original label is
            # kept in each node's "label" attribute.
            G = nx.convert_node_labels_to_integers(H, 1, "default", "label")
            CommonNeighboursG = mypred.predictors.CommonNeighboursGF(
                G, excluded=G.edges())
            CommonNeighbours_resultsG = CommonNeighboursG.predict()
            topG = CommonNeighbours_resultsG.top()

            for authors, score in topG.items():
                authorsArray = [authors[0], authors[1]]
                # Sub-graph made of the pair and their shared neighbours.
                common = intersection(
                    list(G.neighbors(authors[0])),
                    list(G.neighbors(authors[1]))) + authorsArray
                subG = G.subgraph(common)

                cngfScore = 0
                for nodeID in subG.nodes():
                    if nodeID not in authorsArray:
                        # Degree inside the sub-graph, damped by the log
                        # of the degree in the whole network.
                        cngfScore = cngfScore + \
                            (subG.degree[nodeID] / math.log10(G.degree[nodeID]))

                authorOne = G.nodes[authorsArray[1]]
                authorTwo = G.nodes[authorsArray[0]]
                sentenceunsorted.append({
                    "text": authorOne['label'] + " - " + authorTwo['label'] +
                    " le score est :" + str(cngfScore),
                    "score": cngfScore
                })
                newLinks.append({
                    "from": authorOne['id'],
                    "to": authorTwo['id'],
                    "value": float(1.0),
                    "authOne": authorOne,
                    "authTwo": authorTwo,
                    "score": cngfScore
                })

            sentence = [s['text'] for s in sentenceunsorted]

            # NOTE(review): jsonDict is built but not passed to the
            # template. Each entry now carries the score of its own
            # prediction instead of the value left over from the loop
            # above.
            for authors, score in top.items():
                jsonDict.append({
                    "authorSource": str(authors).split(' - ')[0],
                    "authorDest": str(authors).split(' - ')[1],
                    "score": score
                })

            return render_template('generatedGraph.html',
                                   newLinks=newLinks,
                                   predictions=sentence,
                                   data=initialGraphJson,
                                   filename=filename,
                                   DL_AS_NET_URL=DL_AS_NET_URL)
        else:
            flash("format inccorecte, veillez sélectionner un fichier .net valide ")
            return redirect(request.url)
    return render_template('downloads.html')
def test_configuration():
    """Run the connectivity checks on a configuration-model graph."""
    degrees = nx.random_powerlaw_tree_sequence(100, tries=5000)
    model = nx.configuration_model(degrees)
    G = nx.Graph(model)
    # The checks are run on a graph without self-loops.
    loops = nx.selfloop_edges(G)
    G.remove_edges_from(loops)
    _check_connectivity(G)
def find_good_gurobi_subgraph(
    root,
    targets,
    node_name_dict,
    prior_probabilities,
    time_limit,
    num_threads,
    max_neighborhood_size,
    seed=None,
    num_iter=-1,
    weighted=False,
    n_neighbors=10,
):
    """
    Sub-Function used for multi-threading in hybrid method

    :param root:
        Sub-root of the subgraph that is attempted to be reconstructed
    :param targets:
        List of sub-targets for a given subroot where each node is in the form 'Ch1|Ch2|....|Chn'
    :param node_name_dict:
        Mapping from node identifiers to node names; used to relabel the returned graphs and the root.
    :param prior_probabilities:
        A nested dictionary containing prior probabilities for [character][state] mappings
        where characters are in the form of integers, and states are in the form of strings,
        and values are the probability of mutation from the '0' state.
        Must not be None when ``weighted`` is true.
    :param time_limit:
        Length of time allowed for ILP convergence.
    :param num_threads:
        Number of threads to be used during ILP solving.
    :param max_neighborhood_size:
        Maximum size of potential graph allowed.
    :param seed:
        Forwarded to ``solve_steiner_instance``.
    :param num_iter:
        Forwarded to ``solve_steiner_instance``.
    :param weighted:
        Forwarded to ``build_potential_graph_from_base_graph``.
    :param n_neighbors:
        Forwarded to ``find_neighbors`` when the greedy fallback is used.
    :return:
        A 4-tuple ``(subgraphs, root_name, pid, graph_sizes)``: the list of subgraphs found for the
        given subset of nodes, the root (or its name), the id printed in the log lines, and the
        graph sizes reported while building the potential graph (an empty dict when there is a
        single distinct target).
    """
    if weighted:
        assert prior_probabilities is not None

    # md5 is only used to derive a short, stable id for the log lines.
    pid = hashlib.md5(root.encode("utf-8")).hexdigest()

    print(
        "Started new thread for: "
        + str(root)
        + " (num targets = "
        + str(len(targets))
        + ") , pid = "
        + str(pid),
        flush=True,
    )

    # A single distinct target needs no solving: return a one-node graph.
    if len(set(targets)) == 1:
        graph = nx.DiGraph()
        graph.add_node(node_name_dict[root])
        return [graph], root, pid, {}

    proot, targets_pruned, pruned_to_orig = prune_unique_alleles(root, targets)

    lca = root_finder(targets_pruned)

    # Largest pairwise "width" through the LCA; 0 is the floor so max()
    # never sees an empty list.
    distances = [get_edge_length(lca, t) for t in targets_pruned]
    n_dist = len(distances)
    widths = [0] + [
        distances[i] + distances[j] + 1
        for i in range(n_dist)
        for j in range(i + 1, n_dist)
    ]
    max_lca = max(widths)

    (
        potential_network_priors,
        lca_dist,
        graph_sizes,
    ) = build_potential_graph_from_base_graph(
        targets_pruned,
        proot,
        priors=prior_probabilities,
        max_neighborhood_size=max_neighborhood_size,
        pid=pid,
        weighted=weighted,
        lca_dist=max_lca,
    )

    # network was too large to compute, so just run greedy on it
    if potential_network_priors is None:
        neighbors, distances = find_neighbors(targets, n_neighbors=n_neighbors)
        subgraph = greedy_build(targets, neighbors, distances, priors=prior_probabilities, cell_cutoff=-1)[0]
        subgraph = nx.relabel_nodes(subgraph, node_name_dict)
        print("Max Neighborhood Exceeded", flush=True)
        return [subgraph], root, pid, graph_sizes

    print("Potential Graph built with maximum LCA of " + str(lca_dist) + " (pid: " + str(pid) + "). Proceeding to solver.")

    # Snapshot the self-loops first so the graph is not mutated while the
    # self-loop iterator is still being consumed.
    potential_network_priors.remove_edges_from(
        list(nx.selfloop_edges(potential_network_priors))
    )

    # The solver works on integer node ids; keep both directions of the map.
    nodes = list(potential_network_priors.nodes())
    encoder = {node: idx for idx, node in enumerate(nodes)}
    decoder = {idx: node for node, idx in encoder.items()}

    assert len(encoder) == len(decoder)

    _potential_network = nx.relabel_nodes(potential_network_priors, encoder)
    # A concrete list (not a one-shot map iterator) so the model builder
    # can traverse the targets more than once.
    _targets = [encoder[x] for x in targets_pruned]

    model, edge_variables = generate_mSteiner_model(_potential_network, encoder[proot], _targets)
    subgraphs = solve_steiner_instance(
        model,
        _potential_network,
        edge_variables,
        MIPGap=0.01,
        detailed_output=False,
        time_limit=time_limit,
        num_threads=num_threads,
        seed=seed,
        num_iter=num_iter,
    )

    all_subgraphs = []
    for subgraph in subgraphs:
        # Back to the original node labels before post-processing.
        subgraph = nx.relabel_nodes(subgraph, decoder)
        subgraph = post_process_ILP(subgraph, root, pruned_to_orig, proot, targets, node_name_dict, pid)
        all_subgraphs.append(subgraph)

    r_name = node_name_dict[root] if root in node_name_dict else root

    return all_subgraphs, r_name, pid, graph_sizes
def main(args):
    """Train a DGI encoder, then a classifier on its frozen embeddings.

    The dataset is loaded with ``load_data(args)``. The DGI model is
    trained with early stopping, and its best weights are stored in
    ``best_dgi.pkl``. The classifier is then trained on the detached
    embeddings, with validation accuracy printed every epoch and test
    accuracy printed at the end.
    """
    # ---- load and preprocess dataset ----
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)

    # Use boolean masks when this torch build offers them.
    if hasattr(torch, 'BoolTensor'):
        mask_type = torch.BoolTensor
    else:
        mask_type = torch.ByteTensor
    train_mask = mask_type(data.train_mask)
    val_mask = mask_type(data.val_mask)
    test_mask = mask_type(data.test_mask)

    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    # A negative gpu id means "stay on the CPU".
    cuda = not (args.gpu < 0)
    if cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # ---- graph preprocess ----
    g = data.graph
    if args.self_loop:
        # Replace existing self-loops with one self-loop per node.
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()

    # ---- create DGI model ----
    dgi = DGI(g,
              in_feats,
              args.n_hidden,
              args.n_layers,
              nn.PReLU(args.n_hidden),
              args.dropout)
    if cuda:
        dgi.cuda()

    dgi_optimizer = torch.optim.Adam(dgi.parameters(),
                                     lr=args.dgi_lr,
                                     weight_decay=args.weight_decay)

    # ---- train deep graph infomax ----
    cnt_wait = 0
    best = 1e9
    best_t = 0
    dur = []
    for epoch in range(args.n_dgi_epochs):
        dgi.train()
        # The first three epochs are excluded from the timing statistics.
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        dgi_optimizer.zero_grad()
        loss = dgi(features)
        loss.backward()
        dgi_optimizer.step()

        if loss < best:
            # New best loss: checkpoint and reset the patience counter.
            best = loss
            best_t = epoch
            cnt_wait = 0
            torch.save(dgi.state_dict(), 'best_dgi.pkl')
        else:
            cnt_wait += 1

        if cnt_wait == args.patience:
            print('Early stopping!')
            break

        if timed:
            dur.append(time.time() - t0)

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            n_edges / np.mean(dur) / 1000))

    # ---- create classifier model ----
    classifier = Classifier(args.n_hidden, n_classes)
    if cuda:
        classifier.cuda()

    classifier_optimizer = torch.optim.Adam(classifier.parameters(),
                                            lr=args.classifier_lr,
                                            weight_decay=args.weight_decay)

    # ---- train classifier on the embeddings of the best checkpoint ----
    print('Loading {}th epoch'.format(best_t))
    dgi.load_state_dict(torch.load('best_dgi.pkl'))
    embeds = dgi.encoder(features, corrupt=False).detach()

    dur = []
    for epoch in range(args.n_classifier_epochs):
        classifier.train()
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        classifier_optimizer.zero_grad()
        preds = classifier(embeds)
        loss = F.nll_loss(preds[train_mask], labels[train_mask])
        loss.backward()
        classifier_optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        acc = evaluate(classifier, embeds, labels, val_mask)
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
                                            acc, n_edges / np.mean(dur) / 1000))

    print()
    acc = evaluate(classifier, embeds, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def test_configuration():
    """Run the separating-set checks on a configuration-model graph."""
    degrees = nx.random_powerlaw_tree_sequence(100, tries=5, seed=72)
    model = nx.configuration_model(degrees)
    G = nx.Graph(model)
    # The checks are run on a graph without self-loops.
    loops = nx.selfloop_edges(G)
    G.remove_edges_from(loops)
    _check_separating_sets(G)
def graph_to_adj_bet(list_graph, list_n_sequence, list_node_num, model_size):
    """Convert graphs into padded adjacency tensors and their transposes.

    For every graph the edges are copied into a fresh ``MultiDiGraph``
    without self-loops. The adjacency matrix, ordered by the matching
    entry of ``list_n_sequence``, is masked row-wise, padded to
    ``model_size`` and converted with ``sparse_mx_to_torch_sparse_tensor``.
    The mask zeroes rows of nodes that lack either incoming or outgoing
    edges, and rows of nodes for which ``clique_check`` returns true.

    Returns a pair of lists: the converted adjacency matrices and the
    converted transposed adjacency matrices, one entry per input graph.
    """
    adjacency_out = []
    adjacency_t_out = []
    max_nodes = model_size
    total = len(list_graph)

    for i, source_graph in enumerate(list_graph):
        print(f"Processing graphs: {i+1}/{total}", end='\r')

        # Rebuild the graph as a MultiDiGraph from its edge list only.
        edge_list = list(source_graph.edges())
        graph = nx.MultiDiGraph()
        graph.add_edges_from(edge_list)

        # remove self-loop
        graph.remove_edges_from(list(nx.selfloop_edges(graph)))

        node_sequence = list_n_sequence[i]
        adj = nx.adjacency_matrix(graph, nodelist=node_sequence)
        node_num = list_node_num[i]
        adj_t = adj.transpose()

        # 1.0 where both the row sum of adj and of its transpose are
        # positive, 0.0 elsewhere.
        out_sum = np.sum(adj, axis=1)
        in_sum = np.sum(adj_t, axis=1)
        degree_arr = np.where(np.multiply(out_sum, in_sum) > 0, 1.0, 0.0)

        non_zero_ind = np.nonzero(degree_arr.flatten())[0]

        g_nkit = nx2nkit(graph)
        in_n_seq = [node_sequence[pos] for pos in non_zero_ind]
        all_out_dict = get_out_edges(g_nkit, node_sequence)
        all_in_dict = get_in_edges(g_nkit, in_n_seq)

        for index in non_zero_ind:
            is_zero = clique_check(index, node_sequence, all_out_dict,
                                   all_in_dict)
            # Explicit comparison kept on purpose: it mirrors the
            # original test exactly.
            if is_zero == True:
                degree_arr[index, 0] = 0.0

        mask = csr_matrix(degree_arr)
        adj = adj.multiply(mask)
        adj_t = adj_t.multiply(csr_matrix(degree_arr))

        # Pad with empty blocks so the block-diagonal result spans
        # max_nodes rows and columns.
        rand_pos = 0
        top_mat = csr_matrix((rand_pos, rand_pos))
        remain_ind = max_nodes - rand_pos - node_num
        bottom_mat = csr_matrix((remain_ind, remain_ind))

        adj_mat = sp.block_diag((top_mat, csr_matrix(adj), bottom_mat))
        adj_mat_t = sp.block_diag((top_mat, csr_matrix(adj_t), bottom_mat))

        adjacency_out.append(sparse_mx_to_torch_sparse_tensor(adj_mat))
        adjacency_t_out.append(sparse_mx_to_torch_sparse_tensor(adj_mat_t))

    print("")
    return adjacency_out, adjacency_t_out
def SimpleGraph(G):
    """Strip self-loops, then isolated nodes, from G in place.

    Returns the (modified) graph together with the list of isolated nodes
    that were removed.
    """
    loops = nx.selfloop_edges(G)
    G.remove_edges_from(loops)

    # Isolates are collected after the self-loops are gone.
    removed_nodes = list(nx.isolates(G))
    G.remove_nodes_from(removed_nodes)
    return G, removed_nodes
def main(args):
    """Train GIC on the largest connected component and report test accuracy.

    The dataset named by ``args.dataset`` is loaded and restricted to its
    largest connected component. For each (beta, K, alpha) setting taken
    from ``args``, two independent runs are made. Each run draws a random
    train/val/test split, trains a GIC encoder with early stopping (best
    weights in ``best_gic.pkl``), trains a classifier on the normalised
    frozen embeddings, and records the test accuracy. The mean and
    standard deviation of the accuracies and the elapsed minutes are
    printed.

    Raises ``ValueError`` for an unsupported dataset name or an unknown
    split configuration.
    """
    torch.manual_seed(1234)

    if args.dataset == 'cora' or args.dataset == 'citeseer' or args.dataset == 'pubmed':
        data = load_data(args)
        features = torch.FloatTensor(data.features)
        labels = torch.LongTensor(data.labels)
        in_feats = features.shape[1]
        g = data.graph
        if args.dataset == 'cora':
            # Replace existing self-loops with one self-loop per node.
            g.remove_edges_from(nx.selfloop_edges(g))
            g.add_edges_from(zip(g.nodes(), g.nodes()))
        g = DGLGraph(g)
        attr_matrix = data.features
        labels = data.labels
    else:
        if args.dataset == 'physics':
            data = Coauthor('physics')
        elif args.dataset == 'cs':
            data = Coauthor('cs')
        elif args.dataset == 'computers':
            data = AmazonCoBuy('computers')
        elif args.dataset == 'photo':
            data = AmazonCoBuy('photo')
        else:
            # Previously this fell through to an unbound `data`.
            raise ValueError(
                "Unsupported dataset: {!r}".format(args.dataset))
        g = data
        g = data[0]
        attr_matrix = g.ndata['feat']
        labels = g.ndata['label']
        features = torch.FloatTensor(g.ndata['feat'])

    ### LCC of the graph
    n_components = 1
    sparse_graph = g.adjacency_matrix_scipy(return_edge_ids=False)
    _, component_indices = sp.csgraph.connected_components(sparse_graph)
    component_sizes = np.bincount(component_indices)
    components_to_keep = np.argsort(
        component_sizes
    )[::-1][:n_components]  # reverse order to sort descending
    nodes_to_keep = [
        idx for (idx, component) in enumerate(component_indices)
        if component in components_to_keep
    ]

    adj_matrix = sparse_graph[nodes_to_keep][:, nodes_to_keep]
    num_nodes = len(nodes_to_keep)
    g = adj_matrix
    g = DGLGraph(g)
    g = remove_self_loop(g)
    g = add_self_loop(g)
    g = DGLGraph(g)

    g.ndata['feat'] = attr_matrix[nodes_to_keep]
    features = torch.FloatTensor(g.ndata['feat'].float())
    if args.dataset == 'cora' or args.dataset == 'pubmed':
        # Row-normalise the features (epsilon guards against zero rows).
        features = features / (features.norm(dim=1) + 1e-8)[:, None]
    g.ndata['label'] = labels[nodes_to_keep]
    labels = torch.LongTensor(g.ndata['label'])

    in_feats = features.shape[1]

    unique_l = np.unique(labels, return_counts=False)
    n_classes = len(unique_l)
    n_nodes = g.number_of_nodes()
    n_edges = g.number_of_edges()
    print('Number of nodes', n_nodes, 'Number of edges', n_edges)

    # One-hot labels are what the split helper receives.
    enc = OneHotEncoder()
    enc.fit(labels.reshape(-1, 1))
    ylabels = enc.transform(labels.reshape(-1, 1)).toarray()

    for beta in [args.beta]:
        for K in [args.num_clusters]:
            for alpha in [args.alpha]:
                accs = []
                t_st = time.time()

                sets = "imbalanced"

                for k in range(2):  # number of different trainings
                    random_state = np.random.RandomState()
                    if sets == "imbalanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=None,
                            val_examples_per_class=None,
                            test_examples_per_class=None,
                            train_size=20 * n_classes,
                            val_size=30 * n_classes,
                            test_size=None)
                    elif sets == "balanced":
                        train_idx, val_idx, test_idx = get_train_val_test_split(
                            random_state,
                            ylabels,
                            train_examples_per_class=20,
                            val_examples_per_class=30,
                            test_examples_per_class=None,
                            train_size=None,
                            val_size=None,
                            test_size=None)
                    else:
                        # The original had a bare string expression here,
                        # which did nothing and led to a NameError below.
                        raise ValueError(
                            "No such set configuration (imbalanced/balanced)")

                    n_nodes = len(nodes_to_keep)
                    train_mask = np.zeros(n_nodes)
                    train_mask[train_idx] = 1
                    val_mask = np.zeros(n_nodes)
                    val_mask[val_idx] = 1
                    test_mask = np.zeros(n_nodes)
                    test_mask[test_idx] = 1
                    train_mask = torch.BoolTensor(train_mask)
                    val_mask = torch.BoolTensor(val_mask)
                    test_mask = torch.BoolTensor(test_mask)

                    # Planetoid split for CORA, CiteSeer, PubMed (disabled):
                    #   train_mask = torch.BoolTensor(data.train_mask)
                    #   val_mask = torch.BoolTensor(data.val_mask)
                    #   test_mask = torch.BoolTensor(data.test_mask)
                    #   train_mask2 = torch.BoolTensor(data.train_mask)
                    #   val_mask2 = torch.BoolTensor(data.val_mask)
                    #   test_mask2 = torch.BoolTensor(data.test_mask)

                    if args.gpu < 0:
                        cuda = False
                    else:
                        cuda = True
                        torch.cuda.set_device(args.gpu)
                        features = features.cuda()
                        labels = labels.cuda()
                        train_mask = train_mask.cuda()
                        val_mask = val_mask.cuda()
                        test_mask = test_mask.cuda()

                    gic = GIC(g, in_feats, args.n_hidden, args.n_layers,
                              nn.PReLU(args.n_hidden), args.dropout, K, beta,
                              alpha)
                    if cuda:
                        gic.cuda()

                    gic_optimizer = torch.optim.Adam(
                        gic.parameters(),
                        lr=args.gic_lr,
                        weight_decay=args.weight_decay)

                    # train GIC
                    cnt_wait = 0
                    best = 1e9
                    best_t = 0
                    dur = []

                    for epoch in range(args.n_gic_epochs):
                        gic.train()
                        if epoch >= 3:
                            t0 = time.time()

                        gic_optimizer.zero_grad()
                        loss = gic(features)
                        loss.backward()
                        gic_optimizer.step()

                        if loss < best:
                            # New best loss: checkpoint, reset patience.
                            best = loss
                            best_t = epoch
                            cnt_wait = 0
                            torch.save(gic.state_dict(), 'best_gic.pkl')
                        else:
                            cnt_wait += 1

                        if cnt_wait == args.patience:
                            # Early stopping.
                            break

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                    # train classifier on the embeddings of the best checkpoint
                    gic.load_state_dict(torch.load('best_gic.pkl'))
                    embeds = gic.encoder(features, corrupt=False)
                    embeds = embeds / (embeds + 1e-8).norm(dim=1)[:, None]
                    embeds = embeds.detach()

                    # create classifier model
                    classifier = Classifier(args.n_hidden, n_classes)
                    if cuda:
                        classifier.cuda()

                    classifier_optimizer = torch.optim.Adam(
                        classifier.parameters(),
                        lr=args.classifier_lr,
                        weight_decay=args.weight_decay)

                    dur = []
                    best_a = 0
                    cnt_wait = 0
                    for epoch in range(args.n_classifier_epochs):
                        classifier.train()
                        if epoch >= 3:
                            t0 = time.time()

                        classifier_optimizer.zero_grad()
                        preds = classifier(embeds)
                        loss = F.nll_loss(preds[train_mask],
                                          labels[train_mask])
                        loss.backward()
                        classifier_optimizer.step()

                        if epoch >= 3:
                            dur.append(time.time() - t0)

                        acc = evaluate(classifier, embeds, labels, val_mask)

                        if acc > best_a and epoch > 100:
                            best_a = acc
                            best_t = epoch
                            # NOTE(review): this checkpoint is written but
                            # not reloaded before the test evaluation.
                            torch.save(classifier.state_dict(),
                                       'best_class.pkl')

                    acc = evaluate(classifier, embeds, labels, test_mask)
                    accs.append(acc)

                print('=================== ', ' alpha', alpha, ' beta ', beta,
                      'K', K)
                print(args.dataset, ' Acc (mean)', mean(accs), ' (std)',
                      stdev(accs))
                print('=================== time', int(
                    (time.time() - t_st) / 60))
def main(args):
    """Train a GraphSAGE model on cluster mini-batches and report F1 scores.

    Seeds the ``torch``, ``numpy`` and ``random`` generators from
    ``args.rnd_seed``, loads the dataset through ``load_data``, optionally
    standardises the features, and trains for ``args.n_epochs`` epochs over
    the batches yielded by ``ClusterIter``.  Every ``args.val_every`` epochs
    the model is evaluated on the validation mask; whenever the micro-F1
    improves, the weights are saved as ``best_model.pkl`` in the log
    directory.  After training, the test micro/macro F1 is printed and logged
    through the ``SummaryWriter``.

    Parameters
    ----------
    args : namespace
        Parsed options.  Attributes read here: ``rnd_seed``, ``dataset``,
        ``normalize``, ``self_loop``, ``gpu``, ``psize``, ``batch_size``,
        ``use_pp``, ``n_hidden``, ``n_layers``, ``dropout``, ``lr``,
        ``weight_decay``, ``n_epochs``, ``log_every``, ``val_every`` and
        ``use_val``.  ``load_data`` and ``save_log_dir`` also receive the
        whole object.
    """
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    data = load_data(args)

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)

    # Normalize features (the scaler is fitted on training rows only)
    if args.normalize:
        train_feats = data.features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(data.features)
    else:
        features = data.features

    features = torch.FloatTensor(features)
    # Multi-label targets are float (for BCE); single-label ones are class ids.
    if not multitask:
        labels = torch.LongTensor(data.labels)
    else:
        labels = torch.FloatTensor(data.labels)
    # Fall back to ByteTensor on torch builds that lack BoolTensor.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))

    # create GCN model
    g = data.graph
    if args.self_loop and not args.dataset.startswith('reddit'):
        # Drop existing self-loops first so every node ends up with exactly one.
        g.remove_edges_from(nx.selfloop_edges(g))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
        print("adding self-loop edges")
    g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)

    cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid,
        use_pp=args.use_pp)

    print("features shape, ", features.shape)

    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
        # Only report here: `train_nid` has a `.device` attribute solely
        # after it has been converted to a (CUDA) tensor above.
        print("current memory after model before training",
              torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate
        if epoch % args.val_every == 0:
            val_f1_mic, val_f1_mac = evaluate(
                model, g, labels, val_mask, multitask)
            print(
                "Val F1-mic{:.4f}, Val F1-mac{:.4f}". format(val_f1_mic, val_f1_mac))
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    # Elapsed wall-clock time: end minus start, so the value is positive.
    print(f'training using time {end_time - start_time}')

    # test (optionally with the best checkpoint found during validation)
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
    test_f1_mic, test_f1_mac = evaluate(
        model, g, labels, test_mask, multitask)
    print("Test F1-mic{:.4f}, Test F1-mac{:.4f}". format(test_f1_mic, test_f1_mac))
    writer.add_scalar('test/f1-mic', test_f1_mic)
    writer.add_scalar('test/f1-mac', test_f1_mac)
OVERWRITE = False  # if we compute and overwrite class where its already been computed
SAVE_RATE = 20  # save every n networks

if __name__ == '__main__':
    networks_df = pd.read_csv('data/precomputed_with_classes.csv')

    if OVERWRITE:
        # Start from scratch: blank the class column and every per-method
        # score column so that each row is recomputed below.
        networks_df['class'] = np.nan
        for method in link_prediction_methods:
            networks_df[method] = np.nan

    # For every network, compute the best link prediction method.
    for position, record in networks_df.iterrows():
        needs_compute = isnan(record['class']) or OVERWRITE
        if needs_compute:
            graph = get_graph(record['name'], record['download_url'])

            # Self loops and weights are not allowed.
            remove_weights(graph)
            self_loops = list(nx.selfloop_edges(graph))
            graph.remove_edges_from(self_loops)

            scores, best_class = compute_class(graph)

            class_col = networks_df.columns.get_loc('class')
            networks_df.iloc[position, class_col] = best_class
            for method_idx, method in enumerate(link_prediction_methods):
                score = scores[method_idx]
                method_col = networks_df.columns.get_loc(method)
                networks_df.iloc[position, method_col] = score

        # Checkpoint every SAVE_RATE networks, and once more on the last row.
        at_checkpoint = position % SAVE_RATE == 0
        at_last_row = position == networks_df.shape[0] - 1
        if at_checkpoint or at_last_row:
            networks_df.to_csv('data/precomputed_with_classes_4.csv',
                               index=False)
def fcn_metagraph_scc(self, A_sparse_sub):
    """Condense a directed graph into its SCC metagraph and order its vertices.

    Builds a directed graph from ``A_sparse_sub`` (self-loops removed), finds
    its strongly connected components (SCCs), assembles the adjacency matrix
    between SCCs and topologically sorts that metagraph.  If needed the order
    is rearranged so that SCCs without outgoing meta-edges ("terminal" SCCs)
    come last.

    Parameters
    ----------
    A_sparse_sub : SciPy sparse matrix
        Square adjacency matrix.  NOTE(review): it is indexed with
        ``A_sparse_sub[row_sel, col_sel]``, so a format supporting fancy
        indexing (e.g. CSR/CSC) is presumably expected -- confirm with caller.

    Returns
    -------
    vert_topol_sort : ndarray
        Vertices concatenated SCC by SCC in the final metagraph order.
    term_cycles_ind : ndarray or list
        Positions, in the final order, of terminal SCCs that have more than
        one vertex; an empty list when there are none.
    A_metagraph : scipy.sparse.csr_matrix
        SCC-by-SCC matrix built from the entries of ``A_sparse_sub`` that join
        different SCCs.
    scc_list : list of set
        The SCCs, in reversed ``nx.strongly_connected_components`` order.
    term_cycles_bounds : list
        Empty, or a one-element list holding the first positions followed by
        the last positions (0-based, in ``vert_topol_sort``) of each terminal
        cycle.
    """
    matr_size = A_sparse_sub.shape[0]
    g_sub = nx.from_scipy_sparse_matrix(A_sparse_sub,
                                        create_using=nx.DiGraph())
    g_sub.remove_edges_from(nx.selfloop_edges(g_sub))
    # Here we reverse it only for debugging purpose
    # The order shouldn't matter, but it's nice to have the same as matlab
    scc_list = list(reversed(list(
        nx.strongly_connected_components(g_sub))))
    # print("%d connected components" % len(scc_list))
    num_verts_per_scc = []
    # Column vector mapping each vertex to the index of its SCC in scc_list.
    scc_memb_per_vert = np.zeros((matr_size, 1))
    for i, scc in enumerate(scc_list):
        num_verts_per_scc.append(len(scc))
        scc_memb_per_vert[list(scc), :] = i
    # row, col = np.where((A_sparse_sub - np.diag(A_sparse_sub.diagonal())) > 0)
    # Yet another trick to get the exact same results as matlab
    # The difference is returning the list from parsing via columns or via rows, hopefully nothing critical
    t_matr = (A_sparse_sub - sparse.diags(A_sparse_sub.diagonal())).transpose()
    # `find` runs on the transpose, so its first output holds the column
    # indices of the original matrix and its second output the row indices.
    col, row, _ = sparse.find(t_matr > 0)
    # Keep only the edges whose endpoints lie in different SCCs.
    diff = scc_memb_per_vert[row] != scc_memb_per_vert[col]
    row_sel = row[np.where(diff[:, 0])]
    col_sel = col[np.where(diff[:, 0])]
    A_metagraph = sparse.csr_matrix(
        (np.array(A_sparse_sub[row_sel, col_sel]).flatten(),
         (scc_memb_per_vert[row_sel][:, 0], scc_memb_per_vert[col_sel][:, 0])),
        shape=(len(num_verts_per_scc), len(num_verts_per_scc)))
    metagraph = nx.from_scipy_sparse_matrix(A_metagraph,
                                            create_using=nx.DiGraph())
    metagraph_ordering = np.array(list(nx.topological_sort(metagraph)))
    # Terminal SCCs: rows of the metagraph matrix that sum to zero.
    terminal_scc_ind, _ = np.where(A_metagraph.sum(axis=1) == 0)
    terminal_scc_pos = np.isin(metagraph_ordering, terminal_scc_ind)
    nonterm_scc_num = len(num_verts_per_scc) - len(terminal_scc_ind)
    # SCCs with more than one vertex, i.e. actual cycles.
    scc_sup1 = [i for i, scc in enumerate(scc_list) if len(scc) > 1]
    term_cycles_ind = set(scc_sup1).intersection(set(terminal_scc_ind))
    where_terminal_scc_pos, = np.where(terminal_scc_pos)
    # If any terminal SCC sits among the first `nonterm_scc_num` positions,
    # move all terminal SCCs to the end (relative order is preserved).
    if np.sum(
            np.logical_not(
                where_terminal_scc_pos > (nonterm_scc_num - 1))) > 0:
        nonterm_scc_inds = np.logical_not(
            np.isin(metagraph_ordering, terminal_scc_ind))
        metagraph_ordering_terminal_bottom = np.concatenate([
            metagraph_ordering[nonterm_scc_inds],
            metagraph_ordering[terminal_scc_pos]
        ])
    else:
        metagraph_ordering_terminal_bottom = metagraph_ordering
    if len(term_cycles_ind) > 0:
        scc_cell_reordered = [
            scc_list[i] for i in metagraph_ordering_terminal_bottom
        ]
        # index of cells containing term cycles after reordering
        term_cycles_ind, = np.where(
            np.isin(metagraph_ordering_terminal_bottom,
                    np.array(list(term_cycles_ind))))
        # we need the first and last vertex positions of each terminal cycle
        scc_cell_reordered_lengths = np.array(
            [len(scc) for scc in scc_cell_reordered])
        scc_cell_reordered_cumsum = np.cumsum(scc_cell_reordered_lengths)
        cycle_first_verts = scc_cell_reordered_cumsum[
            term_cycles_ind] - scc_cell_reordered_lengths[term_cycles_ind]
        cycle_last_verts = scc_cell_reordered_cumsum[term_cycles_ind] - 1
        term_cycles_bounds = [
            np.concatenate([cycle_first_verts, cycle_last_verts])
        ]
    else:
        term_cycles_ind = []
        term_cycles_bounds = []
    # reordered original vertices
    vert_topol_sort = np.concatenate(
        [list(scc_list[i]) for i in metagraph_ordering_terminal_bottom])
    return vert_topol_sort, term_cycles_ind, A_metagraph, scc_list, term_cycles_bounds
def fcn_scc_subgraphs(self, A_sparse, x0):
    """Split the graph into subnetworks and topologically order the populated ones.

    The graph is first divided into weakly connected subnetworks; the
    strongly connected components (SCCs) of each subnetwork are then
    identified.  For every subnetwork in which ``x0`` has positive total
    mass, a vertex ordering is stored.

    Parameters
    ----------
    A_sparse : SciPy sparse matrix
        Square adjacency matrix supporting fancy indexing (e.g. CSR/CSC).
        It is not modified.
    x0 : array-like
        One value per vertex; a subnetwork is "nonempty" when its entries of
        ``x0`` sum to more than zero.

    Notes
    -----
    Sets the following attributes on ``self``:

    * ``subnetws`` -- vertex lists of the weakly connected subnetworks.
    * ``scc_submats`` -- per subnetwork, its SCCs as lists of local indices.
    * ``nonempty_subgraphs`` -- indices of the nonempty subnetworks.
    * ``sorted_vertices`` -- one entry per nonempty subnetwork that is either
      acyclic (a topological order of its local vertices) or a single SCC
      (its ``scc_submats`` entry, unchanged).
    * ``cyclic_sorted_subgraphs`` -- for the remaining nonempty subnetworks,
      ``(vert_topol_sort, term_cycles_ind, term_cycle_bounds)`` as returned by
      ``fcn_metagraph_scc``.
    """
    # Weakly connected subnetworks, ignoring self-loops.  `copy=True` matters
    # when A_sparse is already CSC: the constructor would otherwise share its
    # buffers and `setdiag(0)` would wipe the caller's diagonal in place.
    B_sparse = sparse.csc_matrix(A_sparse, copy=True)
    B_sparse.setdiag(0)
    nb_scc, labels = sparse.csgraph.connected_components(B_sparse,
                                                         directed=True,
                                                         connection='weak')
    scc = [[] for _ in range(nb_scc)]
    for i, label in enumerate(labels):
        scc[label].append(i)
    self.subnetws = scc

    self.scc_submats = []
    self.nonempty_subgraphs = []
    # Strongly connected components inside each subnetwork.
    for i, subnet in enumerate(self.subnetws):
        # Slicing is done in two steps: first the rows, which is the most
        # efficient for a csr sparse matrix, then the columns.
        t_sparse = A_sparse[subnet, :][:, subnet]
        t_sparse.setdiag(0)
        nb_scc, labels = sparse.csgraph.connected_components(
            t_sparse, directed=True, connection='strong')
        scc = [[] for _ in range(nb_scc)]
        for j, label in enumerate(labels):
            scc[label].append(j)
        self.scc_submats.append(scc)
        if sum(x0[subnet]) > 0:
            self.nonempty_subgraphs.append(i)

    self.sorted_vertices = []
    self.cyclic_sorted_subgraphs = []
    for nonempty_subgraph in self.nonempty_subgraphs:
        subnet = self.subnetws[nonempty_subgraph]
        subnet_sccs = self.scc_submats[nonempty_subgraph]
        A_sparse_sub = A_sparse[subnet, :][:, subnet]
        if A_sparse_sub.shape[0] == len(subnet_sccs):
            # Every vertex is its own SCC, so once self-loops are dropped
            # the subnetwork is acyclic and can be sorted directly.
            t_g = nx.from_scipy_sparse_matrix(A_sparse_sub,
                                              create_using=nx.DiGraph())
            t_g.remove_edges_from(nx.selfloop_edges(t_g))
            self.sorted_vertices.append(list(nx.topological_sort(t_g)))
        elif len(subnet_sccs) == 1:
            # If the entire subnetwork is only one connected component,
            # there is no need for re-ordering.
            self.sorted_vertices.append(subnet_sccs)
        else:
            # Cycles present: order the SCC metagraph instead.
            vert_topol_sort, term_cycles_ind, _, _, term_cycle_bounds = \
                self.fcn_metagraph_scc(A_sparse_sub)
            self.cyclic_sorted_subgraphs.append(
                (vert_topol_sort, term_cycles_ind, term_cycle_bounds))
def test_configuration():
    """Check k-components connectivity on a seeded configuration-model graph."""
    deg_seq = nx.random_powerlaw_tree_sequence(100, tries=5, seed=72)
    # Seed the configuration model too: the degree sequence alone does not
    # fix the wiring, so without it the test graph would differ between runs.
    G = nx.Graph(nx.configuration_model(deg_seq, seed=72))
    G.remove_edges_from(nx.selfloop_edges(G))
    result = nx.k_components(G)
    _check_connectivity(G, result)
paths = [path % file for file in files]

# Parameters:
drop_first_eigenvector = True  # Defines whether the first eigenvector should be dropped
min_random_seed = 0  # minimum of the range of k-means random seeds for grid search
max_random_seed = 4  # maximum of the range of k-means random seeds for grid search
normalised_vals = True  # sets whether the Laplacian matrix should be normalised. possible values: True, False, [True, False]
max_offset = 26  # contributes to defining the set of eigenvecs to be used (=k+max_offset-1+negative_offset)
negative_offset = 0  # contributes to defining the set of eigenvecs to be used (=k+max_offset-1+negative_offset)

for filepath in paths:
    print("Started {}".format(filepath))
    task_params = Reader.read(filepath)

    # Removing loops does not change anything for ca-GrQc
    graph_no_loops = task_params["graph"].copy()
    graph_no_loops.remove_edges_from(nx.selfloop_edges(task_params["graph"]))

    curr_smallest_value = inf  # sets the current smallest score to infinite

    # Accept a single bool as shorthand for a one-element list of settings.
    if isinstance(normalised_vals, bool):
        normalised_vals = [normalised_vals]

    for normalised in normalised_vals:
        eval_graph = task_params["graph"]  # save the graph read from file
        task_params["graph"] = graph_no_loops  # swap to compute the embedding
        embedding = Reader.load_embedding(output_directory, task_params,
                                          max_offset, negative_offset,
                                          normalised=normalised,
                                          manifold_method=False)
        # restore the original graph for evaluation
#!/usr/bin/python
from networkx.algorithms import approximation as apxa
import networkx as nx
import random

# Load the graph, strip self-loops, and work on its undirected version.
G1 = nx.read_gexf("graph.gexf")
print("Graph has loaded")
G1.remove_edges_from(nx.selfloop_edges(G1))
G = G1.to_undirected()

# Extract the 2-, 3- and 4-core in turn and write each one to its own file.
for k in (2, 3, 4):
    k_core = nx.k_core(G, k=k)
    print("%d-core" % k)
    nx.write_gexf(k_core, "%d-core.gexf" % k)
    print("%d-core has written!" % k)
def main(args):
    """Train a GAT node classifier on the loaded dataset and print test accuracy.

    The dataset graph gets exactly one self-loop per node before the model is
    built.  Training runs for ``args.epochs`` epochs with Adam; per-epoch
    timing skips the first three epochs.  With ``args.early_stop`` set (and
    ``args.fastmode`` unset) an ``EarlyStopping`` helper may end training
    early, and the weights in ``es_checkpoint.pt`` are reloaded before the
    final test evaluation.

    Parameters
    ----------
    args : namespace
        Parsed options.  Attributes read here: ``gpu``, ``num_heads``,
        ``num_layers``, ``num_out_heads``, ``num_hidden``, ``in_drop``,
        ``attn_drop``, ``negative_slope``, ``residual``, ``early_stop``,
        ``lr``, ``weight_decay``, ``epochs`` and ``fastmode``.  ``load_data``
        also receives the whole object.
    """
    # Load the dataset and wrap its arrays as tensors.
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)

    # Use BoolTensor masks where torch provides them, ByteTensor otherwise.
    mask_cls = (torch.BoolTensor if hasattr(torch, 'BoolTensor')
                else torch.ByteTensor)
    train_mask = mask_cls(data.train_mask)
    val_mask = mask_cls(data.val_mask)
    test_mask = mask_cls(data.test_mask)

    num_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    n_train = train_mask.int().sum().item()
    n_val = val_mask.int().sum().item()
    n_test = test_mask.int().sum().item()
    print("""----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" %
          (n_edges, n_classes, n_train, n_val, n_test))

    # A negative gpu id means "stay on the CPU".
    cuda = not (args.gpu < 0)
    if cuda:
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # Normalise self-loops: drop any existing ones, then add one per node.
    g = data.graph
    g.remove_edges_from(nx.selfloop_edges(g))
    g = DGLGraph(g)
    g.add_edges(g.nodes(), g.nodes())
    n_edges = g.number_of_edges()

    # Build the model: one head count per hidden layer plus the output layer.
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = GAT(g,
                args.num_layers,
                num_feats,
                args.num_hidden,
                n_classes,
                heads,
                F.elu,
                args.in_drop,
                args.attn_drop,
                args.negative_slope,
                args.residual)
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    dur = []
    for epoch in range(args.epochs):
        model.train()
        # The first three epochs are excluded from the timing statistics.
        timed = epoch >= 3
        if timed:
            t0 = time.time()

        # Forward pass, loss on the training nodes, then one optimizer step.
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if timed:
            dur.append(time.time() - t0)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            # Fast mode reuses the training-pass logits for validation.
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop and stopper.step(val_acc, model):
                break

        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
              " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                  epoch, np.mean(dur), loss.item(), train_acc, val_acc,
                  n_edges / np.mean(dur) / 1000))

    print()
    if args.early_stop:
        model.load_state_dict(torch.load('es_checkpoint.pt'))
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
def _test_neg_metagraph_merge():
    """
    Test that the negative metagraph tracks the number of negative edges
    between PCCs through label-changing merge operations
    """
    from wbia.algo.graph import demo
    from wbia.algo.graph.state import POSTV, NEGTV, INCMP, UNREV, UNKWN  # NOQA

    # Create a graph with 4 CCs, with 3-pos-redun, and no negative edges
    infr = demo.demodata_infr(num_pccs=4, pcc_size=5, pos_redun=3,
                              ignore_pair=True, infer=True)
    cc_a, cc_b, cc_c, cc_d = infr.positive_components()
    a1, a2, a3, a4, a5 = cc_a
    b1, b2, b3, b4, b5 = cc_b
    c1, c2, c3, c4, c5 = cc_c
    d1, d2, d3, d4, d5 = cc_d

    nmg = infr.neg_metagraph

    # Add three negative edges between a and b
    # one between (a, c), (b, d), (a, d), and (c, d)
    A, B, C, D = infr.node_labels(a1, b1, c1, d1)
    infr.add_feedback((a1, b1), NEGTV)
    infr.add_feedback((a2, b2), NEGTV)
    infr.add_feedback((a3, b3), NEGTV)
    infr.add_feedback((a4, c4), NEGTV)
    infr.add_feedback((b4, d4), NEGTV)
    infr.add_feedback((c1, d1), NEGTV)
    infr.add_feedback((a4, d4), NEGTV)

    assert nmg.edges[(A, B)]['weight'] == 3
    assert nmg.edges[(A, C)]['weight'] == 1
    assert (B, C) not in nmg.edges
    assert nmg.edges[(A, D)]['weight'] == 1
    assert nmg.edges[(B, D)]['weight'] == 1
    assert nmg.number_of_edges() == 5
    assert nmg.number_of_nodes() == 4

    # Now merge A and B
    infr.add_feedback((a1, b1), POSTV)
    AB = infr.node_label(a1)

    # The old meta-nodes should now be combined into AB
    assert infr.node_label(b1) == AB
    assert A != B
    assert A == AB or A not in nmg.nodes
    assert B == AB or B not in nmg.nodes

    # Should have combined weights from (A, D) and (B, D)
    # And (A, C) should be brought over as-is
    assert nmg.edges[(AB, D)]['weight'] == 2
    assert nmg.edges[(AB, C)]['weight'] == 1

    # should now have a self-loop with weight 2
    # (it decreased because we changed a previously neg edge to pos)
    assert nmg.edges[(AB, AB)]['weight'] == 2
    assert len(list(nx.selfloop_edges(nmg))) == 1

    # nothing should change between C and D
    assert nmg.edges[(C, D)]['weight'] == 1

    # Should decrease number of nodes and edges
    assert nmg.number_of_nodes() == 3
    assert nmg.number_of_edges() == 4

    infr.assert_neg_metagraph()

    # Additional merge
    infr.add_feedback((c2, d2), POSTV)
    CD = infr.node_label(c1)
    infr.assert_neg_metagraph()

    assert nmg.number_of_nodes() == 2
    assert nmg.number_of_edges() == 3
    assert nmg.edges[(CD, CD)]['weight'] == 1
    assert nmg.edges[(AB, CD)]['weight'] == 3
    assert nmg.edges[(AB, AB)]['weight'] == 2

    # Yet another merge
    infr.add_feedback((a1, c1), POSTV)
    ABCD = infr.node_label(c1)
    assert nmg.number_of_nodes() == 1
    assert nmg.number_of_edges() == 1
    # All six remaining negative edges are now internal to the single PCC.
    # This must be checked, not assigned: writing the value into the
    # metagraph would hide a wrong count.
    assert nmg.edges[(ABCD, ABCD)]['weight'] == 6
    infr.assert_neg_metagraph()
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None,
                           weight='weight', format='csr'):
    """Returns the graph adjacency matrix as a SciPy sparse matrix.

    Parameters
    ----------
    G : graph
        The NetworkX graph used to construct the NumPy matrix.

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in `nodelist`.
       If `nodelist` is None, then the ordering is produced by G.nodes().

    dtype : NumPy data-type, optional
        A valid NumPy dtype used to initialize the array. If None, then the
        NumPy default is used.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None then all edge weights are 1.

    format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
        The type of the matrix to be returned (default 'csr').  For
        some algorithms different implementations of sparse matrices
        can perform better.  See [1]_ for details.

    Returns
    -------
    M : SciPy sparse matrix
       Graph adjacency matrix.

    Raises
    ------
    NetworkXError
        If `nodelist` is empty (or `G` has no nodes), if `nodelist` contains
        duplicates, or if `format` is not a recognized sparse matrix format.

    Notes
    -----
    For directed graphs, matrix entry i,j corresponds to an edge from i to j.

    The matrix entries are populated using the edge attribute held in
    parameter weight. When an edge does not have that attribute, the
    value of the entry is 1.

    For multiple edges the matrix values are the sums of the edge weights.

    When `nodelist` does not contain every node in `G`, the matrix is built
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
    Self-loops on nodes outside `nodelist` are ignored.

    Uses coo_matrix format. To convert to other formats specify the
    format= keyword.

    The convention used for self-loop edges in graphs is to assign the
    diagonal matrix entry value to the weight attribute of the edge
    (or the number 1 if the edge has no weight attribute).  If the
    alternate convention of doubling the edge weight is desired the
    resulting Scipy sparse matrix can be modified as follows:

    >>> import scipy as sp
    >>> G = nx.Graph([(1, 1)])
    >>> A = nx.to_scipy_sparse_matrix(G)
    >>> print(A.todense())
    [[1]]
    >>> A.setdiag(A.diagonal() * 2)
    >>> print(A.todense())
    [[2]]

    Examples
    --------
    >>> G = nx.MultiDiGraph()
    >>> G.add_edge(0, 1, weight=2)
    0
    >>> G.add_edge(1, 0)
    0
    >>> G.add_edge(2, 2, weight=3)
    0
    >>> G.add_edge(2, 2)
    1
    >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
    >>> print(S.todense())
    [[0 2 0]
     [1 0 0]
     [0 0 4]]

    References
    ----------
    .. [1] Scipy Dev. References, "Sparse Matrices",
       https://docs.scipy.org/doc/scipy/reference/sparse.html
    """
    from scipy import sparse
    if nodelist is None:
        nodelist = list(G)
    nlen = len(nodelist)
    if nlen == 0:
        raise nx.NetworkXError("Graph has no nodes or edges")

    if len(nodelist) != len(set(nodelist)):
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
        raise nx.NetworkXError(msg)

    index = dict(zip(nodelist, range(nlen)))
    coefficients = zip(*((index[u], index[v], d.get(weight, 1))
                         for u, v, d in G.edges(nodelist, data=True)
                         if u in index and v in index))
    try:
        row, col, data = coefficients
    except ValueError:
        # there is no edge in the subgraph
        row, col, data = [], [], []

    if G.is_directed():
        M = sparse.coo_matrix((data, (row, col)),
                              shape=(nlen, nlen), dtype=dtype)
    else:
        # symmetrize matrix (plain lists so they can be extended below)
        d = list(data) + list(data)
        r = list(row) + list(col)
        c = list(col) + list(row)
        # selfloop entries get double counted when symmetrizing
        # so we subtract the data on the diagonal.  Filter to `nodelist`
        # *before* testing for emptiness: a graph whose only self-loops lie
        # outside `nodelist` must not reach the unpacking step.
        diagonal = [(index[u], -attrs.get(weight, 1))
                    for u, v, attrs in nx.selfloop_edges(G, data=True)
                    if u in index]
        if diagonal:
            diag_index, diag_data = zip(*diagonal)
            d.extend(diag_data)
            r.extend(diag_index)
            c.extend(diag_index)
        M = sparse.coo_matrix((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
    try:
        return M.asformat(format)
    # From Scipy 1.1.0, asformat will throw a ValueError instead of an
    # AttributeError if the format is not recognized.
    except (AttributeError, ValueError):
        raise nx.NetworkXError("Unknown sparse matrix format: %s" % format)
def network_simplex(G, demand='demand', capacity='capacity', weight='weight'): r"""Find a minimum cost flow satisfying all demands in digraph G. This is a primal network simplex algorithm that uses the leaving arc rule to prevent cycling. G is a digraph with edge costs and capacities and in which nodes have demand, i.e., they want to send or receive some amount of flow. A negative demand means that the node wants to send flow, a positive demand means that the node want to receive flow. A flow on the digraph G satisfies all demand if the net flow into each node is equal to the demand of that node. Parameters ---------- G : NetworkX graph DiGraph on which a minimum cost flow satisfying all demands is to be found. demand : string Nodes of the graph G are expected to have an attribute demand that indicates how much flow a node wants to send (negative demand) or receive (positive demand). Note that the sum of the demands should be 0 otherwise the problem in not feasible. If this attribute is not present, a node is considered to have 0 demand. Default value: 'demand'. capacity : string Edges of the graph G are expected to have an attribute capacity that indicates how much flow the edge can support. If this attribute is not present, the edge is considered to have infinite capacity. Default value: 'capacity'. weight : string Edges of the graph G are expected to have an attribute weight that indicates the cost incurred by sending one unit of flow on that edge. If not present, the weight is considered to be 0. Default value: 'weight'. Returns ------- flowCost : integer, float Cost of a minimum cost flow satisfying all demands. flowDict : dictionary Dictionary of dictionaries keyed by nodes such that flowDict[u][v] is the flow edge (u, v). Raises ------ NetworkXError This exception is raised if the input graph is not directed, not connected or is a multigraph. NetworkXUnfeasible This exception is raised in the following situations: * The sum of the demands is not zero. 
Then, there is no flow satisfying all demands. * There is no flow satisfying all demand. NetworkXUnbounded This exception is raised if the digraph G has a cycle of negative cost and infinite capacity. Then, the cost of a flow satisfying all demands is unbounded below. Notes ----- This algorithm is not guaranteed to work if edge weights or demands are floating point numbers (overflows and roundoff errors can cause problems). As a workaround you can use integer numbers by multiplying the relevant edge attributes by a convenient constant factor (eg 100). See also -------- cost_of_flow, max_flow_min_cost, min_cost_flow, min_cost_flow_cost Examples -------- A simple example of a min cost flow problem. >>> import networkx as nx >>> G = nx.DiGraph() >>> G.add_node('a', demand=-5) >>> G.add_node('d', demand=5) >>> G.add_edge('a', 'b', weight=3, capacity=4) >>> G.add_edge('a', 'c', weight=6, capacity=10) >>> G.add_edge('b', 'd', weight=1, capacity=9) >>> G.add_edge('c', 'd', weight=2, capacity=5) >>> flowCost, flowDict = nx.network_simplex(G) >>> flowCost 24 >>> flowDict # doctest: +SKIP {'a': {'c': 1, 'b': 4}, 'c': {'d': 1}, 'b': {'d': 4}, 'd': {}} The mincost flow algorithm can also be used to solve shortest path problems. To find the shortest path between two nodes u and v, give all edges an infinite capacity, give node u a demand of -1 and node v a demand a 1. Then run the network simplex. The value of a min cost flow will be the distance between u and v and edges carrying positive flow will indicate the path. >>> G=nx.DiGraph() >>> G.add_weighted_edges_from([('s', 'u' ,10), ('s' ,'x' ,5), ... ('u', 'v' ,1), ('u' ,'x' ,2), ... ('v', 'y' ,1), ('x' ,'u' ,3), ... ('x', 'v' ,5), ('x' ,'y' ,2), ... 
('y', 's' ,7), ('y' ,'v' ,6)]) >>> G.add_node('s', demand = -1) >>> G.add_node('v', demand = 1) >>> flowCost, flowDict = nx.network_simplex(G) >>> flowCost == nx.shortest_path_length(G, 's', 'v', weight='weight') True >>> sorted([(u, v) for u in flowDict for v in flowDict[u] if flowDict[u][v] > 0]) [('s', 'x'), ('u', 'v'), ('x', 'u')] >>> nx.shortest_path(G, 's', 'v', weight = 'weight') ['s', 'x', 'u', 'v'] It is possible to change the name of the attributes used for the algorithm. >>> G = nx.DiGraph() >>> G.add_node('p', spam=-4) >>> G.add_node('q', spam=2) >>> G.add_node('a', spam=-2) >>> G.add_node('d', spam=-1) >>> G.add_node('t', spam=2) >>> G.add_node('w', spam=3) >>> G.add_edge('p', 'q', cost=7, vacancies=5) >>> G.add_edge('p', 'a', cost=1, vacancies=4) >>> G.add_edge('q', 'd', cost=2, vacancies=3) >>> G.add_edge('t', 'q', cost=1, vacancies=2) >>> G.add_edge('a', 't', cost=2, vacancies=4) >>> G.add_edge('d', 'w', cost=3, vacancies=4) >>> G.add_edge('t', 'w', cost=4, vacancies=1) >>> flowCost, flowDict = nx.network_simplex(G, demand='spam', ... capacity='vacancies', ... weight='cost') >>> flowCost 37 >>> flowDict # doctest: +SKIP {'a': {'t': 4}, 'd': {'w': 2}, 'q': {'d': 1}, 'p': {'q': 2, 'a': 2}, 't': {'q': 1, 'w': 1}, 'w': {}} References ---------- .. [1] Z. Kiraly, P. Kovacs. Efficient implementation of minimum-cost flow algorithms. Acta Universitatis Sapientiae, Informatica 4(1):67--118. 2012. .. [2] R. Barr, F. Glover, D. Klingman. Enhancement of spanning tree labeling procedures for network optimization. INFOR 17(1):16--34. 1979. 
""" ########################################################################### # Problem essentials extraction and sanity check ########################################################################### if len(G) == 0: raise nx.NetworkXError('graph has no nodes') # Number all nodes and edges and hereafter reference them using ONLY their # numbers N = list(G) # nodes I = {u: i for i, u in enumerate(N)} # node indices D = [G.nodes[u].get(demand, 0) for u in N] # node demands inf = float('inf') for p, b in zip(N, D): if abs(b) == inf: raise nx.NetworkXError('node %r has infinite demand' % (p,)) multigraph = G.is_multigraph() S = [] # edge sources T = [] # edge targets if multigraph: K = [] # edge keys E = {} # edge indices U = [] # edge capacities C = [] # edge weights if not multigraph: edges = G.edges(data=True) else: edges = G.edges(data=True, keys=True) edges = (e for e in edges if e[0] != e[1] and e[-1].get(capacity, inf) != 0) for i, e in enumerate(edges): S.append(I[e[0]]) T.append(I[e[1]]) if multigraph: K.append(e[2]) E[e[:-1]] = i U.append(e[-1].get(capacity, inf)) C.append(e[-1].get(weight, 0)) for e, c in zip(E, C): if abs(c) == inf: raise nx.NetworkXError('edge %r has infinite weight' % (e,)) if not multigraph: edges = nx.selfloop_edges(G, data=True) else: edges = nx.selfloop_edges(G, data=True, keys=True) for e in edges: if abs(e[-1].get(weight, 0)) == inf: raise nx.NetworkXError('edge %r has infinite weight' % (e[:-1],)) ########################################################################### # Quick infeasibility detection ########################################################################### if sum(D) != 0: raise nx.NetworkXUnfeasible('total node demand is not zero') for e, u in zip(E, U): if u < 0: raise nx.NetworkXUnfeasible('edge %r has negative capacity' % (e,)) if not multigraph: edges = nx.selfloop_edges(G, data=True) else: edges = nx.selfloop_edges(G, data=True, keys=True) for e in edges: if e[-1].get(capacity, inf) < 0: raise 
nx.NetworkXUnfeasible( 'edge %r has negative capacity' % (e[:-1],)) ########################################################################### # Initialization ########################################################################### # Add a dummy node -1 and connect all existing nodes to it with infinite- # capacity dummy edges. Node -1 will serve as the root of the # spanning tree of the network simplex method. The new edges will used to # trivially satisfy the node demands and create an initial strongly # feasible spanning tree. n = len(N) # number of nodes for p, d in enumerate(D): if d > 0: # Must be greater-than here. Zero-demand nodes must have # edges pointing towards the root to ensure strong # feasibility. S.append(-1) T.append(p) else: S.append(p) T.append(-1) faux_inf = 3 * max(chain([sum(u for u in U if u < inf), sum(abs(c) for c in C)], (abs(d) for d in D))) or 1 C.extend(repeat(faux_inf, n)) U.extend(repeat(faux_inf, n)) # Construct the initial spanning tree. e = len(E) # number of edges x = list(chain(repeat(0, e), (abs(d) for d in D))) # edge flows pi = [faux_inf if d <= 0 else -faux_inf for d in D] # node potentials parent = list(chain(repeat(-1, n), [None])) # parent nodes edge = list(range(e, e + n)) # edges to parents size = list(chain(repeat(1, n), [n + 1])) # subtree sizes next = list(chain(range(1, n), [-1, 0])) # next nodes in depth-first thread prev = list(range(-1, n)) # previous nodes in depth-first thread last = list(chain(range(n), [n - 1])) # last descendants in depth-first thread ########################################################################### # Pivot loop ########################################################################### def reduced_cost(i): """Return the reduced cost of an edge i. """ c = C[i] - pi[S[i]] + pi[T[i]] return c if x[i] == 0 else -c def find_entering_edges(): """Yield entering edges until none can be found. 
""" if e == 0: return # Entering edges are found by combining Dantzig's rule and Bland's # rule. The edges are cyclically grouped into blocks of size B. Within # each block, Dantzig's rule is applied to find an entering edge. The # blocks to search is determined following Bland's rule. B = int(ceil(sqrt(e))) # pivot block size M = (e + B - 1) // B # number of blocks needed to cover all edges m = 0 # number of consecutive blocks without eligible # entering edges f = 0 # first edge in block while m < M: # Determine the next block of edges. l = f + B if l <= e: edges = range(f, l) else: l -= e edges = chain(range(f, e), range(l)) f = l # Find the first edge with the lowest reduced cost. i = min(edges, key=reduced_cost) c = reduced_cost(i) if c >= 0: # No entering edge found in the current block. m += 1 else: # Entering edge found. if x[i] == 0: p = S[i] q = T[i] else: p = T[i] q = S[i] yield i, p, q m = 0 # All edges have nonnegative reduced costs. The current flow is # optimal. def find_apex(p, q): """Find the lowest common ancestor of nodes p and q in the spanning tree. """ size_p = size[p] size_q = size[q] while True: while size_p < size_q: p = parent[p] size_p = size[p] while size_p > size_q: q = parent[q] size_q = size[q] if size_p == size_q: if p != q: p = parent[p] size_p = size[p] q = parent[q] size_q = size[q] else: return p def trace_path(p, w): """Return the nodes and edges on the path from node p to its ancestor w. """ Wn = [p] We = [] while p != w: We.append(edge[p]) p = parent[p] Wn.append(p) return Wn, We def find_cycle(i, p, q): """Return the nodes and edges on the cycle containing edge i == (p, q) when the latter is added to the spanning tree. The cycle is oriented in the direction from p to q. 
""" w = find_apex(p, q) Wn, We = trace_path(p, w) Wn.reverse() We.reverse() We.append(i) WnR, WeR = trace_path(q, w) del WnR[-1] Wn += WnR We += WeR return Wn, We def residual_capacity(i, p): """Return the residual capacity of an edge i in the direction away from its endpoint p. """ return U[i] - x[i] if S[i] == p else x[i] def find_leaving_edge(Wn, We): """Return the leaving edge in a cycle represented by Wn and We. """ j, s = min(zip(reversed(We), reversed(Wn)), key=lambda i_p: residual_capacity(*i_p)) t = T[j] if S[j] == s else S[j] return j, s, t def augment_flow(Wn, We, f): """Augment f units of flow along a cycle represented by Wn and We. """ for i, p in zip(We, Wn): if S[i] == p: x[i] += f else: x[i] -= f def trace_subtree(p): """Yield the nodes in the subtree rooted at a node p. """ yield p l = last[p] while p != l: p = next[p] yield p def remove_edge(s, t): """Remove an edge (s, t) where parent[t] == s from the spanning tree. """ size_t = size[t] prev_t = prev[t] last_t = last[t] next_last_t = next[last_t] # Remove (s, t). parent[t] = None edge[t] = None # Remove the subtree rooted at t from the depth-first thread. next[prev_t] = next_last_t prev[next_last_t] = prev_t next[last_t] = t prev[t] = last_t # Update the subtree sizes and last descendants of the (old) acenstors # of t. while s is not None: size[s] -= size_t if last[s] == last_t: last[s] = prev_t s = parent[s] def make_root(q): """Make a node q the root of its containing subtree. """ ancestors = [] while q is not None: ancestors.append(q) q = parent[q] ancestors.reverse() for p, q in zip(ancestors, islice(ancestors, 1, None)): size_p = size[p] last_p = last[p] prev_q = prev[q] last_q = last[q] next_last_q = next[last_q] # Make p a child of q. parent[p] = q parent[q] = None edge[p] = edge[q] edge[q] = None size[p] = size_p - size[q] size[q] = size_p # Remove the subtree rooted at q from the depth-first thread. 
next[prev_q] = next_last_q prev[next_last_q] = prev_q next[last_q] = q prev[q] = last_q if last_p == last_q: last[p] = prev_q last_p = prev_q # Add the remaining parts of the subtree rooted at p as a subtree # of q in the depth-first thread. prev[p] = last_q next[last_q] = p next[last_p] = q prev[q] = last_p last[q] = last_p def add_edge(i, p, q): """Add an edge (p, q) to the spanning tree where q is the root of a subtree. """ last_p = last[p] next_last_p = next[last_p] size_q = size[q] last_q = last[q] # Make q a child of p. parent[q] = p edge[q] = i # Insert the subtree rooted at q into the depth-first thread. next[last_p] = q prev[q] = last_p prev[next_last_p] = last_q next[last_q] = next_last_p # Update the subtree sizes and last descendants of the (new) ancestors # of q. while p is not None: size[p] += size_q if last[p] == last_p: last[p] = last_q p = parent[p] def update_potentials(i, p, q): """Update the potentials of the nodes in the subtree rooted at a node q connected to its parent p by an edge i. """ if q == T[i]: d = pi[p] - C[i] - pi[q] else: d = pi[p] + C[i] - pi[q] for q in trace_subtree(q): pi[q] += d # Pivot loop for i, p, q in find_entering_edges(): Wn, We = find_cycle(i, p, q) j, s, t = find_leaving_edge(Wn, We) augment_flow(Wn, We, residual_capacity(j, s)) if i != j: # Do nothing more if the entering edge is the same as the # the leaving edge. if parent[t] != s: # Ensure that s is the parent of t. s, t = t, s if We.index(i) > We.index(j): # Ensure that q is in the subtree rooted at t. 
p, q = q, p remove_edge(s, t) make_root(q) add_edge(i, p, q) update_potentials(i, p, q) ########################################################################### # Infeasibility and unboundedness detection ########################################################################### if any(x[i] != 0 for i in range(-n, 0)): raise nx.NetworkXUnfeasible('no flow satisfies all node demands') if (any(x[i] * 2 >= faux_inf for i in range(e)) or any(e[-1].get(capacity, inf) == inf and e[-1].get(weight, 0) < 0 for e in nx.selfloop_edges(G, data=True))): raise nx.NetworkXUnbounded( 'negative cycle with infinite capacity found') ########################################################################### # Flow cost calculation and flow dict construction ########################################################################### del x[e:] flow_cost = sum(c * x for c, x in zip(C, x)) flow_dict = {n: {} for n in N} def add_entry(e): """Add a flow dict entry. """ d = flow_dict[e[0]] for k in e[1:-2]: try: d = d[k] except KeyError: t = {} d[k] = t d = t d[e[-2]] = e[-1] S = (N[s] for s in S) # Use original nodes. T = (N[t] for t in T) # Use original nodes. if not multigraph: for e in zip(S, T, x): add_entry(e) edges = G.edges(data=True) else: for e in zip(S, T, K, x): add_entry(e) edges = G.edges(data=True, keys=True) for e in edges: if e[0] != e[1]: if e[-1].get(capacity, inf) == 0: add_entry(e[:-1] + (0,)) else: c = e[-1].get(weight, 0) if c >= 0: add_entry(e[:-1] + (0,)) else: u = e[-1][capacity] flow_cost += c * u add_entry(e[:-1] + (u,)) return flow_cost, flow_dict
def network_simplex(G, demand="demand", capacity="capacity", weight="weight"):
    r"""Find a minimum cost flow satisfying all demands in digraph G.

    This is a primal network simplex algorithm that uses the leaving
    arc rule to prevent cycling.

    G is a digraph with edge costs and capacities and in which nodes
    have demand, i.e., they want to send or receive some amount of
    flow. A negative demand means that the node wants to send flow, a
    positive demand means that the node wants to receive flow. A flow on
    the digraph G satisfies all demands if the net flow into each node
    is equal to the demand of that node.

    Parameters
    ----------
    G : NetworkX graph
        DiGraph (or MultiDiGraph) on which a minimum cost flow satisfying
        all demands is to be found.

    demand : string
        Nodes of the graph G are expected to have an attribute demand
        that indicates how much flow a node wants to send (negative
        demand) or receive (positive demand). Note that the sum of the
        demands should be 0, otherwise the problem is not feasible. If
        this attribute is not present, a node is considered to have 0
        demand. Default value: 'demand'.

    capacity : string
        Edges of the graph G are expected to have an attribute capacity
        that indicates how much flow the edge can support. If this
        attribute is not present, the edge is considered to have
        infinite capacity. Default value: 'capacity'.

    weight : string
        Edges of the graph G are expected to have an attribute weight
        that indicates the cost incurred by sending one unit of flow on
        that edge. If not present, the weight is considered to be 0.
        Default value: 'weight'.

    Returns
    -------
    flowCost : integer, float
        Cost of a minimum cost flow satisfying all demands.

    flowDict : dictionary
        Dictionary of dictionaries keyed by nodes such that
        flowDict[u][v] is the flow on edge (u, v). When G is a multigraph
        there is one more level keyed by edge key, i.e.
        flowDict[u][v][key] is the flow on edge (u, v, key).

    Raises
    ------
    NetworkXError
        This exception is raised if the graph has no nodes, if a node
        has infinite demand, or if an edge (self-loops included) has
        infinite weight.

    NetworkXUnfeasible
        This exception is raised in the following situations:

            * The sum of the demands is not zero. Then, there is no
              flow satisfying all demands.
            * An edge (self-loops included) has negative capacity.
            * There is no flow satisfying all demands.

    NetworkXUnbounded
        This exception is raised if the digraph G has a cycle of
        negative cost and infinite capacity. Then, the cost of a flow
        satisfying all demands is unbounded below.

    Notes
    -----
    This algorithm is not guaranteed to work if edge weights or demands
    are floating point numbers (overflows and roundoff errors can
    cause problems). As a workaround you can use integer numbers by
    multiplying the relevant edge attributes by a convenient
    constant factor (e.g. 100).

    See also
    --------
    cost_of_flow, max_flow_min_cost, min_cost_flow, min_cost_flow_cost

    Examples
    --------
    A simple example of a min cost flow problem.

    >>> G = nx.DiGraph()
    >>> G.add_node("a", demand=-5)
    >>> G.add_node("d", demand=5)
    >>> G.add_edge("a", "b", weight=3, capacity=4)
    >>> G.add_edge("a", "c", weight=6, capacity=10)
    >>> G.add_edge("b", "d", weight=1, capacity=9)
    >>> G.add_edge("c", "d", weight=2, capacity=5)
    >>> flowCost, flowDict = nx.network_simplex(G)
    >>> flowCost
    24
    >>> flowDict  # doctest: +SKIP
    {'a': {'c': 1, 'b': 4}, 'c': {'d': 1}, 'b': {'d': 4}, 'd': {}}

    The mincost flow algorithm can also be used to solve shortest path
    problems. To find the shortest path between two nodes u and v,
    give all edges an infinite capacity, give node u a demand of -1 and
    node v a demand of 1. Then run the network simplex. The value of a
    min cost flow will be the distance between u and v and edges
    carrying positive flow will indicate the path.

    >>> G = nx.DiGraph()
    >>> G.add_weighted_edges_from(
    ...     [
    ...         ("s", "u", 10),
    ...         ("s", "x", 5),
    ...         ("u", "v", 1),
    ...         ("u", "x", 2),
    ...         ("v", "y", 1),
    ...         ("x", "u", 3),
    ...         ("x", "v", 5),
    ...         ("x", "y", 2),
    ...         ("y", "s", 7),
    ...         ("y", "v", 6),
    ...     ]
    ... )
    >>> G.add_node("s", demand=-1)
    >>> G.add_node("v", demand=1)
    >>> flowCost, flowDict = nx.network_simplex(G)
    >>> flowCost == nx.shortest_path_length(G, "s", "v", weight="weight")
    True
    >>> sorted([(u, v) for u in flowDict for v in flowDict[u] if flowDict[u][v] > 0])
    [('s', 'x'), ('u', 'v'), ('x', 'u')]
    >>> nx.shortest_path(G, "s", "v", weight="weight")
    ['s', 'x', 'u', 'v']

    It is possible to change the name of the attributes used for the
    algorithm.

    >>> G = nx.DiGraph()
    >>> G.add_node("p", spam=-4)
    >>> G.add_node("q", spam=2)
    >>> G.add_node("a", spam=-2)
    >>> G.add_node("d", spam=-1)
    >>> G.add_node("t", spam=2)
    >>> G.add_node("w", spam=3)
    >>> G.add_edge("p", "q", cost=7, vacancies=5)
    >>> G.add_edge("p", "a", cost=1, vacancies=4)
    >>> G.add_edge("q", "d", cost=2, vacancies=3)
    >>> G.add_edge("t", "q", cost=1, vacancies=2)
    >>> G.add_edge("a", "t", cost=2, vacancies=4)
    >>> G.add_edge("d", "w", cost=3, vacancies=4)
    >>> G.add_edge("t", "w", cost=4, vacancies=1)
    >>> flowCost, flowDict = nx.network_simplex(
    ...     G, demand="spam", capacity="vacancies", weight="cost"
    ... )
    >>> flowCost
    37
    >>> flowDict  # doctest: +SKIP
    {'a': {'t': 4}, 'd': {'w': 2}, 'q': {'d': 1}, 'p': {'q': 2, 'a': 2}, 't': {'q': 1, 'w': 1}, 'w': {}}

    References
    ----------
    .. [1] Z. Kiraly, P. Kovacs.
           Efficient implementation of minimum-cost flow algorithms.
           Acta Universitatis Sapientiae, Informatica 4(1):67--118. 2012.
    .. [2] R. Barr, F. Glover, D. Klingman.
           Enhancement of spanning tree labeling procedures for network
           optimization.
           INFOR 17(1):16--34. 1979.
    """
    # NOTE(review): this body never checks that G is directed or connected;
    # if undirected input must be rejected, that presumably happens in a
    # decorator outside this block -- confirm.

    ###########################################################################
    # Problem essentials extraction and sanity check
    ###########################################################################

    if len(G) == 0:
        raise nx.NetworkXError("graph has no nodes")

    # Number all nodes and edges and hereafter reference them using ONLY their
    # numbers

    N = list(G)  # nodes
    I = {u: i for i, u in enumerate(N)}  # node indices
    D = [G.nodes[u].get(demand, 0) for u in N]  # node demands

    inf = float("inf")
    for p, b in zip(N, D):
        if abs(b) == inf:
            raise nx.NetworkXError(f"node {p!r} has infinite demand")

    multigraph = G.is_multigraph()
    S = []  # edge sources
    T = []  # edge targets
    if multigraph:
        K = []  # edge keys
    E = {}  # edge indices
    U = []  # edge capacities
    C = []  # edge weights

    if not multigraph:
        edges = G.edges(data=True)
    else:
        edges = G.edges(data=True, keys=True)
    # Self-loops and zero-capacity edges are left out of the simplex; they
    # are handled separately when the flow dict is built at the end.
    edges = (e for e in edges if e[0] != e[1] and e[-1].get(capacity, inf) != 0)
    for i, e in enumerate(edges):
        S.append(I[e[0]])
        T.append(I[e[1]])
        if multigraph:
            K.append(e[2])
        # Key is (u, v) for graphs and (u, v, key) for multigraphs.
        E[e[:-1]] = i
        U.append(e[-1].get(capacity, inf))
        C.append(e[-1].get(weight, 0))

    for e, c in zip(E, C):
        if abs(c) == inf:
            raise nx.NetworkXError(f"edge {e!r} has infinite weight")
    # Self-loops were skipped above, so validate their weights here.
    if not multigraph:
        edges = nx.selfloop_edges(G, data=True)
    else:
        edges = nx.selfloop_edges(G, data=True, keys=True)
    for e in edges:
        if abs(e[-1].get(weight, 0)) == inf:
            raise nx.NetworkXError(f"edge {e[:-1]!r} has infinite weight")

    ###########################################################################
    # Quick infeasibility detection
    ###########################################################################

    if sum(D) != 0:
        raise nx.NetworkXUnfeasible("total node demand is not zero")
    for e, u in zip(E, U):
        if u < 0:
            raise nx.NetworkXUnfeasible(f"edge {e!r} has negative capacity")
    # Self-loops were skipped above, so validate their capacities here.
    if not multigraph:
        edges = nx.selfloop_edges(G, data=True)
    else:
        edges = nx.selfloop_edges(G, data=True, keys=True)
    for e in edges:
        if e[-1].get(capacity, inf) < 0:
            raise nx.NetworkXUnfeasible(
                f"edge {e[:-1]!r} has negative capacity")

    ###########################################################################
    # Initialization
    ###########################################################################

    # Add a dummy node -1 and connect all existing nodes to it with infinite-
    # capacity dummy edges. Node -1 will serve as the root of the
    # spanning tree of the network simplex method. The new edges will be used
    # to trivially satisfy the node demands and create an initial strongly
    # feasible spanning tree.
    n = len(N)  # number of nodes
    for p, d in enumerate(D):
        # Must be greater-than here. Zero-demand nodes must have
        # edges pointing towards the root to ensure strong
        # feasibility.
        if d > 0:
            S.append(-1)
            T.append(p)
        else:
            S.append(p)
            T.append(-1)
    # Finite stand-in for infinity on the dummy edges: three times the
    # largest of (sum of finite capacities, sum of absolute weights, largest
    # absolute demand), or 1 when all of those are zero.
    faux_inf = (3 * max(
        chain(
            [sum(u for u in U if u < inf), sum(abs(c) for c in C)],
            (abs(d) for d in D),
        )) or 1)
    C.extend(repeat(faux_inf, n))
    U.extend(repeat(faux_inf, n))

    # Construct the initial spanning tree.
    # The lists below have n + 1 entries; index -1 (the last slot) is the
    # dummy root.
    e = len(E)  # number of edges
    x = list(chain(repeat(0, e), (abs(d) for d in D)))  # edge flows
    pi = [faux_inf if d <= 0 else -faux_inf for d in D]  # node potentials
    parent = list(chain(repeat(-1, n), [None]))  # parent nodes
    edge = list(range(e, e + n))  # edges to parents
    size = list(chain(repeat(1, n), [n + 1]))  # subtree sizes
    next = list(chain(range(1, n), [-1, 0]))  # next nodes in depth-first thread
    prev = list(range(-1, n))  # previous nodes in depth-first thread
    last = list(chain(range(n), [n - 1]))  # last descendants in depth-first thread

    ###########################################################################
    # Pivot loop
    ###########################################################################

    def reduced_cost(i):
        """Returns the reduced cost of an edge i."""
        c = C[i] - pi[S[i]] + pi[T[i]]
        # The sign is flipped for an edge that currently carries flow.
        return c if x[i] == 0 else -c

    def find_entering_edges():
        """Yield entering edges until none can be found."""
        if e == 0:
            return

        # Entering edges are found by combining Dantzig's rule and Bland's
        # rule. The edges are cyclically grouped into blocks of size B. Within
        # each block, Dantzig's rule is applied to find an entering edge. The
        # blocks to search are determined following Bland's rule.
        B = int(ceil(sqrt(e)))  # pivot block size
        M = (e + B - 1) // B  # number of blocks needed to cover all edges
        m = 0  # number of consecutive blocks without eligible
        # entering edges
        f = 0  # first edge in block

        while m < M:
            # Determine the next block of edges.
            l = f + B
            if l <= e:
                edges = range(f, l)
            else:
                # The block wraps around the end of the edge list.
                l -= e
                edges = chain(range(f, e), range(l))
            f = l
            # Find the first edge with the lowest reduced cost.
            i = min(edges, key=reduced_cost)
            c = reduced_cost(i)
            if c >= 0:
                # No entering edge found in the current block.
                m += 1
            else:
                # Entering edge found.
                if x[i] == 0:
                    p = S[i]
                    q = T[i]
                else:
                    p = T[i]
                    q = S[i]
                yield i, p, q
                m = 0
        # All edges have nonnegative reduced costs. The current flow is
        # optimal.

    def find_apex(p, q):
        """Find the lowest common ancestor of nodes p and q in the spanning
        tree.
        """
        size_p = size[p]
        size_q = size[q]
        # Repeatedly move the node with the smaller subtree up to its parent
        # until both walks meet at the same node.
        while True:
            while size_p < size_q:
                p = parent[p]
                size_p = size[p]
            while size_p > size_q:
                q = parent[q]
                size_q = size[q]
            if size_p == size_q:
                if p != q:
                    p = parent[p]
                    size_p = size[p]
                    q = parent[q]
                    size_q = size[q]
                else:
                    return p

    def trace_path(p, w):
        """Returns the nodes and edges on the path from node p to its ancestor
        w.
        """
        Wn = [p]
        We = []
        while p != w:
            We.append(edge[p])
            p = parent[p]
            Wn.append(p)
        return Wn, We

    def find_cycle(i, p, q):
        """Returns the nodes and edges on the cycle containing edge i == (p, q)
        when the latter is added to the spanning tree.

        The cycle is oriented in the direction from p to q.
        """
        w = find_apex(p, q)
        Wn, We = trace_path(p, w)
        Wn.reverse()
        We.reverse()
        # Skip the append when the traced path already consists of exactly
        # edge i.
        if We != [i]:
            We.append(i)
        WnR, WeR = trace_path(q, w)
        del WnR[-1]
        Wn += WnR
        We += WeR
        return Wn, We

    def residual_capacity(i, p):
        """Returns the residual capacity of an edge i in the direction away
        from its endpoint p.
        """
        return U[i] - x[i] if S[i] == p else x[i]

    def find_leaving_edge(Wn, We):
        """Returns the leaving edge in a cycle represented by Wn and We."""
        # The cycle is scanned in reverse, so among edges tied for the
        # minimum residual capacity the one latest in the cycle is chosen.
        j, s = min(zip(reversed(We), reversed(Wn)),
                   key=lambda i_p: residual_capacity(*i_p))
        t = T[j] if S[j] == s else S[j]
        return j, s, t

    def augment_flow(Wn, We, f):
        """Augment f units of flow along a cycle represented by Wn and We."""
        for i, p in zip(We, Wn):
            if S[i] == p:
                x[i] += f
            else:
                x[i] -= f

    def trace_subtree(p):
        """Yield the nodes in the subtree rooted at a node p."""
        yield p
        l = last[p]
        while p != l:
            p = next[p]
            yield p

    def remove_edge(s, t):
        """Remove an edge (s, t) where parent[t] == s from the spanning tree."""
        size_t = size[t]
        prev_t = prev[t]
        last_t = last[t]
        next_last_t = next[last_t]
        # Remove (s, t).
        parent[t] = None
        edge[t] = None
        # Remove the subtree rooted at t from the depth-first thread.
        next[prev_t] = next_last_t
        prev[next_last_t] = prev_t
        next[last_t] = t
        prev[t] = last_t
        # Update the subtree sizes and last descendants of the (old) ancestors
        # of t.
        while s is not None:
            size[s] -= size_t
            if last[s] == last_t:
                last[s] = prev_t
            s = parent[s]

    def make_root(q):
        """Make a node q the root of its containing subtree."""
        ancestors = []
        while q is not None:
            ancestors.append(q)
            q = parent[q]
        ancestors.reverse()
        # Walk from the current root down to q, reversing one parent/child
        # link at a time.
        for p, q in zip(ancestors, islice(ancestors, 1, None)):
            size_p = size[p]
            last_p = last[p]
            prev_q = prev[q]
            last_q = last[q]
            next_last_q = next[last_q]
            # Make p a child of q.
            parent[p] = q
            parent[q] = None
            edge[p] = edge[q]
            edge[q] = None
            size[p] = size_p - size[q]
            size[q] = size_p
            # Remove the subtree rooted at q from the depth-first thread.
            next[prev_q] = next_last_q
            prev[next_last_q] = prev_q
            next[last_q] = q
            prev[q] = last_q
            if last_p == last_q:
                last[p] = prev_q
                last_p = prev_q
            # Add the remaining parts of the subtree rooted at p as a subtree
            # of q in the depth-first thread.
            prev[p] = last_q
            next[last_q] = p
            next[last_p] = q
            prev[q] = last_p
            last[q] = last_p

    def add_edge(i, p, q):
        """Add an edge (p, q) to the spanning tree where q is the root of a
        subtree.
        """
        last_p = last[p]
        next_last_p = next[last_p]
        size_q = size[q]
        last_q = last[q]
        # Make q a child of p.
        parent[q] = p
        edge[q] = i
        # Insert the subtree rooted at q into the depth-first thread.
        next[last_p] = q
        prev[q] = last_p
        prev[next_last_p] = last_q
        next[last_q] = next_last_p
        # Update the subtree sizes and last descendants of the (new) ancestors
        # of q.
        while p is not None:
            size[p] += size_q
            if last[p] == last_p:
                last[p] = last_q
            p = parent[p]

    def update_potentials(i, p, q):
        """Update the potentials of the nodes in the subtree rooted at a node
        q connected to its parent p by an edge i.
        """
        if q == T[i]:
            d = pi[p] - C[i] - pi[q]
        else:
            d = pi[p] + C[i] - pi[q]
        for q in trace_subtree(q):
            pi[q] += d

    # Pivot loop
    for i, p, q in find_entering_edges():
        Wn, We = find_cycle(i, p, q)
        j, s, t = find_leaving_edge(Wn, We)
        augment_flow(Wn, We, residual_capacity(j, s))
        # Do nothing more if the entering edge is the same as the leaving edge.
        if i != j:
            if parent[t] != s:
                # Ensure that s is the parent of t.
                s, t = t, s
            if We.index(i) > We.index(j):
                # Ensure that q is in the subtree rooted at t.
                p, q = q, p
            remove_edge(s, t)
            make_root(q)
            add_edge(i, p, q)
            update_potentials(i, p, q)

    ###########################################################################
    # Infeasibility and unboundedness detection
    ###########################################################################

    # The last n entries of x are the dummy edges; leftover flow on any of
    # them means the demands cannot be met by the real edges.
    if any(x[i] != 0 for i in range(-n, 0)):
        raise nx.NetworkXUnfeasible("no flow satisfies all node demands")

    # Unbounded when a real edge carries at least faux_inf / 2 units of flow,
    # or when a self-loop has infinite capacity and negative weight.
    if any(x[i] * 2 >= faux_inf for i in range(e)) or any(
            e[-1].get(capacity, inf) == inf and e[-1].get(weight, 0) < 0
            for e in nx.selfloop_edges(G, data=True)):
        raise nx.NetworkXUnbounded(
            "negative cycle with infinite capacity found")

    ###########################################################################
    # Flow cost calculation and flow dict construction
    ###########################################################################

    # Drop the flows on the dummy edges.
    del x[e:]
    flow_cost = sum(c * x for c, x in zip(C, x))
    flow_dict = {n: {} for n in N}

    def add_entry(e):
        """Add a flow dict entry."""
        # e is (u, v, flow) or (u, v, key, flow); intermediate dict levels
        # are created on demand.
        d = flow_dict[e[0]]
        for k in e[1:-2]:
            try:
                d = d[k]
            except KeyError:
                t = {}
                d[k] = t
                d = t
        d[e[-2]] = e[-1]

    S = (N[s] for s in S)  # Use original nodes.
    T = (N[t] for t in T)  # Use original nodes.
    if not multigraph:
        for e in zip(S, T, x):
            add_entry(e)
        edges = G.edges(data=True)
    else:
        for e in zip(S, T, K, x):
            add_entry(e)
        edges = G.edges(data=True, keys=True)
    # Fill in the edges that were left out of the simplex.
    for e in edges:
        if e[0] != e[1]:
            # Zero-capacity edges carry no flow.
            if e[-1].get(capacity, inf) == 0:
                add_entry(e[:-1] + (0, ))
        else:
            # Self-loops: no flow unless the weight is negative, in which
            # case the loop is saturated and its cost is added.
            c = e[-1].get(weight, 0)
            if c >= 0:
                add_entry(e[:-1] + (0, ))
            else:
                u = e[-1][capacity]
                flow_cost += c * u
                add_entry(e[:-1] + (u, ))

    return flow_cost, flow_dict
def to_scipy_sparse_matrix(G, nodelist=None, dtype=None, weight='weight',
                           format='csr'):
    """Return the graph adjacency matrix as a SciPy sparse matrix.

    Parameters
    ----------
    G : graph
        The NetworkX graph used to construct the sparse matrix.

    nodelist : list, optional
        The rows and columns are ordered according to the nodes in
        `nodelist`. If `nodelist` is None, then the ordering is produced
        by iterating over `G`.

    dtype : NumPy data-type, optional
        A valid NumPy dtype used to initialize the array. If None, then the
        NumPy default is used.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight. If None then all edge weights are 1.

    format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
        The type of the matrix to be returned (default 'csr'). For
        some algorithms different implementations of sparse matrices
        can perform better. See [1]_ for details.

    Returns
    -------
    M : SciPy sparse matrix
        Graph adjacency matrix.

    Raises
    ------
    NetworkXError
        If `nodelist` (or `G`, when `nodelist` is None) is empty, if
        `nodelist` contains duplicates, or if `format` is not recognized
        by SciPy.

    Notes
    -----
    The matrix entries are populated using the edge attribute held in
    parameter weight. When an edge does not have that attribute, the
    value of the entry is 1.

    For multiple edges the matrix values are the sums of the edge weights.

    When `nodelist` does not contain every node in `G`, the matrix is built
    from the subgraph of `G` that is induced by the nodes in `nodelist`.
    Self-loops on nodes outside `nodelist` are ignored.

    The matrix is assembled in coo_matrix format and then converted to the
    requested `format`.

    The convention used for self-loop edges in graphs is to assign the
    diagonal matrix entry value to the weight attribute of the edge
    (or the number 1 if the edge has no weight attribute). If the
    alternate convention of doubling the edge weight is desired the
    resulting Scipy sparse matrix can be modified as follows:

    >>> import scipy as sp
    >>> G = nx.Graph([(1, 1)])
    >>> A = nx.to_scipy_sparse_matrix(G)
    >>> print(A.todense())
    [[1]]
    >>> A.setdiag(A.diagonal() * 2)
    >>> print(A.todense())
    [[2]]

    Examples
    --------
    >>> G = nx.MultiDiGraph()
    >>> G.add_edge(0, 1, weight=2)
    0
    >>> G.add_edge(1, 0)
    0
    >>> G.add_edge(2, 2, weight=3)
    0
    >>> G.add_edge(2, 2)
    1
    >>> S = nx.to_scipy_sparse_matrix(G, nodelist=[0, 1, 2])
    >>> print(S.todense())
    [[0 2 0]
     [1 0 0]
     [0 0 4]]

    References
    ----------
    .. [1] Scipy Dev. References, "Sparse Matrices",
       https://docs.scipy.org/doc/scipy/reference/sparse.html
    """
    from scipy import sparse

    if nodelist is None:
        nodelist = list(G)
    nlen = len(nodelist)
    if nlen == 0:
        raise nx.NetworkXError("Graph has no nodes or edges")

    if len(nodelist) != len(set(nodelist)):
        msg = "Ambiguous ordering: `nodelist` contained duplicates."
        raise nx.NetworkXError(msg)

    index = dict(zip(nodelist, range(nlen)))
    # One (row, col, value) triple per edge of the induced subgraph,
    # transposed into three parallel tuples.
    coefficients = zip(*((index[u], index[v], attrs.get(weight, 1))
                         for u, v, attrs in G.edges(nodelist, data=True)
                         if u in index and v in index))
    try:
        row, col, data = coefficients
    except ValueError:
        # there is no edge in the subgraph
        row, col, data = [], [], []

    if G.is_directed():
        M = sparse.coo_matrix((data, (row, col)),
                              shape=(nlen, nlen), dtype=dtype)
    else:
        # symmetrize matrix
        sym_data = data + data
        sym_row = row + col
        sym_col = col + row
        # Self-loop entries get double counted when symmetrizing, so we
        # subtract the data on the diagonal. The correction is derived from
        # the triples collected above (row == col), which keeps it limited to
        # self-loops inside `nodelist`; a self-loop elsewhere in `G` must not
        # contribute and must not break the unpacking below.
        diagonal = [(r, -value)
                    for r, c, value in zip(row, col, data) if r == c]
        if diagonal:
            diag_index, diag_data = zip(*diagonal)
            sym_data += diag_data
            sym_row += diag_index
            sym_col += diag_index
        M = sparse.coo_matrix((sym_data, (sym_row, sym_col)),
                              shape=(nlen, nlen), dtype=dtype)
    try:
        return M.asformat(format)
    # From Scipy 1.1.0, asformat will throw a ValueError instead of an
    # AttributeError if the format is not recognized.
    except (AttributeError, ValueError) as err:
        raise nx.NetworkXError(
            "Unknown sparse matrix format: %s" % format) from err
def edges(self):
    """Return the set of edges of ``self.network``, cached in ``self._edges``.

    On the first call (while ``self._edges`` is ``None``) every self-loop is
    removed from ``self.network`` in place, and the remaining edges are
    stored as a set. Later calls return that same cached set without
    touching the graph again.
    """
    if self._edges is None:
        # Snapshot the self-loops before deleting them: nx.selfloop_edges
        # produces its edges lazily from the graph being modified, and
        # removing edges while that iterator is still running is unsafe.
        self_loops = list(nx.selfloop_edges(self.network))
        self.network.remove_edges_from(self_loops)
        self._edges = set(self.network.edges())
    return self._edges
def main(args):
    """Train a GCN on the dataset selected by ``args`` and print its accuracy.

    Reads ``args.gpu`` (a negative value keeps everything on the CPU),
    ``args.self_loop``, ``args.n_hidden``, ``args.n_layers``,
    ``args.dropout``, ``args.lr``, ``args.weight_decay`` and
    ``args.n_epochs``; ``args`` is also handed to ``load_data``. Validation
    accuracy is printed after every epoch and test accuracy once at the end.
    """
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    # Use boolean masks when this torch build provides BoolTensor,
    # otherwise fall back to ByteTensor.
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()
    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes,
           train_mask.int().sum().item(),
           val_mask.int().sum().item(),
           test_mask.int().sum().item()))

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    # graph preprocess and calculate normalization factor
    g = data.graph
    # add self loop
    if args.self_loop:
        # Drop existing self-loops first so that every node ends up with
        # exactly one. The loops are snapshotted into a list because
        # nx.selfloop_edges is lazy and the graph is modified while they are
        # being removed.
        g.remove_edges_from(list(nx.selfloop_edges(g)))
        g.add_edges_from(zip(g.nodes(), g.nodes()))
    g = DGLGraph(g)
    n_edges = g.number_of_edges()
    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    # Nodes with zero in-degree produce inf above; zero them out.
    norm[torch.isinf(norm)] = 0
    if cuda:
        norm = norm.cuda()
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(g,
                in_feats,
                args.n_hidden,
                n_classes,
                args.n_layers,
                F.relu,
                args.dropout)

    if cuda:
        model.cuda()
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # per-epoch training durations; the first three epochs are not timed
    dur = []
    for epoch in range(args.n_epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            dur.append(time.time() - t0)

        acc = evaluate(model, features, labels, val_mask)
        # ``dur`` is still empty during the untimed epochs. Report NaN
        # explicitly rather than calling np.mean on an empty list, which
        # gives the same NaN but emits a RuntimeWarning every time.
        mean_dur = np.mean(dur) if dur else float("nan")
        print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
              "ETputs(KTEPS) {:.2f}".format(epoch, mean_dur, loss.item(),
                                            acc, n_edges / mean_dur / 1000))

    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
def dist(self, G1, G2, dist='lccm'):
    """Jensen-Shannon divergence between the feature distributions fixed by
    dist.

    Assumes simple graphs.

    Parameters
    ----------
    G1, G2 (nx.Graph)
        two networkx graphs to be compared.

    dist (str)
        type of distribution divergence to output. Choices are 'cm',
        'ccm', 'lccm_node' and 'lccm'. The types stand for the associated
        random graph ensemble. 'cm' compares only the degree
        distribution. 'ccm' compares the networks according to the edges'
        degree-degree distribution. 'lccm_node' compares the distribution
        of nodes according to their onion centrality (degree, coreness,
        and layer within core). Finally, 'lccm' compares the networks
        according to the edges' joint degree, coreness and layer
        distribution for both endpoints.

    Returns
    -------
    dist (float)
        the distance between `G1` and `G2`.

    Notes
    -----
    ``self.results['lccm_dist']`` is always stored. When `dist` names
    another ensemble, ``self.results['<dist>_dist']`` is stored as well.
    ``self.results['dist']`` holds the returned value. A name with no
    matching entry in the sparse-matrix mapping raises ``KeyError``.

    References
    ----------
    .. [1] https://www.nature.com/articles/srep31708
    .. [2] https://journals.aps.org/prx/abstract/10.1103/PhysRevX.9.011023
    """
    # take the simple graph version
    # NOTE(review): if ensure_undirected returns its argument unchanged for
    # an already-undirected graph, the removals below modify the caller's
    # graph in place -- confirm against that helper.
    G1_simple = ensure_undirected(G1)
    G2_simple = ensure_undirected(G2)
    # Snapshot the self-loops before removing them; nx.selfloop_edges is
    # lazy and the graph is being modified by the removal.
    G1_simple.remove_edges_from(list(nx.selfloop_edges(G1_simple)))
    G2_simple.remove_edges_from(list(nx.selfloop_edges(G2_simple)))

    # get sparse matrices values for each graph
    # (assumed to be mappings keyed by ensemble name -- 'cm', 'ccm',
    # 'lccm_node', 'lccm' -- each holding the positional arguments for
    # _divergence_of_sparse_matrices; TODO confirm against the helper)
    matrices_G1 = _create_sparse_matrices_for_graph(G1_simple)
    matrices_G2 = _create_sparse_matrices_for_graph(G2_simple)

    # Always compute 'lccm', and additionally the requested ensemble when it
    # differs, so every documented choice of `dist` has a stored result.
    # dict.fromkeys de-duplicates while preserving order.
    for ensemble in dict.fromkeys(('lccm', dist)):
        self.results['{}_dist'.format(ensemble)] = (
            _divergence_of_sparse_matrices(*matrices_G1[ensemble],
                                           *matrices_G2[ensemble]))

    dist_id = '{}_dist'.format(dist)
    self.results['dist'] = self.results[dist_id]

    return self.results[dist_id]
def capacity_scaling(G, demand="demand", capacity="capacity", weight="weight", heap=BinaryHeap):
    r"""Find a minimum cost flow satisfying all demands in digraph G.

    This is a capacity scaling successive shortest augmenting path algorithm.

    G is a digraph with edge costs and capacities and in which nodes
    have demand, i.e., they want to send or receive some amount of
    flow. A negative demand means that the node wants to send flow, a
    positive demand means that the node wants to receive flow. A flow on
    the digraph G satisfies all demand if the net flow into each node
    is equal to the demand of that node.

    Parameters
    ----------
    G : NetworkX graph
        DiGraph or MultiDiGraph on which a minimum cost flow satisfying all
        demands is to be found.

    demand : string
        Nodes of the graph G are expected to have an attribute demand
        that indicates how much flow a node wants to send (negative
        demand) or receive (positive demand). Note that the sum of the
        demands should be 0 otherwise the problem is not feasible. If
        this attribute is not present, a node is considered to have 0
        demand. Default value: 'demand'.

    capacity : string
        Edges of the graph G are expected to have an attribute capacity
        that indicates how much flow the edge can support. If this
        attribute is not present, the edge is considered to have
        infinite capacity. Default value: 'capacity'.

    weight : string
        Edges of the graph G are expected to have an attribute weight
        that indicates the cost incurred by sending one unit of flow on
        that edge. If not present, the weight is considered to be 0.
        Default value: 'weight'.

    heap : class
        Type of heap to be used in the algorithm. It should be a subclass of
        :class:`MinHeap` or implement a compatible interface.

        If a stock heap implementation is to be used, :class:`BinaryHeap` is
        recommended over :class:`PairingHeap` for Python implementations without
        optimized attribute accesses (e.g., CPython) despite a slower
        asymptotic running time. For Python implementations with optimized
        attribute accesses (e.g., PyPy), :class:`PairingHeap` provides better
        performance. Default value: :class:`BinaryHeap`.

    Returns
    -------
    flowCost : integer
        Cost of a minimum cost flow satisfying all demands.

    flowDict : dictionary
        If G is a digraph, a dict-of-dicts keyed by nodes such that
        flowDict[u][v] is the flow on edge (u, v).
        If G is a MultiDiGraph, a dict-of-dicts-of-dicts keyed by nodes
        so that flowDict[u][v][key] is the flow on edge (u, v, key).

    Raises
    ------
    NetworkXError
        This exception is raised if the input graph is not directed,
        not connected.

    NetworkXUnfeasible
        This exception is raised in the following situations:

            * The sum of the demands is not zero. Then, there is no
              flow satisfying all demands.
            * There is no flow satisfying all demand.

    NetworkXUnbounded
        This exception is raised if the digraph G has a cycle of
        negative cost and infinite capacity. Then, the cost of a flow
        satisfying all demands is unbounded below.

    Notes
    -----
    This algorithm does not work if edge weights are floating-point numbers.

    See also
    --------
    :meth:`network_simplex`

    Examples
    --------
    A simple example of a min cost flow problem.

    >>> import networkx as nx
    >>> G = nx.DiGraph()
    >>> G.add_node('a', demand = -5)
    >>> G.add_node('d', demand = 5)
    >>> G.add_edge('a', 'b', weight = 3, capacity = 4)
    >>> G.add_edge('a', 'c', weight = 6, capacity = 10)
    >>> G.add_edge('b', 'd', weight = 1, capacity = 9)
    >>> G.add_edge('c', 'd', weight = 2, capacity = 5)
    >>> flowCost, flowDict = nx.capacity_scaling(G)
    >>> flowCost
    24
    >>> flowDict  # doctest: +SKIP
    {'a': {'c': 1, 'b': 4}, 'c': {'d': 1}, 'b': {'d': 4}, 'd': {}}

    It is possible to change the name of the attributes used for the
    algorithm.

    >>> G = nx.DiGraph()
    >>> G.add_node('p', spam = -4)
    >>> G.add_node('q', spam = 2)
    >>> G.add_node('a', spam = -2)
    >>> G.add_node('d', spam = -1)
    >>> G.add_node('t', spam = 2)
    >>> G.add_node('w', spam = 3)
    >>> G.add_edge('p', 'q', cost = 7, vacancies = 5)
    >>> G.add_edge('p', 'a', cost = 1, vacancies = 4)
    >>> G.add_edge('q', 'd', cost = 2, vacancies = 3)
    >>> G.add_edge('t', 'q', cost = 1, vacancies = 2)
    >>> G.add_edge('a', 't', cost = 2, vacancies = 4)
    >>> G.add_edge('d', 'w', cost = 3, vacancies = 4)
    >>> G.add_edge('t', 'w', cost = 4, vacancies = 1)
    >>> flowCost, flowDict = nx.capacity_scaling(G, demand = 'spam',
    ...                                          capacity = 'vacancies',
    ...                                          weight = 'cost')
    >>> flowCost
    37
    >>> flowDict  # doctest: +SKIP
    {'a': {'t': 4}, 'd': {'w': 2}, 'q': {'d': 1}, 'p': {'q': 2, 'a': 2}, 't': {'q': 1, 'w': 1}, 'w': {}}
    """
    # The residual network R carries per-node "excess"/"potential" and
    # per-edge "capacity"/"flow"/"weight" attributes, all read below.
    R = _build_residual_network(G, demand, capacity, weight)

    inf = float("inf")
    # Account cost of negative selfloops.
    # A self-loop contributes capacity * weight only when its capacity is
    # positive and its weight is negative; otherwise it contributes 0.
    flow_cost = sum(0 if e.get(capacity, inf) <= 0 or e.get(weight, 0) >= 0 else e[capacity] * e[weight] for u, v, e in nx.selfloop_edges(G, data=True))

    # Determine the maximum edge capacity.
    # The leading -inf makes max() well-defined when R has no edges.
    wmax = max(chain([-inf], (e["capacity"] for u, v, e in R.edges(data=True))))
    if wmax == -inf:
        # Residual network has no edges.
        return flow_cost, _build_flow_dict(G, R, capacity, weight)

    R_nodes = R.nodes
    R_succ = R.succ

    # Initial scaling parameter: a power of two derived from wmax; it is
    # halved after every phase until it drops below 1.
    delta = 2**int(log(wmax, 2))
    while delta >= 1:
        # Saturate Δ-residual edges with negative reduced costs to achieve
        # Δ-optimality.
        for u in R:
            p_u = R_nodes[u]["potential"]
            for v, es in R_succ[u].items():
                for k, e in es.items():
                    # NOTE(review): this value is recomputed identically
                    # inside the branch below before it is used.
                    flow = e["capacity"] - e["flow"]
                    if e["weight"] - p_u + R_nodes[v]["potential"] < 0:
                        flow = e["capacity"] - e["flow"]
                        if flow >= delta:
                            e["flow"] += flow
                            # (k[0], not k[1]) flips the second component of
                            # the edge key -- presumably addressing the
                            # paired reverse residual edge from v to u.
                            R_succ[v][u][(k[0], not k[1])]["flow"] -= flow
                            R_nodes[u]["excess"] -= flow
                            R_nodes[v]["excess"] += flow
        # Determine the Δ-active nodes.
        # S collects nodes with excess >= delta, T those with excess <= -delta.
        S = set()
        T = set()
        S_add = S.add
        S_remove = S.remove
        T_add = T.add
        T_remove = T.remove
        for u in R:
            excess = R_nodes[u]["excess"]
            if excess >= delta:
                S_add(u)
            elif excess <= -delta:
                T_add(u)
        # Repeatedly augment flow from S to T along shortest paths until
        # Δ-feasibility is achieved.
        while S and T:
            s = arbitrary_element(S)
            t = None
            # Search for a shortest path in terms of reduced costs from s to
            # any t in T in the Δ-residual network.
            d = {}
            pred = {s: None}
            h = heap()
            h_insert = h.insert
            # NOTE(review): h_get is bound here but never used in this function.
            h_get = h.get
            h_insert(s, 0)
            while h:
                u, d_u = h.pop()
                d[u] = d_u
                if u in T:
                    # Path found.
                    t = u
                    break
                p_u = R_nodes[u]["potential"]
                for v, es in R_succ[u].items():
                    # Nodes already in d have a finalized distance label.
                    if v in d:
                        continue
                    wmin = inf
                    # Find the minimum-weighted (u, v) Δ-residual edge.
                    for k, e in es.items():
                        if e["capacity"] - e["flow"] >= delta:
                            w = e["weight"]
                            if w < wmin:
                                wmin = w
                                kmin = k
                                emin = e
                    if wmin == inf:
                        continue
                    # Update the distance label of v.
                    d_v = d_u + wmin - p_u + R_nodes[v]["potential"]
                    # pred is only updated when insert returns a truthy
                    # value -- presumably when the label was added or lowered.
                    if h_insert(v, d_v):
                        pred[v] = (u, kmin, emin)
            if t is not None:
                # Augment Δ units of flow from s to t.
                # u still equals t here (the loop above exited via break),
                # so the path is walked backwards through pred until s.
                while u != s:
                    v = u
                    u, k, e = pred[v]
                    e["flow"] += delta
                    R_succ[v][u][(k[0], not k[1])]["flow"] -= delta
                # Account node excess and deficit.
                R_nodes[s]["excess"] -= delta
                R_nodes[t]["excess"] += delta
                if R_nodes[s]["excess"] < delta:
                    S_remove(s)
                if R_nodes[t]["excess"] > -delta:
                    T_remove(t)
                # Update node potentials.
                d_t = d[t]
                for u, d_u in d.items():
                    R_nodes[u]["potential"] -= d_u - d_t
            else:
                # Path not found.
                S_remove(s)
        delta //= 2

    # Any node left with nonzero excess means the demands cannot be met.
    if any(R.nodes[u]["excess"] != 0 for u in R):
        raise nx.NetworkXUnfeasible("No flow satisfying all demands.")

    # Calculate the flow cost.
    # Only edges carrying positive flow contribute to the total.
    for u in R:
        for v, es in R_succ[u].items():
            for e in es.values():
                flow = e["flow"]
                if flow > 0:
                    flow_cost += flow * e["weight"]

    return flow_cost, _build_flow_dict(G, R, capacity, weight)
def setNetworkXGraph(self):
    """Build ``self.G`` from ``self.Adjacency`` and strip its self-loops.

    The graph is created with ``nx.from_scipy_sparse_matrix`` and stored on
    the instance; every self-loop edge reported by ``nx.selfloop_edges`` is
    then removed from it. Nothing is returned.
    """
    graph = nx.from_scipy_sparse_matrix(self.Adjacency)
    # Publish the graph on the instance before pruning, as callers expect
    # the attribute to be set by this method.
    self.G = graph
    loops = nx.selfloop_edges(graph)
    graph.remove_edges_from(loops)