def dumpjson_graph(self):
    assert self.COMM.rank == 0
    import json
    import networkx as nx
    h = self.h
    # Create a whole network containing both transmitter types.
    self.global_whole_net = nx.compose(self.global_ecg, self.global_icg)
    self.global_whole_net.remove_nodes_from(list(nx.isolates(self.global_whole_net)))
    self.global_icg.remove_nodes_from(list(nx.isolates(self.global_icg)))
    self.global_ecg.remove_nodes_from(list(nx.isolates(self.global_ecg)))
    d = []
    whole = nx.to_numpy_matrix(self.global_whole_net)
    # TODO: sort the whole-network matrix here in Python, as Python is
    # arguably easier to understand than JS.
    d.append(whole.tolist())
    d.append(self.global_namedict)
    with open('web/js/global_whole_network.json', 'w') as f:
        json.dump(d, f)
    # Read the object back just to prove that it is readable, then discard it.
    with open('web/js/global_whole_network.json', 'r') as f:
        d = json.load(f)
    d = None
    print('Wrote node-link JSON data to web/js/global_whole_network.json')
def create_3comms_bipartite(n, m, p, No_isolates=True):
    import community as comm
    from networkx.algorithms import bipartite as bip

    # Resample bipartite random graphs until one is connected and its
    # best partition has exactly three communities.
    u = 0
    while True:
        G = nx.bipartite_random_graph(n, m, p)
        if No_isolates:
            G.remove_nodes_from(list(nx.isolates(G)))
        partition = comm.best_partition(G)
        sel = max(partition.values())
        if sel == 2 and nx.is_connected(G):
            break
        u += 1
        print(u, sel)
    ndlss = bip.sets(G)
    ndls = [list(i) for i in ndlss]
    slayer1 = ndls[0]
    slayer2 = ndls[1]
    layer1 = [i for i, v in partition.items() if v == 0]
    layer2 = [i for i, v in partition.items() if v == 1]
    layer3 = [i for i, v in partition.items() if v == 2]
    edgeList = []
    for e in G.edges():
        if (e[0] in slayer1 and e[1] in slayer2) or (e[0] in slayer2 and e[1] in slayer1):
            edgeList.append(e)
    return G, layer1, layer2, layer3, slayer1, slayer2, edgeList, partition
def synthetic_three_level(n, p1, p2, p3, J_isolates=False, F_isolates=False, D_isolates=False):
    k = n
    J = nx.erdos_renyi_graph(n, p1)  # the first-layer graph
    Jis = list(nx.isolates(J))
    F = nx.erdos_renyi_graph(n, p2)  # the second-layer graph
    Fis = list(nx.isolates(F))
    D = nx.erdos_renyi_graph(n, p3)  # the third-layer graph
    Dis = list(nx.isolates(D))

    def translation_graph(J, F, D):
        # Connect the i-th node of each layer to the i-th node of the next.
        H1 = nx.Graph()
        H2 = nx.Graph()
        for i in range(n):
            H1.add_edge(list(J.nodes())[i], list(F.nodes())[i])
            H2.add_edge(list(F.nodes())[i], list(D.nodes())[i])
        return H1, H2

    Jed = set(J.edges())
    Fed = set(F.edges())
    Ded = set(D.edges())
    l = [Jed, Fed, Ded]
    lu = list(set.union(*l))
    JFD = nx.Graph()
    JFD.add_edges_from(lu)
    G = nx.Graph()  # the synthetic three-layer graph

    # Relabel nodes so the three layers occupy disjoint label ranges.
    mappingF = {}
    for i in range(2 * n):
        mappingF[i] = n + i
    FF = nx.relabel_nodes(F, mappingF, copy=True)
    mappingD = {}
    for i in range(2 * n):
        if i > n - 1:
            mappingD[i] = i - n
        else:
            mappingD[i] = 2 * n + i
    DD = nx.relabel_nodes(D, mappingD, copy=True)
    H1, HH2 = translation_graph(J, FF, DD)
    G.add_edges_from(J.edges())
    G.add_edges_from(H1.edges())
    G.add_edges_from(DD.edges())
    G.add_edges_from(HH2.edges())
    G.add_edges_from(FF.edges())
    edgeList = []
    for e in H1.edges():
        edgeList.append(e)
    for e in HH2.edges():
        edgeList.append(e)
    return G, J, FF, DD, JFD, edgeList
def test_dim_error():
    import sys
    graph_file = '/home/michal/SALSA_files/tmp/real_run/middle_graph_authority'
    G_new = gm.read_graph_from_file(graph_file)
    isolates = list(nx.isolates(G_new))
    print('num of isolates: ' + str(len(isolates))); sys.stdout.flush()
    num_of_not_isolates = G_new.number_of_nodes() - len(isolates)
    authority_dict = {}
    classes = list(nx.strongly_connected_component_subgraphs(G_new))
    print('num of classes including isolates: ' + str(len(classes))); sys.stdout.flush()
    # Remove classes consisting of isolated nodes:
    classes[:] = [c for idx, c in enumerate(classes) if c.nodes()[0] not in isolates]
    print('num of classes NOT including isolates: ' + str(len(classes))); sys.stdout.flush()
    for subG in classes:
        out_file = ''.join(['/home/michal/SALSA_files/tmp/real_run/graph_', str(classes.index(subG))])
        gm.write_graph_to_file(subG, out_file)
        tmp_d = salsa.eig_calc(subG, normalize=num_of_not_isolates)  # power_iteration(subG)
    '''
    for k, v in tmp_d.items():
        authority_dict[G.nodes()[k]] = v
    for i in isolates:
        authority_dict[G.nodes()[i]] = 0
    print('\n--- calc_salsa_per_class took: ' + str(datetime.now() - startTime)); sys.stdout.flush()
    '''
    return
def correlation_betweenness_degree_on_ErdosNetwork():
    G = nx.read_pajek("dataset/Erdos971.net")
    isolated_nodes = list(nx.isolates(G))
    G.remove_nodes_from(isolated_nodes)
    print(nx.info(G))

    ND, ND_lambda = ECT.get_number_of_driver_nodes(G)
    print("ND =", ND)
    print("ND lambda:", ND_lambda)
    ND, driverNodes = ECT.get_driver_nodes(G)
    print("ND =", ND)

    degrees = []
    betweenness = []
    tot_degree = nx.degree_centrality(G)
    tot_betweenness = nx.betweenness_centrality(G, weight=None)
    for node in driverNodes:
        degrees.append(tot_degree[node])
        betweenness.append(tot_betweenness[node])

    with open("results/driver_degree_Erdos.txt", "w") as f:
        for x in degrees:
            print(x, file=f)
    with open("results/driver_betweenness_Erdos.txt", "w") as f:
        for x in betweenness:
            print(x, file=f)
    with open("results/tot_degree_Erdos.txt", "w") as f:
        for key, value in tot_degree.items():
            print(value, file=f)
    with open("results/tot_betweenness_Erdos.txt", "w") as f:
        for key, value in tot_betweenness.items():
            print(value, file=f)
def reciprocated_graph(D):
    # Keep only the edges of D that appear in both directions.
    G = D.to_undirected()  # copy
    for (u, v) in D.edges():
        if not D.has_edge(v, u):
            G.remove_edge(u, v)
    G.remove_nodes_from(list(nx.isolates(G)))
    return G
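# A quick usage sketch (editorial addition, toy graph assumed): with one
# reciprocated pair and one one-way edge, only the reciprocated pair
# survives and the stranded endpoint is dropped as an isolate.
import networkx as nx

D = nx.DiGraph([(1, 2), (2, 1), (2, 3)])
R = reciprocated_graph(D)
print(sorted(R.edges()))  # [(1, 2)]
print(sorted(R.nodes()))  # [1, 2] -- node 3 became isolated and was removed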
def make_shared_user_editing_network(alter_revisions_dict, threshold):
    # Build the graph: consecutive articles edited by the same user share an edge.
    net = nx.DiGraph()
    for editor, revisions in alter_revisions_dict.items():
        articles = [r['title'] for r in revisions]
        for num, article in enumerate(articles[:-1]):
            if net.has_edge(article, articles[num + 1]):
                net[article][articles[num + 1]]['weight'] += 1
            else:
                net.add_edge(article, articles[num + 1], weight=1)

    # Collect sub-threshold edges and self-loops first, then remove them;
    # removing edges while iterating over the live edge view would break.
    to_remove = [(i, j) for i, j, d in net.edges(data=True)
                 if d['weight'] < threshold or i == j]
    net.remove_edges_from(to_remove)

    # Remove the resulting isolates.
    net.remove_nodes_from(list(nx.isolates(net)))
    return net
def graph_preprocessing_with_counts(G_input=None, save_file=None):
    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print("Raw graph size:", G.size())
    print("Raw graph nodes:", G.number_of_nodes())

    profile2prob = {l.split()[0]: float(l.split()[1])
                    for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    # Drop low-count edges; iterate over a list so removal is safe.
    for edge in list(G.edges(data=True)):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']
        if _count < 3:
            G.remove_edge(*nodes)

    print("Pre-processed graph size:", G.size())
    print("Pre-processed graph nodes:", G.number_of_nodes())

    G.remove_nodes_from(list(nx.isolates(G)))

    print("Pre-processed graph size:", G.size())
    print("Pre-processed graph nodes:", G.number_of_nodes())

    if save_file:
        print("Saving to", save_file)
        nx.write_gpickle(G, save_file)

    return G
def getRandomPageRanks(filename):
    Ga = nx.read_graphml(sys.argv[1])

    # Get the component size distribution of the input graph.
    cc = list(nx.connected_components(Ga))
    cc_dict = {}
    for x in range(len(cc)):
        try:
            cc_dict[len(cc[x])].append(x)
        except KeyError:
            cc_dict[len(cc[x])] = []
            cc_dict[len(cc[x])].append(x)
    isolates = list(nx.isolates(Ga))

    # Build a random graph with the same node count and edge density.
    rg = nx.fast_gnp_random_graph(
        Ga.number_of_nodes(),
        2.0 * Ga.number_of_edges() / (Ga.number_of_nodes() * (Ga.number_of_nodes() - 1)))
    c_rg = nx.average_clustering(rg)
    rg_cc = list(nx.connected_component_subgraphs(rg))[0]
    rg_asp = nx.algorithms.shortest_paths.generic.average_shortest_path_length(rg_cc)
    p_rg = community.best_partition(rg_cc)
    m_rg = community.modularity(p_rg, rg_cc)

    pageranks = nx.pagerank_numpy(rg)
    return pageranks
def make_shared_page_editing_network(alter_revisions_dict, threshold):
    inverted_alter_revisions_dict = invert_alter_revisions(alter_revisions_dict)

    # Build the graph: consecutive editors of the same page share an edge.
    g = nx.DiGraph()
    for page, users in inverted_alter_revisions_dict.items():
        user_list = list(users.keys())
        for num, user in enumerate(user_list[:-1]):
            next_user = user_list[num + 1]
            if g.has_edge(user, next_user):
                g[user][next_user]['weight'] += 1
            else:
                g.add_edge(user, next_user, weight=1)

    # Collect sub-threshold edges and self-loops first, then remove them.
    to_remove = [(i, j) for i, j, d in g.edges(data=True)
                 if d['weight'] < threshold or i == j]
    g.remove_edges_from(to_remove)

    # Remove the resulting isolates.
    g.remove_nodes_from(list(nx.isolates(g)))
    return g
def rand_delete(G, num_nodes):
    # Delete num_nodes randomly chosen nodes, then drop any isolates left behind.
    G = nx.convert_node_labels_to_integers(G, first_label=0)
    # random_integers is inclusive of `high`, so cap it at len-1 to stay
    # inside the relabeled range 0..n-1.
    nodes_to_delete = list(random_integers(low=0, high=len(G.nodes()) - 1, size=num_nodes))
    G.remove_nodes_from(nodes_to_delete)
    isos = list(nx.isolates(G))
    G.remove_nodes_from(isos)
    return G
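# Editorial note on the pattern used throughout these snippets: in
# NetworkX >= 2.0, nx.isolates() returns an iterator, so it must be
# materialized with list() before mutating the graph. A minimal sketch:
import networkx as nx

G = nx.Graph([(1, 2)])
G.add_node(3)  # an isolate
G.remove_nodes_from(list(nx.isolates(G)))  # list() snapshots the isolates first
print(sorted(G.nodes()))  # [1, 2]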
def _proba(self, G):
    """
    [TO BE TESTED] Compute transition probabilities.

    Only available when feature_type is 'fisher'.

    Parameters
    ----------
    :param G: DAG of Fisher features.
        Attribute 'proba_': edge attribute, float.
        Transition probability that one node transfers to another.
    :return: G, DAG with edge attribute 'proba_' assigned.
    """
    for node in G.nodes():
        s = (np.sum(G[node][x]['kern_unnorm_']) for x in G.successors(node))
        s = sum(s)
        # Iterate over a list of successors so edge removal below is safe.
        for successor_ in list(G.successors(node)):
            if s == 0:
                G[node][successor_]['proba_'] = 0.
            else:
                G[node][successor_]['proba_'] = np.sum(G[node][successor_]['kern_unnorm_']) / s
            if G[node][successor_]['proba_'] < self.proba_threshold:
                G.remove_edge(node, successor_)
    isolated_ = list(nx.isolates(G))
    G.remove_nodes_from(isolated_)
    return G
def residual_graph(G, v):
    # Input:  G, the original graph
    #         v, the vertex added to the vertex cover
    # Output: G', the graph consisting of the edges not covered by C
    #         and the nodes not in C
    G1 = nx.Graph()
    for node in G.nodes():
        G1.add_node(node)
    for edge in G.edges():
        G1.add_edge(edge[0], edge[1])

    # Remove all edges covered by v; copy the neighbor list first, since
    # removing edges invalidates the live view.
    neighbors = list(G1.neighbors(v))
    for u in neighbors:
        G1.remove_edge(v, u)

    # Remove v from the graph.
    G1.remove_node(v)

    # Remove isolated nodes (v's now-stranded neighbors, if any).
    isolates = list(nx.isolates(G1))
    for node in isolates:
        G1.remove_node(node)
        # degreeQ.remove_node(node)
    return G1
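# Usage sketch (editorial, assuming the function above): removing vertex 0
# from a triangle covers its two incident edges and leaves the single
# uncovered edge (1, 2).
import networkx as nx

K3 = nx.complete_graph(3)
print(sorted(residual_graph(K3, 0).edges()))  # [(1, 2)]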
def whole_graph_metrics(graph, weighted=False):
    graph_metrics = {}
    # nx expects the *name* of the weight attribute (or None), not a boolean.
    weight = 'weight' if weighted else None

    # Average shortest path length
    graph_metrics['avg_shortest_path'] = \
        nx.average_shortest_path_length(graph, weight=weight)

    # Average eccentricity
    ecc_dict = nx.eccentricity(graph)
    graph_metrics['avg_eccentricity'] = np.mean(np.array(list(ecc_dict.values())))

    # Average clustering coefficient
    # NOTE: Option to include or exclude zeros
    graph_metrics['avg_ccoeff'] = \
        nx.average_clustering(graph, weight=weight, count_zeros=True)

    # Average node betweenness
    avg_node_btwn_dict = nx.betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_node_btwn'] = \
        np.mean(np.array(list(avg_node_btwn_dict.values())))

    # Average edge betweenness
    avg_edge_btwn_dict = nx.edge_betweenness_centrality(graph, normalized=True)
    graph_metrics['avg_edge_btwn'] = \
        np.mean(np.array(list(avg_edge_btwn_dict.values())))

    # Number of isolates
    graph_metrics['isolates'] = len(list(nx.isolates(graph)))

    return graph_metrics
def vehicle_accusation_graph(n, p, seed=None, directed=True):
    """Return a random vehicle accusation graph G_{n,p}.

    Chooses each of the possible edges with accusation probability p.

    Parameters
    ----------
    n : int
        The number of vehicles.
    p : float
        Probability for accusation.
    seed : int, optional
        Seed for random number generator (default=None).
    directed : bool, optional (default=True)
        If True return a directed graph.
    """
    if directed:
        G = nx.DiGraph()
    else:
        G = nx.Graph()
    G.add_nodes_from(range(n))
    G.name = 'Vehicle_accusation_graph({}, {})'.format(n, p)
    if p <= 0:
        return G
    if p >= 1:
        return nx.complete_graph(n, create_using=G)

    if seed is not None:
        random.seed(seed)

    if G.is_directed():
        edges = itertools.permutations(range(n), 2)
    else:
        edges = itertools.combinations(range(n), 2)

    for e in edges:
        if random.random() < p:
            G.add_edge(*e)

    # Remove all isolates in the graph & relabel the remaining nodes.
    # (Materialize first: a bare generator is always truthy.)
    isolates = list(nx.isolates(G))
    if isolates:
        G.remove_nodes_from(isolates)
        mapping = dict(zip(G.nodes(), range(G.number_of_nodes())))
        G = nx.relabel_nodes(G, mapping)
    return G
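# Usage sketch (illustrative parameters only): a reproducible 10-vehicle
# accusation graph with accusation probability 0.3; isolates are stripped
# and the remaining nodes relabeled 0..k-1.
G = vehicle_accusation_graph(10, 0.3, seed=42)
print(G.number_of_nodes(), G.number_of_edges())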
def create_conn_random_graph(nodes, p):
    # Resample Erdos-Renyi graphs until a connected one comes up.
    while True:
        # G = nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G = nx.erdos_renyi_graph(nodes, p)
        if nx.is_connected(G):
            break
    G.remove_nodes_from(list(nx.isolates(G)))  # no-op here: a connected graph has no isolates
    return G
def set_isolated(nodes_list, mdg):
    ts = int(datetime.now().strftime("%s"))
    dsg = extract_dpsg(mdg, ts, True)
    usg = dsg.to_undirected()
    isolated_nodes = set(nx.isolates(usg))
    for node in nodes_list:
        if node['id'] in isolated_nodes:
            node['isolated'] = True
def set_isolated(nodes_list, mdg):
    ts = int(time.mktime(datetime.now().timetuple()))  # Windows-compatible
    dsg = extract_dpsg(mdg, ts, True)
    usg = dsg.to_undirected()
    isolated_nodes = set(nx.isolates(usg))
    for node in nodes_list:
        if node['id'] in isolated_nodes:
            node['isolated'] = True
def create_conn_random_graph(nodes, p):
    # Resample Erdos-Renyi graphs until a connected one comes up.
    while True:
        # G = nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G = nx.erdos_renyi_graph(nodes, p)
        if nx.is_connected(G):
            break
    G.remove_nodes_from(list(nx.isolates(G)))  # no-op here: a connected graph has no isolates
    sstt = "Erdos-Renyi Random Graph with %i nodes and probability %.02f" % (nodes, p)
    return G, sstt
def _generate_nlist():
    # Relies on `self` being available from the enclosing scope.
    G = self.graph
    # TODO: imaginative, but crude; revise.
    isolates = set(nx.isolates(G))
    independent = set(nx.maximal_independent_set(G)) - isolates
    dominating = set(nx.dominating_set(G)) - independent - isolates
    rest = set(G.nodes()) - dominating - independent - isolates
    nlist = list(map(sorted, filter(None, (isolates, independent, dominating, rest))))
    return nlist
def island_nodes(self):
    """
    Finds single nodes that are completely disconnected from the rest
    of the graph.

    Returns
    -------
    : list
      A list of disconnected nodes, i.e. nodes of degree zero (island nodes).
    """
    return list(nx.isolates(self))
def synthetic_multi_level(k, n, p=[], No_isolates=True):
    list_of_Graphs = []
    list_of_isolates = []
    list_of_Graphs_final = []
    for ij in range(k):
        list_of_Graphs.append(nx.erdos_renyi_graph(n, p[ij]))
        list_of_isolates.append(list(nx.isolates(list_of_Graphs[ij])))

    Gagr = nx.Graph()
    for i in list_of_Graphs:
        Gagr.add_edges_from(i.edges())
        Gagr.add_nodes_from(i.nodes())

    G = nx.Graph()  # the synthetic multi-level graph

    # Relabel nodes so each level occupies its own range of n labels.
    mapping = {}
    for i in range(k):
        mapping[i] = {}
        for ij in range(n):
            mapping[i][ij] = ij + i * n
        list_of_Graphs_final.append(nx.relabel_nodes(list_of_Graphs[i], mapping[i], copy=True))

    list_of_translation_graphs = []
    for ij in range(k - 1):
        H1 = nx.Graph()
        # Sort node lists so the i-th node of one level pairs with the
        # i-th node of the next.
        g1 = sorted(list_of_Graphs_final[ij].nodes())
        g2 = sorted(list_of_Graphs_final[ij + 1].nodes())
        for ji in range(n):
            H1.add_edge(g1[ji], g2[ji])
        list_of_translation_graphs.append(H1)

    luf = set()
    for i in list_of_Graphs_final:
        luf = luf.union(set(i.edges()))
    G.add_edges_from(list(luf))

    luf = set()
    for i in list_of_translation_graphs:
        luf = luf.union(set(i.edges()))
    edgeList = list(luf)
    G.add_edges_from(luf)

    nmap = {}
    for i in mapping:
        for j in mapping[i]:
            nmap[mapping[i][j]] = j
    return G, list_of_Graphs_final, Gagr, edgeList, nmap, mapping
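# Usage sketch (illustrative parameters only): three Erdos-Renyi levels of
# 10 nodes each, chained by the inter-level translation edges returned in
# edgeList.
G, levels, Gagr, edgeList, nmap, mapping = synthetic_multi_level(3, 10, p=[0.3, 0.3, 0.3])
print(G.number_of_nodes(), len(edgeList))  # 30 nodes, 20 translation edges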
def draw_network(G, sstt, pos={}, with_edgewidth=False, withLabels=True,
                 pernode_dict={}, labfs=10, valpha=0.4, ealpha=0.4):
    G.remove_nodes_from(list(nx.isolates(G)))
    plt.figure(figsize=(12, 12))
    if len(pos) == 0:
        pos = nx.spring_layout(G, scale=50)
    if with_edgewidth:
        edgewidth = [d['weight'] for (u, v, d) in G.edges(data=True)]
    else:
        edgewidth = [1 for i in G.edges()]
    # Use the valpha/ealpha parameters instead of hard-coded alphas.
    nx.draw_networkx_nodes(G, pos=pos, alpha=valpha)
    if withLabels:
        if len(pernode_dict) > 0:
            labels = {i: v for v, i in pernode_dict.items() if i in G.nodes()}
            labe = nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=20)
        else:
            labe = nx.draw_networkx_labels(G, pos=pos, font_size=labfs)
    nx.draw_networkx_edges(G, pos=pos, edge_color='b', width=edgewidth, alpha=ealpha)
    plt.title(sstt, fontsize=20)
    kk = plt.axis('off')
    return pos
def test_edgelist_integers(self):
    G = nx.convert_node_labels_to_integers(self.G)
    (fd, fname) = tempfile.mkstemp()
    nx.write_edgelist(G, fname)
    H = nx.read_edgelist(fname, nodetype=int)
    # Isolated nodes are not written to an edgelist.
    G.remove_nodes_from(list(nx.isolates(G)))
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
    os.close(fd)
    os.unlink(fname)
def make_json_graph(msm, request):
    c = float(request.get_argument('cutoff'))
    e = str(request.get_argument('resize'))
    t = sparse.csr_matrix(msm.transmat_.copy())
    t.data[t.data < c] = 0.0
    t.eliminate_zeros()
    G = nx.from_scipy_sparse_matrix(t, create_using=nx.DiGraph())
    metric = resize[e](G, msm, t)
    nx.set_node_attributes(G, 'size', metric)
    G.remove_nodes_from(list(nx.isolates(G)))
    return json_graph.node_link_data(G)
def create_conn_random_graph_chrom(nodes, p, x):
    # Resample until a connected graph with chromatic number x comes up.
    while True:
        # G = nx.connected_watts_strogatz_graph(25, 2, 0.8, tries=100)
        G = nx.erdos_renyi_graph(nodes, p)
        if nx.is_connected(G):
            g = Graph(G)
            cn = vertex_coloring(g, value_only=True)
            if cn == x:
                break
    G.remove_nodes_from(list(nx.isolates(G)))  # no-op here: the graph is connected
    return G
def color(G):
    """Returns a two-coloring of the graph.

    Raises an exception if the graph is not bipartite.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    color : dictionary
        A dictionary keyed by node with a 1 or 0 as data for each node color.

    Raises
    ------
    NetworkXError if the graph is not two-colorable.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> c = bipartite.color(G)
    >>> print(c)
    {0: 1, 1: 0, 2: 1, 3: 0}

    You can use this to set a node attribute indicating the bipartite set:

    >>> nx.set_node_attributes(G, 'bipartite', c)
    >>> print(G.node[0]['bipartite'])
    1
    >>> print(G.node[1]['bipartite'])
    0
    """
    color = {}
    for n in G:  # handle disconnected graphs
        if n in color or len(G[n]) == 0:  # skip isolates
            continue
        queue = [n]
        color[n] = 1  # nodes seen with color (1 or 0)
        while queue:
            v = queue.pop()
            c = 1 - color[v]  # opposite color of node v
            for w in G[v]:
                if w in color:
                    if color[w] == color[v]:
                        raise nx.NetworkXError("Graph is not bipartite.")
                else:
                    color[w] = c
                    queue.append(w)
    # color isolates with 0
    color.update(dict.fromkeys(nx.isolates(G), 0))
    return color
def get_boundaries(self):
    # Remove internal edges from a copy of our pixel-grid graph and keep
    # just the boundaries.
    self.outlines_graph = networkx.Graph(self.grid_graph)
    for pixel, attrs in self.pixel_graph.nodes(data=True):
        corners = attrs['corners']
        for neighbor in self.pixel_graph.neighbors(pixel):
            edge = corners & self.pixel_graph.node[neighbor]['corners']
            if len(edge) != 2:
                # Unexpected: a shared border should have exactly 2 corners.
                print(edge)
            elif self.outlines_graph.has_edge(*edge):
                # Remove the internal edge from the outlines graph.
                self.outlines_graph.remove_edge(*edge)
    # Remove the now-isolated corner nodes from the outlines graph too.
    for node in list(networkx.isolates(self.outlines_graph)):
        self.outlines_graph.remove_node(node)
def isolate_outlines(self):
    # Remove internal edges from a copy of our pixel-grid graph.
    self.outlines_graph = nx.Graph(self.grid_graph)
    for pixel, attrs in self.pixel_graph.nodes(data=True):
        corners = attrs['corners']
        for neighbor in self.pixel_graph.neighbors(pixel):
            edge = corners & self.pixel_graph.node[neighbor]['corners']
            if len(edge) != 2:
                print(edge)
            if self.outlines_graph.has_edge(*edge):
                self.outlines_graph.remove_edge(*edge)
    for node in list(nx.isolates(self.outlines_graph)):
        self.outlines_graph.remove_node(node)
def clean_met_net(self, MetNet, genes_list, food_list):
    # TODO: check more carefully that deleting targets does not affect anything...
    chemis = deepcopy(MetNet)  # will this object inherit the MetabolicNetwork class methods?
    for r in [x for x in MetNet.nodes() if MetNet.node[x]['Type'] == 'R']:
        if r not in [reac for reac in genes_list if reac not in food_list]:
            chemis.remove_node(r)
    # Some targets may be deleted in this step:
    chemis.remove_nodes_from([isol for isol in nx.isolates(chemis) if isol not in food_list])
    # We may still need to check whether food or target molecules were removed?
    return chemis
def has_isolated_nodes(G):
    """Returns True if the graph `G` has isolated nodes."""
    return len(list(nx.isolates(G))) > 0
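# An equivalent lazy variant (editorial sketch; the name is hypothetical):
# any() stops at the first isolate, so the full list is never built. Note
# the guard expression -- a bare any(nx.isolates(G)) would be wrong, since
# a node labeled 0 is falsy.
def has_isolated_nodes_lazy(G):
    return any(True for _ in nx.isolates(G))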
print("Do you want to add any edges?") keyadd = input("y/n? ") if (keyadd == "y"): val = int(input("How many edges need to add? ")) for i in range(val): startV = int(input("Enter start vertex ")) endV = int(input("Enter end vertex ")) G.add_edges_from([(startV, endV)]) #Добавляем ребро от вершины startV к вершине endV. print("==========================================") #Кратчайшие пути: от введенной вершины до всех остальных и от нулевой до введённой end = 1 eccentricity = dict() lens = [] isolate = list(nx.isolates(G)) if isolate: for i in isolate: G.add_edge(i, end) for i in range(n-1): if end in isolate: end+=1 else: start = 0 p = nx.shortest_path(G,source = 0, target = end) end+=1 if len(p) in eccentricity: eccentricity[len(p)].append(p) else: eccentricity[len(p)] = []
# To run this script: python run_generate_adjlist_largestcomponent.py
import networkx as nx
import matplotlib.pyplot as plt
import sys
from matplotlib.legend_handler import HandlerLine2D
from matplotlib.font_manager import FontProperties

x = int(sys.argv[1])
year = []
largestcomponent = []

fh = open("../data/adjlistfile_till_year_" + str(x))
G = nx.read_adjlist(fh, create_using=nx.DiGraph())
G = G.to_undirected()
#print("Year " + str(x) + ":")
#print("Number of nodes:", G.number_of_nodes())
#print("Number of isolates:", len(list(nx.isolates(G))))
G.remove_nodes_from(list(nx.isolates(G)))
#print("Number of nodes after removing isolates:", G.number_of_nodes())

components = sorted(nx.connected_components(G), key=len, reverse=True)
largestcomponent = G.subgraph(components[0])
year.append(x)

for line in nx.generate_adjlist(largestcomponent):
    print(line)
def color(G):
    """Returns a two-coloring of the graph.

    Raises an exception if the graph is not bipartite.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    color : dictionary
        A dictionary keyed by node with a 1 or 0 as data for each node color.

    Raises
    ------
    NetworkXError
        If the graph is not two-colorable.

    Examples
    --------
    >>> from networkx.algorithms import bipartite
    >>> G = nx.path_graph(4)
    >>> c = bipartite.color(G)
    >>> print(c)
    {0: 1, 1: 0, 2: 1, 3: 0}

    You can use this to set a node attribute indicating the bipartite set:

    >>> nx.set_node_attributes(G, c, "bipartite")
    >>> print(G.nodes[0]["bipartite"])
    1
    >>> print(G.nodes[1]["bipartite"])
    0
    """
    if G.is_directed():
        import itertools

        def neighbors(v):
            return itertools.chain.from_iterable([G.predecessors(v), G.successors(v)])

    else:
        neighbors = G.neighbors

    color = {}
    for n in G:  # handle disconnected graphs
        if n in color or len(G[n]) == 0:  # skip isolates
            continue
        queue = [n]
        color[n] = 1  # nodes seen with color (1 or 0)
        while queue:
            v = queue.pop()
            c = 1 - color[v]  # opposite color of node v
            for w in neighbors(v):
                if w in color:
                    if color[w] == color[v]:
                        raise nx.NetworkXError("Graph is not bipartite.")
                else:
                    color[w] = c
                    queue.append(w)
    # color isolates with 0
    color.update(dict.fromkeys(nx.isolates(G), 0))
    return color
def find_roots(G):
    # Roots: sources of the DFS-tree edges, plus any isolated nodes.
    dfs_tree = nx.dfs_tree(G, depth_limit=0)
    return set([n1 for n1, n2 in dfs_tree.edges] + list(nx.isolates(G)))
G = nx.DiGraph()
G.add_nodes_from(nodes.name)
G.add_edges_from([(s, t) for s, t in zip(edges.name_source, edges.name_target)])  # adding the edges
nx.write_graphml(G, "dependencies_py.graphml")
nx.set_node_attributes(G, pd.Series(list(nodes.position.str.split(",")), index=nodes.name).to_dict(), 'pos')

print(nx.number_of_nodes(G))
print(nx.number_of_edges(G))
print(G.nodes.data())

print(nx.number_of_isolates(G))  # isolated nodes
G.remove_nodes_from(list(nx.isolates(G)))  # removing isolated nodes
print(nx.number_of_nodes(G))
print(nx.number_of_edges(G))
print(G.nodes)

"""# ANALYSES, METRICS, DEGREE

## Matrices and more
"""

print(list(G.adj['labkit']))  # or list(G.neighbors(1))
print(nx.center(nx.Graph(G)))
# ['beautifulsoup4', 'requests', 'six', 'docopt', 'docutils', 'gevent', 'pycrypto', 'distribute', 'lxml', 'argparse', 'pyyaml', 'jinja2', 'simplejson', 'mock', 'numpy', 'sphinx', 'python-dateutil', 'flake8', 'sqlalchemy', 'twisted', 'babel', 'psycopg2', 'click', 'flask', 'pillow', 'pytz', 'pep8']

"""## Density"""
def load_graph(data_dir, min_num_nodes, max_num_nodes, node_labels, graph_labels):
    # Each file should contain the dataset name at the front of the file name.
    name = data_dir.split('/')[-1]

    # (node_x, node_y)
    data_adj = np.loadtxt(fname=os.path.join(data_dir, '{}_A.txt'.format(name)),
                          delimiter='|').astype(int)

    if node_labels:
        # (node_id, **info)
        data_node_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_node_labels.txt'.format(name)),
            delimiter='|',
            dtype={'names': ('node_id', 'tree_id', 'node_type', 'node_name', 'node_path'),
                   'formats': ('i4', 'i4', 'S4', 'S100', 'S250')})
    else:
        # (node_id, graph_id)
        data_node_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_graph_indicators.txt'.format(name)),
            delimiter='|').astype(int)

    # (graph_id, **info)
    if graph_labels:
        data_graph_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_graph_labels.txt'.format(name)),
            delimiter='|',
            dtype={'names': ('tree_id', 'tree_name', 'language', 'stars', 'git_uri', 'last_update'),
                   'formats': ('i4', 'S100', 'S100', 'i4', 'S250', 'S100')})
    else:
        # (graph_id) -- note: this must populate data_graph_label, not
        # data_node_label as the original code did.
        data_graph_label = np.loadtxt(
            fname=os.path.join(data_dir, '{}_graph_labels.txt'.format(name)),
            delimiter=',', usecols=(0)).astype(int)

    DG = nx.DiGraph()

    # Add edges
    data_tuple = list(map(tuple, data_adj))
    DG.add_edges_from(data_tuple)

    # Add nodes
    node_bar = tqdm(range(data_node_label.shape[0]))
    for i in node_bar:
        if node_labels:
            DG.add_node(data_node_label[i][0],
                        label=data_node_label[i][0],
                        tree_id=data_node_label[i][1],
                        node_type=data_node_label[i][2],
                        node_name=data_node_label[i][3],
                        node_path=data_node_label[i][4])
        else:
            DG.add_node(data_node_label[i][0],
                        label=data_node_label[i][0],
                        tree_id=data_node_label[i][1])

    isolates = list(nx.isolates(DG))
    selfloops = list(nx.selfloop_edges(DG))
    if len(isolates) or len(selfloops):
        print("Removing isolates ({}) and selfloops ({})".format(len(isolates), len(selfloops)))
        DG.remove_nodes_from(isolates)
        DG.remove_edges_from(selfloops)

    tree_id_node_list = dict()
    tree_id_lang = dict()
    for n in DG.nodes.data():
        tree_id = n[1]['tree_id']
        if tree_id not in tree_id_node_list:
            tree_id_node_list[tree_id] = []
            tree_id_lang[tree_id] = False
        tree_id_node_list[tree_id].append(n[0])
        # check if the language extension (e.g. .jl) exists
        if ext(name) in n[1]['node_name'].decode("utf-8"):
            tree_id_lang[tree_id] = True

    graphs = []
    graph_bar = tqdm(range(data_graph_label.shape[0]))
    for i in graph_bar:
        tree_id = data_graph_label[i][0]
        # Search for nodes with the same tree id.
        nodes = tree_id_node_list[tree_id]
        # Does a language file exist? The lang flag avoids additional steps.
        lang = tree_id_lang[tree_id]
        # Create the sub-graph.
        G_sub = DG.subgraph(nodes).copy()
        G_sub.graph['label'] = tree_id
        if graph_labels:
            G_sub.graph['tree_id'] = tree_id
            G_sub.graph['tree_name'] = data_graph_label[i][1]
            G_sub.graph['language'] = data_graph_label[i][2]
            G_sub.graph['stars'] = data_graph_label[i][3]
            G_sub.graph['git_uri'] = data_graph_label[i][4]
            G_sub.graph['last_update'] = data_graph_label[i][5]
        if G_sub.number_of_nodes() >= min_num_nodes \
                and G_sub.number_of_nodes() <= max_num_nodes \
                and lang and nx.is_arborescence(G_sub):
            graphs.append(G_sub)
    return graphs
def fast_consensus(G, algorithm='louvain', n_p=20, thresh=0.2, delta=0.02):
    graph = G.copy()
    L = G.number_of_edges()
    N = G.number_of_nodes()
    for u, v in graph.edges():
        graph[u][v]['weight'] = 1.0

    while True:
        if algorithm == 'louvain':
            nextgraph = graph.copy()
            L = G.number_of_edges()
            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            with mp.Pool(processes=mp.cpu_count()) as pool:
                communities_all = pool.map(louvain_community_detection,
                                           get_yielded_graph(graph, n_p))

            for node, nbr in graph.edges():
                if (node, nbr) in graph.edges() or (nbr, node) in graph.edges():
                    if graph[node][nbr]['weight'] not in (0, n_p):
                        for i in range(n_p):
                            communities = communities_all[i]
                            if communities[node] == communities[nbr]:
                                nextgraph[node][nbr]['weight'] += 1
                    else:
                        nextgraph[node][nbr]['weight'] = graph[node][nbr]['weight']

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))
            nextgraph.remove_edges_from(remove_edges)

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

            # Triadic closure step: add edges between random co-neighbors.
            for _ in range(L):
                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]
                if len(neighbors) >= 2:
                    a, b = random.sample(set(neighbors), 2)
                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)
                        for i in range(n_p):
                            communities = communities_all[i]
                            if communities[a] == communities[b]:
                                nextgraph[a][b]['weight'] += 1

            # Reattach isolates to their strongest former neighbor;
            # materialize first, since edges are added during the loop.
            for node in list(nx.isolates(nextgraph)):
                nbr, weight = sorted(graph[node].items(),
                                     key=lambda edge: edge[1]['weight'])[0]
                nextgraph.add_edge(node, nbr, weight=weight['weight'])

            graph = nextgraph.copy()

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

        elif algorithm in ('infomap', 'lpm'):
            nextgraph = graph.copy()
            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            if algorithm == 'infomap':
                communities = [{frozenset(c) for c in
                                nx_to_igraph(graph).community_infomap().as_cover()}
                               for _ in range(n_p)]
            if algorithm == 'lpm':
                communities = [{frozenset(c) for c in
                                nx_to_igraph(graph).community_label_propagation().as_cover()}
                               for _ in range(n_p)]

            for node, nbr in graph.edges():
                for i in range(n_p):
                    for c in communities[i]:
                        if node in c and nbr in c:
                            if not nextgraph.has_edge(node, nbr):
                                nextgraph.add_edge(node, nbr, weight=0)
                            nextgraph[node][nbr]['weight'] += 1

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))
            nextgraph.remove_edges_from(remove_edges)

            for _ in range(L):
                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]
                if len(neighbors) >= 2:
                    a, b = random.sample(set(neighbors), 2)
                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)
                        for i in range(n_p):
                            if a in communities[i] and b in communities[i]:
                                nextgraph[a][b]['weight'] += 1

            graph = nextgraph.copy()

            if check_consensus_graph(nextgraph, n_p=n_p, delta=delta):
                break

        elif algorithm == 'cnm':
            nextgraph = graph.copy()
            for u, v in nextgraph.edges():
                nextgraph[u][v]['weight'] = 0.0

            communities = []
            mapping = []
            inv_map = []
            for _ in range(n_p):
                order = list(range(N))
                random.shuffle(order)
                maps = dict(zip(range(N), order))
                mapping.append(maps)
                inv_map.append({v: k for k, v in maps.items()})
                G_c = nx.relabel_nodes(graph, mapping=maps, copy=True)
                G_igraph = nx_to_igraph(G_c)
                communities.append(G_igraph.community_fastgreedy(weights='weight').as_clustering())

            for i in range(n_p):
                edge_list = [(mapping[i][j], mapping[i][k]) for j, k in graph.edges()]
                for node, nbr in edge_list:
                    a, b = inv_map[i][node], inv_map[i][nbr]
                    # Original compared graph[a][b] itself; the weight is meant.
                    if graph[a][b]['weight'] not in (0, n_p):
                        for c in communities[i]:
                            if node in c and nbr in c:
                                nextgraph[a][b]['weight'] += 1
                    else:
                        nextgraph[a][b]['weight'] = graph[a][b]['weight']

            remove_edges = []
            for u, v in nextgraph.edges():
                if nextgraph[u][v]['weight'] < thresh * n_p:
                    remove_edges.append((u, v))
            nextgraph.remove_edges_from(remove_edges)

            for _ in range(L):
                node = np.random.choice(nextgraph.nodes())
                neighbors = [a[1] for a in nextgraph.edges(node)]
                if len(neighbors) >= 2:
                    a, b = random.sample(set(neighbors), 2)
                    if not nextgraph.has_edge(a, b):
                        nextgraph.add_edge(a, b, weight=0)
                        for i in range(n_p):
                            for c in communities[i]:
                                if mapping[i][a] in c and mapping[i][b] in c:
                                    nextgraph[a][b]['weight'] += 1

            if check_consensus_graph(nextgraph, n_p, delta):
                break
        else:
            break

    if algorithm == 'louvain':
        with mp.Pool(processes=mp.cpu_count()) as pool:
            communities_all = pool.map(louvain_community_detection,
                                       get_yielded_graph(graph, n_p))
        return communities_all
    if algorithm == 'infomap':
        return [{frozenset(c) for c in nx_to_igraph(graph).community_infomap().as_cover()}
                for _ in range(n_p)]
    if algorithm == 'lpm':
        return [{frozenset(c) for c in
                 nx_to_igraph(graph).community_label_propagation().as_cover()}
                for _ in range(n_p)]
    if algorithm == 'cnm':
        communities = []
        mapping = []
        inv_map = []
        for _ in range(n_p):
            order = list(range(N))
            random.shuffle(order)
            maps = dict(zip(range(N), order))
            mapping.append(maps)
            inv_map.append({v: k for k, v in maps.items()})
            G_c = nx.relabel_nodes(graph, mapping=maps, copy=True)
            G_igraph = nx_to_igraph(G_c)
            communities.append(G_igraph.community_fastgreedy(weights='weight').as_clustering())
        return communities
def drawNetwork(path1, path2, sele=None, sele1=None, sele2=None, top1=None, top2=None,
                r=1, edge_norm=None, alpha=0.5, mutations=False, align_with=None,
                node_color=(0.6, 0.6, 0.6), edge_color1=(0, 0, 1), palette="colorblind",
                edge_color2=(1, 0, 0), labeling='0', norm_expected=False, threshold=0,
                topk=None, max_compo=None, mean_vp=None, strong_compo=None, around=None,
                keep_previous=False, compo_size=None, save_cc=None, load_cc=None,
                compos_to_excel=None, force_binary_color=False, compo_radius=None,
                compo_diam=None, label_compo='', auto_patch=True, printall=False,
                sum=False, n_clusters=None, color_by_compo=False, color_by_group=False,
                show_top_group=None, name1=None, name2=None, name_nodes='nodes',
                userSelection='all', fromstruct=None, color_by_contact_type=False,
                standard_and_expected=None):
    '''Draws a NetworkX network on the PyMOL structure.'''

    # Initialization of labeling variables and retrieval of residue XYZ positions
    if not keep_previous:
        cmd.delete('*nodes *edges Component* Group*')
        cmd.label(selection=userSelection, expression="")
        cmd.hide("licorice", "?mutations")

    # Building the position -- name correspondence
    stored.posCA = []
    stored.names = []
    stored.ss = []
    userSelection = userSelection + " and ((n. CA) or n. C)"
    cmd.iterate_state(1, selector.process(userSelection), "stored.posCA.append([x,y,z])")
    cmd.iterate(userSelection, "stored.ss.append(ss)")
    cmd.iterate(userSelection, 'stored.names.append(resn+resi+chain)')
    stored.labels = list(map(relabel, stored.names))
    stored.resid = list(map(selection, stored.names))
    node2id = dict(zip(stored.labels, stored.resid))
    node2CA = dict(zip(stored.labels, stored.posCA))

    # Secondary structure labels
    prevSS, prevChain = None, None
    counters = {'': 0, 'H': 0, 'S': 0, 'L': 0}
    node2SS = dict(zip(stored.labels, stored.ss))
    SS2nodelist = {}
    putflag = lambda X: 'U' if X in ['', 'L'] else X
    for label in node2SS:
        ss = node2SS[label]
        chain = label[-1]
        if prevChain != chain:
            for counter in counters:
                counters[counter] = 0
        if prevSS != ss:
            counters[ss] += 1
        labss = putflag(ss) + str(counters[ss]) + ':' + chain
        if labss in SS2nodelist:
            SS2nodelist[labss].append(label)
        else:
            SS2nodelist[labss] = [label]
        prevSS = ss
        prevChain = chain

    prevkey, prevChain = None, None
    order = []
    keys = list(SS2nodelist.keys())
    for key in keys:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Head:' + key.split(':')[-1]
            else:
                newkey = 'U' + prevkey
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            order.append(newkey)
        else:
            order.append(key)
            prevkey = key
        prevChain = key.split(':')[-1]

    prevkey = None
    final = []
    for key in order[::-1]:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey == None:
                newkey = 'Tail:' + key.split(':')[-1]
            else:
                newkey = '{}-{}'.format(key[1:], prevkey)
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            final.append(newkey)
        else:
            final.append(key)
            prevkey = key
        prevChain = key.split(':')[-1]

    mapss = {}
    for key in final:
        newkey = key.replace('S', 'β').replace('H', 'α').replace('αead', 'Head')
        if 'IGPS' in str(label_compo):
            _ = []
            for elt in newkey.split('-'):
                if elt.split(':')[1] in ['A', 'C', 'E']:
                    _.append('𝘧{}'.format(elt.split(':')[0]))
                elif elt.split(':')[1] in ['B', 'D', 'F']:
                    _.append('𝘩{}'.format(elt.split(':')[0]))
            newkey = '-'.join(_)
            mapss[key] = IGPS_mapping[newkey]
        else:
            mapss[key] = newkey
    for ss in SS2nodelist:
        for node in SS2nodelist[ss]:
            node2SS[node] = mapss[ss]

    # Loading external data
    atom_mat1, atom_mat2 = list(map(load, [path1, path2]))
    get_ext = lambda X: X.split('.')[-1]
    ext1, ext2 = list(map(get_ext, [path1, path2]))
    top1 = load(path1.split('_')[0].split('.')[0] + '.topy') if top1 == None else load(top1)
    top2 = load(path2.split('_')[0].split('.')[0] + '.topy') if top2 == None else load(top2)

    # Handling selections
    if sele != None:
        sele1, sele2 = [sele] * 2
    if sele == None and sele1 == None and sele2 == None:
        sele1, sele2 = ['protein && not hydrogen'] * 2
        print('Default selection: protein without hydrogens')
    sels = [sele1, sele2]

    # Creating topology matrices for each selection
    topg1, topd1 = [create_top(sel, top1, fromstruct) for sel in sels]
    topg2, topd2 = [create_top(sel, top2, fromstruct) for sel in sels]

    # From atomic to residue contacts, then perturbation network computation
    mat1 = (atom_mat1 @ topd1).transpose() @ topg1
    mat2 = (atom_mat2 @ topd2).transpose() @ topg2

    # Apply the expected norm if necessary
    if norm_expected:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))

    if align_with != None:
        cmd.align(align_with, userSelection, object='aln')
        raw_aln = cmd.get_raw_alignment('aln')
        cmd.hide('cgo', 'aln')
        order_string = [idx[0] for idx in raw_aln[-1]][::-1]
        trans_mat = dok_matrix(tuple([cmd.count_atoms(X) for X in order_string]))
        for idx1, idx2 in raw_aln:
            trans_mat[idx2[1] - 1, idx1[1] - 1] = 1
        trans_mat = csr_matrix(trans_mat)
        top_t1, top_t2 = [create_top('name CA', top) for top in [top1, top2]]
        trans_res = (trans_mat @ top_t1).transpose() @ top_t2
        mat2 = trans_res @ (mat2 @ trans_res.transpose())

    pertmat = mat2 - mat1
    pertmat.setdiag(0)
    pertmat.eliminate_zeros()
    net = nx.from_scipy_sparse_matrix(pertmat)

    # Creating the labeling dictionary
    if str(next(top1.residues))[-1] == '0':
        offset = 1
    else:
        offset = 0
    chain_names = [chr(ord('A') + i) for i in range(26)]
    t2o = lambda X: three2one[X] if X in three2one else X[0]
    get_chain = lambda X: chain_names[(X.chain.index % len(chain_names))]
    res2str = lambda X: t2o(X.name) + str(X.resSeq + offset) + ':' + get_chain(X)
    id2label = {i: res2str(res) for i, res in enumerate(top1.residues)}

    # Relabeling the network
    net = nx.relabel_nodes(net, id2label)
    label2id = {res2str(res): i for i, res in enumerate(top1.residues)}

    # Auto-patching network labels
    if not all(elem in node2CA for elem in net.nodes()):
        print('PDB structure and topology labeling not matching.')
        if auto_patch:
            print('Attempting to auto-patch residue names. (this can be disabled with auto_patch=False)')
            if len(node2CA.keys()) == len(net.nodes()):
                remap = dict(zip(net.nodes(), node2CA.keys()))
                net = nx.relabel_nodes(net, remap)
                label2id = dict(zip(node2CA.keys(), range(top1.n_residues)))
            else:
                print("Auto-patching not working, please try a different PDB file")

    # Output topK if necessary
    if type(topk) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][topk]
        threshold = limit_weight
    if type(standard_and_expected) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][standard_and_expected]
        relabel_net2 = dict(enumerate(net.nodes()))
        threshold = limit_weight

    if max_compo or mean_vp or any(np.array([compo_size, compo_diam, compo_radius, strong_compo]) != None):
        color_by_compo = True
        if load_cc != None:
            cc = np.load(load_cc)
        else:
            cc = get_connected_components(pertmat)
        if save_cc != None:
            np.save(save_cc, cc)
        if max_compo:
            threshold = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
        else:
            lastmax = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
            print('last maximum: {}'.format(np.round(lastmax, 2)))
            net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < lastmax])
            net.remove_nodes_from(list(nx.isolates(net)))
            components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
            if mean_vp:
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                threshold = np.median(vanishing_points)
            elif compo_size != None:
                robust = [list(c.nodes()) for c in components_list if len(c.edges()) >= float(compo_size)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_diam != None:
                robust = [list(c.nodes()) for c in components_list if nx.diameter(c) >= float(compo_diam)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif compo_radius != None:
                robust = [list(c.nodes()) for c in components_list if nx.radius(c) >= float(compo_radius)]
                net = net.subgraph([x for robust in list(robust) for x in robust])
                threshold = 0
            elif strong_compo != None:
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                edges_len = [len(c.edges()) for c in components_list]
                percentile = float(strong_compo) * len(components_list) / 100
                vani_ranks = len(vanishing_points) + 1 - rankdata(vanishing_points, method='max')
                size_ranks = len(edges_len) + 1 - rankdata(edges_len, method='max')
                vani_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if vani_ranks[i] < percentile]
                size_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if size_ranks[i] < percentile]
                vani_nodes = [x for vani_nodes in list(vani_nodes) for x in vani_nodes]
                size_nodes = [x for size_nodes in list(size_nodes) for x in size_nodes]
                strong = list(set(vani_nodes) & set(size_nodes))
                net = net.subgraph(strong)

    # Detect mutations
    if mutations:
        cmd.show_as(representation="cartoon", selection="?mutations")
        cmd.color(color="grey80", selection="?mutations")
        cmd.delete("?mutations")
        mutations_list = []
        y = {j: res2str(res) for j, res in enumerate(top2.residues)}
        for resid in id2label:
            if resid in y:
                if id2label[resid] != y[resid]:
                    mutations_list.append((resid, (y[resid][0] + ':').join(id2label[resid].split(':'))))
                    cmd.select("mutations",
                               'resi ' + str(id2label[resid].split(':')[0][1:]) +
                               ' and chain ' + id2label[resid][-1], merge=1)
            else:
                print('Deletion of ', id2label[resid])
        print('List of mutations: ', ', '.join([elt[1] for elt in mutations_list]))
        cmd.show_as(representation="licorice", selection="?mutations")
        cmd.color(color="magenta", selection="?mutations")

    # Apply threshold
    if threshold != 0:
        print('Applying threshold {}'.format(threshold))
        net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < threshold])
        net.remove_nodes_from(list(nx.isolates(net)))

    # Induced perturbation network if needed
    if around != None:
        net = net.subgraph(nx.node_connected_component(net, around))

    # Setting PyMOL parameters
    cmd.set('auto_zoom', 0)
    cmd.set("cgo_sphere_quality", 4)
    if len(net.edges()) == 0:
        raise ValueError('Computations give empty network')

    # Norm edges
    if edge_norm == None:
        edge_norm = max([net.edges()[(u, v)]['weight'] for u, v in net.edges()]) / r
    elif edge_norm == True:
        tot_atoms_in_sel = np.sum([np.sum(elt) for elt in [topd1, topd2, topg1, topg2]])
        tot_atoms = np.sum([max(elt.shape) for elt in [topd1, topd2, topg1, topg2]])
        norm_fact = tot_atoms_in_sel ** 2 / tot_atoms ** 2
        edge_norm = norm_fact * 30
        print('Global normalization factor: {}'.format(1 / norm_fact))

    # Function to name edges
    def name_edges(name, path):
        if name == None:
            return '.'.join(basename(path).split('.')[:-1])
        return name

    if type(standard_and_expected) == int:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))
        net2 = nx.from_scipy_sparse_matrix(mat2 - mat1)
        net2 = nx.relabel_nodes(net2, relabel_net2)
        limit_weight = np.sort([abs(net2.edges[(u, v)]['weight']) for u, v in net2.edges])[::-1][standard_and_expected]
        net2.remove_edges_from([(u, v) for u, v in net2.edges() if abs(net2[u][v]['weight']) < limit_weight])
        net2.remove_nodes_from(list(nx.isolates(net2)))
        colors = [(1, 1, 0), (0, 1, 1), (1, 0, 1)]
        objs_inboth = []
        objs_instd = []
        objs_inexp = []
        nodes = []
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            if (u, v) in list(net2.edges()):
                objs_inboth += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[0], *colors[0]]
            else:
                objs_instd += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[1], *colors[1]]
            nodes += [u, v]
        edge_norm2 = max([net2.edges()[(u, v)]['weight'] for u, v in net2.edges()]) / r
        for u, v in net2.edges():
            radius = net2[u][v]['weight'] / edge_norm2
            if (u, v) not in list(net.edges()):
                objs_inexp += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[2], *colors[2]]
                nodes += [u, v]
        nodelist = set(nodes)
        objs_nodes = [COLOR, *node_color]
        for u in nodelist:
            x, y, z = node2CA[u]
            objs_nodes += [SPHERE, x, y, z, r]
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        cmd.load_cgo(objs_inboth, 'in_both_edges')
        cmd.load_cgo(objs_instd, 'in_std_edges')
        cmd.load_cgo(objs_inexp, 'in_exp_edges')
        cmd.load_cgo(objs_nodes, 'nodes')

    elif color_by_contact_type:
        expected_matrices = get_expected_type(atom_mat1, atom_mat2, top1, top2, fromstruct)
        name1, name2 = list(map(name_edges, [name1, name2], [path1, path2]))
        names = ['{0}_{1}'.format(name1, sel) for sel in ['hydro', 'polar', 'mixed']] + \
                ['{0}_{1}'.format(name2, sel) for sel in ['hydro', 'polar', 'mixed']]
        nodes_dict = {i: [] for i in range(len(names))}
        objs_dict = {i: [] for i in range(len(names))}
        colors = [(1, 0.86, 0.73), (0.68, 0.85, 0.90), (0.60, 0.98, 0.60),
                  (1, 0.86, 0), (0.25, 0.41, 0.88), (0, 0.50, 0)]
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            id_u, id_v = label2id[u], label2id[v]
            values = list(map(lambda _mat: _mat[id_v, id_u], expected_matrices))
            type_of_contact = np.argmax(values)
            objs_dict[type_of_contact] += [CYLINDER, *node2CA[u], *node2CA[v], radius,
                                           *colors[type_of_contact], *colors[type_of_contact]]
            nodes_dict[type_of_contact] += [u, v]
        selnodes = ''
        for toc in nodes_dict:
            nodelist = set(nodes_dict[toc])
            objs_dict[toc] += [COLOR, *node_color]
            for u in nodelist:
                x, y, z = node2CA[u]
                objs_dict[toc] += [SPHERE, x, y, z, r]
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
        for i, name in zip(objs_dict.keys(), names):
            cmd.load_cgo(objs_dict[i], '{}_edges'.format(name))

    # Coloring by components
    elif color_by_compo:
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        ranking = np.argsort(diameters)[::-1]
        colors = sns.color_palette(palette, n_colors=len(components_list) + 1)
        for i, c in enumerate(colors):
            if c[0] == c[1] == c[2]:
                print(c)
                colors.pop(i)
                break
        selnodes = ''
        for i, rank in enumerate(ranking):
            color, compo = colors[rank], components_list[rank]
            _obj, nodelist = [], []
            for u, v in compo.edges():
                radius = net[u][v]['weight'] / edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    if not force_binary_color:
                        _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *color, *color]
                    else:
                        if net[u][v]['weight'] <= 0:
                            _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                        else:
                            _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                    nodelist += [u, v]
            _obj += [COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj += [SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Component{}'.format(i + 1))

    # Color by group of relevance
    elif color_by_group:
        weights = np.array([abs(net[u][v]['weight']) for u, v in net.edges()]).reshape(-1, 1)
        birch = Birch(n_clusters=n_clusters).fit(weights)
        labels = birch.predict(weights)
        ordered_labels = labels[np.argsort(pertmat.data)]
        _, idx = np.unique(ordered_labels, return_index=True)
        mapping = dict(zip(ordered_labels[np.sort(idx)], np.sort(np.unique(ordered_labels))))
        i2color = dict(zip(ordered_labels[np.sort(idx)],
                           sns.color_palette(palette, len(np.unique(ordered_labels)))[::-1]))
        selnodes = ''
        if show_top_group == None:
            show_top_group = len(mapping.keys())
        for j, i in enumerate(list(mapping.keys())[:show_top_group]):
            _obj, nodelist = [], []
            _net = net.copy()
            to_remove_edges = [(u, v) for j, (u, v) in enumerate(net.edges()) if labels[j] != i]
            _net.remove_edges_from(to_remove_edges)
            _net.remove_nodes_from(list(nx.isolates(_net)))
            for u, v in _net.edges():
                radius = net[u][v]['weight'] / edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *i2color[j], *i2color[j]]
                    nodelist += [u, v]
            _obj += [COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj += [SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Group{}'.format(j + 1))

    # Default edge coloring
    else:
        obj1, obj2, nodelist = [], [], []
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            if abs(net[u][v]['weight']) >= threshold:
                if 'color' in net[u][v]:
                    if net[u][v]['color'] == 'r':
                        obj1 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                else:
                    if net[u][v]['weight'] <= 0:
                        obj1 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                nodelist += [u, v]
        name1, name2 = map(name_edges, [name1, name2], [path1, path2])
        cmd.load_cgo(obj1, name1 + '_edges')
        cmd.load_cgo(obj2, name2 + '_edges')

        # Drawing nodes
        obj = [COLOR, *node_color]
        nodelist = set(nodelist)
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        for u in nodelist:
            x, y, z = node2CA[u]
            obj += [SPHERE, x, y, z, r]
        cmd.load_cgo(obj, name_nodes)

    # Creating text for labeling components
    if label_compo != '' or compos_to_excel != None:
        if compos_to_excel != None:
            rows_list = []
        objtxt = []
        axes = -np.array(cmd.get_view()[:9]).reshape(3, 3)
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        for i, j in enumerate(np.argsort(diameters)[::-1]):
            row_dict = {}
            c = components_list[j]
            sses = sorted(list(set([node2SS[node] for node in c])))
            if compos_to_excel != None:
                row_dict['Secondary structure elements'] = ','.join(sses)
                row_dict['Vanishing point'] = np.max([abs(net[u][v]['weight']) for u, v in c.edges()])
                row_dict['Diameter'] = nx.diameter(c)
                row_dict['Size'] = len(c.edges())
                row_dict['Size rank'] = i + 1
            else:
                print('Component {}\n'.format(i + 1), ', '.join(sses))
                print('Size (number of edges) {}'.format(len(c.edges())))
                print('Vanishing point: {}'.format(np.max([abs(net[u][v]['weight']) for u, v in c.edges()])))
            if 'h' in str(label_compo):
                methods = ['eigenvector', 'hits_hub', 'hits_authority', 'pagerank', 'betweenness', 'katz']
                hubs = [get_hubs(c, method) for method in methods]
                if compos_to_excel != None:
                    row_dict.update(dict(zip(methods, hubs)))
                else:
                    print(dict(zip(methods, hubs)))
            if 'c' in str(label_compo):
                pos = np.array(node2CA[next(c.__iter__())]) + (axes[0])
                cyl_text(objtxt, plain, pos, 'Component {}'.format(i + 1), radius=0.1, color=[0, 0, 0], axes=axes)
            if compos_to_excel:
                rows_list.append(row_dict)
        if compos_to_excel:
            df = pd.DataFrame(rows_list)
            df.to_excel(compos_to_excel)
        if 's' in str(label_compo):
            for ss in SS2nodelist:
                nodelist = SS2nodelist[ss]
                print(mapss[ss], ': ',
                      ('{}--{}'.format(nodelist[0], nodelist[-1]) if len(nodelist) > 1 else nodelist[0]))
        cmd.set("cgo_line_radius", 0.03)
        cmd.load_cgo(objtxt, 'txt')

    # Labeling
    if labeling == 1:
        cmd.label(selection=selnodes, expression="t2o(resn)+resi")
    if labeling == 3:
        cmd.label(selection=selnodes, expression="resn+resi")

    # Summing
    if sum:
        print('Sum of contacts lost: ', np.sum(pertmat))
    if printall:
        print([(u, v, net[u][v]) for u, v in net.edges()])
def _enumMaximumMatchingIter(g, match, all_matches, add_e=None): """Recursively search maximum matchings. Parameters ---------- g : Undirected bipartite graph. Nodes are separated by their 'bipartite' attribute. match : List of edges forming one maximum matching of `g`. all_matches : List, each is a list of edges forming a maximum matching of `g`. Newly found matchings will be appended into this list. add_e : tuple, optional Edge used to form subproblems. If not `None`, will be added to each newly found matching. Returns ------- list Updated list of all maximum matchings. Author ------ guangzhi XU ([email protected]; [email protected]) Update time: 2017-05-21 20:09:06. """ #---------------Form directed graph D--------------- d = _formDirected(g, match) #-----------------Find cycles in D----------------- cycles = list(nx.simple_cycles(d)) if len(cycles) == 0: #---------If no cycle, find a feasible path--------- all_uncovered = set(g.node).difference(set([ii[0] for ii in match])) all_uncovered = all_uncovered.difference(set([ii[1] for ii in match])) all_uncovered = list(all_uncovered) #--------------If no path, terminate-------------- if len(all_uncovered) == 0: return all_matches #----------Find a length 2 feasible path---------- idx = 0 uncovered = all_uncovered[idx] while True: if uncovered not in nx.isolates(g): paths = nx.single_source_shortest_path(d, uncovered, cutoff=2) len2paths = [vv for kk, vv in paths.items() if len(vv) == 3] if len(len2paths) > 0: reversed = False break #----------------Try reversed path---------------- paths_rev = nx.single_source_shortest_path(d.reverse(), uncovered, cutoff=2) len2paths = [ vv for kk, vv in paths_rev.items() if len(vv) == 3 ] if len(len2paths) > 0: reversed = True break idx += 1 if idx > len(all_uncovered) - 1: return all_matches uncovered = all_uncovered[idx] #-------------Create a new matching M'------------- len2path = len2paths[0] if reversed: len2path = len2path[::-1] len2path = list(zip(len2path[:-1], len2path[1:])) new_match = [] for ee in d.edges(): if ee in len2path: if g.node[ee[1]]['bipartite'] == 0: new_match.append((ee[1], ee[0])) else: if g.node[ee[0]]['bipartite'] == 0: new_match.append(ee) if add_e is not None: for ii in add_e: new_match.append(ii) all_matches.append(new_match) #---------------------Select e--------------------- e = set(len2path).difference(set(match)) e = list(e)[0] #-----------------Form subproblems----------------- g_plus = g.copy() g_minus = g.copy() g_plus.remove_node(e[0]) g_plus.remove_node(e[1]) g_minus.remove_edge(e[0], e[1]) add_e_new = [ e, ] if add_e is not None: add_e_new.extend(add_e) all_matches = _enumMaximumMatchingIter(g_minus, match, all_matches, add_e) all_matches = _enumMaximumMatchingIter(g_plus, new_match, all_matches, add_e_new) else: #----------------Find a cycle in D---------------- cycle = cycles[0] cycle.append(cycle[0]) cycle = list(zip(cycle[:-1], cycle[1:])) #-------------Create a new matching M'------------- new_match = [] for ee in d.edges(): if ee in cycle: if g.node[ee[1]]['bipartite'] == 0: new_match.append((ee[1], ee[0])) else: if g.node[ee[0]]['bipartite'] == 0: new_match.append(ee) if add_e is not None: for ii in add_e: new_match.append(ii) all_matches.append(new_match) #-----------------Choose an edge E----------------- e = set(match).intersection(set(cycle)) e = list(e)[0] #-----------------Form subproblems----------------- g_plus = g.copy() g_minus = g.copy() g_plus.remove_node(e[0]) g_plus.remove_node(e[1]) g_minus.remove_edge(e[0], e[1]) add_e_new = [ e, ] if add_e is not None: add_e_new.extend(add_e) all_matches = _enumMaximumMatchingIter(g_minus, new_match, all_matches, add_e) all_matches = _enumMaximumMatchingIter(g_plus, match, all_matches, add_e_new) return all_matches
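The iterator above expects an initial maximum matching given as a list of (bipartite==0, bipartite==1) edge tuples. A minimal, hedged sketch of how one could seed it using NetworkX's Hopcroft-Karp matching; the toy graph is illustrative, and the final commented call assumes the _formDirected helper (not shown here) is importable.

import networkx as nx
from networkx.algorithms import bipartite

# Toy bipartite graph using the same 'bipartite' attribute convention.
g = nx.Graph()
g.add_nodes_from([0, 1, 2], bipartite=0)
g.add_nodes_from(['a', 'b', 'c'], bipartite=1)
g.add_edges_from([(0, 'a'), (0, 'b'), (1, 'b'), (2, 'c')])

# Hopcroft-Karp returns one maximum matching as a dict in both directions;
# keep only the orientation from the bipartite==0 side, as edge tuples.
top = {n for n, d in g.nodes(data=True) if d['bipartite'] == 0}
match_dict = bipartite.maximum_matching(g, top_nodes=top)
match = [(u, v) for u, v in match_dict.items() if u in top]
print(match)
# all_matches = _enumMaximumMatchingIter(g, match, [match])  # requires _formDirected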
def fault_tolerance(G, elem_type, removing_flag, removal_percentage, weight_flag='weight', rseed=0, deg=None, bn=None): rng = np.random.RandomState(seed=rseed) dg_flag = False if deg is not None: dg_flag = True #print('Not None') bn_flag = False if bn is not None: bn_flag = True G_copy = G.copy() #print('number of nodes',len(G_copy.nodes)) # defining the 'bucket' (i.e. set of options to remove) if elem_type == 'node': bucket = list(G.nodes()) elif elem_type == 'edge': bucket = list(G.edges()) else: print('elem_type not defined!') bucket = np.array(bucket) N = len(bucket) # defining the number of removals (a % of the things in the bucket) if removal_percentage > 1: rem_bound = removal_percentage else: rem_bound = int(removal_percentage * N) if rem_bound == 0: # for very small percentages rem_bound += 1 No = len(G.nodes()) if No - rem_bound != 0: # removing by cases: ## random: we take 'rem_bound' elements from the bucket if removing_flag == 'random': idx_to_be_removed = rng.choice(len(bucket), rem_bound) to_be_removed = bucket[idx_to_be_removed] ## targeted: we remove the 'most important' elements elif removing_flag == 'targeted': if elem_type == 'node': # we remove the nodes with highest degree if 'betweenness' in weight_flag or 'degree' in weight_flag: if not bn_flag: bn = nx.betweenness_centrality(G) if not dg_flag: deg = nx.degree_centrality(G) deg_sorted_rem_bound = { key: deg[key] for key in sorted(deg, key=deg.get, reverse=True) [:rem_bound] } bn_sorted_rem_bound = { key: bn[key] for key in sorted(bn, key=bn.get, reverse=True) [:rem_bound] } #print(deg_sorted_rem_bound) if weight_flag == 'degree': to_be_removed = list(deg_sorted_rem_bound.keys()) #print('different-value elements:',len(set(deg.values())),'/',len(deg.keys())) elif weight_flag == 'degree+betweenness': # getting the min degree of the candidates (why? so we don't get these rem_bound nodes sorted just by their labels) min_deg = min(deg_sorted_rem_bound.values()) #print('len before adding the equal-degree ones',len(deg_sorted_rem_bound),'of',rem_bound) equal_degree = { node: deg[node] for node in deg.keys() if deg[node] == min_deg } top_sorted_without_equal_degree = [ node for node in deg_sorted_rem_bound.keys() if node not in equal_degree.keys() ] equal_degree_sorted_by_bn = [ key for key in sorted(bn, key=bn.get, reverse=True) [:rem_bound - len(top_sorted_without_equal_degree)] ] # the remaining rem_bound - len(taken) nodes to_be_removed = top_sorted_without_equal_degree + equal_degree_sorted_by_bn elif weight_flag == 'betweenness': to_be_removed = list(bn_sorted_rem_bound.keys()) #print('different-value elements:',len(bn.values()),'/',len(bn.keys())) else: print('weight_flag not defined') elif 'clustering' in weight_flag: cl = nx.clustering(G) cl_sorted_rem_bound = { key: cl[key] for key in sorted(cl, key=cl.get, reverse=True) [:rem_bound] } to_be_removed = list(cl_sorted_rem_bound.keys()) #print('different-value elements:',len(cl.values()),'/',len(cl.keys())) else: print('weight_flag not defined') #to_be_removed = rem elif elem_type == 'edge': # we remove the edges with largest weight (either 'weight' or 'length') weights = {edge: G.edges[edge][weight_flag] for edge in bucket} weights_sorted_rem_bound = { key: weights[key] for key in sorted(weights, key=weights.get, reverse=True) [:rem_bound] } to_be_removed = list(weights_sorted_rem_bound.keys()) else: print('removing_flag not defined!') #print('tbr',to_be_removed) if elem_type == 'node': G_copy.remove_nodes_from(to_be_removed) elif elem_type == 'edge': G_copy.remove_edges_from(to_be_removed) else: print('wrong input') G_copy.remove_nodes_from(list(nx.isolates(G_copy))) N1 = len(G_copy.nodes()) try: largest_cc = max(nx.connected_components(G_copy), key=len) # this is a set of nodes except: print('graph is too small') largest_cc = [] N1 = 1 # if elem_type == 'edge': #using the same removal_percentage given for the edges causes inconsistencies # removal_percentage = (No-len(G_copy.nodes()))/No # print('No',No, 'removed',No-len(G_copy.nodes()), '%',removal_percentage) # print(removal_percentage) # denominator: the remaining number of NODES after removing # denom =int((1-removal_percentage)*No) denom = N1 #print('number of elements removed:',len(to_be_removed)) # debugging: cc = list(nx.connected_components(G_copy)) len_cc = [len(c) for c in cc] #assert No >= sum(len_cc) + round(removal_percentage*No) - 1 gcc = len(largest_cc) / denom #print('lcc',len(largest_cc),'den',denom) else: print('no nodes left.') gcc = 0 #print('gcc',gcc) return gcc, G_copy
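A minimal, hedged usage sketch for fault_tolerance: random node removal on an Erdős-Rényi graph, tracking the relative size of the giant component. Graph size and fractions are illustrative; np/nx imports mirror what the function assumes at module level.

import numpy as np
import networkx as nx

G = nx.erdos_renyi_graph(200, 0.05, seed=1)
for frac in (0.05, 0.1, 0.2):
    # gcc is the share of surviving nodes in the largest connected component
    gcc, G_left = fault_tolerance(G, 'node', 'random', frac, rseed=0)
    print(frac, round(gcc, 3))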
def k_truss(G, k): """Returns the k-truss of `G`. The k-truss is the maximal induced subgraph of `G` which contains at least three vertices where every edge is incident to at least `k-2` triangles. Parameters ---------- G : NetworkX graph An undirected graph k : int The order of the truss Returns ------- H : NetworkX graph The k-truss subgraph Raises ------ NetworkXError The k-truss is not defined for graphs with self loops or parallel edges or directed graphs. Notes ----- Under the `k-2` convention used here, a k-clique is a k-truss and a k-truss is a subgraph of the (k-1)-core. Not implemented for digraphs or graphs with parallel edges or self loops. Graph, node, and edge attributes are copied to the subgraph. K-trusses were originally defined in [2] which states that the k-truss is the maximal induced subgraph where each edge belongs to at least `k-2` triangles. A more recent paper, [1], uses a slightly different definition requiring that each edge belong to at least `k` triangles. This implementation uses the original definition of `k-2` triangles. References ---------- .. [1] Bounds and Algorithms for k-truss. Paul Burkhardt, Vance Faber, David G. Harris, 2018. https://arxiv.org/abs/1806.05523v2 .. [2] Trusses: Cohesive Subgraphs for Social Network Analysis. Jonathan Cohen, 2005. """ H = G.copy() n_dropped = 1 while n_dropped > 0: n_dropped = 0 to_drop = [] seen = set() for u in H: nbrs_u = set(H[u]) seen.add(u) new_nbrs = [v for v in nbrs_u if v not in seen] for v in new_nbrs: if len(nbrs_u & set(H[v])) < (k - 2): to_drop.append((u, v)) H.remove_edges_from(to_drop) n_dropped = len(to_drop) H.remove_nodes_from(list(nx.isolates(H))) return H
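A quick, hedged sanity check of k_truss: with k=4 every surviving edge must sit in at least two triangles, so a 5-clique survives while a dangling path is pruned. Recent NetworkX releases also ship a built-in nx.k_truss with the same k-2 convention, which can serve as a cross-check.

import networkx as nx

G = nx.complete_graph(5)            # 5-clique: every edge is in 3 triangles
G.add_edges_from([(4, 5), (5, 6)])  # dangling path: its edges are in 0 triangles
H = k_truss(G, 4)
print(sorted(H.nodes()))            # [0, 1, 2, 3, 4]: the path is pruned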
tmp = df['time'][0] # time is in ascending order for i in range(len(df['time'])): if tmp == df['time'][i]: # if is in current day g.add_edge(str(df['from'][i]), str(df['to'][i])) if i == len(df['time']) - 1: # EOF --- cnt_graphs += 1 # graphs.append(g.copy()) # ignore the last day print('processed graphs ', cnt_graphs, '/', all_days, 'ALL done......\n') elif tmp < df['time'][i]: # if goes to next day cnt_graphs += 1 if (cnt_graphs // gap) >= ( all_days // gap - 70 ) and cnt_graphs % gap == 0: # the last 70 graphs 'and' the gap g.remove_edges_from(list(nx.selfloop_edges(g))) g.remove_nodes_from(list(nx.isolates(g))) graphs.append(g.copy( )) # append previous g; keep only a part of the graphs to reduce RAM use # g = nx.Graph() # reset graph, based on the real-world application if cnt_graphs % 50 == 0: print('processed graphs ', cnt_graphs, '/', all_days) tmp = df['time'][i] g.add_edge(str(df['from'][i]), str(df['to'][i])) else: print( 'ERROR -- EXIT -- please double check if time is in ascending order!' ) exit(0) # --- take out and save part of graphs ---- print('total graphs: ', len(graphs))
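For reference, a hedged sketch of the same snapshot construction with pandas groupby. Note the fragment above accumulates edges across days (the graph reset is commented out), whereas this per-day variant starts each snapshot fresh; the toy DataFrame follows the fragment's 'time'/'from'/'to' column naming.

import pandas as pd
import networkx as nx

df = pd.DataFrame({'time': [1, 1, 2, 2],
                   'from': ['a', 'b', 'a', 'c'],
                   'to':   ['b', 'c', 'c', 'a']})
graphs = []
for day, chunk in df.groupby('time', sort=True):
    g = nx.from_pandas_edgelist(chunk, source='from', target='to')
    g.remove_edges_from(list(nx.selfloop_edges(g)))   # same cleanup as above
    g.remove_nodes_from(list(nx.isolates(g)))
    graphs.append(g)
print('total graphs:', len(graphs))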
def network_generate(): import numpy as np import math as math import networkx as nx from scipy.stats import bernoulli ### We may have real world networks or synthetic graphs following the DCSBM ## Choice of the network to generate #network='real_world' ## Choose either 'real_world' or 'DCSBM' network = 'DCSBM' if network == 'real_world': ##Get the adjacency matrix #Given the network, find the corresponding adjacency matrix scenario = 'dolphins.gml' G = nx.read_gml(scenario) G_0 = G ##Remove nodes without neighbors isolated_nodes = list(nx.isolates(G)) G.remove_nodes_from(isolated_nodes) ## Adjacency matrix A = nx.adjacency_matrix(G, nodelist=None, weight=None) A = A.todense() ##New number of instances after removing isolated nodes n = len(A[:, 1]) ## Ground truth ground_truth = np.zeros((n, 1)) for i in range(int(0), int(n)): if (0 in G_0.nodes()): ground_truth[i] = G_0.node[i]['value'] else: ground_truth[i] = G_0.node[i + 1]['value'] ## Remove the labels of the nodes with no neighbors (np.delete returns a copy) ground_truth = np.delete(ground_truth, isolated_nodes) ##Check whether the first element of the ground truth starts from 0 or from 1 if (min(ground_truth) == 0): startGround_truth = 0 else: startGround_truth = 1 if network == 'DCSBM': n_init = 1000 ##Class proportions cs = [0.25, 0.25, 0.5] ## Number of classes K = len(cs) ## Number of instances per class ns = np.array(cs) * n_init ##Setting of the model parameters ## Average connectivities q's #bs = [0.25,0.75] bs = [0.75, 0.25] q1 = 0.4 q2 = 0.8 q = np.repeat( np.array([q1, q2]), [int(n_init * bs[0]), int(n_init * bs[1])]) # Choice of affinity matrix M #M = 10 * (-1 * np.ones(K) + 2 * np.identity(K)) M = 5 * np.identity(K) # Construction of C C = np.ones((int(K), int(K))) + M / math.sqrt(n_init) ##Extension of C into an nxn block matrix large_C = np.zeros((int(n_init), int(n_init))) for i in range(int(0), int(K)): for j in range(int(0), int(K)): large_C[int(np.sum(ns[int(0):i])):int(np.sum(ns[int(0):i + 1])), int(np.sum(ns[int(0):j])):int(np.sum(ns[int(0):j + 1]) )] = C[i, j] * np.ones( (ns[i], ns[j])) ## Construction of matrix of DCSBM edge probabilities P = np.minimum(((np.diag(q)).dot(large_C)).dot(np.diag(q)), np.ones((n_init, n_init))) ## Generate adjacency matrix A A = np.zeros((int(n_init), int(n_init))) for i in range(int(0), int(n_init)): #A[i,]=np.random.binomial(1,P[i,], size=n) A[i, ] = bernoulli.rvs(P[i, :], size=n_init) ## Ground_truth ground_truth = np.zeros((n_init, 1)) for i in range(int(0), int(K)): ground_truth[int(np.sum(ns[int(0):i])):int(np.sum(ns[int(0):i + 1]) )] = i * np.ones( (ns[i], 1)) ##Construct graph from adjacency matrix G = nx.from_numpy_matrix(A) ##Remove nodes without neighbors isolated_nodes = list(nx.isolates(G)) G.remove_nodes_from(isolated_nodes) ## Remove the labels of the nodes with no neighbors (np.delete returns a copy) ground_truth = np.delete(ground_truth, isolated_nodes) A = nx.adjacency_matrix(G, nodelist=None, weight=None) A = A.todense() ## Symmetrization of the adjacency matrix in order to have an undirected unweighted graph A = np.triu(A) + np.transpose(np.triu(A)) startGround_truth = 0 return A, ground_truth, startGround_truth
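A hedged alternative for the block-model part: without the degree correction q, NetworkX's stochastic_block_model builds the same community structure directly. Sizes and probabilities below are illustrative, not the function's parameters.

import networkx as nx

sizes = [250, 250, 500]   # mirrors cs = [0.25, 0.25, 0.5] with n_init = 1000
p_in, p_out = 0.05, 0.01
probs = [[p_in if i == j else p_out for j in range(3)] for i in range(3)]
G = nx.stochastic_block_model(sizes, probs, seed=0)
G.remove_nodes_from(list(nx.isolates(G)))  # same isolate cleanup as above
print(G.number_of_nodes())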
def get_num_isolates(self): """ return the number of isolated nodes """ return len(list(nx.isolates(self.G)))
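Note that nx.isolates returns an iterator in NetworkX 2.x, hence the list() wrapper before len(). A minimal check of the helper's behaviour:

import networkx as nx

G = nx.Graph()
G.add_edge(1, 2)   # one connected pair
G.add_node(3)      # one isolate
print(len(list(nx.isolates(G))))  # 1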
final_mat.iloc[i, j] = 0 print(final_mat) for column in CONFIG.column_names: final_mat[column] = np.where(np.abs(final_mat[column]) < .5, 0, 1) # Save final binary adjacency matrix final_mat.to_csv("results/final_adjacency_matrix.csv", index=True) # Draw the DAG final_DAG = from_numpy_matrix(final_mat.to_numpy(), create_using=nx.DiGraph) final_DAG = nx.relabel_nodes( final_DAG, dict(zip(list(range(CONFIG.data_variable_size)), CONFIG.column_names))) final_DAG.remove_nodes_from(list(nx.isolates(final_DAG))) nx.draw( final_DAG, node_color="lightcoral", node_size=75, font_size=3, width=0.5, arrowsize=4, with_labels=True, pos=nx.spring_layout(final_DAG), ) plt.draw() plt.savefig(os.path.expanduser("results/DAG_plot_alarm.png"), format="PNG", dpi=500)
def graph_load_batch(data_dir, min_num_nodes=20, max_num_nodes=1000, name='ENZYMES', node_attributes=True, graph_labels=True): ''' load many graphs, e.g. enzymes :return: a list of graphs ''' print('Loading graph dataset: ' + str(name)) G = nx.Graph() # load data path = os.path.join(data_dir, name) data_adj = np.loadtxt( os.path.join(path, '{}_A.txt'.format(name)), delimiter=',').astype(int) if node_attributes: data_node_att = np.loadtxt( os.path.join(path, '{}_node_attributes.txt'.format(name)), delimiter=',') data_node_label = np.loadtxt( os.path.join(path, '{}_node_labels.txt'.format(name)), delimiter=',').astype(int) data_graph_indicator = np.loadtxt( os.path.join(path, '{}_graph_indicator.txt'.format(name)), delimiter=',').astype(int) if graph_labels: data_graph_labels = np.loadtxt( os.path.join(path, '{}_graph_labels.txt'.format(name)), delimiter=',').astype(int) data_tuple = list(map(tuple, data_adj)) # print(len(data_tuple)) # print(data_tuple[0]) # add edges G.add_edges_from(data_tuple) # add node attributes for i in range(data_node_label.shape[0]): if node_attributes: G.add_node(i + 1, feature=data_node_att[i]) G.add_node(i + 1, label=data_node_label[i]) G.remove_nodes_from(list(nx.isolates(G))) # remove self-loop G.remove_edges_from(list(nx.selfloop_edges(G))) # print(G.number_of_nodes()) # print(G.number_of_edges()) # split into graphs graph_num = data_graph_indicator.max() node_list = np.arange(data_graph_indicator.shape[0]) + 1 graphs = [] max_nodes = 0 for i in range(graph_num): # find the nodes for each graph nodes = node_list[data_graph_indicator == i + 1] G_sub = G.subgraph(nodes).copy() # copy so each subgraph owns its graph-attribute dict if graph_labels: G_sub.graph['label'] = data_graph_labels[i] # print('nodes', G_sub.number_of_nodes()) # print('edges', G_sub.number_of_edges()) # print('label', G_sub.graph) if G_sub.number_of_nodes() >= min_num_nodes and G_sub.number_of_nodes( ) <= max_num_nodes: graphs.append(G_sub) if G_sub.number_of_nodes() > max_nodes: max_nodes = G_sub.number_of_nodes() # print(G_sub.number_of_nodes(), 'i', i) # print('Graph dataset name: {}, total graph num: {}'.format(name, len(graphs))) # logging.warning('Graphs loaded, total num: {}'.format(len(graphs))) print('Loaded') return graphs
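A hedged usage sketch for graph_load_batch, assuming the TU-format ENZYMES files (ENZYMES_A.txt, ENZYMES_graph_indicator.txt, and so on) have been downloaded under data/ENZYMES:

graphs = graph_load_batch('data', min_num_nodes=20, max_num_nodes=1000,
                          name='ENZYMES', node_attributes=True, graph_labels=True)
print(len(graphs), max(g.number_of_nodes() for g in graphs))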
nx.draw_networkx_labels(net,pos=pos, labels={user:nodeLabel}, font_size=sizeLabel**8) plt.legend(markerscale=1, loc="best") plt.show() #%% # to indirected unet=net.to_undirected(reciprocal=True) # removing isolates unet.remove_nodes_from(list(nx.isolates(unet))) #%% # bring algorithm from cdlib import algorithms # Find the communities modCommunity = algorithms.greedy_modularity(unet).communities #%% import numpy as np import matplotlib.pyplot as plt from matplotlib.colors import rgb2hex
def _prune_nodes(self, network_graph): """ Remove all zero-degree (isolated) nodes if threshold == 0 """ nodes_to_remove = list(nx.isolates(network_graph)) network_graph.remove_nodes_from(nodes_to_remove) return network_graph
''' print 'Number of nodes (before preprocessing) = %d' % len(G.nodes()) print 'Number of edges (before preprocessing) = %d' % len(G.edges()) deg_list = nx.degree(G, G.nodes()).values() avg_degree = sum(deg_list)/float(len(deg_list)) print 'Average degree (before preprocessing) = %f' % avg_degree # self loops removal print 'Number of self-loops = %d' % len(G.selfloop_edges()) G.remove_edges_from(G.selfloop_edges()) print 'Self-loops removed!' # isolated nodes removal print 'Number of isolated nodes = %d' % len(nx.isolates(G)) G.remove_nodes_from(nx.isolates(G)) print 'Isolated nodes removed!' # multiple parallel edges test no_of_nodes_with_multiP_edges = np.count_nonzero( nx.adjacency_matrix(G).data > 1)/2 if (no_of_nodes_with_multiP_edges == 0): print 'No multiple parallel edges in the graph.' else: print 'Multiple parallel edges found in the graph!' print 'Number of nodes with multiple parallel edges = %d' % no_of_nodes_with_multiP_edges # summary of the graph (after preprocessing)
df, df_label = processed_data() all_userids = set(df['user_id'].unique()).union( set(df['dest_user_id_if_known'].unique())) userids_to_newids, newids_to_userids = get_contiguous_ids( all_userids, df_label) #for relabeling GT0 = construct_graph_sequence(df, userids_to_newids, newids_to_userids) #Remove the last three months because they have lots of isolates isolated = [] for G in GT0: isolated.append([x for x in nx.isolates(G)]) # print([len(x) for x in isolated]) isolated = isolated[:6] # print([len(x) for x in isolated]) #Remove nodes that are isolated in at least one snapshot beta_nodes = set() for x in isolated: beta_nodes = set().union(beta_nodes, x) print('beta_nodes', beta_nodes) # print('len(beta_nodes)',len(beta_nodes)) beta_userids = [newids_to_userids[x] for x in beta_nodes] all_userids_filtered = [x for x in all_userids if x not in beta_userids] userids_to_newids_filtered, newids_to_userids_filtered = get_contiguous_ids( all_userids_filtered, df_label) #for relabeling
def read_cites_contents_graph(graph_name): G = nx.Graph() node_class = {} cite_file = "data/{}.cites".format(graph_name) content_file = "data/{}.content".format(graph_name) feature_dict = {} with open(content_file, "r") as f: lines = f.readlines() for line in lines: strs = line.split("\t") node = strs[0] G.add_node(node) paper_class = strs[-1].strip() node_class[node] = paper_class # G.nodes[node]["class"] = paper_class features = strs[1:-1] for i, feat in enumerate(features): # continue if feat == "0": continue feat_name = "feat{}".format(i) if feat_name not in feature_dict.keys(): feature_dict[feat_name] = 1 else: feature_dict[feat_name] += 1 G.nodes[node][feat_name] = 1 with open(cite_file, "r") as f: lines = f.readlines() for line in lines: strs = line.split("\t") s = strs[0] t = strs[1].strip() if s not in node_class.keys(): continue if t not in node_class.keys(): continue G.add_edge(s, t) self_edge = [] for n, nbrs in G.adjacency(): for nbr in nbrs.keys(): if n == nbr: self_edge.append(n) for s in self_edge: G.remove_edge(s, s) # print("remove {} to {}".format(s, s)) G.remove_nodes_from(list(nx.isolates(G))) sorted_items = sorted(feature_dict.items(), key=lambda d: (d[1]), reverse=True) key_features = [feat for feat, num in sorted_items] for node in list(G.nodes(data=True)): node_name = node[0] attr_dict = node[1] for feat in list(attr_dict.keys()): if feat in key_features: continue else: G.nodes[node_name].pop(feat) # G = largest_connected_subgraph(G) print("Read {}, {} nodes, {} edges.".format( graph_name, len(list(G.nodes)), len(list(G.edges)) )) class_idx = {} count = 0 for node, clas in node_class.items(): if clas in class_idx.keys(): continue class_idx[clas] = count count += 1 for node in node_class.keys(): node_class[node] = class_idx[node_class[node]] return G, node_class
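A hedged usage sketch, assuming the Planetoid/LINQS-style files data/cora.cites and data/cora.content are in place:

G, node_class = read_cites_contents_graph('cora')
print(G.number_of_nodes(), 'nodes,', len(set(node_class.values())), 'classes')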
def filtering( Gpe, sources=None, sinks=None, beta_d=1.5, threshold=1e-3, tdens0=None, BPweights="tdens", stopping_threshold_f=1e-3, weight_flag="unit", rhs=None, MaxNumIter=100, verbose=False, ): inputs = {} if sources is None and sinks is None and rhs is None: raise ValueError( "Either rhs or sources/sinks need to be passed as inputs.") ### relabeling # todo: add an if for the case in which nodes are already relabeled mapping = {} k = -1 for node in Gpe.nodes(): k += 1 mapping[node] = k Gpe_rel = nx.relabel_nodes(Gpe, mapping, copy=True) edges = Gpe_rel.edges() nedges = len(edges) nodes = Gpe_rel.nodes() nnodes = len(nodes) # tdens0 if tdens0 != None: try: tdens0 = np.array([(Gpe_rel.edges[edge]["tdens"]) for edge in edges]) except: tdens0 = np.array([(Gpe_rel.edges[edge]["flux"]) for edge in edges]) # topol topol = np.zeros((nedges, 2)) k = -1 for edge in edges: k += 1 topol[k, :] = edge # weight (uniform) weight = np.empty(nedges, dtype=object) k = -1 for edge in edges: k += 1 if weight_flag == "unit": weight[k] = 1 elif weight_flag == "length": weight[k] = distance.euclidean(Gpe_rel.nodes[edge[0]]["pos"], Gpe_rel.nodes[edge[1]]["pos"]) else: weight[k] = Gpe_rel.edges[edge][weight_flag] # rhs (f+ and f-) if ( sinks is not None and sources is not None ): # the sources and sinks are taken from the given lists; # (else) if these are not passed, then the given rhs is used. rhs = np.zeros(nnodes) sources_rel = [mapping[node] for node in sources] sinks_rel = [mapping[node] for node in sinks] number_sources = len(sources_rel) number_sinks = len(sinks_rel) for node in nodes: if node in sources_rel: rhs[node] = 1 / number_sources elif node in sinks_rel: rhs[node] = -1 / number_sinks else: rhs[node] = 0 else: sources_rel = [i for i in range(len(rhs)) if rhs[i] > 0] sinks_rel = [i for i in range(len(rhs)) if rhs[i] < 0] assert sum(rhs) < 0.01 assert len(rhs) == nnodes # init and set controls ctrl = Dmkcontrols.DmkCtrl() Dmkcontrols.get_from_file(ctrl, root + "/nextrout_core/dmk_discr.ctrl") # if and where save data ctrl.id_save_dat = 1 ctrl.fn_tdens = "tdens.dat" ctrl.fn_pot = "pot.dat" ctrl.max_time_iterations = MaxNumIter # if and where save log ctrl.id_save_statistics = 1 ctrl.fn_statistics = "dmk.log" # if print info # if verbose: ctrl.info_state = 3 ctrl.info_update = 3 print(ctrl.outer_solver_approach) else: ctrl.info_state = 0 ctrl.info_update = 0 [info, tdens, pot, flux, timefun] = dmk_graph.dmk_graph( topol, rhs, pflux=beta_d, tdens0=tdens0, tolerance=stopping_threshold_f, weight=weight, ctrl=ctrl, ) tdens = list(tdens) flux = list(flux) if (info == 0) and verbose: print("Convergence achieved") max_flux = max(flux) max_tdens = max(tdens) Gf = nx.Graph() ed_count = -1 weights_in_Gf = [] for edge in Gpe_rel.edges(): ed_count += 1 if BPweights == "flux": if abs(flux[ed_count]) > max_flux * threshold: Gf.add_edge(*edge, flux=flux[ed_count]) weights_in_Gf.append(flux[ed_count]) elif BPweights == "tdens": if abs(tdens[ed_count]) > max_tdens * threshold: Gf.add_edge(*edge, tdens=tdens[ed_count]) weights_in_Gf.append(tdens[ed_count]) else: raise ValueError("BPweights flag not defined!.") try: Gf.add_node( edge[0], weight=Gpe_rel.nodes[edge[0]]["tdens"] ) # todo: this needs to be fixed once the flux is working again (BPweights) Gf.add_node(edge[1], weight=Gpe_rel.nodes[edge[1]]["tdens"]) except: pass Gf.remove_nodes_from(list(nx.isolates(Gf))) weights_in_Gf = np.array(weights_in_Gf) colors = [] for node in Gf.nodes(): Gf.nodes[node]["pos"] = Gpe_rel.nodes[node]["pos"] if node in sources_rel: colors.append("g") elif node in sinks_rel: colors.append("r") else: colors.append("k") inputs["topol"] = topol inputs["rhs"] = rhs inputs["pflux"] = beta_d inputs["tdens0"] = tdens0 return Gf, weights_in_Gf, colors, inputs
def clusterByDistances( cooc_matrix , field1=None, field2=None , distance=None): ''' clusterByDistance :: Coocs[nga, ngb => ccweight] -> (Graph, Partition, {ids}, {weight}) ''' # implicit global session authorized = ['conditional', 'distributional', 'cosine'] if distance not in authorized: raise ValueError("Distance must be in %s" % str(authorized)) matrix = defaultdict(lambda : defaultdict(float)) ids = defaultdict(lambda : defaultdict(int)) labels = dict() weight = dict() for cooc in cooc_matrix.items: ngram1_id = cooc[0] ngram2_id = cooc[1] ccweight = cooc_matrix.items[cooc] matrix[ngram1_id][ngram2_id] = ccweight matrix[ngram2_id][ngram1_id] = ccweight ids[ngram1_id] = (field1, ngram1_id) ids[ngram2_id] = (field2, ngram2_id) weight[ngram1_id] = weight.get(ngram1_id, 0) + ccweight weight[ngram2_id] = weight.get(ngram2_id, 0) + ccweight x = pd.DataFrame(matrix).fillna(0) if distance == 'conditional': x = x / x.sum(axis=1) #y = y / y.sum(axis=0) xs = x.sum(axis=1) - x ys = x.sum(axis=0) - x # top included or excluded n = ( xs + ys) / (2 * (x.shape[0] - 1)) # top generic or specific m = ( xs - ys) / (2 * (x.shape[0] - 1)) n = n.sort_index(inplace=False) m = m.sort_index(inplace=False) nodes_included = 10000 #int(round(size/20,0)) #nodes_excluded = int(round(size/10,0)) nodes_specific = 10000 #int(round(size/10,0)) #nodes_generic = int(round(size/10,0)) # TODO use the included score for the node size n_index = pd.Index.intersection(x.index, n.index[:nodes_included]) # Generic: #m_index = pd.Index.intersection(x.index, m.index[:nodes_generic]) # Specific: m_index = pd.Index.intersection(x.index, m.index[-nodes_specific:]) #m_index = pd.Index.intersection(x.index, n.index[:nodes_included]) x_index = pd.Index.union(n_index, m_index) xx = x[list(x_index)].T[list(x_index)] # Removing unconnected nodes xxx = xx.values threshold = min(xxx.max(axis=1)) matrix_filtered = np.where(xxx >= threshold, xxx, 0) #matrix_filtered = matrix_filtered.resize((90,90)) G = nx.from_numpy_matrix(np.matrix(matrix_filtered)) G = nx.relabel_nodes(G, dict(enumerate([ ids[id_][1] for id_ in list(xx.columns)]))) elif distance == 'cosine': scd = defaultdict(lambda : defaultdict(int)) for i in matrix.keys(): for j in matrix.keys(): numerator = sum( [ matrix[i][k] * matrix[j][k] for k in matrix.keys() if i != j and k != i and k != j ] ) denominator = sqrt( sum([ matrix[i][k] for k in matrix.keys() if k != i and k != j #and matrix[i][k] > 0 ]) * sum([ matrix[j][k] for k in matrix.keys() if k != i and k != j #and matrix[j][k] > 0 ]) ) try: scd[i][j] = numerator / denominator except Exception as error: scd[i][j] = 0 minmax = min([ max([ scd[i][j] for i in scd.keys()]) for j in scd.keys()]) G = nx.DiGraph() G.add_edges_from( [ (i, j, {'weight': scd[i][j]}) for i in scd.keys() for j in scd.keys() if i != j and scd[i][j] > minmax and scd[i][j] > scd[j][i] ] ) elif distance == 'distributional': mi = defaultdict(lambda : defaultdict(int)) total_cooc = x.sum().sum() for i in matrix.keys(): si = sum([matrix[i][j] for j in matrix[i].keys() if i != j]) for j in matrix[i].keys(): sj = sum([matrix[j][k] for k in matrix[j].keys() if j != k]) if i!=j : mi[i][j] = log( matrix[i][j] / ((si * sj) / total_cooc) ) r = defaultdict(lambda : defaultdict(int)) for i in matrix.keys(): for j in matrix.keys(): sumMin = sum( [ min(mi[i][k], mi[j][k]) for k in matrix.keys() if i != j and k != i and k != j and mi[i][k] > 0 ] ) sumMi = sum( [ mi[i][k] for k in matrix.keys() if k != i and k != j and mi[i][k] > 0 ] ) try: r[i][j] = sumMin / sumMi except Exception as error: r[i][j] = 0 # Need to filter the weak links, automatic threshold here minmax = min([ max([ r[i][j] for i in r.keys()]) for j in r.keys()]) G = nx.DiGraph() G.add_edges_from( [ (i, j, {'weight': r[i][j]}) for i in r.keys() for j in r.keys() if i != j and r[i][j] > minmax and r[i][j] > r[j][i] ] ) # degree_max = max([(n, d) for n,d in G.degree().items()], key=itemgetter(1))[1] # nodes_to_remove = [n for (n,d) in G.degree().items() if d <= round(degree_max/2)] # G.remove_nodes_from(nodes_to_remove) # Removing too connected nodes (find automatic way to do it) #edges_to_remove = [ e for e in G.edges_iter() if # nodes_to_remove = [n for n in degree if degree[n] <= 1] # G.remove_nodes_from(nodes_to_remove) def getWeight(item): return item[1] # # node_degree = sorted(G.degree().items(), key=getWeight, reverse=True) # #print(node_degree) # nodes_too_connected = [n[0] for n in node_degree[0:(round(len(node_degree)/5))]] # # for n in nodes_too_connected: # n_edges = list() # for v in nx.neighbors(G,n): # #print((n, v), G[n][v]['weight'], ":", (v,n), G[v][n]['weight']) # n_edges.append(((n, v), G[n][v]['weight'])) # # n_edges_sorted = sorted(n_edges, key=getWeight, reverse=True) # #G.remove_edges_from([ e[0] for e in n_edges_sorted[round(len(n_edges_sorted)/2):]]) # #G.remove_edges_from([ e[0] for e in n_edges_sorted[(round(len(nx.neighbors(G,n))/3)):]]) # G.remove_edges_from([ e[0] for e in n_edges_sorted[10:]]) G.remove_nodes_from(list(nx.isolates(G))) partition = best_partition(G.to_undirected()) return(G,partition,ids,weight)
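The final step above hands the undirected graph to python-louvain's best_partition. A minimal, hedged standalone sketch of that call (the package is python-louvain, imported as community):

import networkx as nx
from community import best_partition  # python-louvain

G = nx.karate_club_graph()
partition = best_partition(G)  # dict: node -> community id
print(max(partition.values()) + 1, 'communities found')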
def preprocessing(): '''removing isolated nodes''' isolated = nx.isolates(G) for j in isolated: for key in nodes_data.keys(): if j == key: del nodes_data[key] G.remove_node(j) #print G.number_of_nodes(), len(nodes_data) continue for k in edges_data.keys(): '''deleting all edges from edges_data which no more exist''' if not G.has_edge(edges_data[k][1], edges_data[k][2], key=k): #print len(edges_data) del edges_data[k] gen = [] indegree = G.in_degree(G.nodes()) '''number of nodes with no in_degree''' '''removing joints,merge nodes categorised as generators from graph and node data''' t = 0 for i in indegree: if indegree[i] == 0: if nodes_data[i][3] == 'merge' or nodes_data[i][3] == 'joint': continue gen.append(i) for node, nd in nodes_data.iteritems(): '''adding plants to generators''' if nd[3] == 'plant' and nd[0] not in gen: gen.append(node) voltages = {} z = 0 for aa, val in edges_data.iteritems(): '''adding edge attributes''' z += 1 #print z if edges_data[aa][3].find(';') != -1: xx = map(float, edges_data[aa][3].split(';')) edges_data[aa][3] = max(xx) del xx[:] elif edges_data[aa][3] == '': edges_data[aa][3] = np.nan else: edges_data[aa][3] = float(edges_data[aa][3]) if val[4].find(';') != -1: xy = map(float, val[4].split(';')) val[4] = sum(xy) del xy[:] else: val[4] = float(val[4]) voltages[(val[1], val[2], val[0])] = val[3] nx.set_edge_attributes(G, 'voltages', voltages) distr = distributers() for f in gen: if f in distr: distr.remove(f) #G.remove_edges_from(G.in_edges(f)) for node in nodes_data.keys(): '''classifying every node''' if node in gen: nodes_data[node][3] = 'generators' elif node in distr: nodes_data[node][3] = 'distributors' else: nodes_data[node][3] = 'transmitters' for ab, v in nodes_data.iteritems(): '''adding nodes attributes''' if v[4].find(';') != -1: ss = map(float, v[4].split(';')) if v[3] == 'generator' or v[3] == 'distributor': v[4] = min(ss) else: v[4] = max(ss) del ss[:] elif v[4] == '': v[4] = np.nan else: v[4] = float(v[4]) '''removing multi edges''' for x in G.nodes(): for y in G.nodes(): if x == y: continue if G.number_of_edges(x, y) > 1: kkk = [] for ee in list(G.edges(x, keys=True)): if ee[0] == x and ee[1] == y: kkk.append(ee[2]) for l in range(1, len(kkk)): edges_data[kkk[0]][4] += edges_data[kkk[l]][4] edges_data[kkk[0]][3] = max(edges_data[kkk[0]][3], edges_data[kkk[l]][3]) del edges_data[kkk[l]] G.remove_edge(x, y, key=kkk[l]) G[x][y][kkk[0]]['cable'] = edges_data[kkk[0]][4] G[x][y][kkk[0]]['voltages'] = edges_data[kkk[0]][3] #return (G,nodes_data,edges_data) o = [] with open('../../NS_project/Data/Vertices_new.csv', 'wb') as csvfile: nodewriter = csv.writer(csvfile, delimiter=',') header = ['v_id', 'lon', 'lat', 'typ', 'voltage'] nodewriter.writerow(header) for da, it in nodes_data.iteritems(): for l in range(0, 5): o.append(str(it[l])) nodewriter.writerow(o) del o[:] with open('../../NS_project/Data/Edges_new.csv', 'wb') as csvfile1: edgewriter = csv.writer(csvfile1, delimiter=',') header1 = ['l_id', 'v_id_1', 'v_id_2', 'voltage', 'cables', 'length_m'] edgewriter.writerow(header1) for db, itt in edges_data.iteritems(): for l in range(0, 5): o.append(str(itt[l])) o.append(str(itt[10])) edgewriter.writerow(o) del o[:] return G
for ( source, target ), weight in c.items( ): edge = ( source, target, { 'weight' : weight } ) edge_list.append( edge ) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# G = nx.Graph( ) G.add_edges_from( edge_list ) nx.write_gexf( G, OUTPUT_GEXF_FULL ) weights = nx.get_edge_attributes( G, 'weight' ) edges_to_remove = [ k for k, v in weights.items( ) if v < WEIGHT_THRESHOLD ] G.remove_edges_from( edges_to_remove ) G.remove_nodes_from( list( nx.isolates( G ) ) ) nodes_to_remove = [ ] for node in G: if len( list( nx.neighbors( G, node ) ) ) < NEIGHBOR_THRESHOLD: nodes_to_remove.append( node ) G.remove_nodes_from( nodes_to_remove ) G.remove_nodes_from( BOT_LIST ) G.remove_nodes_from( list( nx.isolates( G ) ) ) #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++# nx.write_gexf( G, OUTPUT_GEXF_FILTERED ) ###############################################################################
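The same pruning recipe in a minimal, self-contained sketch; the threshold values here are illustrative, not the script's actual WEIGHT_THRESHOLD/NEIGHBOR_THRESHOLD configuration.

import networkx as nx

WEIGHT_THRESHOLD, NEIGHBOR_THRESHOLD = 2, 2
G = nx.Graph()
G.add_weighted_edges_from([('a', 'b', 3), ('b', 'c', 3), ('c', 'a', 3), ('c', 'd', 1)])
weights = nx.get_edge_attributes(G, 'weight')
G.remove_edges_from([e for e, w in weights.items() if w < WEIGHT_THRESHOLD])      # drop light edges
G.remove_nodes_from([n for n in list(G)
                     if len(list(nx.neighbors(G, n))) < NEIGHBOR_THRESHOLD])      # drop low-degree nodes
G.remove_nodes_from(list(nx.isolates(G)))                                        # then any leftover isolates
print(sorted(G.nodes()))  # ['a', 'b', 'c']: the light edge and its endpoint are gone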
def drop_entities(triples_file, train_size=0.8, valid_size=0.1, test_size=0.1, seed=0, types_file=None): """Drop entities from a graph, to create training, validation and test splits. Entities are dropped so that no disconnected nodes are left in the training graph. Dropped entities are distributed between disjoint validation and test sets. """ splits_sum = train_size + valid_size + test_size if splits_sum <= 0 or splits_sum > 1: raise ValueError('Sum of split sizes must be greater than 0' ' and less than or equal to 1.') use_types = types_file is not None if use_types: type2entities = read_entity_types(types_file) types = list(type2entities.keys()) random.seed(seed) graph = nx.MultiDiGraph() triples, rel_counts = parse_triples(triples_file) graph.add_weighted_edges_from(triples) original_num_edges = graph.number_of_edges() original_num_nodes = graph.number_of_nodes() print(f'Loaded graph with {graph.number_of_nodes():,} entities ' f'and {graph.number_of_edges():,} edges') dropped_entities = [] dropped_edges = dict() num_to_drop = int(original_num_nodes * (1 - train_size)) num_val = int(original_num_nodes * valid_size) num_test = int(original_num_nodes * test_size) print(f'Removing {num_to_drop:,} entities...') progress = tqdm(total=num_to_drop, file=sys.stdout) while len(dropped_entities) < num_to_drop: if use_types: # Sample an entity with probability proportional to its type count # (minus 1 to keep at least one entity of any type) weights = [len(type2entities[t]) - 1 for t in types] rand_type = random.choices(types, weights, k=1)[0] rand_ent = random.choice(list(type2entities[rand_type])) else: # Sample an entity uniformly at random rand_ent = random.choice(list(graph.nodes)) removed_tuple = get_safely_removed_edges(graph, rand_ent, rel_counts) if removed_tuple is not None: removed_edges, removed_counts = removed_tuple dropped_edges[rand_ent] = removed_edges graph.remove_node(rand_ent) dropped_entities.append(rand_ent) rel_counts.subtract(removed_counts) if use_types: type2entities[rand_type].remove(rand_ent) progress.update(1) progress.close() # Are there indeed no disconnected nodes? assert len(list(nx.isolates(graph))) == 0 # Did we keep track of removed edges correctly? num_removed_edges = sum(map(len, dropped_edges.values())) assert num_removed_edges + graph.number_of_edges() == original_num_edges # Test entities MUST come from the first slice! This guarantees that # validation entities don't have edges with them (because nodes were # removed in sequence) test_ents = set(dropped_entities[:num_test]) val_ents = set(dropped_entities[num_test:num_test + num_val]) train_ents = set(graph.nodes()) # Check that entity sets are disjoint assert len(train_ents.intersection(val_ents)) == 0 assert len(train_ents.intersection(test_ents)) == 0 assert len(val_ents.intersection(test_ents)) == 0 # Check that validation graph does not contain test entities val_graph = nx.MultiDiGraph() val_edges = [] for entity in val_ents: val_edges += dropped_edges[entity] val_graph.add_weighted_edges_from(val_edges) assert len(set(val_graph.nodes()).intersection(test_ents)) == 0 names = ('train', 'dev', 'test') dirname = osp.dirname(triples_file) prefix = 'ind-' for entity_set, set_name in zip((train_ents, val_ents, test_ents), names): # Save file with entities for set with open(osp.join(dirname, f'{set_name}-ents.txt'), 'w') as file: file.writelines('\n'.join(entity_set)) if set_name == 'train': # Triples for train split are saved later continue # Save file with triples for entities in set with open(osp.join(dirname, f'{prefix}{set_name}.tsv'), 'w') as file: for entity in entity_set: triples = dropped_edges[entity] for head, tail, rel in triples: file.write(f'{head}\t{rel}\t{tail}\n') with open(osp.join(dirname, f'{prefix}train.tsv'), 'w') as train_file: for head, tail, rel in graph.edges(data=True): train_file.write(f'{head}\t{rel["weight"]}\t{tail}\n') print(f'Dropped {len(val_ents):,} entities for validation' f' and {len(test_ents):,} for test.') print(f'{graph.number_of_nodes():,} entities are left for training.') print(f'Saved output files to {dirname}/')
def smith_waterman_filter(graph, flanking_reads, params): fasta_filename = params['fasta_filename'] paf_filename = params['paf_filename'] window_size = params['sw_window_size'] fasta_dict = get_fasta_dict(fasta_filename) paf_dict = get_paf_dict(paf_filename) score_threshold = compute_sw_threshold(flanking_reads, paf_dict, fasta_dict, window_size) # Generate scores dictionary scores = {} num_good_scores = 0 num_bad_scores = 0 edges_to_remove = set() for query, target in nx.edges(graph): # Get overlap info from the paf dictionary if str(query + target) in paf_dict: # get the info overlap_info = paf_dict[query + target] elif str(target + query) in paf_dict: # get info and swap them overlap_info = paf_dict[target + query] query, target = target, query else: continue # no PAF record for this pair; skip it instead of crashing on None query_start = overlap_info['query_start'] query_end = overlap_info['query_end'] target_start = overlap_info['target_start'] target_end = overlap_info['target_end'] query_seq = fasta_dict[query][query_start:query_end] target_seq = fasta_dict[target][target_start:target_end] # Align the sequences using the rolling method bad_score = False min_len = min(len(query_seq), len(target_seq)) # Get scores for this pair; store in cur_scores cur_scores = [] if window_size: # Use rolling window for start, end in utils.pairwise(range(0, min_len, window_size)): qs = query_seq[start:end] ts = target_seq[start:end] score = smith_waterman.smith_waterman(qs, ts) cur_scores.append(score) else: # No rolling window score = smith_waterman.smith_waterman(query_seq, target_seq) cur_scores = [score] # Save data to scores dictionary # Sometimes the scores dictionary is never used # Other times it's extremely useful for plotting data scores[str(query + target)] = cur_scores # Analyze scores score = max(cur_scores) if score < score_threshold: num_good_scores += 1 else: num_bad_scores += 1 edges_to_remove.add((query, target)) # remove edges and isolated nodes graph.remove_edges_from(list(edges_to_remove)) isolates = list(nx.isolates(graph)) graph.remove_nodes_from(isolates) # the histogram of the data plt.subplot(2, 3, 3) all_scores = list(utils.flatten(list(scores.values()))) plt.hist(all_scores) plt.title( "histogram of num_gaps / len(aligned_sequence)\n{} bad_scores {} good_scores\nthreshold = {}\nwindow_size = {}" .format(num_bad_scores, num_good_scores, score_threshold, window_size)) return graph
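The rolling window above relies on utils.pairwise over the stepped range. A hedged sketch of the assumed behaviour (equivalent to the classic itertools pairwise recipe), which also shows that a final partial window shorter than window_size gets dropped:

from itertools import tee

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

seq = 'ACGTACGTACGT'
window_size = 5
for start, end in pairwise(range(0, len(seq), window_size)):
    print(start, end, seq[start:end])  # (0, 5) and (5, 10); the 2-char tail is dropped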
def onion_layers(G): """Returns the layer of each vertex in an onion decomposition of the graph. The onion decomposition refines the k-core decomposition by providing information on the internal organization of each k-shell. It is usually used alongside the `core numbers`. Parameters ---------- G : NetworkX graph A simple graph without self loops or parallel edges Returns ------- od_layers : dictionary A dictionary keyed by vertex to the onion layer. The layers are contiguous integers starting at 1. Raises ------ NetworkXError The onion decomposition is not implemented for graphs with self loops or parallel edges or for directed graphs. Notes ----- Not implemented for graphs with parallel edges or self loops. Not implemented for directed graphs. See Also -------- core_number References ---------- .. [1] Multi-scale structure and topological anomaly detection via a new network statistic: The onion decomposition L. Hébert-Dufresne, J. A. Grochow, and A. Allard Scientific Reports 6, 31708 (2016) http://doi.org/10.1038/srep31708 .. [2] Percolation and the effective structure of complex networks A. Allard and L. Hébert-Dufresne Physical Review X 9, 011023 (2019) http://doi.org/10.1103/PhysRevX.9.011023 """ if nx.number_of_selfloops(G) > 0: msg = ("Input graph contains self loops which is not permitted; " "Consider using G.remove_edges_from(nx.selfloop_edges(G)).") raise NetworkXError(msg) # Dictionary to register the onion decomposition layers. od_layers = {} # Adjacency list neighbors = {v: list(nx.all_neighbors(G, v)) for v in G} # Effective degree of nodes. degrees = dict(G.degree()) # Performs the onion decomposition. current_core = 1 current_layer = 1 # Sets vertices of degree 0 to layer 1, if any. isolated_nodes = [v for v in nx.isolates(G)] if len(isolated_nodes) > 0: for v in isolated_nodes: od_layers[v] = current_layer degrees.pop(v) current_layer = 2 # Finds the layer for the remaining nodes. while len(degrees) > 0: # Sets the order for looking at nodes. nodes = sorted(degrees, key=degrees.get) # Sets properly the current core. min_degree = degrees[nodes[0]] if min_degree > current_core: current_core = min_degree # Identifies vertices in the current layer. this_layer = [] for n in nodes: if degrees[n] > current_core: break this_layer.append(n) # Identifies the core/layer of the vertices in the current layer. for v in this_layer: od_layers[v] = current_layer for n in neighbors[v]: neighbors[n].remove(v) degrees[n] = degrees[n] - 1 degrees.pop(v) # Updates the layer count. current_layer = current_layer + 1 # Returns the dictionary containing the onion layer of each vertex. return od_layers
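A quick, hedged usage check for onion_layers on a standard test graph; core_number is NetworkX's built-in and the comparison is purely illustrative:

import networkx as nx

G = nx.karate_club_graph()  # simple graph, no self loops
layers = onion_layers(G)
cores = nx.core_number(G)
print(max(layers.values()), 'onion layers refine', max(cores.values()), 'cores')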
def remove_isolates(G, node_list, option, basic, to_b_nodes, from_a_nodes, a_to_b_edges, net): '''Removes any isolated nodes in the given network and any associated edges. Returns the edited network and a number of lists which require updating due to the removal. Input: Return: ''' #remove any isolated nodes and associated edges try: isolatednodes = list(nx.isolates(G)) except: return 4010 if G.number_of_edges() == 0: #print 'The number of nodes left is:', G.number_of_nodes() raise error_classes.GraphError( 'Error. The network is disconnected, there are no edges left in the network.' ) else: #remove all nodes which are in the isolated list G.remove_nodes_from(isolatednodes) j = 0 #loop through the isolated nodes and remove from the node list try: while j < len(isolatednodes): k = 0 while k < len(node_list): if isolatednodes[j] == node_list[k]: node_list.remove(node_list[k]) k -= 1 k += 1 j += 1 except: return 4012 #update some of the lists to record the simulation process tot = 0 if net == 'B': for nd in isolatednodes: v = 0 found = False try: while v < len(a_to_b_edges): if int(nd) == int(a_to_b_edges[v][1]): a_to_b_edges.pop(v) found = True tot += 1 v -= 1 if found == False: print('node is:', nd) for item in a_to_b_edges: print(item[1]) exit() v += 1 except: return 4013 var = G, node_list, basic, option, isolatednodes, to_b_nodes, from_a_nodes, a_to_b_edges return var