def _good_k_break(self, old_edges, new_edges):
    """
    Checks that the break does not change chromosome structure significantly
    """
    MIN_OVLP_SCORE = 0.9
    MAX_K_BREAK = 4

    if len(old_edges) > MAX_K_BREAK:
        return False

    new_adj_graph = self.adj_graph.copy()
    for u, v in old_edges:
        new_adj_graph.remove_edge(u, v)
    for u, v in new_edges:
        new_adj_graph.add_edge(u, v)

    all_nodes = new_adj_graph.nodes()
    old_sets = list(map(lambda g: set(g.nodes()),
                        nx.connected_component_subgraphs(self.adj_graph)))
    new_sets = list(map(lambda g: set(g.nodes()),
                        nx.connected_component_subgraphs(new_adj_graph)))
    if len(old_sets) != len(new_sets):
        return False

    for old_set in old_sets:
        max_overlap = 0
        best_score = 0
        for new_set in new_sets:
            overlap = len(old_set & new_set)
            if overlap > max_overlap:
                max_overlap = overlap
                best_score = float(overlap) / len(old_set | new_set)
        if best_score < MIN_OVLP_SCORE:
            return False

    return True
def zc(G, list_G1, list_G2, f):  # compute z-scores
    """
    Inputs: the original network G, the list of rewired networks that do not
    preserve connectivity (list_G1), the list of rewired networks that do
    preserve connectivity (list_G2), and the metric function f, which requires
    a connected network.
    Returns: the z-score of the metric without connectivity preserved (z1)
    and with connectivity preserved (z2).
    """
    list_G_l0 = []; list_G_l1 = []; list_G_l2 = []
    for g in nx.connected_component_subgraphs(G):
        list_G_l0.append(f(g))
    for G1 in list_G1:
        for g1 in nx.connected_component_subgraphs(G1):
            list_G_l1.append(f(g1))  # list of metric values
    for G2 in list_G2:
        for g2 in nx.connected_component_subgraphs(G2):
            list_G_l2.append(f(g2))
    #print list_G_l0, list_G_l1, list_G_l2
    G_l0 = np.mean(list_G_l0)
    G_l1 = np.mean(list_G_l1)  # means
    G_l2 = np.mean(list_G_l2)
    var_z1 = np.var(list_G_l1)  # variances
    var_z2 = np.var(list_G_l2)
    if var_z1 == 0:  # if the variance is 0, the z-score is set to 0
        z1 = 0
    else:
        z1 = (G_l0 - G_l1) / var_z1  # z-score
    if var_z2 == 0:
        z2 = 0
    else:
        z2 = (G_l0 - G_l2) / var_z2  # z-score
    return z1, z2
def main():
    tempo_dir = "../corpus-local/tempo-txt"
    file_regex = ".*\.txt"

    G = build_graph(tempo_dir, file_regex)
    """
    ccs = nx.clustering(G)
    avg_clust = sum(ccs.values()) / len(ccs)
    """
    print tempo_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))

    kompas_dir = "../corpus-local/kompas-txt"
    G = build_graph(kompas_dir, file_regex)
    print kompas_dir
    print "\tAda " + str(len(G.nodes())) + " node."
    print "\tAda " + str(len(G.edges())) + " edge."
    print "\tClustering coefficient : " + str(nx.average_clustering(G))
    print "\tAverage shortest path length"
    for g in nx.connected_component_subgraphs(G):
        print "\t\t" + str(nx.average_shortest_path_length(g))
def _extract_ccomponents(self, graph, threshold=0, min_size=2):
    # remove all vertices that have a score less than threshold
    cc_list = []
    if self.less_then:
        less_component_graph = graph.copy()
        for v, d in less_component_graph.nodes_iter(data=True):
            if d.get(self.attribute, False):
                if d[self.attribute] < threshold:
                    less_component_graph.remove_node(v)

        for cc in nx.connected_component_subgraphs(less_component_graph):
            if len(cc) >= min_size:
                cc_list.append(cc)

    # remove all vertices that have a score more than threshold
    if self.more_than:
        more_component_graph = graph.copy()
        for v, d in more_component_graph.nodes_iter(data=True):
            if d.get(self.attribute, False):
                if d[self.attribute] >= threshold:
                    more_component_graph.remove_node(v)

        for cc in nx.connected_component_subgraphs(more_component_graph):
            if len(cc) >= min_size:
                cc_list.append(cc)

    return cc_list
def printStats(filename):
    '''
    Converts json adjacency list into networkx to calculate and print the
    graph's
      - average clustering coefficient
      - overall clustering coefficient
      - maximum diameter
      - average diameter
      - number of partitions using community.best_partition
      - modularity of community.best_partition
    '''
    g = makeGraphFromJSON(filename)

    print "Average Clustering Coefficient: %f" % nx.average_clustering(g)
    print "Overall Clustering Coefficient: %f" % nx.transitivity(g)

    connected_subgraphs = list(nx.connected_component_subgraphs(g))
    largest = max(nx.connected_component_subgraphs(g), key=len)
    print "# Connected Components: %d" % len(connected_subgraphs)
    print "    Maximal Diameter: %d" % nx.diameter(largest)
    print "    Average Diameter: %f" % nx.average_shortest_path_length(largest)

    # Find partition that maximizes modularity using Louvain's algorithm
    part = community.best_partition(g)
    print "# Partitions: %d" % (max(part.values()) + 1)
    print "Louvain Modularity: %f" % community.modularity(part, g)
def _extract_ccomponents(self, graph, threshold=0, min_size=2, max_size=20):
    # remove all vertices that have a score less than threshold
    cc_list = []
    if self.less_then:
        less_component_graph = graph.copy()
        for v, d in less_component_graph.nodes_iter(data=True):
            if self.get_attr_from_noded(d):
                if self.get_attr_from_noded(d) < threshold:
                    less_component_graph.remove_node(v)

        for cc in nx.connected_component_subgraphs(less_component_graph):
            if len(cc) >= min_size and len(cc) <= max_size:
                cc_list.append(cc)
            if len(cc) > max_size and self.shrink_graphs:
                cc_list += list(self.enforce_max_size(cc, min_size, max_size))

    # remove all vertices that have a score more than threshold
    if self.more_than:
        more_component_graph = graph.copy()
        for v, d in more_component_graph.nodes_iter(data=True):
            if self.get_attr_from_noded(d):
                if self.get_attr_from_noded(d) >= threshold:
                    more_component_graph.remove_node(v)

        for cc in nx.connected_component_subgraphs(more_component_graph):
            if len(cc) >= min_size and len(cc) <= max_size:
                cc_list.append(cc)
            if len(cc) > max_size and self.shrink_graphs:
                cc_list += list(self.enforce_max_size(cc, min_size, max_size,
                                                      choose_cut_node=max))

    return cc_list
def get_boundary_for_label(data, classifier, num_label, step):
    # See
    # http://en.wikipedia.org/wiki/Postcodes_in_the_United_Kingdom#Operation_and_application
    # for the various divisions.
    t_start = time.time()
    district = data[data[:, 0] == num_label, 1:]

    # Align grid to nearest "step". Also grow border by 25 units
    # to make sure the marching squares can build a full loop.
    x0, y0 = np.floor(district.min(0) / step - 25) * step
    x1, y1 = np.ceil(district.max(0) / step + 25) * step

    # Use KNN to colour a grid that covers the district
    xx, yy = np.mgrid[x0:x1:step, y0:y1:step]
    prediction = classifier.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Split predicted labels into inside/outside
    prediction = (prediction == num_label).astype('u1')

    # We transpose to make reasoning about the lookups easier.
    prediction = prediction.transpose()

    # zero-pad predictions to make sure marching squares creates
    # closed outlines.
    tmp = np.zeros((prediction.shape[0] + 2, prediction.shape[1] + 2), dtype='u1')
    tmp[1:-1, 1:-1] = prediction
    prediction = tmp

    outline = networkx.Graph()
    h, w = prediction.shape
    for i, j in np.ndindex(h - 1, w - 1):
        # We use tostring() as a cheap, hashable lookup type for the
        # marching squares implementation.
        # Dimension 0 ~ y ~ i, dim 1 ~ x ~ j:
        piter = iter(MARCHING_SQUARE_LOOKUP[prediction[i:i+2, j:j+2].tostring()])
        for rel1, rel2 in zip(piter, piter):
            p1 = int(x0 + step * (j + rel1[0])), int(y0 + step * (i + rel1[1]))
            p2 = int(x0 + step * (j + rel2[0])), int(y0 + step * (i + rel2[1]))
            outline.add_node(p1)
            outline.add_node(p2)
            outline.add_edge(p1, p2)

    # Pick the largest subgraph, other graphs are most likely outliers.
    logging.info(
        "%s: Found %s connected graphs in %.2fs",
        num_label,
        len(networkx.connected_component_subgraphs(outline)),
        time.time() - t_start,
    )
    largest = max(
        networkx.connected_component_subgraphs(outline),
        key=lambda x: x.size()
    )

    return list(shapely.ops.polygonize(largest.edges()))[0]
def get_small_worldness(filename): import networkx as nx threshold = 0 f = open(filename[:-4]+'_small_worldness.dat','w') for i in range(0,101): threshold = float(i)/100 G = get_threshold_matrix(filename, threshold) ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G)) cluster = nx.average_clustering(G) ER_cluster = nx.average_clustering(ER_graph) transi = nx.transitivity(G) ER_transi = nx.transitivity(ER_graph) print 'threshold: %f, average cluster coefficient: %f, random nw: %f, transitivity: %f, random nw: %f' %(threshold, cluster, ER_cluster, transi, ER_transi) f.write("%f\t%f\t%f" % (threshold, cluster, ER_cluster)) components = nx.connected_component_subgraphs(G) ER_components = nx.connected_component_subgraphs(ER_graph) values = [] ER_values = [] for i in range(len(components)): if nx.number_of_nodes(components[i]) > 1: values.append(nx.average_shortest_path_length(components[i])) for i in range(len(ER_components)): if nx.number_of_nodes(ER_components[i]) > 1: ER_values.append(nx.average_shortest_path_length(ER_components[i])) if len(values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(values)/len(values))) if len(ER_values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(ER_values)/len(ER_values))) f.write("\t%f\t%f" % (transi, ER_transi)) if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_WS = 0. if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_Delta = 0. f.write("\t%f\t%f" % (S_WS, S_Delta)) f.write("\n") f.close() print "1:threshold 2:cluster-coefficient 3:random-cluster-coefficient 4:shortest-pathlength 5:random-shortest-pathlength 6:transitivity 7:random-transitivity 8:S-Watts-Strogatz 9:S-transitivity"
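The small-worldness values written out above reduce to the ratio S = (C / C_rand) / (L / L_rand), where C and L are the clustering coefficient and characteristic path length of the thresholded graph and C_rand, L_rand those of the matched Erdos-Renyi graph. A minimal standalone sketch of that ratio follows; the helper name is illustrative and not part of the original script, and newer NetworkX releases also provide nx.sigma() for a related statistic (version-dependent, so treat that as an assumption).

def small_world_coefficient(C, L, C_rand, L_rand):
    """Return S = (C / C_rand) / (L / L_rand); 0 if a reference value is 0."""
    if C_rand == 0 or L == 0 or L_rand == 0:
        return 0.0
    return (C / C_rand) / (L / L_rand)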
def hemst(G, k): nc = 1 mst = G point_set = {} while nc != k: nc = 1 mst = nx.minimum_spanning_tree(mst) weights = np.array([attrs['weight'] for _,_,attrs in mst.edges(data=True)]) mean_w = weights.mean() std = weights.std() for a,b,attrs in mst.edges(data=True): w = attrs['weight'] if w > mean_w + std: mst.remove_edge(a,b) nc+=1 if nc < k: while nc != k: remove_longest_edge(mst) nc+=1 break if nc > k: sG = nx.connected_component_subgraphs(mst) centroid_nodes = [] for g in sG: cl = nx.closeness_centrality(g) sorted_set_nodes = sorted(cl.items(), key=lambda a: a[1]) closest_to_c = sorted_set_nodes[0][0] point_set[closest_to_c] = g.nodes() for p, _ in sorted_set_nodes[1:]: if p in point_set: point_set[closest_to_c]+= point_set[p] centroid_nodes.append(closest_to_c) edges=itertools.combinations(centroid_nodes,2) mst.clear() mst.add_nodes_from(centroid_nodes) mst.add_edges_from(edges) for u,v in mst.edges(): weight = G.get_edge_data(u,v)["weight"] nx.set_edge_attributes(mst, "weight", {(u,v):weight}) sG = nx.connected_component_subgraphs(mst) if point_set: for g in sG: for node in g.nodes(): if node in point_set: g.add_nodes_from(point_set[node]) return sG
def eigenvector_apl(g, recalculate=False): """ Performs robustness analysis based on eigenvector centrality, on the network specified by infile using sequential (recalculate = True) or simultaneous (recalculate = False) approach. Returns a list with fraction of nodes removed, a list with the corresponding sizes of the largest component of the network, and the overall vulnerability of the network. """ m = networkx.eigenvector_centrality(g) l = sorted(m.items(), key=operator.itemgetter(1), reverse=True) x = [] y = [] average_path_length = 0.0 number_of_components = 0 n = len(g.nodes()) for sg in networkx.connected_component_subgraphs(g): average_path_length += networkx.average_shortest_path_length(sg) number_of_components += 1 average_path_length /= number_of_components initial_apl = average_path_length r = 0.0 for i in range(1, n - 1): g.remove_node(l.pop(0)[0]) if recalculate: try: m = networkx.eigenvector_centrality(g, max_iter=5000) except networkx.NetworkXError: break l = sorted(m.items(), key=operator.itemgetter(1), reverse=True) average_path_length = 0.0 number_of_components = 0 for sg in networkx.connected_component_subgraphs(g): if len(sg.nodes()) > 1: average_path_length += networkx.average_shortest_path_length(sg) number_of_components += 1 average_path_length = average_path_length / number_of_components x.append(i * 1. / initial_apl) r += average_path_length * 1. / initial_apl y.append(average_path_length * 1. / initial_apl) return x, y, r / initial_apl
def get_small_worldness(G, thr): f = open(out_prfx + 'small_worldness.dat', 'a') g = open(out_prfx + 'cc_trans_ER.dat', 'a') #g.write('r(thre.)\t\cc_A\tcc_ER\ttran_A\ttran_ER\n') ER_graph = nx.erdos_renyi_graph(nx.number_of_nodes(G), nx.density(G)) # erdos-renyi, binomial random graph generator ...(N,D:density) cluster = nx.average_clustering(G) # clustering coef. of whole network ER_cluster = nx.average_clustering(ER_graph) #cc of random graph transi = nx.transitivity(G) ER_transi = nx.transitivity(ER_graph) g.write("%f\t%f\t%f\t%f\t%f\n" % (thr, cluster,ER_cluster,transi,ER_transi )) f.write("%f\t%f\t%f" % (thr, cluster, ER_cluster)) components = nx.connected_component_subgraphs(G) ER_components = nx.connected_component_subgraphs(ER_graph) values = [] ER_values = [] for i in range(len(components)): if nx.number_of_nodes(components[i]) > 1: values.append(nx.average_shortest_path_length(components[i])) for i in range(len(ER_components)): if nx.number_of_nodes(ER_components[i]) > 1: ER_values.append(nx.average_shortest_path_length(ER_components[i])) if len(values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(values)/len(values))) # pathlenght if len(ER_values) == 0: f.write("\t0.") else: f.write("\t%f" % (sum(ER_values)/len(ER_values))) f.write("\t%f\t%f" % (transi, ER_transi)) if (ER_cluster*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_WS = (cluster/ER_cluster) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_WS = 0. if (ER_transi*sum(values)*len(values)*sum(ER_values)*len(ER_values)) >0 : S_Delta = (transi/ER_transi) / ((sum(values)/len(values)) / (sum(ER_values)/len(ER_values))) else: S_Delta = 0. f.write("\t%f\t%f" % (S_WS, S_Delta)) # S_WS ~ small worldness f.write("\n") f.close() g.close()
def process_network(G, namespace): print 'Nodes:', len(G) print 'Edges:', G.number_of_edges() if namespace.clustering_coefficient: print 'Clustering Coefficient:', nx.average_clustering(G) if namespace.components: components = nx.connected_component_subgraphs(G) print 'Number of Components:', len(components) isles = [c for c in components if len(c) == 1] print 'Isles:', len(isles) print 'Largest Component Size:', len(components[0]) else: components = None if namespace.cpl: if namespace.approximate_cpl: average_shortest_path_length = approximate_cpl else: print 'Using full slow CPL' average_shortest_path_length = nx.average_shortest_path_length if components is None: components = nx.connected_component_subgraphs(G) for i, g in enumerate(g for g in components if float(len(g))/float(len(G)) > namespace.component_size): print 'CPL %d: (%f)' % (i, float(len(g))/float(len(G))) print average_shortest_path_length(g) if namespace.assortativity: print 'Assortativity: NOT IMPLEMENTED.' if namespace.degree_distribution: hst = nx.degree_histogram(G) plt.subplot(121) plt.xscale('log') plt.yscale('log') plt.title("Degree Distribution") plt.ylabel("Occurrencies") plt.xlabel("Degree") plt.plot(range(len(hst)), hst, marker='+') plt.subplot(122) ccdf = pynetsym.mathutil.ccdf(hst) plt.xscale('log') plt.yscale('log') plt.title("CCDF Degree Distribution") plt.ylabel("$P(X>x)$") plt.xlabel("Degree") plt.plot(range(len(ccdf)), ccdf, color='red') if namespace.degree_distribution_out is None: plt.show() else: plt.save_fig(namespace.degree_distribution_out)
def one_girvan_newman(self, G):
    def find_best_edge(G0):
        eb = nx.edge_betweenness_centrality(G0)
        eb_il = eb.items()
        eb_il.sort(key=lambda x: x[1], reverse=True)
        return eb_il[0][0]

    num_clusters = len(sorted(nx.connected_component_subgraphs(G),
                              key=len, reverse=True))
    caused_split = False
    while not caused_split:
        G.remove_edge(*find_best_edge(G))
        components = sorted(nx.connected_component_subgraphs(G),
                            key=len, reverse=True)
        if len(components) == num_clusters + 1:
            caused_split = True
def _plot_graphs(self): self.f,self.ax = plt.subplots(len(self.transition['all']),4,figsize=(14,10)) # first col motion , second distance self.f.suptitle('Scene : '+str(self.scene), fontsize=20) for feature in [0,2]: # plot the different graphs of motion and distance for sub,T in enumerate(self.transition['all']): plt.sca(self.ax[sub,feature]) print 'plotting graph : '+str(sub+1)+' from '+str(len(self.transition['all'])) if feature == 0: if T not in self.transition['motion']: for i in self.transition['motion']: if i<T: t=i else: t=T G=self.G_motion[t]['graph'] elif feature == 2: if T not in self.transition['touch']: for i in self.transition['touch']: if i<T: t=i else: t=T G=self.G_touch[t]['graph'] # layout graphs with positions using graphviz neato pos=nx.graphviz_layout(G,prog="neato") # color nodes the same in each connected subgraph C=nx.connected_component_subgraphs(G) cK = 0 for i in C: cK += 1 C=nx.connected_component_subgraphs(G) colors = np.linspace(.2,.6,cK) for count,g in enumerate(C): c=[colors[count]]*nx.number_of_nodes(g) # same color... nx.draw(g,pos,node_size=80,node_color=c,vmin=0.0,vmax=1.0,with_labels=False) #nx.draw_networkx_edges(g,pos, with_labels=False, edge_color=c[0], width=6.0, alpha=0.5) nx.draw_networkx_nodes(self.G,pos, node_color='b', node_size=100, alpha=1) nx.draw_networkx_nodes(self.G,pos, nodelist=['G'], node_color='r', node_size=100, alpha=1) nx.draw_networkx_nodes(self.G,pos, nodelist=[str(self.m_obj)], node_color='c', node_size=100, alpha=1) nx.draw_networkx_edges(G,pos, alpha=0.8) #nx.draw(G) # networkx draw() self.ax[sub,feature].axis('on') self.ax[sub,feature].axis('equal') plt.tick_params(axis='x',which='both',bottom='off',top='off',labelbottom='off') plt.tick_params(axis='y',which='both',right='off',left='off',labelleft='off') if feature == 0: self.ax[sub,feature].set_ylabel('frame : '+str(T)) if sub == 0: self.ax[sub,feature].set_title('motion') if feature == 2: self.ax[sub,feature].set_ylabel('frame : '+str(T)) if sub == 0: self.ax[sub,feature].set_title('connectivity')
def rig(x, y, G, labs=None, res=1e-9): """ Compute the RIG metric on all components. Parameters ---------- x : pd.Series or array_like Vector of nodes and their abundance in sample x y : pd.Series or array_like Vector of nodes and their abundance in sample y G : nx.Graph A connected graph of weighted edges Returns ------- float : Distance between sample x and sample y Note ---- If x or y is None, then 1 will be added to the total distance. If they are both None, then the distance will be zero. """ if labs is not None: x = pd.Series(x, index=labs) y = pd.Series(y, index=labs) cost = 0 _G = copy.deepcopy(G) # This converts all of the weights to integers for u, v, d in _G.edges(data=True): d["weight"] = int(d["weight"] / res) # This calculates the largest edge set to offset the insertion cost. weights = [] for comp in nx.connected_component_subgraphs(_G): edges = list(comp.edges(data="weight")) if len(edges) > 0: weights.append(sum(list(zip(*edges))[2])) maxW = max(weights) + 1 for comp in nx.connected_component_subgraphs(_G): nodes = set(comp.nodes()) subx = x[nodes & set(x.keys())] suby = y[nodes & set(y.keys())] c = rig_component(comp, subx, suby, maxW) cost += c return (cost) * res
def sensi_diameter(G):

    import networkx as nx

    """
    Compute graph sensitivity to node removal, in terms of
    the difference in graph diameter on the removal of each
    node in turn.

    This uses local function x_diameter(G), which is modified
    from networkx.diameter(G) to work on XGraphs.

    DL Urban (9 Feb 2007)
    """

    # Starting diameter for full graph:
    if nx.is_connected(G):
        d0 = x_diameter(G)
    else:
        G0 = nx.connected_component_subgraphs(G)[0]  # the largest subgraph
        d0 = x_diameter(G0)
    nc = nx.number_connected_components(G)           # how many are there?

    sensi = {}

    for node in G.nodes():
        ex = G.edges(node)          # a set of edges adjacent to node;
        G.delete_edges_from(ex)     # remove all of these,
        G.delete_node(node)         # and then kill the node, too
        if nx.is_connected(G):
            dx = x_diameter(G)
            cuts = 0
        else:
            Gx = nx.connected_component_subgraphs(G)[0]  # the biggest
            ncx = nx.number_connected_components(G)
            if nc == ncx:
                cuts = 0
            else:
                cuts = 1
            dx = x_diameter(Gx)
        delta = d0 - dx
        G.add_node(node)            # put the node and edges back again
        G.add_edges_from(ex)
        sensi[node] = (cuts, delta)

    # create and return a dict of tuples (cuts, delta)
    return sensi
def analyze_graph(self, graph): start_time = time.time() self.clear_stats() self._graph = graph self.node_count = nx.number_of_nodes(graph) self.edge_count = nx.number_of_edges(graph) degree_list = nx.degree(graph).values() self.connected_component_count = \ sum(1 for cx in nx.connected_components(graph)) if self.connected_component_count is 0: return self._connected_component_graphs = \ nx.connected_component_subgraphs(graph) self._largest_component_graph = \ max(nx.connected_component_subgraphs(graph), key=len) self.average_degree = sum(degree_list) / float(len(degree_list)) self._degree_histogram = nx.degree_histogram(graph) spc = self.shortest_paths(graph) self.shortest_path_count = len(spc) self.maximum_shortest_path_length = \ self.max_shortest_path_length(graph) if self.connected_component_count is 1: self.diameter = nx.diameter(graph) if self.node_count > 1: self.average_shortest_path_length = \ nx.average_shortest_path_length(graph) self.minimum_connectivity = self.min_connectivity(graph) if self.node_count > 0: self.maximum_degree = max(degree_list) self.minimum_degree = min(degree_list) if self.node_count > 1: dg = nx.degree_centrality(graph) self.maximum_degree_centrality = max(list(dg.values())) bc = nx.betweenness_centrality(graph) self.maximum_between_centrality = max(list(bc.values())) self.elapsed_time = time.time() - start_time
def fix_face_winding(mesh): ''' Traverse and change mesh faces in-place to make sure winding is coherent, or that edges on adjacent faces are in opposite directions ''' # we create the face adjacency graph: # every node in g is an index of mesh.faces # every edge in g represents two faces which are connected graph_all = nx.from_edgelist(mesh.face_adjacency) flipped = 0 # we are going to traverse the graph using BFS, so we have to start # a traversal for every connected component for graph in nx.connected_component_subgraphs(graph_all): start = graph.nodes()[0] # we traverse every pair of faces in the graph # we modify mesh.faces and mesh.face_normals in place for face_pair in nx.bfs_edges(graph, start): # for each pair of faces, we convert them into edges, # find the edge that both faces share, and then see if the edges # are reversed in order as you would expect in a well constructed mesh pair = mesh.faces[[face_pair]] edges = faces_to_edges(pair) overlap = group_rows(np.sort(edges,axis=1), require_count=2) if len(overlap) == 0: # only happens on non-watertight meshes continue edge_pair = edges[[overlap[0]]] if edge_pair[0][0] == edge_pair[1][0]: # if the edges aren't reversed, invert the order of one of the faces flipped += 1 mesh.faces[face_pair[1]] = mesh.faces[face_pair[1]][::-1] log.info('Flipped %d/%d edges', flipped, len(mesh.faces)*3)
def get_giant_component(g):
    """
    Take only the largest connected component of the graph.
    """
    graphs = nx.connected_component_subgraphs(g)
    graphs.sort(key=lambda x: -x.number_of_nodes())
    return graphs[0]
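Note that connected_component_subgraphs was deprecated in NetworkX 2.1 and removed in 2.4. A minimal sketch of the same giant-component extraction against the newer API follows (assuming NetworkX >= 2.4; the function name is illustrative, not part of the original code).

import networkx as nx

def get_giant_component_nx2(g):
    # nx.connected_components yields node sets; take the largest and
    # materialize it as a standalone graph (copy() detaches it from g).
    largest_nodes = max(nx.connected_components(g), key=len)
    return g.subgraph(largest_nodes).copy()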
def start(G, name):
    # keep only the largest subgraph
    if not nx.is_connected(G):
        G = nx.connected_component_subgraphs(G)[0]

    # tuple of all parallel python servers to connect with
    ppservers = ()
    #ppservers = ("a3.ft.unicamp.br","a9.ft.unicamp.br","a7.ft.unicamp.br","a8.ft.unicamp.br","a10.ft.unicamp.br")

    job_server = pp.Server(ppservers=ppservers)
    job_server.set_ncpus(1)
    job = []
    capacities = []
    damage = []
    ran = 30  # range

    print "server e variaveis carregados"

    # Sweep a range of different initial capacity values for the network
    for i in xrange(1, ran):
        capacity = 1.0 + (1.0 / float(ran) * float(i))
        job.append(job_server.submit(Attack, (cp.copy(G), capacity),
                                     (GlobalEfficiency, setCapacity),
                                     ("networkx as nx",)))
        capacities.append(capacity)

    job_server.wait()

    for i in xrange(len(job)):
        damage.append(job[i]())

    # Save the results file for the tested strategy
    res = (capacities, damage)
    pickle.dump(res, open("dados/planejada/" + name + ".pickle", "w"))

    job_server.print_stats()
def main(): files = [] for i in range(1,26): files.append("db/Minna_no_nihongo_1.%02d.txt" % i) for i in range(26,51): files.append("db/Minna_no_nihongo_2.%02d.txt" % i) words = get_words_from_files(files) G=nx.Graph() for w in words: G.add_node(w) G.node[w]['chapter'] = words[w]['chapter'] G.node[w]['kana'] = words[w]['kana'] G.node[w]['meaning'] = words[w]['meaning'][:-1] for word1, word2 in itertools.combinations(words,2): for w1 in word1[:-1]: #print w1.encode('utf-8') #print ud.name(w1) if "CJK UNIFIED" in ud.name(w1) and w1 in word2: #print word1.encode('utf-8'), word2.encode('utf-8') G.add_edge(word1, word2) break #G = nx.connected_component_subgraphs(G) G = sorted(nx.connected_component_subgraphs(G), key = len, reverse=True) #print len(G) #nx.draw(G) nx.write_graphml(G[0], "kanjis.graphml", encoding='utf-8', prettyprint=True)
def unitigs(args): """ %prog unitigs best.edges Reads Celera Assembler's "best.edges" and extract all unitigs. """ p = OptionParser(unitigs.__doc__) p.add_option("--maxerr", default=2, type="int", help="Maximum error rate") opts, args = p.parse_args(args) if len(args) != 1: sys.exit(not p.print_help()) bestedges, = args G = read_graph(bestedges, maxerr=opts.maxerr, directed=True) H = nx.Graph() intconv = lambda x: int(x.split("-")[0]) for k, v in G.iteritems(): if k == G.get(v, None): H.add_edge(intconv(k), intconv(v)) nunitigs = nreads = 0 for h in nx.connected_component_subgraphs(H, copy=False): st = [x for x in h if h.degree(x) == 1] if len(st) != 2: continue src, target = st path = list(nx.all_simple_paths(h, src, target)) assert len(path) == 1 path, = path print "|".join(str(x) for x in path) nunitigs += 1 nreads += len(path) logging.debug("A total of {0} unitigs built from {1} reads.".format(nunitigs, nreads))
def brute_rule(g, *args):
    subgraphs = sorted(nx.connected_component_subgraphs(g), key=len)
    if len(subgraphs) == 1:
        return er_rule(g)

    # connect different subgraphs of smallest size
    edge = (subgraphs[0].nodes()[0], subgraphs[1].nodes()[0])
    g.add_edge(*edge)
    return edge
def get_sim_setting( N=10, p=.3, mu=1., K=5, lam=1. ) : """ get largest connected component of Erdos(N,p) graph with exponentially distr. road lengths (avg. mu); Choose k road pairs randomly and assign intensity randomly, exponential lam """ g = nx.erdos_renyi_graph( N, p ) g = nx.connected_component_subgraphs( g )[0] roadnet = nx.MultiDiGraph() def roadmaker() : for i in itertools.count() : yield 'road%d' % i, np.random.exponential( mu ) road_iter = roadmaker() for i, ( u,v,data ) in enumerate( g.edges_iter( data=True ) ) : label, length = road_iter.next() roadnet.add_edge( u, v, label, length=length ) rates = nx.DiGraph() ROADS = [ key for u,v,key in roadnet.edges_iter( keys=True ) ] for i in range( K ) : r1 = random.choice( ROADS ) r2 = random.choice( ROADS ) if not rates.has_edge( r1, r2 ) : rates.add_edge( r1, r2, rate=0. ) data = rates.get_edge_data( r1, r2 ) data['rate'] += np.random.exponential( lam ) return roadnet, rates
def main(): """ Pre-processing: load data, compute centrality measures, write files with node data """ print(nx.__version__) # Load network data, create storage dict, and extract main component depends=nx.read_edgelist("data/depends.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) depends.name="depends" suggests=nx.read_edgelist("data/suggests.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) suggests.name="suggests" imports=nx.read_edgelist("data/imports.csv",delimiter=",",create_using=nx.DiGraph(),nodetype=str,data=(("weight",time_from_today),)) imports.name="imports" nets_dict={"depends":depends,"suggests":suggests,"imports":imports} for k in nets_dict.keys(): main_component=nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes() nets_dict[k]=nx.subgraph(nets_dict[k],main_component) # Run multiple measures on graphs and normalize weights measure_list=[nx.in_degree_centrality,nx.betweenness_centrality,nx.pagerank] for g in nets_dict.values(): multiple_measures(g,measure_list) normalize_weights(g) # Output networks in GraphML format (to store node attributes) for i in nets_dict.items(): # print(i[1].edges(data=True)) nx.write_graphml(i[1],"data/"+i[0]+"_data.graphml") print("") print("All files written with data") """Visualization:
def get_underlying_tree(self, connected_component): # Find the root (color with only one occurrence) root = None colors = [self.coloring[node] for node in connected_component.nodes()] for index, color in enumerate(colors): colors[index] = 'Not a color' if color not in colors: root = connected_component.nodes()[index] break colors[index] = color # If we can't find a root, something's wrong! if root == None: print 'WARNING: Coloring this has no root', colors return connected_component # Create a new NetworkX graph to represent the tree tree = nx.Graph() tree.add_node(root) # Remove the root from the connected component connected_component = nx.Graph(connected_component) connected_component.remove_node(root) # Every new connected component is a subtree for sub_cc in nx.connected_component_subgraphs(connected_component): subtree = self.get_underlying_tree(sub_cc) tree = nx.compose(tree, subtree) tree.add_edge(root, subtree.root) # Root field for use in recursive case to connect tree and subtree tree.root = root return tree
def get_connected_components(self, color_set): """ A generator for connected components given a specific color set :param color_set: The color set :return: A generator for connected components (subgraphs) induced by color_set """ # Make an empty set to store vertices v_set = set() # Find vertices that are colored with colors in color_set for index, color in enumerate(self.coloring): if color in color_set: v_set.add(index) cc_list = [] for new_cc in nx.connected_component_subgraphs(self.graph.subgraph(v_set)): found = False for n in new_cc.node: new_cc.node[n]['color'] = self.coloring[n] for i, cc in enumerate(cc_list): if nx.is_isomorphic(new_cc, cc, node_match=lambda n1, n2: n1['color'] == n2['color']): cc_list[i].occ += 1 found = True break if not found: new_cc.occ = 1 cc_list.append(new_cc) return cc_list
def lanl_graph():
    """ Return the lanl internet view graph from lanl_routes.edgelist
    """
    import networkx as nx
    try:
        fh = open('lanl_routes.edgelist', 'r')
    except IOError:
        print "lanl_routes.edgelist not found"
        raise

    G = nx.Graph()

    time = {}
    time[0] = 0  # assign 0 to center node
    for line in fh.readlines():
        (head, tail, rtt) = line.split()
        G.add_edge(int(head), int(tail))
        time[int(head)] = float(rtt)

    # get largest component and assign ping times to G0.rtt dictionary
    G0 = nx.connected_component_subgraphs(G)[0]
    G0.rtt = {}
    for n in G0:
        G0.rtt[n] = time[n]

    return G0
def atlas6(): """ Return the atlas of all connected graphs of 6 nodes or less. Attempt to check for isomorphisms and remove. """ Atlas = graph_atlas_g()[0:208] # 208 # remove isolated nodes, only connected graphs are left U = nx.Graph() # graph for union of all graphs in atlas for G in Atlas: zerodegree = [n for n in G if G.degree(n) == 0] for n in zerodegree: G.remove_node(n) U = nx.disjoint_union(U, G) # list of graphs of all connected components C = nx.connected_component_subgraphs(U) UU = nx.Graph() # do quick isomorphic-like check, not a true isomorphism checker nlist = [] # list of nonisomorphic graphs for G in C: # check against all nonisomorphic graphs so far if not iso(G, nlist): nlist.append(G) UU = nx.disjoint_union(UU, G) # union the nonisomorphic graphs return UU
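The iso() helper referenced above is defined elsewhere in the original example. A plausible minimal sketch is shown here, assuming the fast nx.could_be_isomorphic pre-check is an acceptable stand-in for the "isomorphic-like" test the comment describes (a strict version would call nx.is_isomorphic instead).

def iso(G1, glist):
    # Return True if G1 looks isomorphic to any graph already collected.
    # nx.could_be_isomorphic only checks necessary conditions (degree,
    # triangle, and clique sequences), so it can report false positives.
    return any(nx.could_be_isomorphic(G1, G2) for G2 in glist)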
def is_bipartite_node_set(G, nodes):
    """Returns True if nodes and G/nodes are a bipartition of G.

    Parameters
    ----------
    G : NetworkX graph

    nodes: list or container
      Check if nodes are one of the two bipartite sets.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> X = set([1,3])
    >>> nx.is_bipartite_node_set(G,X)
    True

    Notes
    -----
    For connected graphs the bipartite sets are unique.  This function
    handles disconnected graphs.
    """
    S = set(nodes)
    for CC in nx.connected_component_subgraphs(G):
        X, Y = bipartite_sets(CC)
        if not ((X.issubset(S) and Y.isdisjoint(S)) or
                (Y.issubset(S) and X.isdisjoint(S))):
            return False
    return True
) if args.subsample_graph: # subsample g_nx nodes = g_nx.nodes(data=False) np.random.shuffle(nodes) subgraph_num_nodes = int(len(nodes) * subgraph_size) g_nx = g_nx.subgraph(nodes[0:subgraph_num_nodes]) # Check if graph is connected; if not, then select the largest subgraph to continue if nx.is_connected(g_nx): print("Graph is connected") else: print("Graph is not connected") # take the largest connected component as the data g_nx = max(nx.connected_component_subgraphs(g_nx, copy=True), key=len) print("Largest subgraph statistics: {} nodes, {} edges".format( g_nx.number_of_nodes(), g_nx.number_of_edges())) # From the original graph, extract E_test and G_test edge_splitter_test = EdgeSplitter(g_nx) if args.hin: g_test, edge_data_ids_test, edge_data_labels_test = edge_splitter_test.train_test_split( p=p, edge_label=args.edge_type, edge_attribute_label=args.edge_attribute_label, edge_attribute_threshold=args.edge_attribute_threshold, attribute_is_datetime=args.attribute_is_datetime, method=args.sampling_method, probs=sampling_probs, )
def scenario_geant(net_cache=[0.05], n_contents=100000, alpha=[0.6, 0.8, 1.0]): """ Return a scenario based on GARR topology Parameters ---------- scenario_id : str String identifying the scenario (will be in the filename) net_cache : float Size of network cache (sum of all caches) normalized by size of content population n_contents : int Size of content population alpha : float List of alpha of Zipf content distribution """ rate = 12.0 warmup = 9000 duration = 36000 T = 'GEANT' # name of the topology # 240 nodes in the main component topology = fnss.parse_topology_zoo( path.join(scenarios_dir, 'resources/Geant2012.graphml')).to_undirected() topology = list(nx.connected_component_subgraphs(topology))[0] deg = nx.degree(topology) receivers = [v for v in topology.nodes() if deg[v] == 1] # 8 nodes caches = [v for v in topology.nodes() if deg[v] > 2] # 19 nodes # attach sources to topology source_attachments = [v for v in topology.nodes() if deg[v] == 2] # 13 nodes sources = [] for v in source_attachments: u = v + 1000 # node ID of source topology.add_edge(v, u) sources.append(u) routers = [ v for v in topology.nodes() if v not in caches + sources + receivers ] # randomly allocate contents to sources contents = dict([(v, []) for v in sources]) for c in range(1, n_contents + 1): s = choice(sources) contents[s].append(c) for v in sources: fnss.add_stack(topology, v, 'source', {'contents': contents[v]}) for v in receivers: fnss.add_stack(topology, v, 'receiver', {}) for v in routers: fnss.add_stack(topology, v, 'router', {}) # set weights and delays on all links fnss.set_weights_constant(topology, 1.0) fnss.set_delays_constant(topology, internal_link_delay, 'ms') # label links as internal or external for u, v in topology.edges(): if u in sources or v in sources: topology.edge[u][v]['type'] = 'external' # this prevents sources to be used to route traffic fnss.set_weights_constant(topology, 1000.0, [(u, v)]) fnss.set_delays_constant(topology, external_link_delay, 'ms', [(u, v)]) else: topology.edge[u][v]['type'] = 'internal' for nc in net_cache: size = (float(nc) * n_contents) / len(caches) # size of a single cache C = str(nc) for v in caches: fnss.add_stack(topology, v, 'cache', {'size': size}) fnss.write_topology( topology, path.join(scenarios_dir, topo_prefix + 'T=%s@C=%s' % (T, C) + '.xml')) print('[WROTE TOPOLOGY] T: %s, C: %s' % (T, C)) for a in alpha: event_schedule = gen_req_schedule(receivers, rate, warmup, duration, n_contents, a) fnss.write_event_schedule( event_schedule, path.join(scenarios_dir, es_prefix + 'T=%s@A=%s' % (T, str(a)) + '.xml')) print('[WROTE SCHEDULE] T: %s, Alpha: %s, Events: %d' % (T, str(a), len(event_schedule)))
def probabilistic_hrg(G, num_samples=1, n=None): ''' Args: ------------ G: input graph (nx obj) num_samples: (int) in the 'grow' process, this is number of synthetic graphs to generate n: (int) num_nodes; number of nodes in the resulting graphs Returns: List of synthetic graphs (H^stars) ''' graphletG = [] if DEBUG: print G.number_of_nodes() if DEBUG: print G.number_of_edges() start_time = time.time() G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) if n is None: num_nodes = G.number_of_nodes() else: num_nodes = n if DEBUG: print G.number_of_nodes() if DEBUG: print G.number_of_edges() graph_checks(G) if DEBUG: print if DEBUG: print "--------------------" if DEBUG: print "-Tree Decomposition-" if DEBUG: print "--------------------" prod_rules = {} if num_nodes >= 500: print ' -- subgraphs' for Gprime in gs.rwr_sample(G, 2, 300): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T #td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T # td.new_visit(T, G, prod_rules, TD) td.new_visit(T, G, prod_rules) if DEBUG: print if DEBUG: print "--------------------" if DEBUG: print "- Production Rules -" if DEBUG: print "--------------------" for k in prod_rules.iterkeys(): if DEBUG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. if DEBUG: print '\t -> ', d, prod_rules[k][d] rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append( ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DEBUG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 # print rules #print 'P. Rules' if DEBUG: print(" --- Inference (PHRG) %s seconds ---" % (time.time() - start_time)) start_time = time.time() g = pcfg.Grammar('S') for (id, lhs, rhs, prob) in rules: #print type(id), type(lhs), type(rhs), type(prob) if DEBUG: print ' ', id, lhs, rhs, prob g.add_rule(pcfg.Rule(id, lhs, rhs, prob)) if DEBUG: print "Starting max size" num_nodes = num_nodes num_samples = num_samples g.set_max_size(num_nodes) if DEBUG: print "Done with max size" Hstars = [] for i in range(0, num_samples): rule_list = g.sample(num_nodes) if DEBUG: pp.pprint(rule_list) hstar = grow(rule_list, g)[0] # print "H* nodes: " + str(hstar.number_of_nodes()) # print "H* edges: " + str(hstar.number_of_edges()) Hstars.append(hstar) if DEBUG: print(" --- Graph gen (Fixed-size) %s seconds ---" % (time.time() - start_time)) return Hstars
def probabilistic_hrg_deriving_prod_rules(G, n=None): ''' Rule extraction procedure ''' if G is None: return G.remove_edges_from(G.selfloop_edges()) giant_nodes = max(nx.connected_component_subgraphs(G), key=len) G = nx.subgraph(G, giant_nodes) if n is None: num_nodes = G.number_of_nodes() else: num_nodes = n graph_checks(G) if DEBUG: print if DEBUG: print "--------------------" if DEBUG: print "-Tree Decomposition-" if DEBUG: print "--------------------" prod_rules = {} if num_nodes >= 500: for Gprime in gs.rwr_sample(G, 2, 300): T = td.quickbb(Gprime) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) else: T = td.quickbb(G) root = list(T)[0] T = td.make_rooted(T, root) T = binarize(T) root = list(T)[0] root, children = T td.new_visit(T, G, prod_rules) # print (T) # print type(root), type(children) # print type(T), len(T), len(T[1]) # print [type(x) for x in T[1]] # print (prod_rules) #TODO from enumhrgtree import enum_hrg_tree # enum_hrg_tree(T) # exit() if DEBUG: print if DEBUG: print "--------------------" if DEBUG: print "- Production Rules -" if DEBUG: print "--------------------" for k in prod_rules.iterkeys(): if DEBUG: print k s = 0 for d in prod_rules[k]: s += prod_rules[k][d] for d in prod_rules[k]: prod_rules[k][d] = float(prod_rules[k][d]) / float( s) # normailization step to create probs not counts. if DEBUG: print '\t -> ', d, prod_rules[k][d] # pp.pprint(prod_rules) rules = [] id = 0 for k, v in prod_rules.iteritems(): sid = 0 for x in prod_rules[k]: rhs = re.findall("[^()]+", x) rules.append( ("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x])) if DEBUG: print("r%d.%d" % (id, sid), "%s" % re.findall("[^()]+", k)[0], rhs, prod_rules[k][x]) sid += 1 id += 1 return rules
source='author1', target='author2', edge_attr='title') degree_sequence = sorted([d for n, d in GG.degree()], reverse=True) # degree sequence degreeCount = collections.Counter(degree_sequence) deg, cnt = zip(*degreeCount.items()) fig, ax = plt.subplots() plt.bar(deg, cnt, width=0.80, color='b') plt.title("Degree Histogram") plt.ylabel("Count") plt.xlabel("Degree") ax.set_xticks([d + 0.4 for d in deg]) ax.set_xticklabels(deg) plt.axes([0.4, 0.4, 0.5, 0.5]) Gcc = sorted(nx.connected_component_subgraphs(GG), key=len, reverse=True)[0] pos = nx.spring_layout(GG) plt.axis('off') nx.draw_networkx_nodes(GG, pos, node_size=20) nx.draw_networkx_edges(GG, pos, alpha=0.4) plt.show() print('Degree Distribution Histogram done') #FIRST NETWORK MEASURE print("first network measure") print("transitivity:") print(nx.transitivity(G)) #SECOND NETOWRK MEASURE print("second network measure") print("reciprocity:")
def get_single_data(trial_i, root_input): import scipy.io as sio cancer_related_genes = { 4288: 'MKI67', 1026: 'CDKN1A', 472: 'ATM', 7033: 'TFF3', 2203: 'FBP1', 7494: 'XBP1', 1824: 'DSC2', 1001: 'CDH3', 11200: 'CHEK2', 7153: 'TOP2A', 672: 'BRCA1', 675: 'BRCA2', 580: 'BARD1', 9: 'NAT1', 771: 'CA12', 367: 'AR', 7084: 'TK2', 5892: 'RAD51D', 2625: 'GATA3', 7155: 'TOP2B', 896: 'CCND3', 894: 'CCND2', 10551: 'AGR2', 3169: 'FOXA1', 2296: 'FOXC1' } data = dict() f_name = 'overlap_data_%02d.mat' % trial_i re = sio.loadmat(root_input + f_name)['save_data'][0][0] data['data_X'] = np.asarray(re['data_X'], dtype=np.float64) data_y = [_[0] for _ in re['data_Y']] data['data_Y'] = np.asarray(data_y, dtype=np.float64) data_edges = [[_[0] - 1, _[1] - 1] for _ in re['data_edges']] data['data_edges'] = np.asarray(data_edges, dtype=int) data_pathways = [[_[0], _[1]] for _ in re['data_pathways']] data['data_pathways'] = np.asarray(data_pathways, dtype=int) data_entrez = [_[0] for _ in re['data_entrez']] data['data_entrez'] = np.asarray(data_entrez, dtype=int) data['data_splits'] = {i: dict() for i in range(5)} data['data_subsplits'] = { i: {j: dict() for j in range(5)} for i in range(5) } for i in range(5): xx = re['data_splits'][0][i][0][0]['train'] data['data_splits'][i]['train'] = [_ - 1 for _ in xx[0]] xx = re['data_splits'][0][i][0][0]['test'] data['data_splits'][i]['test'] = [_ - 1 for _ in xx[0]] for j in range(5): xx = re['data_subsplits'][0][i][0][j]['train'][0][0] data['data_subsplits'][i][j]['train'] = [_ - 1 for _ in xx[0]] xx = re['data_subsplits'][0][i][0][j]['test'][0][0] data['data_subsplits'][i][j]['test'] = [_ - 1 for _ in xx[0]] re_path = [_[0] for _ in re['re_path_varInPath']] data['re_path_varInPath'] = np.asarray(re_path) re_path_entrez = [_[0] for _ in re['re_path_entrez']] data['re_path_entrez'] = np.asarray(re_path_entrez) re_path_ids = [_[0] for _ in re['re_path_ids']] data['re_path_ids'] = np.asarray(re_path_ids) re_path_lambdas = [_ for _ in re['re_path_lambdas'][0]] data['re_path_lambdas'] = np.asarray(re_path_lambdas) re_path_groups = [_[0][0] for _ in re['re_path_groups_lasso'][0]] data['re_path_groups_lasso'] = np.asarray(re_path_groups) re_path_groups_overlap = [_[0][0] for _ in re['re_path_groups_overlap'][0]] data['re_path_groups_overlap'] = np.asarray(re_path_groups_overlap) re_edge = [_[0] for _ in re['re_edge_varInGraph']] data['re_edge_varInGraph'] = np.asarray(re_edge) re_edge_entrez = [_[0] for _ in re['re_edge_entrez']] data['re_edge_entrez'] = np.asarray(re_edge_entrez) data['re_edge_groups_lasso'] = np.asarray(re['re_edge_groups_lasso']) data['re_edge_groups_overlap'] = np.asarray(re['re_edge_groups_overlap']) for method in [ 're_path_re_lasso', 're_path_re_overlap', 're_edge_re_lasso', 're_edge_re_overlap' ]: res = {fold_i: dict() for fold_i in range(5)} for fold_ind, fold_i in enumerate(range(5)): res[fold_i]['lambdas'] = re[method][0][fold_i]['lambdas'][0][0][0] res[fold_i]['kidx'] = re[method][0][fold_i]['kidx'][0][0][0] res[fold_i]['kgroups'] = re[method][0][fold_i]['kgroups'][0][0][0] res[fold_i]['kgroupidx'] = re[method][0][fold_i]['kgroupidx'][0][0] res[fold_i]['groups'] = re[method][0][fold_i]['groups'][0] res[fold_i]['sbacc'] = re[method][0][fold_i]['sbacc'][0] res[fold_i]['AS'] = re[method][0][fold_i]['AS'][0] res[fold_i]['completeAS'] = re[method][0][fold_i]['completeAS'][0] res[fold_i]['lstar'] = re[method][0][fold_i]['lstar'][0][0][0][0] res[fold_i]['auc'] = re[method][0][fold_i]['auc'][0] res[fold_i]['acc'] = re[method][0][fold_i]['acc'][0] 
res[fold_i]['bacc'] = re[method][0][fold_i]['bacc'][0] res[fold_i]['perf'] = re[method][0][fold_i]['perf'][0][0] res[fold_i]['pred'] = re[method][0][fold_i]['pred'] res[fold_i]['Ws'] = re[method][0][fold_i]['Ws'][0][0] res[fold_i]['oWs'] = re[method][0][fold_i]['oWs'][0][0] res[fold_i]['nextGrad'] = re[method][0][fold_i]['nextGrad'][0] data[method] = res import networkx as nx g = nx.Graph() ind_pathways = {_: i for i, _ in enumerate(data['data_entrez'])} all_nodes = {ind_pathways[_]: '' for _ in data['re_path_entrez']} maximum_nodes, maximum_list_edges = set(), [] for edge in data['data_edges']: if edge[0] in all_nodes and edge[1] in all_nodes: g.add_edge(edge[0], edge[1]) isolated_genes = set() maximum_genes = set() for cc in nx.connected_component_subgraphs(g): if len(cc) <= 5: for item in list(cc): isolated_genes.add(data['data_entrez'][item]) else: for item in list(cc): maximum_nodes = set(list(cc)) maximum_genes.add(data['data_entrez'][item]) maximum_nodes = np.asarray(list(maximum_nodes)) subgraph = nx.Graph() for edge in data['data_edges']: if edge[0] in maximum_nodes and edge[1] in maximum_nodes: if edge[0] != edge[1]: # remove some self-loops maximum_list_edges.append(edge) subgraph.add_edge(edge[0], edge[1]) data['map_entrez'] = np.asarray( [data['data_entrez'][_] for _ in maximum_nodes]) data['edges'] = np.asarray(maximum_list_edges, dtype=int) data['costs'] = np.asarray([1.] * len(maximum_list_edges), dtype=np.float64) data['x'] = data['data_X'][:, maximum_nodes] data['y'] = data['data_Y'] data['nodes'] = np.asarray(range(len(maximum_nodes)), dtype=int) data['cancer_related_genes'] = cancer_related_genes for edge_ind, edge in enumerate(data['edges']): uu = list(maximum_nodes).index(edge[0]) vv = list(maximum_nodes).index(edge[1]) data['edges'][edge_ind][0] = uu data['edges'][edge_ind][1] = vv method_list = [ 're_path_re_lasso', 're_path_re_overlap', 're_edge_re_lasso', 're_edge_re_overlap' ] found_set = {method: set() for method in method_list} for method in method_list: for fold_i in range(5): best_lambda = data[method][fold_i]['lstar'] kidx = data[method][fold_i]['kidx'] re = list(data[method][fold_i]['lambdas']).index(best_lambda) ws = data[method][fold_i]['oWs'][:, re] for item in [kidx[_] for _ in np.nonzero(ws[1:])[0]]: if item in cancer_related_genes: found_set[method].add(cancer_related_genes[item]) data['found_related_genes'] = found_set return data
def topology_tiscali2(**kwargs): """Return a scenario based on Tiscali topology, parsed from RocketFuel dataset Differently from plain Tiscali, this topology some receivers are appended to routers and only a subset of routers which are actually on the path of some traffic are selected to become ICN routers. These changes make this topology more realistic. Parameters ---------- seed : int, optional The seed used for random number generation Returns ------- topology : fnss.Topology The topology object """ # 240 nodes in the main component topology = fnss.parse_rocketfuel_isp_map( path.join(TOPOLOGY_RESOURCES_DIR, '3257.r0.cch')).to_undirected() topology = list(nx.connected_component_subgraphs(topology))[0] # degree of nodes deg = nx.degree(topology) # nodes with degree = 1 onedeg = [v for v in topology.nodes() if deg[v] == 1] # they are 80 # we select as caches nodes with highest degrees # we use as min degree 6 --> 36 nodes # If we changed min degrees, that would be the number of caches we would have: # Min degree N caches # 2 160 # 3 102 # 4 75 # 5 50 # 6 36 # 7 30 # 8 26 # 9 19 # 10 16 # 11 12 # 12 11 # 13 7 # 14 3 # 15 3 # 16 2 icr_candidates = [v for v in topology.nodes() if deg[v] >= 6] # 36 nodes # Add remove caches to adapt betweenness centrality of caches for i in [181, 208, 211, 220, 222, 250, 257]: icr_candidates.remove(i) icr_candidates.extend([232, 303, 326, 363, 378]) # sources are node with degree 1 whose neighbor has degree at least equal to 5 # we assume that sources are nodes connected to a hub # they are 44 sources = [ v for v in onedeg if deg[list(topology.edge[v].keys())[0]] > 4.5 ] # they are # receivers are node with degree 1 whose neighbor has degree at most equal to 4 # we assume that receivers are nodes not well connected to the network # they are 36 receivers = [ v for v in onedeg if deg[list(topology.edge[v].keys())[0]] < 4.5 ] # we set router stacks because some strategies will fail if no stacks # are deployed routers = [v for v in topology.nodes() if v not in sources + receivers] # set weights and delays on all links fnss.set_weights_constant(topology, 1.0) fnss.set_delays_constant(topology, INTERNAL_LINK_DELAY, 'ms') # deploy stacks topology.graph['icr_candidates'] = set(icr_candidates) for v in sources: fnss.add_stack(topology, v, 'source') for v in receivers: fnss.add_stack(topology, v, 'receiver') for v in routers: fnss.add_stack(topology, v, 'router') # label links as internal or external for u, v in topology.edges(): if u in sources or v in sources: topology.edge[u][v]['type'] = 'external' # this prevents sources to be used to route traffic fnss.set_weights_constant(topology, 1000.0, [(u, v)]) fnss.set_delays_constant(topology, EXTERNAL_LINK_DELAY, 'ms', [(u, v)]) else: topology.edge[u][v]['type'] = 'internal' return IcnTopology(topology)
def partition_girvan_newman(graph, max_depth):
    """
    Use your approximate_betweenness implementation to partition a graph.
    Unlike in class, here you will not implement this recursively. Instead,
    just remove edges until more than one component is created, then return
    those components.
    That is, compute the approximate betweenness of all edges, and remove
    them until multiple components are created.

    You only need to compute the betweenness once.
    If there are ties in edge betweenness, break by edge name (e.g.,
    (('A', 'B'), 1.0) comes before (('B', 'C'), 1.0)).

    Note: the original graph variable should not be modified. Instead,
    make a copy of the original graph prior to removing edges.
    See the Graph.copy method
    https://networkx.github.io/documentation/development/reference/generated/networkx.Graph.copy.html
    Params:
      graph.......A networkx Graph
      max_depth...An integer representing the maximum depth to search.

    Returns:
      A list of networkx Graph objects, one per partition.

    >>> components = partition_girvan_newman(example_graph(), 5)
    >>> components = sorted(components, key=lambda x: sorted(x.nodes())[0])
    >>> sorted(components[0].nodes())
    ['A', 'B', 'C']
    >>> sorted(components[1].nodes())
    ['D', 'E', 'F', 'G']
    """
    ###TODO
    #approximate_betweenness(graph,max_depth)
    graph_new = graph.copy()
    betweenness = approximate_betweenness(graph_new, max_depth)
    #print("betweenness_result::",betweenness)

    def getKey_one(item1):
        return item1[1]

    sort_betweenness = sorted(betweenness.items(), key=getKey_one, reverse=True)

    list_graphs = []
    tuple_list = 0
    while len(list_graphs) <= 1:
        graph_new.remove_edge(sort_betweenness[tuple_list][0][1],
                              sort_betweenness[tuple_list][0][0])
        tuple_to_list = nx.connected_component_subgraphs(graph_new)
        #print("printlist::",tuple_to_list)
        list_graphs = list(tuple_to_list)
        tuple_list = tuple_list + 1
    #print("chk len::",len(list_graphs))
    return list_graphs
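For comparison only: NetworkX itself ships a Girvan-Newman generator (networkx.algorithms.community.girvan_newman in the 2.x series). The assignment above requires the approximate_betweenness re-implementation, so the following is just a sketch of getting the first split with the built-in; the function name is illustrative.

from networkx.algorithms.community import girvan_newman

def first_girvan_newman_split(graph):
    # girvan_newman yields a tuple of node sets after each split; the first
    # item is the partition produced by the first round of edge removals.
    first_split = next(girvan_newman(graph))
    return [graph.subgraph(nodes).copy() for nodes in first_split]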
def get_components(graph):
    """
    A helper function you may use below.
    Returns the list of all connected components in the given graph.
    """
    return [c for c in nx.connected_component_subgraphs(graph)]
def fold(self, reverse=False): """ fold edges of the PlanarNet Returns ------- A solid or a compound of faces Notes ----- This method fold the planar net w.r.t to the edge angles. It yields a shell member """ for edge in list(self.edges()): if0 = edge[0] if1 = edge[1] ag = self[if0][if1]['angle'] # handle folding direction if reverse: angle = -ag else: angle = ag iedge = self[if0][if1]['iedge'] ed = self.lfaces[if0].subshapes('Edge')[iedge] points = ed.poly() pdir = np.array(points[1]) - np.array(points[0]) pabout = ed.center() # create 2 subgraphs self.remove_edge(if0, if1) # DEPRECATED function in networkx lgraphs = list(nx.connected_component_subgraphs(nx.Graph(self))) #lgraphs = [ nx.Graph(self).subgraph(c).copy() for c in nx.connected_components(nx.Graph(self)) ] ln0 = lgraphs[0].node.keys() ln1 = lgraphs[1].node.keys() self.add_edge(if0, if1, angle=ag, iedge=iedge) if if1 in ln1: lfaces1 = ln1 else: lfaces1 = ln0 # fold all faces in set lfaces1 for f in lfaces1: self.lfaces[f] = cm.rotated(self.lfaces[f], pabout, pdir, angle) # update faces centroid in the Graph for iface in self.node: face = self.lfaces[iface] self.pos[iface] = face.center()[0:2] # creates the shell self.shell = cm.Shell(self.lfaces) if reverse: self.folded = False else: self.folded = True asolid = cm.Solid([self.shell]) vertices = asolid.subshapes('Vertex') edges = asolid.subshapes('Edge') faces = asolid.subshapes('Face') Euler = len(vertices)-len(edges)+len(faces) print("V", len(vertices)) print("E", len(edges)) print("F", len(faces)) print("Euler check (2): V-E+F :", Euler) if asolid.check(): print("closed shape") # update the graph else: print("open shape") return asolid
def getOldDataSummary(self, ls, data): """ dump the graphs from a database. This is for legacy compatibility with the communityNetworkMonitor project """ scanQuery = "SELECT * from scan" QUERY = """select snode.Id AS sid, dnode.Id AS did, etx.etx_value AS etxv from \ link, scan, node as snode, node as dnode, etx \ WHERE link.scan_Id = scan.Id AND snode.Id = link.from_node_Id \ AND dnode.Id = link.to_node_Id AND etx.link_Id = link.Id \ AND dnode.scan_Id = scan.Id AND snode.scan_Id = scan.Id AND \ scan.Id= %d""" try: q = ls.query("Id", "time", "scan_type", "network").from_statement(scanQuery) if len(q.all()) == 0: raise except: print "something went wrong opening the db" import sys sys.exit(1) numScan = len(q.all()) scanCounter = 0 data.etxThreshold = 10 for [scanId, scanTime, scanType, scanNetwork] in q: data.scanTree[scanNetwork][scanType].append([scanId, scanTime]) for net in data.scanTree: counter = 0 # for graz I have one sample every 10 minutes, # for ninux/Wien I have one sample every 5 minutes if net == "FFGraz": networkPenalty = 2 else: networkPenalty = 1 for scanId in data.scanTree[net]['ETX']: queryString = QUERY % scanId[0] q = ls.query("sid", "did", "etxv").\ from_statement(queryString) dirtyG = nx.Graph() for s, d, e in q: if e < data.etxThreshold: dirtyG.add_edge(s, d, weight=float(e)) if len(dirtyG) != 0: G = max(nx.connected_component_subgraphs(dirtyG, copy=True), key=len) componentSize = len(G) G.graph = { "network": net, "scan_time": scanId[1], "scan_id": scanId[0] } else: G = nx.Graph() componentSize = 0 if componentSize < 10: continue counter += 1 etxV = [e[2]['weight'] for e in G.edges(data=True)] data.routeData[net][scanId[0]]["Graph"] = G weightedPaths = nx.shortest_path(G, weight="weight") for s in G.nodes(): for d in G.nodes(): if s == d: continue if d in data.routeData[net][scanId[0]]["data"] and \ s in data.routeData[net][scanId[0]]["data"][d]: continue currPath = weightedPaths[s][d] pathWeight = 0 for i in range(len(currPath) - 1): pathWeight += G[currPath[i]][currPath[i + 1]]["weight"] data.routeData[net][scanId[0]]["data"][s][d] = \ [len(weightedPaths[s][d])-1, pathWeight] data.routeData[net][scanId[0]]["Graph"] = G nd = filter(lambda x: x == 1, dirtyG.degree().values()) nl = len(nd) nn = len(dirtyG) le = len(etxV) data.dataSummary[net][scanId[0]][("numLeaves", 9)] = nl data.dataSummary[net][scanId[0]][("time", 30)] = scanId[1] data.dataSummary[net][scanId[0]][("numNodes", 9)] = nn data.dataSummary[net][scanId[0]][("numEdges", 9)] = le data.dataSummary[net][scanId[0]][("largestComponent", 16)] = \ componentSize scanCounter += 1 if int((100000 * 1.0 * scanCounter / numScan)) % 10000 == 0: print int( (100 * 1.0 * scanCounter / numScan)), "% complete"
if is_node_class(t): for o in model[t]: if o in G.nodes(): G.nodes()[o]['nclass'] = t G.nodes()[o]['ndata'] = model[t][o] else: print('orphaned node', t, o) swing_node = '' for n1, data in G.nodes(data=True): if 'nclass' in data: if 'bustype' in data['ndata']: if data['ndata']['bustype'] == 'SWING': swing_node = n1 sub_graphs = nx.connected_component_subgraphs(G) seg_loads = {} # [name][kva, phases] total_kva = 0.0 # for sg in sub_graphs: # print (sg.number_of_nodes()) # if sg.number_of_nodes() < 10: # print(sg.nodes) # print(sg.edges) for n1, data in G.nodes(data=True): if 'ndata' in data: kva = accumulate_load_kva(data['ndata']) if kva > 0: total_kva += kva nodes = nx.shortest_path(G, n1, swing_node) edges = zip(nodes[0:], nodes[1:]) # print (n1, '{:.2f}'.format(kva), 'kva on', data['ndata']['phases'])
def main(graph_name): G = nx.read_gml(graph_name) G = nx.connected_component_subgraphs(G)[0] # Giant component #dir=graph_name.split("fr")[0] dir = graph_name.split("mast")[0] dir = dir + "roles/" dir2 = graph_name.split("mast")[0] time_in_system = 100 #minimum amount of time in the sytem for a user to be included in the statistics # clustering, vitality, activity, betweenness,weigh_ins,degree,time_in_system top_ten_feature = 'activity' print "\n\n", top_ten_feature name0 = dir + "overlap_top_ten_" + str( top_ten_feature) + "_averages_" + str( time_in_system) + "days_excluding_themselves_15.dat" file0 = open(name0, 'wt') file0.close() list_top_tens = [] # collect the top_tens of the system list_top_tens_percent_weight_change = [] # for node in G.nodes(): # print G.node[node]["activity"],G.node[node]["time_in_system"] # G.node[node]["activity"]=float(G.node[node]["activity"])/float(G.node[node]["time_in_system"]) # print G.node[node]["activity"] f = lambda x: x[1][top_ten_feature] membership = map(f, G.nodes(data=True)) membership.sort() top_ten_values = membership[-10:] #TOP TEN # print top_ten_values # the sorted top-tens: from smallest to largest #print membership #the whole sorted list cont = 0 for value in top_ten_values: for node in G.nodes(): if (G.node[node][top_ten_feature] == value) and (node not in list_top_tens): list_top_tens.append(node) list_top_tens_percent_weight_change.append( float(G.node[node]['percentage_weight_change'])) name260 = dir2 + "scatter_plot_roles_top_ten_" + str( top_ten_feature) + ".dat" file260 = open(name260, 'at') print >> file260, G.node[node]['Pi'], G.node[node][ 'zi'], top_ten_feature file260.close() break # if there are more than 10, it will pick just the first 10 according to their id name00 = dir + "R6s_and_top_tens_averages_" + str( time_in_system) + "days_exclude_R6s.dat" file0 = open(name00, 'at') print >> file0, top_ten_feature, numpy.mean( list_top_tens_percent_weight_change), numpy.std( list_top_tens_percent_weight_change) file0.close() file260.close() for node in list_top_tens: print G.node[node]['label'], G.node[node][top_ten_feature], len( G.neighbors(node)), G.node[node]['Pi'], G.node[node]['zi'] # studying the possible cumulative effect of more than one R6 on the population: for node in G.nodes(): cont = 0 for n in G.neighbors(node): if (n in list_top_tens): cont += 1 G.node[node]["top_ten_overlap"] = int(cont) for r in range(len(list_top_tens) + 1): list_BMI_changes = [] list_weight_changes = [] list_percentage_weight_changes = [] list_activities = [] for node in G.nodes(): if int(G.node[node]["top_ten_overlap"]) == r: if node in list_top_tens: # i exclude the top_tens per se pass else: if int(G.node[node]['time_in_system']) > time_in_system: list_BMI_changes.append( float(G.node[node]['final_BMI']) - float(G.node[node]['initial_BMI'])) list_weight_changes.append( float(G.node[node]['weight_change'])) list_percentage_weight_changes.append( float(G.node[node]['percentage_weight_change'])) list_activities.append( float(G.node[node]['activity']) / float(G.node[node]['time_in_system'])) if len(list_BMI_changes) > 0: average_BMI_change = numpy.mean(list_BMI_changes) average_weight_change = numpy.mean(list_weight_changes) average_percentage_weight_change = numpy.mean( list_percentage_weight_changes) average_activity = numpy.mean(list_activities) deviation_BMI = numpy.std(list_BMI_changes) deviation_weight = numpy.std(list_weight_changes) deviation_percentage_weight = numpy.std( list_percentage_weight_changes) deviation_activity = 
numpy.std(list_activities) #print out file0 = open(name0, 'at') print >> file0, r, len( list_BMI_changes ), average_percentage_weight_change, deviation_percentage_weight, average_BMI_change, deviation_BMI, average_weight_change, deviation_weight, average_activity, deviation_activity file0.close() # averages for the neighbors of a given top-ten ######## for node in list_top_tens: neighbors = G.neighbors(node) #a list of nodes average_BMI_change = 0.0 list_BMI_changes = [] average_weight_change = 0.0 list_weight_changes = [] average_percentage_weight_change = 0.0 list_percentage_weight_changes = [] average_activity = 0.0 # ojo! sera dividida por el numero de dias!!!!! list_activities = [] eff_degree = 0 for n in G.neighbors(node): if int(G.node[n]['time_in_system']) > time_in_system: eff_degree = eff_degree + 1.0 list_BMI_changes.append( float(G.node[n]['final_BMI']) - float(G.node[n]['initial_BMI'])) list_weight_changes.append(float(G.node[n]['weight_change'])) list_percentage_weight_changes.append( float(G.node[n]['percentage_weight_change'])) list_activities.append( float(G.node[n]['activity']) / float(G.node[n]['time_in_system'])) #averages average_weight_change = numpy.mean(list_weight_changes) average_percentage_weight_change = numpy.mean( list_percentage_weight_changes) average_BMI_change = numpy.mean(list_BMI_changes) average_activity = numpy.mean(list_activities) #standard deviation deviation_BMI = numpy.std(list_BMI_changes) deviation_weight = numpy.std(list_weight_changes) deviation_percentage_weight = numpy.std(list_weight_changes) deviation_activity = numpy.std(list_activities) # print cont,"R6: ",average_weight_change,deviation_weight,average_BMI_change,deviation_BMI,average_activity,deviation_activity #print out name1 = dir + "ego_top_ten_" + str( top_ten_feature) + "_average_BMI_change_" + str( time_in_system) + "days.dat" file1 = open(name1, 'at') print >> file1, cont, G.node[node]['role'], G.node[node]['label'], len( G.neighbors(node) ), eff_degree, average_BMI_change, deviation_BMI #,list_BMI_changes file1.close() name2 = dir + "ego_top_ten_" + str( time_in_system) + "_average_weight_change_" + str( time_in_system) + "days.dat" file2 = open(name2, 'at') print >> file2, cont, G.node[node]['role'], G.node[node]['label'], len( G.neighbors(node) ), eff_degree, average_weight_change, deviation_weight #,list_weight_changes file2.close() name3 = dir + "ego_top_ten_" + str( top_ten_feature) + "_average_activity_" + str( time_in_system) + "days.dat" file3 = open(name3, 'at') print >> file3, cont, G.node[node]['role'], G.node[node]['label'], len( G.neighbors(node) ), eff_degree, average_activity, deviation_activity #,list_activities file3.close() name4 = dir + "ego_top_ten_" + str( top_ten_feature) + "_dispersions_" + str( time_in_system) + "days.dat" file4 = open(name4, 'at') for i in range(len(list_activities)): print >> file4, cont, list_BMI_changes[i], list_weight_changes[ i], list_activities[i] print >> file4, "\n\n" #to separate roles file4.close() cont = cont + 1
def get_inter_cluster_relation(seq_records, geo_id): logging.debug('Calculating inter cluster relations on geo_record "%s"..' % (geo_id)) data = [] full_g = nx.Graph() cluster_genes = {} bio_genes = set() cur_cluster1 = 0 # First, inspect all cluster to get cluster_genes for record in seq_records: for cluster in utils.get_cluster_features(record): cur_cluster1 += 1 cluster_genes[cur_cluster1] = set() for cluster_gene in utils.get_cluster_cds_features( cluster, record): # We only care about cluster_genes that have a geo match for cluster_gene_geo in utils.parse_geo_feature(cluster_gene): # We only care about data from the current geo_id if cluster_gene_geo['rec_id'] == geo_id: cur_gene1 = utils.get_gene_id(cluster_gene) cur_gene1_distances = cluster_gene_geo['dist'] cur_gene1_neighbors = set(cur_gene1_distances) # Add each gene to cluster_genes, and to the full_g(raph) and to bio_genes cluster_genes[cur_cluster1].add(cur_gene1) full_g.add_node(cur_gene1) if 'sec_met' in cluster_gene.qualifiers: bio_genes.add(cur_gene1) # Get intra-cluster edges interactions = cur_gene1_neighbors.intersection( cluster_genes[cur_cluster1]) update_g(cur_gene1, interactions, cur_gene1_distances, full_g) # From the second cluster onwards, we'll add inter-cluster edges backwards, i.e.: 2-1, 3-1, 3-2, 4-1, 4-2, etc... if cur_cluster1 is not 1: for cur_cluster2 in cluster_genes: if cur_cluster1 is not cur_cluster2: interactions = cur_gene1_neighbors.intersection( cluster_genes[cur_cluster2]) update_g(cur_gene1, interactions, cur_gene1_distances, full_g) # Remove single nodes for node in full_g.nodes(): if full_g.degree(node) == 0: full_g.remove_node(node) # Get communities community_dict = community.best_partition(full_g) number_of_clusters = len(cluster_genes) # Now check inter-cluster interactions for i in range(1, number_of_clusters + 1): cluster1 = cluster_genes[i] for j in range(i + 1, number_of_clusters + 1): cluster2 = cluster_genes[j] cluster3 = cluster1.union(cluster2) cluster_pair_g = full_g.subgraph(cluster3) communities_present = np.unique( [community_dict[n] for n in cluster3 if n in community_dict]) # CRITERIA 1 = only intra-community edges for cur_community in communities_present: cur_community_nodes = [ n for n in cluster3 if n in community_dict and community_dict[n] == cur_community ] cur_community_g = cluster_pair_g.subgraph(cur_community_nodes) decomposed_g = list( nx.connected_component_subgraphs(cur_community_g)) for cur_g in decomposed_g: # CRITERIA 2 = no isolates. anything with a clustering_coefficient=0 will be pruned out. 
clustering_coefficient = nx.clustering(cur_g) pred_nodes = [ n for n in clustering_coefficient if clustering_coefficient[n] > 0 ] pred_g = cur_g.subgraph(pred_nodes) pred_edges = pred_g.edges() prediction = set(pred_g.nodes()) prediction_cluster1 = prediction.intersection(cluster1) prediction_cluster2 = prediction.intersection(cluster2) bio_prediction = prediction.intersection(bio_genes) bio_prediction_cluster1 = prediction_cluster1.intersection( bio_genes) bio_prediction_cluster2 = prediction_cluster2.intersection( bio_genes) #CRITERIA 3 = at least 2 genes per cluster #CRITERIA 5 = at least 1 bio per cluster #CRITERIA 4 = at least 3 bio if (len(prediction_cluster1) >= 2 and len(prediction_cluster2) >= 2 and len(bio_prediction_cluster1) >= 1 and len(bio_prediction_cluster2) >= 1 and len(bio_prediction) >= 3): pred_edges1 = [ n for n in pred_edges if n[0] in cluster1 and n[1] in cluster1 ] pred_edges2 = [ n for n in pred_edges if n[0] in cluster2 and n[1] in cluster2 ] pred_edges12 = [ n for n in pred_edges if n[0] in cluster1 and n[1] in cluster2 ] pred_edges21 = [ n for n in pred_edges if n[0] in cluster2 and n[1] in cluster1 ] inter_cluster_edges = pred_edges12 + pred_edges21 data.append({}) data[-1]['source'] = {} data[-1]['source']['id'] = i data[-1]['source']['links'] = pred_edges1 data[-1]['target'] = {} data[-1]['target']['id'] = j data[-1]['target']['links'] = pred_edges2 data[-1]['links'] = inter_cluster_edges return data
def get_nk_lcc_undirected(G):
    """Return the largest connected component of G relabelled with
    consecutive integer ids, together with the old-id -> new-id mapping."""
    G2 = max(nx.connected_component_subgraphs(G), key=len)
    tdl_nodes = G2.nodes()
    nodeListMap = dict(zip(tdl_nodes, range(len(tdl_nodes))))
    G2 = nx.relabel_nodes(G2, nodeListMap, copy=True)
    return G2, nodeListMap
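# A small usage sketch for get_nk_lcc_undirected, assuming a pre-2.4 NetworkX
# where connected_component_subgraphs still exists.  The returned map can be
# inverted to translate results on the relabelled graph back to the original ids:
import networkx as nx

G = nx.Graph([("a", "b"), ("b", "c"), ("x", "y")])  # "x"-"y" is the smaller component
G2, node_map = get_nk_lcc_undirected(G)

print(sorted(G2.nodes()))                        # [0, 1, 2]
inverse_map = {new: old for old, new in node_map.items()}
print(inverse_map[0])                            # original label of node 0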
R_GC[i].nodes[n]['degree'] = deg R_GC[i].nodes[n]['eigenvector'] = eigen R_GC[i].nodes[n]['betweenness'] = between R_GC[i].nodes[n]['current'] = current return R_GC #%% R_GC = centralidades2(redes_analisis) #%% nodes = np.empty( (4, len(R_GC)), dtype=object ) #aca se van a guardar los nombres de los nodos a eliminar en cada caso(para cada centralidad) for i in range(len(R_GC)): largo = max( nx.connected_component_subgraphs(R_GC[i]), key=len ).number_of_nodes( ) #int(np.sum([c[1]['Esencialidad'] for c in list(R_GC[i].nodes.data())])) nodes[0, i] = [ b[0] for b in list( sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['degree'])) ][0:largo] nodes[1, i] = [ b[0] for b in list( sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['eigenvector'])) ][0:largo] nodes[2, i] = [ b[0] for b in list( sorted(R_GC[i].nodes.data(), key=lambda x: -x[1]['betweenness'])) ][0:largo] nodes[3, i] = [
def S2(A):
    """Return the size of the largest connected component of the graph built
    from the (already thresholded) adjacency matrix A."""
    G = nx.from_numpy_matrix(A)
    S = nx.number_of_nodes(max(nx.connected_component_subgraphs(G), key=len))
    return S
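# A tiny usage example for S2, assuming a symmetric 0/1 adjacency matrix that
# has already been thresholded:
import numpy as np

A = np.array([[0, 1, 0, 0],
              [1, 0, 0, 0],
              [0, 0, 0, 1],
              [0, 0, 1, 0]])

print(S2(A))  # 2 -- both components have two nodes, so the largest has size 2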
def graph_stats(G, diameter=False): logging.debug("Graph stats: |V|={0}, |E|={1}".format(len(G), G.size())) if diameter: d = max(nx.diameter(H) for H in nx.connected_component_subgraphs(G)) logging.debug("Graph diameter: {0}".format(d))
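# graph_stats only logs, so DEBUG logging has to be enabled to see its output.
# A minimal usage sketch under that assumption:
import logging
import networkx as nx

logging.basicConfig(level=logging.DEBUG)

G = nx.erdos_renyi_graph(50, 0.05, seed=1)
graph_stats(G, diameter=True)  # logs |V|, |E| and the largest per-component diameter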
def cond_test_generation(args, train_loader, test_loader, model, decoder, decoder_name, flow_model=None, epoch=-1, oracle=None): node_dist = args.node_dist edge_index = None flow_name = '' model.eval() if args.flow_model: flow_name = args.flow_model if not decoder_name: decoder_name = '' save_gen_base = plots = './visualization/gen_plots/' + args.dataset + '/' save_gen_plots = save_gen_base + args.model + str(args.z_dim) + '_' \ + str(flow_name) + '_' + decoder_name + '/' gen_graph_list, gen_graph_copy_list = [], [] avg_connected_components, avg_triangles, avg_transitivity = [], [], [] raw_triangles = [] A_list = [] test_recon_loss_avg = [] test_recon_loss_avg.append(0) for i, data_batch in enumerate(test_loader): batch = extract_batch(args, data_batch) # Correct shapes for VGAE processing if len(batch[0]['adj'].shape) > 2: # We give the full Adj to the encoder adj = batch[0]['adj'] + batch[0]['adj'].transpose(2,3) node_feats = adj.view(-1, args.max_nodes) else: node_feats = batch[0]['adj'] if batch[0]['edges'].shape[0] != 2: edge_index = batch[0]['encoder_edges'].long() else: edge_index = batch[0]['edges'] z, z_k = model.encode(node_feats, edge_index) batch[0]['node_latents'] = z_k test_recon_loss = model.decode(batch) test_recon_loss_avg[-1] += test_recon_loss.sum(dim=-1).item() if args.decoder == 'gran': decoder.eval() num_nodes_pmf = train_loader.dataset.num_nodes_pmf num_adj_batch = batch[0]['adj'].size(0) A = decoder._sampling(num_adj_batch, enc_node_feats=z_k) A_list += [A[ii, :batch[0]['num_nodes_gt'][ii], :batch[0]['num_nodes_gt'][ii]] for ii in range(num_adj_batch)] # This is only needed for constrain sat eval, padded rows will be # masked out again. We have to check for 0-rows before Max Nodes # though which is why we cant just not-pad. adj_mats_padded = pad_adj_mat(args, A_list) decoder.train() else: num_nodes = None decoder.eval() adj_mats = decoder(z_k, edge_index, return_adj=True)[-1] decoder.train() if args.deterministic_decoding: adj_mats = (adj_mats > 0.5).float() else: adj_mats = torch.bernoulli(adj_mats) num_nodes = [] for adj_mat in A_list: g = nx.from_numpy_matrix(adj_mat.detach().cpu().numpy()) g.remove_edges_from(nx.selfloop_edges(g)) g_copy = copy.deepcopy(g) gen_graph_copy_list.append(g_copy) num_nodes.append(g.number_of_nodes()) if len(g) > 0: # process the graphs if args.better_vis: g = max(nx.connected_component_subgraphs(g), key=len) num_connected_components = nx.number_connected_components(g) avg_connected_components.append(num_connected_components) num_triangles = list(nx.triangles(g).values()) avg_triangles.append(sum(num_triangles) / float(len(num_triangles))) avg_transitivity.append(nx.transitivity(g)) raw_triangles.append([num_triangles, len(g.nodes)]) gen_graph_list.append(g) # once graphs are generated model.train() total = len(gen_graph_list) # min(3, len(vis_graphs)) draw_graph_list(gen_graph_list[:args.num_gen_samples], 3, int(total // 3), fname='./visualization/sample/{}/Cond_{}_{}.png'.format(args.namestr, constraint_str, epoch), layout='spring') # Evaluate Generated Graphs using GraphRNN metrics if args.decoder == 'gran' or args.model == 'gran': test_dataset = [test_G for test_G in test_loader.dataset.graphs] else: test_dataset = [to_networkx(test_G).to_undirected() for test_G in test_loader] metrics = evaluate_generated( test_dataset, gen_graph_list, args.dataset) metrics_copy = evaluate_generated( test_dataset, gen_graph_copy_list, args.dataset) # Orginal Graphs with nodes remoed mmd_degree, mmd_clustering, mmd_4orbits = metrics[0], metrics[1], 
metrics[2] mmd_spectral, accuracy = metrics[3], metrics[4] mean_connected_comps = sum( avg_connected_components) / float(len(avg_connected_components)) mean_triangles = sum(avg_triangles) / float(len(avg_triangles)) mean_transitivity = sum(avg_transitivity) / \ float(len(avg_transitivity)) # Copied Graphs with nodes not removed mmd_degree_copy, mmd_clustering_copy, mmd_4orbits_copy = metrics_copy[ 0], metrics_copy[1], metrics_copy[2] mmd_spectral_copy, accuracy_copy = metrics_copy[3], metrics_copy[4] test_recon_loss_avg[-1] /= len(test_loader.dataset) if args.wandb: wandb.log({"Cond Deg": mmd_degree, "Cond Clus": mmd_clustering, "Cond Orb": mmd_4orbits, "Cond Acc": accuracy, "Cond Spec.": mmd_spectral, "Cond Avg_CC": mean_connected_comps, "Cond Avg_Tri": mean_triangles, "Cond Avg_transitivity": mean_transitivity, "Cond Raw_triangles": raw_triangles, "Cond Deg_copy": mmd_degree_copy, "Cond Clus_copy": mmd_clustering_copy, "Cond Orb_copy": mmd_4orbits_copy, "Cond Acc_copy": accuracy_copy, "Cond Spec_copy": mmd_spectral_copy, "Cond Test Constr Loss": constraint_loss, "Cond Test Constr Sat": constr_sat, "Test Recon Loss": test_recon_loss_avg[-1], "test_step": epoch}) print('Cond. Deg: {:.4f}, Clus.: {:.4f}, Orbit: {:.4f}, Spec.:{:.4f}, Acc: {:.4f}'.format(mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy)) print('Cond. Avg CC: {:.4f}, Avg. Tri: {:.4f}, Avg. Trans: {:.4f}'.format(mean_connected_comps, mean_triangles, mean_transitivity)) return [mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy]
def analyze(G): components = [] components = nx.connected_component_subgraphs(G) i = 0 for cc in components: #Set the connected component for each group for node in cc: G.node[node]['component'] = i #Calculate the in component betweeness, closeness and eigenvector centralities cent_betweenness = nx.betweenness_centrality(cc) # cent_eigenvector = nx.eigenvector_centrality_numpy(cc) cent_eigenvector = nx.eigenvector_centrality(cc) cent_closeness = nx.closeness_centrality(cc) for name in cc.nodes(): G.node[name]['cc-betweenness'] = cent_betweenness[name] G.node[name]['cc-eigenvector'] = cent_eigenvector[name] G.node[name]['cc-closeness'] = cent_closeness[name] i += 1 # Calculate cliques cliques = list(nx.find_cliques(G)) j = 0 processed_members = [] for clique in cliques: for member in clique: if not member in processed_members: G.node[member]['cliques'] = [] processed_members.append(member) G.node[member]['cliques'].append(j) j += 1 #calculate degree degrees = G.degree() for name in degrees: G.node[name]['degree'] = degrees[name] betweenness = nx.betweenness_centrality(G) eigenvector = nx.eigenvector_centrality_numpy(G) closeness = nx.closeness_centrality(G) pagerank = nx.pagerank(G) k_cliques = nx.k_clique_communities(G, 3) for name in G.nodes(): G.node[name]['betweenness'] = betweenness[name] G.node[name]['eigenvector'] = eigenvector[name] G.node[name]['closeness'] = closeness[name] G.node[name]['pagerank'] = pagerank[name] for pos, k_clique in enumerate(k_cliques): for member in k_clique: G.node[member]['k-clique'] = pos partitions = community.best_partition(G) for key in partitions: G.node[key]['modularity'] = partitions[key] return G
nx.draw_networkx_labels(G2, pos=position)
plt.draw()
# Rendering the labels may not have been a good idea: the plot becomes quite cluttered.

connectedComp = nx.connected_components(G2)
connectedComp = list(connectedComp)
countComp = len(connectedComp)
print("This graph has", countComp, "connected components")

# The components are usually already sorted, but to be safe we locate the largest one explicitly
compLengths = []
for i in range(0, countComp):
    compLengths.append(len(connectedComp[i]))

highestIndex = np.argmax(compLengths)
componentGraphs = list(nx.connected_component_subgraphs(G2))
largestComponent = componentGraphs[highestIndex]
# This is a subgraph with 379 nodes and 914 edges
#nx.draw(largestComponent)
#plt.draw()

betweenValues = nx.betweenness_centrality(largestComponent)
# betweenValues is a dictionary; get the values and keys as separate lists
values = list(betweenValues.values())
keys = list(betweenValues.keys())

# find the index of the node with the highest betweenness centrality
highestIndex = np.argmax(values)
print("The node id", keys[highestIndex], "has the highest betweenness centrality:",
      values[highestIndex])
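# The block above walks the betweenness dictionary through parallel key/value
# lists.  A more compact way to pick the most central node, assuming the same
# betweenValues dictionary:
mostCentralNode = max(betweenValues, key=betweenValues.get)
print("The node id", mostCentralNode,
      "has the highest betweenness centrality:", betweenValues[mostCentralNode])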
def test_generation(args, config, train_loader, test_loader, decoder, decoder_name, flow_model=None, epoch=-1): node_dist = args.node_dist edge_index = None flow_name = '' if args.flow_model: flow_name = args.flow_model if not decoder_name: decoder_name = '' save_gen_base = plots = './visualization/gen_plots/' + config.dataset.name + '/' save_gen_plots = save_gen_base + args.model + str(args.z_dim) + '_' \ + str(flow_name) + '_' + decoder_name + '/' gen_graph_list, gen_graph_copy_list = [], [] avg_connected_components, avg_triangles, avg_transitivity = [], [], [] raw_triangles = [] if args.model == 'gran': num_nodes_pmf = train_loader.dataset.num_nodes_pmf model = decoder model.eval() A = decoder._sampling(args.num_gen_samples) num_nodes_pmf = torch.from_numpy(num_nodes_pmf).to(model.device) num_nodes = torch.multinomial(num_nodes_pmf.float(), args.num_gen_samples, replacement=True) # shape B X 1 A_list = [ A[ii, :num_nodes[ii], :num_nodes[ii]] for ii in range(args.num_gen_samples) ] adj_mats_padded = pad_adj_mat(args, A_list) model.train() else: # VAE # sample lengths of graphs if args.decoder == 'gran': num_nodes_pmf = train_loader.dataset.num_nodes_pmf num_nodes_pmf = torch.from_numpy(num_nodes_pmf).to(args.dev) num_nodes = torch.multinomial(num_nodes_pmf.float(), args.num_gen_samples, replacement=True) # shape B X 1 else: num_nodes_pmf = np.random.choice(node_dist, args.num_gen_samples) batch_all_nodes = [] for i, len_ in enumerate(num_nodes): fully_connected = nx.complete_graph(len_) edge_index_i = torch.tensor(list(fully_connected.edges)).to(args.dev).t().contiguous() batch_all_nodes += [edge_index_i + i * args.max_nodes] edge_index = torch.cat(batch_all_nodes, 1).to(args.dev) if flow_model is None: z_0 = torch.FloatTensor( args.num_gen_samples * args.max_nodes, args.z_dim).to(args.dev).normal_() z_k = z_0 else: z_0 = flow_model.base_dist.sample((args.num_gen_samples, flow_model.n_components)).squeeze() # z_0 = z_0.view(args.num_gen_samples, -1) z_k, _ = flow_model.inverse(z_0, edge_index=edge_index) z_k = z_k.view(args.num_gen_samples * args.max_nodes, args.z_dim) if args.decoder == 'gran': decoder.eval() A = decoder._sampling( args.num_gen_samples, enc_node_feats=z_k) A_list = [ A[ii, :num_nodes[ii], :num_nodes[ii]] for ii in range(args.num_gen_samples) ] # This is only needed for constrain sat eval, padded rows will be # masked out again. We have to check for 0-rows before Max Nodes # though which is why we cant just not-pad. 
adj_mats_padded = pad_adj_mat(args, A_list) decoder.train() else: num_nodes = None decoder.eval() adj_mats = decoder(z_k, edge_index, return_adj=True)[-1] decoder.train() if args.deterministic_decoding: adj_mats = (adj_mats > 0.5).float() else: adj_mats = torch.bernoulli(adj_mats) for adj_mat in A_list: g = nx.from_numpy_matrix(adj_mat.detach().cpu().numpy()) g.remove_edges_from(nx.selfloop_edges(g)) g_copy = copy.deepcopy(g) gen_graph_copy_list.append(g_copy) if len(g) > 0: # process the graphs if config.test.better_vis: g = max(nx.connected_component_subgraphs(g), key=len) num_connected_components = nx.number_connected_components(g) avg_connected_components.append(num_connected_components) num_triangles = list(nx.triangles(g).values()) avg_triangles.append(sum(num_triangles) / float(len(num_triangles))) avg_transitivity.append(nx.transitivity(g)) raw_triangles.append([num_triangles, len(g.nodes)]) gen_graph_list.append(g) # once graphs are generated total = len(gen_graph_list) # min(3, len(vis_graphs)) draw_graph_list(gen_graph_list[:total], 3, int(total // 3), fname='./visualization/sample/{}/{}_{}.png'.format(args.namestr, constraint_str, epoch), layout='spring') # Evaluate Generated Graphs using GraphRNN metrics if args.decoder == 'gran' or args.model == 'gran': test_dataset = [test_G for test_G in test_loader.dataset.graphs] else: test_dataset = [to_networkx(test_G).to_undirected() for test_G in test_loader] metrics = evaluate_generated( test_dataset, gen_graph_list, args.dataset) metrics_copy = evaluate_generated( test_dataset, gen_graph_copy_list, args.dataset) # Orginal Graphs with nodes remoed mmd_degree, mmd_clustering, mmd_4orbits = metrics[0], metrics[1], metrics[2] mmd_spectral, accuracy = metrics[3], metrics[4] mean_connected_comps = sum( avg_connected_components) / float(len(avg_connected_components)) mean_triangles = sum(avg_triangles) / float(len(avg_triangles)) mean_transitivity = sum(avg_transitivity) / \ float(len(avg_transitivity)) # Copied Graphs with nodes not removed mmd_degree_copy, mmd_clustering_copy, mmd_4orbits_copy = metrics_copy[ 0], metrics_copy[1], metrics_copy[2] mmd_spectral_copy, accuracy_copy = metrics_copy[3], metrics_copy[4] if args.wandb: wandb.log({"Deg": mmd_degree, "Clus": mmd_clustering, "Orb": mmd_4orbits, "Acc": accuracy, "Spec.": mmd_spectral, "Avg_CC": mean_connected_comps, "Avg_Tri": mean_triangles, "Avg_transitivity": mean_transitivity, "Raw_triangles": raw_triangles, "Deg_copy": mmd_degree_copy, "Clus_copy": mmd_clustering_copy, "Orb_copy": mmd_4orbits_copy, "Acc_copy": accuracy_copy, "Spec_copy": mmd_spectral_copy, "Test Constr Loss": constraint_loss, "Test Constr Sat": constr_sat, "test_step": epoch}) print('Deg: {:.4f}, Clus.: {:.4f}, Orbit: {:.4f}, Spec.:{:.4f}, Acc: {:.4f}'.format(mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy)) print('Avg CC: {:.4f}, Avg. Tri: {:.4f}, Avg. Trans: {:.4f}'.format(mean_connected_comps, mean_triangles, mean_transitivity)) return [mmd_degree, mmd_clustering, mmd_4orbits, mmd_spectral, accuracy]
# Example from the KDD paper
# Graph is undirected
# G = nx.Graph()
# G.add_edge(1, 2)
# G.add_edge(2, 3)
# G.add_edge(2, 4)
# G.add_edge(3, 4)
# G.add_edge(3, 5)
# G.add_edge(4, 6)
# G.add_edge(5, 6)
# G.add_edge(1, 5)

# Graph must be connected
if not nx.is_connected(G):
    print "Graph must be connected"
    G = list(nx.connected_component_subgraphs(G))[0]

# Graph must be simple
G.remove_edges_from(G.selfloop_edges())
if G.number_of_selfloops() > 0:
    print "Graph must not contain self-loops"
    exit()

num_nodes = G.number_of_nodes()
print "Number of Nodes:\t" + str(num_nodes)

num_edges = G.number_of_edges()
print "Number of Edges:\t" + str(num_edges)

# To parse a large graph we use 10 samples of size 500 each. It is
# possible to parse the whole graph, but the approximate
def count_inf_links_nodes_GC(G_period): H_period_aux = G_period.copy( ) # make a copy to get only the infected links and nodes (for cluster distribution) ##### i count inf. nodes and links fract_inf_nodes = 0. fract_inf_links = 0. for node in G_period.nodes(): if G_period.node[node]["status"] == "I": fract_inf_nodes += 1. for neighbor in G_period.neighbors(node): if G_period.node[neighbor]["status"] == "I": if node < neighbor: # so i dont count each link twice fract_inf_links += 1. ###### i remove non-infected links from the aux subgraph else: try: H_period_aux.remove_edge(node, neighbor) except: try: H_period_aux.remove_edge(neighbor, node) except: pass else: H_period_aux.remove_node( node) # with this, i remove the node and all its links try: fract_inf_nodes = fract_inf_nodes / float(len(G_period.nodes())) except ZeroDivisionError: fract_inf_nodes = float('NaN') try: fract_inf_links = fract_inf_links / float(len(G_period.edges())) except ZeroDivisionError: fract_inf_links = float('NaN') ##### i remove the isolates from aux graph: list_to_remove = [] for node in H_period_aux.nodes(): if H_period_aux.degree(node) == 0: list_to_remove.append(node) H_period_aux.remove_nodes_from(list_to_remove) # print " # nodes: in G:", len(G_period.nodes()), " in H_aux:", len(H_period_aux.nodes()) #print " # edges: in G:", len(G_period.edges()), " in H_aux:", len(H_period_aux.edges()) lista_components = [] ##### i calculate components on the infected subgraph #print "components of Infected subgraph:" for item in nx.connected_component_subgraphs(H_period_aux): try: # print "comp. size:",len(item.nodes()), " avg.path lenght within component:",nx.average_shortest_path_length(item) lista_components.append(len(item.nodes())) except ZeroDivisionError: pass #print "comp. size:",len(item.nodes()) ####### i get the GC of the infected subgraph try: Gc = len(max(nx.connected_component_subgraphs(H_period_aux), key=len)) / float(len(G_period.edges())) # print "GC:", Gc, "\n" except ValueError: Gc = float('NaN') return fract_inf_links, fract_inf_nodes, Gc, lista_components
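# count_inf_links_nodes_GC expects every node to carry a "status" attribute.
# A small usage sketch, assuming the pre-2.4 node-attribute API used in the
# function itself:
import networkx as nx

G_period = nx.path_graph(5)  # 0-1-2-3-4
infected = {0, 1, 3}
for node in G_period.nodes():
    G_period.node[node]["status"] = "I" if node in infected else "S"

frac_links, frac_nodes, gc, components = count_inf_links_nodes_GC(G_period)
print(frac_nodes)   # 0.6 -- three of the five nodes are infected
print(frac_links)   # 0.25 -- only edge (0, 1) joins two infected nodes
print(components)   # [2] -- sizes of the non-trivial infected sub-components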
def scenario_tiscali(net_cache=[0.05], n_contents=100000, alpha=[0.6, 0.8, 1.0]): """ Return a scenario based on Tiscali topology, parsed from RocketFuel dataset Parameters ---------- scenario_id : str String identifying the scenario (will be in the filename) net_cache : float Size of network cache (sum of all caches) normalized by size of content population n_contents : int Size of content population alpha : float List of alpha of Zipf content distribution """ rate = 12.0 warmup = 9000 duration = 36000 T = 'TISCALI' # name of the topology # 240 nodes in the main component topology = fnss.parse_rocketfuel_isp_map( path.join(scenarios_dir, 'resources/3257.r0.cch')).to_undirected() topology = list(nx.connected_component_subgraphs(topology))[0] deg = nx.degree(topology) onedeg = [v for v in topology.nodes() if deg[v] == 1] # they are 80 # we select as caches nodes with highest degrees # we use as min degree 6 --> 36 nodes # If we changed min degrees, that would be the number of caches we would have: # Min degree N caches # 2 160 # 3 102 # 4 75 # 5 50 # 6 36 # 7 30 # 8 26 # 9 19 # 10 16 # 11 12 # 12 11 # 13 7 # 14 3 # 15 3 # 16 2 caches = [v for v in topology.nodes() if deg[v] >= 6] # 36 nodes # sources are node with degree 1 whose neighbor has degree at least equal to 5 # we assume that sources are nodes connected to a hub # they are 44 sources = [ v for v in onedeg if deg[list(topology.edge[v].keys())[0]] > 4.5 ] # they are # receivers are node with degree 1 whose neighbor has degree at most equal to 4 # we assume that receivers are nodes not well connected to the network # they are 36 receivers = [ v for v in onedeg if deg[list(topology.edge[v].keys())[0]] < 4.5 ] # we set router stacks because some strategies will fail if no stacks # are deployed routers = [ v for v in topology.nodes() if v not in caches + sources + receivers ] # set weights and delays on all links fnss.set_weights_constant(topology, 1.0) fnss.set_delays_constant(topology, internal_link_delay, 'ms') # randomly allocate contents to sources contents = dict([(v, []) for v in sources]) for c in range(1, n_contents + 1): s = choice(sources) contents[s].append(c) for v in sources: fnss.add_stack(topology, v, 'source', {'contents': contents[v]}) for v in receivers: fnss.add_stack(topology, v, 'receiver', {}) for v in routers: fnss.add_stack(topology, v, 'router', {}) # label links as internal or external for u, v in topology.edges(): if u in sources or v in sources: topology.edge[u][v]['type'] = 'external' # this prevents sources to be used to route traffic fnss.set_weights_constant(topology, 1000.0, [(u, v)]) fnss.set_delays_constant(topology, external_link_delay, 'ms', [(u, v)]) else: topology.edge[u][v]['type'] = 'internal' for nc in net_cache: size = (float(nc) * n_contents) / len(caches) # size of a single cache C = str(nc) for v in caches: fnss.add_stack(topology, v, 'cache', {'size': size}) fnss.write_topology( topology, path.join(scenarios_dir, topo_prefix + 'T=%s@C=%s' % (T, C) + '.xml')) print('[WROTE TOPOLOGY] T: %s, C: %s' % (T, C)) for a in alpha: event_schedule = gen_req_schedule(receivers, rate, warmup, duration, n_contents, a) fnss.write_event_schedule( event_schedule, path.join(scenarios_dir, es_prefix + 'T=%s@A=%s' % (T, str(a)) + '.xml')) print('[WROTE SCHEDULE] T: %s, Alpha: %s, Events: %d' % (T, str(a), len(event_schedule)))
def graph_clus(df, DIST_THRESH=DIST_THRESH): topics = [] node_lens = [] all_dict = {} counter = 0 G = nx.Graph() for each in df["Topics"]: counter += 1 topics_here = each.split("\n") topics.extend(topics_here) all_dict[str(counter)] = topics_here node_lens.append(len(topics_here)) G.add_node(counter) topics = set(topics) node_list = range(1, counter + 1) # for i in range(len(node_list)): # for j in range(len(node_list)): # node_list[i].append(-1) tot_count = len(node_list) for i in range(tot_count): for j in range(i + 1, tot_count): dist_here = part1.jaccard_coef(all_dict[str(i + 1)], all_dict[str(j + 1)]) # print(dist_here) if (dist_here > DIST_THRESH): G.add_edge(i + 1, j + 1) def second_elem(a): return a[1] # pu.db iter_ = 1 print() while (1): clusters = list(nx.connected_component_subgraphs(G)) # pu.db num_clusters = len(clusters) print("iter_no: " + str(iter_) + ", clusters: " + str(num_clusters), end="\r") iter_ += 1 if num_clusters >= 9: break centralities = list(nx.edge_betweenness_centrality(G).items()) # pu.db centralities.sort(key=second_elem, reverse=True) req_edges = centralities[0][0] # print(req_edges) G.remove_edge(req_edges[0], req_edges[1]) print() nx.draw(G) plt.show("Clusters map") clusters = list(nx.connected_component_subgraphs(G)) all_nodes = [] for each in clusters: all_nodes.append(list(each.nodes)) # pu.db return all_nodes
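# graph_clus performs a Girvan-Newman style divisive clustering: it repeatedly
# deletes the edge with the highest betweenness until at least nine components
# remain.  A minimal sketch of the same idea built on
# networkx.algorithms.community.girvan_newman (an alternative, not what the
# snippet above uses; assumes NetworkX >= 2.0):
import networkx as nx
from networkx.algorithms.community import girvan_newman

def clusters_by_girvan_newman(G, min_clusters=9):
    """Remove high-betweenness edges until the graph splits into at least
    min_clusters communities and return them as node sets."""
    if nx.number_connected_components(G) >= min_clusters:
        return [set(c) for c in nx.connected_components(G)]
    for communities in girvan_newman(G):
        if len(communities) >= min_clusters:
            return [set(c) for c in communities]
    # graphs with fewer than min_clusters nodes can never split far enough
    return [set(c) for c in nx.connected_components(G)]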
def draw_colocalization(G, seed_nodes_1, seed_nodes_2, edge_cmap=plt.cm.autumn_r, export_file='colocalization.json', export_network=False, highlight_nodes=None, k=None, largest_connected_component=False, node_cmap=plt.cm.autumn_r, node_size=10, num_nodes=None, physics_enabled=False, Wprime=None, **kwargs): ''' Implements and displays the network propagation for a given graph and two sets of seed nodes. Additional kwargs are passed to visJS_module. Inputs: - G: a networkX graph - seed_nodes_1: first set of nodes on which to initialize the simulation - seed_nodes_2: second set of nodes on which to initialize the simulation - edge_cmap: matplotlib colormap for edges, optional, default: matplotlib.cm.autumn_r - export_file: JSON file to export graph data, default: 'colocalization.json' - export_network: export network to Cytoscape, default: False - highlight_nodes: list of nodes to place borders around, default: None - k: float, optional, optimal distance between nodes for nx.spring_layout(), default: None - largest_connected_component: boolean, optional, whether or not to display largest_connected_component, default: False - node_cmap: matplotlib colormap for nodes, optional, default: matplotlib.cm.autumn_r - node_size: size of nodes, default: 10 - num_nodes: the number of the hottest nodes to graph, default: None (all nodes will be graphed) - physics_enabled: enable physics simulation, default: False - Wprime: Normalized adjacency matrix (from normalized_adj_matrix) Returns: - VisJS html network plot (iframe) of the colocalization. ''' # check for invalid nodes in seed_nodes invalid_nodes = [(node, 'seed_nodes_1') for node in seed_nodes_1 if node not in G.nodes()] invalid_nodes.extend([(node, 'seed_nodes_2') for node in seed_nodes_2 if node not in G.nodes()]) for node in invalid_nodes: print('Node {} in {} not in graph'.format(node[0], node[1])) if invalid_nodes: return # perform the colocalization if Wprime is None: Wprime = normalized_adj_matrix(G) prop_graph_1 = network_propagation(G, Wprime, seed_nodes_1).to_dict() prop_graph_2 = network_propagation(G, Wprime, seed_nodes_2).to_dict() prop_graph = { node: (prop_graph_1[node] * prop_graph_2[node]) for node in prop_graph_1 } nx.set_node_attributes(G, name='node_heat', values=prop_graph) # find top num_nodes hottest nodes and connected component if requested G = set_num_nodes(G, num_nodes) if largest_connected_component: G = max(nx.connected_component_subgraphs(G), key=len) nodes = list(G.nodes()) edges = list(G.edges()) # check for empty nodes and edges after getting subgraph of G if not nodes: print('There are no nodes in the graph. Try increasing num_nodes.') return if not edges: print('There are no edges in the graph. 
Try increasing num_nodes.') return # set position of each node if k is None: pos = nx.spring_layout(G) else: pos = nx.spring_layout(G, k=k) xpos, ypos = zip(*pos.values()) nx.set_node_attributes(G, name='xpos', values=dict( zip(pos.keys(), [x * 1000 for x in xpos]))) nx.set_node_attributes(G, name='ypos', values=dict( zip(pos.keys(), [y * 1000 for y in ypos]))) # set the border width of nodes if 'node_border_width' not in kwargs.keys(): kwargs['node_border_width'] = 2 border_width = {} for n in nodes: if n in seed_nodes_1 or n in seed_nodes_2: border_width[n] = kwargs['node_border_width'] elif highlight_nodes is not None and n in highlight_nodes: border_width[n] = kwargs['node_border_width'] else: border_width[n] = 0 nx.set_node_attributes(G, name='nodeOutline', values=border_width) # set the shape of each node nodes_shape = [] for node in G.nodes(): if node in seed_nodes_1: nodes_shape.append('triangle') elif node in seed_nodes_2: nodes_shape.append('square') else: nodes_shape.append('dot') node_to_shape = dict(zip(G.nodes(), nodes_shape)) nx.set_node_attributes(G, name='nodeShape', values=node_to_shape) # add a field for node labels if highlight_nodes: node_labels = {} for node in nodes: if node in seed_nodes_1 or n in seed_nodes_2: node_labels[node] = str(node) elif node in highlight_nodes: node_labels[node] = str(node) else: node_labels[node] = '' else: node_labels = {n: str(n) for n in nodes} nx.set_node_attributes(G, name='nodeLabel', values=node_labels) # set the title of each node node_titles = [ str(node[0]) + '<br/>heat = ' + str(round(node[1]['node_heat'], 10)) for node in G.nodes(data=True) ] node_titles = dict(zip(nodes, node_titles)) nx.set_node_attributes(G, name='nodeTitle', values=node_titles) # set the color of each node node_to_color = visJS_module.return_node_to_color( G, field_to_map='node_heat', cmap=node_cmap, color_vals_transform='log') # set heat value of edge based off hottest connecting node's value node_attr = nx.get_node_attributes(G, 'node_heat') edge_weights = {} for e in edges: if node_attr[e[0]] > node_attr[e[1]]: edge_weights[e] = node_attr[e[0]] else: edge_weights[e] = node_attr[e[1]] nx.set_edge_attributes(G, name='edge_weight', values=edge_weights) # set the color of each edge edge_to_color = visJS_module.return_edge_to_color( G, field_to_map='edge_weight', cmap=edge_cmap, color_vals_transform='log') # create the nodes_dict with all relevant fields nodes_dict = [{ 'id': str(n), 'border_width': border_width[n], 'degree': G.degree(n), 'color': node_to_color[n], 'node_label': node_labels[n], 'node_size': node_size, 'node_shape': node_to_shape[n], 'title': node_titles[n], 'x': np.float64(pos[n][0]).item() * 1000, 'y': np.float64(pos[n][1]).item() * 1000 } for n in nodes] # map nodes to indices for source/target in edges node_map = dict(zip(nodes, range(len(nodes)))) # create the edges_dict with all relevant fields edges_dict = [{ 'source': node_map[edges[i][0]], 'target': node_map[edges[i][1]], 'color': edge_to_color[edges[i]] } for i in range(len(edges))] # set node_size_multiplier to increase node size as graph gets smaller if 'node_size_multiplier' not in kwargs.keys(): if len(nodes) > 500: kwargs['node_size_multiplier'] = 1 elif len(nodes) > 200: kwargs['node_size_multiplier'] = 3 else: kwargs['node_size_multiplier'] = 5 kwargs['physics_enabled'] = physics_enabled # if node hovering color not set, set default to black if 'node_color_hover_background' not in kwargs.keys(): kwargs['node_color_hover_background'] = 'black' # node size determined by size in 
nodes_dict, not by id if 'node_size_field' not in kwargs.keys(): kwargs['node_size_field'] = 'node_size' # node label determined by value in nodes_dict if 'node_label_field' not in kwargs.keys(): kwargs['node_label_field'] = 'node_label' # export the network to JSON for Cytoscape if export_network: node_colors = map_node_to_color(G, 'node_heat', True) nx.set_node_attributes(G, name='nodeColor', values=node_colors) edge_colors = map_edge_to_color(G, 'edge_weight', True) nx.set_edge_attributes(G, name='edgeColor', values=edge_colors) visJS_module.export_to_cytoscape(G=G, export_file=export_file) return visJS_module.visjs_network(nodes_dict, edges_dict, **kwargs)
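# draw_colocalization needs a graph plus two seed-node lists.  A small usage
# sketch, assuming visJS_module and the helpers above (normalized_adj_matrix,
# set_num_nodes) are importable and that the call runs inside a Jupyter
# notebook, where the returned iframe is rendered:
import networkx as nx

G = nx.karate_club_graph()
seeds_1 = [0, 1, 2]
seeds_2 = [33, 32, 31]

# precompute the normalized adjacency matrix once if several plots are drawn
Wprime = normalized_adj_matrix(G)
draw_colocalization(G, seeds_1, seeds_2,
                    num_nodes=20,                     # keep only the 20 hottest nodes
                    largest_connected_component=True,
                    Wprime=Wprime)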
def detect_communities(g=None, comm_opt=None): maybe_print(" Detecting communities.", 2, 'i') ENABLE_DETECTION = False ALGORITHM = 'fluid' graph = g if not graph: maybe_print(" Can't detect community because the graph is undefined (value is None).\n " " Trying to load from tmp/pruned_graph.gpickle",1,'E') try: graph = nx.read_gpickle("tmp/pruned_graph.gpickle") except Exception: raise RuntimeError("Unable to detect communities. Invalid input graph.") if not comm_opt: raise ValueError("Invalid community detection options.") else: ENABLE_DETECTION = comm_opt['enable_community_detection'] if 'enable_community_detection' in comm_opt else False ALGORITHM = comm_opt['method']['algorithm'] if 'algorithm' in comm_opt else 'fluid_communities' LABEL_DETECTION_METHOD = comm_opt['community_label_inference']['method'] \ if 'community_label_inference' in comm_opt and 'method' in comm_opt['community_label_inference']\ else 'distributed_semantic' # Convert directed graph to undirected graph undir_graph = graph.to_undirected() if not undir_graph: raise ValueError("Unable to perform community detection! Perhaps due to the malformed graph.") if ENABLE_DETECTION: # Load model for inferring cluster name using Glove glove_model = functions.glove_model try: if not glove_model: GLOVE_MODEL_FILE = config.uni_options['unify_semantic_similarity']['glove_model_file'] maybe_print(" + Glove model is undefined. Trying to load from " + GLOVE_MODEL_FILE, 2, "i") glove_model = Glove.load_stanford(GLOVE_MODEL_FILE) maybe_print(" + Model loading completed :)", 2) except Exception as inst: maybe_print(" + Error while detecting group names. Check whether the Glove model was correctly loaded.", 2, "E") print(inst) # Run algorithm try: if ALGORITHM == "fluid_communities": # get the largest messy graph # Get number of communities to be detected n_com = comm_opt['method']['params']['n_communities'] \ if 'n_communities' in comm_opt['method']['params'] else 4 enable_pagerank = comm_opt['method']['params']['enable_pagerank_initialization'] \ if 'enable_pagerank_initialization' in comm_opt['method']['params'] else 4 gc = max(nx.connected_component_subgraphs(undir_graph), key=len) # list of list. Each sublist contain ID of nodes in the same community communities = list(asyn_fluidc(gc, n_com,enable_pr=enable_pagerank)) maybe_print("Detected communities: {0}".format(communities,'i')) com_index = -1 for com in communities: com_index += 1 # SVM One class classifier for outlier detection. clf = OneClassSVM(nu=0.90 * outliers_fraction + 0.01, kernel="poly", gamma=0.03, degree=3) ##################### # How this work? the program compute weight sum over the vector of all member of the communities who # DO EXIST in the glove vector space. The scale factor is the ratio between the node's frequency # (under 'weight' attribute) and the sum of weights of all keywords that DO EXIST in the vector spac # -e. Those who many not exist will be disregarded. # It first extract vector representation of each member of the community. Those # keywords whose are successfully extracted (exist in glove vector space) has NonZero vector. Then # weights are computed for the original keywords of these NonZero vectors. 
# Suggest a label for the community comm_labels = [graph.node[node_id]['label'] for node_id in com] comm_labels_array = np.array(comm_labels) # Now run abstraction by different method suggested_labels = None if LABEL_DETECTION_METHOD == 'distributed_semantic': WINDOW = comm_opt['community_label_inference']['params']['window'] \ if 'window' in comm_opt['community_label_inference']['params'] else 3 V_WEIGHTS = comm_opt['community_label_inference']['params']['weight_ls'] \ if 'weight_ls' in comm_opt['community_label_inference']['params'] else 3 COMPOSITION_METHOD = comm_opt['community_label_inference']['params']['composition_method'] \ if 'composition_method' in comm_opt['community_label_inference']['params'] else 3 words_matrix = extract_vector_from_text_list(comm_labels, model=glove_model, window=WINDOW, vector_weights=V_WEIGHTS) # get indices for rows whose is zero rows zeros_indices = np.where(~words_matrix.any(axis=1))[0] maybe_print(' --> Community ' + str(com_index) + ' has ' + str(len(zeros_indices)) + " zero key(s) out of "+ str(len(com)),2,'i') # remove zero rows from words_matrix words_matrix = np.delete(words_matrix, zeros_indices, axis=0) # remaining labels comm_labels_array = np.delete(comm_labels_array,zeros_indices) maybe_print(' --> Remaining labels: {0}'.format(', '.join(comm_labels_array))) # get all the weight in the community, then convert to float by multiply 1.0 # Compute vector weight according to composition method vector_weight = None if COMPOSITION_METHOD == 'weighted_average': vector_weight = np.array([graph.node[n]['weight'] for n in com]) * 1.0 vector_weight = np.delete(vector_weight, zeros_indices, axis=0) # remove zero rows # Compute weights -> this is a kind of weighted sum vector_weight = vector_weight/np.sum(vector_weight) # compute scale/co-efficient, whatever :D vector_weight = vector_weight.reshape((len(vector_weight), 1)) # Transpose to column vector elif COMPOSITION_METHOD == 'average': n_row = len(com) - len(zeros_indices) vector_weight = np.full((n_row,1),1.0/n_row,dtype=np.float) elif COMPOSITION_METHOD == 'vec_sum': vector_weight = np.ones((len(com) - len(zeros_indices), 1)) else: raise ValueError('Invalid vector composition method') # print words_matrix.shape, vector_weight.shape assert words_matrix.shape[0] == vector_weight.shape[0], \ 'Mismatch size of matrix for community {0} with {1} members and its weight matrix.\n'\ .format(com_index-1, len(com)) # Multiple matrices and the sum te vector to be the representative vector for the community # composition_matrix = np.multiply(words_matrix,vector_weight) # Remove outliers # clf.fit(X=composition_matrix) # fit the model y_pred = None print words_matrix.shape, vector_weight.flatten().shape if len(comm_labels) < 15: maybe_print("Community {0} has less than 10 members, outliner removal skipped!".format(com)) y_pred = np.ones(words_matrix.shape[0]) else: clf.fit(X=words_matrix,sample_weight=vector_weight.flatten()) # fit the model # predict with the model. The outcome is an array, each element is the predicted value of # the word/row. 
It can be 1 (inlier) or -1 (outlier) # y_pred = clf.predict(composition_matrix) y_pred = clf.predict(words_matrix) print y_pred # Weighted AVERAGE composition composition_matrix = np.multiply(words_matrix, vector_weight) # Now filter inliner only filtered_composition_vector = composition_matrix[np.where(y_pred == 1)] # filtered_composition_vector = words_matrix[np.where(y_pred == 1)] # Remove predicted outlier comm_labels_array = np.delete(comm_labels_array, np.where(y_pred == -1)) maybe_print(' --> Outlier removal discarded {0} words. Remaining words: {1}' .format(len(np.where(y_pred == -1)[0]), str(comm_labels_array))) # Sum the matrix by row to form one vector composition_vector = np.sum(filtered_composition_vector, axis=0) # print composition_vector # Dig to vector space of Glove to get the label dst = (np.dot(glove_model.word_vectors, composition_vector) / np.linalg.norm(glove_model.word_vectors, axis=1) / np.linalg.norm(composition_vector)) word_ids = np.argsort(-dst) # Get 2 most similar words @@@@@ raw_suggested_labels = [glove_model.inverse_dictionary[x] for x in word_ids[:50] if x in glove_model.inverse_dictionary] suggested_labels = [] # Filter result by POS for w in raw_suggested_labels: if len(w)>2: related_pos = set([syn.pos() for syn in wn.synsets(w)]) if related_pos and len(set([u'v',u'a',u's',u'r']) & related_pos) == 0: # Filter: exclude some pos suggested_labels.append(w) # Get 3 most frequent word freqs = [w for w,_ in sorted([(g.node[n]['label'],g.node[n]['weight']) for n in com], key=lambda e: int(e[1]),reverse=True)] # suggested_labels = glove_model.most_similar_paragraph(comm_labels) if len(suggested_labels) > 5: suggested_labels = suggested_labels[:5] ''' # Apply DBPedia Labeler # top10 =[subword for word in freqs[:5] for subword in word.split('_') if en.is_noun(subword)] + freqs[:5] top10 = freqs[:10] print "---> ",top10 # DB_labels = DbpediaLabeller.DBPprocess(top10) DB_labels = DbpediaLabeller.DBPprocess(top10) # print 'ZZZZZZZZZzzzzz',comm_labels_array # DB_labels = DbpediaLabeller.DBPprocess(comm_labels_array) print DB_labels if len(DB_labels) >5: DB_labels = DB_labels[:5] for node_id in com: # sample_community_names[com_index] graph.node[node_id]['cluster_id'] = u'[{0}] Top: {1} \nV.Comp: {2} \nDbpedia: {3}'\ .format(sample_community_names[com_index], ', '.join(freqs[:5]), ' - '.join(suggested_labels).upper(), ' - '.join(DB_labels).upper()) ''' for node_id in com: # sample_community_names[com_index] graph.node[node_id]['cluster_id'] = u'[{0}] Top: {1} \nV.Comp: {2}' \ .format(sample_community_names[com_index], ', '.join(freqs[:5]), ' - '.join(suggested_labels).upper()) return graph except Exception as inst: maybe_print(" Error while running algorithm {0} to detect communities. Error name: {1}. \n" "Perhaps incorrect algorithm name of parameters. Community detection is skipped and community " "label for all nodes is set to be \'unknown\'.".format(ALGORITHM,inst.message), 2, 'E') traceback.print_exc() return g else: return g
def main(graph_name): H = nx.read_gml(graph_name) #dir=graph_name.split("fr")[0] # dir=graph_name.split("mas")[0] name00=graph_name.split(".gml")[0] print type(name00) name00=name00+"_average_percent_weight_change_per_kshell_clinically_signif.dat" list_conn=[] for node in H.nodes(): # i remove self loops if node in H.neighbors(node): if len(H.neighbors(node))>1: H.remove_edge(node,node) else: H.remove_node(node) try: list_conn.append(len(H.neighbors(node))) except: pass max_connect=max(list_conn) for node in H.nodes(): if H.node[node]['weigh_ins'] <5: #Adherent filter H.remove_node(node) # print node, "is going down" G = nx.connected_component_subgraphs(H)[0] # Giant component print "final size of the GC:",len(G.nodes()) cum_size_set=float(len(G.nodes())) list_percent_weight_change_k_shell=[] for index in range (max_connect+1): k_core=nx.algorithms.core.k_shell(G,k=index) if len (k_core)>0: num_users_set=cum_size_set num_users_clinically_signif=0.0 for node in k_core: list_percent_weight_change_k_shell.append(float(G.node[node]['percentage_weight_change'])) if int(index)==12:#inner core G.node[node]['role']="inner_core" G.node[node]['kshell_index']=int(index) #print node, G.node[node]['kshell_index'] cum_size_set-=1.0 if G.node [node]['percentage_weight_change']<=-5.0: num_users_clinically_signif+=1.0 print "\n",index,len(k_core),num_users_set/float(len(G.nodes())),num_users_clinically_signif/len(list_percent_weight_change_k_shell),numpy.mean(list_percent_weight_change_k_shell),numpy.std(list_percent_weight_change_k_shell) file0=open(name00, 'at') print >> file0,index,len(k_core),num_users_set/float(len(G.nodes())),num_users_clinically_signif/len(list_percent_weight_change_k_shell),numpy.mean(list_percent_weight_change_k_shell),numpy.std(list_percent_weight_change_k_shell), print >> file0,stats.shapiro(list_percent_weight_change_k_shell) #w entre 0 y 1 (normal si cerca de 1), p menor que 0.05 para normalidad file0.close() # print "size main k-core:",len(nx.algorithms.core.k_shell(G)) list_nodes_kindex=[] for index in range (max_connect+1): list=[] for node in G.nodes(): if G.node[node]['kshell_index']==index: list.append(node) if len(list)>0: list_nodes_kindex.append(list) name1=graph_name.split(".gml")[0] name=name1+"_list_of_lists_kshells.dat" file=open(name, 'wt') print >> file,list_nodes_kindex file.close() #print list_nodes_kindex nx.write_gml(G,name1+"_inner_core.gml")
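# The loop above assigns each node its k-shell index by calling nx.k_shell once
# per k.  A minimal sketch, assuming the same pre-2.4 node-attribute API, that
# derives the same index in one pass from nx.core_number (a node's core number
# is exactly its k-shell index):
import networkx as nx

def annotate_kshell_index(G):
    """Store every node's k-shell index under the 'kshell_index' attribute."""
    G.remove_edges_from(nx.selfloop_edges(G))   # core_number rejects self-loops
    for node, k in nx.core_number(G).items():
        G.node[node]['kshell_index'] = k
    return G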
def topology_rocketfuel_latency(asn, source_ratio=0.1, ext_delay=EXTERNAL_LINK_DELAY, **kwargs): """Parse a generic RocketFuel topology with annotated latencies To each node of the parsed topology it is attached an artificial receiver node. To the routers with highest degree it is also attached a source node. Parameters ---------- asn : int AS number source_ratio : float Ratio between number of source nodes (artificially attached) and routers ext_delay : float Delay on external nodes """ if source_ratio < 0 or source_ratio > 1: raise ValueError('source_ratio must be comprised between 0 and 1') f_topo = path.join(TOPOLOGY_RESOURCES_DIR, 'rocketfuel-latency', str(asn), 'latencies.intra') topology = fnss.parse_rocketfuel_isp_latency(f_topo).to_undirected() topology = list(nx.connected_component_subgraphs(topology))[0] # First mark all current links as inernal for u, v in topology.edges_iter(): topology.edge[u][v]['type'] = 'internal' # Note: I don't need to filter out nodes with degree 1 cause they all have # a greater degree value but we compute degree to decide where to attach sources routers = topology.nodes() # Source attachment n_sources = int(source_ratio * len(routers)) sources = ['src_%d' % i for i in range(n_sources)] deg = nx.degree(topology) # Attach sources based on their degree purely, but they may end up quite clustered routers = sorted(routers, key=lambda k: deg[k], reverse=True) for i in range(len(sources)): topology.add_edge(sources[i], routers[i], delay=ext_delay, type='external') # Here let's try attach them via cluster # clusters = compute_clusters(topology, n_sources, distance=None, n_iter=1000) # source_attachments = [max(cluster, key=lambda k: deg[k]) for cluster in clusters] # for i in range(len(sources)): # topology.add_edge(sources[i], source_attachments[i], delay=ext_delay, type='external') # attach artificial receiver nodes to ICR candidates receivers = ['rec_%d' % i for i in range(len(routers))] for i in range(len(routers)): topology.add_edge(receivers[i], routers[i], delay=0, type='internal') # Set weights to latency values for u, v in topology.edges_iter(): topology.edge[u][v]['weight'] = topology.edge[u][v]['delay'] # Deploy stacks on nodes topology.graph['icr_candidates'] = set(routers) for v in sources: fnss.add_stack(topology, v, 'source') for v in receivers: fnss.add_stack(topology, v, 'receiver') for v in routers: fnss.add_stack(topology, v, 'router') return IcnTopology(topology)
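# topology_rocketfuel_latency needs the RocketFuel latency files on disk.  A
# small usage sketch, assuming AS 1221 is available under
# TOPOLOGY_RESOURCES_DIR/rocketfuel-latency:
topo = topology_rocketfuel_latency(1221, source_ratio=0.1)

print(len(topo), 'nodes in total')
print(len(topo.graph['icr_candidates']), 'ICR candidate routers')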