def load_GT_graph(graphExample, gcc=False, removeSL=False, removePE=False):
    '''
    Load a graph and optionally simplify it.

    Input:
        - graphExample, graph example from Graph-tool collections (e.g.,
          'cond-mat-2003', 'adjnoun' 'karate' 'netscience') or a graphfile
          in .gml format (recognised by its ".gml" suffix)
        - gcc = True if only the giant connected component should be returned
        - removeSL = True if any self-loops must be removed
        - removePE = True if any parallel-edge must be removed
    Output: the corresponding graph_tool graph object, made undirected.
    '''
    if graphExample.endswith(".gml"):
        g = load_graph(graphExample)
    else:
        g = collection.data[graphExample]

    # is_directed is a method: it has to be called. Testing the bound method
    # object itself is always truthy, which made the check meaningless.
    if g.is_directed():
        g.set_directed(False)

    if removePE:
        gtStats.remove_parallel_edges(g)

    if removeSL:
        gtStats.remove_self_loops(g)

    if gcc:
        # Keep Largest Connected Component: filter on its label map, then
        # purge the vertices that the filter hides.
        l = topology.label_largest_component(g)
        g.set_vertex_filter(l)
        g.purge_vertices()

    return g
def get_largest_cc(g):
    """Remove from ``g`` every vertex not labelled as largest component.

    The graph is modified in place and also returned.
    """
    membership = gt.label_largest_component(g)
    # Positions whose label compares equal to 0 are scheduled for removal.
    doomed = [pos for pos, flag in enumerate(membership) if flag == 0]
    # Walk the positions from the highest to the lowest, presumably so that
    # the indices still pending are not invalidated by earlier removals.
    for pos in reversed(doomed):
        g.remove_vertex(g.vertex(pos))
    return g
def create_graph(N=100, nb_clusters=4):
    """Draw random graphs of N nodes until one is connected, then sign it.

    At most N graphs are drawn; if none of them is connected the last one
    drawn is used anyway. The graph lives in the module-level ``cexp`` state,
    which is then turned into a signed graph with ``nb_clusters`` clusters.

    Returns a tuple ``(g, diameter)`` where ``g`` is the graph-tool graph and
    ``diameter`` is the integer pseudo-diameter of ``g``.

    NOTE(review): with N <= 0 the loop never runs and ``g`` is unbound.
    """
    from graph_tool.topology import label_largest_component, pseudo_diameter
    is_connected = False
    nb_iter = 0
    while not is_connected and nb_iter < N:
        cexp.fast_random_graph(N, .05)
        g = cexp.to_graph_tool()
        # Connected iff the largest component labels all N vertices.
        is_connected = label_largest_component(g).a.sum() == N
        # The attempt counter was never advanced before, which made the
        # ``nb_iter < N`` bound ineffective (potential endless loop).
        nb_iter += 1
    cexp.turn_into_signed_graph_by_propagation(nb_clusters, .8)
    return g, int(pseudo_diameter(g)[0])
def make_graph(n):
    """Generate a preferential-attachment graph and its distance matrix.

    Returns ``(graph, lcc_nodes, full_mat)``: the graph-tool graph filtered to
    its largest component, the indices where the component label array is
    non-zero, and an ``n x n`` uint8 matrix whose rows (for the vertices of
    the filtered graph) hold the shortest distances cast to uint8.
    """
    t0 = clock()
    cexp.preferential_attachment(n, m=3, gamma=1.05, c=.4,
                                 bonus_neighbor_prob=.13)
    graph = cexp.to_graph_tool()

    in_lcc = label_largest_component(graph)
    graph.set_vertex_filter(in_lcc)
    lcc_nodes = np.where(in_lcc.a)[0]

    distances = shortest_distance(graph, dense=False)
    full_mat = np.zeros((n, n), dtype=np.uint8)
    for vertex in graph.vertices():
        row = distances[vertex].a.astype(np.uint8)
        full_mat[int(vertex), :] = row
    # Drop the per-vertex distance map once it has been copied.
    del distances

    elapsed = clock() - t0
    print('make_graph {:.3f}'.format(elapsed))
    return graph, lcc_nodes, full_mat
def load_wiki():
    """Load the wiki graph, its distance matrix and the signed original graph.

    Reads 'wiki_simple.gt' and 'wiki_dst.npy' from the working directory,
    filters the graph to its largest component, reads 'soc-wiki.txt' through
    ``real_world`` and copies its graph data into ``cexp.redensify``.

    Returns ``(graph, lcc_nodes, dst_mat)``.
    """
    import graph_tool as gt
    import real_world as rw

    graph = gt.load_graph('wiki_simple.gt')
    dst_mat = np.load('wiki_dst.npy')

    in_lcc = label_largest_component(graph)
    graph.set_vertex_filter(in_lcc)
    lcc_nodes = np.where(in_lcc.a)[0]

    rw.read_original_graph('soc-wiki.txt')
    # Publish the freshly read graph through the redensify module globals.
    target = cexp.redensify
    target.G = rw.G
    target.N = len(rw.G)
    target.EDGES_SIGN = rw.EDGE_SIGN

    return graph, lcc_nodes, dst_mat
def choose_giant_component(edges, nodes):
    """Restrict an edge list to the largest connected component.

    ``edges`` is an iterable (traversed twice) of strings holding two
    whitespace-separated integer node ids; ``nodes`` is a collection of
    integer ids whose maximum sizes the temporary graph.

    Returns ``(new_edges, new_nodes)``: the set of edge strings whose two
    endpoints are both labelled as largest component, and the set of node ids
    appearing in those edges.
    """

    def _endpoints(edge):
        # Parse "u w" into a pair of ints.
        u, w = map(int, edge.split())
        return u, w

    g = Graph(directed=False)
    g.add_vertex(max(nodes) + 1)
    for edge in edges:
        u, w = _endpoints(edge)
        g.add_edge(g.vertex(u), g.vertex(w))

    in_giant = topology.label_largest_component(g)

    new_nodes, new_edges = set(), set()
    for edge in edges:
        u, w = _endpoints(edge)
        if not (in_giant[g.vertex(u)] and in_giant[g.vertex(w)]):
            continue
        new_nodes.add(u)
        new_nodes.add(w)
        new_edges.add(edge)
    return new_edges, new_nodes
def subgraphs(self, g):
    """Yield views of ``g``, peeling off the largest component each time.

    On every round the largest component of the remaining graph (labelled
    with ``directed=False``) is yielded as a ``GraphView``; the search then
    continues on a view of the vertices that were left out. An empty graph
    yields nothing.
    """
    # A generator must finish by returning: ``raise StopIteration`` inside a
    # generator is turned into RuntimeError by PEP 479 (Python 3.7+). The
    # loop also replaces the former recursion, one level per component.
    while g.num_vertices() != 0:
        prop = label_largest_component(g, False)

        filt = g.new_vertex_property('boolean')
        for v in g.vertices():
            if prop[v] > 0:
                filt[v] = True
        yield gt.GraphView(g, filt)

        # Complement filter: everything outside the component just yielded.
        filt = g.new_vertex_property('boolean')
        for v in g.vertices():
            if prop[v] <= 0:
                filt[v] = True
        g = gt.GraphView(g, filt)
def f_pseudo_diameter( D, stats, options=None ):
    """Store the pseudo-diameter of D in ``stats`` when it is requested.

    Nothing is computed unless 'diameter' is listed in options['features'].
    When it is, the pseudo-diameter is measured on the largest component of D
    (or on D itself if that component has fewer than two vertices) and these
    keys are written to ``stats``: 'pseudo_diameter',
    'pseudo_diameter_src_vertex' and 'pseudo_diameter_trg_vertex' (the latter
    two taken from D's 'name' vertex property).

    options defaults to ``{ 'features': [] }``.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if options is None:
        options = { 'features': [] }

    if 'diameter' in options['features']:
        # Only label the largest component when the result is actually used.
        LC = label_largest_component(D)
        LCD = GraphView( D, vfilt=LC )

        if LCD.num_vertices() == 0 or LCD.num_vertices() == 1:
            # if largest component does practically not exist, use the whole graph
            dist, ends = pseudo_diameter(D)
        else:
            dist, ends = pseudo_diameter(LCD)

        stats['pseudo_diameter']=dist
        # D may be used in both cases
        stats['pseudo_diameter_src_vertex']=D.vertex_properties['name'][ends[0]]
        stats['pseudo_diameter_trg_vertex']=D.vertex_properties['name'][ends[1]]

        log.debug( 'done pseudo_diameter' )
def makeISOPoisson(numberOfNodes, averDegree, gcc, save=False, saveAt=None):
    '''
    Build an undirected random graph with Poisson-sampled degrees together
    with the graph produced by graph_analysis.IO.createIsoGraph.

    Input:
        - numberOfNodes, number of vertices of the random graph
        - averDegree, mean of the Poisson distribution the degrees are drawn from
        - gcc = True if only the largest connected component must be kept
        - save = True if the result must be stored with graph_analysis.IO.save_data
        - saveAt, destination handed to save_data; required when save is True
    Output: g, gp, forwardMap (the random graph and the two values returned
            by createIsoGraph for its dense adjacency matrix)
    '''
    # Fail before doing any expensive work, not after the graphs are built.
    if save:
        assert saveAt is not None, "saveAt is required when save=True"

    def degreeSample():
        return np.random.poisson(averDegree)

    g = generation.random_graph(numberOfNodes, degreeSample, directed=False)

    if gcc:
        # Keep Largest Connected Component
        l = topology.label_largest_component(g)
        g.set_vertex_filter(l)
        g.purge_vertices()

    Ag = spectral.adjacency(g).todense()
    gp, forwardMap = graph_analysis.IO.createIsoGraph(Ag)

    if save:
        graph_analysis.IO.save_data(saveAt, g, gp, forwardMap)

    return g, gp, forwardMap
def make_simple_graph(g, undirected=True, gcc=True):
    '''
    Simplifies the input graph -g- in place and returns it: parallel edges
    and self-loops are removed.
    If undirected = True, returned graph is also undirected.
    If gcc = True, returned graph is giant connected component of g
    (all other vertices are purged).
    '''
    # is_directed is a method and must be called; the bare attribute is a
    # bound method object, which is always truthy.
    if undirected and g.is_directed():
        g.set_directed(False)

    gtStats.remove_self_loops(g)
    gtStats.remove_parallel_edges(g)

    if gcc:
        # Keep Largest Connected Component.
        l = topology.label_largest_component(g)
        # Single parenthesised argument: prints the same text on Python 2 and 3.
        print("Nodes in largest connected component: " + str(np.sum(l.a)))
        g.set_vertex_filter(l)
        g.purge_vertices()
    return g
def loadGraphWithAnnotations(graphFile):
    '''
    Used to read the graphs provided by RoleSim people, regarding scientific
    collaborations and the g/h index.

    File layout, as consumed below:
        - first line: the number of nodes is its second whitespace token
        - one tab-separated line per node: id, name, and two integer columns
        - a line "*Edges"
        - one line per edge: two whitespace-separated integer node ids
    Output: undirected graph_tool graph without parallel edges/self-loops,
            reduced to its largest connected component, carrying the vertex
            properties "names", "h-Index" and "g-Index".
    '''
    g = Graph(directed=False)
    with open(graphFile, "r") as inF:
        num_nodes = int(inF.readline().split()[1])
        g.add_vertex(num_nodes)
        g_names = g.new_vertex_property("string")
        g_H_ind = g.new_vertex_property("int")
        g_G_ind = g.new_vertex_property("int")

        for i, line in enumerate(inF):  # Read Meta-Data of Nodes
            # str.rstrip replaces the free function rstrip(line) (assumed to
            # be the Python 2 ``string.rstrip``); strip once and reuse.
            stripped = line.rstrip()
            if stripped == "*Edges":
                break
            contents = stripped.split("\t")
            gID, name, gIndex, hIndex = contents[0], contents[1], int(
                contents[2]), int(contents[3])
            assert (gID == str(i))
            g_names[g.vertex(i)] = name
            # NOTE(review): the column named gIndex feeds the "h-Index"
            # property and hIndex feeds "g-Index". Either the names or the
            # assignments look swapped; kept as found, confirm against the
            # file format.
            g_H_ind.a[i] = gIndex
            g_G_ind.a[i] = hIndex

        for line in inF:  # Read Edges
            tokens = line.split()
            if not tokens:
                # Tolerate blank lines (e.g. at the end of the file).
                continue
            fromE, toE = int(tokens[0]), int(tokens[1])
            g.add_edge(fromE, toE)

    g.vp["names"] = g_names
    g.vp["h-Index"] = g_H_ind
    g.vp["g-Index"] = g_G_ind

    gtStats.remove_parallel_edges(g)
    gtStats.remove_self_loops(g)

    # Keep Largest Connected Component
    l = topology.label_largest_component(g)
    g.set_vertex_filter(l)
    g.purge_vertices()

    return g
def user_network_summary(g):
    """Display a notebook summary of the user network ``g``.

    Shows the "track" graph property as a heading, the time span covered by
    the "created_at" edge property, a drawing of the graph, a table of basic
    metrics and a bar chart titled "Degree distribution".

    Nothing is returned. NOTE(review): a graph without vertices makes the
    averages divide by zero.
    """
    span = "{:D MMM YYYY, HH:mm} - {:D MMM YYYY, HH:mm}".format(
        arrow.get(g.edge_properties["created_at"].a.min()),
        arrow.get(g.edge_properties["created_at"].a.max())
    )

    largest_component = label_largest_component(g, directed=False).a.sum()

    display(Markdown("### " + g.graph_properties["track"].replace("#", r"\#")))
    display(Markdown("#### " + span))

    graph_draw(g, inline=True, output_size=[1000, 1000],
               vertex_fill_color=[.2, .3, .9, .7], vertex_size=2)

    stats = pd.DataFrame([
        ["Vertices", g.num_vertices()],
        ["Edges", g.num_edges()],
        ["Avg. degree", float(g.num_edges()) / g.num_vertices()],
        ["Avg. clustering", vertex_average(g, local_clustering(g))[0]],
        # float() as on the "Avg. degree" row, so the share is a true ratio
        # even where "/" on integers floors.
        ["Giant component share",
         "{:.1%}".format(float(largest_component) / g.num_vertices())]
    ], columns=["Metric", "Value"])
    display(stats)

    bins = 20
    counts, _ = vertex_hist(g, "in", range(bins))

    # NOTE(review): the first count is drawn at x = 1 although the first bin
    # edge passed to vertex_hist is 0 - confirm the intended x offset.
    plt.bar(range(1, bins), counts, align="center")
    plt.xticks(range(bins))
    plt.xlim([0.5, bins - 1])
    plt.title("Degree distribution")
    plt.show()
# Experiment driver: loads a graph and its distance matrix, restricts the
# graph to its largest component and collects the component edges that are
# not part of a BFS tree rooted at one selected node.
prefix = 'epi'
size = 131580
n = size
# Index of this run, taken from the command line; it selects the root below
# and names the diagnostics file.
idx = int(sys.argv[1])


def print_diag(msg):
    """Append `msg` (left-justified to 60 columns) and the seconds elapsed
    since the module-level `start` to the file '<prefix>_out.<idx>'."""
    global start, idx
    info = '{}{:.2f} seconds\n'.format
    with open('{}_out.{}'.format(prefix, idx), 'a') as f:
        f.write(info(msg.ljust(60), clock() - start))


# Reference time used by print_diag.
start = clock()
# graph_file, ds_file and orig_file are not defined in this part of the file.
k = gt.load_graph(graph_file)
dst_mat = np.load(ds_file)
lcc = label_largest_component(k)
k.set_vertex_filter(lcc)
lcc_nodes = np.where(lcc.a)[0]
slcc = set(lcc_nodes)
# Edges of the filtered graph as int pairs, kept when the source index is one
# of the component nodes.
all_lcc_edges = {(int(u), int(v)) for u, v in k.edges() if int(u) in slcc}
rw.read_original_graph(orig_file)
# First element of each of the last 200 entries of rw.DEGREES, in reverse
# order (presumably the highest-degree nodes first - confirm DEGREES order).
high_degree = [_[0] for _ in rw.DEGREES[-200:][::-1]]
# Recode the edge signs in place: truthy -> 1, falsy -> -1.
for e, s in rw.EDGE_SIGN.items():
    rw.EDGE_SIGN[e] = 1 if s else -1
print_diag('load graph')
root = high_degree[idx]
bfs_tree = set(pot.get_bfs_tree(rw.G, root))
# Component edges that are not in the BFS tree form the test set.
test_edges = all_lcc_edges - bfs_tree
test_graph = {}
for u, v in test_edges:
    pot.add_edge_to_tree(test_graph, u, v)
def calculate_largest_strongly_connected_comp(g):
    """Return a directed GraphView of ``g`` filtered by the label map of
    ``topology.label_largest_component(g, directed=True)``."""
    return GraphView(
        g,
        vfilt=topology.label_largest_component(g, directed=True),
        directed=True,
    )
def makeGraphFast(self,img,dia,xScale,yScale):
    '''
    Build an undirected graph from the foreground pixels of `img` by sweeping
    over the image rows and return (view, vertex count), where view is a
    GraphView restricted to the largest connected component.

    img    - 2D image; pixels comparing equal to True become vertices
    dia    - indexed as dia[row][col]; scaled by avgScale into 'diameter'
    xScale - factor applied to the column index in 'coord'
    yScale - factor applied to the row index in 'coord'
             (both scales fall back to 1. unless both are > 0)

    Properties stored on the graph: vertex "vp" (dict with 'imgIdx', 'coord',
    'nrOfPaths', 'diameter'), edge "ep" (dict with 'coord1', 'coord2',
    'weight', 'RTP') and edge "w" (float).
    '''
    # The trailing comma keeps the cursor on the same line (Python 2 print).
    print('Building Graph Data Structure'),
    start=time.time()
    G = Graph(directed=False)
    sumAddVertices=0
    vprop=G.new_vertex_property('object')
    eprop=G.new_edge_property('object')
    epropW=G.new_edge_property("float")
    h, w = np.shape(img)
    if xScale>0 and yScale>0: avgScale=(xScale+yScale)/2
    else:
        avgScale=1.
        xScale=1.
        yScale=1.
    # Columns / vertices of the NEXT row that were already created while the
    # current row was processed.
    addedVerticesLine2=[]
    vListLine2=[]
    percentOld=0
    counter=0
    ''' Sweep over each line in the image except the last line '''
    # NOTE(review): the slice stops at len(img)-2, i.e. the last TWO rows are
    # not swept - confirm whether that is intended.
    for idx,i in enumerate(img[:len(img)-2]):
        ''' Get foreground indices in the current line of the image and make vertices '''
        counter+=1
        percent=(float(counter)/float(h))*100
        # Report progress each time it advanced by more than 10 percent.
        if percentOld+10< percent:
            print (str(np.round(percent,1))+'% '),
            percentOld=percent
        line1=np.where(i==True)
        if len(line1[0])>0:
            # Only columns that did not get a vertex from the previous row.
            line1=set(line1[0]).difference(set(addedVerticesLine2))
            vL=G.add_vertex(len(list(line1)))
            if len(line1)>1 :
                vList=vListLine2+list(vL)
            else:
                vList=vListLine2+[vL]
            # line1 (columns) and vList (vertices) are parallel lists: the
            # entries inherited from the previous row come first.
            line1=addedVerticesLine2+list(line1)
            for jdx,j in enumerate(line1):
                vprop[vList[jdx]]={'imgIdx':(j,idx),'coord': (float(j)*xScale,float(idx)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx][j])*avgScale}
            ''' keep order of the inserted vertices '''
            sumAddVertices+=len(line1)
            addedVerticesLine2=[]
            vListLine2=[]
            ''' Connect foreground indices to neighbours in the next line '''
            for v1 in line1:
                va=vList[line1.index(v1)]
                diagonalLeft = diagonalRight = True
                # Left neighbour in the same row; "w" is 2./weight**2.
                # NOTE(review): for v1 == 0 the index v1-1 is -1, which
                # addresses the last column instead of raising - confirm.
                try:
                    if img[idx][v1-1]==True:
                        diagonalLeft=False
                        vb=vList[line1.index(v1-1)]
                        e=G.add_edge(va,vb)
                        eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vb]['coord'],'weight':((vprop[va]['diameter']+vprop[vb]['diameter'])/2),'RTP':False}
                        epropW[e]=2./(eprop[e]['weight']**2)
                except:
                    print 'Boundary vertex at: '+str([v1,idx-1])+' image size: '+ str([w,h])
                    pass
                # Right neighbour in the same row; "w" is 2./weight**2.
                try:
                    if img[idx][v1+1]==True:
                        diagonalRight=False
                        vb=vList[line1.index(v1+1)]
                        e=G.add_edge(va,vb)
                        eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vb]['coord'],'weight':((vprop[va]['diameter']+vprop[vb]['diameter'])/2),'RTP':False}
                        epropW[e]=2./(eprop[e]['weight']**2)
                except:
                    print 'Boundary vertex at: '+str([v1+1,idx])+' image size: '+ str([w,h])
                    pass # just if we are out of bounds
                # Pixel directly below: a new vertex is created for it and
                # both diagonals are disabled; "w" is 1./weight**2.
                try:
                    if img[idx+1][v1]==True:
                        diagonalRight=False
                        diagonalLeft=False
                        vNew=G.add_vertex()
                        vprop[vNew]={'imgIdx':(v1,idx+1),'coord': (float(v1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1])*avgScale}
                        vListLine2.append(vNew)
                        e=G.add_edge(vList[line1.index(v1)],vNew)
                        eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                        epropW[e]=1./(eprop[e]['weight']**2)
                        if v1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1)
                except:
                    print 'Boundary vertex at: '+str([v1,idx+1])+' image size: '+ str([w,h])
                    pass
                # Lower-right diagonal, only if still enabled; "w" is
                # 1.41/weight**2.
                try:
                    if diagonalRight == True and img[idx+1][v1+1]==True:
                        vNew=G.add_vertex()
                        vprop[vNew]={'imgIdx':(v1+1,idx+1),'coord': (float(v1+1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1+1])*avgScale}
                        vListLine2.append(vNew)
                        e=G.add_edge(vList[line1.index(v1)],vNew)
                        eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                        epropW[e]=1.41/(eprop[e]['weight']**2)
                        if v1+1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1+1)
                except:
                    print 'Boundary vertex at: '+str([v1+1,idx+1])+' image size: '+ str([w,h])
                    pass
                # Lower-left diagonal, only if still enabled; "w" is
                # 1.41/weight**2.
                try:
                    if diagonalLeft == True and img[idx+1][v1-1]==True:
                        vNew=G.add_vertex()
                        vprop[vNew]={'imgIdx':(v1-1,idx+1),'coord': (float(v1-1)*xScale,float(idx+1)*yScale), 'nrOfPaths':0, 'diameter':float(dia[idx+1][v1-1])*avgScale}
                        vListLine2.append(vNew)
                        e=G.add_edge(vList[line1.index(v1)],vNew)
                        eprop[e]={'coord1':vprop[va]['coord'], 'coord2':vprop[vNew]['coord'],'weight':((vprop[va]['diameter']+vprop[vNew]['diameter'])/2),'RTP':False}
                        epropW[e]=1.41/(eprop[e]['weight']**2)
                        if v1-1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1-1)
                except:
                    print 'Boundary vertex at: '+str([v1-1,idx+1])+' image size: '+ str([w,h])
                    pass
                # Purely diagnostic output; any failure here is ignored.
                try:
                    if img[idx][v1+1]==False and img[idx][v1-1]==False and img[idx+1][v1]==False and diagonalLeft==False and diagonalRight==False:
                        print 'tip detected'
                        if img[idx-1][v1-1]==False and img[idx-1][v1+1]==False and img[idx-1][v1]==False:
                            print 'floating pixel'
                except: pass
    print'done!'
    G.edge_properties["ep"] = eprop
    G.edge_properties["w"] = epropW
    G.vertex_properties["vp"] = vprop
    print 'graph build in '+str(time.time()-start)
    # Restrict the result to the largest connected component.
    l = gt.label_largest_component(G)
    u = gt.GraphView(G, vfilt=l)
    print '# vertices'
    print(u.num_vertices())
    print(G.num_vertices())
    # Fraction of vertices that fell outside the largest component.
    if u.num_vertices()!=G.num_vertices():
        self.__fail=float((G.num_vertices()-u.num_vertices()))/float(G.num_vertices())
    return u,u.num_vertices()
def makeGraph(self,img,dia,xScale,yScale):
    '''
    Build an undirected graph from the foreground pixels of `img` and return
    a GraphView restricted to its largest connected component.

    img    - 2D array indexed as img[row, col]; pixels equal to True become
             vertices and are linked to their 4-neighbours
    dia    - indexed as dia[row][col]; scaled by avgScale into 'diameter'
    xScale - factor applied to the row index in 'coord'
    yScale - factor applied to the column index in 'coord'

    Properties stored on the graph: vertex "vp" (dict with 'imgIdx', 'coord',
    'nrOfPaths', 'diameter'), edge "ep" (dict with 'coord1', 'coord2',
    'weight', 'RTP') and edge "w" (int32_t).
    '''
    print('Building Graph Data Structure')
    start=time.time()
    G = Graph(directed=False)
    vprop=G.new_vertex_property('object')
    eprop=G.new_edge_property('object')
    epropW=G.new_edge_property("int32_t")
    avgScale=(xScale+yScale)/2

    def vertexRecord(row,col):
        # Property dict that identifies (and is stored on) pixel img[row, col].
        return {'imgIdx':(row,col),'coord':(float(col)*yScale,float(row)*xScale), 'nrOfPaths':0, 'diameter':float(dia[row][col])*avgScale}

    def connectNeighbour(v1,row,col):
        # Link v1 to the vertex of img[row, col] if that pixel is foreground,
        # creating the vertex when it does not exist yet.
        if img[row,col] == True:
            record=vertexRecord(row,col)
            found=gu.find_vertex(G, vprop, record)
            if found:
                v2=found[0]
                # Skip the edge if one with these properties is already there.
                if gu.find_edge(G, eprop, {'coord1':vprop[v2]['coord'], 'coord2':vprop[v1]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}):
                    return
            else:
                v2=G.add_vertex()
                vprop[G.vertex(v2)]=record
            e = G.add_edge(v1, v2)
            epropW[e]=(((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)/avgScale)**4
            eprop[e]={'coord1':vprop[v1]['coord'], 'coord2':vprop[v2]['coord'],'weight':((vprop[v1]['diameter']+vprop[v2]['diameter'])/2)**4,'RTP':False}

    test=np.where(img==True)
    ss = np.shape(test)
    cccc=0
    percentOld=0.0
    print(str(np.round(percentOld,1))+'%')
    # test[0] holds the row indices (j), test[1] the column indices (i).
    for (i,j) in zip(test[1],test[0]):
        cccc+=1
        percent=(float(cccc)/float(ss[1]))*100
        # Report progress each time it advanced by more than 10 percent.
        if percentOld+10< percent:
            print(str(np.round(percent,1))+'%')
            percentOld=percent

        # Reuse the vertex of this pixel if an earlier neighbour created it.
        record1=vertexRecord(j,i)
        found1=gu.find_vertex(G, vprop, record1)
        if found1:
            v1=found1[0]
        else:
            v1=G.add_vertex()
            vprop[G.vertex(v1)]=record1

        # Right, left, lower and upper neighbour, in that order.
        # NOTE(review): an index of -1 (i == 0 or j == 0) addresses the last
        # column/row instead of raising - confirm this wrap is intended.
        for (row,col) in ((j,i+1),(j,i-1),(j+1,i),(j-1,i)):
            try:
                connectNeighbour(v1,row,col)
            except Exception:
                # Best effort: neighbours outside the image (or any other
                # failure) are skipped. Narrowed from a bare except so that
                # KeyboardInterrupt/SystemExit still stop the loop.
                pass
    # print '100.0%'
    print('selecting largest connected component')
    G.edge_properties["ep"] = eprop
    G.edge_properties["w"] = epropW
    G.vertex_properties["vp"] = vprop
    l = gt.label_largest_component(G)
    print(l.a)
    u = gt.GraphView(G, vfilt=l)
    print('# vertices')
    print(u.num_vertices())
    print(G.num_vertices())
    print('# edges')
    print(u.num_edges())
    print('building graph finished in: '+str(time.time()-start)+'s')
    return u
print "Interactions:\n%d Noun-Noun Edges \n %d Noun-Verb Edges \n %d Verb-Verb Edges" % ( nounNoun, nounVerb, verbVerb) print "Total Edge Weight = " + str(totalW) novel = "Zarathustra" # novel = "Sawyer" lang = "English" inFile = "../Data/" + novel + "_" + lang + ".txt" # coOcDic, posMap = partOfSpeech(novel, inFile, lang) # IO.saveData(novel+"_"+lang+"_nounVSverb.data" , coOcDic, posMap) coOcDic, posMap = graph_analysis.IO.load_data(novel + "_" + lang + "_nounVSverb.data") g = coOcDicToGraph(coOcDic, posMap) l = topology.label_largest_component(g) #Keep Largest Connected Component g.set_vertex_filter(l) g.purge_vertices() printNounVerbGraphStats(g) nodesColored = [] nodesLabels = [] palet = palets("posToColor") allTypes = set() for v in g.vertices(): nodesColored.append(palet[g.vp['partOs'][v][0]]) allTypes.add(g.vp['partOs'][v]) #Make binary label for SVM for v in g.vertices():
def _get_lcc_size(G):
    '''Count the vertices of G labelled as largest connected component (LCC).'''
    lcc_mask = label_largest_component(G)
    return lcc_mask.a.sum()
def makeGraph(self, img, dia, xScale, yScale):
    '''
    Build an undirected graph from the foreground pixels of `img` and return
    a GraphView restricted to its largest connected component.

    img    - 2D array indexed as img[row, col]; pixels equal to True become
             vertices and are linked to their 4-neighbours
    dia    - indexed as dia[row][col]; scaled by avgScale into 'diameter'
    xScale - factor applied to the row index in 'coord'
    yScale - factor applied to the column index in 'coord'

    Properties stored on the graph: vertex "vp" (dict with 'imgIdx', 'coord',
    'nrOfPaths', 'diameter'), edge "ep" (dict with 'coord1', 'coord2',
    'weight', 'RTP') and edge "w" (int32_t).
    '''
    print('Building Graph Data Structure')
    start = time.time()
    G = Graph(directed=False)
    vprop = G.new_vertex_property('object')
    eprop = G.new_edge_property('object')
    epropW = G.new_edge_property("int32_t")
    avgScale = (xScale + yScale) / 2

    def vertex_record(row, col):
        # Property dict that identifies (and is stored on) pixel img[row, col].
        return {
            'imgIdx': (row, col),
            'coord': (float(col) * yScale, float(row) * xScale),
            'nrOfPaths': 0,
            'diameter': float(dia[row][col]) * avgScale
        }

    def connect_neighbour(v1, row, col):
        # Link v1 to the vertex of img[row, col] if that pixel is foreground,
        # creating the vertex when it does not exist yet.
        if img[row, col] == True:
            record = vertex_record(row, col)
            found = gu.find_vertex(G, vprop, record)
            if found:
                v2 = found[0]
                # Skip the edge if one with these properties is already there.
                if gu.find_edge(
                        G, eprop, {
                            'coord1': vprop[v2]['coord'],
                            'coord2': vprop[v1]['coord'],
                            'weight': ((vprop[v1]['diameter'] +
                                        vprop[v2]['diameter']) / 2)**4,
                            'RTP': False
                        }):
                    return
            else:
                v2 = G.add_vertex()
                vprop[G.vertex(v2)] = record
            e = G.add_edge(v1, v2)
            epropW[e] = (((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                          2) / avgScale)**4
            eprop[e] = {
                'coord1': vprop[v1]['coord'],
                'coord2': vprop[v2]['coord'],
                'weight': ((vprop[v1]['diameter'] + vprop[v2]['diameter']) /
                           2)**4,
                'RTP': False
            }

    test = np.where(img == True)
    ss = np.shape(test)
    cccc = 0
    percentOld = 0.0
    print(str(np.round(percentOld, 1)) + '%')
    # test[0] holds the row indices (j), test[1] the column indices (i).
    for (i, j) in zip(test[1], test[0]):
        cccc += 1
        percent = (float(cccc) / float(ss[1])) * 100
        # Report progress each time it advanced by more than 10 percent.
        if percentOld + 10 < percent:
            print(str(np.round(percent, 1)) + '%')
            percentOld = percent

        # Reuse the vertex of this pixel if an earlier neighbour created it.
        record1 = vertex_record(j, i)
        found1 = gu.find_vertex(G, vprop, record1)
        if found1:
            v1 = found1[0]
        else:
            v1 = G.add_vertex()
            vprop[G.vertex(v1)] = record1

        # Right, left, lower and upper neighbour, in that order.
        # NOTE(review): an index of -1 (i == 0 or j == 0) addresses the last
        # column/row instead of raising - confirm this wrap is intended.
        for (row, col) in ((j, i + 1), (j, i - 1), (j + 1, i), (j - 1, i)):
            try:
                connect_neighbour(v1, row, col)
            except Exception:
                # Best effort: neighbours outside the image (or any other
                # failure) are skipped. Narrowed from a bare except so that
                # KeyboardInterrupt/SystemExit still stop the loop.
                pass
    # print '100.0%'
    print('selecting largest connected component')
    G.edge_properties["ep"] = eprop
    G.edge_properties["w"] = epropW
    G.vertex_properties["vp"] = vprop
    l = gt.label_largest_component(G)
    print(l.a)
    u = gt.GraphView(G, vfilt=l)
    print('# vertices')
    print(u.num_vertices())
    print(G.num_vertices())
    print('# edges')
    print(u.num_edges())
    print('building graph finished in: ' + str(time.time() - start) + 's')
    return u
print('cascade size: ', len(np.nonzero(infection_times > 0)[0])) q = args.report_proba k = args.repeat_times method = args.method output_dir = args.output_dir result_dir = os.path.join(output_dir, method, "{}".format(q)) if not os.path.exists(result_dir): os.makedirs(result_dir) if not args.evaluate: print('run experiment...', 'q=', q, ', method=', method, 'cascade: ', args.cascade_id, 'cascade size: ', cascade_size(infection_times)) print(g) print(sum(label_largest_component(g).a)) run_k_runs(g, q, infection_times, method, k, result_dir, verbose=args.verbose) else: print('evaluate...') path, df = evaluate_from_result_dir(result_dir, infection_times=infection_times, k=k) print('writing to {}'.format(path)) if args.small_cascade:
def makeGraphFast(self, img, dia, xScale, yScale):
    '''
    Build an undirected graph from the foreground pixels of `img` by sweeping
    over the image rows and return (view, vertex count), where view is a
    GraphView restricted to the largest connected component.

    img    - 2D image; pixels comparing equal to True become vertices
    dia    - indexed as dia[row][col]; scaled by avgScale into 'diameter'
    xScale - factor applied to the column index in 'coord'
    yScale - factor applied to the row index in 'coord'

    Properties stored on the graph: vertex "vp" (dict with 'imgIdx', 'coord',
    'nrOfPaths', 'diameter'), edge "ep" (dict with 'coord1', 'coord2',
    'weight', 'RTP') and edge "w" (float).
    '''
    # The trailing comma keeps the cursor on the same line (Python 2 print).
    print('Building Graph Data Structure'),
    start = time.time()
    G = Graph(directed=False)
    sumAddVertices = 0
    vprop = G.new_vertex_property('object')
    eprop = G.new_edge_property('object')
    epropW = G.new_edge_property("float")
    h, w = np.shape(img)
    avgScale = (xScale + yScale) / 2
    # Columns / vertices of the NEXT row that were already created while the
    # current row was processed.
    addedVerticesLine2 = []
    vListLine2 = []
    percentOld = 0
    counter = 0
    ''' Sweep over each line in the image except the last line '''
    # NOTE(review): the slice stops at len(img) - 2, i.e. the last TWO rows
    # are not swept - confirm whether that is intended.
    for idx, i in enumerate(img[:len(img) - 2]):
        ''' Get foreground indices in the current line of the image and make vertices '''
        counter += 1
        percent = (float(counter) / float(h)) * 100
        # Report progress each time it advanced by more than 10 percent.
        if percentOld + 10 < percent:
            print(str(np.round(percent, 1)) + '% '),
            percentOld = percent
        line1 = np.where(i == True)
        if len(line1[0]) > 0:
            # Only columns that did not get a vertex from the previous row.
            line1 = set(line1[0]).difference(set(addedVerticesLine2))
            vL = G.add_vertex(len(list(line1)))
            if len(line1) > 1:
                vList = vListLine2 + list(vL)
            else:
                vList = vListLine2 + [vL]
            # line1 (columns) and vList (vertices) are parallel lists: the
            # entries inherited from the previous row come first.
            line1 = addedVerticesLine2 + list(line1)
            for jdx, j in enumerate(line1):
                vprop[vList[jdx]] = {
                    'imgIdx': (j, idx),
                    'coord': (float(j) * xScale, float(idx) * yScale),
                    'nrOfPaths': 0,
                    'diameter': float(dia[idx][j]) * avgScale
                }
            ''' keep order of the inserted vertices '''
            sumAddVertices += len(line1)
            addedVerticesLine2 = []
            vListLine2 = []
            ''' Connect foreground indices to neighbours in the next line '''
            for v1 in line1:
                va = vList[line1.index(v1)]
                diagonalLeft = diagonalRight = True
                # Left neighbour in the same row; "w" is 2. / weight**2.
                # NOTE(review): for v1 == 0 the index v1 - 1 is -1, which
                # addresses the last column instead of raising - confirm.
                try:
                    if img[idx][v1 - 1] == True:
                        diagonalLeft = False
                        vb = vList[line1.index(v1 - 1)]
                        e = G.add_edge(va, vb)
                        eprop[e] = {
                            'coord1': vprop[va]['coord'],
                            'coord2': vprop[vb]['coord'],
                            'weight': ((vprop[va]['diameter'] + vprop[vb]['diameter']) / 2),
                            'RTP': False
                        }
                        epropW[e] = 2. / (eprop[e]['weight']**2)
                except:
                    print 'Boundary vertex at: ' + str(
                        [v1, idx - 1]) + ' image size: ' + str([w, h])
                    pass
                # Right neighbour in the same row; "w" is 2. / weight**2.
                try:
                    if img[idx][v1 + 1] == True:
                        diagonalRight = False
                        vb = vList[line1.index(v1 + 1)]
                        e = G.add_edge(va, vb)
                        eprop[e] = {
                            'coord1': vprop[va]['coord'],
                            'coord2': vprop[vb]['coord'],
                            'weight': ((vprop[va]['diameter'] + vprop[vb]['diameter']) / 2),
                            'RTP': False
                        }
                        epropW[e] = 2. / (eprop[e]['weight']**2)
                except:
                    print 'Boundary vertex at: ' + str(
                        [v1 + 1, idx]) + ' image size: ' + str([w, h])
                    pass  # just if we are out of bounds
                # Pixel directly below: a new vertex is created for it and
                # both diagonals are disabled; "w" is 1. / weight**2.
                try:
                    if img[idx + 1][v1] == True:
                        diagonalRight = False
                        diagonalLeft = False
                        vNew = G.add_vertex()
                        vprop[vNew] = {
                            'imgIdx': (v1, idx + 1),
                            'coord': (float(v1) * xScale, float(idx + 1) * yScale),
                            'nrOfPaths': 0,
                            'diameter': float(dia[idx + 1][v1]) * avgScale
                        }
                        vListLine2.append(vNew)
                        e = G.add_edge(vList[line1.index(v1)], vNew)
                        eprop[e] = {
                            'coord1': vprop[va]['coord'],
                            'coord2': vprop[vNew]['coord'],
                            'weight': ((vprop[va]['diameter'] + vprop[vNew]['diameter']) / 2),
                            'RTP': False
                        }
                        epropW[e] = 1. / (eprop[e]['weight']**2)
                        if v1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1)
                except:
                    print 'Boundary vertex at: ' + str(
                        [v1, idx + 1]) + ' image size: ' + str([w, h])
                    pass
                # Lower-right diagonal, only if still enabled; "w" is
                # 1.41 / weight**2.
                try:
                    if diagonalRight == True and img[idx + 1][v1 + 1] == True:
                        vNew = G.add_vertex()
                        vprop[vNew] = {
                            'imgIdx': (v1 + 1, idx + 1),
                            'coord': (float(v1 + 1) * xScale, float(idx + 1) * yScale),
                            'nrOfPaths': 0,
                            'diameter': float(dia[idx + 1][v1 + 1]) * avgScale
                        }
                        vListLine2.append(vNew)
                        e = G.add_edge(vList[line1.index(v1)], vNew)
                        eprop[e] = {
                            'coord1': vprop[va]['coord'],
                            'coord2': vprop[vNew]['coord'],
                            'weight': ((vprop[va]['diameter'] + vprop[vNew]['diameter']) / 2),
                            'RTP': False
                        }
                        epropW[e] = 1.41 / (eprop[e]['weight']**2)
                        if v1 + 1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1 + 1)
                except:
                    print 'Boundary vertex at: ' + str(
                        [v1 + 1, idx + 1]) + ' image size: ' + str([w, h])
                    pass
                # Lower-left diagonal, only if still enabled; "w" is
                # 1.41 / weight**2.
                try:
                    if diagonalLeft == True and img[idx + 1][v1 - 1] == True:
                        vNew = G.add_vertex()
                        vprop[vNew] = {
                            'imgIdx': (v1 - 1, idx + 1),
                            'coord': (float(v1 - 1) * xScale, float(idx + 1) * yScale),
                            'nrOfPaths': 0,
                            'diameter': float(dia[idx + 1][v1 - 1]) * avgScale
                        }
                        vListLine2.append(vNew)
                        e = G.add_edge(vList[line1.index(v1)], vNew)
                        eprop[e] = {
                            'coord1': vprop[va]['coord'],
                            'coord2': vprop[vNew]['coord'],
                            'weight': ((vprop[va]['diameter'] + vprop[vNew]['diameter']) / 2),
                            'RTP': False
                        }
                        epropW[e] = 1.41 / (eprop[e]['weight']**2)
                        if v1 - 1 not in addedVerticesLine2:
                            addedVerticesLine2.append(v1 - 1)
                except:
                    print 'Boundary vertex at: ' + str(
                        [v1 - 1, idx + 1]) + ' image size: ' + str([w, h])
                    pass
                # Purely diagnostic output; any failure here is ignored.
                try:
                    if img[idx][v1 + 1] == False and img[idx][
                            v1 - 1] == False and img[idx + 1][
                                v1] == False and diagonalLeft == False and diagonalRight == False:
                        print 'tip detected'
                        if img[idx - 1][v1 - 1] == False and img[idx - 1][
                                v1 + 1] == False and img[idx - 1][v1] == False:
                            print 'floating pixel'
                except:
                    pass
    print 'done!'
    G.edge_properties["ep"] = eprop
    G.edge_properties["w"] = epropW
    G.vertex_properties["vp"] = vprop
    print 'graph build in ' + str(time.time() - start)
    # Restrict the result to the largest connected component.
    l = gt.label_largest_component(G)
    u = gt.GraphView(G, vfilt=l)
    print '# vertices'
    print(u.num_vertices())
    print(G.num_vertices())
    # Fraction of vertices that fell outside the largest component.
    if u.num_vertices() != G.num_vertices():
        self.__fail = float((G.num_vertices() - u.num_vertices())) / float(
            G.num_vertices())
    return u, u.num_vertices()
def graph_lcc(graph):
    """Return a GraphView of ``graph`` filtered by the label map of
    ``label_largest_component(graph, True)``."""
    return graph_tool.GraphView(
        graph, vfilt=label_largest_component(graph, True))
# Build the sparse matrix from q2us: one (row, column) entry per pair of a
# key q and an element u of its value, using the id maps q2id_map/u2id_map.
row_idx = []
col_idx = []
for q, us in q2us.items():
    row_idx += [q2id_map[q]]*len(us)
    col_idx += [u2id_map[u] for u in us]
# `data` is defined before this part of the file; it must supply exactly one
# value per (row, column) pair.
assert len(data) == len(row_idx) == len(col_idx)
m = sp.csr_matrix((data, (row_idx, col_idx)), shape=(len(q2id_map), len(u2id_map)))
qm = m * m.T  # question adj matrix via unipartite projection
g = Graph()
# One edge per non-zero entry of qm.
edges = zip(*qm.nonzero())
g.add_edge_list(edges)
vfilt = label_largest_component(g)
# Share of the vertices that are labelled as largest component.
f = np.sum(vfilt.a) / len(vfilt.a)
print('fraciton of nodes in largest cc: {}'.format(f))
prop_question_id = g.new_vertex_property('int')
# NOTE(review): assumes id2q_map has one value per vertex, in vertex order - confirm.
prop_question_id.a = np.array(list(id2q_map.values()))
# focus on largest CC
g.set_vertex_filter(vfilt)
# re-index the graph
# SO question: https://stackoverflow.com/questions/46264296/graph-tool-re-index-vertex-ids-to-be-consecutive-integers
# n2i maps each vertex of the filtered graph to its enumeration position;
# i2n is the inverse mapping.
n2i = {n: i for i, n in enumerate(g.vertices())}
i2n = dict(zip(n2i.values(), n2i.keys()))
def calculate_largest_weakly_connected_comp(g):
    """Return an undirected GraphView of ``g`` filtered by the label map of
    ``topology.label_largest_component(g, directed=False)``."""
    return GraphView(
        g,
        vfilt=topology.label_largest_component(g, directed=False),
        directed=False,
    )
def erdos_circles_cliques(bbSize, nCircles, circleSize, nCliques, cliqueSize, save=True):
    '''
    Makes a random (Erdos-Renyi) graph connected with circles and cliques.
    Input:
        -bbSize[0]- how many nodes the random Graph will have
        -bbSize[1]- how many edges the random Graph will have
        -nCliques- how many cliques to add of size -cliqueSize-
        -nCircles- how many circles to add of size -circleSize-
        -save- accepted but not used by this function
    Output: the connected graph_tool graph. Its vertex properties "bb",
            "circ" and "cliq" mark the origin of every node; "attachments"
            marks the two end-points of every connecting edge.
    '''
    backbone = erdos_renyi_graph(bbSize[0], bbSize[1], directed=False, gcc=True)
    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print("The random backbone graph has %d vertices and %d edges." % (
        backbone.num_vertices(), backbone.num_edges()))

    assert (np.sum(topology.label_largest_component(backbone).a) ==
            backbone.num_vertices())

    if backbone.num_vertices() < nCircles + nCliques:
        warnings.warn(
            "The erdos part of the graph is too small to add the requested circles/cliques."
        )

    # Mark all nodes belonging in the random backbone.
    backbone.vp["bb"] = backbone.new_vertex_property("short")
    backbone.vp["bb"].a = np.ones(backbone.num_vertices())

    # Mark all nodes belonging in the Circles.
    gCircle = add_in_graph(None, nCircles, generation.circular_graph, circleSize)
    gCircle.vp["circ"] = gCircle.new_vertex_property("short")
    gCircle.vp["circ"].a = np.ones(gCircle.num_vertices())

    # Mark all nodes belonging in the Cliques.
    gCliq = add_in_graph(None, nCliques, generation.complete_graph, cliqueSize)
    gCliq.vp["cliq"] = gCliq.new_vertex_property("short")
    gCliq.vp["cliq"].a = np.ones(gCliq.num_vertices())

    concat1 = generation.graph_union(backbone, gCliq, internal_props=True)
    gFinal = generation.graph_union(concat1, gCircle, internal_props=True)

    assert (sum(gFinal.vp['cliq'].a == 1) == gCliq.num_vertices() and \
            sum(gFinal.vp['circ'].a == 1) == gCircle.num_vertices())

    # Mark to which CC every node is.
    comp, hist = topology.label_components(gFinal)
    # The loop below treats labels 1..numOfCC as the added circles/cliques
    # (presumably label 0 is the backbone - confirm).
    numOfCC = max(comp.a)
    assert (numOfCC == nCircles + nCliques)

    bbNodes = np.where(gFinal.vp["bb"].a == 1)[0]
    np.random.shuffle(bbNodes)

    # Bookkeeping which nodes of the backbone were used to connect with the
    # circles/cliques.
    gFinal.vp["attachments"] = gFinal.new_vertex_property("short")
    for k, cc in enumerate(range(1, numOfCC + 1)):
        # Since all nodes of the added graphs are equivalent we can pick the
        # 1st to make the attachment.
        atNode = np.where(comp.a == cc)[0][0]
        bbNode = bbNodes[k]
        gFinal.add_edge(atNode, bbNode)
        # Flag exactly the two end-points of the new edge. The backbone side
        # used to be read after the counter had moved on, which flagged the
        # next backbone node and could run past the end of bbNodes.
        gFinal.vp["attachments"].a[atNode] = 1
        gFinal.vp["attachments"].a[bbNode] = 1

    # gFinal must be Fully Connected
    assert (topology.label_components(gFinal)[1][0] == gFinal.num_vertices())

    print("The graph with the cliques and circles has in total %d vertices and %d edges." % (
        gFinal.num_vertices(), gFinal.num_edges()))

    return gFinal