def networkSummary(G):
    r"""Provides summary values about the network

    Args:
        G (graph)
            The network of strains from :func:`~constructNetwork`

    Returns:
        components (int)
            The number of connected components (and clusters)
        density (float)
            The proportion of possible edges used (``0.0`` when the
            network has fewer than two vertices, i.e. no possible edge)
        transitivity (float)
            Network transitivity (triads/triangles)
        score (float)
            A score of network fit, given by :math:`\mathrm{transitivity} * (1-\mathrm{density})`
    """
    # Only the per-component size histogram is needed; its length is the
    # number of connected components.
    _, component_frequencies = gt.label_components(G)
    components = len(component_frequencies)

    # Ask the graph for its counts instead of materialising every
    # vertex/edge descriptor in a Python list just to take len().
    num_vertices = G.num_vertices()
    possible_edges = 0.5 * num_vertices * (num_vertices - 1)
    # With fewer than two vertices there is no possible edge; avoid the
    # division by zero and report an empty density.
    density = G.num_edges() / possible_edges if possible_edges else 0.0

    # global_clustering returns (coefficient, standard deviation).
    transitivity = gt.global_clustering(G)[0]
    score = transitivity * (1 - density)
    return (components, density, transitivity, score)
def clusteringStats(g):
    """Print clustering statistics of ``g`` and plot the local distribution.

    The per-vertex local clustering values are summarised with ``stats``,
    the global clustering coefficient is printed, and a histogram of the
    local values is handed to ``histogram``. The output name is the first
    command-line argument with its last 8 characters removed, followed by
    ``.clusterizacao``.

    Args:
        g: graph passed to ``local_clustering`` and ``global_clustering``
            (presumably a graph-tool graph -- confirm against the imports).
    """
    # ``.a`` exposes the values of the returned property map as an array.
    local_coefficients = local_clustering(g).a

    print("Clusterização")
    stats(local_coefficients)

    # First element of the result is the coefficient itself.
    global_coefficient = global_clustering(g)[0]
    print("\tGlobal", global_coefficient)

    binned = np.histogram(local_coefficients)
    output_name = sys.argv[1][:-8] + ".clusterizacao"
    histogram(
        binned,
        "Distribuição de clusterização",
        "$C_i$",
        "$C$",
        output_name,
    )
# Read the edge list: every line is tab-separated, with the source in the
# first column and the target in the third column.
edges = []
with open(args_main.graph, 'r') as f:
    for line in f:
        # Split once per line instead of once per extracted column.
        fields = line.split("\t")
        edges.append((fields[0], fields[2].rstrip("\n")))
# hashed=True lets the vertex identifiers be arbitrary (string) values.
g.add_edge_list(edges, hashed=True)

# Ask the graph for its counts rather than building throw-away lists.
number_of_vertices = g.num_vertices()
number_of_edges = g.num_edges()
print("### STATISTICS ###")
print("\tNumber of vertices: {}".format(number_of_vertices))
print("\tNumber of edges: {}".format(number_of_edges))

# Each of these calls returns a (value, standard deviation) pair.
clustering_coeff, std_error = gt.global_clustering(g)
avg_degree, avg_degree_std = gt.vertex_average(g, "total")
avg_in_degree, avg_in_degree_std = gt.vertex_average(g, "in")
avg_out_degree, avg_out_degree_std = gt.vertex_average(g, "out")

# Count vertices without incoming edges, without outgoing edges, and
# without any edge at all.
zero_in_deg = 0
zero_out_deg = 0
isolated_entities = 0
for v in g.vertices():
    no_incoming = v.in_degree() == 0
    no_outgoing = v.out_degree() == 0
    if no_incoming:
        zero_in_deg += 1
    if no_outgoing:
        zero_out_deg += 1
    if no_incoming and no_outgoing:
        isolated_entities += 1
def topology(self):
    """Write connected-component statistics of the arc graph and draw it.

    Works on a copy of ``self.arcgraph``. Components are computed on the
    undirected view; a component is a "start component" when it contains
    a vertex whose ``compound_ids`` value is in ``self.start_compounds``.
    Compounds in every other component are collected as "satellites".

    Side effects (all files are overwritten unless noted):
        * calls ``self.__plot_component_hist`` with the component histogram
        * ``clustering_coefficient.txt``: global clustering and its std
        * ``compounds_components.txt``: component id (1-based), compound
          id and name for every vertex
        * ``component_hist.txt``: component id (1-based) and its size
        * ``targets_in_main_component.txt`` / ``targets_in_satellites.txt``:
          targets outside / inside the satellite set, with their first name
        * ``components_with_start_metabolites.txt``: start component ids
        * appends one LaTeX table row to ``component_table.txt`` under
          ``const.MECAT_BASE``
        * draws the graph (start components red, others blue) to a PDF
    """
    g = self.arcgraph.copy()
    # directed=False because True would look for strongly connected components
    components, chist = gt.label_components(g, directed=False)
    self.__plot_component_hist(chist, 'componenthist')

    # Ids of the components that contain at least one start compound.
    start_components = set()
    for c in self.start_compounds:
        for v in gt.find_vertex(g, g.vp.compound_ids, c):
            start_components.add(components[v])

    # Total number of vertices living in those start components.
    number_compounds_in_start_components = 0
    for c in start_components:
        number_compounds_in_start_components += chist[c]

    satellites = set()
    # (coefficient, standard deviation)
    clustering_coefficient = gt.global_clustering(g)
    with open(join(self.statistics_path, "clustering_coefficient.txt"), 'w') as f:
        f.write(
            str(clustering_coefficient[0]) + '\t' +
            str(clustering_coefficient[1]) + '\n')

    with open(join(self.statistics_path, "compounds_components.txt"), 'w') as f, \
            open(join(self.statistics_path, "component_hist.txt"), 'w') as f2:
        for componentid, elem in enumerate(chist):
            # Extract the component as a standalone, pruned graph.
            u = gt.GraphView(g, vfilt=components.a == componentid)
            u = gt.Graph(u, prune=True)
            f2.write(str(componentid + 1) + '\t' + str(elem) + '\n')
            # Same answer for every vertex of this component: test it once.
            is_satellite = componentid not in start_components
            for v in u.vertices():
                f.write(
                    str(componentid + 1) + '\t' + u.vp.compound_ids[v] + '\t' +
                    u.vp.name[v] + '\n')
                if is_satellite:
                    satellites.add(u.vp.compound_ids[v])
            # gt.graph_draw(u, output=join(self.statistics_path, "component{i}.pdf".format(i=componentid)))

    targets_in_main_component = self.targets - satellites
    targets_in_satellites = self.targets & satellites
    with open(join(self.statistics_path, "targets_in_main_component.txt"), 'w') as f:
        for c in targets_in_main_component:
            compound = self.builder.compounds[c]
            f.write(c + '\t' + compound.names[0] + '\n')
    with open(join(self.statistics_path, "targets_in_satellites.txt"), 'w') as f:
        for c in targets_in_satellites:
            compound = self.builder.compounds[c]
            f.write(c + '\t' + compound.names[0] + '\n')
    with open(
            join(self.statistics_path, "components_with_start_metabolites.txt"),
            'w') as f:
        for cid in start_components:
            f.write(str(cid) + '\n')

    # Percentage of all vertices that lie in a start component.
    p = number_compounds_in_start_components / g.num_vertices() * 100
    with open(join(const.MECAT_BASE, "component_table.txt"), 'a') as f:
        # One LaTeX table row; '\\%' is an escaped percent sign (the
        # original '\%' relied on an invalid escape sequence).
        f.write(self.name + ' & ' + str(len(chist)) + ' & ' +
                str(np.amax(chist)) + ' & ' + str(len(start_components)) +
                ' & ' + str(int(number_compounds_in_start_components)) +
                ' & ' + str(int(round(p, 0))) + '\\%' + '\\\\ \n')

    #largest = gt.label_largest_component(g, directed=False)
    #gt.graph_draw(g, vertex_fill_color=largest, output=join(self.statistics_path,"largest_component.pdf"))

    # Colour vertices by membership in a start component.
    g.vertex_properties["start_components"] = g.new_vertex_property(
        "string", val='white')
    for v in g.vertices():
        if components[v] in start_components:
            g.vp.start_components[v] = 'red'
        else:
            g.vp.start_components[v] = 'blue'
    # NOTE(review): the output directory is hard-coded to one machine's
    # mount point; consider making it configurable.
    gt.graph_draw(g,
                  vertex_fill_color=g.vp.start_components,
                  output=join('/mnt', 'g', 'LisaDaten', 'Paper2', 'figures',
                              'arcgraph' + self.name + '.pdf'))
import graph_tool.all as gt
import matplotlib

# Graph file to analyse.
FILE = '10000vertices.xml.gz'

print('loading ' + FILE + ' graph')
g = gt.load_graph(FILE)

# Vertex and edge counts, taken directly from the graph instead of from
# throw-away Python lists.
N = g.num_vertices()
M = g.num_edges()

# Out-degree of every vertex as a single array. Using the array's own
# reductions avoids a Python-level loop and does not rely on a separate
# numpy import being present in this script.
arr = g.get_out_degrees(g.get_vertices())
max_degree = arr.max()
min_degree = arr.min()
avg_degree = arr.mean()
std_degree = arr.std()

print(str(N) + ' vertices')
print(str(M) + ' edges')
print('Max degree: ' + str(max_degree))
print('Min degree: ' + str(min_degree))
print('Avg degree: ' + str(avg_degree) + ' / S.D.: ' + str(std_degree))
# Density of an undirected simple graph: 2M / (N (N - 1)).
print('Density: ' + str((2.0 * M) / (N * (N - 1.0))))
# pseudo_diameter returns (distance, end points); only the distance is shown.
print('Pseudo-diameter: ' + str(gt.pseudo_diameter(g)[0]))
# Printed as the full (coefficient, standard deviation) tuple.
print('Global clustering: ' + str(gt.global_clustering(g)))
# gt.graph_draw(g, output_size=(5000, 5000), vertex_size=1,
#               vcmap=matplotlib.cm.gist_heat_r, output="view.png")
def graph_tool_statistics():
    """Load ``graph.graphml`` and print its global clustering result.

    Progress messages are printed before loading and before computing.
    The value printed last is whatever ``gt.global_clustering`` returns.
    """
    print("loading")
    loaded = gt.load_graph("graph.graphml")

    print("computing")
    clustering = gt.global_clustering(loaded)
    print(clustering)
print(f"L = {L}")

# --- Diameter ---
d = diameter(g)
print(f"diameter = {d}")

# --- Average degree <k> ---
# L/N for a directed graph, 2L/N otherwise (L and N are presumably the
# edge and vertex counts set earlier in the script -- confirm).
if g.is_directed():
    avg_degree = L / N
else:
    avg_degree = 2 * L / N
print(f"<k> = {avg_degree}")

# --- Density ---
p = avg_degree / (N - 1)
print(f"p = {p}")

# --- Global clustering (first element of the returned pair) ---
gc = GT.global_clustering(g)[0]
print(f"gc = {gc}")

# --- Average shortest path ---
avg_sp = avg_shortest_path(g)
print(f"avg shortest path = {avg_sp}")

# --- Giant component size ---
# The label depends on whether the graph is directed.
gcs = giant_component_size(g)
if not g.is_directed():
    print(f"giant component size = {gcs}")
else:
    print(f"biggest strong component size = {gcs}")

# ====================================================================
# Degree
# ====================================================================
degrees = g.get_total_degrees(g.get_vertices())
def gb_clus_coef(g):
    """Return the global clustering coefficient of ``g``.

    Only the first element of the ``gt.global_clustering`` result is
    returned; the remaining element(s) are discarded.
    """
    clustering = gt.global_clustering(g)
    coefficient = clustering[0]
    return coefficient
#plt.title('Histogram of Degree Distribution\nLargest Component') plt.xlabel('Degree (log)') plt.ylabel('Frequency (log)') plt.savefig("degree_hist_g_friend_LC_ap1.png") plt.close() print("\n\nDesciptives: Degree Distribution - done \n") #-- Clustering Coefficiants (Global) - of Friendship Network--# if descCCG_bool == True: print("\nDesciptives: Global Clustering Coefficiants \n") # coefficient, standard deviation print('Global Clustering Coefficiants - Friendship Network: ', gt.global_clustering( g_friend)) # 0.101803664507653, 0.013292495836611278 print('Global Clustering Coefficiants - Largest Component: ', gt.global_clustering( g_friend_LC)) # 0.10158343197387348, 0.013265280343602718 print("\nDesciptives: Global Clustering Coefficiants - done\n") ### Deskriptives Friendship Network - Largest Component specific ### #-- (Pseudo-) Diameter of Largest Component --# if descDia_bool == True: print( "\n\nDeskriptives Friendship Network - Largest Component specific - (Pseudo-) Diameter\n" )
else: size1, size2 = hist[-1], 0 data.append(('size of 1st/2nd component', '{} ({:.2f}%), {}/({:.2f}%)'.format( size1, 100 * size1 / g.num_vertices(), size2, 100 * size2 / g.num_vertices()))) data.append(('min/max/avg degree', '{}/{}/{:.2f}'.format(int(deg.a.min()), int(deg.a.max()), float(deg.a.mean())))) data.append(('density', '{:.7f}'.format(2 * g.num_edges() / g.num_vertices() / (g.num_vertices() - 1)))) data.append(('clustering coefficient (std)', '{:.2f} ({:.2f})'.format(*global_clustering(g)))) sampled_sources = np.random.permutation(g.num_vertices())[:100] dist = np.max([pseudo_diameter(g, s)[0] for s in sampled_sources]) data.append(('pseudo diameter', dist)) data.append(('assortativity (std)', '{:.2f} ({:.2f})'.format( *assortativity(g, 'total')))) index, col = zip(*data) s = pd.Series(col, index=index) print(s.to_string()) # save it somewhere
# Register the vertex property maps on the graph.
g.vertex_properties["color_bytype"] = color_bytype
g.vertex_properties["subgraph"] = subgraph
g.vertex_properties["shape"] = shape

# Add one edge per row of ``egdays``: columns 0 and 1 give the source and
# target vertex indices, column 2 the link type that selects the edge's
# colour and width.
for row_idx in range(len(egdays)):
    s, t, link = egdays[row_idx, [0, 1, 2]]
    source_vertex = g.vertex(int(s))
    target_vertex = g.vertex(int(t))
    new_edge = g.add_edge(source_vertex, target_vertex)
    edge_color[new_edge] = color_edge_dict[link]  # [x for x in color]
    edge_label[new_edge] = link
    edge_width[new_edge] = width_edge_dict[link]

# Register the edge property maps on the graph.
g.edge_properties["edge_color"] = edge_color
g.edge_properties["edge_label"] = edge_label
g.edge_properties["edge_width"] = edge_width

# Report clustering and degree correlations.
clustering = gt.global_clustering(g)
print('global_clustering', clustering)
assortativity_out = gt.assortativity(g, "out")
print('assortativity out', assortativity_out)
assortativity_in = gt.assortativity(g, "in")
print('assortativity in', assortativity_in)
assortativity_total = gt.assortativity(g, "total")
print('assortativity total', assortativity_total)

g = gt.GraphView(g)
print('start drawing')

# Fruchterman-Reingold layout, stored on the graph as a vertex property.
pos_fr = gt.fruchterman_reingold_layout(g, n_iter=1000)
g.vertex_properties["pos_fr"] = pos_fr

# Per-edge control points: the offset is one fifth of the distance
# between the edge's end points in the layout.
control = g.new_edge_property("vector<double>")
for e in g.edges():
    delta = pos_fr[e.source()].a - pos_fr[e.target()].a
    d = sqrt(sum(delta**2)) / 5
    control[e] = [0.3, d, 0.7, d]
g.edge_properties["control"] = control