def classify(request, pk):
    # gets object based on id given
    graph_file = get_object_or_404(Document, pk=pk)
    # reads file into networkx graph based on extension
    if graph_file.extension() == ".gml":
        G = nx.read_gml(graph_file.uploadfile)
    else:
        G = nx.read_gexf(graph_file.uploadfile)
    # closes file so we can delete it
    graph_file.uploadfile.close()
    # loads the algorithm and tests the algorithm against the graph
    g_json = json_graph.node_link_data(G)
    # save graph into json file
    with open(os.path.join(settings.MEDIA_ROOT, 'graph.json'), 'w') as graph:
        json.dump(g_json, graph)
    with open(os.path.join(settings.MEDIA_ROOT, 'rf_classifier.pkl'), 'rb') as malgo:
        algo_loaded = pickle.load(malgo, encoding="latin1")
    dataset = np.array([G.number_of_nodes(), G.number_of_edges(), nx.density(G),
                        nx.degree_assortativity_coefficient(G),
                        nx.average_clustering(G), nx.graph_clique_number(G)])
    print(dataset)
    # creates X to test against; scikit-learn expects a 2D array of samples
    X = dataset.reshape(1, -1)
    prediction = algo_loaded.predict(X)
    graph_type = check_prediction(prediction)
    graph = GraphPasser(G.number_of_nodes(), G.number_of_edges(), nx.density(G),
                        nx.degree_assortativity_coefficient(G),
                        nx.average_clustering(G), nx.graph_clique_number(G))
    # gives certain variables to the view
    return render(
        request,
        'classification/classify.html',
        {'graph': graph, 'prediction': graph_type}
    )
def compute(self, model):
    if self.show_progress is True:
        print("Calculating Number of Hosts")
    self.stats['Number of hosts'] = number_of_nodes(model[0])
    if self.show_progress is True:
        print("Calculating Risk")
    self.stats['Risk'] = model.risk
    if self.show_progress is True:
        print("Calculating Cost")
    self.stats['Cost'] = model.cost
    if self.show_progress is True:
        print("Calculating Mean of Path lengths")
    self.stats['Mean of attack path lengths'] = model[0].mean_path_length()
    if self.show_progress is True:
        print("Calculating Mode of Path lengths")
    self.stats['Mode of attack path lengths'] = model[0].mode_path_length()
    if self.show_progress is True:
        print("Calculating Standard deviation")
    self.stats['Standard Deviation of attack path lengths'] = \
        model[0].stdev_path_length()
    if self.show_progress is True:
        print("Calculating attack path length")
    self.stats['Shortest attack path length'] = model[0].shortest_path_length()
    if self.show_progress is True:
        print("Calculating Return on Attack")
    self.stats['Return on Attack'] = model[0].return_on_attack()
    if self.show_progress is True:
        print("Calculating Density")
    self.stats['Density'] = density(model[0])
    self.stats['Probability of attack success'] = model[0].probability_attack_success()
    self.compute_status = True
def write_network_characteristics(g):
    nodes = len(g.nodes())
    edges = len(g.edges())
    avg_degree = float(2 * edges) / nodes
    max_conn = (nodes * (nodes - 1)) / 2
    clustering = nx.average_clustering(g)
    density = nx.density(g)
    diameter = nx.diameter(g)
    a_p_l = nx.average_shortest_path_length(g)
    conn = nx.is_connected(g)
    n_comp_con = nx.number_connected_components(g)
    # write them on file
    out = open("statistics_giant.csv", "w")
    out.write("#Nodes,#Edges,Avg_Degree,Max Connection,Clustering Coefficient,Density,Diameter,Average Shortest Path,Is Connected?,Number Connected Component\n")
    out.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n" % (nodes, edges, avg_degree, max_conn, clustering,
                                                   density, diameter, a_p_l, conn, n_comp_con))
    out.close()

g = read_graph("dataset/cutted_graph(0.15).csv")
# Extract max Giant component first, then analyse it
gcc = sorted(nx.connected_component_subgraphs(g), key=len, reverse=True)
g0 = gcc[0]
degree_distribution(g0)
write_network_characteristics(g0)
def __init__(self, graph, slow_stuff = False):
    graph.info()
    # paolo - 20070919 - computing also the strongly connected
    # components directly on the directed graph. Changing a
    # directed graph into an undirected one usually destroys a lot of
    # its structure and meaning. Let's see. While the published API
    # has a method strongly_connected_component_subgraphs(graph),
    # I don't have it on my machine (probably I have an older
    # networkx version), so for now I commented out the following
    # code. The method strongly_connected_component_subgraphs(graph)
    # was added on 07/21/07. See https://networkx.lanl.gov/changeset/640 .
    # On my machine I have "python-networkx/feisty uptodate 0.32-2"
    # while on networkx svn there is already version 0.35.1
    if False:
        self.strongconcom_subgraphs = component.strongly_connected_component_subgraphs(graph)
        strongconcom_subgraph_size = map(len, self.strongconcom_subgraphs)
        print "size of largest strongly connected components:",
        print ", ".join(map(str, strongconcom_subgraph_size[:10])), "..."
        print "%nodes in largest strongly connected component:",
        print 1.0 * strongconcom_subgraph_size[0] / len(graph)

    undir_graph = graph.to_undirected()
    self.concom_subgraphs = component.connected_component_subgraphs(undir_graph)
    concom_subgraph_size = map(len, self.concom_subgraphs)
    print "size of largest connected components:",
    print ", ".join(map(str, concom_subgraph_size[:10])), "..."
    print "%nodes in largest connected component:",
    print 1.0 * concom_subgraph_size[0] / len(graph)

    # diameter and radius only work on connected graphs; maybe we could
    # run them on the largest strongly connected component.
    #print "diameter:", distance.diameter(G)
    #print "radius:", distance.radius(graph)

    print "density:", networkx.density(graph)
    print "degree histogram:", networkx.degree_histogram(graph)[:15]
    print "average_clustering:", cluster.average_clustering(graph)
    print "transitivity:", cluster.transitivity(graph)

    if slow_stuff:
        # not yet in my networkx revision -- wrap in try/except
        print "number_of_cliques", cliques.number_of_cliques(graph)
        """this returns a dict with the betweenness centrality of
        every node; maybe we want to compute the average betweenness
        centrality, but first it is important to understand which
        measures are usually reported in papers as peculiar for
        capturing the characteristics and structure of a directed
        graph."""
        print "betweenness_centrality:",
        print centrality.betweenness_centrality(graph)
def creation(k):
    global RGG, pos
    tmp_dense = 0.0
    RGG = nx.Graph()
    RGG.add_nodes_from(range(N))
    pos = {}
    dense = net_creation(k)
    for i in range(N):
        x = round(rnd.random(), 2)
        y = round(rnd.random(), 2)
        # Allocate the random x,y coordinates
        RGG.node[i]['pos'] = [x, y]
        pos[i] = RGG.node[i]['pos']
    for i in range(N - 1):
        for j in range(i + 1, N):
            if euclidean_dist(i, j) < R:
                RGG.add_edge(i, j)
                tmp_dense = nx.density(RGG)
                if tmp_dense >= dense:
                    break
        if tmp_dense >= dense:
            break
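# For comparison, networkx ships a built-in random geometric graph generator
# that follows the same recipe as the hand-rolled function above. A minimal
# sketch; the node count (100) and radius (0.2) are illustrative, not taken
# from the snippet above:
import networkx as nx

# Nodes get uniform positions in [0, 1)^2; an edge is added wherever the
# Euclidean distance between two nodes is below the radius.
RGG_builtin = nx.random_geometric_graph(100, 0.2)
print(nx.density(RGG_builtin))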
def gpn_stats(genes, gpn, version):
    LOGGER.info("Computing GPN statistics")
    nodes = sorted(gpn.nodes_iter())
    components = sorted(nx.connected_components(gpn), key=len, reverse=True)
    ass = nx.degree_assortativity_coefficient(gpn)
    deg = [gpn.degree(node) for node in nodes]
    stats = pd.DataFrame(data={
        "version": version,
        "release": pd.to_datetime(RELEASE[version]),
        "num_genes": len(genes),
        "num_nodes": len(nodes),
        "num_links": gpn.size(),
        "density": nx.density(gpn),
        "num_components": len(components),
        "largest_component": len(components[0]),
        "assortativity": ass,
        "avg_deg": mean(deg),
        "hub_deg": max(deg)
    }, index=[1])
    stats["release"] = pd.to_datetime(stats["release"])
    dists = pd.DataFrame(data={
        "version": version,
        "release": [pd.to_datetime(RELEASE[version])] * len(nodes),
        "node": [node.unique_id for node in nodes],
        "degree": deg,
    })
    return (stats, dists)
def pformat(self):
    """Pretty formats your graph into a string.

    This pretty formatted string representation includes many useful
    details about your graph, including: name, type, frozenness, node
    count, nodes, edge count, edges, graph density and graph cycles
    (if any).
    """
    lines = []
    lines.append("Name: %s" % self.name)
    lines.append("Type: %s" % type(self).__name__)
    lines.append("Frozen: %s" % nx.is_frozen(self))
    lines.append("Nodes: %s" % self.number_of_nodes())
    for n in self.nodes_iter():
        lines.append("  - %s" % n)
    lines.append("Edges: %s" % self.number_of_edges())
    for (u, v, e_data) in self.edges_iter(data=True):
        if e_data:
            lines.append("  %s -> %s (%s)" % (u, v, e_data))
        else:
            lines.append("  %s -> %s" % (u, v))
    lines.append("Density: %0.3f" % nx.density(self))
    cycles = list(nx.cycles.recursive_simple_cycles(self))
    lines.append("Cycles: %s" % len(cycles))
    for cycle in cycles:
        buf = six.StringIO()
        buf.write("%s" % (cycle[0]))
        for i in range(1, len(cycle)):
            buf.write(" --> %s" % (cycle[i]))
        buf.write(" --> %s" % (cycle[0]))
        lines.append("  %s" % buf.getvalue())
    return os.linesep.join(lines)
def test_networkx_roundtrip(self):
    print("\n---------- NetworkX Data Roundtrip Test Start -----------\n")
    g = nx.newman_watts_strogatz_graph(100, 3, 0.5)
    nodes = g.nodes()
    edges = g.edges()

    # Add some attributes
    g.graph["name"] = "original"
    g.graph["density"] = nx.density(g)

    nx.set_node_attributes(g, "betweenness", nx.betweenness_centrality(g))
    nx.set_node_attributes(g, "degree", nx.degree(g))
    nx.set_node_attributes(g, "closeness", nx.closeness_centrality(g))
    nx.set_edge_attributes(g, "eb", nx.edge_betweenness(g))

    cyjs1 = util.from_networkx(g)
    g2 = util.to_networkx(cyjs1)

    self.assertEqual(len(g2.nodes()), len(nodes))
    self.assertEqual(len(g2.edges()), len(edges))

    edge_set = set(list(map(lambda x: (int(x[0]), int(x[1])), g2.edges())))
    self.assertEqual(0, len(edge_set.difference(set(edges))))

    node_original = g.node[1]
    node_generated = g2.node["1"]

    print(node_original)
    print(node_generated)

    self.assertEqual(node_original["degree"], node_generated["degree"])
    self.assertEqual(node_original["betweenness"], node_generated["betweenness"])
    self.assertEqual(node_original["closeness"], node_generated["closeness"])
def test_fast_versions_properties_threshold_graphs(self):
    cs = 'ddiiddid'
    G = nxt.threshold_graph(cs)
    assert_equal(nxt.density('ddiiddid'), nx.density(G))
    assert_equal(sorted(nxt.degree_sequence(cs)),
                 sorted(G.degree().values()))

    ts = nxt.triangle_sequence(cs)
    assert_equal(ts, list(nx.triangles(G).values()))
    assert_equal(sum(ts) // 3, nxt.triangles(cs))

    c1 = nxt.cluster_sequence(cs)
    c2 = list(nx.clustering(G).values())
    assert_almost_equal(sum([abs(c - d) for c, d in zip(c1, c2)]), 0)

    b1 = nx.betweenness_centrality(G).values()
    b2 = nxt.betweenness_sequence(cs)
    assert_true(sum([abs(c - d) for c, d in zip(b1, b2)]) < 1e-14)

    assert_equal(nxt.eigenvalues(cs), [0, 1, 3, 3, 5, 7, 7, 8])

    # Degree Correlation
    assert_true(abs(nxt.degree_correlation(cs) + 0.593038821954) < 1e-12)
    assert_equal(nxt.degree_correlation('diiiddi'), -0.8)
    assert_equal(nxt.degree_correlation('did'), -1.0)
    assert_equal(nxt.degree_correlation('ddd'), 1.0)
    assert_equal(nxt.eigenvalues('dddiii'), [0, 0, 0, 0, 3, 3])
    assert_equal(nxt.eigenvalues('dddiiid'), [0, 1, 1, 1, 4, 4, 7])
def _cliques_heuristic(G, H, k, min_density):
    h_cnumber = nx.core_number(H)
    for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
        cands = set(n for n, c in h_cnumber.items() if c == c_value)
        # Skip checking for overlap for the highest core value
        if i == 0:
            overlap = False
        else:
            overlap = set.intersection(*[
                set(x for x in H[n] if x not in cands)
                for n in cands])
        if overlap and len(overlap) < k:
            SH = H.subgraph(cands | overlap)
        else:
            SH = H.subgraph(cands)
        sh_cnumber = nx.core_number(SH)
        SG = nx.k_core(G.subgraph(SH), k)
        while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
            #!! This subgraph must be writable => .copy()
            SH = H.subgraph(SG).copy()
            if len(SH) <= k:
                break
            sh_cnumber = nx.core_number(SH)
            sh_deg = dict(SH.degree())
            min_deg = min(sh_deg.values())
            SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
            SG = nx.k_core(G.subgraph(SH), k)
        else:
            yield SG
def updateGraphStats(self, graph):
    origgraph = graph
    if nx.is_connected(graph):
        random = 0
    else:
        connectedcomp = nx.connected_component_subgraphs(graph)
        graph = max(connectedcomp, key=len)  # largest connected component

    if len(graph) > 1:
        pathlength = nx.average_shortest_path_length(graph)
    else:
        pathlength = 0

    # print graph.nodes(), len(graph), nx.is_connected(graph)
    stats = {
        "radius": nx.radius(graph),
        "density": nx.density(graph),
        "nodecount": len(graph.nodes()),
        "center": nx.center(graph),
        "avgcluscoeff": nx.average_clustering(graph),
        "nodeconnectivity": nx.node_connectivity(graph),
        "components": nx.number_connected_components(graph),
        "avgpathlength": pathlength
    }
    # print "updated graph stats", stats
    return stats
def NetStats(G):
    return {'radius': nx.radius(G),
            'diameter': nx.diameter(G),
            'connected_components': nx.number_connected_components(G),
            'density': nx.density(G),
            'shortest_path_length': nx.shortest_path_length(G),
            'clustering': nx.clustering(G)}
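# A minimal usage sketch for NetStats; the input graph is illustrative, and
# radius/diameter require a connected graph:
import networkx as nx

stats = NetStats(nx.karate_club_graph())
print(stats['radius'], stats['diameter'], stats['density'])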
def info(self, graph, title=None):
    degree = sorted(nx.degree(graph).items(), key=lambda x: x[1], reverse=True)
    print('Highest degree nodes: ')
    if not title:
        for (node, value) in degree:
            print('{}:{}'.format(self.singer_dict[int(node)].split('|')[0], str(value)))
            if value < 90:
                break

    avg = (0.0 + sum(value for (node, value) in degree)) / (0.0 + len(degree))
    (max_node, max_value) = degree[0]
    (min_node, min_value) = degree[len(degree) - 1]

    inf = list()
    if not title:
        inf.append('Number of nodes: {0}'.format(nx.number_of_nodes(graph)))
        inf.append('Number of edges: {0}'.format(nx.number_of_edges(graph)))
        inf.append('Is connected: {0}'.format(nx.is_connected(graph)))
    if title:
        inf.append(title)
    inf.append('Degree:')
    inf.append('Avg: {0}'.format(round(avg, 4)))
    inf.append('Max: {1} ({0})'.format(max_node, max_value))
    inf.append('Min: {1} ({0})'.format(min_node, min_value))
    inf.append('Density: {}'.format(round(nx.density(graph), 4)))
    return inf
def plot_distribution(distribution_type, legend, graph, list_communities, out=None):
    x = [i for i in range(0, len(list_communities[0]))]
    for communities in list_communities:
        if distribution_type.lower() == "nodes":
            y = list(map(len, communities))
        else:
            y = []
            for l in communities:
                H = graph.subgraph(l)
                if distribution_type.lower() == "density":
                    y.append(nx.density(H))
                elif distribution_type.lower() == "transitivity":
                    y.append(nx.transitivity(H))
                else:
                    return None
        plt.plot(x, y, linewidth=2, alpha=0.8)
        #plt.yscale("log")
    plt.legend(legend, loc='upper left')
    plt.xlabel("Community ID")
    plt.ylabel(distribution_type)
    if out is None:
        plt.show()
    else:
        plt.savefig(out + ".svg", bbox_inches="tight")
        plt.close()
def calGraph(infile, mode = 1):
    # init parameters
    inputpath = 'edge_list/'
    outputpath = 'network_output/'
    n = mode
    Data_G = inputpath + infile + '_' + str(n) + '.edgelist'

    # init graph
    G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
    GU = nx.read_edgelist(Data_G)

    # basic info
    print nx.info(G), '\n', nx.info(GU)
    average_degree = float(sum(nx.degree(G).values())) / len(G.nodes())
    print 'average degree :', average_degree
    degree_histogram = nx.degree_histogram(G)
    print 'degree histogram max :', degree_histogram[1]
    density = nx.density(G)
    print 'density :', density

    # Approximation
    # Centrality
    degree_centrality = nx.degree_centrality(G)
    print 'degree centrality top 10 !', sorted_dict(degree_centrality)[:2]
    out_degree_centrality = nx.out_degree_centrality(G)
    print 'out degree centrality top 10 !', sorted_dict(out_degree_centrality)[:2]
def make_ground_truth():
    edge_map, venue_edge_map, node_map = map_for_nx(CITEMAP_FILE)
    components = []
    for conference in venue_edge_map.keys():
        edges = venue_edge_map[conference]
        graph = nx.Graph()
        edge_ids = [(int(edge.source), int(edge.target)) for edge in edges]
        graph.add_edges_from(edge_ids)
        median_degree = np.median(graph.degree(graph.nodes()).values())
        for component in nx.connected_components(graph):
            if len(component) >= MIN_SIZE:
                community = graph.subgraph(component)
                v_count = len(community.nodes())
                fomd = sum([1 for v in component
                            if len(set(graph.neighbors(v)) & set(component)) > median_degree]) / v_count
                internal_density = nx.density(community)
                components.append((component, fomd, internal_density))
    components = sorted(components, key=lambda x: x[1], reverse=True)[:3000]
    components = sorted(components, key=lambda x: x[2], reverse=True)[:int(0.75 * len(components))]
    f_id = open(TRUTH_ID_FILE, 'wb')
    f_name = open(TRUTH_NAME_FILE, 'wb')
    for component, fomd, internal_density in components:
        component = map(str, component)
        author_names = ", ".join([node_map[node_id].name for node_id in component])
        author_ids = ", ".join(component)
        f_id.write(author_ids + "\n")
        f_name.write(author_names + "\n")
    f_id.close()
    f_name.close()
def run_main(file):
    NumberOfStations = 465
    print file
    adjmatrix = np.loadtxt(file, delimiter=' ', dtype=np.dtype('int32'))

    # for i in range(0, NumberOfStations):
    #     if adjmatrix[i, i] == 1:
    #         print "position: [" + str(i) + "," + str(i) + "]"

    g = nx.from_numpy_matrix(adjmatrix, create_using=nx.MultiGraph())
    degree = g.degree()
    density = nx.density(g)
    degree_centrality = nx.degree_centrality(g)
    closeness_centrality = nx.closeness_centrality(g)
    betweenness_centrality = nx.betweenness_centrality(g)

    print degree
    print density
    print degree_centrality
    print closeness_centrality
    print betweenness_centrality

    # nx.draw(g)
    # np.savetxt(OutputFile, Matrix, delimiter=' ', newline='\n', fmt='%i')
def ltDecomposeTestBatFull(dsName, path, outfile, cd, wccOnly, revEdges, undir, diaF, fillF):
    origNet = loadNw(dsName, path, cd, wccOnly, revEdges, undir)
    prodNet = origNet
    # prodNet = copy.deepcopy(origNet)
    # print("dc")
    outfile = open(path + outfile + ".csv", "w")
    intFlag = False
    print("NW-WIDE MEASURES:\n")

    nodeStr = str(origNet.number_of_nodes())
    edgeStr = str(origNet.number_of_edges())
    avgDeg = str(float(origNet.number_of_edges()) / float(origNet.number_of_nodes()))
    dens = str(nx.density(origNet))
    avgCl = "--"
    # avgCl = str(nx.average_clustering(origNet))

    if diaF:
        print("  Starting dia calc")
        diameter = str(nx.diameter(origNet))
        print("  --> done w. dia calc")
    else:
        diameter = "---"

    # outfile.write("Dataset,NumNodes,NumEdges,avgDeg,dens,avgCl,diameter\n")
    # outfile.write(dsName+","+nodeStr+","+edgeStr+","+avgDeg+","+dens+","+avgCl+","+diameter+"\n")

    # if fillF:
    #     print("FULL THRESH TEST\n")
    #     outfile.write("Dataset,ThreshType,ThreshVal,PercSize,NumNodes,NumEdges,TimeAlg,TimeAlgAndSetup,Check\n")
    #     thresh = 1.0
    #     outfile.write(ltDecomposeNoSetWithCheck(prodNet, thresh, dsName, intFlag, origNet))

    outfile.close()
    print("Done.")
def calculateDensity(Graph, community):
    result = []
    for com in community:
        subg = Graph.subgraph(com[1:])  # skip the first element of each community entry
        # print subg.nodes()
        result.append(nx.density(subg))
    return result
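# A usage sketch for calculateDensity, assuming (as the com[1:] slice
# suggests) that each community entry begins with a label followed by node
# ids; the graph and communities below are illustrative:
import networkx as nx

G = nx.karate_club_graph()
communities = [['c0', 0, 1, 2, 3], ['c1', 30, 32, 33]]
print(calculateDensity(G, communities))  # one density value per community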
def show_network_metrics(G):
    '''
    Print the local and global metrics of the network
    '''
    print(nx.info(G))

    # density
    print("Density of the network")
    print(nx.density(G))

    # average betweenness (compute the centrality dict once and reuse it)
    bc = nx.betweenness_centrality(G)
    print("Average betweenness of the network")
    print(np.sum(list(bc.values())) / len(bc))

    # average clustering coefficient
    print("Average clustering coefficient:")
    print(nx.average_clustering(G))

    # create metrics dataframe
    by_node_metrics = pd.DataFrame({
        "Betweenness_Centrality": bc,
        "Degree_Centrality": nx.degree_centrality(G),
        "Clustering_Coefficient": nx.clustering(G),
        "Triangles": nx.algorithms.cluster.triangles(G)
    })
    print(by_node_metrics)
    by_node_metrics.to_excel("metrics.xlsx")
def gen_network(graph, machines, basedata):
    """Generates an LLD network from a graph, distributing participants in a list of machines."""
    network = ET.Element('network')
    #network.set('type', graphtype)
    network.set('participants', str(graph.number_of_nodes()))
    network.set('edges', str(graph.size()))
    network.set('density', str(NX.density(graph)))
    network.set('connected', str(NX.is_weakly_connected(graph)))
    network.set('stronglyconnected', str(NX.is_strongly_connected(graph)))
    for node in graph.nodes_iter():
        nodelement = ET.SubElement(network, 'participant')
        nodelement.set('id', 'participant' + str(node))
        hostelem = ET.SubElement(nodelement, 'host')
        #hostelem.text = 'node' + str(int(node) % len(machines))
        hostelem.text = machines[int(node) % len(machines)]
        portelem = ET.SubElement(nodelement, 'port')
        portelem.text = str(20500 + int(node))
        baseelem = ET.SubElement(nodelement, 'basedata')
        baseelem.text = basedata
        nodelement.append(gen_dynamic())
        for source in gen_sources(graph, node):
            nodelement.append(source)
    return network
def get_characteristics(G, thr, input_name):
    N = nx.number_of_nodes(G)        # total number of nodes : N
    L = nx.number_of_edges(G)        # total number of links : L
    Compon = nx.number_connected_components(G)  # number of connected components
    cc = nx.average_clustering(G)    # clustering coefficient : cc
    D = nx.density(G)                # network density : kappa

    check_sum = 0.
    degree_hist = {}
    values = []
    for node in G:
        if G.degree(node) not in degree_hist:
            degree_hist[G.degree(node)] = 1
        else:
            degree_hist[G.degree(node)] += 1
        values.append(G.degree(node))

    ave_degree = float(sum(values) / float(N))  # average degree : <kappa>
    keys = degree_hist.keys()
    keys.sort()
    for item in keys:
        check_sum += float(degree_hist[item]) / float(N)

    print 'Test matrix: ', input_name
    print 'Threshold: ', thr
    print 'Number of nodes: ', N
    print 'Number of links: ', L
    print 'Number of connected components: ', Compon
    print 'Clustering coefficient of full network: ', cc
    print 'Check degree distribution sum: ', check_sum
    print 'Network density: ', D
    print 'Average network degree: ', ave_degree
    return 0
def get_single_network_measures(G, thr):
    f = open(out_prfx + 'single_network_measures.dat', 'a')
    N = nx.number_of_nodes(G)
    L = nx.number_of_edges(G)
    D = nx.density(G)
    cc = nx.average_clustering(G)
    compon = nx.number_connected_components(G)
    Con_sub = nx.connected_component_subgraphs(G)

    values = []
    values_2 = []

    for node in G:
        values.append(G.degree(node))
    ave_deg = float(sum(values)) / float(N)

    f.write("%f\t%d\t%f\t%f\t%f\t%f\t" % (thr, L, D, cc, ave_deg, compon))
    # 1. threshold, 2. edges, 3. density, 4. clustering coefficient,
    # 5. average degree, 6. number of connected components

    for i in range(len(Con_sub)):
        if nx.number_of_nodes(Con_sub[i]) > 1:
            values_2.append(nx.average_shortest_path_length(Con_sub[i]))

    if len(values_2) == 0:
        f.write("0.\n")
    else:
        f.write("%f\n" % (sum(values_2) / len(values_2)))
    # 7. shortest pathway
    f.close()
def print_info(G):
    # info prints name, type, number of nodes and edges, and average degree already
    print(nx.info(G))
    print "Density: ", nx.density(G)
    print "Number of connected components: ", nx.number_connected_components(G)

    all_degree_cent = nx.degree_centrality(G)
    all_bet_cent = nx.betweenness_centrality(G)
    all_close_cent = nx.closeness_centrality(G)

    oldest = []
    agerank = 0
    names = []

    print ("Node, Degree Centrality, Betweenness Centrality, Closeness Centrality:")
    for x in range(G.number_of_nodes()):
        names.append(G.nodes(data=True)[x][1]['label'])
        if G.nodes(data=True)[x][1]['agerank'] >= agerank:
            if G.nodes(data=True)[x][1]['agerank'] != agerank:
                oldest = []
                agerank = G.nodes(data=True)[x][1]['agerank']
            oldest.append(G.nodes(data=True)[x][1])
        print G.nodes(data=True)[x][1]['label'], ' %.2f' % all_degree_cent.get(x),\
            ' %.2f' % all_bet_cent.get(x),\
            ' %.2f' % all_close_cent.get(x)

    print "Oldest facebook(s): ", ', '.join([x['label'] for x in oldest])
    return names
def calculate_connected_component(index, graph, top_artists, talky=False):
    """
    Takes the given graph and computes the following measures:
        - size (number of nodes)
        [- diameter]
        - density
        - degree
        - closeness_centrality
        [- betweenness_centrality]
        [- eccentricity]
    The first three measures are computed for each connected component.
    The remaining ones are computed for each node.
    The result is written to a database (see tricorder.models).
    """
    is_real_graph = graph.number_of_edges() > 0
    num_artists = 0
    num_top_artists = 0

    # calculate measures (only if we have edges!)
    density = nx.density(graph) if is_real_graph else 0
    # print "diameter..."
    # diameter = nx.diameter(graph) if is_real_graph else 0
    degree = sc.degree_centrality(graph) if is_real_graph else {}
    closeness = sc.closeness_centrality(graph) if is_real_graph else {}
    # betweenness = sc.betweenness_centrality(graph) if is_real_graph else {}
    # print "eccentricity..."
    # eccentricity = sc.eccentricity(graph) if is_real_graph else {}

    # create Node DB entries
    for id, attrs in graph.node.items():
        if attrs["type"] == "artist":
            num_artists += 1
            if attrs["name"] in top_artists:
                num_top_artists += 1
        # ecc = 1/eccentricity[id] if id in eccentricity else 0
        # need an extra variable here since division by zero is evil
        Node.create(
            nid=int(id),
            pid=graph.graph["pid"],
            node_type=attrs["type"],
            name=attrs["name"],
            degree=degree.get(id, 0),
            closeness=closeness.get(id, 0),
        )  # eccentricity=ecc)  #, betweenness=betweenness.get(id, 0))

    # create Partition DB entry
    Partition.create(
        pid=graph.graph["pid"],
        # diameter=diameter,
        num_nodes=graph.number_of_nodes(),
        num_edges=graph.number_of_edges(),
        num_artists=num_artists,
        num_top_artists=num_top_artists,
        density=density,
    )

    if talky and index % 500 == 0:
        print index
def sna_calculations(g, play_file):
    """
    :param g: a NetworkX graph object
    :type g: object
    :param play_file: the location of a play in .txt format
    :type play_file: string
    :return: returns a dictionary containing various network related figures
    :rtype: dict
    :note: also writes into results/file_name-snaCalculations.csv and results/allCharacters.csv
    """
    file_name = os.path.splitext(os.path.basename(play_file))[0]
    sna_calculations_list = dict()
    sna_calculations_list['playType'] = file_name[0]

    sna_calculations_list['avDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avDegreeCentralityStd'] = numpy.std(
        numpy.fromiter(iter(nx.degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avInDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.in_degree_centrality(g).values()), dtype=float))
    sna_calculations_list['avOutDegreeCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.out_degree_centrality(g).values()), dtype=float))

    try:
        sna_calculations_list['avShortestPathLength'] = nx.average_shortest_path_length(g)
    except:
        sna_calculations_list['avShortestPathLength'] = 'not connected'
    sna_calculations_list['density'] = nx.density(g)
    sna_calculations_list['avEigenvectorCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.eigenvector_centrality(g).values()), dtype=float))
    sna_calculations_list['avBetweennessCentrality'] = numpy.mean(
        numpy.fromiter(iter(nx.betweenness_centrality(g).values()), dtype=float))
    sna_calculations_list['DegreeCentrality'] = nx.degree_centrality(g)
    sna_calculations_list['EigenvectorCentrality'] = nx.eigenvector_centrality(g)
    sna_calculations_list['BetweennessCentrality'] = nx.betweenness_centrality(g)

    # sna_calculations.csv file
    sna_calc_file = csv.writer(open('results/' + file_name + '-snaCalculations.csv', 'wb'),
                               quoting=csv.QUOTE_ALL, delimiter=';')
    for key, value in sna_calculations_list.items():
        sna_calc_file.writerow([key, value])

    # all_characters.csv file
    if not os.path.isfile('results/allCharacters.csv'):
        with open('results/allCharacters.csv', 'w') as f:
            f.write('Name;PlayType;play_file;DegreeCentrality;EigenvectorCentrality;BetweennessCentrality;speech_amount;AverageUtteranceLength\n')

    all_characters = open('results/allCharacters.csv', 'a')
    character_speech_amount = speech_amount(play_file)
    for character in sna_calculations_list['DegreeCentrality']:
        all_characters.write(character + ';' + str(sna_calculations_list['playType']) + ';' + file_name + ';' + str(
            sna_calculations_list['DegreeCentrality'][character]) + ';' + str(
            sna_calculations_list['EigenvectorCentrality'][character]) + ';' + str(
            sna_calculations_list['BetweennessCentrality'][character]) + ';' + str(
            character_speech_amount[0][character]) + ';' + str(character_speech_amount[1][character]) + '\n')
    all_characters.close()

    return sna_calculations_list
def get_density(subgraph):
    '''
    subgraph: NetworkX Graph object
    return: density of the graph
    '''
    return nx.density(subgraph)
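# A minimal usage sketch; the graph and node set are illustrative:
import networkx as nx

G = nx.karate_club_graph()
print(get_density(G.subgraph([0, 1, 2, 3])))  # density of the induced subgraph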
def calculate_graphanalysis(graph):
    idno = graph.graph["idno"]
    numnodes = graph.number_of_nodes()
    numedges = graph.number_of_edges()
    density = nx.density(graph)
    analysis = [idno, numnodes, numedges, density]
    # print(analysis)
    return analysis
def watts_strogatz_replicate(original, params=None):
    # warning: for simplicity of coding, the replica uses nodes labeled 0..n-1
    if params is None:
        params = {}
    n = nx.number_of_nodes(original)
    k = params.get('k', 4)
    p = nx.density(original)
    return nx.watts_strogatz_graph(n, k, p)
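# A usage sketch with an illustrative input graph. Note that
# nx.watts_strogatz_graph interprets its third argument as the rewiring
# probability, which this replicator sets from the original graph's density:
import networkx as nx

original = nx.erdos_renyi_graph(100, 0.05)
replica = watts_strogatz_replicate(original, params={'k': 4})
print(nx.density(original), nx.density(replica))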
lin = ego_feat_file.readline().strip('\n').split(' ')
for p, val in enumerate(lin):
    if val == '1':  # split() yields strings, so compare against '1' # and feat_name_list[p][0].__contains__('gender'):
        G.nodes[ego_nds_id[i]][feat_name_list[p][0]] = feat_name_list[p][1]

nx.write_graphml(G, 'facebook.graphml')
nx.write_edgelist(G, 'facebook.edgelist', data=False)

print("Anonymize Ids")
for x in feat_id_dics.keys():
    print(x + " ", end='')
    print(list(feat_id_dics[x]))

print("Number of nodes " + str(G.number_of_nodes()))
print("Number of edges " + str(G.number_of_edges()))
print("Network Density " + str(nx.density(G)))
print("Degree Assortativity Coefficient " + str(nx.degree_assortativity_coefficient(G)))

def calculate_clustering_coefficients_and_degree(feat, feat_ids):
    node_wrt_feat_ids = dict([(x, []) for x in feat_ids])
    for node in G.nodes:
        if feat in G.nodes[node] and G.nodes[node][feat] in feat_ids:
            node_wrt_feat_ids[G.nodes[node][feat]].append(node)
    for c, x in enumerate(feat_ids):
        print("Number of nodes of feature " + feat + " of id " + str(x) + " is " + str(len(node_wrt_feat_ids[x])))
# In[12]:

nx.average_shortest_path_length(G)

# ### Network diameter

# In[13]:

nx.diameter(G)  # returns the diameter of G (the length of the longest shortest path)

# ### Density

# In[16]:

nx.density(G)

# In[17]:

nodeNum = len(G.nodes())
edgeNum = len(G.edges())
2.0 * edgeNum / (nodeNum * (nodeNum - 1))

# Exercise
# - compute the network density of the WWW network

# ### Clustering coefficient

# In[18]:
def erdos_renyi(G):
    n = G.number_of_nodes()
    p = nx.density(G)
    return nx.fast_gnp_random_graph(n, p)
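# Setting p to the original density makes the G(n, p) replica match the
# original's expected edge count, since E[m] = p * n(n-1)/2 and
# density = 2m / (n(n-1)). A usage sketch with an illustrative input:
import networkx as nx

G = nx.karate_club_graph()
replica = erdos_renyi(G)
print(G.number_of_edges(), replica.number_of_edges())  # close in expectation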
critical_temperature = np.loadtxt(path_entity + 'ctem.csv', delimiter=',')
c, r = correlation_function(simulated_matrix, J)
print(nx.number_of_isolates(nx.Graph(J)))
index_ct = find_nearest(ts, critical_temperature)
dimensionality = dim(c, r, index_ct)

if dimensionality != 3:  # outlier
    dimensionality_sim.append(dimensionality)
    critical_temperature_sim.append(critical_temperature)
    size_sim.append(J.shape[-1])
    degrees = sorted(d for n, d in nx.Graph(J).degree())
    degree_sim.append(np.mean(degrees))
    sparcity.append(nx.density(nx.Graph(J)) * 100)

#dimensionality_.append(np.mean(dimensionality_sim))
results.append([
    np.mean(dimensionality_sim),
    np.mean(size_sim),
    np.mean(degree_sim),
    np.mean(sparcity)
])

#fig, ax = plt.subplots(figsize=(10, 7))
#plt.scatter(np.linspace(1, l, num=l), dimensionality_)
#plt.xlabel("Graph name")
#plt.ylabel("Dimensionality")
path_length_histogram(G_internet, title="Internet") plt.tight_layout() nx.average_shortest_path_length(G_karate) nx.average_shortest_path_length(G_electric) nx.average_shortest_path_length(G_internet) nx.diameter(G_karate) nx.diameter(G_electric) nx.diameter(G_internet) nx.density(G_karate) nx.density(G_electric) nx.density(G_internet) import networkx.algorithms.connectivity as nxcon nxcon.minimum_st_node_cut( G_karate, mr_hi, john_a ) # Returns a set of nodes of minimum cardinality that disconnect source from target in G nxcon.minimum_st_edge_cut(G_karate, mr_hi, john_a) nx.node_connectivity( G_karate, mr_hi, john_a
def node_data(b):
    S = G.subgraph(b)
    return dict(graph=S,
                nnodes=len(S),
                nedges=S.number_of_edges(),
                density=density(S))
for node in tempset:
    if G.has_edge(node, actid):
        G[node][actid]["weight"] += 1
    else:
        G.add_edge(node, actid, weight=1)
tempset.add(actid)

print("Initial graph")
print(nx.info(G))
if G.number_of_nodes() <= 20:
    print("Nodes: ", G.nodes())
    for node in G.nodes():
        print(G.node[node])
if G.number_of_edges() <= 20:
    print("Edges: ", G.edges())
print("Network density:", nx.density(G))
maxd = max([d for n, d in G.degree()])
d50th = sorted([d for n, d in G.degree()])[-50]
print("Maximum degree:", maxd)
connecteds = list(nx.connected_components(G))
maxconnectedsize = max([len(c) for c in connecteds])
print("Size of largest connected component:", maxconnectedsize)
infodfrow = pd.Series(
    {
        "period": str(year1) + "-" + str(year2),
        "# vertices": G.number_of_nodes(),
        "# edges": G.number_of_edges(),
        "density": nx.density(G),
        "max degree": max([d for n, d in G.degree()]),
        "largest CC": maxconnectedsize
    },
egoGraphUni = egoGraph.to_undirected()
clusteringCoef = nx.clustering(egoGraphUni, nodes=[key])[key]

# print key, len(graphObj.successors(key))
# print "\t Density: ", nx.density(egoGraph)
# print "\t Neighbor Degree Distribution: ", neighborDegreeDist(egoGraph)
# print "\t Low Degree Neighbor Distribution: ", lowDegreeNeighborProp(egoGraph, key)
# print "\t Intense Tie Prop: ", intenseTieProp(egoGraph, key)
# print "\t Clustering Coefficient: ", clusteringCoef
# print "\t Triangles: ", triangleRatio(egoGraphUni, key)

dataLine = "%s, %s, %d, %f, %f, %f, %f, %f, %f, %d\n" % (key,
                                                         graphPath,
                                                         len(graphObj.successors(key)),
                                                         nx.density(egoGraph),
                                                         neighborDegreeDist(egoGraph),
                                                         lowDegreeNeighborProp(egoGraph, key),
                                                         intenseTieProp(egoGraph, key),
                                                         clusteringCoef,
                                                         triangleRatio(egoGraphUni, key),
                                                         0)
# outFile.write(dataLine)
print dataLine

# # Draw graph
# pos = nx.spring_layout(egoGraph)
# nx.draw(egoGraph, pos, node_color='b', node_size=50, with_labels=False)
def est_density(func_mat):
    '''Adapted from bctpy.'''
    fG = nx.from_numpy_matrix(func_mat)
    density = nx.density(fG)
    return density
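# A minimal check with an illustrative 3x3 adjacency matrix: two edges out of
# three possible gives a density of 2/3.
import numpy as np

func_mat = np.array([[0., 1., 1.],
                     [1., 0., 0.],
                     [1., 0., 0.]])
print(est_density(func_mat))  # 0.666...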
graph_layout2 = nx.spectral_layout(graph)
print '... done!'

plt.figure()
plt.xticks([])
plt.yticks([])

print '\t**Drawing nodes ...'
nx.draw_networkx_nodes(graph, pos=graph_layout2, node_color=color_intensity,
                       with_labels=False, alpha=0.75, node_size=100,
                       cmap=plt.get_cmap('Blues'))
print '\t**Saving pdf (no edges) ...'
plt.savefig('correlation_network5-no_edges.pdf', bbox_inches='tight')
print '\t**Drawing edges ...'
nx.draw_networkx_edges(graph, pos=graph_layout, with_labels=False, alpha=0.3)
print '\t**Saving final pdf ...'
plt.savefig('correlation_network3.pdf', bbox_inches='tight')
print '... done!'
plt.close()

a = nx.find_cliques(graph)
nx.graph_clique_number(graph)
nx.graph_number_of_cliques(graph)
nx.density(graph)
print nx.info(graph)
nx.average_clustering(graph, weight='yeah')
nx.get_edge_attributes(graph, 'weights')
cast_list = [x.strip() for x in linha[4].split(',')]
for i in range(len(cast_list)):
    for j in range(i + 1, len(cast_list)):
        # G.add_edge(cast_list[i], cast_list[j], weight=float(linha[-1]))
        G.add_edge(cast_list[i], cast_list[j])

largest_cc = max(nx.connected_components(G), key=len)
GCC = G.subgraph(largest_cc)
# To get the graph with the nodes outside the giant component:
# G = G.subgraph(set(G.nodes()).difference(largest_cc))

###################################################################
# Network and node metrics
###################################################################
nx.density(GCC)
nx.average_clustering(GCC)
triadic_closure = nx.transitivity(GCC)
nx.graph_number_of_cliques(GCC)
nx.diameter(GCC)
nx.average_shortest_path_length(GCC)
nx.number_connected_components(G)

# Centrality measures
c = nx.closeness_centrality(GCC)
b = nx.betweenness_centrality(GCC)
e = nx.eigenvector_centrality(GCC)
sorted_x = sorted(
    c.items(), key=operator.itemgetter(1), reverse=True
)[:10]  # top 10, sorted by the dictionaries' values
def test_density_selfloop(self):
    G = nx.Graph()
    G.add_edge(1, 1)
    assert_equal(nx.density(G), 0.0)
    G.add_edge(1, 2)
    assert_equal(nx.density(G), 2.0)
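# The self-loop case shows why density is not capped at 1: for undirected
# graphs nx.density returns d = 2m / (n(n-1)), a self-loop counts as one
# edge, and a single node gives density 0 by convention. Here n = 2 and
# m = 2 yield 2.0. A standalone check of the formula:
import networkx as nx

G = nx.Graph([(1, 1), (1, 2)])  # one self-loop plus one ordinary edge
n, m = G.number_of_nodes(), G.number_of_edges()
assert nx.density(G) == 2.0 * m / (n * (n - 1))  # 2*2/(2*1) == 2.0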
def test_density(self):
    assert_equal(nx.density(self.G), 0.5)
    assert_equal(nx.density(self.DG), 0.3)
    G = nx.Graph()
    G.add_node(1)
    assert_equal(nx.density(G), 0.0)
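# For directed graphs the density denominator drops the factor of 2:
# d = m / (n(n-1)). A standalone check (the graph below is illustrative,
# not the test fixture's self.DG):
import networkx as nx

DG = nx.DiGraph([(0, 1), (1, 0), (1, 2)])  # 3 nodes, 3 arcs
n, m = DG.number_of_nodes(), DG.number_of_edges()
assert nx.density(DG) == m / (n * (n - 1))  # 3/6 == 0.5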
def Based(f, sep):
    ls = f.readlines()
    print(len(ls))
    t = []
    for i in range(len(ls)):
        t.append(list(range(2)))
    k = 0
    for i in ls:
        l = i.split(sep)
        t[k][0] = int(l[0])
        t[k][1] = int(l[1])
        k = k + 1

    G = nx.Graph()
    i = 0
    #while i < n:
    #    G.add_node(i)
    #    i = i + 1
    i = 0
    while i < len(t):
        G.add_edge(t[i][0], t[i][1])
        i = i + 1
    print(len(G.nodes()))
    print(len(G.edges()))

    ns = G.number_of_nodes()
    N = G.number_of_edges()
    den = nx.density(G)
    print(den)
    # pick the edge-similarity threshold according to how sparse the graph is
    if den < 0.001:
        se = 0.25
    else:
        se = 0.5

    # count, for every node, the edges among its neighbours (local triangles)
    i = 0
    w1 = []
    tps1 = time.time()
    T1 = G.nodes()
    while i < ns:
        cpt1 = 0
        xx = G.neighbors(T1[i])
        a = len(xx)
        j = 0
        while j < a - 1:
            j1 = j + 1
            while j1 < a:
                if G.has_edge(xx[j], xx[j1]):
                    cpt1 = cpt1 + 1
                j1 = j1 + 1
            j = j + 1
        w1.append(cpt1)
        i = i + 1
    print('b')

    # score every edge from the common neighbours of its endpoints
    T = G.edges()
    w = []
    w2 = []
    cp = []
    #TT = T
    i = 0
    wp = []
    tt = []
    ab = []
    pp = []
    w5 = []
    while i < N:
        a = G.degree(T[i][0])
        b = G.degree(T[i][1])
        cpt = len(sorted(nx.common_neighbors(G, T[i][0], T[i][1])))
        if cpt == 0:
            #w4 = w1[T1.index(T[i][0])] + w1[T1.index(T[i][1])]
            ab.append([0, T[i][0], T[i][1]])
        elif (cpt / (a + b - cpt)) < se:
            #pp.append([T[i][0], T[i][1]])
            #wp.append(cpt)
            w4 = w1[T1.index(T[i][0])] + w1[T1.index(T[i][1])]
            w5.append([(w4 / (cpt * (a + b))), T[i][0], T[i][1]])
        i = i + 1
    print('a')
    tps2 = time.time()
    l = w5
    print('time', tps2 - tps1)
    k = len(l)
    print(k)
    print('v')
    l.sort(reverse=True)
    #ab.sort(reverse=True)
    l = ab + l
    tps1 = time.time()
    print('v')

    #G11 = nx.Graph()
    G11 = G.copy()
    i = 0
    print('problem')
    G1 = G
    m1 = 0
    sup = []
    aj = []
    # remove low-similarity edges, but never disconnect a node completely
    while i < len(l):
        e1 = l[i][1]
        e2 = l[i][2]
        G1.remove_edge(e1, e2)
        y = G.degree(e1)
        y1 = G.degree(e2)
        if y < 1 or y1 < 1:  # or m < m1:
            #if len(la) < 4:
            G1.add_edge(e1, e2)
        else:
            aj = [e1, e2]
            sup.append(aj)
        #m1 = m
        i = i + 1

    gr = list(nx.connected_component_subgraphs(G1))
    k = 0
    m = 0
    g = []
    #g = [i.nodes() for i in gr]
    for i in gr:
        g.append(i.nodes())

    # merge very small components back along removed edges
    i1 = 0
    while i1 < len(g):
        ii = g[i1]
        if len(ii) < 4:
            #print(len(g))
            i = len(sup) - 1
            while i >= 0:
                b = 0
                if sup[i][0] in ii:
                    if not (sup[i][1] in ii):
                        b = 1
                        break
                    else:
                        sup.pop(i)
                        i = i - 1
                if sup[i][1] in ii:
                    if not (sup[i][0] in ii):
                        b = 2
                        break
                    else:
                        sup.pop(i)
                        i = i - 1
                i = i - 1
            if b == 1:
                for kk in g:
                    if sup[i][1] in kk:
                        r = list(set(ii) | set(kk))
                        ind1 = g.index(kk)
                        ind2 = i1
                        g[ind1] = r
                        g.remove(g[ind2])
                        break
            elif b == 2:
                for kk in g:
                    if sup[i][0] in kk:
                        r = list(set(ii) | set(kk))
                        ind1 = g.index(kk)
                        ind2 = i1
                        g[ind1] = r
                        g.remove(g[ind2])
                        break
            else:
                i1 = i1 + 1
        else:
            i1 = i1 + 1

    m = 0
    k = 1
    mm = []
    mm1 = []
    #m2 = []
    m3 = []
    for i in g:
        #print(i)
        temp = modu1(G11, i, N)
        #mm.append(temp[0])
        mm1.append(temp[1])
        #m2.append(temp[2])
        m3.append(temp[3])
        #m = m + temp[0]
        #print(m)

    # merge communities along removed edges while it improves modularity
    i1 = 0
    while i1 < len(g):
        ii = g[i1]
        #print(len(g))
        i = len(sup) - 1
        while i >= 0:
            b = 0
            if sup[i][0] in ii:
                if not (sup[i][1] in ii):
                    b = 1
                    break
                else:
                    sup.pop(i)
                    i = i - 1
            if sup[i][1] in ii:
                if not (sup[i][0] in ii):
                    b = 2
                    break
                else:
                    sup.pop(i)
                    i = i - 1
            i = i - 1
        #print('a')
        if b == 1:
            for kk in g:
                if sup[i][1] in kk:
                    r = list(set(ii) | set(kk))
                    ind1 = g.index(kk)
                    ind2 = i1
                    #aa = mm[ind1]
                    #bb = mm[ind2]
                    rr1 = modu11(G11, kk, ii)
                    rr = (rr1 / N) - (2 * m3[ind1] * m3[ind2])
                    #mo = modu1(G11, r, tt, N)
                    if rr > 0:
                        #mo = mm[ind1] + mm[ind2] + rr
                        g[ind1] = r
                        g.remove(g[ind2])
                        #mm[ind1] = [0, (rr1 + aa[1] + bb[1]), 0]
                        mm1[ind1] = rr1 + mm1[ind1] + mm1[ind2]
                        m3[ind1] = m3[ind1] + m3[ind2]
                        #mm.pop(ind2)
                        mm1.pop(ind2)
                        m3.pop(ind2)
                    else:
                        i1 = i1 + 1
                    break
        elif b == 2:
            for kk in g:
                if sup[i][0] in kk:
                    r = list(set(ii) | set(kk))
                    ind1 = g.index(kk)
                    ind2 = i1
                    #aa = mm[ind1]
                    #bb = mm[ind2]
                    rr1 = modu11(G11, kk, ii)
                    rr = (rr1 / N) - (2 * m3[ind1] * m3[ind2])
                    #mo = modu1(G11, r, tt, N)
                    if rr > 0:
                        #mo = mm[ind1] + mm[ind2] + rr
                        g[ind1] = r
                        g.remove(g[ind2])
                        #mm[ind1] = [0, (rr1 + aa[1] + bb[1]), 0, aa[3] + bb[3]]
                        mm1[ind1] = rr1 + mm1[ind1] + mm1[ind2]
                        m3[ind1] = m3[ind1] + m3[ind2]
                        #mm.pop(ind2)
                        mm1.pop(ind2)
                        m3.pop(ind2)
                    else:
                        i1 = i1 + 1
                    break
        else:
            i1 = i1 + 1

    tps2 = time.time()
    # final modularity and per-node community labels
    m = 0
    k = 0
    mm = []
    l1 = []
    #mm1 = []
    #m2 = []
    #m3 = []
    for i in range(ns):
        l1.append(list(range(1)))
    for i in g:
        #print(i)
        temp = modu1(G11, i, N)
        mm.append(temp)
        #mm1.append(temp[1])
        #m2.append(temp[2])
        #m3.append(temp[3])
        for r in i:
            l1[T1.index(r)] = k
        k = k + 1
        m = m + temp[0]
    print(m)
    #print(l1)
    print('time', tps2 - tps1)
def local_thresholding_dens(conn_matrix, thr):
    from pynets import netstats, thresholding
    '''
    Threshold the adjacency matrix by building from the minimum spanning tree
    (MST) and adding successive N-nearest neighbour degree graphs to achieve
    the target density.
    '''
    fail_tol = 10
    conn_matrix = np.nan_to_num(conn_matrix)
    G = nx.from_numpy_matrix(conn_matrix)
    if not nx.is_connected(G):
        [G, _] = netstats.prune_disconnected(G)
    maximum_edges = G.number_of_edges()
    G = thresholding.weight_to_distance(G)
    min_t = nx.minimum_spanning_tree(G, weight="distance")
    mst_density = nx.density(min_t)
    G_density = nx.density(G)

    if mst_density > G_density:
        print("%s%s%s" % ('Warning: The minimum spanning tree already has: ', thr,
                          ' density. Local Threshold will be applied by just retaining the Minimum Spanning Tree'))
        conn_matrix_thr = nx.to_numpy_array(G)
        return conn_matrix_thr

    k = 1
    dense_list = []
    while mst_density < float(thr) and (len(dense_list[-fail_tol:]) -
                                        len(set(dense_list[-fail_tol:]))) < (fail_tol - 1):
        print(k)
        print(mst_density)
        dense_list.append(mst_density)
        # Create nearest neighbour graph
        nng = thresholding.knn(conn_matrix, k)
        number_before = nng.number_of_edges()
        # Remove edges from the NNG that exist already in the new graph/MST
        nng.remove_edges_from(min_t.edges())
        if nng.number_of_edges() == 0 and number_before >= maximum_edges:
            break

        # Add weights to NNG
        for e in nng.edges():
            nng.edges[e[0], e[1]]['weight'] = float(conn_matrix[e[0], e[1]])

        # Obtain list of edges from the NNG in order of weight
        edge_list = sorted(nng.edges(data=True), key=lambda t: t[2]['weight'], reverse=True)

        # Add edges in order of connectivity strength
        for edge in edge_list:
            min_t.add_edges_from([edge])
            mst_density = thresholding.est_density((nx.to_numpy_array(min_t)))
            #print("%s%s" % ('Adding edge to mst: ', edge))
            if mst_density >= G_density or mst_density >= float(thr):
                #print(mst_density)
                break
        print('\n')
        if (len(dense_list[-fail_tol:]) - len(set(dense_list[-fail_tol:]))) >= (fail_tol - 1):
            print("%s%s%s" % ('Cannot apply local thresholding to achieve density of: ', thr,
                              '. Using maximally saturated connected matrix instead...'))
        k += 1

    conn_matrix_thr = nx.to_numpy_array(min_t, nodelist=sorted(min_t.nodes()), dtype=np.float64)
    if len(min_t.nodes()) < conn_matrix.shape[0]:
        raise RuntimeWarning("%s%s%s" % ('Cannot apply local thresholding to achieve density of: ',
                                         thr, '. Try a higher -thr or -min_thr'))
    return conn_matrix_thr
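# The MST seed in local_thresholding_dens guarantees a connected starting
# point at the sparsest possible density (n - 1 edges). A standalone sketch
# of that idea; the random symmetric matrix is illustrative:
import numpy as np
import networkx as nx

W = np.random.rand(10, 10)
W = (W + W.T) / 2.0       # symmetrize
np.fill_diagonal(W, 0)    # no self-connections
G = nx.from_numpy_array(W)
mst = nx.minimum_spanning_tree(G)
print(nx.density(mst), nx.density(G))  # MST density is the floor for connected graphs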
if k > biggest:
    biggest = k
S = target.subgraph(CC)
n = len(S)
if n > minimumOrder:
    print(label, l, 'cc{:d}N {:d}'.format(ccID, n))
    print(label, l, 'cc{:d}M {:d}'.format(ccID, S.number_of_edges()))
    print(label, l, 'cc{:d}Rad {:d}'.format(ccID, nx.radius(S)))
    print(label, l, 'cc{:d}Diam {:d}'.format(ccID, nx.diameter(S)))
    print(label, l, 'cc{:d}Cent {:f}'.format(ccID, 100 * len(nx.center(S)) / n))
    print(label, l, 'cc{:d}Periph {:f}'.format(ccID, 100 * len(nx.periphery(S)) / n))
    print(label, l, 'cc{:d}Dens {:f}'.format(ccID, nx.density(S)))
    v = list(nx.eccentricity(S).values())
    for (prefix, selector) in [('max', max), ('min', min),
                               ('avg', np.mean), ('med', np.median)]:
        print(label, l, f'cc{ccID}{prefix}Ecc', selector(v))
    ccID += 1
print(label, l, 'ccCount', ccID)
print(label, l, 'ccMin', smallest)
print(label, l, 'ccMax', biggest)
def search(
    subgraphs: list,
    graph: nx.Graph,
    min_size: int,
    max_size: int,
    max_count: int = 10,
    node_select: Union[str, np.ndarray, list] = "uniform",
) -> dict:
    """Search for dense subgraphs within an input size range.

    For each subgraph from ``subgraphs``, this function resizes using :func:`resize` to the
    input range specified by ``min_size`` and ``max_size``, resulting in a range of differently
    sized subgraphs. This function loops over all elements of ``subgraphs`` and keeps track of
    the ``max_count`` number of densest subgraphs identified for each size.

    In both growth and shrink phases of :func:`resize`, there may be multiple candidate nodes
    with equal degree to add to or remove from the subgraph. The method of selecting the node
    is specified by the ``node_select`` argument, which can be either:

    - ``"uniform"`` (default): choose a node from the candidates uniformly at random;
    - A list or array: specifying the node weights of the graph, resulting in choosing the
      node from the candidates with the highest weight (when growing) and lowest weight
      (when shrinking), settling remaining ties by uniform random choice.

    **Example usage:**

    >>> s = data.Planted()
    >>> g = nx.Graph(s.adj)
    >>> s = sample.postselect(s, 16, 30)
    >>> s = sample.to_subgraphs(s, g)
    >>> search(s, g, 8, 9, max_count=3)
    {9: [(0.9722222222222222, [21, 22, 23, 24, 25, 26, 27, 28, 29]),
         (0.9722222222222222, [20, 21, 22, 24, 25, 26, 27, 28, 29]),
         (0.9444444444444444, [20, 21, 22, 23, 24, 25, 26, 27, 29])],
     8: [(1.0, [21, 22, 24, 25, 26, 27, 28, 29]),
         (1.0, [21, 22, 23, 24, 25, 26, 27, 28]),
         (1.0, [20, 21, 22, 24, 25, 26, 27, 29])]}

    Args:
        subgraphs (list[list[int]]): a list of subgraphs specified by their nodes
        graph (nx.Graph): the input graph
        min_size (int): minimum size to search for dense subgraphs
        max_size (int): maximum size to search for dense subgraphs
        max_count (int): maximum number of densest subgraphs to keep track of for each size
        node_select (str, list or array): method of settling ties when more than one node of
            equal degree can be added/removed. Can be ``"uniform"`` (default), or a NumPy
            array or list containing node weights.

    Returns:
        dict[int, list[tuple[float, list[int]]]]: a dictionary of different sizes, each
        containing a list of densest subgraphs reported as a tuple of subgraph density and
        subgraph nodes, sorted in non-increasing order of density
    """
    dense = {}

    for s in subgraphs:
        r = resize(s, graph, min_size, max_size, node_select)

        for size, subgraph in r.items():
            r[size] = (nx.density(graph.subgraph(subgraph)), subgraph)

        _update_dict(dense, r, max_count)

    return dense
def graphDensity(G):
    return nx.density(G)
        edges2.append((new_edge_1, new_edge_2))

# create new graph
G2 = nx.Graph()
G2.add_nodes_from(nodes)
G2.add_edges_from(edges2)

fig.add_subplot(313)
plt.title('P = 80%')
nx.draw(G2)
plt.show()

#################
# stats
print('Info:')
print('-------------------------')
print("Small World")
print(nx.info(G))
print('Density %.2lf' % nx.density(G))
print(' ')
print("Small World 10% change")
print(nx.info(G1))
print('Density %.2lf' % nx.density(G1))
print(' ')
print("Small World 80% change")
print(nx.info(G2))
print('Density %.2lf' % nx.density(G2))
edge_files = ['/Volumes/Isabella/papers-metadata/fields/%s-edges-%d.json' % (field, year)
              for year in years]
metrics = open('%s-metrics.csv' % field, 'w+')

for f in range(len(node_files)):
    nodes = json.loads(open(node_files[f]).read())
    for i in nodes:
        g.add_node(i['id'], year=i['year'], keys=i['keywords'], fos=i['fos'])
    pre_edges = nx.number_of_nodes(g)

    # Add edges
    edges = []
    with open(edge_files[f]) as k:
        for line in k:
            edges.append(ast.literal_eval(line))
    g.add_edges_from(edges)

    mets = [pre_edges,
            nx.number_of_nodes(g),
            nx.number_of_edges(g),
            nx.density(g)]
    print(mets, file=metrics)
def run_GT_calcs(G, just_data, Do_kdist, Do_dia, Do_BCdist, Do_CCdist, Do_ECdist,
                 Do_GD, Do_Eff, Do_clust, Do_ANC, Do_Ast, Do_WI, multigraph):

    # getting nodes and edges and defining variables for later use
    klist = [0]
    Tlist = [0]
    BCdist = [0]
    CCdist = [0]
    ECdist = [0]
    if multigraph:
        Do_BCdist = 0
        Do_ECdist = 0
        Do_clust = 0

    data_dict = {"x": [], "y": []}

    nnum = int(nx.number_of_nodes(G))
    enum = int(nx.number_of_edges(G))

    if Do_ANC | Do_dia:
        connected_graph = nx.is_connected(G)

    # making a dictionary for the parameters and results
    just_data.append(nnum)
    data_dict["x"].append("Number of nodes")
    data_dict["y"].append(nnum)
    just_data.append(enum)
    data_dict["x"].append("Number of edges")
    data_dict["y"].append(enum)
    multi_image_settings.progress(35)

    # calculating parameters as requested

    # creating degree histogram
    if Do_kdist == 1:
        klist1 = nx.degree(G)
        ksum = 0
        klist = np.zeros(len(klist1))
        for j in range(len(klist1)):
            ksum = ksum + klist1[j]
            klist[j] = klist1[j]
        k = ksum / len(klist1)
        k = round(k, 5)
        just_data.append(k)
        data_dict["x"].append("Average degree")
        data_dict["y"].append(k)

    multi_image_settings.progress(40)

    # calculating network diameter
    if Do_dia == 1:
        if connected_graph:
            dia = int(diameter(G))
        else:
            dia = 'NaN'
        just_data.append(dia)
        data_dict["x"].append("Network Diameter")
        data_dict["y"].append(dia)

    multi_image_settings.progress(45)

    # calculating graph density
    if Do_GD == 1:
        GD = nx.density(G)
        GD = round(GD, 5)
        just_data.append(GD)
        data_dict["x"].append("Graph density")
        data_dict["y"].append(GD)

    multi_image_settings.progress(50)

    # calculating global efficiency
    if Do_Eff == 1:
        Eff = global_efficiency(G)
        Eff = round(Eff, 5)
        just_data.append(Eff)
        data_dict["x"].append("Global Efficiency")
        data_dict["y"].append(Eff)

    multi_image_settings.progress(55)

    # calculating the Wiener index
    if Do_WI == 1:
        WI = wiener_index(G)
        WI = round(WI, 1)
        just_data.append(WI)
        data_dict["x"].append("Wiener Index")
        data_dict["y"].append(WI)

    multi_image_settings.progress(60)

    # calculating clustering coefficients
    if Do_clust == 1:
        Tlist1 = clustering(G)
        Tlist = np.zeros(len(Tlist1))
        for j in range(len(Tlist1)):
            Tlist[j] = Tlist1[j]
        clust = average_clustering(G)
        clust = round(clust, 5)
        just_data.append(clust)
        data_dict["x"].append("Average clustering coefficient")
        data_dict["y"].append(clust)

    # calculating average nodal connectivity
    if Do_ANC == 1:
        if connected_graph:
            ANC = average_node_connectivity(G)
            ANC = round(ANC, 5)
        else:
            ANC = 'NaN'
        just_data.append(ANC)
        data_dict["x"].append("Average nodal connectivity")
        data_dict["y"].append(ANC)

    multi_image_settings.progress(65)

    # calculating assortativity coefficient
    if Do_Ast == 1:
        Ast = degree_assortativity_coefficient(G)
        Ast = round(Ast, 5)
        just_data.append(Ast)
        data_dict["x"].append("Assortativity Coefficient")
        data_dict["y"].append(Ast)

    multi_image_settings.progress(70)

    # calculating betweenness centrality histogram
    if Do_BCdist == 1:
        BCdist1 = betweenness_centrality(G)
        Bsum = 0
        BCdist = np.zeros(len(BCdist1))
        for j in range(len(BCdist1)):
            Bsum += BCdist1[j]
            BCdist[j] = BCdist1[j]
        Bcent = Bsum / len(BCdist1)
        Bcent = round(Bcent, 5)
        just_data.append(Bcent)
        data_dict["x"].append("Average betweenness centrality")
        data_dict["y"].append(Bcent)

    multi_image_settings.progress(75)

    # calculating closeness centrality
    if Do_CCdist == 1:
        CCdist1 = closeness_centrality(G)
        Csum = 0
        CCdist = np.zeros(len(CCdist1))
        for j in range(len(CCdist1)):
            Csum += CCdist1[j]
            CCdist[j] = CCdist1[j]
        Ccent = Csum / len(CCdist1)
        Ccent = round(Ccent, 5)
        just_data.append(Ccent)
        data_dict["x"].append("Average closeness centrality")
        data_dict["y"].append(Ccent)

    multi_image_settings.progress(80)

    # calculating eigenvector centrality
    if Do_ECdist == 1:
        try:
            ECdist1 = eigenvector_centrality(G, max_iter=100)
        except:
            ECdist1 = eigenvector_centrality(G, max_iter=10000)
        Esum = 0
        ECdist = np.zeros(len(ECdist1))
        for j in range(len(ECdist1)):
            Esum += ECdist1[j]
            ECdist[j] = ECdist1[j]
        Ecent = Esum / len(ECdist1)
        Ecent = round(Ecent, 5)  # fixed: previously rounded Ccent by mistake
        just_data.append(Ecent)
        data_dict["x"].append("Average eigenvector centrality")
        data_dict["y"].append(Ecent)

    data = pd.DataFrame(data_dict)

    return data, just_data, klist, Tlist, BCdist, CCdist, ECdist
import networkx as nx
import matplotlib.pyplot as plt
import collections
from numpy import mean

# ---------------------------------------------------------
# ----------------------- Select the graph
# ---------------------------------------------------------
G_fb = nx.read_edgelist("facebook_combined.txt", create_using=nx.Graph(), nodetype=int)
# G_fb = nx.petersen_graph()  # Synthetic graphs

print(nx.info(G_fb))
print('Density: %f' % nx.density(G_fb))
print('Number of connected components: %f' % nx.number_connected_components(G_fb))

# print(nx.connected_components(G_fb))
A = sorted(nx.connected_components(G_fb), key=len, reverse=True)
print(A)
# print('Strongly connected?: %s' % nx.is_strongly_connected(G_fb))
"""
print('is_directed: %s' % nx.is_directed(G_fb))
print('average_clustering: %f' % nx.average_clustering(G_fb))
print('average_degree_connectivity: %s' % nx.average_degree_connectivity(G_fb))
# requires time
print('Diameter (it is the maximum eccentricity): %d' % nx.diameter(G_fb))
# requires time
print('average_clustering: %f' % nx.average_clustering(G_fb))
print('assortativity: %f' % nx.degree_assortativity_coefficient(G_fb))
# ------------------------------------------------------------------------------
def get_stats(G, output_path=None, all_stats=False):
    r"""
    Prints or stores some basic statistics about the graph that are commonly used
    in the network embedding literature. If an output file path is provided, the
    results are written to that file instead of being printed.

    Parameters
    ----------
    G : graph
        A NetworkX graph.
    output_path : file or string, optional
        File or filename to write to. If None, the results are printed.
    all_stats : bool
        Sets whether all stats or only a small subset of them should be shown.
    """
    # Compute the number of nodes and edges of the graph
    N = len(G.nodes)
    M = len(G.edges)

    # Compute the average degree and the number of degree-1 and degree-2 nodes
    degs = np.array(G.degree)[:, 1]
    avgdeg = sum(degs) / N
    counts = collections.Counter(degs)
    degdict = collections.OrderedDict(sorted(counts.items()))
    deg1 = degdict.get(1, 0)
    deg2 = degdict.get(2, 0)

    if all_stats:
        x = np.log(np.array(list(degdict.keys())))    # degrees
        y = np.log(np.array(list(degdict.values())))  # frequencies
        # The power-law coefficient is the slope of a linear model fitted to the
        # log-log data, which has a closed-form solution
        plawcoef = np.abs(np.cov(x, y) / np.var(x))[0, 1]
        cc = nx.average_clustering(G)
        dens = nx.density(G)
        if G.is_directed():
            diam = nx.diameter(G) if nx.is_strongly_connected(G) else float('inf')
        else:
            diam = nx.diameter(G)

    # Print or write the graph info to file
    if output_path is None:
        # Print some basic info about the graph
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            # weakly_connected_component_subgraphs() was removed in NetworkX 2.4
            Gcc = G.subgraph(max(nx.weakly_connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Directed and unweighted graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. weakly connected components: {}".format(num_ccs))
            print("Num. nodes in largest weakly CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest weakly CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M))
        else:
            num_ccs = nx.number_connected_components(G)
            # connected_component_subgraphs() was removed in NetworkX 2.4
            Gcc = G.subgraph(max(nx.connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            print("Undirected and unweighted graph")
            print("Num. nodes: {}".format(N))
            print("Num. edges: {}".format(M))
            print("Num. connected components: {}".format(num_ccs))
            print("Num. nodes in largest CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N))
            print("Num. edges in largest CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M))
        if all_stats:
            print("Clustering coefficient: {}".format(cc))
            print("Diameter: {}".format(diam))
            print("Density: {}".format(dens))
            print("Power-law coefficient: {}".format(plawcoef))
        print("Avg. node degree: {}".format(avgdeg))
        print("Num. degree 1 nodes: {}".format(deg1))
        print("Num. degree 2 nodes: {}".format(deg2))
        # G.number_of_selfloops() was removed in NetworkX 3.0
        print("Num. self loops: {}".format(nx.number_of_selfloops(G)))
        print("")
    else:
        # Write the info to the provided file
        f = open(output_path, 'w+b')
        if G.is_directed():
            num_ccs = nx.number_weakly_connected_components(G)
            Gcc = G.subgraph(max(nx.weakly_connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Directed and unweighted graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write("\n# Num. weakly connected components: {}".format(num_ccs).encode())
            f.write("\n# Num. nodes in largest weakly CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest weakly CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M).encode())
        else:
            num_ccs = nx.number_connected_components(G)
            Gcc = G.subgraph(max(nx.connected_components(G), key=len))
            Ncc = len(Gcc.nodes)
            Mcc = len(Gcc.edges)
            f.write("# Undirected and unweighted graph".encode())
            f.write("\n# Num. nodes: {}".format(N).encode())
            f.write("\n# Num. edges: {}".format(M).encode())
            f.write("\n# Num. connected components: {}".format(num_ccs).encode())
            f.write("\n# Num. nodes in largest CC: {} ({} % of total)".format(
                Ncc, Ncc * 100.0 / N).encode())
            f.write("\n# Num. edges in largest CC: {} ({} % of total)".format(
                Mcc, Mcc * 100.0 / M).encode())
        if all_stats:
            f.write("\n# Clustering coefficient: {}".format(cc).encode())
            f.write("\n# Diameter: {}".format(diam).encode())
            f.write("\n# Density: {}".format(dens).encode())
            f.write("\n# Power-law coefficient: {}".format(plawcoef).encode())
        f.write("\n# Avg. node degree: {}".format(avgdeg).encode())
        f.write("\n# Num. degree 1 nodes: {}".format(deg1).encode())
        f.write("\n# Num. degree 2 nodes: {}".format(deg2).encode())
        f.write("\n# Num. self loops: {}".format(nx.number_of_selfloops(G)).encode())
        f.write("\n".encode())
        f.close()
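# A minimal usage sketch for get_stats(), assuming the module-level imports the
# function body relies on (numpy as np, networkx as nx, collections):
G = nx.karate_club_graph()
get_stats(G, all_stats=True)           # print the full set of statistics
get_stats(G, output_path='stats.txt')  # or write the basic subset to a file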
def newsEventAnnotate(annotationName, storiesGraph, eventThresholds):
    print('\nnewsEventAnnotate():')

    if ('links' not in storiesGraph or 'nodes' not in storiesGraph):
        return storiesGraph

    # reset state - start
    for i in range(0, len(storiesGraph['nodes'])):
        if ('node-details' not in storiesGraph['nodes'][i]):
            storiesGraph['nodes'][i]['node-details'] = {}
        if ('annotation' not in storiesGraph['nodes'][i]['node-details']):
            storiesGraph['nodes'][i]['node-details']['annotation'] = annotationName
        storiesGraph['nodes'][i]['node-details']['connected-comp-type'] = ''
    # reset state - end

    if (len(storiesGraph['links']) == 0):
        return storiesGraph

    annotationName = storiesGraph['nodes'][0]['node-details']['annotation']

    G = nx.Graph()
    for edge in storiesGraph['links']:
        G.add_edge(edge['source'], edge['target'])

    storiesGraph['connected-comps'] = []
    # connected_component_subgraphs() was removed in NetworkX 2.4
    subgraphs = [G.subgraph(c) for c in nx.connected_components(G)]
    for subgraph in subgraphs:
        # the degree sum equals 2|E|, so e / v is the average degree
        e = sum(dict(subgraph.degree()).values())
        v = subgraph.number_of_nodes()
        avgDegree = e / float(v)
        nodes = list(subgraph.nodes())

        uniqueSourceCountDict = {}
        for storyIndex in nodes:
            source = storiesGraph['nodes'][storyIndex]['id'].split('-')[0]
            uniqueSourceCountDict[source] = True

        connectedCompType = {}
        connectedCompType['annotation'] = annotationName
        if (avgDegree >= eventThresholds['min-avg-degree']
                and len(uniqueSourceCountDict) >= eventThresholds['min-unique-source-count']):
            connectedCompType['connected-comp-type'] = 'event'
            connectedCompType['color'] = 'green'
        else:
            connectedCompType['connected-comp-type'] = 'cluster'
            connectedCompType['color'] = 'red'

        connectedCompsDetails = {}
        connectedCompsDetails['nodes'] = nodes
        connectedCompsDetails['node-details'] = connectedCompType
        connectedCompsDetails['avg-degree'] = avgDegree
        connectedCompsDetails['density'] = nx.density(subgraph)
        connectedCompsDetails['unique-source-count'] = len(uniqueSourceCountDict)

        for storyIndex in nodes:
            if ('color' not in storiesGraph['nodes'][storyIndex]['node-details']):
                storiesGraph['nodes'][storyIndex]['node-details']['color'] = connectedCompType['color']
            storiesGraph['nodes'][storyIndex]['node-details']['connected-comp-type'] = \
                connectedCompType['connected-comp-type']

        storiesGraph['connected-comps'].append(connectedCompsDetails)

    return storiesGraph
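# A minimal usage sketch for newsEventAnnotate(), with a hypothetical node-link
# dict whose node ids encode the source name before the first '-':
storiesGraph = {
    'nodes': [{'id': 'cnn-0'}, {'id': 'bbc-1'}, {'id': 'npr-2'}],
    'links': [{'source': 0, 'target': 1}, {'source': 1, 'target': 2},
              {'source': 0, 'target': 2}]
}
eventThresholds = {'min-avg-degree': 2.0, 'min-unique-source-count': 3}
storiesGraph = newsEventAnnotate('news-annotator', storiesGraph, eventThresholds)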
print(len(node_names))
print(len(edge_names))

G = nx.Graph()                # initialize the graph
G.add_nodes_from(node_names)  # add nodes to the graph
G.add_edges_from(edge_names)  # add edges to the graph
print(nx.info(G))             # nx.info() was removed in NetworkX 3.0; print(G) is the modern equivalent

# create dictionaries for the node attributes
name_dict = {}
lat_dict = {}
lon_dict = {}
for node in nodes:
    name_dict[node[0]] = node[1]
    lat_dict[node[0]] = node[2]
    lon_dict[node[0]] = node[3]

nx.set_node_attributes(G, name_dict, 'Stop')  # add each dictionary as a node attribute
nx.set_node_attributes(G, lat_dict, 'lat')
nx.set_node_attributes(G, lon_dict, 'lon')

for n in G.nodes():
    print(n, G.nodes[n]["Stop"])

density = nx.density(G)
print("Network Density:", density)

nx.draw(G)
plt.show()
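# Since every node now carries lat/lon attributes, a minimal sketch (assuming the
# graph G built above, with coordinates stored as strings from the source data)
# that draws the network at its geographic positions instead of the default layout:
pos = {n: (float(G.nodes[n]['lon']), float(G.nodes[n]['lat'])) for n in G.nodes()}
nx.draw(G, pos, node_size=10)
plt.show()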
def generate_erdos_renyi_from_current_graph(G):
    return nx.erdos_renyi_graph(len(G.nodes()), nx.density(G))
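# Quick sanity check, assuming any undirected graph G: erdos_renyi_graph(n, p) with
# p set to the observed density reproduces that density in expectation, since the
# density of an undirected graph is 2*M / (N*(N-1)) and each possible edge appears
# with probability p.
G = nx.karate_club_graph()
R = generate_erdos_renyi_from_current_graph(G)
print(nx.density(G), nx.density(R))  # should be close, up to sampling noise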
edgereader = csv.reader(edgecsv)
edges = [tuple(e) for e in edgereader][1:]

# Print the number of nodes and edges in our two lists
# print(len(node_names))
print(len(edges))

G = nx.Graph()                # initialize an undirected Graph object, or use nx.DiGraph for a directed graph
G.add_nodes_from(node_names)  # add nodes to the graph
G.add_edges_from(edges)       # add edges to the graph
print(nx.info(G))             # print information about the graph
# print(G.edges())

print("The diameter is: ", nx.diameter(G, e=None))
print("The density is: ", nx.density(G))
print("The average shortest path:", nx.average_shortest_path_length(G))

# -------------------- The Longest Shortest Path --------------------------------
# Procedure to find the longest shortest path in any given topology
Nodes = G.nodes()
G2 = nx.convert_node_labels_to_integers(G, first_label=1, ordering='default', label_attribute=None)
ns = nx.number_of_nodes(G2)
es = nx.number_of_edges(G2)
length = dict(nx.all_pairs_shortest_path_length(G2))  # materialize the iterator (NetworkX >= 2.0)
x = ns  # nodes in topology
# x = len(G.nodes())
y = x
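# A minimal sketch completing the "longest shortest path" procedure above, assuming
# the connected graph G2: take the maximum shortest-path length over all pairs,
# which for a connected graph equals the diameter.
longest = max(max(d.values()) for d in length.values())
print("The longest shortest path is:", longest)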
def density_thresholding(conn_matrix, thr, max_iters=10000, interval=0.01):
    """
    Iteratively apply an absolute threshold to achieve a target density.

    Parameters
    ----------
    conn_matrix : np.ndarray
        Weighted connectivity matrix.
    thr : float
        Target density value between 0-1.
    max_iters : int
        Maximum number of iterations for performing absolute thresholding.
        Default is 10000.
    interval : float
        Interval by which the absolute threshold is increased at each iteration.
        Default is 0.01.

    Returns
    -------
    conn_matrix : np.ndarray
        Thresholded connectivity matrix.

    References
    ----------
    .. [1] van Wijk, B. C. M., Stam, C. J., & Daffertshofer, A. (2010). Comparing
      brain networks of different size and connectivity density using graph
      theory. PLoS ONE. https://doi.org/10.1371/journal.pone.0013701
    .. [2] Rubinov, M., & Sporns, O. (2010). Complex network measures of brain
      connectivity: Uses and interpretations. NeuroImage 52:1059-69.
    """
    from pynets.core import thresholding

    np.fill_diagonal(conn_matrix, 0)

    work_thr = 0
    i = 1
    # from_numpy_matrix() was removed in NetworkX 3.0; use from_numpy_array() there
    density = nx.density(nx.from_numpy_matrix(conn_matrix))
    if float(thr) < float(density):
        while float(i) < max_iters and float(work_thr) < float(1):
            work_thr = float(work_thr) + float(interval)
            density = nx.density(
                nx.from_numpy_matrix(
                    thresholding.threshold_absolute(conn_matrix, work_thr)))
            print("%s%d%s%.2f%s%.2f%s" % (
                "Iteration ", i, " -- with Thresh: ", float(work_thr),
                " and Density: ", float(density), "...",
            ))
            if float(thr) >= float(density):
                conn_matrix = thresholding.threshold_absolute(conn_matrix, work_thr)
                break
            i = i + 1
    else:
        print("Density of raw matrix is already greater than or equal to the "
              "target density requested")

    return conn_matrix
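# A minimal usage sketch with synthetic data (hypothetical values; pynets must be
# installed for the import inside the function to resolve):
W = np.random.rand(90, 90)
W = (W + W.T) / 2  # symmetrize so the matrix describes an undirected graph
W_thr = density_thresholding(W, thr=0.2)
print(nx.density(nx.from_numpy_matrix(W_thr)))  # should land at or just below 0.2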
def centralityAnalysis(repo: git.Repo, commits: List[git.Commit], outputDir: str):
    allRelatedAuthors = {}
    authorCommits = Counter({})

    # for all commits...
    print("Analyzing centrality")
    for commit in Bar('Processing').iter(commits):
        author = commit.author.email

        # increase the author's commit count
        authorCommits.update({author: 1})

        # initialize dates for the related-author analysis
        commitDate = datetime.fromtimestamp(commit.committed_date)
        earliestDate = commitDate + relativedelta(months=-1)
        latestDate = commitDate + relativedelta(months=+1)

        # find authors related to this commit
        # commitRelatedCommits = commit.iter_items(
        #     repo, 'master',
        #     after=earliestDate.strftime('%Y-%m-%d'),
        #     before=latestDate.strftime('%Y-%m-%d'))
        commitRelatedCommits = filter(
            lambda c: findRelatedCommits(author, earliestDate, latestDate, c), commits)
        commitRelatedAuthors = set(map(lambda c: c.author.email, commitRelatedCommits))

        # get the current related-author collection and update it
        authorRelatedAuthors = allRelatedAuthors.setdefault(author, set())
        authorRelatedAuthors.update(commitRelatedAuthors)

    # prepare the graph
    print("Preparing NX graph")
    G = nx.Graph()
    for author in allRelatedAuthors:
        for relatedAuthor in allRelatedAuthors[author]:
            G.add_edge(author.strip(), relatedAuthor.strip())

    # analyze the graph
    closeness = dict(nx.closeness_centrality(G))
    betweenness = dict(nx.betweenness_centrality(G))
    centrality = dict(nx.degree_centrality(G))
    density = nx.density(G)
    modularity = list(greedy_modularity_communities(G))

    print("Outputting CSVs")

    # output non-tabular results
    with open(os.path.join(outputDir, 'project.csv'), 'a', newline='') as f:
        w = csv.writer(f, delimiter=',')
        w.writerow(['Density', density])
        w.writerow(['Community Count', len(modularity)])

    # output community information
    with open(os.path.join(outputDir, 'community.csv'), 'a', newline='') as f:
        w = csv.writer(f, delimiter=',')
        w.writerow(['Community Index', 'Author Count', 'Commit Count'])
        for idx, community in enumerate(modularity):
            communityCommitCount = sum(authorCommits[author] for author in community)
            w.writerow([idx + 1, len(community), communityCommitCount])

    # combine centrality results
    combined = {}
    for key in closeness:
        single = {
            'Author': key,
            'Closeness': closeness[key],
            'Betweenness': betweenness[key],
            'Centrality': centrality[key]
        }
        combined[key] = single

    # output tabular results
    with open(os.path.join(outputDir, 'centrality.csv'), 'w', newline='') as f:
        w = csv.DictWriter(f, ['Author', 'Closeness', 'Betweenness', 'Centrality'])
        w.writeheader()
        for key in combined:
            w.writerow(combined[key])

    # output the graph to PNG
    print("Outputting graph to PNG")
    graphFigure = plt.figure(5, figsize=(30, 30))
    nx.draw(G, with_labels=True, node_color='orange', node_size=4000,
            edge_color='black', linewidths=2, font_size=20)
    graphFigure.savefig(os.path.join(outputDir, 'graph.png'))
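# A minimal usage sketch (hypothetical repository path; requires GitPython, the
# progress package for Bar, python-dateutil for relativedelta, and the
# findRelatedCommits helper referenced inside the function):
repo = git.Repo('/path/to/repo')
commits = list(repo.iter_commits('master'))
os.makedirs('out', exist_ok=True)
centralityAnalysis(repo, commits, 'out')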