def compare_graphs(graph):
    n = nx.number_of_nodes(graph)
    m = nx.number_of_edges(graph)
    k = np.mean(list(nx.degree(graph).values()))
    erdos = nx.erdos_renyi_graph(n, p=m / float(n * (n - 1) / 2))
    barabasi = nx.barabasi_albert_graph(n, m=int(k) - 7)
    small_world = nx.watts_strogatz_graph(n, int(k), p=0.04)
    print(' ')
    print('Compare the number of edges')
    print(' ')
    print('My network: ' + str(nx.number_of_edges(graph)))
    print('Erdos: ' + str(nx.number_of_edges(erdos)))
    print('Barabasi: ' + str(nx.number_of_edges(barabasi)))
    print('SW: ' + str(nx.number_of_edges(small_world)))
    print(' ')
    print('Compare average clustering coefficients')
    print(' ')
    print('My network: ' + str(nx.average_clustering(graph)))
    print('Erdos: ' + str(nx.average_clustering(erdos)))
    print('Barabasi: ' + str(nx.average_clustering(barabasi)))
    print('SW: ' + str(nx.average_clustering(small_world)))
    print(' ')
    print('Compare average path length')
    print(' ')
    print('My network: ' + str(nx.average_shortest_path_length(graph)))
    print('Erdos: ' + str(nx.average_shortest_path_length(erdos)))
    print('Barabasi: ' + str(nx.average_shortest_path_length(barabasi)))
    print('SW: ' + str(nx.average_shortest_path_length(small_world)))
    print(' ')
    print('Compare graph diameter')
    print(' ')
    print('My network: ' + str(nx.diameter(graph)))
    print('Erdos: ' + str(nx.diameter(erdos)))
    print('Barabasi: ' + str(nx.diameter(barabasi)))
    print('SW: ' + str(nx.diameter(small_world)))
def algorithm(w1, w2, w3, w4, G1, G2, G3, G4):
    try:
        cc = np.array([nx.average_clustering(G1, weight='weight'),
                       nx.average_clustering(G2, weight='weight'),
                       nx.average_clustering(G3, weight='weight'),
                       nx.average_clustering(G4, weight='weight')])
        spl = np.array([nx.average_shortest_path_length(G1, weight='weight'),
                        nx.average_shortest_path_length(G2, weight='weight'),
                        nx.average_shortest_path_length(G3, weight='weight'),
                        nx.average_shortest_path_length(G4, weight='weight')])
        nds = np.array([nx.number_of_nodes(G1), nx.number_of_nodes(G2),
                        nx.number_of_nodes(G3), nx.number_of_nodes(G4)])
        edgs = np.array([nx.number_of_edges(G1), nx.number_of_edges(G2),
                         nx.number_of_edges(G3), nx.number_of_edges(G4)])
        if valid(cc):
            cc = stats.zscore(cc)
        else:
            cc = np.array([.1, .1, .1, .1])
        cc = cc - min(cc) + .1
        if valid(spl):
            spl = stats.zscore(spl)
        else:
            spl = np.array([.1, .1, .1, .1])
        spl = spl - min(spl) + .1
        if valid(nds):
            nds = stats.zscore(nds)
        else:
            nds = np.array([.1, .1, .1, .1])
        nds = nds - min(nds) + .1
        if valid(edgs):
            edgs = stats.zscore(edgs)
        else:
            edgs = np.array([.1, .1, .1, .1])
        edgs = edgs - min(edgs) + .1
        r1 = (w1 * cc[0] + w2 * spl[0] + w3 * nds[0] + w4 * edgs[0]) * 1000
        r2 = (w1 * cc[1] + w2 * spl[1] + w3 * nds[1] + w4 * edgs[1]) * 1000
        r3 = (w1 * cc[2] + w2 * spl[2] + w3 * nds[2] + w4 * edgs[2]) * 1000
        r4 = (w1 * cc[3] + w2 * spl[3] + w3 * nds[3] + w4 * edgs[3]) * 1000
        d = {'Player 1:': r1, 'Player 2:': r2, 'Player 3:': r3, 'Player 4:': r4}
        rank = sorted(d.items(), key=lambda x: x[1], reverse=True)
        return ["USAU RANKINGS",
                str(rank[0][0]) + " " + str(int(rank[0][1])),
                str(rank[1][0]) + " " + str(int(rank[1][1])),
                str(rank[2][0]) + " " + str(int(rank[2][1])),
                str(rank[3][0]) + " " + str(int(rank[3][1]))]
    except:
        return ["Unable to compute rankings! Need data",
                "Player 1", "Player 2", "Player 3", "Player 4"]
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree.
        The degree here is G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G = nx.read_gpickle(read_g)
    print 'number of original nodes: ', nx.number_of_nodes(G)
    print 'number of original edges: ', nx.number_of_edges(G)

    for (u, v, w) in G.edges(data='weight'):
        if w < minEdgeWeight:
            G.remove_edge(u, v)

    for n in G.nodes():
        if G.degree(n) < minNodeDegree:
            G.remove_node(n)

    print 'number of new nodes: ', nx.number_of_nodes(G)
    print 'number of new edges: ', nx.number_of_edges(G)

    for (a, b, w) in G.edges_iter(data='weight'):
        unweight_allocation(G, a, b, w, Lp, Sp)

    print 'update weight ok'
    nx.write_gpickle(G, write_g)
    return
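# Usage sketch (added; not part of the original source). The pickle paths
# below are placeholders; Lp and Sp are forwarded to unweight_allocation
# unchanged.
if __name__ == '__main__':
    reduceGraph('graph_raw.gpickle', 'graph_reduced.gpickle',
                minEdgeWeight=3, minNodeDegree=2, Lp=0.5, Sp=0.5)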
def run_main():
    file = str(sys.argv[1])
    f = open(file, 'r')
    print "\nReading inputfile:", file, "..."

    edgelist = []
    for line in f.readlines():
        edgelist.append((int(line.split()[0]), int(line.split()[1])))

    Directed_G = nx.DiGraph(edgelist)
    Undirected_G = Directed_G.to_undirected()

    #plt.figure(figsize=(8,8))
    #nx.draw(Directed_G,pos=nx.spring_layout(Directed_G))
    #plt.draw()
    #time.sleep(0.1)

    # compute other things
    print "Number of nodes involved in network:", nx.number_of_nodes(Undirected_G)
    print "Number of edges:", nx.number_of_edges(Undirected_G)
    print "Average degree:", nx.number_of_edges(Undirected_G) / float(nx.number_of_nodes(Undirected_G))

    t0 = time.clock()
    print "Average clustering coefficient:", compute_clustering_coefficient(Directed_G, Undirected_G)
    print "Took:", time.clock() - t0, "seconds"

    t1 = time.clock()
    print "Average path length:", average_shortest_path(Directed_G, Undirected_G)
    print "Took:", time.clock() - t1, "seconds"
    print "Total time:", time.clock() - t0, "seconds"

    report_final_stats()
    counter += 1
    second_counter += 1
def mcmc_subgraph_sample(master, eg):
    #import pdb; pdb.set_trace()
    new_graph = eg.copy()
    new_num_edges = curr_num_edges = num_eg_edges = nx.number_of_edges(eg)
    iterations = 2 * nx.number_of_nodes(eg)  # play around w/ this number
    step_size = 1  # play around w/ this number
    sample_set = set(master.nodes_iter()) - set(eg.nodes_iter())
    coeff = 1
    for i in range(iterations):
        new_nodes = random.sample(sample_set, step_size)
        old_nodes = random.sample(new_graph.nodes(), step_size)
        new_graph = replace_in_context(new_nodes, old_nodes, new_graph, master)
        new_num_edges = nx.number_of_edges(new_graph)
        new_stat = abs(new_num_edges - num_eg_edges)
        old_stat = abs(curr_num_edges - num_eg_edges)
        rval = random.random()
        if (new_stat <= old_stat) or (rval < math.exp(coeff * (old_stat - new_stat))):
            # use new graph
            curr_num_edges = new_num_edges
            sample_set = (sample_set | set(new_nodes)) - set(old_nodes)
        else:
            # swap back old graph
            new_graph = replace_in_context(old_nodes, new_nodes, new_graph, master)
        if new_stat > num_eg_edges:
            coeff *= 1
    return new_graph.nodes_iter()
def validate_constituency_parse(tokenization):
    """
    Args:
        tokenization (concrete.structure.ttypes.Tokenization)

    Returns:
        bool: True if tokenization's constituency parse is valid, False otherwise
    """
    valid = True
    if tokenization.parse:
        total_constituents = len(tokenization.parse.constituentList)
        logging.debug(ilm(6, "tokenization '%s' has %d constituents" %
                          (tokenization.uuid, total_constituents)))

        total_uuid_mismatches = 0
        constituent_id_set = set()
        constituent_parse_tree = nx.DiGraph()

        for constituent in tokenization.parse.constituentList:
            # Add nodes to parse tree
            constituent_parse_tree.add_node(constituent.id)

            if constituent.id not in constituent_id_set:
                constituent_id_set.add(constituent.id)
            else:
                valid = False
                logging.error(ilm(7, "constituent ID %d has already been used in this sentence's tokenization" %
                                  constituent.id))

            # Per the Concrete 'structure.thrift' file, tokenSequence may not be defined:
            #   "Typically, this field will only be defined for leaf constituents
            #    (i.e., constituents with no children)."
            if constituent.tokenSequence and constituent.tokenSequence.tokenizationId != tokenization.uuid:
                total_uuid_mismatches += 1

        if total_uuid_mismatches > 0:
            valid = False
            logging.error(ilm(6, "tokenization '%s' has UUID mismatch for %d/%d constituents" %
                              (tokenization.uuid, total_uuid_mismatches, total_constituents)))

        # Add edges to constituent parse tree
        for constituent in tokenization.parse.constituentList:
            if constituent.childList:
                for child_id in constituent.childList:
                    constituent_parse_tree.add_edge(constituent.id, child_id)

        # Check if constituent parse "tree" is actually a tree
        undirected_graph = constituent_parse_tree.to_undirected()
        if not nx.is_connected(undirected_graph):
            valid = False
            logging.error(ilm(6, "The constituent parse \"tree\" is not a fully connected graph - the graph has %d components" %
                              len(nx.connected_components(undirected_graph))))
        if nx.number_of_nodes(constituent_parse_tree) != nx.number_of_edges(constituent_parse_tree) + 1:
            valid = False
            logging.error(ilm(6, "The constituent parse \"tree\" is not a tree. |V| != |E|+1 (|V|=%d, |E|=%d)" %
                              (nx.number_of_nodes(constituent_parse_tree),
                               nx.number_of_edges(constituent_parse_tree))))

    return valid
def eval_proximity_vertices(network, graph_xml):
    '''returns the proximity of proportion of vertices between synthetic
    network (test) and real network (goal)'''
    number_of_nodes_test = float(nx.number_of_nodes(network))
    if network.isDirected():
        proportion_edges_test = nx.number_of_edges(network) / (number_of_nodes_test * (number_of_nodes_test - 1))
    else:
        proportion_edges_test = 2. * nx.number_of_edges(network) / (number_of_nodes_test * (number_of_nodes_test - 1))
    proportion_edges_goal = eval(graph_xml.find('vertices').get('value'))
    proximity = proximity_numbers(proportion_edges_goal, proportion_edges_test)
    return proximity
def reformat2Igraph(self, graph):
    print nx.number_of_nodes(graph)
    print nx.number_of_edges(graph)
    G = Graph(0)
    for i in range(nx.number_of_nodes(graph)):
        G.add_vertices(1)
    for i in graph.edge:
        for j in graph.edge[i]:
            if i <= j:
                G.add_edges([(i, j)])
    return G
def get_number_of_edges(filename):
    import networkx as nx
    threshold = 0
    f = open(filename[:-4] + '_edges.dat', 'w')
    for i in range(0, 101):
        threshold = float(i) / 100
        G = get_threshold_matrix(filename, threshold)
        print 'number of edges:', nx.number_of_edges(G)
        max_number_edges = nx.number_of_nodes(G) * (nx.number_of_nodes(G) - 1.) / 2
        f.write("%f\t%d\t%f\n" % (threshold, nx.number_of_edges(G),
                                  nx.number_of_edges(G) / max_number_edges))
    f.close()
def modularity(subgs, G):
    Q = 0
    total_edges = float(nx.number_of_edges(G))
    for g in subgs:
        nodes = g.node.keys()
        degree_sum = sum(nx.degree(G, nodes).values())
        edges_num = nx.number_of_edges(g)
        Q += edges_num / total_edges - (degree_sum / (2 * total_edges)) ** 2
    return Q
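# Usage sketch (added; not part of the original source). Assumes the
# NetworkX 1.x semantics the function above relies on: g.node is a dict and
# nx.degree(G, nbunch) returns a dict. The karate club 'club' node attribute
# is assumed present (NetworkX >= 1.9).
if __name__ == '__main__':
    G = nx.karate_club_graph()
    mr_hi = G.subgraph([n for n in G if G.node[n]['club'] == 'Mr. Hi'])
    officer = G.subgraph([n for n in G if G.node[n]['club'] == 'Officer'])
    print(modularity([mr_hi, officer], G))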
def test_union_all_and_compose_all():
    K3 = nx.complete_graph(3)
    P3 = nx.path_graph(3)

    G1 = nx.DiGraph()
    G1.add_edge("A", "B")
    G1.add_edge("A", "C")
    G1.add_edge("A", "D")
    G2 = nx.DiGraph()
    G2.add_edge("1", "2")
    G2.add_edge("1", "3")
    G2.add_edge("1", "4")

    G = nx.union_all([G1, G2])
    H = nx.compose_all([G1, G2])
    assert_edges_equal(G.edges(), H.edges())
    assert_false(G.has_edge("A", "1"))
    assert_raises(nx.NetworkXError, nx.union, K3, P3)

    H1 = nx.union_all([H, G1], rename=("H", "G1"))
    assert_equal(sorted(H1.nodes()),
                 ["G1A", "G1B", "G1C", "G1D",
                  "H1", "H2", "H3", "H4",
                  "HA", "HB", "HC", "HD"])

    H2 = nx.union_all([H, G2], rename=("H", ""))
    assert_equal(sorted(H2.nodes()),
                 ["1", "2", "3", "4",
                  "H1", "H2", "H3", "H4",
                  "HA", "HB", "HC", "HD"])

    assert_false(H1.has_edge("NB", "NA"))

    G = nx.compose_all([G, G])
    assert_edges_equal(G.edges(), H.edges())

    G2 = nx.union_all([G2, G2], rename=("", "copy"))
    assert_equal(sorted(G2.nodes()),
                 ["1", "2", "3", "4",
                  "copy1", "copy2", "copy3", "copy4"])

    assert_equal(G2.neighbors("copy4"), [])
    assert_equal(sorted(G2.neighbors("copy1")), ["copy2", "copy3", "copy4"])
    assert_equal(len(G), 8)
    assert_equal(nx.number_of_edges(G), 6)

    E = nx.disjoint_union_all([G, G])
    assert_equal(len(E), 16)
    assert_equal(nx.number_of_edges(E), 12)

    E = nx.disjoint_union_all([G1, G2])
    assert_equal(sorted(E.nodes()), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

    G1 = nx.DiGraph()
    G1.add_edge("A", "B")
    G2 = nx.DiGraph()
    G2.add_edge(1, 2)
    G3 = nx.DiGraph()
    G3.add_edge(11, 22)
    G4 = nx.union_all([G1, G2, G3], rename=("G1", "G2", "G3"))
    assert_equal(sorted(G4.nodes()),
                 ["G1A", "G1B", "G21", "G22", "G311", "G322"])
def test_equivalence_transform(self, ch2, ch3, methane):
    ch2_atoms = list(ch2.particles())
    methane_atoms = list(methane.particles())

    equivalence_transform(ch2, ch2_atoms[0], methane_atoms[0], add_bond=False)
    assert (ch2_atoms[0].pos == methane_atoms[0].pos).all()

    equivalence_transform(ch2, ch2['up'], ch3['up'])
    assert ch2.n_bonds == 2
    assert nx.number_of_edges(ch2.root.bond_graph) == 3
    assert nx.number_of_edges(ch3.root.bond_graph) == 4

    ethyl = mb.Compound([ch2, ch3])
    assert ethyl.n_bonds == 6
def main(DAG, cite, out):
    DAG = create(cite, DAG)
    print nx.number_of_edges(DAG)
    # largest_component = component(DAG)  # uses component function to select the largest subgraph in the data
    # if not largest_component.is_directed():
    #     out.write('\n' + 'yup not directed')
    # out.write('\n' + 'There are %d nodes in the largest component' % nx.number_of_nodes(largest_component))
    # largest_component_DAG = redirect(DAG, largest_component)
    # if not largest_component_DAG.is_directed():
    #     out.write('\n' + 'Not directed')
    # else:
    #     out.write('\n' + 'Directed!')
    # out.write('\n' + 'There are now %d nodes in the largest component' % nx.number_of_nodes(largest_component_DAG))
    # check if it's connected etc!
    # out.write(str(DAG.number_of_edges()))
    # if not nx.is_connected(largest_component_DAG.to_undirected()):
    #     out.write('\n' + 'The network is disconnected')
    #     out.write('\n' + "There are %d connected components" % nx.number_connected_components(largest_component_DAG.to_undirected()))
    # out.write(str(nx.average_shortest_path_length(largest_component_DAG)))

    print age('0001001', '9401139')
    print age('0103030', '0204161')
    print simple_age('33', '100')
    print simple_age('45', '2')

    paths = find_all_paths(DAG, '8', '0')
    # print paths
    empty = []
    listofpaths = make_list_of_paths(paths, empty)
    pathset = []
    for path in listofpaths:
        if path not in pathset:
            pathset.append(path)
    for path in pathset:
        out.write('\n' + str(path))
    out.write('\n' + 'The longest path is ' + str(max(pathset, key=len)))
    # setofpaths = set(listofpaths)

    # Uses method 2 of assigning a number to each node reflecting the path
    # length between that node and the youngest node
    print 'Testing using numbering started'
    paths3 = distance_to_all_nodes(box, extremal_points[0], n)  # creates list containing lists of lists of lists...containing a list of the nodes in path
    empty3 = []  # creates the list which will contain all of the paths
    listofpaths3 = make_list_of_paths(paths3, empty3)  # looks through 'paths' to extract proper paths from the various levels of sublists
    pathset3 = []  # creates the list which will contain all unique paths
    for path in listofpaths3:
        if path not in pathset3:
            pathset3.append(path)  # only adds unique paths to pathset
    out.write('\n' + 'Path testing using number...%d unique paths found' % (len(pathset3)))
    for path in pathset3:
        out.write('\n' + str(path))  # prints all the unique paths
    # longestpath3 = max(pathset3, key=len)  # identifies the longest path in the set of paths
    # out.write('\n' + 'The longest path from numbering is %s which is %d nodes long' % (str(longestpath3), len(longestpath3)))
    print 'Testing using numbering completed'
def compareNumberOfEdges(masterGraph, wordGraph, worksheet, row):
    numberOfEdgesMasterGraph = nx.number_of_edges(masterGraph)
    numberOfEdgesWordGraph = nx.number_of_edges(wordGraph)
    # worksheet.write(row,1,numberOfEdgesMasterGraph)
    # worksheet.write(row,2,numberOfEdgesWordGraph)
    result = False
    if numberOfEdgesMasterGraph >= numberOfEdgesWordGraph:
        result = True
    # worksheet.write(row,3,result)
    if result == True:
        return 1
    else:
        return -1
def generate_smtcode_old(self):
    #convert_mpnf
    st = time.time()
    self.mdg = get_weighted_graph(self.graph, self.pair_mp_prop, self.opts)
    self.info.extend([('g_nodes', nx.number_of_nodes(self.graph)),
                      ('g_edges', nx.number_of_edges(self.graph)),
                      ('mdg_nodes', nx.number_of_nodes(self.mdg)),
                      ('mdg_edges', nx.number_of_edges(self.mdg))])
    sccs = self._gen_sccs(self.mdg)
    self._gen_smt_from_graph(sccs)
    en = time.time()
    self.time.append(('total gen_smt: graph->mdg->mdg(scc)', en - st))
def split_edges(G):
    Gsmaller = nx.DiGraph()
    Glarger = nx.DiGraph()
    for node in G.nodes():
        Gsmaller.add_node(node)
        Glarger.add_node(node)
    for (u, v) in G.edges():
        if G.node[u]['index'] < G.node[v]['index']:
            Gsmaller.add_edge(u, v)
        elif G.node[u]['index'] > G.node[v]['index']:
            Glarger.add_edge(u, v)
    if nx.number_of_edges(Gsmaller) > nx.number_of_edges(Glarger):
        return topological_sort(Gsmaller)
    else:
        return topological_sort(Glarger)
def get_SMTCode(mdg, mp_prop, opts):
    # input mdg
    # output SMTCode list, result per
    results = {'SCCs': [], 'AccSCCs': []}
    if opts.tgba:
        all_acceptlist = mdg.graph['acccond']
    SCCs = nx.strongly_connected_component_subgraphs(mdg)
    for scc in SCCs:
        results['SCCs'].append({'edges': nx.number_of_edges(scc),
                                'nodes': nx.number_of_nodes(scc)})
        if scc.edges() == []:
            pass
        else:
            # nx.draw_circular(scc)
            accept = 0
            if opts.tgba:
                a = reduce(lambda x, y: x | y,
                           [set(edge[2]['acc']) for edge in scc.edges(data=True)])
                print a
                acceptlist = list(a)
                print acceptlist
                """for edge in scc.edges(data=True):
                    for acc in edge[2]['acc']:
                        if not acc in acceptlist:
                            acceptlist.append(acc)
                    if opts.debug:
                        print acceptlist
                """
                if all_acceptlist.sort() == acceptlist.sort():
                    if opts.pdebug:
                        print acceptlist, ' ==? ', all_acceptlist
                        print ' TGBA-Accept'
                    accept = 1
            else:
                for node in scc.nodes():
                    if 'accept' in node:
                        accept = 1
                        break
            if accept == 1:
                if opts.debug:
                    print '\t found BA/TGBA-Accept SCC'
                code_time = runsmt.gen_smtcodelist_time(scc, mp_prop, opts)
                results['AccSCCs'].append({'SMTCode': code_time['code'],
                                           'edges': nx.number_of_edges(scc),
                                           'nodes': nx.number_of_nodes(scc),
                                           'gen_time': code_time['time'],
                                           'sat_time': 0})
            else:
                if opts.pdebug:
                    print ' non accept SCC'
    return results
def get_single_network_measures(G, thr):
    f = open(out_prfx + 'single_network_measures.dat', 'a')
    N = nx.number_of_nodes(G)
    L = nx.number_of_edges(G)
    D = nx.density(G)
    cc = nx.average_clustering(G)
    compon = nx.number_connected_components(G)
    Con_sub = nx.connected_component_subgraphs(G)

    values = []
    values_2 = []

    for node in G:
        values.append(G.degree(node))
    ave_deg = float(sum(values)) / float(N)

    f.write("%f\t%d\t%f\t%f\t%f\t%f\t" % (thr, L, D, cc, ave_deg, compon))
    # 1. threshold, 2. edges, 3. density, 4. clustering coefficient,
    # 5. average degree, 6. number of connected components

    for i in range(len(Con_sub)):
        if nx.number_of_nodes(Con_sub[i]) > 1:
            values_2.append(nx.average_shortest_path_length(Con_sub[i]))

    if len(values_2) == 0:
        f.write("0.\n")
    else:
        f.write("%f\n" % (sum(values_2) / len(values_2)))
    # 7. shortest pathway
    f.close()
def Attributes_of_Graph(G):
    print "*Statistic attributes of graphs:"
    print "N", nx.number_of_nodes(G)
    print "M", nx.number_of_edges(G)
    print "C", nx.average_clustering(G)
    #print "<d>", nx.average_shortest_path_length(G)
    print "r", nx.degree_assortativity_coefficient(G)

    degree_list = list(G.degree_iter())
    max_degree = 0
    min_degree = float('inf')  # start high so the first observed degree can lower it
    avg_degree_1 = 0.0
    avg_degree_2 = 0.0
    for node in degree_list:
        avg_degree_1 = avg_degree_1 + node[1]
        avg_degree_2 = avg_degree_2 + node[1] * node[1]
        if node[1] > max_degree:
            max_degree = node[1]
        if node[1] < min_degree:
            min_degree = node[1]
    #end for
    avg_degree = avg_degree_1 / len(degree_list)
    avg_degree_square = (avg_degree_2 / len(degree_list)) / (avg_degree * avg_degree)
    print "<k>", avg_degree
    print "k_max", max_degree
    print "H", avg_degree_square
    print "DH", float(max_degree - min_degree) / G.number_of_nodes()
def save_graph(self, graphname, fmt='edgelist'):
    """
    Saves the graph to disk

    **Positional Arguments:**

        graphname:
            - Filename for the graph

    **Optional Arguments:**

        fmt:
            - Output graph format
    """
    self.g.graph['ecount'] = nx.number_of_edges(self.g)
    g = nx.convert_node_labels_to_integers(self.g, first_label=1)
    if fmt == 'edgelist':
        nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(g, graphname)
    elif fmt == 'graphml':
        nx.write_graphml(g, graphname)
    else:
        raise ValueError('edgelist, gpickle, and graphml currently supported')
    pass
def load_graph(gid, with_followers=False):
    users = db.graph_users.find({'gid': str(gid)}, {'id': 1, '_id': 0})
    nodes = set()
    graph = nx.Graph()
    for u in users:
        nodes.update([u['id']])
    print 'Nodes to load:', len(nodes)
    for uid in nodes:
        graph.add_node(int(uid))
        friends = db.user_friends.find_one({'_id': uid})
        if friends is None:
            friends = {'friends': []}
        for f in friends['friends']:
            if f in nodes:
                graph.add_edge(int(uid), int(f))
        if with_followers:
            followers = db.followers.find_one({'_id': uid})
            if followers is None:
                followers = {'followers': []}
            for f in followers['followers']:
                if f in nodes:
                    graph.add_edge(int(uid), int(f))
    print 'Graph loaded'
    print 'Nodes:', nx.number_of_nodes(graph)
    print 'Edges:', nx.number_of_edges(graph)
    return graph
def get_characteristics(G, filename):
    import networkx as nx
    print 'calculating characteristics'
    n_nodes = nx.number_of_nodes(G)
    n_edges = nx.number_of_edges(G)
    n_components = nx.number_connected_components(G)
    print 'number of nodes:', n_nodes
    print 'number of edges:', n_edges
    print 'number of components:', n_components

    print 'degree histogram'
    check_sum = 0.
    degree_hist = {}
    for node in G:
        if G.degree(node) not in degree_hist:
            degree_hist[G.degree(node)] = 1
        else:
            degree_hist[G.degree(node)] += 1
    keys = degree_hist.keys()
    keys.sort()
    for item in keys:
        print item, degree_hist[item]
        check_sum += float(degree_hist[item]) / float(n_nodes)
    print "check sum: %f" % check_sum

    #print 'clustering coefficient'
    print 'clustering coefficient of full network', nx.average_clustering(G)
    return 0
def topology(data, ell):
    """
    Computation of topological characteristics.

    Parameters
    ------------
    data : array of paths to the graphs
    ell : list of length scales
    """
    for i in data:
        G = nx.read_gpickle(i)
        B = nx.number_of_edges(G)
        V = nx.number_of_nodes(G)
        Euler = V - B
        C = (B - V) / float(V)
        eu.append(Euler)
        c_t.append(C)
        vert.append(V)
        bran.append(B)

    plt.plot(ell, c_t, '.', label='v23')
    #
    #np.save('/backup/yuliya/v23/graphs_largedom/Euler.npy', eu)
    #np.save('/backup/yuliya/v23/graphs_largedom/C_t.npy', c_t)
    #np.save('/backup/yuliya/v23/graphs_largedom/V.npy', vert)
    #np.save('/backup/yuliya/v23/graphs_largedom/B.npy', bran)
    #np.save('/backup/yuliya/vsi01/graphs_largdom/time.npv23/graphs_largedom/y', t)
    plt.yscale('log')
def _build_subgraph(self):
    if self.rebuildgraph:
        self._build_graph()

    self.log.info("Building domain transition subgraph.")
    self.log.debug("Excluding {0}".format(self.exclude))
    self.log.debug("Reverse {0}".format(self.reverse))

    # reverse graph for reverse DTA
    if self.reverse:
        self.subG = self.G.reverse(copy=True)
    else:
        self.subG = self.G.copy()

    if self.exclude:
        # delete excluded domains from subgraph
        self.subG.remove_nodes_from(self.exclude)

        # delete excluded entrypoints from subgraph
        self.__remove_excluded_entrypoints()

    self.rebuildsubgraph = False
    self.log.info("Completed building domain transition subgraph.")
    self.log.debug("Subgraph stats: nodes: {0}, edges: {1}.".format(
        nx.number_of_nodes(self.subG),
        nx.number_of_edges(self.subG)))
def get_characteristics(G, thr, input_name):
    N = nx.number_of_nodes(G)  # total number of nodes : N
    L = nx.number_of_edges(G)  # total number of links : L
    Compon = nx.number_connected_components(G)  # number of connected components
    cc = nx.average_clustering(G)  # clustering coefficient : cc
    D = nx.density(G)  # network density: Kappa

    check_sum = 0.
    degree_hist = {}
    values = []
    for node in G:
        if G.degree(node) not in degree_hist:
            degree_hist[G.degree(node)] = 1
        else:
            degree_hist[G.degree(node)] += 1
        values.append(G.degree(node))

    ave_degree = float(sum(values) / float(N))  # average degree: <Kappa>
    keys = degree_hist.keys()
    keys.sort()
    for item in keys:
        check_sum += float(degree_hist[item]) / float(N)

    print 'Test matrix: ', input_name
    print 'Threshold: ', thr
    print 'Number of nodes: ', N
    print 'Number of links: ', L
    print 'Number of connected components: ', Compon
    print 'Clustering coefficient of full network: ', cc
    print 'Check degree distribution sum: ', check_sum
    print 'Network density: ', D
    print 'Average network degree: ', ave_degree
    return 0
def _build_subgraph(self):
    if self.rebuildgraph:
        self._build_graph()

    self.log.info("Building information flow subgraph...")
    self.log.debug("Excluding {0!r}".format(self.exclude))
    self.log.debug("Min weight {0}".format(self.min_weight))

    # delete excluded types from subgraph
    nodes = [n for n in self.G.nodes() if n not in self.exclude]
    self.subG = self.G.subgraph(nodes).copy()

    # delete edges below minimum weight.
    # no need if weight is 1, since that
    # does not exclude any edges.
    if self.min_weight > 1:
        delete_list = []
        for s, t in self.subG.edges():
            edge = Edge(self.subG, s, t)
            if edge.weight < self.min_weight:
                delete_list.append(edge)
        self.subG.remove_edges_from(delete_list)

    self.rebuildsubgraph = False
    self.log.info("Completed building information flow subgraph.")
    self.log.debug("Subgraph stats: nodes: {0}, edges: {1}.".format(
        nx.number_of_nodes(self.subG),
        nx.number_of_edges(self.subG)))
def _build_graph(self):
    self.G.clear()
    self.G.name = "Information flow graph for {0}.".format(self.policy)

    self.perm_map.map_policy(self.policy)

    self.log.info("Building information flow graph from {0}...".format(self.policy))

    for rule in self.policy.terules():
        if rule.ruletype != TERuletype.allow:
            continue

        (rweight, wweight) = self.perm_map.rule_weight(rule)

        for s, t in itertools.product(rule.source.expand(), rule.target.expand()):
            # only add flows if they actually flow
            # in or out of the source type
            if s != t:
                if wweight:
                    edge = Edge(self.G, s, t, create=True)
                    edge.rules.append(rule)
                    edge.weight = wweight

                if rweight:
                    edge = Edge(self.G, t, s, create=True)
                    edge.rules.append(rule)
                    edge.weight = rweight

    self.rebuildgraph = False
    self.rebuildsubgraph = True
    self.log.info("Completed building information flow graph.")
    self.log.debug("Graph stats: nodes: {0}, edges: {1}.".format(
        nx.number_of_nodes(self.G),
        nx.number_of_edges(self.G)))
def calculate(g, voltage):
    edges_num = nx.number_of_edges(g)
    # sort nodes in edges
    edges = [edge if edge[0] < edge[1] else (edge[1], edge[0]) for edge in nx.edges(g)]
    a = np.zeros((edges_num, edges_num))
    b = np.zeros((edges_num, 1))
    i = 0
    # first law
    for node in [node for node in nx.nodes(g) if node != 0]:
        for neighbor in nx.all_neighbors(g, node):
            edge = tuple(sorted((node, neighbor)))
            a[i][edges.index(edge)] = 1 if neighbor < node else -1
        i += 1
    # second law
    cycles = nx.cycle_basis(g, 0)
    for cycle in cycles:
        for j in range(0, len(cycle)):
            node = cycle[j]
            next_node = cycle[(j + 1) % len(cycle)]
            edge = tuple(sorted((node, next_node)))
            resistance = g[node][next_node]['weight']
            a[i][edges.index(edge)] = resistance if node < next_node else -resistance
        if 0 in cycle:
            b[i] = voltage
        i += 1
    # solve
    x = np.linalg.solve(a, b)
    for (x1, x2), res in zip(edges, x):
        g[x1][x2]['current'] = res[0]
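# Usage sketch (added; not part of the original source). Assumes what
# calculate() itself assumes: `import networkx as nx` and `import numpy as np`
# at module level, integer node labels with node 0 on the voltage-source loop,
# and the edge 'weight' attribute holding the resistance in ohms.
if __name__ == '__main__':
    g = nx.Graph()
    g.add_edge(0, 1, weight=1.0)  # 1 ohm
    g.add_edge(1, 2, weight=2.0)  # 2 ohms
    g.add_edge(0, 2, weight=3.0)  # 3 ohms
    calculate(g, voltage=6.0)
    for u, v, data in g.edges(data=True):
        print(u, v, data['current'])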
def modularity(graph, membership):
    try:
        noOfEdges = nx.number_of_edges(graph)
        types = max(membership) + 1
        print types, len(membership)
        a = {}
        e = {}
        for i in range(types):
            a[i] = 0
            e[i] = 0
        for edge in graph.edges():
            fromNode = int(edge[0])
            toNode = int(edge[1])
            c1 = membership[fromNode]
            c2 = membership[toNode]
            if c1 == c2:
                e[c1] += 2
            # print "c1= ", c1, "c2=", c2
            a[c1] += 1
            a[c2] += 1
        modularity = 0.0
        if noOfEdges > 0:
            for i in range(types):
                tmp = a[i] / 2 / noOfEdges
                modularity += e[i] / 2 / noOfEdges
                modularity -= tmp * tmp
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print (exc_type, fname, exc_tb.tb_lineno)
        raise e
    return modularity
def random_reference(G, niter=1, connectivity=True, seed=None):
    """Compute a random graph by swapping edges of a given graph.

    Parameters
    ----------
    G : graph
        An undirected graph with 4 or more nodes.

    niter : integer (optional, default=1)
        An edge is rewired approximately `niter` times.

    connectivity : boolean (optional, default=True)
        When True, ensure connectivity for the randomized graph.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    G : graph
        The randomized graph.

    Notes
    -----
    The implementation is adapted from the algorithm by Maslov and Sneppen
    (2002) [1]_.

    References
    ----------
    .. [1] Maslov, Sergei, and Kim Sneppen.
       "Specificity and stability in topology of protein networks."
       Science 296.5569 (2002): 910-913.
    """
    if G.is_directed():
        msg = "random_reference() not defined for directed graphs."
        raise nx.NetworkXError(msg)
    if len(G) < 4:
        raise nx.NetworkXError("Graph has less than four nodes.")

    from networkx.utils import cumulative_distribution, discrete_sequence

    local_conn = nx.connectivity.local_edge_connectivity

    G = G.copy()
    keys, degrees = zip(*G.degree())  # keys, degree
    cdf = cumulative_distribution(degrees)  # cdf of degree
    nnodes = len(G)
    nedges = nx.number_of_edges(G)
    niter = niter * nedges
    ntries = int(nnodes * nedges / (nnodes * (nnodes - 1) / 2))
    swapcount = 0

    for i in range(niter):
        n = 0
        while n < ntries:
            # pick two random edges without creating edge list
            # choose source node indices from discrete distribution
            (ai, ci) = discrete_sequence(2, cdistribution=cdf, seed=seed)
            if ai == ci:
                continue  # same source, skip
            a = keys[ai]  # convert index to label
            c = keys[ci]
            # choose target uniformly from neighbors
            b = seed.choice(list(G.neighbors(a)))
            d = seed.choice(list(G.neighbors(c)))
            bi = keys.index(b)
            di = keys.index(d)
            if b in [a, c, d] or d in [a, b, c]:
                continue  # all vertices should be different

            # don't create parallel edges
            if (d not in G[a]) and (b not in G[c]):
                G.add_edge(a, d)
                G.add_edge(c, b)
                G.remove_edge(a, b)
                G.remove_edge(c, d)

                # Check if the graph is still connected
                if connectivity and local_conn(G, a, b) == 0:
                    # Not connected, revert the swap
                    G.remove_edge(a, d)
                    G.remove_edge(c, b)
                    G.add_edge(a, b)
                    G.add_edge(c, d)
                else:
                    swapcount += 1
                    break
            n += 1
    return G
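# Usage sketch (added; not part of the original source). As transcribed above,
# the function lacks networkx's @py_random_state decorator, so `seed` must
# already be a random.Random-like object exposing .choice().
if __name__ == '__main__':
    import random

    G = nx.watts_strogatz_graph(50, k=4, p=0.1, seed=1)
    R = random_reference(G, niter=2, seed=random.Random(42))
    # Edge swaps preserve the degree sequence, so edge counts match.
    print(nx.number_of_edges(G) == nx.number_of_edges(R))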
def correction(self, graph, err):
    """
    Given a syndrome graph with negative edge weights, finds the
    maximum-weight perfect matching and produces a sparse_pauli.Pauli
    """
    if self.useBlossom:
        #----------- c processing
        # print('C Processing')
        # print('graph nodes: {0}'.format(graph.nodes()))
        # print('graph edges: {0}'.format(graph.edges()))
        node_num = nx.number_of_nodes(graph)
        edge_num = nx.number_of_edges(graph)
        # print('no of nodes : {0}, no of edges : {1}'.format(node_num, edge_num))
        edges = self.ffi.new('Edge[%d]' % (edge_num))
        cmatching = self.ffi.new('int[%d]' % (2 * node_num))
        node2id = {val: index for index, val in enumerate(graph.nodes())}
        id2node = {v: k for k, v in node2id.items()}
        # print(node2id)
        e = 0
        for u, v in graph.edges():
            uid = int(node2id[u])
            vid = int(node2id[v])
            wt = -int(graph[u][v]['weight'])  # weights from NetworkX
            # print('weight of edge[{0}][{1}] = {2}'.format(uid, vid, wt))
            edges[e].uid = uid
            edges[e].vid = vid
            edges[e].weight = wt
            e += 1
        # print('printing edges before calling blossom')
        # for e in range(edge_num):
        #     print(edges[e].uid, edges[e].vid, edges[e].weight)
        retVal = self.blossom.Init()
        retVal = self.blossom.Process(node_num, edge_num, edges)
        # retVal = self.blossom.PrintMatching()
        nMatching = self.blossom.GetMatching(cmatching)
        retVal = self.blossom.Clean()
        pairs = []
        # print('received C matching :')
        for i in range(0, nMatching, 2):
            u, v = id2node[cmatching[i]], id2node[cmatching[i + 1]]
            pairs.append((u, v))
            # print('{0}, {1} '.format(u, v))
        #----------- end of c processing
    else:
        # Tom MWPM
        """
        bulk_vs = sorted(list([_ for _ in graph.nodes() if type(_) is int]))
        bdy_vs = sorted(list([_ for _ in graph.nodes() if type(_) is tuple]))
        if bulk_vs == []:
            return sp.I
        sz = len(bulk_vs) + 1
        weight_mat = np.zeros((sz, sz), dtype=np.int_)
        for r, c in it.product(range(1, sz), repeat=2):
            if r != c:
                u, v = bulk_vs[r - 1], bulk_vs[c - 1]
                weight_mat[r, c] = -graph[u][v]['weight']
        for dx in range(1, sz):
            u = bulk_vs[dx - 1]
            v = (u, 'b')
            weight_mat[0, dx] = -graph[u][v]['weight']
            weight_mat[dx, 0] = weight_mat[0, dx]
        # eliminate mixed sign
        min_wt = np.amin(weight_mat) - 1
        if min_wt != -1:
            for r, c in it.product(range(1, sz), repeat=2):
                if weight_mat[r, c] != 0:
                    weight_mat[r, c] -= min_wt
        # weight_mat = weight_mat.clip(0, np.inf)
        try:
            match_lst = bw.insert_wm(weight_mat)
        except:
            with open('error_wts.pkl', 'w') as phil:
                pkl.dump(weight_mat, phil)
            raise ValueError("Tom's Blossom has gone wrong: "
                             "weight_mat saved to error_wts.pkl.")
        redundant_pairs = [(n_lst[j], n_lst[k-1])
                           for j, k in enumerate(match_lst[1:])]
        """
        # """ NX MWPM
        matching = nx.max_weight_matching(graph, maxcardinality=True)
        redundant_pairs = matching.items()
        # """
        # get rid of non-digraph duplicates
        pairs = []
        for tpl in redundant_pairs:
            if tuple(reversed(tpl)) not in pairs:
                pairs.append(tpl)
                # print(tpl)

    x = self.layout.map.inv
    pauli_lst = []
    for u, v in pairs:
        if isinstance(u, int) & isinstance(v, int):
            pauli_lst.append(self.path_pauli(x[u], x[v], err))
        elif isinstance(u, int) ^ isinstance(v, int):
            bdy_pt = graph[u][v]['close_pt']
            vert = u if isinstance(u, int) else v
            pauli_lst.append(self.path_pauli(bdy_pt, x[vert], err))
        else:
            pass  # both boundary points, no correction

    return product(pauli_lst)
import community
from community import community_louvain

myNetXGraph = nx.karate_club_graph()

# Prints summary information about the graph
print(nx.info(myNetXGraph))

# Print the degree of each node
print("Node Degree")
for v in myNetXGraph:
    print('%s %s' % (v, myNetXGraph.degree(v)))

# Compute and print other stats
nbr_nodes = nx.number_of_nodes(myNetXGraph)
nbr_edges = nx.number_of_edges(myNetXGraph)
nbr_components = nx.number_connected_components(myNetXGraph)
print("Number of nodes:", nbr_nodes)
print("Number of edges:", nbr_edges)
print("Number of connected components:", nbr_components)
print("Density:", nbr_edges / (nbr_nodes * (nbr_nodes - 1) / 2))

# Draw the network using the default settings
nx.draw(myNetXGraph)
plt.clf()

# Draw, but change the color to blue
nx.draw(myNetXGraph, node_color="blue")
plt.clf()
def nedges(self): return nx.number_of_edges(self.net)
def get_min_fill_factor(Gn):
    return np.amin([nx.number_of_edges(G) / (nx.number_of_nodes(G) * nx.number_of_nodes(G))
                    for G in Gn])
def get_all_edge_num(Gn): return [nx.number_of_edges(G) for G in Gn]
from networkx.algorithms import community

# read edge list
g = nx.read_edgelist("C:/Users/aayesha/Desktop/DiscreteProject/OUTFILE.txt")
ug = nx.to_undirected(g)

# print basic info (is the graph ok?)
print(nx.info(ug))

# basic analysis
# number of nodes
print("number of nodes:", nx.number_of_nodes(ug))
# number of edges
print("number of edges:", nx.number_of_edges(ug))
# average clustering
print(nx.average_clustering(ug))

## diameter
#print("Diameter:", nx.diameter(ug))  # this diameter shows ...

# average degree
sum = 0
for n in ug.nodes():
    sum = sum + ug.degree(n)
print("Average degree:", sum / ug.number_of_nodes())

options = {
def run(args):
    # Check arguments.
    assert len(args.num_sampled_vertices_network) == len(args.index_vertex_files) == len(args.edge_list_files)
    assert len(args.num_sampled_vertices_weights) == len(args.vertex_weight_files)
    assert args.num_vertices > 0
    assert args.min_num_edges > 0
    assert args.min_num_edges <= args.num_vertices * (args.num_vertices - 1) / 2

    # Create random network.
    n = args.num_vertices
    for i in range(1, n):
        G = nx.barabasi_albert_graph(n, i, seed=args.seed)
        if nx.number_of_edges(G) > args.min_num_edges:
            break

    # Use letters instead of integers for the vertex labels.
    vertices = letter_range(n)
    edges = [(vertices[i], vertices[j]) for i, j in G.edges()]
    G = nx.Graph()
    G.add_nodes_from(vertices)
    G.add_edges_from(edges)

    # Implant cliques.
    random.seed(args.seed)
    implanted_vertices = set()
    nonimplanted_vertices = set(vertices)
    for implant_size in args.implant_sizes:
        implant = set(random.sample(sorted(nonimplanted_vertices), implant_size))
        implanted_vertices = set.union(implanted_vertices, implant)
        nonimplanted_vertices = set.difference(nonimplanted_vertices, implant)
        for u, v in combinations(implant, 2):
            G.add_edge(u, v)
    implanted_vertices = sorted(implanted_vertices)
    nonimplanted_vertices = sorted(nonimplanted_vertices)

    # Create random weights.
    np.random.seed(args.seed)
    weights = np.random.exponential(scale=1.0, size=n)

    # Assign high weights to implanted vertices and low weights to other vertices.
    random.seed(args.seed)
    sorted_weights = np.sort(weights)[::-1]
    random.shuffle(implanted_vertices)
    random.shuffle(nonimplanted_vertices)
    sorted_vertices = implanted_vertices + nonimplanted_vertices
    vertex_to_weight = dict((v, w) for v, w in zip(sorted_vertices, sorted_weights))

    # Sample the random network.
    G_samples = []
    for num_sampled_vertices in args.num_sampled_vertices_network:
        random.seed(args.seed)
        sampled_vertices = random.sample(vertices, num_sampled_vertices)
        G_sample = G.subgraph(sampled_vertices)
        G_samples.append(G_sample)

    # Sample the random weights.
    vertex_to_weight_samples = []
    for num_sampled_vertices in args.num_sampled_vertices_weights:
        random.seed(args.seed)
        sampled_vertices = random.sample(vertices, num_sampled_vertices)
        vertex_to_weight_sample = dict((v, vertex_to_weight[v]) for v in sampled_vertices)
        vertex_to_weight_samples.append(vertex_to_weight_sample)

    # Output sampled random networks.
    for G_sample, index_vertex_file, edge_list_file in zip(G_samples, args.index_vertex_files, args.edge_list_files):
        np.random.seed(args.seed)
        sampled_vertices = np.random.permutation(G_sample.nodes())
        vertex_to_index = dict((v, i + 1) for i, v in enumerate(sampled_vertices))
        edge_list = [[vertex_to_index[u], vertex_to_index[v]] for u, v in G_sample.edges() if u != v]
        with open(index_vertex_file, 'w') as f:
            index_vertex_string = '\n'.join('\t'.join(map(str, (i, v))) for v, i in vertex_to_index.items())
            f.write(index_vertex_string)
        with open(edge_list_file, 'w') as f:
            edge_list_string = '\n'.join('\t'.join(map(str, edge)) for edge in edge_list)
            f.write(edge_list_string)

    # Output sampled random weights.
    for vertex_to_weight_sample, vertex_weight_file in zip(vertex_to_weight_samples, args.vertex_weight_files):
        with open(vertex_weight_file, 'w') as f:
            vertex_weight_string = '\n'.join('\t'.join(map(str, (v, w))) for v, w in vertex_to_weight_sample.items())
            f.write(vertex_weight_string)
def main(argv):
    input_file = None
    output_file = None
    k = None
    noise = None
    try:
        opts, args = getopt.getopt(argv, "i:o:k:n:")
    except getopt.GetoptError:
        sys.exit("test.py -i <inputfile> -o <outputfile> -k <k-anonymity level> -n <noise>")
    for opt, arg in opts:
        if opt == '-i':
            input_file = arg
        elif opt == '-o':
            output_file = arg
        elif opt == '-k':
            k = int(arg)
        elif opt == '-n':
            noise = int(arg)

    error = False
    if input_file is None:
        print("Please specify an input file")
        error = True
    if output_file is None:
        print("Please specify an output file")
        error = True
    if error:
        sys.exit("Syntax: test.py -i <inputfile> -o <outputfile> -k <k-anonymity level> -n <noise>")
    if k is None:
        k = 2
        print("Using default k = 2")
    if noise is None:
        noise = 1
        print("Using default n = 1")
    if not path.exists(input_file):
        sys.exit("Cannot find the input file")

    log = open(output_file + '.log', 'w')
    sys.stdout = log

    G = nx.read_edgelist(input_file, nodetype=int)

    start = time.time()
    Ga = kd.graph_anonymiser(G, k=k, noise=noise, with_deletions=True)
    print("Total execution time =", time.time() - start)

    H = nx.intersection(G, Ga)
    num_edges_in_G = len(set(G.edges()))
    num_edges_in_both = len(set(H.edges()))
    print("Edges overlap = " + str(100 * num_edges_in_both / num_edges_in_G) + "%")
    print("Num edges original graph = " + str(nx.number_of_edges(G)))
    print("Num edges anonymised graph = " + str(nx.number_of_edges(Ga)))

    nx.write_edgelist(Ga, output_file, data=False)
test_pairs.append((line_list[1], line_list[2]))

time1 = timeit.default_timer()
print('Time for reading file: ', time1 - time0)

'''Create a digraph for the task'''
DG = nx.DiGraph()
DG.add_edges_from(pairs)

'''Create an undirected graph for computing AA, JC and RA'''
UDG = nx.Graph()
UDG.add_edges_from(pairs)

time2 = timeit.default_timer()
print('Time for creating graphs: ', time2 - time1)

print("Num. nodes: ", nx.number_of_nodes(DG))
print("Num. edges: ", nx.number_of_edges(DG))

'''Get nodes, edges, and non-edges'''
nodes = nx.nodes(DG)
edges = nx.edges(DG)
non_edges = nx.non_edges(DG)

'''Compute HAA, HJC and HRA'''
HAA = []
HJC = []
HRA = []
SD = []
for e in test_pairs:
    if not DG.has_node(e[0]):
        DG.add_node(e[0])
        UDG.add_node(e[0])
    if not DG.has_node(e[1]):
        DG.add_node(e[1])
def main():
    parser = argparse.ArgumentParser(
        prog="build-red-blue-graph",
        description="construct red-blue graph from WIF.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-w', '--wif', help="Input WIF file.",
                        required=True, dest='wif_file')
    parser.add_argument('-o', '--out', help="Output WIF merged file.",
                        required=True, dest='out_file')
    parser.add_argument('-g', '--graph', help="Output graph CCS file.",
                        required=False, dest='graph_file')
    parser.add_argument('-t', '--thr', required=False, dest='thr',
                        type=int, default=1000000,
                        help="Threshold of the ratio between the probabilities that the "
                             "two reads come from the same side or from different sides.")
    parser.add_argument('-e', '--error_rate', required=False, dest='err_rate',
                        type=float, default=0.15,
                        help="Probability that any nucleotide is wrong.")
    parser.add_argument('-m', '--max_error_rate', required=False, dest='max_err_rate',
                        type=float, default=0.25,
                        help="If an edge has too many errors, we discard it, "
                             "since it is not reliable.")
    parser.add_argument('-n', '--neg_thr', required=False, dest='neg_thr',
                        type=int, default=1000,
                        help="Threshold_neg is a more conservative threshold for the "
                             "evidence that two reads should not be clustered together.")
    parser.add_argument('-v', '--verbose', help='increase output verbosity',
                        action='count', default=0)
    args = parser.parse_args()

    if args.verbose == 0:
        log_level = logging.INFO
    elif args.verbose == 1:
        log_level = logging.DEBUG
    else:
        log_level = logging.DEBUG
    logging.basicConfig(level=log_level,
                        format='%(levelname)-8s [%(asctime)s] %(message)s',
                        datefmt="%y%m%d %H%M%S")
    logging.info("Program started.")

    gblue = nx.Graph()
    gred = nx.Graph()
    gnotblue = nx.Graph()
    gnotred = nx.Graph()

    # Probability that any nucleotide is wrong
    error_rate = args.err_rate
    logging.info("Error Rate: %s", error_rate)

    # If an edge has too many errors, we discard it, since it is not reliable
    max_error_rate = args.max_err_rate
    logging.info("Max Error Rate: %s", max_error_rate)

    # Threshold of the ratio between the probabilities that the two reads come
    # from the same side or from different sides
    thr = args.thr
    logging.info("Positive Threshold: %s", thr)

    # Threshold_neg is a more conservative threshold for the evidence
    # that two reads should not be clustered together.
    thr_neg = args.neg_thr
    logging.info("Negative Threshold: %s", thr_neg)

    thr_diff = 1 + int(math.log(thr, (1 - error_rate) / (error_rate / 3)))
    thr_neg_diff = 1 + int(math.log(thr_neg, (1 - error_rate) / (error_rate / 3)))
    logging.debug("Thr. Diff.: %s - Thr. Neg. Diff.: %s", thr_diff, thr_neg_diff)

    logging.info("Started reading WIF file...")
    id = 0
    orig_reads = {}
    site_alleles = {}  # dic[site] = major and minor allele
    with open(args.wif_file, "r") as f:
        queue = {}
        reads = {}
        for line in f:
            id += 1
            # tokenize line, get first and last site
            tokens = line.split(' : ')[:-2]
            begin_str = tokens[0].split()[0]
            snps = []
            for t in tokens:
                toks = t.split()
                site = int(toks[0])
                nucl = toks[1]
                zyg = int(toks[2])
                qual = int(toks[3])
                if int(zyg) == 0:
                    snps.append('G')
                else:
                    snps.append('C')
                # add to alleles dictionary, checking for discordancy (multi-allelic)
                if site not in site_alleles:
                    site_alleles[site] = ['', '']
                if not site_alleles[site][zyg]:
                    site_alleles[site][zyg] = nucl
                else:
                    deg = 'minor' if zyg else 'major'
                    cur = str(site_alleles[site][zyg])
                    errsuf = ', current ' + deg + ' allele: ' + cur
                    errsuf += '\n\tis discordant with new allele: ' + nucl
                    assert site_alleles[site][zyg] == nucl, 'at site: ' + str(site) + errsuf

            #(id, begin_str, *snps) = line.split()
            begin = int(begin_str)
            end = begin + len(snps)
            logging.debug("id: %s - pos: %s - snps: %s", id, begin, "".join(snps))

            orig_reads[id] = [t.split() for t in tokens]

            gblue.add_node(id, begin=begin, end=end, sites="".join(snps))
            gnotblue.add_node(id, begin=begin, end=end, sites="".join(snps))
            gred.add_node(id, begin=begin, end=end, sites="".join(snps))
            gnotred.add_node(id, begin=begin, end=end, sites="".join(snps))
            queue[id] = {'begin': begin, 'end': end, 'sites': snps}
            reads[id] = {'begin': begin, 'end': end, 'sites': snps}
            for x in [id for id in queue.keys() if queue[id]['end'] <= begin]:
                del queue[x]
            for id1 in queue.keys():
                if id != id1:
                    match, mismatch = eval_overlap(queue[id1], queue[id])
                    if match + mismatch >= thr_neg_diff and \
                            min(match, mismatch) / (match + mismatch) <= max_error_rate:
                        if match - mismatch >= thr_diff:
                            gblue.add_edge(id1, id, match=match, mismatch=mismatch)
                        if mismatch - match >= thr_diff:
                            gred.add_edge(id1, id, match=match, mismatch=mismatch)
                        if match - mismatch >= thr_neg_diff:
                            gnotred.add_edge(id1, id, match=match, mismatch=mismatch)
                        if mismatch - match >= thr_neg_diff:
                            gnotblue.add_edge(id1, id, match=match, mismatch=mismatch)

    logging.info("Finished reading WIF file.")
    logging.info("N. WIF entries: %s", id)
    logging.info("Blue Graph")
    logging.info("Nodes: %s - Edges: %s - ConnComp: %s",
                 nx.number_of_nodes(gblue), nx.number_of_edges(gblue),
                 len(list(nx.connected_components(gblue))))
    logging.info("Non-Blue Graph")
    logging.info("Nodes: %s - Edges: %s - ConnComp: %s",
                 nx.number_of_nodes(gnotblue), nx.number_of_edges(gnotblue),
                 len(list(nx.connected_components(gnotblue))))
    logging.info("Red Graph")
    logging.info("Nodes: %s - Edges: %s - ConnComp: %s",
                 nx.number_of_nodes(gred), nx.number_of_edges(gred),
                 len(list(nx.connected_components(gred))))
    logging.info("Non-Red Graph")
    logging.info("Nodes: %s - Edges: %s - ConnComp: %s",
                 nx.number_of_nodes(gnotred), nx.number_of_edges(gnotred),
                 len(list(nx.connected_components(gnotred))))

    # We consider the notblue edges as an evidence that two reads
    # should not be merged together
    # Since we want to merge each blue connected component into
    # a single superread, we check each notblue edge (r1, r2) and
    # we remove some blue edges so that r1 and r2 are not in the
    # same blue connected component
    blue_component = {}
    current_component = 0
    for conncomp in nx.connected_components(gblue):
        for v in conncomp:
            blue_component[v] = current_component
        current_component += 1

    # Keep only the notblue edges that are inside a blue connected component
    good_notblue_edges = [(v, w) for (v, w) in gnotblue.edges()
                          if blue_component[v] == blue_component[w]]
    notblue_counter = 0
    num_notblue = len(good_notblue_edges)
    block = num_notblue // 100
    num_blue = len(list(nx.connected_components(gblue)))
    for (u, v) in good_notblue_edges:
        while v in nx.node_connected_component(gblue, u):
            path = nx.shortest_path(gblue, source=u, target=v)
            # Remove the edge with the smallest support
            # A better strategy is to weight each edge with -log p
            # and remove the minimum (u,v)-cut
            w, x = (min(zip(path[:-1], path[1:]),
                        key=lambda p: gblue[p[0]][p[1]]['match'] - gblue[p[0]][p[1]]['mismatch']))
            gblue.remove_edge(w, x)

    # Merge blue components (somehow)
    logging.info("Started Merging Reads...")
    superreads = {}  # superreads given by the clusters (if clustering)
    rep = {}  # cluster representative of a read in a cluster
    if args.graph_file:
        logging.info("Printing graph in %s file", args.graph_file)
        graph_out = open(args.graph_file, "w")
    for cc in nx.connected_components(gblue):
        if len(cc) > 1:
            if args.graph_file:
                graph_out.write(' '.join([str(id) for id in cc]) + "\n")
            r = min(cc)
            superreads[r] = {}
            for id in cc:
                rep[id] = r
            logging.debug("rep: %s - cc: %s", r, ",".join([str(id) for id in cc]))

    for id in orig_reads:
        if id in rep:
            for tok in orig_reads[id]:
                site = int(tok[0])
                zyg = int(tok[2])
                qual = int(tok[3])
                r = rep[id]
                if site not in superreads[r]:
                    superreads[r][site] = [0, 0]
                superreads[r][site][zyg] += qual

    with open(args.out_file, "w") as out:
        for id in orig_reads:
            if id in rep:
                if id == rep[id]:
                    for site in sorted(superreads[id]):
                        z = superreads[id][site]
                        if z[0] >= z[1]:
                            out.write(" ".join(str(el) for el in
                                               [site, site_alleles[site][0], 0, z[0] - z[1], ':', '']))
                        elif z[1] > z[0]:
                            out.write(" ".join(str(el) for el in
                                               [site, site_alleles[site][1], 1, z[1] - z[0], ':', '']))
                    out.write("# X : X\n")
            else:
                for tok in orig_reads[id]:
                    out.write(" ".join(str(t) for t in tok) + " : ")
                out.write("# X : X\n")

    logging.info("Finished Merging Reads.")
    logging.info("Program Finished.")
# load data
num_nodes = 1991
num_partitions = 12
with open('data/India_database.p', 'rb') as f:
    database = pickle.load(f)

G = nx.Graph()
nG = nx.Graph()
for i in range(num_nodes):
    G.add_node(i)
    nG.add_node(i)
for edge in database['edges']:
    G.add_edge(edge[0], edge[1])
    nG.add_edge(edge[0], edge[1])
start_edges = nx.number_of_edges(G)

# Load precomputed noisy version
save_name = "village_noise.txt"
with open(save_name, "rb") as fp:
    nG = pickle.load(fp)

database['labels'] = database['label']

print('---Data files loaded. Computing...\n')


def process_sgwl_village(cost, database, num_nodes, num_partitions,
network_name = args.pickle[:-7]
with open(network_name + "_properties.txt", "w") as file:
    #------------------------------------------------#
    ###            Network properties              ###
    #------------------------------------------------#
    file.write("### Network properties ###\n")

    ### Number of nodes ###
    nnodes = nx.number_of_nodes(G)
    nnodes_str = str(nx.number_of_nodes(G))
    file.write("Number_of_nodes\t" + nnodes_str + "\n")

    ### Number of edges ###
    nedges = nx.number_of_edges(G)
    nedges_str = str(nx.number_of_edges(G))
    file.write("Number_of_edges\t" + nedges_str + "\n")

    ### Mean degree ###
    mean_degree = str((2 * nedges) / nnodes)
    file.write("Mean_degree\t" + mean_degree + "\n")

    ### Average clustering coefficient ###
    # The following code works exactly like the nx method, but the nx version
    # was not outputting any values at first
    clust_coef = nx.clustering(G)
    cc_holder = []
    for key, value in clust_coef.items():
        cc_holder.append(value)
    file.write("Average_clustering_coefficient\t" +
def graph_test(filename):
    print('')
    print('Getting graph..')
    DG, terms, root = get_graph(filename, with_root=True)
    print('Getting graph is finished')
    print("")
    terms = list(set(terms) - {root})
    # DG, terms = get_graph('WRP4/wrp4-11.stp')
    print_graph(DG)
    v = nx.number_of_nodes(DG)
    e = nx.number_of_edges(DG)
    print("Number of vertices: ", v)
    print("Number of reachable vertices: ", len(nx.descendants(DG, root)) + 1)
    print("Number of edges: ", e)
    print('')
    print('apsp started')
    start_time = time.time()
    tr_cl = trans_clos(DG)
    elapsed_time = time.time() - start_time
    print('apsp finished in', elapsed_time)
    # print_graph(tr_cl)
    max_len = 0
    max_node = None
    for node in nx.nodes(tr_cl):
        # print(node, tr_cl.out_edges(node))
        if len(tr_cl.out_edges(node)) > max_len:
            max_len = len(tr_cl.out_edges(node))
            max_node = node
    print("max node ", max_node)
    print("intersect", set(v for x, v in tr_cl.out_edges(max_node)) & set(terms))
    i = 1
    print('Alg6 with i = ', i, 'started')
    start_time = time.time()
    set_start_time(start_time)
    terms.sort()
    tree = alg6(tr_cl, i=2, k=len(terms), r=root, x=terms)
    elapsed_time = time.time() - start_time
    print('Elapsed time = ', elapsed_time)
    tot_weight = tree.size(weight='weight')
    print('Weight of MSTw = ', tot_weight)
    print_graph(tree)
    exit()

    prev = dict()
    for i in [1, 2]:
        # try:
        #     if not (('alg3-' + str(i)) not in prev or prev[('alg3-' + str(i))]):
        #         raise Exception('')
        #     raise Exception()
        #     print('alg3-' + str(i), 'started..')
        #     start_time = time.time()
        #     set_start_time(start_time)
        #     tree = alg3(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
        #     elapsed_time = time.time() - start_time
        #     tot_weight = tot_weight = tree.size(weight='weight')
        #     print('alg3-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
        #     print('')
        #     save_time(v, e, 'alg3-' + str(i), elapsed_time, tot_weight)
        #     prev['alg3-' + str(i)] = True
        # except:
        #     save_time(v, e, 'alg3-' + str(i), '-', '-')
        #     print('Alg took too long to compute')
        #     prev['alg3-' + str(i)] = False

        # try:
        #     if not (('alg4-' + str(i)) not in prev or prev[('alg3-' + str(i))]):
        #         raise Exception('')
        #     raise Exception()
        #     print('alg4-' + str(i), 'started..')
        #     start_time = time.time()
        #     set_start_time(start_time)
        #     tree = alg4(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
        #     elapsed_time = time.time() - start_time
        #     tot_weight = tree.size(weight='weight')
        #     print('alg4-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
        #     print('')
        #     save_time(v, e, 'alg4-' + str(i), elapsed_time, tot_weight)
        #     prev['alg4-' + str(i)] = True
        # except:
        #     save_time(v, e, 'alg4-' + str(i), '-', '-')
        #     print('Alg took too long to compute')
        #     prev['alg4-' + str(i)] = False

        # try:
        if not (('alg6-' + str(i)) not in prev or prev[('alg6-' + str(i))]):
            raise Exception('')
        print('alg6-' + str(i), 'started..')
        start_time = time.time()
        set_start_time(start_time)
        tree = alg6(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
        elapsed_time = time.time() - start_time
        tot_weight = tree.size(weight='weight')
        print('alg6-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
        print('')
        save_time(v, e, 'alg6-' + str(i), elapsed_time, tot_weight)
        prev['alg6-' + str(i)] = True
def get_ave_fill_factor(Gn):
    return np.mean([nx.number_of_edges(G) / (nx.number_of_nodes(G) * nx.number_of_nodes(G))
                    for G in Gn])
def wrp_test(filename=None, g=None, terms=None, root=None):
    global prev
    if g is None:
        # print('')
        # print('Getting graph..')
        DG, terms, root = get_graph(filename, with_root=True)
        # print_graph(DG)
        v = nx.number_of_nodes(DG)
        e = nx.number_of_edges(DG)
        print('root is', root)
        print("Number of vertices: ", v)
        print("Number of reachable vertices: ", len(nx.descendants(DG, root)) + 1)
        print("Number of edges: ", e)
        print('')
        print('apsp started')
        start_time = time.time()
        tr_cl = trans_clos_dense(DG)
        # print_graph(tr_cl)
        elapsed_time = time.time() - start_time
        print('apsp finished in', elapsed_time)
        terms = list(set(terms) - {root})
        terms.sort()
        i = 2
        print('Alg6 with i = ', i, 'started')
        start_time = time.time()
        set_start_time(start_time)
        terms.sort()
        tree = alg3(tr_cl, i=4, k=len(terms), r=root, x=terms)
        elapsed_time = time.time() - start_time
        print('Elapsed time = ', elapsed_time)
        tot_weight = tree.size(weight='weight')
        print('Weight of MSTw = ', tot_weight)
        print_graph(tree)
        exit()
    else:
        DG = g
        v = nx.number_of_nodes(DG)
        e = nx.number_of_edges(DG)
        print('root is', root)
        print("Number of vertices: ", v)
        print("Number of reachable vertices: ", len(nx.descendants(DG, root)) + 1)
        print("Number of edges: ", e)
        print('')
        print('apsp started')
        start_time = time.time()
        tr_cl = trans_clos_dense(DG)
        # print_graph(tr_cl)
        elapsed_time = time.time() - start_time
        print('apsp finished in', elapsed_time)
        terms = list(set(terms) - {root})
        terms.sort()

    for i in [4]:
        try:
            if not (('alg3-' + str(i)) not in prev or prev[('alg3-' + str(i))]):
                raise Exception('')
            print('alg3-' + str(i), 'started..')
            start_time = time.time()
            set_start_time(start_time)
            tree = alg3(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
            elapsed_time = time.time() - start_time
            tot_weight = tree.size(weight='weight')
            print('alg3-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
            print('')
            save_time(v, e, len(terms), 'alg3-' + str(i), elapsed_time, tot_weight)
            prev['alg3-' + str(i)] = True
        except:
            save_time(v, e, len(terms), 'alg3-' + str(i), '-', '-')
            print('Alg took too long to compute')
            prev['alg3-' + str(i)] = False
        try:
            if not (('alg4-' + str(i)) not in prev or prev[('alg3-' + str(i))]):
                raise Exception('')
            print('alg4-' + str(i), 'started..')
            start_time = time.time()
            set_start_time(start_time)
            tree = alg4(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
            elapsed_time = time.time() - start_time
            tot_weight = tree.size(weight='weight')
            print('alg4-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
            print('')
            save_time(v, e, len(terms), 'alg4-' + str(i), elapsed_time, tot_weight)
            prev['alg4-' + str(i)] = True
        except:
            save_time(v, e, len(terms), 'alg4-' + str(i), '-', '-')
            print('Alg took too long to compute')
            prev['alg4-' + str(i)] = False
        try:
            if not (('alg6-' + str(i)) not in prev or prev[('alg6-' + str(i))]):
                raise Exception('')
            print('alg6-' + str(i), 'started..')
            start_time = time.time()
            set_start_time(start_time)
            tree = alg6(tr_cl, i=i, k=len(terms.copy()), r=root, x=terms.copy())
            elapsed_time = time.time() - start_time
            tot_weight = tree.size(weight='weight')
            print('alg6-' + str(i), 'finished in', elapsed_time, 'with res =', tot_weight)
            print('')
            save_time(v, e, len(terms), 'alg6-' + str(i), elapsed_time, tot_weight)
            prev['alg6-' + str(i)] = True
        except:
            save_time(v, e, len(terms), 'alg6-' + str(i), '-', '-')
            print('Alg took too long to compute')
            prev['alg6-' + str(i)] = False
ideal_ratio_negpos = expec_neg / expec_pos

# Calculate the non-normalized deviation from the expected (full) graph
dev_posneg = obs_posneg_ratio / ideal_ratio_posneg
dev_negpos = obs_negpos_ratio / ideal_ratio_negpos

# Calculate the normalized deviation from the expected (full) graph
dev_norm_posneg = (obs_posneg_ratio - ideal_ratio_posneg) / ideal_ratio_posneg
dev_norm_negpos = (obs_negpos_ratio - ideal_ratio_negpos) / ideal_ratio_negpos

# Calculate the normalized deviation of the edge:node (density) ratio from the full graph
dens_dev = abs(obs_edge_node_ratio - expec_edge_node_ratio) / expec_edge_node_ratio

# Calculate PUC (the proportion of edges that do not follow the expected direction)
puc = puc_noncompliant / nx.number_of_edges(G)

dev_dict = {}
dev_dict['OBSERVED_number_nodes_positive'] = pos_nodes
dev_dict['OBSERVED_number_nodes_negative'] = neg_nodes
dev_dict['OBSERVED_number_edges_positive'] = pos_corr
dev_dict['OBSERVED_number_edges_negative'] = neg_corr
dev_dict['OBSERVED_ratio_pos_to_neg_nodes'] = round(obs_posneg_node_ratio, 2)
dev_dict['OBSERVED_ratio_neg_to_pos_nodes'] = round(obs_negpos_node_ratio, 2)
dev_dict['OBSERVED_edge_node_ratio'] = round(obs_edge_node_ratio, 2)
dev_dict['OBSERVED_ratio_pos_to_neg_edges'] = round(obs_posneg_ratio, 2)
dev_dict['OBSERVED_ratio_neg_to_pos_edges'] = round(obs_negpos_ratio, 2)
dev_dict['IDEAL_total_number_edges_full_graph'] = expec_total
dev_dict['IDEAL_number_positive_edges_full_graph'] = expec_pos
dev_dict['IDEAL_number_negative_edges_full_graph'] = expec_neg
dev_dict['IDEAL_density_full_graph'] = round(expec_edge_node_ratio, 2)
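# Hedged sketch of how the observed counts above might be derived from a signed
# correlation graph. Assumption (not shown in this excerpt): edges carry a signed
# 'weight' attribute and nodes a signed 'value' attribute; the real pipeline may
# compute these differently.
import networkx as nx

def observed_counts_sketch(G):
    pos_corr = sum(1 for _, _, d in G.edges(data=True) if d.get('weight', 0) > 0)
    neg_corr = G.number_of_edges() - pos_corr
    pos_nodes = sum(1 for _, d in G.nodes(data=True) if d.get('value', 0) > 0)
    neg_nodes = G.number_of_nodes() - pos_nodes
    return pos_nodes, neg_nodes, pos_corr, neg_corr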
@author: megan squire
"""
import networkx as nx
import operator

# create a graph, filling it with one of the edgelists
g = nx.read_weighted_edgelist('data/edgelist24.csv')

# analyze the basic graph
# make a Python dictionary full of each node and its degree
# (dict() keeps this working on newer networkx, where degree() returns a view)
degree = dict(nx.degree(g))

# calculate some basic stuff about the nodes & degrees
numNodes = nx.number_of_nodes(g)
numEdges = nx.number_of_edges(g)
minDegree = min(degree.values())
maxDegree = max(degree.values())
print('numNodes:', numNodes)
print('numEdges:', numEdges)
print('minDegree:', minDegree)
print('maxDegree:', maxDegree)

# sort the dictionary by highest degrees
degreeSorted = sorted(degree.items(), key=operator.itemgetter(1), reverse=True)
# print out the top ten nodes with the highest degrees
# (the original slice [0:9] only printed nine)
print(degreeSorted[0:10])

# draw the graph - you will see that it is very crowded,
# but some structure is apparent
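# Hedged sketch of the drawing step the comment above refers to; the original
# drawing call is not part of this excerpt, and the layout parameters here are
# assumptions.
import matplotlib.pyplot as plt

pos = nx.spring_layout(g, seed=42)
nx.draw_networkx(g, pos, node_size=20, with_labels=False)
plt.show()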
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

filename = "CA-CondMat.txt"
RN = nx.read_edgelist(filename)
n_RN = nx.number_of_edges(RN)

RN_list = [RN]
for i in range(4):
    RN_list.append(nx.double_edge_swap(RN.copy(),
                                       nswap=int(n_RN * (i + 1) * 0.1),
                                       max_tries=2 * (i + 1) * n_RN))
    # write the swap just appended; RN_list[0] is the original graph,
    # so indexing with i (as in the original) wrote the previous entry
    nx.write_edgelist(RN_list[i + 1], 'RN_' + str(i + 1) + '.txt', data=False)

nn_RN = nx.number_of_nodes(RN)
avg_deg_of_node = np.mean(list(dict(RN.degree()).values()))

GN_list = []
for i in range(5):
    GN_list.append(nx.barabasi_albert_graph(nn_RN + 1000 * i, int(avg_deg_of_node)))
    nx.write_edgelist(GN_list[i], 'GN_' + str(i + 1) + '.txt', data=False)
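# A quick sanity check, assuming the intent of double_edge_swap here is to
# randomize wiring while preserving the degree sequence.
orig_degrees = sorted(d for _, d in RN.degree())
for swapped in RN_list[1:]:
    assert sorted(d for _, d in swapped.degree()) == orig_degrees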
def _build_graph(self) -> None:
    self.G.clear()
    self.G.name = "Domain transition graph for {0}.".format(self.policy)

    self.log.info("Building domain transition graph from {0}...".format(self.policy))

    # hash tables keyed on domain type
    setexec: RuleHash = defaultdict(list)
    setcurrent: RuleHash = defaultdict(list)

    # hash tables keyed on (domain, entrypoint file type)
    # the parameter for defaultdict has to be callable
    # hence the lambda for the nested defaultdict
    execute: DefaultDict[Type, RuleHash] = defaultdict(lambda: defaultdict(list))
    entrypoint: DefaultDict[Type, RuleHash] = defaultdict(lambda: defaultdict(list))

    # hash table keyed on (domain, entrypoint, target domain)
    type_trans: DefaultDict[Type, DefaultDict[Type, RuleHash]] = \
        defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    for rule in self.policy.terules():
        if rule.ruletype == TERuletype.allow:
            if rule.tclass not in ["process", "file"]:
                continue

            if rule.tclass == "process":
                if "transition" in rule.perms:
                    for s, t in itertools.product(rule.source.expand(),
                                                  rule.target.expand()):
                        # only add edges if they actually
                        # transition to a new type
                        if s != t:
                            edge = Edge(self.G, s, t, create=True)
                            edge.transition.append(rule)

                if "dyntransition" in rule.perms:
                    for s, t in itertools.product(rule.source.expand(),
                                                  rule.target.expand()):
                        # only add edges if they actually
                        # transition to a new type
                        if s != t:
                            e = Edge(self.G, s, t, create=True)
                            e.dyntransition.append(rule)

                if "setexec" in rule.perms:
                    for s in rule.source.expand():
                        setexec[s].append(rule)

                if "setcurrent" in rule.perms:
                    for s in rule.source.expand():
                        setcurrent[s].append(rule)

            else:
                if "execute" in rule.perms:
                    for s, t in itertools.product(rule.source.expand(),
                                                  rule.target.expand()):
                        execute[s][t].append(rule)

                if "entrypoint" in rule.perms:
                    for s, t in itertools.product(rule.source.expand(),
                                                  rule.target.expand()):
                        entrypoint[s][t].append(rule)

        elif rule.ruletype == TERuletype.type_transition:
            if rule.tclass != "process":
                continue

            d = rule.default
            for s, t in itertools.product(rule.source.expand(), rule.target.expand()):
                type_trans[s][t][d].append(rule)

    invalid_edge: List[Edge] = []
    clear_transition: List[Edge] = []
    clear_dyntransition: List[Edge] = []

    for s, t in self.G.edges():
        edge = Edge(self.G, s, t)
        invalid_trans = False
        invalid_dyntrans = False

        if edge.transition:
            # get matching domain exec w/entrypoint type
            entry = set(entrypoint[t].keys())
            exe = set(execute[s].keys())
            match = entry.intersection(exe)

            if not match:
                # there are no valid entrypoints
                invalid_trans = True
            else:
                # TODO try to improve the efficiency in this loop
                for m in match:
                    # pylint: disable=unsupported-assignment-operation
                    if s in setexec or type_trans[s][m]:
                        # add key for each entrypoint
                        edge.entrypoint[m] += entrypoint[t][m]
                        edge.execute[m] += execute[s][m]

                        if type_trans[s][m][t]:
                            edge.type_transition[m] += type_trans[s][m][t]

                if s in setexec:
                    edge.setexec.extend(setexec[s])

                if not edge.setexec and not edge.type_transition:
                    invalid_trans = True
        else:
            invalid_trans = True

        if edge.dyntransition:
            if s in setcurrent:
                edge.setcurrent.extend(setcurrent[s])
            else:
                invalid_dyntrans = True
        else:
            invalid_dyntrans = True

        # cannot change the edges while iterating over them,
        # so keep appropriate lists
        if invalid_trans and invalid_dyntrans:
            invalid_edge.append(edge)
        elif invalid_trans:
            clear_transition.append(edge)
        elif invalid_dyntrans:
            clear_dyntransition.append(edge)

    # Remove invalid transitions
    self.G.remove_edges_from(invalid_edge)
    for edge in clear_transition:
        # if only the regular transition is invalid,
        # clear the relevant lists
        del edge.transition
        del edge.execute
        del edge.entrypoint
        del edge.type_transition
        del edge.setexec
    for edge in clear_dyntransition:
        # if only the dynamic transition is invalid,
        # clear the relevant lists
        del edge.dyntransition
        del edge.setcurrent

    self.rebuildgraph = False
    self.rebuildsubgraph = True
    self.log.info("Completed building domain transition graph.")
    self.log.debug("Graph stats: nodes: {0}, edges: {1}.".format(
        nx.number_of_nodes(self.G), nx.number_of_edges(self.G)))
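# Hedged sketch of inspecting the finished graph with plain networkx calls; the
# attribute names ('transition', 'dyntransition') follow the Edge wrappers above,
# assuming they are backed by edge-data keys of the same names.
import networkx as nx

def summarize_dtg_sketch(G):
    total = nx.number_of_edges(G)
    dyn = sum(1 for _, _, d in G.edges(data=True) if d.get('dyntransition'))
    return total, dyn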
def test_union_and_compose():
    K3 = nx.complete_graph(3)
    P3 = nx.path_graph(3)

    G1 = nx.DiGraph()
    G1.add_edge("A", "B")
    G1.add_edge("A", "C")
    G1.add_edge("A", "D")
    G2 = nx.DiGraph()
    G2.add_edge("1", "2")
    G2.add_edge("1", "3")
    G2.add_edge("1", "4")

    G = nx.union(G1, G2)
    H = nx.compose(G1, G2)
    assert edges_equal(G.edges(), H.edges())
    assert not G.has_edge("A", 1)
    pytest.raises(nx.NetworkXError, nx.union, K3, P3)

    H1 = nx.union(H, G1, rename=("H", "G1"))
    assert sorted(H1.nodes()) == [
        "G1A", "G1B", "G1C", "G1D",
        "H1", "H2", "H3", "H4",
        "HA", "HB", "HC", "HD",
    ]

    H2 = nx.union(H, G2, rename=("H", ""))
    assert sorted(H2.nodes()) == [
        "1", "2", "3", "4",
        "H1", "H2", "H3", "H4",
        "HA", "HB", "HC", "HD",
    ]

    assert not H1.has_edge("NB", "NA")

    G = nx.compose(G, G)
    assert edges_equal(G.edges(), H.edges())

    G2 = nx.union(G2, G2, rename=("", "copy"))
    assert sorted(G2.nodes()) == [
        "1", "2", "3", "4",
        "copy1", "copy2", "copy3", "copy4",
    ]

    assert sorted(G2.neighbors("copy4")) == []
    assert sorted(G2.neighbors("copy1")) == ["copy2", "copy3", "copy4"]
    assert len(G) == 8
    assert nx.number_of_edges(G) == 6

    E = nx.disjoint_union(G, G)
    assert len(E) == 16
    assert nx.number_of_edges(E) == 12

    E = nx.disjoint_union(G1, G2)
    assert sorted(E.nodes()) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

    G = nx.Graph()
    H = nx.Graph()
    G.add_nodes_from([(1, {"a1": 1})])
    H.add_nodes_from([(1, {"b1": 1})])
    R = nx.compose(G, H)
    assert R.nodes == {1: {"a1": 1, "b1": 1}}
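# A small illustrative sketch of the union/compose distinction exercised above:
# union requires disjoint node sets (or a rename), while compose merges shared nodes.
import networkx as nx

A = nx.path_graph(3)          # nodes 0, 1, 2
B = nx.path_graph([2, 3, 4])  # shares node 2 with A
C = nx.compose(A, B)
assert sorted(C.nodes()) == [0, 1, 2, 3, 4]
try:
    nx.union(A, B)            # overlapping node sets raise an error
except nx.NetworkXError:
    pass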
print("USAGE: provide input TSV edge list filepath, followed by output filepath.")
file_path = sys.argv[1]
output_file_path = sys.argv[2]

# Create a directed graph from the edgelist file
G = nx.read_edgelist(file_path, create_using=nx.DiGraph())

# Remove self-loops
# (nx.selfloop_edges replaces the G.selfloop_edges method removed in networkx 2.4)
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Consider only the largest strongly connected component
# (strongly_connected_component_subgraphs was likewise removed from networkx)
largest_scc = max(nx.strongly_connected_components(G), key=len)
G = G.subgraph(largest_scc).copy()
G = nx.convert_node_labels_to_integers(G)

graph_file = open(output_file_path, "w")
n = nx.number_of_nodes(G)
m = nx.number_of_edges(G)
graph_file.write(f"{n:d} {m:d}\n")
for u in range(n):
    # Add random capacity in the range [1,10]
    graph_file.write(" ".join(str(v) + " " + str(np.random.randint(1, 11))
                              for v in set(nx.all_neighbors(G, u))) + "\n")
graph_file.close()
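# Hedged sketch of the emitted format on a tiny digraph: first line "n m", then
# one line per node listing "neighbor capacity" pairs (all_neighbors covers both
# predecessors and successors, matching the script above).
import networkx as nx
import numpy as np

toy = nx.DiGraph([(0, 1), (1, 2), (2, 0)])
lines = [f"{toy.number_of_nodes():d} {toy.number_of_edges():d}"]
for u in range(toy.number_of_nodes()):
    lines.append(" ".join(f"{v} {np.random.randint(1, 11)}"
                          for v in set(nx.all_neighbors(toy, u))))
print("\n".join(lines))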
def lattice_reference(G, niter=1, D=None, connectivity=True, seed=None):
    """Latticize the given graph by swapping edges.

    Parameters
    ----------
    G : graph
        An undirected graph with 4 or more nodes.
    niter : integer (optional, default=1)
        An edge is rewired approximately niter times.
    D : numpy.array (optional, default=None)
        Distance to the diagonal matrix.
    connectivity : boolean (optional, default=True)
        Ensure connectivity for the latticized graph when set to True.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    G : graph
        The latticized graph.

    Notes
    -----
    The implementation is adapted from the algorithm by Sporns et al. [1]_,
    which is inspired by the original work by Maslov and Sneppen (2002) [2]_.

    References
    ----------
    .. [1] Sporns, Olaf, and Jonathan D. Zwi.
       "The small world of the cerebral cortex."
       Neuroinformatics 2.2 (2004): 145-162.
    .. [2] Maslov, Sergei, and Kim Sneppen.
       "Specificity and stability in topology of protein networks."
       Science 296.5569 (2002): 910-913.
    """
    import numpy as np
    from networkx.utils import cumulative_distribution, discrete_sequence

    local_conn = nx.connectivity.local_edge_connectivity

    if G.is_directed():
        msg = "lattice_reference() not defined for directed graphs."
        raise nx.NetworkXError(msg)
    if len(G) < 4:
        raise nx.NetworkXError("Graph has less than four nodes.")

    # Instead of choosing uniformly at random from a generated edge list,
    # this algorithm chooses nonuniformly from the set of nodes with
    # probability weighted by degree.
    G = G.copy()
    keys, degrees = zip(*G.degree())  # keys, degree
    cdf = cumulative_distribution(degrees)  # cdf of degree

    nnodes = len(G)
    nedges = nx.number_of_edges(G)
    if D is None:
        D = np.zeros((nnodes, nnodes))
        un = np.arange(1, nnodes)
        um = np.arange(nnodes - 1, 0, -1)
        u = np.append((0,), np.where(un < um, un, um))

        for v in range(int(np.ceil(nnodes / 2))):
            D[nnodes - v - 1, :] = np.append(u[v + 1:], u[:v + 1])
            D[v, :] = D[nnodes - v - 1, :][::-1]

    niter = niter * nedges
    ntries = int(nnodes * nedges / (nnodes * (nnodes - 1) / 2))
    swapcount = 0

    for i in range(niter):
        n = 0
        while n < ntries:
            # pick two random edges without creating edge list
            # choose source node indices from discrete distribution
            (ai, ci) = discrete_sequence(2, cdistribution=cdf, seed=seed)
            if ai == ci:
                continue  # same source, skip
            a = keys[ai]  # convert index to label
            c = keys[ci]
            # choose target uniformly from neighbors
            b = seed.choice(list(G.neighbors(a)))
            d = seed.choice(list(G.neighbors(c)))
            bi = keys.index(b)
            di = keys.index(d)

            if b in [a, c, d] or d in [a, b, c]:
                continue  # all vertices should be different

            # don't create parallel edges
            if (d not in G[a]) and (b not in G[c]):
                if D[ai, bi] + D[ci, di] >= D[ai, ci] + D[bi, di]:
                    # only swap if we get closer to the diagonal
                    G.add_edge(a, d)
                    G.add_edge(c, b)
                    G.remove_edge(a, b)
                    G.remove_edge(c, d)

                    # Check if the graph is still connected
                    if connectivity and local_conn(G, a, b) == 0:
                        # Not connected, revert the swap
                        G.remove_edge(a, d)
                        G.remove_edge(c, b)
                        G.add_edge(a, b)
                        G.add_edge(c, d)
                    else:
                        swapcount += 1
                        break
            n += 1

    return G
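# Hedged usage sketch: as written above, `seed` is used directly via seed.choice,
# so passing a random.Random instance (rather than a bare int) is assumed here.
# Edge swaps preserve the degree sequence, which the assert verifies.
import random
import networkx as nx

rng = random.Random(42)
G = nx.watts_strogatz_graph(30, 4, 0.3, seed=42)
L = lattice_reference(G, niter=2, seed=rng)
assert sorted(d for _, d in L.degree()) == sorted(d for _, d in G.degree())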
        words2 = G.node[j]["text"]
        sim_sum = 0  # renamed from `sum` to avoid shadowing the builtin
        for x in words:
            for y in words2:
                if len(words) != 0 and len(words2) != 0:
                    sim_sum += model.wv.similarity(w1=x, w2=y)
        if len(words) != 0 and len(words2) != 0:
            avg = sim_sum / (len(words) * len(words2))
            # print(avg)
        for z in range(0, len(G.node[i]["topic"])):
            if G.node[i]["topic"][z] in G.node[j]["topic"]:
                G.add_edge(i, j, weight=avg)

print(datetime.datetime.now())
print(nx.number_of_edges(G))
print(nx.number_of_nodes(G))
# print(G.node[16]["text"])
# print(G.node[16]["topic"])
# print(G.get_edge_data(0,16))
# print(G.node[0]["text"])
# print(G.node[0]["topic"])

elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] > 0.2]
esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d['weight'] <= 0.2]

# compute one layout and reuse it, so the strong (blue) and weak (green) edges
# are drawn on the same node positions; calling nx.draw twice without a shared
# pos generates two different layouts
pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True, font_weight='bold', node_size=100,
        edgelist=elarge, width=1, edge_color='b')
nx.draw(G, pos, with_labels=True, font_weight='bold', node_size=100,
        edgelist=esmall, edge_color='g')
words = set()
for line in fh.readlines():
    line = line.decode()
    if line.startswith("*"):
        continue
    w = str(line[0:4])
    words.add(w)
return generate_graph(words)


if __name__ == "__main__":
    G = words_graph()
    print("Loaded words_dat.txt containing 2174 four-letter English words.")
    print("Two words are connected if they differ in one letter.")
    print("Graph has %d nodes with %d edges"
          % (nx.number_of_nodes(G), nx.number_of_edges(G)))
    print("%d connected components" % nx.number_connected_components(G))
    for (source, target) in [
        ("cold", "warm"),
        ("love", "hate"),
        ("good", "evil"),
        ("pear", "beef"),
        ("make", "take"),
    ]:
        print("Shortest path between %s and %s is" % (source, target))
        try:
            sp = nx.shortest_path(G, source, target)
            for n in sp:
                print(n)
        except nx.NetworkXNoPath:
            print("None")
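# Hedged sketch of the generate_graph helper referenced above (not shown in this
# excerpt): connect two words when they differ in exactly one letter, using
# single-wildcard buckets to avoid the O(n^2) pairwise comparison.
import networkx as nx
from collections import defaultdict

def generate_graph_sketch(words):
    G = nx.Graph(name="words")
    buckets = defaultdict(set)  # e.g. "c_ld" -> {"cold", "culd", ...}
    for w in words:
        for i in range(len(w)):
            buckets[w[:i] + "_" + w[i + 1:]].add(w)
    for bucket in buckets.values():
        for w1 in bucket:
            for w2 in bucket:
                if w1 < w2:
                    G.add_edge(w1, w2)
    G.add_nodes_from(words)  # keep isolated words as nodes
    return G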
# Read graph as a MultiGraph.
g = nx.read_edgelist(sys.argv[1], nodetype=int,
                     create_using=nx.MultiGraph(), data=[("key", int)])

# Combine edges with the reverse-sorted pair of their node degrees,
# then sort on the degree pairs, breaking ties with the smaller degree.
data = sorted([(e, (g.degree(e[0]), g.degree(e[1])))
               if g.degree(e[0]) > g.degree(e[1])
               else (e, (g.degree(e[1]), g.degree(e[0])))
               for e in g.edges],
              key=lambda x: x[1], reverse=True)

T = nx.number_of_edges(g)
avg = (T - 1) / 2

# Output ordering, with an average rank for tied edges.
class_content = []
class_type = data[0][1]  # degree-degree pair
class_base_time = 0
for t, x in enumerate(data):
    if x[1] != class_type:
        for c in class_content:
            # if class_base_time > avg:
            #     print(*c, class_base_time)
            # elif class_base_time + len(class_content) < avg:
            #     print(*c, class_base_time + len(class_content))
            # else:
            #     print(*c, avg)
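# Hedged sketch of an average-rank assignment for tied edges, matching the intent
# of the commented-out block above: every edge in a tie class receives the mean
# of the rank positions that class occupies. Usage: ranks = average_ranks_sketch(data)
def average_ranks_sketch(sorted_data):
    ranks = {}
    start = 0
    while start < len(sorted_data):
        end = start
        while end < len(sorted_data) and sorted_data[end][1] == sorted_data[start][1]:
            end += 1
        mean_rank = (start + end - 1) / 2
        for edge, _ in sorted_data[start:end]:
            ranks[edge] = mean_rank
        start = end
    return ranks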
def test_nX_decompose(primal_graph):
    # check that missing geoms throw an error
    G = primal_graph.copy()
    del G[0][1][0]['geom']
    with pytest.raises(KeyError):
        graphs.nX_decompose(G, 20)
    # check that non-LineString geoms throw an error
    G = primal_graph.copy()
    for s, e, k in G.edges(keys=True):
        G[s][e][k]['geom'] = geometry.Point([G.nodes[s]['x'], G.nodes[s]['y']])
        break
    with pytest.raises(TypeError):
        graphs.nX_decompose(G, 20)
    # test decomposition
    G = primal_graph.copy()
    # first clean the graph to strip disconnected looping component
    # this gives a start == end node situation for testing
    G_simple = graphs.nX_remove_filler_nodes(G)
    G_decompose = graphs.nX_decompose(G_simple, 50)
    # from cityseer.tools import plot
    # plot.plot_nX(G_simple, labels=True, node_size=80, plot_geoms=True)
    # plot.plot_nX(G_decompose, plot_geoms=True)
    assert nx.number_of_nodes(G_decompose) == 292
    assert nx.number_of_edges(G_decompose) == 314
    for s, e in G_decompose.edges():
        assert G_decompose.number_of_edges(s, e) == 1
    # check that total lengths are the same
    G_lens = 0
    for s, e, e_data in G_simple.edges(data=True):
        G_lens += e_data['geom'].length
    G_d_lens = 0
    for s, e, e_data in G_decompose.edges(data=True):
        G_d_lens += e_data['geom'].length
    assert np.allclose(G_lens, G_d_lens, atol=0.001, rtol=0)
    # check that geoms are correctly flipped
    G_forward = primal_graph.copy()
    G_forward_decompose = graphs.nX_decompose(G_forward, 20)
    G_backward = primal_graph.copy()
    for i, (s, e, k, d) in enumerate(G_backward.edges(data=True, keys=True)):
        # flip each third geom
        # (assign into G_backward; the original assigned into the earlier G copy)
        if i % 3 == 0:
            G_backward[s][e][k]['geom'] = geometry.LineString(d['geom'].coords[::-1])
    G_backward_decompose = graphs.nX_decompose(G_backward, 20)
    for n, d in G_forward_decompose.nodes(data=True):
        assert d['x'] == G_backward_decompose.nodes[n]['x']
        assert d['y'] == G_backward_decompose.nodes[n]['y']
    # test that geom coordinate mismatch throws an error
    G = primal_graph.copy()
    for k in ['x', 'y']:
        for n in G.nodes():
            G.nodes[n][k] = G.nodes[n][k] + 1
            break
        with pytest.raises(ValueError):
            graphs.nX_decompose(G, 20)
movie = 'Batman'
movie_actors = []
for row in spamreader:
    if row[2] not in actors:
        actors.append(row[2])
        G.add_node(row[2])
    if movie == row[1]:
        for a in movie_actors:
            G.add_edge(a, row[2])
    else:
        movie = row[1]
        movie_actors = []
    movie_actors.append(row[2])

# unified on Python 3 print calls (the original mixed print statements and functions)
print("Nodes:", nx.number_of_nodes(G))
print("Edges:", nx.number_of_edges(G))
print("Density:", nx.density(G))
if nx.is_connected(G):
    print("Graph connected")
else:
    print("Graph disconnected")

c = [nx.degree_centrality, nx.eigenvector_centrality]
for cen in c:
    top = []
    ce = cen(G)
    for i in range(10):  # collect the ten highest-ranked nodes
        best = max(ce, key=ce.get)
        ce.pop(best, None)
        top.append(best)
    print(top)
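# A more idiomatic top-k sketch using heapq.nlargest, avoiding the repeated
# max-and-pop over the centrality dict; the result is assumed equivalent for
# this use, though ties may order differently.
import heapq

for cen in (nx.degree_centrality, nx.eigenvector_centrality):
    ce = cen(G)
    print(heapq.nlargest(10, ce, key=ce.get))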
sSQL = sSQL + " 'Country-Router' AS RouterType" sSQL = sSQL + " from" sSQL = sSQL + " Assess_IP_DATA as A" sSQL = sSQL + " ORDER BY A.Country;" CompanyData = pd.read_sql_query(sSQL, conn) print('################') for i in range(CompanyData.shape[0]): sNode = str(CompanyData['NodeName'][i]) sRouterType = str(CompanyData['RouterType'][i]) sGroupName0 = str(CompanyData['GroupName0'][i]) G.add_node(sNode, routertype=sRouterType, group0=sGroupName0) print('################################') print("Nodes of graph: ", nx.number_of_nodes(G)) print("Edges of graph: ", nx.number_of_edges(G)) print('################################') print('################') sTable = 'Assess_IP_Country' print('Storing :', sDatabaseName, ' Table:', sTable) CompanyData.to_sql(sTable, conn, if_exists="replace") print('################') ################################################################ print('################') sTable = 'Assess_IP_DATA' print('Loading :', sDatabaseName, ' Table:', sTable) sSQL = "select distinct" sSQL = sSQL + " A.Country," sSQL = sSQL + " A.PlaceName," sSQL = sSQL + " A.PlaceName || '-' || A.Country AS NodeName,"
def MVC_BnB(self, G, cutOffTime):
    initialUpdaterate = 10
    if G.number_of_nodes() > 10000:
        initialUpdaterate = G.number_of_nodes() / 500
    trace = open(self.traceFileName, 'w')
    start_time = time.time()
    sub_problems = indexMinPQ()
    approxVC = self.approxMVC(G)
    lowerBound = len(approxVC) / 2
    sub_problems.push((None, tuple(G.nodes()), tuple(G.nodes()), lowerBound),
                      G.number_of_edges())
    # Initialize the optimum to the approximate vertex cover
    optimum = (len(approxVC), approxVC)
    current_time = time.time() - start_time
    trace.write(str(current_time) + ', ' + str(optimum[0]) + '\n')
    counter = 0
    while not sub_problems.isEmpty():
        subProblem = sub_problems.pop()
        if subProblem[0] is None:
            sub_cover = []
        else:
            sub_cover = list(subProblem[0])
        available_vertices = list(subProblem[1])
        remaining_graph = G.subgraph(list(subProblem[2]))
        currentLowerbound = subProblem[3]
        if currentLowerbound >= optimum[0]:
            # prune: this subproblem cannot beat the incumbent
            # (the original used `pass` here, which skipped nothing)
            continue
        if len(subProblem[2]) < 500:
            updateRate = 1
        else:
            updateRate = initialUpdaterate
        max_degree_v = self.maxDegreeVertex(remaining_graph, available_vertices)
        cover_add_v = list(sub_cover)
        cover_add_v.append(max_degree_v)
        new_residual_vertices = self.residual_graph(remaining_graph, max_degree_v)
        new_available_vertices = list(set(new_residual_vertices) & set(available_vertices))
        new_residual_graph = G.subgraph(new_residual_vertices)
        # Check if a new optimum was found; if yes, update the optimum
        if nx.number_of_edges(new_residual_graph) == 0:
            if len(cover_add_v) < optimum[0]:
                optimum = (len(cover_add_v), cover_add_v)
                current_time = time.time() - start_time
                trace.write(str(current_time) + ', ' + str(optimum[0]) + '\n')
        elif len(new_available_vertices) > 0:
            if counter % updateRate == 0:
                lowerBound = len(cover_add_v) + len(self.approxMVC(new_residual_graph)) / 2
                counter = 0
            else:
                lowerBound = currentLowerbound
            counter += 1
            if lowerBound < optimum[0]:
                sub_problems.push((tuple(cover_add_v),
                                   tuple(new_available_vertices),
                                   tuple(new_residual_vertices),
                                   lowerBound),
                                  new_residual_graph.number_of_edges())
        cover_unselect_v = list(sub_cover)
        available_vertices_delete_v = list(available_vertices)
        available_vertices_delete_v.remove(max_degree_v)
        if len(available_vertices_delete_v) > 0 and \
                self.isCover(max_degree_v, available_vertices_delete_v, remaining_graph):
            if currentLowerbound < optimum[0]:
                sub_problems.push((tuple(cover_unselect_v),
                                   tuple(available_vertices_delete_v),
                                   subProblem[2],
                                   currentLowerbound),
                                  remaining_graph.number_of_edges())
        if time.time() - start_time > cutOffTime:
            break
    trace.close()
    output = open(self.outputFileName, 'w')
    result = sorted(optimum[1])
    output.write(str(optimum[0]) + '\n' + ' '.join(str(v) for v in result))
    output.close()
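# Hedged sketch of the kind of 2-approximation that approxMVC (defined elsewhere
# in this class) could provide: take both endpoints of a maximal matching. Its
# size is at most twice the optimum, which is what justifies the len(approxVC)/2
# lower bound used above.
import networkx as nx

def approx_mvc_sketch(G):
    matching = nx.maximal_matching(G)
    return [v for edge in matching for v in edge]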