def test_degree_centrality(self):
    """Verify bipartite degree centrality on the path, complete and cycle fixtures."""
    cases = [
        (self.P4, [1, 3], {0: 0.5, 1: 1.0, 2: 1.0, 3: 0.5}),
        (self.K3, [0, 1, 2], {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0}),
        (self.C4, [0, 2], {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0}),
    ]
    for graph, node_set, expected in cases:
        computed = bipartite.degree_centrality(graph, node_set)
        assert_equal(computed, expected)
def test_degree_centrality(self):
    """Verify bipartite degree centrality on the path, complete and cycle fixtures."""
    expected_p4 = {0: 0.5, 1: 1.0, 2: 1.0, 3: 0.5}
    assert bipartite.degree_centrality(self.P4, [1, 3]) == expected_p4

    expected_k3 = {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0}
    assert bipartite.degree_centrality(self.K3, [0, 1, 2]) == expected_k3

    expected_c4 = {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0}
    assert bipartite.degree_centrality(self.C4, [0, 2]) == expected_c4
def reportCommunities(table, dt, comm):
    """
    Output the detected communities to the suspiciousdnsfailures Hive table.

    Writes one tab-separated line per community edge to a local text file,
    then loads that file into the `suspiciousdnsfailures` Hive partition
    for date `dt` via a local Thrift/Hive connection.

    Parameters:
        table: name of the source table; stored with each row and embedded
               in the output file name.
        dt:    partition date string.
        comm:  iterable of community graphs; every node carries a
               'bipartite' attribute (0 = source IP, 1 = FQDN).
    """
    sys.stderr.write("Report suspicious IPs.\n")
    outputFile = open("%s/suspiciousdnsfailures_%s_%s.txt" % (outputDirectory, table, dt), "w")
    for commId, G in enumerate(comm):
        comfqdns = set(n for n, d in G.nodes(data=True) if d['bipartite'] == 1)
        degrees = bipartite.degree_centrality(G, comfqdns)
        for e in G.edges():
            # Orient the edge: the bipartite=0 endpoint is the source IP,
            # the bipartite=1 endpoint is the FQDN.
            if G.node[e[0]]["bipartite"] == 0 and G.node[e[1]]["bipartite"] == 1:
                srcip = e[0]
                fqdn = e[1]
            elif G.node[e[0]]["bipartite"] == 1 and G.node[e[1]]["bipartite"] == 0:
                srcip = e[1]
                fqdn = e[0]
            else:
                # BUG FIX: wrap e in a 1-tuple ("%s" % tuple raised TypeError)
                # and skip the edge — previously srcip/fqdn stayed unbound or
                # stale from the previous iteration.
                sys.stderr.write("Error: Invalid edge (%s)\n" % (e,))
                continue
            # BUG FIX: average of the two endpoint centralities. The original
            # `degrees[e[0]] + degrees[e[1]] / 2.0` divided only the second
            # term; the 0..1 confidence thresholds below imply a mean.
            degree = (degrees[e[0]] + degrees[e[1]]) / 2.0
            conf = "LOW"
            if degree > 0.66:
                conf = "HIGH"
            elif degree > 0.33:
                conf = "MED"
            outputFile.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n"
                             % (fqdn, srcip, commId, G.order(), degree, conf, table))
    outputFile.close()
    # Store results in Hive
    try:
        transport = TSocket.TSocket('localhost', 10000)
        transport = TTransport.TBufferedTransport(transport)
        protocol = TBinaryProtocol.TBinaryProtocol(transport)
        client = ThriftHive.Client(protocol)
        transport.open()
        client.execute("create table if not exists suspiciousdnsfailures (fqdn string, srcip string, clusterid int, clustersize bigint, degree double, confidence string, table string) partitioned by(dt string) row format delimited fields terminated by '\t'")
        client.execute("load data local inpath '{dir}/suspiciousdnsfailures_{table}_{date}.txt' into table suspiciousdnsfailures partition (dt='{date}')".format(date=dt, dir=outputDirectory, table=table))
        # NOTE(review): a commented-out self-join populating
        # suspiciousdnsfailuresIP_dns_pcaps was removed here; recover it from
        # version control if the IP-overlap table is ever needed again.
        transport.close()
    except Thrift.TException as tx:
        sys.stderr.write('%s\n' % (tx.message))
def calculate_centrality(fp, centrality_type, perm_maps):
    """Build an app/permission bipartite graph from a TSV file and compute one
    centrality measure.

    Reads `./txt_critical_perms/apps_file/<fp>.txt` (tab-separated, first line
    skipped as a header), adds app nodes (bipartite=0) and permission nodes
    (bipartite=1), then dispatches on `centrality_type` ('degree',
    'closeness', 'betweenness' or 'clustering').

    Returns (result, is_connect) on success; returns None implicitly when the
    centrality computation raises or `centrality_type` is unrecognized.
    NOTE(review): `perm_maps` is passed straight through to
    `get_centrality_out` — presumably a permission-id -> name map; confirm
    against that helper.
    """
    print '%s : start to read %s.txt '%(centrality_type, fp)
    g = nx.Graph()
    # Progress-reporting state for p_percent: total estimate, counter, last percent.
    i_t = 100000
    i_i = 0
    p = 0
    f = codecs.open('./txt_critical_perms/apps_file/%s.txt'%(fp), 'r', encoding='utf-8')
    l = f.readline()  # skip the header line
    l = f.readline()
    while l:
        p, i_i = p_percent(p, i_i, i_t, 10)
        ls = l.split('\t')
        app_id = ls[0].strip().lower()
        perm_id = ls[1].strip().lower()
        g.add_node(app_id, bipartite=0)   # top node set (apps)
        g.add_node(perm_id, bipartite=1)  # bottom node set (permissions)
        g.add_edge(app_id, perm_id)
        l = f.readline()
    is_connect = nx.is_connected(g)
    print u'end read: %s'%(fp), is_connect
    # Split the graph into bottom (permissions) and top (apps) node sets.
    # bipartite.sets() is avoided because it requires a connected graph:
    #node_data, node_app = bipartite.sets(g)
    node_data = set(n for n, d in g.nodes(data=True) if d['bipartite'] == 1)
    node_app = set(g) - node_data
    ## centrality degree
    if centrality_type == 'degree':
        try:
            centrality = bipartite.degree_centrality(g, node_data)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '** error in centrality: %s : %s'%(centrality_type, fp), e
    ## centrality closeness
    if centrality_type == 'closeness':
        try:
            centrality = bipartite.closeness_centrality(g, node_app, normalized=False)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
    ## centrality betweenness
    if centrality_type == 'betweenness':
        try:
            centrality = bipartite.betweenness_centrality(g, node_app)
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
    ## bipartite clustering ('dot' mode)
    if centrality_type == 'clustering':
        try:
            centrality = bipartite.clustering(g, node_data, mode='dot')
            result = get_centrality_out(fp, node_data, node_app, centrality, centrality_type, perm_maps)
            return result, is_connect
        except Exception as e:
            print '**** error in centrality : %s : %s'%(centrality_type, fp), e
def test_davis_degree_centrality(self):
    """Check degree centrality on the Davis Southern Women graph (2 d.p.)."""
    centrality = bipartite.degree_centrality(self.davis, self.top_nodes)
    expected = {
        "E8": 0.78, "E9": 0.67, "E7": 0.56,
        "Nora Fayette": 0.57, "Evelyn Jefferson": 0.57, "Theresa Anderson": 0.57,
        "E6": 0.44, "Sylvia Avondale": 0.50, "Laura Mandeville": 0.50,
        "Brenda Rogers": 0.50, "Katherina Rogers": 0.43, "E5": 0.44,
        "Helen Lloyd": 0.36, "E3": 0.33, "Ruth DeSand": 0.29,
        "Verne Sanderson": 0.29, "E12": 0.33, "Myra Liddel": 0.29,
        "E11": 0.22, "Eleanor Nye": 0.29, "Frances Anderson": 0.29,
        "Pearl Oglethorpe": 0.21, "E4": 0.22, "Charlotte McDowd": 0.29,
        "E10": 0.28, "Olivia Carleton": 0.14, "Flora Price": 0.14,
        "E2": 0.17, "E1": 0.17, "Dorothy Murchison": 0.14,
        "E13": 0.17, "E14": 0.17,
    }
    for node in expected:
        assert almost_equal(expected[node], centrality[node], places=2)
def test_davis_degree_centrality(self):
    """Degree centrality of the Davis graph matches published values to 2 d.p."""
    G = self.davis
    computed = bipartite.degree_centrality(G, self.top_nodes)
    # Expected values, grouped as (node, centrality) pairs.
    known = [
        ('E8', 0.78), ('E9', 0.67), ('E7', 0.56),
        ('Nora Fayette', 0.57), ('Evelyn Jefferson', 0.57),
        ('Theresa Anderson', 0.57), ('E6', 0.44),
        ('Sylvia Avondale', 0.50), ('Laura Mandeville', 0.50),
        ('Brenda Rogers', 0.50), ('Katherina Rogers', 0.43),
        ('E5', 0.44), ('Helen Lloyd', 0.36), ('E3', 0.33),
        ('Ruth DeSand', 0.29), ('Verne Sanderson', 0.29),
        ('E12', 0.33), ('Myra Liddel', 0.29), ('E11', 0.22),
        ('Eleanor Nye', 0.29), ('Frances Anderson', 0.29),
        ('Pearl Oglethorpe', 0.21), ('E4', 0.22),
        ('Charlotte McDowd', 0.29), ('E10', 0.28),
        ('Olivia Carleton', 0.14), ('Flora Price', 0.14),
        ('E2', 0.17), ('E1', 0.17), ('Dorothy Murchison', 0.14),
        ('E13', 0.17), ('E14', 0.17),
    ]
    for node, value in known:
        assert almost_equal(value, computed[node], places=2)
def compute_centrality(nets=None, names=None):
    """Compute bipartite centrality measures for each network and pickle them.

    For every (name, graph) pair, computes degree, betweenness (empty dict
    when the graph is degenerate), closeness and eigenvector centrality over
    the bipartite=1 node set, then writes the accumulated results to a
    timestamped pickle in `results_dir`.
    """
    date = datetime.datetime.today().strftime("%Y%m%d%H%M")
    names = default_years if names is None else names
    nets = networks_by_year() if nets is None else nets
    result = {}
    for name, G in zip(names, nets):
        print("computing centrality for {}".format(name))
        devs = {n for n, d in G.nodes(data=True) if d['bipartite'] == 1}
        measures = {'deg': bp.degree_centrality(G, devs)}
        try:
            measures['bet'] = bp.betweenness_centrality(G, devs)
        except ZeroDivisionError:
            # Degenerate graph: betweenness is undefined, record nothing.
            measures['bet'] = {}
        measures['clos'] = bp.closeness_centrality(G, devs)
        measures['ev'] = nx.eigenvector_centrality_numpy(G)
        result[name] = measures
    # `name` is the last processed label; 2014 marks the by-year run.
    fn = 'years' if name == 2014 else 'branches'
    fname = "{0}/bipartite_centrality_{1}_{2}.pkl".format(results_dir, fn, date)
    utils.write_results_pkl(result, fname)
def test_davis_degree_centrality(self):
    """Davis Southern Women degree centrality agrees with reference values."""
    result = bipartite.degree_centrality(self.davis, self.top_nodes)
    reference = {
        'E8': 0.78, 'E9': 0.67, 'E7': 0.56, 'E6': 0.44, 'E5': 0.44,
        'E3': 0.33, 'E12': 0.33, 'E11': 0.22, 'E4': 0.22, 'E10': 0.28,
        'E2': 0.17, 'E1': 0.17, 'E13': 0.17, 'E14': 0.17,
        'Nora Fayette': 0.57, 'Evelyn Jefferson': 0.57,
        'Theresa Anderson': 0.57, 'Sylvia Avondale': 0.50,
        'Laura Mandeville': 0.50, 'Brenda Rogers': 0.50,
        'Katherina Rogers': 0.43, 'Helen Lloyd': 0.36,
        'Ruth DeSand': 0.29, 'Verne Sanderson': 0.29,
        'Myra Liddel': 0.29, 'Eleanor Nye': 0.29,
        'Frances Anderson': 0.29, 'Charlotte McDowd': 0.29,
        'Pearl Oglethorpe': 0.21, 'Olivia Carleton': 0.14,
        'Flora Price': 0.14, 'Dorothy Murchison': 0.14,
    }
    for node, value in reference.items():
        assert_almost_equal(value, result[node], places=2)
# Build the weighted edge list: the i-th top node pairs with the i-th bottom
# node carrying weight[i].
edges = []
x = 0  # NOTE(review): x is never used below — candidate for removal.
for i in range(len(top_nodes)):
    edges.append((top_nodes[i], bottom_nodes[i], weight[i]))
B.add_weighted_edges_from(edges)
# print(B.edges())
# Two-mode to one-mode projection (kept for reference, currently disabled):
# G=bipartite.projected_graph(B,bottom_nodes,multigraph=True)
# print(G.edges(keys=True))
# Weighted degrees of both node sets of the two-mode network.
degX,degY=bipartite.degrees(B,bottom_nodes,weight='weight')
# Degree centrality of the two-mode network (bottom node set).
D = bipartite.degree_centrality(B, bottom_nodes)
degX_dict, degY_dict, D_dict = dict(degX), dict(degY), dict(D)
print('模1的度为:', len(degX), degX_dict, '模2的度为:', len(degY), degY_dict, '模2的点度中心性', len(D), D_dict, sep='\n')
# Copy the target feature source and attach result fields for the two degree
# measures and the centrality, so they can be joined back in ArcGIS.
arcpy.env.overwriteOutput = True
targetsource1 = targetsource + 'output'
print(targetsource)
arcpy.Copy_management(in_data=targetsource, out_data=targetsource1)
mo2fieldname,mo1fieldname,mo2centraldegree = 'mo2degree', 'mo1degree', 'mo2centraldegree'
arcpy.AddField_management(targetsource1, mo2fieldname, "FLOAT",field_alias=mo2fieldname, field_is_nullable="NULLABLE")
arcpy.AddField_management(targetsource1, mo1fieldname, "FLOAT",field_alias=mo1fieldname, field_is_nullable="NULLABLE")
arcpy.AddField_management(targetsource1, mo2centraldegree, "FLOAT",field_alias=mo2centraldegree, field_is_nullable="NULLABLE")
# Project the bipartite Davis graph onto the clubs (ratio-weighted) and report
# each club's weighted degree, then draw/save the projection.
print("Women onto Events")
W2 = bipartite.weighted_projected_graph(g, clubs, ratio=True)
print('')
print('#Women, Member')
for c in clubs:
    print('%d %s' % (W2.degree(c, weight='weight'), c))
print
nx.draw(W2, node_color='b', edge_color='r', with_labels=True)
plt.savefig("davisontoclubsratio.png") # save as png
plt.show() # display
print
print
# Degree Summary Stats
deg = bipartite.degree_centrality(g, clubs)
# Betweenness Summary Stats
bc = bipartite.betweenness_centrality(g, clubs)
# Closeness Summary Stats
cc = bipartite.closeness_centrality(g, clubs)
# Accumulators for min/max scans over the centrality dicts (9999 acts as +inf
# for the minimums; the *women lists collect the extreme nodes).
maxdeg = 0
mindeg = 9999
mindegwomen = []
maxdegwomen = []
degarray = []
maxbc = 0
minbc = 9999
# No C3 clustering by definition of bipartite, elaborate and explain C4 during talk
cluster1 = nx.square_clustering(
    network)  # No clustering because edges only go from users to designs
cluster2 = bipartite.clustering(
    network)  # No clustering because edges only go from users to designs

# Centrality Measures
# Do these factor in directedness!!!!!!!!!!!!!!!!!!!!!!!!!???????????????????????
# IDIOM FIX: the per-measure accumulate loops were the hand-rolled equivalent
# of sum(dict.values()); the totals and averages are unchanged.
closeness_centrality = bipartite.closeness_centrality(network, users)
total_closeness_centrality = sum(closeness_centrality.values())
avg_closeness_centrality = total_closeness_centrality / len(
    closeness_centrality)

degree_centrality = bipartite.degree_centrality(network, users)
total_degree_centrality = sum(degree_centrality.values())
avg_degree_centrality = total_degree_centrality / len(degree_centrality)

betweenness_centrality = bipartite.betweenness_centrality(network, users)
total_betweenness_centrality = sum(betweenness_centrality.values())
avg_betweenness_centrality = total_betweenness_centrality / len(
    betweenness_centrality)

###################################################
# Projection onto Users considering all edge types#
###################################################
def describe(self, extra=False):
    """
    Provides a summary of graph statistics. Includes basic statistics like the
    number of nodes, edges, density, and the average degree for one mode.
    Prints a string that contains each of the items that make up the summary.
    Density is calculated using one of the modes of the original bipartite
    network graph.

    **Parameters** :

    > *extra* : `bool`

    >> Runs the low efficiency algorithms, which can be resource-intensive on
    >> large networks. Recommended maximum network size for the low efficiency
    >> algorithms is around 100 nodes.

    **Returns** : `string`

    > Returns the descriptive string that contains information about the
    > `MultiGraphPlus` object.
    """
    mode1 = self.mode1
    mode2 = self.mode2
    # Density is computed over one bipartite node set of this graph.
    density = bipartite.density(self, bipartite.sets(self)[0])
    edges = self.number_of_edges()
    # Count nodes per mode using the 'type' node attribute.
    nodes_mode1 = 0
    nodes_mode2 = 0
    for n in self.nodes():
        if self.node[n]['type'] == mode1:
            nodes_mode1 += 1
        elif self.node[n]['type'] == mode2:
            nodes_mode2 += 1
    descriptives_nodes = "This is a bipartite network of types '{}' and '{}'.\n " \
                         "{} nodes are of the type '{}'.\n " \
                         "{} nodes are of the type '{}'.\n".format(str(mode1), str(mode2),
                                                                   str(nodes_mode1), str(mode1),
                                                                   str(nodes_mode2), str(mode2))
    descriptives_edges = "There are {} edges.\n".format(str(edges))
    descriptives_density = "Density: {}.\n".format(str(density))
    descriptives = descriptives_nodes + descriptives_edges + descriptives_density
    if extra:
        # Note: for each mode of the bipartite graph, degree and betweenness
        # centrality are the same. Keeping them both makes it easy to compare
        # them and make sure they are the same.
        degree_mode1 = bipartite.degree_centrality(self, bipartite.sets(self)[0])
        degree_mode2 = bipartite.degree_centrality(self, bipartite.sets(self)[1])
        degree_mode1 = list(degree_mode1.values())
        degree_mode2 = list(degree_mode2.values())
        degree_mode1 = np.mean(degree_mode1)
        degree_mode2 = np.mean(degree_mode2)
        betweenness_mode1 = bipartite.betweenness_centrality(self, bipartite.sets(self)[0])
        betweenness_mode1 = list(betweenness_mode1.values())
        betweenness_mode1 = np.mean(betweenness_mode1)
        betweenness_mode2 = bipartite.betweenness_centrality(self, bipartite.sets(self)[1])
        betweenness_mode2 = list(betweenness_mode2.values())
        betweenness_mode2 = np.mean(betweenness_mode2)
        # Transitivity is measured on the one-mode projection of a simple
        # (non-multi) copy of this graph.
        g = nx.Graph(self)
        projection = bipartite.projected_graph(g, bipartite.sets(g)[0])
        transitivity = nx.transitivity(projection)
        descriptives_transitivity = "Transitivity: {}.\n".format(str(transitivity))
        descriptives_degree_centrality = "Mean Degree Centrality for '{}': {}.\n" \
                                         "Mean Degree Centrality for '{}': {}.\n".format(str(mode1), str(degree_mode1),
                                                                                         str(mode2), str(degree_mode2))
        descriptives_btwn_centrality = "Mean Betweenness Centrality for '{}': {}.\n" \
                                       "Mean Betweenness Centrality for '{}': {}.\n".format(str(mode1),
                                                                                            str(betweenness_mode1),
                                                                                            str(mode2),
                                                                                            str(betweenness_mode2))
        descriptives = descriptives + descriptives_transitivity + \
            descriptives_degree_centrality + descriptives_btwn_centrality
    print(descriptives)
    return descriptives
from collections import Counter  # local import: script-level block, no visible import header

# Collect (SNP, gene) pairs from the mapping DataFrame.
for index, row in SNP2gene.iterrows():
    kgp.append((str(row["SNP"]), row["gene"]))

##################### Add edges
# PERF FIX: the original called kgp.count(i) for every element (O(n^2)) and
# re-scanned the list once per distinct count before adding edges. A Counter
# yields the same multiplicities in one O(n) pass; the resulting weighted
# graph is identical (assumes b is a simple Graph, where re-adding an edge
# with the same weight was a no-op — TODO confirm b is not a MultiGraph).
for (snp, gene), multiplicity in Counter(kgp).items():
    b.add_edge(snp, gene, weight=multiplicity)

# Split node sets via the 'bipartite' attribute (1 = genes, rest = SNPs).
bottom_nodes = set(n for n, d in b.nodes(data=True) if d['bipartite'] == 1)
top_nodes = set(b) - bottom_nodes

# Bipartite degree centrality over the gene node set.
bip_dg = bipartite.degree_centrality(b, bottom_nodes)
dg = pd.DataFrame({'node': list(bip_dg.keys()), 'DG': list(bip_dg.values())})
dg_snp = dg[dg.node.isin(top_nodes)]
dg_gene = dg[dg.node.isin(bottom_nodes)]
# BUG FIX: DataFrame.sort(['DG']) returned a sorted copy that was discarded
# (and .sort was removed in pandas 0.20); sort_values keeps the result so the
# output files are actually ordered by centrality.
dg_snp = dg_snp.sort_values('DG')
dg_gene = dg_gene.sort_values('DG')
dg_snp.to_csv(oupprefix+"_DG_snp.txt", index=False, sep='\t')
dg_gene.to_csv(oupprefix+"_DG_gene.txt", index=False, sep='\t')