def test_karate_club(self):
    """Louvain on the karate club: expect 4 communities and Q ≈ 0.4298."""
    louvain = PyLouvain.from_file("data/karate.txt")
    communities, modularity = louvain.apply_method()
    scaled_q = modularity * 10000
    self.assertEqual(4, len(communities))
    self.assertEqual(4298, math.floor(scaled_q))
    self.assertEqual(4299, math.ceil(scaled_q))
def test_karate_club(self):
    """Louvain on the karate club graph must find 4 communities with Q ≈ 0.4298."""
    detector = PyLouvain.from_file("data/karate.txt")
    partition, quality = detector.apply_method()
    q_basis_points = quality * 10000
    self.assertEqual(4, len(partition))
    self.assertEqual(4298, math.floor(q_basis_points))
    self.assertEqual(4299, math.ceil(q_basis_points))
def test_karate_club():
    """Run Louvain at gamma=1.0 on the karate club and print the Bayes model odds."""
    graph_nodes, graph_edges = PyLouvain.from_file("data/karate.txt")
    louvain = PyLouvain(graph_nodes, graph_edges)
    communities, modularity = louvain.apply_method(gamma=1.0)
    selection_odds = bayes_model_selection(graph_nodes, graph_edges, communities)
    print(communities, modularity, selection_odds)
def test(graphname, gnc=None):
    """Compare Louvain (modularity) and multiscale partitions of a graph.

    Loads ``data/<graphname>.txt``, computes both partitions (or restores them
    from a pickle cache under ``fig/``), plots the community-size histograms,
    and — when *gnc* names a ground-truth community file — prints NMI scores
    for both methods.

    Args:
        graphname: basename of the edge-list file under ``data/``.
        gnc: optional path to a ground-truth file, one community per line
            (whitespace-separated node ids).

    Bug fixed: on a cache hit, ``partition``/``partition2`` were never
    assigned, so the *gnc* branch crashed with NameError.
    """
    nodes, edges = PyLouvain.from_file("data/%s.txt" % graphname)
    pyl = PyLouvain(nodes, edges)
    name_pickle = 'fig/save_%s_%d.p' % (graphname, len(nodes))
    if not os.path.isfile(name_pickle):
        print("pickle file", name_pickle, "is missing. Recompute.")
        start = time.time()
        partition, q = pyl.apply_method()
        print("Modularity Time", time.time() - start)
        start = time.time()
        partition2 = multiscale(nodes, edges, 0.5)
        print("Multiscale Time", time.time() - start)
        results = {"LV": partition, "MS": partition2}
        with open(name_pickle, 'wb') as fp:
            pickle.dump(results, fp)
        print("Pickle save", name_pickle)
    else:
        print("pickle file", name_pickle, "is found.")
        with open(name_pickle, "rb") as fp:
            results = pickle.load(fp)
        # FIX: restore the partitions from the cache so the gnc branch below
        # (which builds lv_map/ms_map from them) does not hit a NameError.
        partition = results["LV"]
        partition2 = results["MS"]
    # Community-size distributions for the histogram plot.
    sizes_distri = {
        "Modularity": [len(p) for p in partition],
        "MultiScale": [len(p) for p in partition2],
    }
    if gnc:
        # Read-only access suffices; the original "r+" needlessly required
        # write permission on the ground-truth file.
        with open(gnc, "r") as gnc_fp:
            gnc_map = {}
            sizes_distri["Ground Truth"] = []
            for i, line in enumerate(gnc_fp):
                x = line.split()
                sizes_distri["Ground Truth"].append(len(x))
                for j in x:
                    gnc_map[int(j)] = i
        gnc_list = [gnc_map[k] for k in nodes]
        # Flatten each partition into a node -> community-index lookup.
        lv_map = {v: i for i, c in enumerate(partition) for v in c}
        lv_list = [lv_map[k] for k in nodes]
        ms_map = {v: i for i, c in enumerate(partition2) for v in c}
        ms_list = [ms_map[k] for k in nodes]
        print("Louvain NMI=", normalized_mutual_info_score(lv_list, gnc_list))
        print("Multi-scale NMI=", normalized_mutual_info_score(ms_list, gnc_list))
    hist(sizes_distri, graphname)
def do(path, source_data):
    """Partition the interaction graph under *path* and write community files.

    Runs Louvain on ``path + source_data``, then writes:
      - ``community_status.txt``: one line per community, "label<TAB>size";
      - ``community_result.txt``: each interaction edge annotated with the
        community label of both endpoints.

    Fixed: the body used Python 2 ``print`` statements (a SyntaxError under
    Python 3, which the surrounding code targets) and left every file handle
    open; all I/O now uses ``with`` blocks. User-facing message strings are
    kept byte-for-byte.
    """
    start_time = time.time()
    print('start time: ', start_time)
    print('louvain算法社团划分开始...')
    pyl = PyLouvain.from_file(path, source_data)
    partition, q = pyl.apply_method()
    # Read the node-info file: each line is "<name> <index>"; build
    # index -> name so partition members (indices) can be mapped back.
    with open(path + 'nodes_tmp.txt', 'r') as nodes_file:
        nodes_lines = nodes_file.readlines()
    nodes = {}
    for line in nodes_lines:
        n = line.split()
        if not n:
            break
        nodes[n[1]] = n[0]
    # Community summary, format: label <TAB> member count.
    print('统计社团信息 写入社团状态文件')
    label = {}  # node name -> community label (as string)
    with open(path + 'community_status.txt', 'w') as community_status:
        i = 1
        for community in partition:
            community_status.write(str(i) + '\t' + str(len(community)) + '\n')
            # Tag every member with its community label.
            for per in community:
                label[nodes[str(per)]] = str(i)
            i += 1
    # Annotate each interaction record with both endpoints' labels.
    print('关联用户互动数据 写入结果文件')
    with open(path + source_data, 'r') as relationship_file:
        relationship_lines = relationship_file.readlines()
    with open(path + "community_result.txt", 'w') as out_file:
        for rela in relationship_lines:
            r = rela.split()
            if not r:
                break
            out_file.write('-\t' + r[0] + '\t' + label[r[0]] + '\t'
                           + r[1] + '\t' + label[r[1]] + '\t' + r[2] + '\n')
    print('end time: ', time.time())
    print('花费时间: ', (time.time() - start_time) / 60, ' min')
def test_football2():
    """Sweep the multiscale gamma on the football network and print AMI vs ground truth.

    For each gamma in [0.4, 0.9], partitions the graph with ``multiscale`` and
    compares against the conference labels in ``data/football.gnc.txt``
    (one integer label per line, line order matching sorted node keys).

    Fixed: the ground-truth file was opened on every iteration and never
    closed; it is now read inside a ``with`` block.
    """
    for gamma in np.linspace(0.4, 0.9, num=10):
        print()
        print("gamma=", gamma)
        nodes, edges = PyLouvain.from_file("data/football.txt")
        partition = multiscale(nodes, edges, gamma)
        # Load GNC ground truth (as defined by conference); close the handle.
        with open("data/football.gnc.txt", "r") as fconf:
            gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
        order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
        # node -> community index for the computed partition.
        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]
        print("NMI=", metrics.adjusted_mutual_info_score(a, b))


#test_football2()
def test_football():
    """Sweep Louvain's gamma on the football network and plot model-selection odds.

    For gamma in [0.5, 8.5] (35 steps): run Louvain, compute the Bayes model
    selection odds and the AMI against the conference ground truth, then plot
    odds (left axis, red) and community count (right axis, magenta) vs gamma,
    saving to ``fig/football2.png``.

    Fixed: the ground-truth file handle was never closed; it is now read
    inside a ``with`` block (and only once — the labels are loop-invariant).
    """
    # Load GNC ground truth by txt file (as defined by conference).
    with open("data/football.gnc.txt", "r") as fconf:
        gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
    order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
    x, y, z, r = [], [], [], []
    for gamma in np.linspace(0.5, 8.5, num=35):
        nodes, edges = PyLouvain.from_file("data/football.txt")
        pyl = PyLouvain(nodes, edges)
        partition, q = pyl.apply_method(gamma)
        odds = bayes_model_selection(nodes, edges, partition)
        print(len(partition), odds)
        x.append(gamma)
        y.append(odds)
        z.append(len(partition))
        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]
        r.append(metrics.adjusted_mutual_info_score(a, b))
    # Odds on the left y-axis (red), community count on a twin axis (magenta).
    plt.plot(x, y, 'r-*', markersize=10)
    ax1 = plt.gca()
    ax1.tick_params(axis='x', labelsize=18)
    ax1.tick_params(axis='y', labelcolor='r', labelsize=15)
    ax2 = ax1.twinx()
    ax2.plot(x, z, 'm-^', markersize=10)
    ax2.tick_params(axis='y', labelcolor='m', labelsize=15)
    plt.tight_layout()
    plt.savefig("fig/football2.png")
def test_citations(self):
    """Smoke-test: Louvain must run to completion on the hep-th citation graph."""
    louvain = PyLouvain.from_file("data/hep-th-citations")
    communities, modularity = louvain.apply_method()
def test_arxiv(self):
    """Smoke-test: Louvain must run to completion on the arXiv graph."""
    louvain = PyLouvain.from_file("data/arxiv.txt")
    communities, modularity = louvain.apply_method()
def test_arxiv(self):
    """Run Louvain on the combined Facebook graph and print the summary.

    The second ``from_file`` argument (0.5) is forwarded to the loader;
    presumably a sampling/threshold parameter — confirm in pylouvain.
    """
    louvain = PyLouvain.from_file("data/facebook_combined.txt", 0.5)
    communities, modularity = louvain.apply_method()
    print(len(communities), modularity)
from pylouvain import PyLouvain
import math
from matplotlib import pyplot as plt
import networkx as nx

filepath = 'out.txt'

# Compute the community partition of the graph in `filepath`.
pyl = PyLouvain.from_file(filepath)
node_dict = pyl.node_dict  # keys are names like "253916-2", values are integer node ids
reverse_node_dict = dict(zip(node_dict.values(), node_dict.keys()))  # id -> name
partition, q = pyl.apply_method()
print(partition)
print("模块度:", q)

# Assign a color to the nodes of each community.
community_num = len(partition)
print('community_num:', community_num)
color_board = ['red', 'green', 'blue', 'pink', 'orange', 'purple', 'scarlet']
color = {}
for index in range(community_num):
    print("社区" + str(index + 1) + ":" + str(len(partition[index])))
    for node_id in partition[index]:
        # FIX: wrap around the palette so more than len(color_board)
        # communities no longer raises IndexError (colors then repeat).
        color[node_id] = color_board[index % len(color_board)]

# Sort the color map by node id (ascending) into a list of (id, color) pairs.
new_color_dict = sorted(color.items(), key=lambda d: d[0], reverse=False)
# Node names ("253916-2" form) in ascending-id order.
node_list = [reverse_node_dict[item[0]] for item in new_color_dict]
# Colors aligned with node_list.
color_list = [item[1] for item in new_color_dict]
def test_citations():
    """Partition the hep-th citation graph with Louvain and print the result."""
    graph_nodes, graph_edges = PyLouvain.from_file("data/hep-th-citations")
    louvain = PyLouvain(graph_nodes, graph_edges)
    communities, modularity = louvain.apply_method()
    print(communities, modularity)
import matplotlib
matplotlib.use("Agg")
from matplotlib import pyplot as plt
import pickle
import time
import numpy as np
from sklearn import metrics
from pylouvain import PyLouvain
from run import multiscale, bayes_model_selection

# Ground-truth conference labels (one integer per line, line order matching
# sorted node keys). Loop-invariant, so load it once — the original re-opened
# the file on every gamma iteration and never closed it.
with open("data/football.gnc.txt", "r") as fconf:
    gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}

# Sweep the multiscale resolution gamma and record community count and AMI.
x, y, z = [], [], []
for gamma in np.linspace(0.2, 0.9, num=20):
    print("gamma=", gamma)
    pyl = PyLouvain.from_file("data/football.txt")
    partition = multiscale(pyl.nodes, pyl.edges, gamma)
    # node -> community index for the computed partition.
    comm = {n: i for i, ns in enumerate(partition) for n in ns}
    a = [comm[i] for i in pyl.nodes]
    b = [gnc[order_[i]] for i in pyl.nodes]
    x.append(gamma)
    y.append(len(partition))
    z.append(metrics.adjusted_mutual_info_score(a, b))
    print("#comm=", len(partition), "NMI=", metrics.adjusted_mutual_info_score(a, b))
import matplotlib
from matplotlib import pyplot as plt
from sklearn import metrics
from pylouvain import PyLouvain
import numpy as np

# NOTE: the original imported `from sklearn import metrics` twice; the
# duplicate line has been removed.

nodes, edges = PyLouvain.from_file("data/karate.txt")
gamma0 = 0.78
#nodes, edges = PyLouvain.from_gml_file("data/lesmis.gml")
#nodes, edges = PyLouvain.from_gml_file("data/polbooks.gml")


def cmap(nodes, partition):
    """Return, for each node in `nodes`, the index of its community in `partition`."""
    m = {n: i for i, _ in enumerate(partition) for n in _}
    return [m[i] for i in nodes]


def test_small_networks(nodes, edges, gamma0):
    """Compare partitions across a gamma sweep against the gamma0 reference.

    Runs Louvain at `gamma0` to get a reference labelling, then re-runs it for
    200 gammas in [0.2, 3.5], collecting the NMI of each partition against the
    reference into `NMI`.
    """
    pyl = PyLouvain(nodes, edges)
    partition0, q0 = pyl.apply_method(gamma0)
    c0 = cmap(nodes, partition0)
    NMI = []
    gamma_list = np.linspace(0.2, 3.5, num=200)
    for gamma in gamma_list:
        # Fresh PyLouvain per gamma: apply_method mutates internal state.
        # (assumption — TODO confirm against pylouvain)
        partition, q = PyLouvain(nodes, edges).apply_method(gamma)
        c = cmap(nodes, partition)
        NMI.append(metrics.normalized_mutual_info_score(c0, c))
def test_arxiv(self):
    """Smoke-test: applying Louvain to the arXiv graph must not raise."""
    detector = PyLouvain.from_file("data/arxiv.txt")
    partition, quality = detector.apply_method()
def test_test(self):
    """Partition the 1990 yearly graph with Louvain and return the communities."""
    louvain = PyLouvain.from_file("data/year1990.txt")
    communities, _modularity = louvain.apply_method()
    return communities
def test_citations(self):
    """Smoke-test: running Louvain on the hep-th citation graph must not raise."""
    detector = PyLouvain.from_file("data/hep-th-citations")
    partition, quality = detector.apply_method()