Example #1
def test_karate_club():
    nodes, edges = PyLouvain.from_file("data/karate.txt")
    pyl = PyLouvain(nodes, edges)
    partition, q = pyl.apply_method(gamma=1.0)
    odds = bayes_model_selection(nodes, edges, partition)

    print(partition, q, odds)
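Hedged aside: bayes_model_selection returns odds comparing the partition against keeping the graph whole; the multiscale helper in Example #27 treats odds <= 1 (or NaN) as a reason not to split. A minimal sketch of the same check (not part of the original test):

import math

def split_supported(odds):
    # odds > 1 means the partition beats the single-community alternative,
    # mirroring the odds <= 1 early-return in multiscale() (Example #27)
    return odds > 1.0 and not math.isnan(odds)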
Example #2
def test(graphname, gnc=None):
    nodes, edges = PyLouvain.from_file("data/%s.txt" % graphname)
    pyl = PyLouvain(nodes, edges)

    name_pickle = 'fig/save_%s_%d.p' % (graphname, len(nodes))
    if not os.path.isfile(name_pickle):
        print("pickle file", name_pickle, "is missing. Recompute.")

        start = time.time()
        partition, q = pyl.apply_method()
        print("Modularity Time", time.time() - start)

        start = time.time()
        partition2 = multiscale(nodes, edges, 0.5)
        print("Multiscale Time", time.time() - start)

        results = {"LV": partition, "MS": partition2}
        sizes_distri = {
            "Modularity": [len(p) for p in partition],
            "MultiScale": [len(p) for p in partition2]
        }

        pickle.dump(results, open(name_pickle, 'wb'))
        print("Pickle save", name_pickle)
    else:
        print("pickle file", name_pickle, "is found.")

        results = pickle.load(open(name_pickle, "rb"))
        partition, partition2 = results["LV"], results["MS"]
        sizes_distri = {
            "Modularity": [len(p) for p in partition],
            "MultiScale": [len(p) for p in partition2]
        }

    if gnc:
        gnc_fp = open(gnc, "r")
        gnc_map = {}
        sizes_distri["Ground Truth"] = []
        for i, line in enumerate(gnc_fp):
            x = line.split()
            sizes_distri["Ground Truth"].append(len(x))
            for j in x:
                gnc_map[int(j)] = i

        gnc_list = [gnc_map[k] for k in nodes]

        lv_map = {v: i for i, c in enumerate(partition) for v in c}
        lv_list = [lv_map[k] for k in nodes]

        ms_map = {v: i for i, c in enumerate(partition2) for v in c}
        ms_list = [ms_map[k] for k in nodes]

        print("Louvain NMI=", normalized_mutual_info_score(lv_list, gnc_list))
        print("Multi-scale NMI=",
              normalized_mutual_info_score(ms_list, gnc_list))

    hist(sizes_distri, graphname)
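A hypothetical invocation of test(), assuming the football edge list and the ground-truth file used in Example #7 are on disk:

# Hypothetical call: "football" resolves to data/football.txt inside test(),
# and the gnc path matches the ground-truth file from Example #7.
test("football", gnc="data/football.gnc.txt")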
Example #3
def test_small_networks(nodes, edges, gamma0):
    pyl = PyLouvain(nodes, edges)
    partition0, q0 = pyl.apply_method(gamma0)
    c0 = cmap(nodes, partition0)
    NMI = []

    gamma_list = np.linspace(0.2, 3.5, num=200)
    for gamma in gamma_list:
        partition, q = PyLouvain(nodes, edges).apply_method(gamma)
        c = cmap(nodes, partition)

        NMI.append(metrics.normalized_mutual_info_score(c0, c))

    plt.plot(gamma_list, NMI, 'b-*', markersize=10)
    plt.show()
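A minimal usage sketch for test_small_networks, borrowing the karate-club setup and the reference resolution gamma0 = 0.78 from Example #20:

# Sketch: sweep gamma and compare each partition to the gamma0 reference.
nodes, edges = PyLouvain.from_file("data/karate.txt")
test_small_networks(nodes, edges, gamma0=0.78)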
Example #4
 def test_karate_club(self):
     pyl = PyLouvain.from_file("data/karate.txt")
     partition, q = pyl.apply_method()
     q_ = q * 10000
     self.assertEqual(4, len(partition))
     self.assertEqual(4298, math.floor(q_))
     self.assertEqual(4299, math.ceil(q_))
Example #5
 def test_karate_club(self):
     pyl = PyLouvain.from_file("data/karate.txt")
     partition, q = pyl.apply_method()
     q_ = q * 10000
     self.assertEqual(4, len(partition))
     self.assertEqual(4298, math.floor(q_))
     self.assertEqual(4299, math.ceil(q_))
Example #6
def findGZCommunity():
    dbm = dbManager2('sina11', host='127.0.0.1', passwd='root')
    pyl = PyLouvain.from_db(dbm,
                            "select uid,fid from afrelation11 limit 0,10000")
    partition, q = pyl.apply_method()
    print(partition)
    f = open("output.txt", "w")
    f.write(str(partition))
    f.close()
Example #7
def test_football():

    # load the ground-truth communities from a text file (conference assignments)
    fconf = open("data/football.gnc.txt", "r")
    gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
    order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}

    x, y, z, r = [], [], [], []
    for gamma in np.linspace(0.5, 8.5, num=35):
        nodes, edges = PyLouvain.from_file("data/football.txt")
        pyl = PyLouvain(nodes, edges)
        partition, q = pyl.apply_method(gamma)
        odds = bayes_model_selection(nodes, edges, partition)

        print(len(partition), odds)
        x.append(gamma)
        y.append(odds)
        z.append(len(partition))

        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]
        #print("NMI=", metrics.adjusted_mutual_info_score(a, b))

        r.append(metrics.adjusted_mutual_info_score(a, b))
        #r.append(metrics.adjusted_rand_score(a, b))

    plt.plot(x, y, 'r-*', markersize=10)
    ax1 = plt.gca()
    ax1.tick_params(axis='x', labelsize=18)
    ax1.tick_params(axis='y', labelcolor='r', labelsize=15)
    ax2 = ax1.twinx()
    ax2.plot(x, z, 'm-^', markersize=10)
    ax2.tick_params(axis='y', labelcolor='m', labelsize=15)
    plt.tight_layout()

    plt.savefig("fig/football2.png")
Example #8
def do(path, source_data):
    start_time = time.time()
    print('start time: ', start_time)
    print('Starting Louvain community detection...')
    pyl = PyLouvain.from_file(path, source_data)
    partition, q = pyl.apply_method()
    # print(partition)
    out_file = open(path + "community_result.txt", 'w')

    # read the node information file
    nodes_file = open(path + 'nodes_tmp.txt', 'r')
    nodes_lines = nodes_file.readlines()
    nodes_file.close()
    nodes = {}  # maps node id -> node name
    for line in nodes_lines:
        n = line.split()
        if not n:
            break
        nodes[n[1]] = n[0]

    # community info, format: label <tab> member count
    print('Collecting community info and writing the community status file')
    community_status = open(path + 'community_status.txt', 'w')
    i = 1
    label = {}  # maps node name -> community label
    for community in partition:
        community_status.write(str(i) + '\t' + str(len(community)) + '\n')  # label, member count
        # tag every member with its community label
        for per in community:
            label[nodes[str(per)]] = str(i)
        i += 1
    community_status.close()

    # join the user-interaction data
    print('Joining user-interaction data and writing the result file')
    relationship_file = open(path + source_data, 'r')
    relationship_lines = relationship_file.readlines()
    relationship_file.close()
    for rela in relationship_lines:
        r = rela.split()
        if not r:
            break
        out_file.write('-\t' + r[0] + '\t' + label[r[0]] + '\t' + r[1] + '\t' + label[r[1]] + '\t' + r[2] + '\n')
    out_file.close()

    print('end time: ', time.time())
    print('elapsed: ', (time.time() - start_time) / 60, ' min')
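A hypothetical call, inferred from how the arguments are concatenated above: path is a directory prefix ending in a slash that also holds nodes_tmp.txt, and source_data is a whitespace-separated edge file with a weight column. Both names below are assumptions:

# Hypothetical arguments: adjust to the actual directory and edge file.
do("data/", "relations.txt")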
Example #9
def test_football2():

    for gamma in np.linspace(0.4, 0.9, num=10):
        print()
        print("gamma=", gamma)
        nodes, edges = PyLouvain.from_file("data/football.txt")
        partition = multiscale(nodes, edges, gamma)

        # load the ground-truth communities from a text file (conference assignments)
        fconf = open("data/football.gnc.txt", "r")
        gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
        order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
        comm = {n: i for i, ns in enumerate(partition) for n in ns}
        a = [comm[i] for i in nodes]
        b = [gnc[order_[i]] for i in nodes]

        print("NMI=", metrics.adjusted_mutual_info_score(a, b))


#test_football2()
Example #10
 def test_citations(self):
     pyl = PyLouvain.from_file("data/hep-th-citations")
     partition, q = pyl.apply_method()
Example #11
 def test_lesmis(self):
     pyl = PyLouvain.from_gml_file("data/lesmis.gml")
     partition, q = pyl.apply_method()
Example #12
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/facebook_combined.txt", 0.5)
     partition, q = pyl.apply_method()
     print(len(partition), q)
Example #13
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/arxiv.txt")
     partition, q = pyl.apply_method()
Example #14
def test_citations():
    nodes, edges = PyLouvain.from_file("data/hep-th-citations")
    pyl = PyLouvain(nodes, edges)
    partition, q = pyl.apply_method()
    print(partition, q)
Example #15
def main():
    if not has_pyimpfuzzy:
        sys.exit("[!] pyimpfuzzy must be installed for this script.")

    if not has_py2neo:
        sys.exit("[!] py2neo must be installed for this script.")

    if not has_pylouvain and not args.nocluster:
        sys.exit("[!] Please download the pylouvain from https://github.com/patapizza/pylouvain.")

    try:
        graph_http = "http://" + NEO4J_USER + ":" + NEO4J_PASSWORD + "@:" + NEO4J_PORT + "/db/data/"
        GRAPH = Graph(graph_http)
    except Exception:
        sys.exit("[!] Can't connect to the Neo4j database.")

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit()
    
    i = 0
    hashlist = []
    hashlist_new = []
    nodes = []
    edges = []
    relationships = []

    # This is the impfuzzy similarity threshold
    if args.threshold:
        ss_threshold = args.threshold
    else:
        ss_threshold = 30
    print("[*] Impfuzzy threshold is %i." % ss_threshold)

    # Delete database data
    if args.delete:
        GRAPH.delete_all()
        print("[*] Delete all nodes and relationships from this Neo4j database.")

    # Load database data
    database = GRAPH.data("MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.md5, m.sha1, m.sha256")

    if database:
        print("[*] Database nodes %d." % len(database))
        for d in database:
            hashlist.append([d["m.id"], d["m.name"], d["m.impfuzzy"], d["m.md5"], d["m.sha1"], d["m.sha256"]])

    nodes_count = len(database)
    # Load relationships
    relation_data = GRAPH.data("MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id,m2.id,s.value")
    if relation_data:
        print("[*] Database relationships %d." % len(relation_data))
        for r in relation_data:
            relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])

    for x in range(nodes_count):
        nodes.append(x)

    print("[*] Creating a graph data.")

    # Import data from EXE or DLL
    if args.file:
        if os.path.isfile(args.file):
            i = nodes_count
            impfuzzy, md5, sha1, sha256 = get_digest(args.file)
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
            if impfuzzy:
                if not GRAPH.data(query):
                    nodes.append(i)
                    hashlist_new.append([i, args.file, impfuzzy, md5, sha1, sha256])
                else:
                    print("[!] This malware is registered already. sha256: %s" % sha256)
            else:
                print("[!] Can't calculate the impfuzzy hash. sha256: %s" % sha256)
        else:
            sys.exit("[!] Can't open file {0}.".format(args.file))

    # Import data from directory
    if args.directory:
        try:
            files = os.listdir(args.directory)
        except OSError:
            sys.exit("[!] Can't open directory {0}.".format(args.directory))

        outf = args.directory + "_hash.csv"
        fl = open(outf, "w")
        i = nodes_count
        for file in files:
            filename = args.directory + "/" + file
            impfuzzy, md5, sha1, sha256 = get_digest(filename)
            fl.write("%s,%s,%s,%s,%s\n" % (file, impfuzzy, md5, sha1, sha256))
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
            if impfuzzy:
                if not GRAPH.data(query) and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([i, file, impfuzzy, md5, sha1, sha256])
                    i += 1
                else:
                    print("[!] This malware is registered already. sha256: %s" % sha256)
            else:
                print("[!] Can't calculate the impfuzzy hash. sha256: %s" % sha256)
        print("[*] Created hash list %s." % outf)
        fl.close()

    # Import data from csv file
    if args.listname:
        print("[*] Parse file %s." % args.listname)
        try:
            csvfile = csv.reader(open(args.listname), delimiter=",")
        except IOError:
            sys.exit("[!] Can't open file {0}.".format(args.listname))

        i = nodes_count
        for array in csvfile:
            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % array[4]
            if array[1]:
                if not GRAPH.data(query):
                    nodes.append(i)
                    array.insert(0, i)
                    hashlist_new.append(array)
                    i += 1
                else:
                    print("[!] This malware is registered already. sha256: %s" % array[4])
            else:
                print("[!] Impfuzzy hash is blank. sha256: %s" % array[4])

    # Compare impfuzzy
    print("[*] The total number of malware is %i." % i)
    result_list = impfuzzy_comp(hashlist, hashlist_new)

    if len(database) != len(nodes):
        # Clustering
        if not args.nocluster:
            for edge in result_list + relationships:
                if edge[2] > ss_threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()
            print("[*] The number of clusters is %i." % (len(partition) - 1))
        else:
            print("[*] No clustering option.")

        # Create node
        tx = GRAPH.begin()
        if args.nocluster:
            for hash in hashlist_new:
                tx.append(statement_c, {"id": hash[0], "name": hash[1], "impfuzzy": hash[2],
                                        "md5": hash[3], "sha1": hash[4], "sha256": hash[5],
                                        "cluster": "NULL"})
        else:
            for hash in hashlist_new + hashlist:
                i = 0
                for a in partition:
                    i += 1
                    if hash[0] in a:
                        tx.append(statement_c, {"id": hash[0], "name": hash[1], "impfuzzy": hash[2],
                                                "md5": hash[3], "sha1": hash[4], "sha256": hash[5],
                                                "cluster": i})

        # Create relationship
        for result in result_list:
            if result[2] > ss_threshold:
                tx.append(statement_r, {"id1": result[0], "id2": result[1], "value": result[2]})

        tx.process()
        tx.commit()
        print("[*] Created a graph data.\n")
    else:
        print("[*] Not find a new malware.\n")

    print("  Access to http://localhost:7474 via Web browser.")
    print("  Use Cypher query. You can see the graph.\n")
    print("  == Cypher Query Examples ==")
    print("  [Visualizing the all clusters]")
    print("  $ MATCH (m:Malware) RETURN m\n")
    print("  [Visualizing the clusters that matches the MD5 hash]")
    print("  $ MATCH (m1:Malware)-[s]-() WHERE m1.md5 = \"[MD5]\"")
    print("    MATCH (m2:Malware) WHERE m2.cluster = m1.cluster")
    print("    RETURN m2\n")
    print("  [Visualizing the clusters that matches the threshold more than 90]")
    print("  $ MATCH (m:Malware)-[s:same]-() WHERE s.value > 90 RETURN m,s")
    print("  ===========================\n")
Example #16
from pylouvain import PyLouvain
import math
from matplotlib import pyplot as plt
import networkx as nx

filepath = 'out.txt'

# run the community detection
pyl = PyLouvain.from_file(filepath)
node_dict = pyl.node_dict  # keys are labels like "253916-2", values are integer ids
reverse_node_dict = dict(zip(node_dict.values(),
                             node_dict.keys()))  # integer id -> label like "253916-2"
partition, q = pyl.apply_method()
print(partition)
print("Modularity:", q)

# assign a color to the nodes of each community
community_num = len(partition)
print('community_num:', community_num)
# note: 'scarlet' is not a matplotlib color name, so 'brown' is used instead
color_board = ['red', 'green', 'blue', 'pink', 'orange', 'purple', 'brown']
color = {}
for index in range(community_num):
    print("Community " + str(index + 1) + ": " + str(len(partition[index])))
    for node_id in partition[index]:
        # color maps an integer node id to its community's color;
        # cycle through the palette if there are more communities than colors
        color[node_id] = color_board[index % len(color_board)]
new_color_dict = sorted(color.items(), key=lambda d: d[0],
                        reverse=False)  # list of (id, color) pairs sorted by id
node_list = [reverse_node_dict[item[0]]
             for item in new_color_dict]  # labels in ascending order of integer id
color_list = [item[1] for item in new_color_dict]  # colors aligned with node_list
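The snippet imports networkx and pyplot but stops before drawing. A hedged continuation, assuming out.txt holds one whitespace-separated src/dst pair per line using the same labels as node_dict:

# Sketch: rebuild the graph from the same edge file and draw the communities.
G = nx.Graph()
with open(filepath) as f:
    for line in f:
        parts = line.split()
        if len(parts) >= 2:
            G.add_edge(parts[0], parts[1])
nx.draw(G, nodelist=node_list, node_color=color_list, node_size=30)
plt.show()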
Example #17
 def test_test(self):
     pyl = PyLouvain.from_file("data/year1990.txt")
     partition, q = pyl.apply_method()
     return partition
Example #18
 def test_citations(self):
     pyl = PyLouvain.from_file("data/hep-th-citations")
     partition, q = pyl.apply_method()
Example #19
 def test_arxiv(self):
     pyl = PyLouvain.from_file("data/arxiv.txt")
     partition, q = pyl.apply_method()
Example #20
import matplotlib
from matplotlib import pyplot as plt
from sklearn import metrics
from pylouvain import PyLouvain
import numpy as np

nodes, edges = PyLouvain.from_file("data/karate.txt")
gamma0 = 0.78

#nodes, edges = PyLouvain.from_gml_file("data/lesmis.gml")
#nodes, edges = PyLouvain.from_gml_file("data/polbooks.gml")


def cmap(nodes, partition):
    m = {n: i for i, comm in enumerate(partition) for n in comm}
    return [m[i] for i in nodes]


def test_small_networks(nodes, edges, gamma0):
    pyl = PyLouvain(nodes, edges)
    partition0, q0 = pyl.apply_method(gamma0)
    c0 = cmap(nodes, partition0)
    NMI = []

    gamma_list = np.linspace(0.2, 3.5, num=200)
    for gamma in gamma_list:
        partition, q = PyLouvain(nodes, edges).apply_method(gamma)
        c = cmap(nodes, partition)

        NMI.append(metrics.normalized_mutual_info_score(c0, c))
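The module-level nodes, edges, and gamma0 suggest the snippet was cut before the function is actually called; Example #3 shows the same function ending with a plot. A one-line sketch of the missing call:

# Sketch: run the gamma sweep with the setup defined at module level.
test_small_networks(nodes, edges, gamma0)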
Example #21
def test_polbooks():
    nodes, edges = PyLouvain.from_gml_file("data/polbooks.gml")
    pyl = PyLouvain(nodes, edges)

    partition, q = pyl.apply_method()
    print(partition, q)
Example #22
import matplotlib
matplotlib.use("Agg")
from matplotlib import pyplot as plt
import pickle
import time
import numpy as np
from sklearn import metrics

from pylouvain import PyLouvain
from run import multiscale, bayes_model_selection

x, y, z = [], [], []
for gamma in np.linspace(0.2, 0.9, num=20):
    print("gamma=", gamma)
    pyl = PyLouvain.from_file("data/football.txt")
    partition = multiscale(pyl.nodes, pyl.edges, gamma)

    # load the ground-truth communities from a text file (conference assignments)
    fconf = open("data/football.gnc.txt", "r")
    gnc = {str(i): int(line.strip()) for i, line in enumerate(fconf)}
    order_ = {i: stri for i, stri in enumerate(sorted(gnc.keys()))}
    comm = {n: i for i, ns in enumerate(partition) for n in ns}
    a = [comm[i] for i in pyl.nodes]
    b = [gnc[order_[i]] for i in pyl.nodes]

    x.append(gamma)
    y.append(len(partition))
    z.append(metrics.adjusted_mutual_info_score(a, b))

    print("#comm=", len(partition), "NMI=",
          metrics.adjusted_mutual_info_score(a, b))
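The x/y/z lists are built but never plotted, and the Agg backend plus pyplot import at the top point at a figure being written; a plotting tail in the style of test_football() from Example #7 is a plausible ending. A sketch (the output filename is an assumption):

# Sketch: community count (y) on the left axis, AMI (z) on the right,
# mirroring the two-axis plot in Example #7.
plt.plot(x, y, 'r-*', markersize=10)
ax1 = plt.gca()
ax2 = ax1.twinx()
ax2.plot(x, z, 'm-^', markersize=10)
plt.tight_layout()
plt.savefig("fig/football_multiscale.png")  # hypothetical path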
Example #23
 def test_lesmis(self):
     pyl = PyLouvain.from_gml_file("data/lesmis.gml")
     partition, q = pyl.apply_method()
Example #24
    def process(self):

        hashlist = []
        hashlist_new = []
        nodes = []
        edges = []
        relationships = []

        # recover all actual data
        database = self.graph.run(
            "MATCH (m:Malware) RETURN m.id, m.name, m.impfuzzy, m.scout_result, m.scout_confidence, m.md5, m.sha1, m.sha256, m.tag"
        ).data()
        if database:
            for d in database:
                hashlist.append([
                    d["m.id"], d["m.name"], d["m.impfuzzy"],
                    d["m.scout_result"], d["m.scout_confidence"], d["m.md5"],
                    d["m.sha1"], d["m.sha256"], d["m.tag"]
                ])

        nodes_count = len(database)
        i = nodes_count

        relation_data = self.graph.run(
            "MATCH (m1:Malware)-[s:same]-(m2:Malware) RETURN m1.id, m2.id, s.value"
        ).data()
        if relation_data:
            for r in relation_data:
                relationships.append([r["m1.id"], r["m2.id"], r["s.value"]])
        for x in range(nodes_count):
            nodes.append(x)

        # if massive check for each file
        if self.folder_path:
            for item in self.files:
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    item[0])
                if scout_result in ("", 'A171', None):
                    continue

                query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256
                objs = self.graph.run(query).data()
                if not objs and sha256 not in [x[5] for x in hashlist_new]:
                    nodes.append(i)
                    hashlist_new.append([
                        i, item[0].split("/")[-1], impfuzzy, scout_result,
                        scout_confidence, md5, sha1, sha256, item[1]
                    ])
                    i += 1
                else:
                    continue
        else:
            # if single we are in the reporting module
            # if file is tested it need to have valid apiscout vector
            if self.check_file(self.filepath):
                scout_result, impfuzzy, md5, sha1, sha256, scout_confidence = self.get_digest(
                    self.filepath)
                if scout_result in ("", 'A171', None):
                    return {}
            else:
                return {}

            query = "MATCH (m:Malware) WHERE m.sha256=\"%s\" RETURN m" % sha256

            objs = self.graph.run(query).data()
            if not objs:
                nodes.append(nodes_count)
                hashlist_new.append([
                    nodes_count, self.filename, impfuzzy, scout_result,
                    scout_confidence, md5, sha1, sha256, None
                ])
            else:
                return self.search_hash(sha256)

        # Calculate apiscout correlation
        result_list = self.scout_comp(hashlist, hashlist_new)

        partition = []  # stays empty when there is nothing new to cluster
        if len(database) != len(nodes):
            for edge in result_list + relationships:
                if edge[2] > self.threshold:
                    edges.append([[edge[0], edge[1]], edge[2]])
                else:
                    edges.append([[edge[0], edge[1]], 0])
            pyl = PyLouvain(nodes, edges)
            partition, modularity = pyl.apply_method()

        # Create node
        tx = self.graph.begin()

        for hash in hashlist_new + hashlist:
            i = 0
            for a in partition:
                i += 1
                if hash[0] in a:
                    tx.append(
                        statement_c, {
                            "id": hash[0],
                            "name": hash[1],
                            "impfuzzy": hash[2],
                            "scout_result": hash[3],
                            "scout_confidence": hash[4],
                            "md5": hash[5],
                            "sha1": hash[6],
                            "sha256": hash[7],
                            "tag": hash[8],
                            "cluster": i
                        })

        # Create relationship
        for result in result_list:
            if result[2] > self.threshold:
                tx.append(statement_r, {
                    "id1": result[0],
                    "id2": result[1],
                    "value_scout": result[2]
                })

        tx.process()
        tx.commit()

        # recover info
        if self.filename:
            return self.search_hash(sha256)
Example #25
 def test_polbooks(self):
     pyl = PyLouvain.from_gml_file("data/polbooks.gml")
     partition, q = pyl.apply_method()
Example #26
 def test_polbooks(self):
     pyl = PyLouvain.from_gml_file("data/polbooks.gml")
     partition, q = pyl.apply_method()
Example #27
def multiscale(nodes,
               edges,
               gamma,
               deg=None,
               depth=1,
               verbose=False,
               max_depth=4):
    '''
    Multi-scale community detection.
    Recursively splits subgraphs by maximizing generalized modularity;
    recursion terminates at each level via Bayes model selection.

    Args:
        nodes: a list of nodes
        edges: a list of edges ((src, dst), weight)
        gamma: the resolution parameter of the generalized modularity
    Returns:
        a list of lists, each containing the nodes of one community
    '''

    if depth >= max_depth or len(nodes) < 2:
        return [nodes]

    verbose and print("    " * depth, "***", "depth=", depth, "N=", len(nodes))

    nodes.sort()
    d = {n: i for i, n in enumerate(nodes)}
    rd = {i: n for n, i in d.items()}
    nodes = list(range(len(d)))
    edges = [((d[e[0][0]], d[e[0][1]]), e[1]) for e in edges]

    if deg is None:
        deg = {i: 0 for i in nodes}
        for e in edges:
            deg[e[0][0]] += e[1]
            deg[e[0][1]] += e[1]

    pyl = PyLouvain(nodes, edges, deg)

    # execution
    partition, q = pyl.apply_method(gamma)
    verbose and print("    " * depth, "gamma=", gamma, "comm=", len(partition))

    if len(partition) < 2:
        return [list(map(rd.get, nodes))]
    odds = bayes_model_selection(pyl.nodes, pyl.edges, partition)
    verbose and print("    " * depth, "odds=", odds)
    if odds <= 1. or math.isnan(odds):
        return [list(map(rd.get, nodes))]

    comm = {n: i for i, ns in enumerate(partition) for n in ns}
    edge_list = [[] for _ in range(len(partition))]
    for e in edges:
        u, v = e[0][0], e[0][1]
        if comm[u] == comm[v]:
            edge_list[comm[u]].append(e)

    R = []
    for nodes_, edges_ in zip(partition, edge_list):
        groups = multiscale(nodes_, edges_, gamma, deg, depth + 1, verbose,
                            max_depth)
        for grp in groups:
            R.append([rd[n] for n in grp])

    return R
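A minimal driver for multiscale, following the call in Example #2; the karate data file and gamma = 0.5 are illustrative choices, not prescribed values:

# Sketch: run the recursive splitter on the karate-club graph.
nodes, edges = PyLouvain.from_file("data/karate.txt")
communities = multiscale(nodes, edges, 0.5, verbose=True)
print([len(c) for c in communities])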
Example #28
    '''
    end = time.time()

    commsFG_sizes = sorted(len(c) for c in commsFG)
    verbose and print(commsFG_sizes)

    map_comm = {v: i for i, c in enumerate(commsFG) for v in c}
    a = [map_comm[k] for k in G.nodes()]
    print("FastGreedy Algorithm ARI=", adjusted_rand_score(a, gnc_list),
          "NMI=", normalized_mutual_info_score(a, gnc_list))
    print("which takes", end - start, "seconds")
    '''

    #=========== Benchmark Louvain ===============#
    print("Start Louvain community detection")

    start = time.time()
    pyl = PyLouvain(nodes, edges)
    commsLV, q = pyl.apply_method(1.0)
    end = time.time()

    commsLV_sizes = sorted(len(c) for c in commsLV)
    verbose and print(commsLV_sizes)
    verbose and print(len(commsLV_sizes))

    map_comm = {v: i for i, c in enumerate(commsLV) for v in c}
    LV_list = [map_comm[k] for k in G.nodes()]
    print("Louvain Algorithm ARI=", adjusted_rand_score(LV_list, gnc_list),
          "NMI=", normalized_mutual_info_score(LV_list, gnc_list))
    print("which takes", end - start, "seconds")
    print("Size range = ", min(commsLV_sizes), max(commsLV_sizes))
    print()