Exemplo n.º 1
0
def test_directed_not_supported():
    """Run label propagation on a small directed graph.

    NOTE(review): no assertion is visible in this block; presumably an
    exception-expecting decorator was attached outside this view -- confirm.
    """
    # not supported for directed graphs
    digraph = nx.DiGraph()
    digraph.add_edges_from([('a', 'b'), ('a', 'c'), ('b', 'd')])
    result = label_propagation_communities(digraph)
Exemplo n.º 2
0
def apply_label(UG):
    """Detect communities in ``UG`` by label propagation and score them.

    Returns a tuple ``(mod, partition_map, partition_list)``:
    ``partition_list`` is the list of detected communities,
    ``partition_map`` is ``list_to_dict(partition_list)``, and ``mod`` is
    the modularity of the partition, or 0 when it could not be computed.
    """
    partition_list = list(community.label_propagation_communities(UG))
    partition_map = list_to_dict(partition_list)
    try:
        mod = nx.community.quality.modularity(UG, partition_list)
    except Exception:
        # Best-effort scoring: fall back to 0 on any ordinary failure, but
        # let KeyboardInterrupt/SystemExit propagate (a bare except hid them).
        mod = 0
    return mod, partition_map, partition_list
Exemplo n.º 3
0
    def run(self, G, seed_node_count, adj_list):
        """Greedily pick seed nodes from the largest label-propagation community.

        The graph is first restricted to its largest connected component and
        split into communities; candidates come from the largest community.
        Each round picks the candidate with the highest score, where every
        neighbour ``adj`` contributes ``1 / max(1, N_v[adj] + 1)`` and
        ``N_v`` counts how often a node was chosen or was adjacent to a
        chosen node.

        Parameters:
            G: graph to select seeds from.
            seed_node_count: number of seeds requested.
            adj_list: mapping from node to an iterable of its neighbours.

        Returns:
            The set of chosen nodes. It may hold fewer than
            ``seed_node_count`` nodes when no remaining candidate has a
            positive score.
        """
        largest_cc = max(nx.connected_components(G), key=len)
        G = nx.subgraph(G, largest_cc)
        # Compute the communities once and reuse them for the size report.
        communities = list(label_propagation_communities(G))
        print("List of community sizes:", list(map(len, communities)))
        best_community = list(max(communities, key=len))

        V = set()
        N_v = defaultdict(int)

        while len(V) < seed_node_count:
            best_score = 0
            best_node = None
            for node in best_community:
                if node in V:
                    continue
                score = 0
                for adj in adj_list[node]:
                    score += 1. / max(1, N_v[adj] + 1)
                if score > best_score:
                    best_score = score
                    best_node = node
            if best_node is None:
                # No candidate left with a positive score: stop instead of
                # adding None to the result and indexing adj_list[None].
                break
            V.add(best_node)
            N_v[best_node] += 1
            for n in adj_list[best_node]:
                N_v[n] += 1

        return V
def test_directed_not_supported():
    """Expect NetworkXNotImplemented when the input graph is directed."""
    with pytest.raises(nx.NetworkXNotImplemented):
        # not supported for directed graphs
        digraph = nx.DiGraph()
        digraph.add_edges_from([('a', 'b'), ('a', 'c'), ('b', 'd')])
        result = label_propagation_communities(digraph)
Exemplo n.º 5
0
def test_one_node():
    """A graph holding one node must yield one singleton community."""
    graph = nx.Graph()
    graph.add_node('a')

    # The only expected community is the node on its own.
    expected = {frozenset(['a'])}

    found = {frozenset(part) for part in label_propagation_communities(graph)}
    assert_equal(found, expected)
Exemplo n.º 6
0
def test_one_node():
    """A single-node graph is detected as exactly one community."""
    single = nx.Graph()
    single.add_node("a")

    # Freeze each detected community so the partition can be compared as a set.
    detected = set()
    for members in label_propagation_communities(single):
        detected.add(frozenset(members))

    assert detected == {frozenset(["a"])}
Exemplo n.º 7
0
def test_one_node():
    """Check the partition returned for a graph made of the lone node 'a'."""
    lone = nx.Graph()
    lone.add_node('a')

    partition = label_propagation_communities(lone)
    observed = set(map(frozenset, partition))

    # The expected communities are:
    wanted = {frozenset(['a'])}
    assert_equal(observed, wanted)
Exemplo n.º 8
0
 def lpa(self):
     """Cluster ``self.network`` with label propagation.

     Stores a node -> cluster id mapping in ``self.clusters`` and the
     number of clusters in ``self.clusters_num``, printing both the
     raw communities object and the cluster count.
     """
     communities_generator = community.label_propagation_communities(
         self.network)
     print(communities_generator, 'ZZZZZZZZZZZZZZZ')
     assignment = {}
     for cluster_id, node_set in enumerate(communities_generator):
         for node in node_set:
             assignment[node] = cluster_id
     self.clusters = assignment
     # Cluster ids start at 0, so the count is the largest id plus one.
     self.clusters_num = max(assignment.values()) + 1
     print('{} clusters'.format(self.clusters_num))
Exemplo n.º 9
0
def execute(directory):
    """Run label propagation on every Pajek ``*.net`` file in ``directory``.

    For each network the detected partition is written as a ``.clu`` file
    under ``../../results-label-p/``. The external radatools executables
    ``Compare_Partitions.exe`` and ``Modularity_Calculation.exe`` are then
    invoked on it. The function changes the working directory as it goes
    and finishes in ``../../source``.

    Parameters:
        directory: directory holding the ``*.net`` files. It is appended to
            relative paths as-is, so it is presumably expected to end with
            a path separator -- confirm against callers.
    """
    os.chdir(directory)
    for file in glob.glob("*.net"):
        file_name = file.split(".")[0] + ".clu"
        multigraph = nx.read_pajek(file)
        modelGraph = nx.Graph(multigraph)
        file_directory = os.path.join("../../results-label-p/", file_name)
        communities = community.label_propagation_communities(modelGraph)

        # One slot per vertex id, plus slot 0 for the "*Vertices" header.
        lines = [None] * (len(modelGraph) + 1)
        group_number = 1

        for group in communities:
            group_as_string = str(group_number)
            for node_value in group:
                node_id = modelGraph.nodes.get(node_value)['id']
                lines[int(node_id)] = group_as_string
            group_number += 1

        lines[0] = "*Vertices " + str(len(modelGraph))
        # Any vertex id that was never assigned gets a fresh group of its own.
        for index, x in enumerate(lines):
            if x is None:
                lines[index] = str(group_number)
                group_number += 1

        # The context manager closes the file even if a write fails.
        with open(file_directory, "w+") as f:
            f.write('\n'.join(lines))
            f.write("\n")

        # NOTE(review): the commands below are built by string concatenation
        # and run through the shell; file names come from glob() output.
        os.chdir("../../radatools/Communities_Tools/")
        st = os.stat('./Compare_Partitions.exe')
        os.chmod("./Compare_Partitions.exe", st.st_mode | stat.S_IEXEC)
        if ("model" in directory) and ("rb125" in file_name):
            for index in range(1, 4):
                os.system("./Compare_Partitions.exe ../../results-label-p/" + file_name + " ../" + directory
                          + "rb125-"+str(index)+".clu" + " ../../results-label-p/" + file_name + "-"+str(index) + ".exit " + " V")
        else:
            os.system("./Compare_Partitions.exe ../../results-label-p/" + file_name + " ../" + directory
                      + file_name + " ../../results-label-p/" + file_name + ".exit " + " V")

        st = os.stat('./Modularity_Calculation.exe')
        os.chmod("./Modularity_Calculation.exe", st.st_mode | stat.S_IEXEC)
        os.system("./Modularity_Calculation.exe ../" + directory + file + " ../../results-label-p/" + file_name
                  + " 0 0 UN TC 2 >> " + " ../../results-label-p/" + file_name + ".modularity")
        os.chdir("../" + directory)

    os.chdir("../../source")
Exemplo n.º 10
0
def partition_into_clusters(G, n, num_players):
    '''
    Partitions a graph into clusters, and partitions the n seeds among the
    clusters proportional to their size.
    @param G: graph
    @param n: number of seeds
    @param num_players: number of players (not used by this function)
    @return clusters, seed_nums: clusters is a list of clusters (each is a
    collection of nodes), seed_nums is a list of how many seeds should be
    partitioned to each cluster.
    '''
    comp = list(community.label_propagation_communities(G)) # girvan newman too slow
    comp.sort(reverse = True, key = len)

    # we focus on only the top threshold fraction of nodes in clusters,
    # since the best strategy is probably to dominate the large clusters,
    # while ignoring the very small ones (idk?)

    threshold = 0.3 # change if needed

    # extract only the top clusters that form threshold fraction of nodes
    total_cluster_nodes = 0
    clusters = []
    for cluster in comp:
        clusters.append(cluster)
        total_cluster_nodes += len(cluster)
        if total_cluster_nodes >= threshold * len(G):
            break

    print(len(clusters))
    for cluster in clusters:
        print(len(cluster))
    # partition our n seeds among the clusters, s.t. number of seeds given is
    # proportional to cluster size

    # Rounding the cumulative share (not each share on its own) ensures all
    # n seeds get partitioned.
    total_nodes_counted = 0
    total_seeds_given = 0
    seed_nums = []
    for cluster in clusters:
        total_nodes_counted += len(cluster)
        num_seeds = round(total_nodes_counted / total_cluster_nodes * n) - total_seeds_given
        # don't give seeds to very small clusters (arbitrary threshold); the
        # lone seed goes to the largest cluster instead. The first cluster has
        # nothing to redirect to, so it keeps its seed -- this avoids indexing
        # seed_nums[0] while the list is still empty.
        if seed_nums and num_seeds == 1 and len(cluster) < total_cluster_nodes / n:
            seed_nums[0] += num_seeds
            seed_nums.append(0)
        else:
            seed_nums.append(num_seeds)
        total_seeds_given += num_seeds

    assert len(clusters) == len(seed_nums)
    return clusters, seed_nums
Exemplo n.º 11
0
def test_connected_communities():
    """Two bridged 5-cliques, a pair and a lone node give one of two partitions."""
    graph = nx.Graph()
    # community 1
    graph.add_edges_from([
        ('a', 'b'), ('c', 'a'), ('c', 'b'), ('d', 'a'), ('d', 'b'),
        ('d', 'c'), ('e', 'a'), ('e', 'b'), ('e', 'c'), ('e', 'd'),
    ])
    # community 2
    graph.add_edges_from([
        ('1', '2'), ('3', '1'), ('3', '2'), ('4', '1'), ('4', '2'),
        ('4', '3'), ('5', '1'), ('5', '2'), ('5', '3'), ('5', '4'),
    ])
    # edge between community 1 and 2
    graph.add_edge('a', '1')
    # community 3
    graph.add_edge('x', 'y')
    # community 4 with only a single node
    graph.add_node('z')

    letters = frozenset(['a', 'b', 'c', 'd', 'e'])
    digits = frozenset(['1', '2', '3', '4', '5'])
    pair = frozenset(['x', 'y'])
    single = frozenset(['z'])

    # Either the two cliques stay separate or they merge across the bridge.
    split_partition = {letters, digits, pair, single}
    merged_partition = {letters | digits, pair, single}
    acceptable = (split_partition, merged_partition)

    found = {frozenset(part) for part in label_propagation_communities(graph)}
    assert_in(found, acceptable)
Exemplo n.º 12
0
def test_connected_communities():
    """Accept either the split or the merged partition of two bridged cliques."""
    graph = nx.Graph()
    clique_one_edges = [
        ("a", "b"), ("c", "a"), ("c", "b"), ("d", "a"), ("d", "b"),
        ("d", "c"), ("e", "a"), ("e", "b"), ("e", "c"), ("e", "d"),
    ]
    clique_two_edges = [
        ("1", "2"), ("3", "1"), ("3", "2"), ("4", "1"), ("4", "2"),
        ("4", "3"), ("5", "1"), ("5", "2"), ("5", "3"), ("5", "4"),
    ]
    # community 1
    graph.add_edges_from(clique_one_edges)
    # community 2
    graph.add_edges_from(clique_two_edges)
    # edge between community 1 and 2
    graph.add_edge("a", "1")
    # community 3
    graph.add_edge("x", "y")
    # community 4 with only a single node
    graph.add_node("z")

    # The expected communities are one of these two partitions:
    separate = {
        frozenset("abcde"),
        frozenset("12345"),
        frozenset("xy"),
        frozenset("z"),
    }
    merged = {
        frozenset("abcde12345"),
        frozenset("xy"),
        frozenset("z"),
    }

    observed = set(map(frozenset, label_propagation_communities(graph)))
    assert observed in (separate, merged)
Exemplo n.º 13
0
	def label_propagation(self, *args):
		"""Run label propagation on the generated graph minus one hub's edges.

		``args[0]`` is passed to ``self.generate_graph``. Every edge between
		``args[1]`` and a node of that graph is removed before the remaining
		edges are handed to label propagation.
		"""
		G = self.generate_graph(args[0])
		hub = args[1]

		# Edges joining the hub to every node of G; these are excluded below.
		hub_edges = [(hub, node) for node in G.nodes()]
		NewGraph = nx.Graph()
		NewGraph.add_edges_from(hub_edges)

		remaining_edges = G.edges() - NewGraph.edges()
		G1 = nx.Graph()
		G1.add_edges_from(remaining_edges)

		return community.label_propagation_communities(G1)
Exemplo n.º 14
0
def test_unconnected_communities():
    """Two disjoint triangles must come back as two communities."""
    graph = nx.Graph()
    # community 1
    graph.add_edges_from([("a", "c"), ("a", "d"), ("d", "c")])
    # community 2
    graph.add_edges_from([("b", "e"), ("e", "f"), ("f", "b")])

    # The expected communities are the two triangles.
    expected = {frozenset(["a", "c", "d"]), frozenset(["b", "e", "f"])}

    found = {frozenset(part) for part in label_propagation_communities(graph)}
    assert found == expected
def test_unconnected_communities():
    """Each of two disconnected triangles forms its own community."""
    triangles = nx.Graph()
    first_triangle = [('a', 'c'), ('a', 'd'), ('d', 'c')]
    second_triangle = [('b', 'e'), ('e', 'f'), ('f', 'b')]
    # community 1
    triangles.add_edges_from(first_triangle)
    # community 2
    triangles.add_edges_from(second_triangle)

    observed = set()
    for members in label_propagation_communities(triangles):
        observed.add(frozenset(members))

    # The expected communities are:
    assert observed == {frozenset('acd'), frozenset('bef')}
Exemplo n.º 16
0
def test_connected_communities():
    """Check the partition found for two bridged cliques plus a pair and a node."""
    net = nx.Graph()
    edge_groups = (
        # community 1
        [('a', 'b'), ('c', 'a'), ('c', 'b'), ('d', 'a'), ('d', 'b'),
         ('d', 'c'), ('e', 'a'), ('e', 'b'), ('e', 'c'), ('e', 'd')],
        # community 2
        [('1', '2'), ('3', '1'), ('3', '2'), ('4', '1'), ('4', '2'),
         ('4', '3'), ('5', '1'), ('5', '2'), ('5', '3'), ('5', '4')],
        # edge between community 1 and 2
        [('a', '1')],
        # community 3
        [('x', 'y')],
    )
    for edges in edge_groups:
        net.add_edges_from(edges)
    # community 4 with only a single node
    net.add_node('z')

    # The expected communities are one of the following two partitions.
    tail = [frozenset(['x', 'y']), frozenset(['z'])]
    cliques_apart = set([frozenset(['a', 'b', 'c', 'd', 'e']),
                         frozenset(['1', '2', '3', '4', '5'])] + tail)
    cliques_joined = set([frozenset(['a', 'b', 'c', 'd', 'e',
                                     '1', '2', '3', '4', '5'])] + tail)

    outcome = {frozenset(group) for group in label_propagation_communities(net)}
    assert_in(outcome, (cliques_apart, cliques_joined))
Exemplo n.º 17
0
def test_unconnected_communities():
    """Label propagation must separate two triangles that share no edge."""
    net = nx.Graph()
    # community 1
    net.add_edges_from([('a', 'c'), ('a', 'd'), ('d', 'c')])
    # community 2
    net.add_edges_from([('b', 'e'), ('e', 'f'), ('f', 'b')])

    outcome = set(map(frozenset, label_propagation_communities(net)))

    # The expected communities are:
    wanted = {frozenset(['a', 'c', 'd']), frozenset(['b', 'e', 'f'])}
    assert_equal(outcome, wanted)
Exemplo n.º 18
0
    def __init__(self, nwtk_dict, root = None, vis_name = None, netX = False, edgelist = None):
        """Regroup ``nwtk_dict``, save the result as a .dat file, optionally score it.

        Parameters:
            nwtk_dict: passed to ``self.new_pairs_`` to build the grouping.
            root, vis_name: forwarded to ``self.saving_new_g`` together with
                the "dat" format and the generated community node list.
            netX: when exactly ``True``, also read ``edgelist`` with networkx
                and print the modularity of its label-propagation partition.
            edgelist: path of the edge list to read when ``netX`` is True.
        """
        new_pair_dict = self.new_pairs_(nwtk_dict)

        # saving the files as .dat format
        nx_mid_list = self.gen_comm_node(list(new_pair_dict.values()))
        self.saving_new_g(root, vis_name, "dat", nx_mid_list)
        print("saving success!")

        if netX is True :

            # required format for modularity on networkx
            # nx_comm_list_2 = nx_list(list(new_pair_dict.values()))

            # Check modularity with networkX
            nx_G = nx.read_edgelist(edgelist)
            modul_nx = nx_comm.modularity(nx_G, nx_comm.label_propagation_communities(nx_G))
            # Apply the format: the old call printed a literal "%d" followed
            # by the value, and %d would have truncated the float anyway.
            print("Modularity from networkX %f" % modul_nx)
Exemplo n.º 19
0
def ksc_centrality(G, alpha=0.5, beta=0.5):
    '''
    Combine a k-shell based score with a community based score per node.

    Paper: https://www.researchgate.net/publication/262391998_A_New_Approach_to_Identify_Influential_Spreaders_in_Complex_Networks

    The internal influence of a node is its core number divided by the
    largest core number. The external influence sums, over the node's
    neighbours, the size of the label-propagation community each neighbour
    belongs to (the node's own community included), divided by the largest
    such sum. A zero maximum yields 0.0 for every node instead of dividing
    by zero.

    @param G: graph
    @param alpha: weight of the internal influence
    @param beta: weight of the external influence
    @return dict mapping each node to alpha * internal + beta * external;
    empty for an empty graph
    '''
    core_numbers = core_number(G)
    max_core_number = max(core_numbers.values(), default=0)
    # internal influence
    f_internal = {}
    for v in G:
        f_internal[v] = core_numbers[v] / max_core_number if max_core_number else 0.0

    # Every node sits in exactly one community, so record that community's
    # size once instead of rescanning every cluster for every node.
    community_size = {}
    for cluster in community.label_propagation_communities(G):
        size = len(cluster)
        for node in cluster:
            community_size[node] = size

    # external influence
    f_external = {}
    for v in G:
        f_external[v] = sum(community_size[node] for node in G.neighbors(v))

    # normalize by maximum influence
    max_influence = max(f_external.values(), default=0)
    for v in G:
        f_external[v] = f_external[v] / max_influence if max_influence else 0.0

    # total influence
    f_total = {}
    for v in G:
        f_total[v] = f_internal[v] * alpha + f_external[v] * beta

    return f_total
def label_propagation(G, weight='weight', iterNum=6):
    '''Community detection using label propagation algorithm.

    The detection is repeated ``iterNum`` times on the undirected version
    of ``G`` and the partition with the highest modularity is kept.

    Parameters
    ----------
    G : networkx.graph

    weight : edge attribute if G is weighted or None if G is unweighted

    iterNum : number to repeat label propagation algorithm

    Returns
    -------
    list_communities : list
        A list of sets, and each set contains vertices in one community.
        Empty when ``iterNum`` is not positive.

    Notes
    -----
    This function only deals with weighted and unweighted undirected graph.
    '''
    # H is the undirected version of graph G
    H = G.to_undirected()
    max_modularity = float('-inf')
    # Bound up front so a non-positive iterNum returns an empty list instead
    # of raising UnboundLocalError at the return statement.
    list_communities = []
    for _ in range(iterNum):
        if weight is None:
            cur_list_communities = list(
                community.label_propagation_communities(H))
        else:
            cur_list_communities = list(
                community.asyn_lpa_communities(H, weight=weight))

        cur_modularity = quality.modularity(H, cur_list_communities)
        if cur_modularity > max_modularity:
            list_communities = cur_list_communities
            max_modularity = cur_modularity

    return list_communities
Exemplo n.º 21
0
      (ex) 0 1 2 4 5 20 39 : these nodes are in same community 
'''

# Load a benchmark community file through the project's convertDat helper.
convert_comm = convertDat("/content/drive/MyDrive/Data/sourcefile/benchmark/community/community_50k_0.30-mu_1.dat")
# decreaseBy1 presumably shifts ids to be 0-based -- TODO confirm in convertDat.
_, comm_num, pair_dict = convert_comm.decreaseBy1("comm")
# Two derived forms of the same grouping: one to save, one for networkx.
comm_list = convert_comm.genCommNode(list(pair_dict.values()))
nx_comm_list = convert_comm.nx_list(list(pair_dict.values()))
convert_comm.savingNewG("500_030_c_0202","dat",comm_list)

# Bare expression: only displays a value in an interactive/notebook session.
nx_comm_list

import networkx as nx
import networkx.algorithms.community as nx_comm
nx_G = nx.read_edgelist("/content/drive/MyDrive/Data/sourcefile/benchmark/convert_ntwk/500_030_0201.edgelist")

# Modularity of the label-propagation partition of the edge-list graph
# (the result is not stored; it is only shown when run interactively).
nx_comm.modularity(nx_G, nx_comm.label_propagation_communities(nx_G))

# mid_point maps the integer keys 0..499 to integer values; presumably
# node index -> community id -- TODO confirm.
mid_point = {0: 8, 1: 10, 2: 8, 3: 6, 4: 7, 5: 8, 6: 8, 7: 8, 8: 8, 9: 8, 10: 8, 11: 10, 12: 8, 13: 8, 14: 8, 15: 8, 16: 21, 17: 10, 18: 8, 19: 0, 20: 10, 21: 7, 22: 10, 23: 8, 24: 8, 25: 8, 26: 8, 27: 8, 28: 8, 29: 7, 30: 8, 31: 10, 32: 10, 33: 10, 34: 16, 35: 7, 36: 10, 37: 10, 38: 8, 39: 15, 40: 2, 41: 17, 42: 10, 43: 10, 44: 8, 45: 8, 46: 8, 47: 11, 48: 10, 49: 7, 50: 8, 51: 10, 52: 8, 53: 8, 54: 8, 55: 10, 56: 8, 57: 10, 58: 8, 59: 8, 60: 14, 61: 10, 62: 7, 63: 8, 64: 10, 65: 10, 66: 10, 67: 10, 68: 1, 69: 7, 70: 10, 71: 10, 72: 11, 73: 10, 74: 8, 75: 8, 76: 10, 77: 7, 78: 8, 79: 8, 80: 8, 81: 10, 82: 10, 83: 7, 84: 18, 85: 10, 86: 8, 87: 8, 88: 10, 89: 10, 90: 7, 91: 8, 92: 19, 93: 8, 94: 10, 95: 8, 96: 10, 97: 6, 98: 10, 99: 8, 100: 10, 101: 8, 102: 8, 103: 3, 104: 8, 105: 7, 106: 8, 107: 8, 108: 8, 109: 8, 110: 10, 111: 8, 112: 7, 113: 8, 114: 7, 115: 8, 116: 8, 117: 7, 118: 21, 119: 8, 120: 1, 121: 7, 122: 8, 123: 8, 124: 8, 125: 8, 126: 8, 127: 8, 128: 8, 129: 10, 130: 10, 131: 8, 132: 8, 133: 8, 134: 7, 135: 8, 136: 10, 137: 8, 138: 8, 139: 8, 140: 8, 141: 8, 142: 7, 143: 7, 144: 10, 145: 8, 146: 4, 147: 8, 148: 7, 149: 8, 150: 8, 151: 10, 152: 10, 153: 10, 154: 10, 155: 8, 156: 9, 157: 10, 158: 8, 159: 10, 160: 8, 161: 8, 162: 8, 163: 10, 164: 8, 165: 8, 166: 8, 167: 8, 168: 8, 169: 8, 170: 7, 171: 10, 172: 8, 173: 7, 174: 10, 175: 8, 176: 7, 177: 8, 178: 7, 179: 10, 180: 18, 181: 8, 182: 8, 183: 8, 184: 10, 185: 8, 186: 8, 187: 10, 188: 10, 189: 7, 190: 7, 191: 8, 192: 8, 193: 20, 194: 8, 195: 10, 196: 8, 197: 5, 198: 8, 199: 7, 200: 8, 201: 7, 202: 8, 203: 8, 204: 8, 205: 8, 206: 8, 207: 8, 208: 8, 209: 8, 210: 8, 211: 13, 212: 8, 213: 10, 214: 7, 215: 8, 216: 7, 217: 7, 218: 8, 219: 10, 220: 10, 221: 10, 222: 8, 223: 8, 224: 7, 225: 8, 226: 8, 227: 8, 228: 10, 229: 8, 230: 10, 231: 7, 232: 10, 233: 8, 234: 8, 235: 8, 236: 8, 237: 8, 238: 10, 239: 8, 240: 8, 241: 2, 242: 8, 243: 19, 244: 10, 245: 10, 246: 10, 247: 7, 248: 10, 249: 8, 250: 10, 251: 8, 
252: 7, 253: 7, 254: 7, 255: 8, 256: 8, 257: 8, 258: 8, 259: 10, 260: 8, 261: 8, 262: 8, 263: 8, 264: 10, 265: 8, 266: 8, 267: 10, 268: 10, 269: 8, 270: 8, 271: 10, 272: 8, 273: 8, 274: 8, 275: 8, 276: 8, 277: 8, 278: 8, 279: 8, 280: 7, 281: 8, 282: 8, 283: 7, 284: 8, 285: 8, 286: 10, 287: 10, 288: 7, 289: 8, 290: 10, 291: 8, 292: 10, 293: 8, 294: 8, 295: 8, 296: 8, 297: 10, 298: 7, 299: 8, 300: 7, 301: 7, 302: 8, 303: 7, 304: 10, 305: 8, 306: 8, 307: 8, 308: 12, 309: 10, 310: 8, 311: 8, 312: 7, 313: 8, 314: 10, 315: 8, 316: 10, 317: 10, 318: 8, 319: 7, 320: 8, 321: 7, 322: 10, 323: 8, 324: 7, 325: 10, 326: 7, 327: 10, 328: 10, 329: 8, 330: 10, 331: 8, 332: 10, 333: 8, 334: 8, 335: 10, 336: 8, 337: 8, 338: 8, 339: 8, 340: 10, 341: 10, 342: 10, 343: 8, 344: 10, 345: 8, 346: 7, 347: 8, 348: 7, 349: 8, 350: 7, 351: 8, 352: 8, 353: 8, 354: 8, 355: 8, 356: 8, 357: 8, 358: 8, 359: 7, 360: 8, 361: 10, 362: 8, 363: 10, 364: 10, 365: 7, 366: 10, 367: 10, 368: 10, 369: 10, 370: 10, 371: 8, 372: 8, 373: 7, 374: 10, 375: 10, 376: 8, 377: 8, 378: 7, 379: 8, 380: 10, 381: 10, 382: 10, 383: 8, 384: 8, 385: 8, 386: 7, 387: 10, 388: 8, 389: 7, 390: 8, 391: 7, 392: 7, 393: 10, 394: 7, 395: 10, 396: 7, 397: 7, 398: 8, 399: 10, 400: 10, 401: 8, 402: 10, 403: 7, 404: 7, 405: 7, 406: 8, 407: 10, 408: 10, 409: 8, 410: 7, 411: 10, 412: 7, 413: 10, 414: 8, 415: 7, 416: 7, 417: 7, 418: 7, 419: 10, 420: 7, 421: 8, 422: 8, 423: 8, 424: 7, 425: 7, 426: 7, 427: 8, 428: 10, 429: 8, 430: 8, 431: 10, 432: 7, 433: 7, 434: 8, 435: 10, 436: 10, 437: 7, 438: 7, 439: 7, 440: 8, 441: 8, 442: 8, 443: 10, 444: 7, 445: 7, 446: 7, 447: 7, 448: 10, 449: 8, 450: 8, 451: 7, 452: 7, 453: 8, 454: 7, 455: 10, 456: 10, 457: 7, 458: 7, 459: 7, 460: 10, 461: 10, 462: 10, 463: 7, 464: 7, 465: 10, 466: 7, 467: 7, 468: 7, 469: 7, 470: 7, 471: 10, 472: 10, 473: 7, 474: 10, 475: 10, 476: 7, 477: 7, 478: 7, 479: 7, 480: 7, 481: 7, 482: 7, 483: 7, 484: 10, 485: 10, 486: 10, 487: 7, 488: 7, 489: 10, 490: 10, 491: 7, 492: 7, 
493: 10, 494: 10, 495: 10, 496: 7, 497: 7, 498: 7, 499: 7}

def new_pairs_(p_dict):
    """Group the indices 0..len(p_dict)-1 by the value ``p_dict`` holds for them.

    NOTE(review): no ``return`` is visible in this view, so the grouping is
    built but the function returns None -- the definition may be truncated.
    """
    grouped = {}
    for position in range(len(p_dict)):
        # setdefault creates the bucket on first sight of a value.
        grouped.setdefault(p_dict[position], []).append(position)
Exemplo n.º 22
0
def eval_labelprop(graph):
    """Return the label-propagation communities of ``graph``, largest first.

    NOTE(review): the original summary mentioned caching, but no caching is
    visible in this block -- confirm whether a decorator exists elsewhere.
    """
    return sorted(label_propagation_communities(graph), key=len, reverse=True)
Exemplo n.º 23
0
def get_bias(text, index, visualise=False, filename="graph.png", folder=False):
    """Build a word co-occurrence graph from ``text`` and score its modularity.

    Words that appear in the same sentence are linked by an edge whose
    weight counts the co-occurrences. Connected components with fewer than
    11 nodes are dropped. The graph is then drawn and, if requested,
    saved and/or shown.

    Parameters:
        text: the article text to analyse.
        index: used to build the saved figure's file name when ``folder``
            is set.
        visualise: when true, show the figure with ``plt.show()``.
        filename: replaced by a generated name when ``folder`` is set, and
            not used otherwise.
        folder: when truthy, prefix for the generated file name; the figure
            is saved under ``SAVED_FIGURES``.

    Returns:
        ``(modularity_score, top_words)``: the modularity of the
        label-propagation partition (0 if it could not be computed) and up
        to five words with the highest positive betweenness centrality.
    """
    G = nx.Graph()
    ## convert lowercase
    first_article = text.lower()
    ## remove html tags from improper scraping
    ## NOTE(review): the pattern is greedy, so it removes everything between
    ## the first "<" and the last ">" on a line -- confirm this is intended.
    first_article = re.sub(r"\<(.)+\>", " ", first_article)
    ## replace non alpha-numeric + . + & + \s with \s
    ## kept the apostrophe (chr(8217))
    special_apostraphe = chr(8217)
    first_article = re.sub(rf"[^0-9a-zA-Z\.\&\s{special_apostraphe}]+", " ",
                           first_article)
    ## remove random new lines
    first_article = re.sub("\n", " ", first_article)

    ## loop through the sentences
    for sentence in first_article.split(". "):
        ## ignore empty strings
        if sentence.strip() == "":
            continue
        cleaned_sentence = [
            stemmer.stem(word) for word in sentence.split(" ")
            if word not in spacy_stopwords and word.strip() != ""
        ]
        ## link every word with each word that follows it in the sentence
        for i in range(len(cleaned_sentence) - 1):
            word = cleaned_sentence[i]
            for other_word in cleaned_sentence[i + 1:]:
                if G.get_edge_data(word, other_word) is None:
                    G.add_edge(word, other_word, weight=0)
                old_weight = G.get_edge_data(word, other_word)["weight"]
                G.add_edge(word, other_word, weight=old_weight + 1)

    ## remove nodes that sit in small components (fewer than 11 nodes)
    for island in list(nx.connected_components(G)):
        if len(island) < 11:
            G.remove_nodes_from(island)

    ## get list of betweenness_centrality scores to find most influential words
    ranked = sorted(betweenness_centrality(G).items(),
                    key=lambda item: item[1],
                    reverse=True)[:5]
    top_words_with_scores = {k: v for k, v in ranked if v > 0}

    ## determining modularity
    try:
        modularity_score = nx_comm.modularity(
            G, nx_comm.label_propagation_communities(G))
    except Exception:
        ## best effort: score 0 on ordinary failures, but do not swallow
        ## KeyboardInterrupt/SystemExit as the bare except did
        modularity_score = 0

    if folder:
        filename = folder + str(index) + " - " + str(modularity_score) + ".png"

    ## for visualisation
    plt.figure(figsize=(100, 100))
    pos = nx.spring_layout(G)
    y_off = 10  ## offset value
    pos_higher = {k: (v[0], v[1] + y_off) for k, v in pos.items()}
    nx.draw(G, pos_higher, with_labels=True, node_size=60)
    plt.axis("off")

    if folder:
        plt.savefig(f"{SAVED_FIGURES}{filename}")

    if visualise:
        plt.show()

    plt.close()

    return modularity_score, list(top_words_with_scores.keys())
Exemplo n.º 24
0
# g=nx.windmill_graph(8,4)
# N is the number of nodes of g; W is an N x N matrix filled with 0/1 below.
N = len(g)
W = np.zeros((N, N))
# Print every node with its neighbours and mark each adjacency in W.
# Indexing W by node assumes the nodes are the integers 0..N-1 -- TODO confirm.
for i in g:
    print(i, end="-> ")
    for j in nx.neighbors(g, i):
        print(j, end=" ")
        W[i][j] = 1
        W[j][i] = 1
    print()

# networkx community detection

# Four reference partitions of g, each materialised as a list.
gmc = list(greedy_modularity_communities(g))
alc = list(asyn_lpa_communities(g))
lpac = list(label_propagation_communities(g))
asfl = list(asyn_fluidc(g, 3))

# initialization
anchorList = set([])
U = {}
# U gets a zero vector under key N and under the randomly picked first anchor.
U[N] = np.zeros(N)
randomFirstAnchor = random.randint(0, N - 1)
anchorList.add(randomFirstAnchor)
U[randomFirstAnchor] = np.zeros(N)
phi = 0.25
itermax = 15
K = 1
adjacentNodes = {}
# NOTE(review): the loop body continues past what is visible here.
for i in range(N):
    tmp = []
Exemplo n.º 25
0
    def get_communities(self, station_df):
        """Detect communities in the simple graph built from ``station_df``.

        The algorithm is chosen by ``self.name``: 'ASYNC-LPA', 'LPC',
        'GREEDY', 'NAIVE-GREEDY', 'LUKES', 'ASYNC-FLUID' or 'GIRVAN-NEWMAN'.
        Any error is caught and printed rather than raised.

        Parameters:
            station_df: data passed to ``self.get_simple_graph``.

        Returns:
            ``(g_simple_, g_communities_)``: the graph after
            ``self.set_graph_cluster_labels`` and the list of communities.
            On failure the values are whatever was reached before the error:
            ``g_simple_`` is ``None`` if the graph could not be built and
            ``g_communities_`` is ``[]`` if detection did not finish.
        """

        import networkx as nx
        import networkx.algorithms.community as nx_comm

        # Bound before the try so the final return cannot hit an unbound
        # local when get_simple_graph itself raises.
        g_simple_ = None
        g_communities_ = []

        try:
            # sample the graph
            g_simple_ = self.get_simple_graph(station_df)

            if nx.is_empty(g_simple_):
                raise ValueError(
                    'A simple graph with %d stations was not created' %
                    station_df.shape[0])

            if self.name == 'ASYNC-LPA':  # asyn_lpa_communities
                g_communities_ = list(
                    nx_comm.asyn_lpa_communities(g_simple_,
                                                 weight=self.weight,
                                                 seed=self.seed))

            elif self.name == 'LPC':  # label_propagation_communities
                g_communities_ = list(
                    nx_comm.label_propagation_communities(g_simple_))

            elif self.name == 'GREEDY':  # greedy_modularity_communities
                g_communities_ = list(
                    nx_comm.greedy_modularity_communities(g_simple_))

            elif self.name == 'NAIVE-GREEDY':  # _naive_greedy_modularity_communities
                g_communities_ = list(
                    nx_comm._naive_greedy_modularity_communities(g_simple_))

            elif self.name == 'LUKES':  # lukes_partitioning
                # TODO: create MST of g_simple first but removing the minimum weighted edge doesn't seem right
                g_communities_ = list(
                    nx_comm.lukes_partitioning(
                        g_simple_,
                        edge_weight=self.weight,
                        max_size=self.maximum_node_weight))

            elif self.name == 'ASYNC-FLUID':  # asyn_fluidc
                # TODO: create complete graph for g_simple but a complete graph would not work
                g_communities_ = list(
                    nx_comm.asyn_fluidc(g_simple_,
                                        k=15,
                                        max_iter=300,
                                        seed=self.seed))

            elif self.name == 'GIRVAN-NEWMAN':  # girvan_newman
                # Only the first level of the Girvan-Newman hierarchy is used.
                g_communities_ = list(next(nx_comm.girvan_newman(g_simple_)))

            else:
                raise AttributeError("something was not right")

            if isinstance(g_communities_, list):
                g_simple_ = self.set_graph_cluster_labels(
                    g_simple_, g_communities_)

        except Exception as err:
            print("Class community_detection [get_communities] Error message:",
                  err)

        return g_simple_, g_communities_
Exemplo n.º 26
0
import networkx as nx
from networkx.algorithms.community import label_propagation_communities, greedy_modularity_communities, asyn_fluidc

# Input graph and companion settings for the clustering comparison.
graph_name = 'simpledistros.gml'
number_subreddits = 6
infomap_clu = 'simpledistros.clu'

# Relabel the nodes as consecutive integers starting at 1.
G = nx.read_gml(graph_name)
G = nx.convert_node_labels_to_integers(G, first_label=1)

# Get the label propagation clustering
print('Label propagation clustering...')
lpa_gen = label_propagation_communities(G)
partition = []
for community in lpa_gen:
    part = sorted(community)
    partition.append(part)
partition = tuple(partition)
# Store each node's community index in its 'lpa' attribute. G.nodes is used
# because the G.node alias was removed in networkx 2.4.
for v in G:
    for i, part in enumerate(partition):
        if v in part:
            G.nodes[v]['lpa'] = i
            # A node belongs to one community only, so stop searching.
            break

# Get the modularity clustering
print('Modularity clustering...')
comp = greedy_modularity_communities(G)
partition = []
for community in comp:
    part = sorted(community)
    partition.append(part)
Exemplo n.º 27
0
def labelPropagation(graph):
    """Return the label-propagation communities of ``graph`` as a list."""
    return list(community.label_propagation_communities(graph))
from graph import load_graph, generate_graph, draw_graph
from sknetwork.clustering import PropagationClustering
from sknetwork.utils import edgelist2adjacency
import numpy as np
from networkx.algorithms.community import asyn_lpa_communities, label_propagation_communities

if __name__ == '__main__':
    # Run label propagation with scikit-network and with NetworkX on the
    # same graph and print the number and sizes of the clusters each finds.
    print("Label Propagation Algorithm of Scikit NetWork & Network X")

    # use a simple graph with 400 nodes and about 20,000 edges
    G, _edges, _ = load_graph('data/n400_p0.8_q0.1.txt')

    print("--Scikit Network--")
    clusterer = PropagationClustering()
    adjacency_matrix = edgelist2adjacency(list(G.edges))
    sk_labels = clusterer.fit_transform(adjacency_matrix)
    # np.unique with return_counts=True yields the distinct labels and
    # how many nodes carry each of them.
    distinct_labels, cluster_sizes = np.unique(sk_labels, return_counts=True)
    print("Number of clusters/labels:", len(distinct_labels))
    print("Partition of nodes in different clusters/labels:",
          list(cluster_sizes))

    print("--NetWorkX--")
    nx_communities = list(label_propagation_communities(G))
    print("Number of clusters/labels:", len(nx_communities))
    print("Partition of nodes in different clusters/labels:",
          list(map(len, nx_communities)))
Exemplo n.º 29
0
def _community_colors(nodes, communities):
    """Return one community index per node, in the order of ``nodes``.

    A node that belongs to no community keeps index 0; if a node appears
    in several communities, the last community in ``communities`` wins.
    """
    colors = [0] * len(nodes)
    for index, members in enumerate(communities):
        for position, node in enumerate(nodes):
            if node in members:
                colors[position] = index
    return colors


# Colour the nodes by the partition currently held in ``c``.
nlist = list(G.nodes())
ncs = _community_colors(nlist, c)

plt.figure()
plt.title("Modularity")
nx.draw(G, node_color=ncs)
plt.show()

# Repeat the drawing with the label-propagation partition.
c = list(label_propagation_communities(G))

nlist = list(G.nodes())
ncs = _community_colors(nlist, c)

plt.figure()
plt.title("Label Propagation")
nx.draw(G, node_color=ncs)
plt.show()
Exemplo n.º 30
0
def detect_communities_label_propagation(G):
    """Return the label-propagation communities of ``G`` in sorted form.

    Each community is turned into a sorted list of its nodes, and the
    list of communities is itself sorted before being returned.
    """
    # asyn_lpa_communities(G) was tried here as an alternative source.
    return sorted(sorted(members)
                  for members in label_propagation_communities(G))
Exemplo n.º 31
0
def community_lpa(G, **kwargs):
    """Return the label-propagation communities of ``G`` as a list.

    Extra keyword arguments are accepted but not used.
    """
    found = community.label_propagation_communities(G)
    return list(found)
Exemplo n.º 32
0
def get_communities_label_propagation(G):
    """Collect the label-propagation communities of ``G`` into a list."""
    return [*community.label_propagation_communities(G)]
Exemplo n.º 33
0
def find_communities(nnodes, edges, alg, params=None):
    """Run the community-detection algorithm named ``alg`` on an edge list.

    Args:
        nnodes: Number of vertices. Used to size the igraph graph and to
            enumerate node ids for ``'sbm_nested'``.
        edges: Edge list handed to the graph constructor of the backend
            that implements ``alg``.
        alg: Name of the algorithm; selects one of the branches below.
        params: Optional mapping of algorithm-specific settings. Each
            branch converts the values it needs with ``int()``/``float()``.
            The mapping is copied, so the caller's object is not modified.

    Returns:
        A list of communities, ``[]`` when an igraph dendrogram cannot be
        converted to a clustering, or ``None`` when ``alg`` is not a
        recognised algorithm name.
    """
    # Work on a private copy: several branches below assign into ``params``.
    params = dict(params) if params is not None else {}

    def membership2cs(membership):
        # Group item positions by their membership label.
        cs = {}
        for i, m in enumerate(membership):
            cs.setdefault(m, []).append(i)
        return cs.values()

    def connected_subgraphs(G: "nx.Graph"):
        # Yield every connected component as an integer-labelled subgraph;
        # the original node label is kept in the 'old' node attribute.
        for comp in nx.connected_components(G):
            sub = nx.induced_subgraph(G, comp)
            sub = nx.convert_node_labels_to_integers(sub,
                                                     label_attribute='old')
            yield sub

    def apply_subgraphs(algorithm, **params):
        # Run ``algorithm`` on each connected component of the enclosing
        # ``G`` and translate the results back to the original node labels.
        cs = []
        for sub in connected_subgraphs(G):
            if len(sub.nodes) <= 3:
                coms = [sub.nodes]  # let it be a cluster
            else:
                coms = algorithm(sub, **params)
                if hasattr(coms, 'communities'):
                    coms = coms.communities

            for com in coms:
                cs.append([sub.nodes[i]['old'] for i in set(com)])
        return cs

    def karate_apply(algorithm, graph, **params):
        # Fit a model built by ``algorithm(**params)`` and group nodes by
        # the memberships it reports.
        model = algorithm(**params)
        model.fit(graph)
        return membership2cs(model.get_memberships().values())

    if alg == 'big_clam':
        c = -1 if params['c'] == 'auto' else int(params['c'])
        cs = BigClam('../../snap').run(edges, c=c, xc=int(params['xc']))
    elif alg in ('gmm', 'kclique', 'lprop', 'lprop_async', 'fluid',
                 'girvan_newman', 'angel', 'congo', 'danmf', 'egonet_splitter',
                 'lfm', 'multicom', 'nmnf', 'nnsed', 'node_perception', 'slpa',
                 'GEMSEC', 'EdMot', 'demon'):
        G = nx.Graph()
        G.add_edges_from(edges)

        if alg == 'gmm':
            cs = community.greedy_modularity_communities(G)
        elif alg == 'kclique':
            params = {k: float(v) for k, v in params.items()}
            cs = community.k_clique_communities(G, **params)
        elif alg == 'lprop':
            cs = community.label_propagation_communities(G)
        elif alg == 'lprop_async':
            cs = community.asyn_lpa_communities(G, seed=0)
        elif alg == 'fluid':
            params = {k: int(v) for k, v in params.items()}
            params['seed'] = 0
            cs = apply_subgraphs(community.asyn_fluidc, **params)
        elif alg == 'girvan_newman':
            comp = community.girvan_newman(G)
            # Keep the last of the first ``k`` partitions the generator yields.
            for cs in itertools.islice(comp, int(params['k'])):
                pass
        elif alg == 'angel':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.angel(G, **params).communities
        elif alg == 'congo':  # too slow
            ncoms = int(params['number_communities'])
            cs = []
            for sub in connected_subgraphs(G):
                if len(sub.nodes) <= max(3, ncoms):
                    cs.append(sub.nodes)  # let it be a cluster
                else:
                    coms = cdlib.congo(sub,
                                       number_communities=ncoms,
                                       height=int(params['height']))
                    for com in coms.communities:
                        cs.append([sub.nodes[i]['old'] for i in set(com)])
        elif alg == 'danmf':  # no overlapping
            cs = apply_subgraphs(cdlib.danmf)
        elif alg == 'egonet_splitter':
            params['resolution'] = float(params['resolution'])
            cs = apply_subgraphs(cdlib.egonet_splitter, **params)
        elif alg == 'lfm':
            coms = cdlib.lfm(G, float(params['alpha']))
            cs = coms.communities
        elif alg == 'multicom':
            cs = cdlib.multicom(G, seed_node=0).communities
        elif alg == 'nmnf':
            params = {k: int(v) for k, v in params.items()}
            cs = apply_subgraphs(cdlib.nmnf, **params)
        elif alg == 'nnsed':
            cs = apply_subgraphs(cdlib.nnsed)
        elif alg == 'node_perception':  # not usable
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.node_perception(G, **params).communities
        elif alg == 'slpa':
            params["t"] = int(params["t"])
            params["r"] = float(params["r"])
            cs = cdlib.slpa(G, **params).communities
        elif alg == 'demon':
            params = {k: float(v) for k, v in params.items()}
            cs = cdlib.demon(G, **params).communities
        elif alg == 'GEMSEC':
            # gamma = float(params.pop('gamma'))
            params = {k: int(v) for k, v in params.items()}
            # params['gamma'] = gamma
            params['seed'] = 0
            _wrap = partial(karate_apply, karateclub.GEMSEC)
            cs = apply_subgraphs(_wrap, **params)
        elif alg == 'EdMot':
            params = {k: int(v) for k, v in params.items()}
            _wrap = partial(karate_apply, karateclub.EdMot)
            cs = apply_subgraphs(_wrap, **params)

    elif alg in ('infomap', 'community_leading_eigenvector', 'leig',
                 'multilevel', 'optmod', 'edge_betweenness', 'spinglass',
                 'walktrap', 'leiden', 'hlc'):
        G = igraph.Graph()
        G.add_vertices(nnodes)
        G.add_edges(edges)

        if alg == 'infomap':
            vcl = G.community_infomap(trials=int(params['trials']))
            cs = membership2cs(vcl.membership)
        elif alg in ('leig', 'community_leading_eigenvector'):
            # The long name is accepted by the dispatch tuple above but had
            # no branch of its own, so it used to fail with an unbound
            # ``cs``; it is treated as an alias of 'leig'.
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            vcl = G.community_leading_eigenvector(clusters=clusters)
            cs = membership2cs(vcl.membership)
        elif alg == 'multilevel':
            vcl = G.community_multilevel()
            cs = membership2cs(vcl.membership)
        elif alg == 'optmod':  # too long
            # Accept either a clustering object exposing ``membership`` or
            # a ``(membership, modularity)`` pair. The previous code read
            # an unassigned ``vcl`` here and always raised.
            result = G.community_optimal_modularity()
            membership = getattr(result, 'membership', None)
            if membership is None:
                membership, _modularity = result
            cs = membership2cs(membership)
        elif alg == 'edge_betweenness':
            clusters = None if params['clusters'] == 'auto' else int(
                params['clusters'])
            dendrogram = G.community_edge_betweenness(clusters, directed=False)
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort: report no communities if the dendrogram
                # cannot be converted to a clustering.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'spinglass':  # only for connected graph
            vcl = G.community_spinglass(parupdate=True,
                                        update_rule=params['update_rule'],
                                        start_temp=float(params['start_temp']),
                                        stop_temp=float(params['stop_temp']))
            cs = membership2cs(vcl.membership)
        elif alg == 'walktrap':
            dendrogram = G.community_walktrap(steps=int(params['steps']))
            try:
                clusters = dendrogram.as_clustering()
            except Exception:
                # Best effort, as for 'edge_betweenness'.
                return []
            cs = membership2cs(clusters.membership)
        elif alg == 'leiden':
            vcl = G.community_leiden(
                objective_function=params['objective_function'],
                resolution_parameter=float(params['resolution_parameter']),
                n_iterations=int(params['n_iterations']))
            cs = membership2cs(vcl.membership)
        elif alg == 'hlc':
            algorithm = HLC(G, min_size=int(params['min_size']))
            cs = algorithm.run(None)

    elif alg in ("sbm", "sbm_nested"):
        # Fixed seeds so repeated runs give the same block model.
        np.random.seed(42)
        gt.seed_rng(42)

        G = gt.Graph(directed=False)
        G.add_edge_list(edges)

        # NOTE(review): bool() of any non-empty string is True, so a string
        # such as 'False' would enable deg_corr -- confirm the caller passes
        # a real boolean here.
        deg_corr = bool(params['deg_corr'])
        B_min = None if params['B_min'] == 'auto' else int(params['B_min'])
        B_max = None if params['B_max'] == 'auto' else int(params['B_max'])

        if alg == "sbm":
            state = gt.minimize_blockmodel_dl(G,
                                              deg_corr=deg_corr,
                                              B_min=B_min,
                                              B_max=B_max)

            membership = state.get_blocks()
            cs = membership2cs(membership)
        elif alg == "sbm_nested":
            state = gt.minimize_nested_blockmodel_dl(G,
                                                     deg_corr=deg_corr,
                                                     B_min=B_min,
                                                     B_max=B_max)
            levels = state.get_bs()
            level_max = int(params['level'])

            membership = {}
            for nid in range(nnodes):
                # Follow the node through the levels in order; the counter
                # starts at len(levels) and drops by one per level, and the
                # block reached when it equals ``level_max`` becomes the
                # node's community.
                cid = nid
                level_i = len(levels)
                for level in levels:
                    cid = level[cid]
                    if level_i == level_max:
                        membership.setdefault(cid, []).append(nid)
                        break
                    level_i -= 1

            cs = membership.values()

    else:
        return None

    return list(cs)
Exemplo n.º 34
0
import networkx as nx
from networkx.algorithms import community
import matplotlib.pyplot as plt

# Load the data
football_graph = nx.read_gml('./football.gml')
# Visualise the graph
nx.draw(football_graph, with_labels=True)
plt.show()
# Detect communities with label propagation
detected = [*community.label_propagation_communities(football_graph)]
print(detected)
print(len(detected))