def test_modularity_communities_directed_weighted():
    G = nx.DiGraph()
    G.add_weighted_edges_from(
        [
            (1, 2, 5),
            (1, 3, 3),
            (2, 3, 6),
            (2, 6, 1),
            (1, 4, 1),
            (4, 5, 3),
            (4, 6, 7),
            (5, 6, 2),
            (5, 7, 5),
            (5, 8, 4),
            (6, 8, 3),
        ]
    )
    expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})]
    assert greedy_modularity_communities(G, weight="weight") == expected

    # A large weight on edge (2, 6) causes node 6 to switch groups, even though
    # it shares only one connection with its new group and three with its old one.
    G[2][6]["weight"] = 20
    expected = [frozenset({1, 2, 3, 6}), frozenset({4, 5, 7, 8})]
    assert greedy_modularity_communities(G, weight="weight") == expected
Example #2
def test_greedy_modularity_communities_directed():
    G = nx.DiGraph(
        [
            ("a", "b"),
            ("a", "c"),
            ("b", "c"),
            ("b", "d"),  # inter-community edge
            ("d", "e"),
            ("d", "f"),
            ("d", "g"),
            ("f", "g"),
            ("d", "e"),
            ("f", "e"),
        ]
    )
    expected = [frozenset({"f", "g", "e", "d"}), frozenset({"a", "b", "c"})]
    assert greedy_modularity_communities(G) == expected

    # with loops
    G = nx.DiGraph()
    G.add_edges_from(
        [(1, 1), (1, 2), (1, 3), (2, 3), (1, 4), (4, 4), (5, 5), (4, 5), (4, 6), (5, 6)]
    )
    expected = [frozenset({1, 2, 3}), frozenset({4, 5, 6})]
    assert greedy_modularity_communities(G) == expected
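
# Sanity-check sketch (not part of the original test): nx.community.modularity
# also accepts DiGraphs, so the expected split can be scored directly.
# Assumes networkx >= 2.7.
import networkx as nx
from networkx.algorithms.community import modularity

H = nx.DiGraph(
    [("a", "b"), ("a", "c"), ("b", "c"), ("b", "d"),
     ("d", "e"), ("d", "f"), ("d", "g"), ("f", "g"), ("f", "e")]
)
print(modularity(H, [{"a", "b", "c"}, {"d", "e", "f", "g"}]))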
Example #3
import collections

import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities


def clauset_newman_moore_detection(G):

    # fit the model
    if nx.is_weighted(G):
        c = greedy_modularity_communities(G, weight='weight')
    else:
        c = greedy_modularity_communities(G)

    # format the result: map each node to the index of its community
    communities = {}
    for node in G.nodes():
        for index, commu in enumerate(c):
            if node in commu:
                communities[node] = index
                break

    # count singleton communities, i.e. nodes not grouped with anyone else
    freq_dict = collections.Counter(communities.values())
    num_isolated_nodes = list(freq_dict.values()).count(1)

    # report the result
    print("Clauset-Newman-Moore Community Detection")
    print("----------------------------------------")
    if num_isolated_nodes == 0:
        print("Number of communities detected: {}".format(len(freq_dict)))
    else:
        print("Number of communities detected: {}".format(
            len(freq_dict) - num_isolated_nodes))
        print("Number of nodes not in any community: {}".format(
            num_isolated_nodes))

    # return result
    out = {'algo': 'Clauset-Newman-Moore', 'communities': communities}

    return out
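
# Minimal usage sketch (assumes the imports above). karate_club_graph ships
# with networkx, so this runs as-is; the printed report comes from the
# function itself.
result = clauset_newman_moore_detection(nx.karate_club_graph())
print(result['algo'], '->', len(set(result['communities'].values())), 'communities')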
Example #4
def domains_from_pae_matrix_networkx(pae_matrix,
                                     pae_power=1,
                                     pae_cutoff=5,
                                     graph_resolution=1,
                                     weight_by_ca_ca_distance=False,
                                     distance_power=1,
                                     distance_model=None):
    '''
    Takes a predicted aligned error (PAE) matrix representing the predicted error in distances between each
    pair of residues in a model, and uses a graph-based community clustering algorithm to partition the model
    into approximately rigid groups.

    Arguments:

        * pae_matrix: a (n_residues x n_residues) numpy array. Diagonal elements should be set to some non-zero
          value to avoid divide-by-zero warnings
        * pae_power (optional, default=1): each edge in the graph will be weighted proportional to (1/pae**pae_power)
        * pae_cutoff (optional, default=5): graph edges will only be created for residue pairs with pae<pae_cutoff
        * graph_resolution (optional, default=1): regulates how aggressive the clustering is. Smaller values
          lead to larger clusters. Value should be larger than zero, and values larger than 5 are unlikely to be useful.
        * weight_by_ca_ca_distance (optional, default=False): adjust the edge weighting for each residue pair according
          to the distance between their CA atoms. If this is True, then `distance_model` must be provided.
        * distance_power (optional, default=1): If `weight_by_ca_ca_distance` is True, then edge weights will be multiplied
          by 1/distance**distance_power.
        * distance_model (optional, default=None): model corresponding to the PAE
          matrix. Only needed if `weight_by_ca_ca_distance` is True.
    Returns: a series of lists, where each list contains the indices of residues belonging to one cluster.
    '''
    try:
        import networkx as nx
    except ImportError:
        raise Sorry(
            'ERROR: This method requires NetworkX (>=2.6.2) to be installed. Please install it using "pip install networkx" '
            'in a Python >=3.7 environment and try again.')
    import numpy
    weights = 1 / pae_matrix**pae_power

    if weight_by_ca_ca_distance:
        if distance_model is None:
            raise Sorry(
                'If weight_by_ca_ca_distance is True, distance_model must be provided!'
            )
        weights *= weights_from_distance_matrix(distance_model, distance_power)

    g = nx.Graph()
    size = weights.shape[0]
    g.add_nodes_from(range(size))
    edges = numpy.argwhere(pae_matrix < pae_cutoff)
    sel_weights = weights[edges.T[0], edges.T[1]]
    wedges = [(i, j, w) for (i, j), w in zip(edges, sel_weights)]
    g.add_weighted_edges_from(wedges)

    from networkx.algorithms import community

    try:
        clusters = community.greedy_modularity_communities(
            g, weight='weight', resolution=graph_resolution)
    except TypeError:
        # this networkx build predates the resolution keyword; fall back
        clusters = community.greedy_modularity_communities(g, weight='weight')
    return clusters
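
# Usage sketch with a synthetic PAE matrix (assumption: a real matrix would
# come from an AlphaFold-style JSON; values here are random but symmetric,
# with a small non-zero diagonal to avoid divide-by-zero).
import numpy
rng = numpy.random.default_rng(0)
pae = rng.uniform(1.0, 12.0, size=(60, 60))
pae = (pae + pae.T) / 2
numpy.fill_diagonal(pae, 0.2)
clusters = domains_from_pae_matrix_networkx(pae, pae_cutoff=6)
print([len(c) for c in clusters])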
Example #5
def test_greedy_modularity_communities_multigraph():
    G = nx.MultiGraph()
    G.add_edges_from(
        [
            (1, 2),
            (1, 2),
            (1, 3),
            (2, 3),
            (1, 4),
            (2, 4),
            (4, 5),
            (5, 6),
            (5, 7),
            (5, 7),
            (6, 7),
            (7, 8),
            (5, 8),
        ]
    )
    expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})]
    assert greedy_modularity_communities(G) == expected

    # Converting (4, 5) into a multi-edge causes node 4 to change group.
    G.add_edge(4, 5)
    expected = [frozenset({4, 5, 6, 7, 8}), frozenset({1, 2, 3})]
    assert greedy_modularity_communities(G) == expected
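
# Equivalence sketch (assumption: greedy modularity treats parallel edges as
# summed weights): collapsing the multigraph into a weighted simple graph
# should therefore reproduce the same grouping.
H = nx.Graph()
for u, v in G.edges():
    if H.has_edge(u, v):
        H[u][v]["weight"] += 1
    else:
        H.add_edge(u, v, weight=1)
print(greedy_modularity_communities(H, weight="weight") == expected)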
Example #6
    def modularity(self):
        # compute the communities once instead of once per print statement
        communities = greedy_modularity_communities(self.graph)

        print('\n There are {} communities \n'.format(len(communities)))

        for community in communities:
            print(set(community))
Example #7
def test_modularity_communities_floating_point():
    # check for floating point error when used as key in the mapped_queue dict.
    # Test for gh-4992 and gh-5000
    G = nx.Graph()
    G.add_weighted_edges_from([(0, 1, 12), (1, 4, 71), (2, 3, 15), (2, 4, 10),
                               (3, 6, 13)])
    expected = [{0, 1, 4}, {2, 3, 6}]
    assert greedy_modularity_communities(G, weight="weight") == expected
    assert (greedy_modularity_communities(G, weight="weight",
                                          resolution=0.99) == expected)
Example #8
def test_modularity_communities_weighted():
    G = nx.balanced_tree(2, 3)
    for (a, b) in G.edges:
        if ((a == 1) or (a == 2)) and (b != 0):
            G[a][b]["weight"] = 10.0
        else:
            G[a][b]["weight"] = 1.0

    expected = [{0, 1, 3, 4, 7, 8, 9, 10}, {2, 5, 6, 11, 12, 13, 14}]

    assert greedy_modularity_communities(G, weight="weight") == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=0.9) == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=0.3) == expected
    assert greedy_modularity_communities(G, weight="weight", resolution=1.1) != expected
Example #9
def test_n_communities_parameter():
    G = nx.circular_ladder_graph(4)

    # No aggregation:
    expected = [{k} for k in range(8)]
    assert greedy_modularity_communities(G, n_communities=8) == expected

    # Aggregation to half order (number of nodes)
    expected = [{k, k + 1} for k in range(0, 8, 2)]
    assert greedy_modularity_communities(G, n_communities=4) == expected

    # Default aggregation case (here, 2 communities emerge)
    expected = [frozenset(range(0, 4)), frozenset(range(4, 8))]
    assert greedy_modularity_communities(G, n_communities=1) == expected
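
# Range-check sketch: n_communities must lie in [1, len(G)], otherwise a
# ValueError is raised. Assumption: your networkx release still spells the
# parameter n_communities; newer releases renamed it (cutoff / best_n).
try:
    greedy_modularity_communities(nx.circular_ladder_graph(4), n_communities=0)
except ValueError as err:
    print(err)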
Example #10
def cluster_subgraph_by_year_cnm(Graph, H):
    """
    option='accumulate' will accumulate the nodes and edges of the graph year on year
    option='separate' will only keep the nodes year on year, edges from previous years will not be retained
    connected='yes' will only use the largest connected component for each year
    connected='no' will use all available nodes for each year
    retain_clus='yes' will initialize the louvain calculation such that the previous year's cluster is used to initialize this year's cluster
    retain_clus='no' will use a random initialization for the louvain calculation

    """
    from networkx.algorithms.community import greedy_modularity_communities # This is to run CNM; remove if not needed

    # get node and edge year
    node_yr=nx.get_node_attributes(Graph,'Year')
    edge_yr=nx.get_edge_attributes(Graph,'Year')
    
    # dictionaries to filter nodes and edges by year
    n_year=int(max(node_yr.values())-min(node_yr.values()))+1
    min_year=min(node_yr.values())
    
    #  implement clustering
    J=Graph

    print("------------Clauset-Newman-Moore------------------")
    c_cnm=[[]for i in range(n_year)]
    for i in range(n_year):
        start = time.time()
        c_cnm[i]=list(greedy_modularity_communities(H[i]))
        set_cluster(c_cnm[i],H[i],'CNM cluster') 
        set_cluster(c_cnm[i],J,'CNM cluster'+str(i+min_year))
        stop = time.time()
        # c_cnm[i] is already a list of communities, so len() gives the count directly
        print('Year:',str(i+min_year),'--',round(stop-start,2),'seconds --',str(len(c_cnm[i])),'clusters')
    del c_cnm, node_yr, edge_yr, n_year, min_year
    gc.collect()
    return J
Example #11
import numpy as np
from warnings import warn

from networkx import from_numpy_array
from networkx.algorithms import community


def Modularity_with_dot(data):
    # data is expected to be a pandas DataFrame; rows become graph nodes
    norm = (data - data.mean()) / data.std()
    matrix = norm.dot(norm.T)
    matrix = matrix - np.diag(np.diag(matrix))

    matrix[matrix > 0] = 1
    matrix[matrix < 0] = 0

    graph = from_numpy_array(matrix.values)
    try:
        cluster = list(community.greedy_modularity_communities(graph))
    except Exception as e:
        warn(str(e))
        return None

    if len(cluster) != 2:
        return None

    group = [None for _ in range(len(matrix))]
    for i in cluster[0]:
        group[i] = 0
    for i in cluster[1]:
        group[i] = 1

    return get_defective_cluster(data, group)  # external helper defined elsewhere
Example #12
    def format(self, graph, clusters=True):
        if len(graph.nodes()) > 1 and clusters:

            def randomcolor():
                # random medium-brightness hex color, e.g. '#7fa04c'
                return '#' + ''.join(
                    f'{int(np.random.choice(range(64, 224))):02x}'
                    for _ in range(3))

            for i, nodelist in enumerate(
                    community.greedy_modularity_communities(graph)):
                color = randomcolor()
                for node in nodelist:
                    graph.nodes[node]['cluster'] = i
                    graph.nodes[node]['cluster-color'] = color

            for a, b in graph.edges:
                graph.edges[a, b]['cluster-a'] = graph.nodes[a]['cluster']
                graph.edges[a, b]['cluster-b'] = graph.nodes[b]['cluster']
                graph.edges[
                    a, b]['cluster-color-a'] = graph.nodes[a]['cluster-color']
                graph.edges[
                    a, b]['cluster-color-b'] = graph.nodes[b]['cluster-color']
        return json.dumps(nx.readwrite.json_graph.cytoscape_data(graph),
                          indent=4,
                          separators=(',', ': '))
Example #13
def main():
    start_time = time.time()

    args = utils.create_argument_parser()
    graph = utils.load_graph(args.dataset, args.w)
    graph_copy = deepcopy(graph)

    preprocess(graph)
    c = greedy_modularity_communities(graph)

    finish_time = time.time()
    print('\nClustering done in %.4f seconds.' % (finish_time - start_time))

    communities = dict()
    for i in range(len(c)):
        communities[i] = list(c[i])

    partition = create_partition(communities)
    utils.print_comm_info_to_display(communities)
    # utils.write_comm_info_to_file(partition)

    print('modularity_value =', modularity(graph_copy, communities))
    print('NMI =', NMI(args.output, partition))

    finish_time = time.time()
    print('\nTotal run time: %.4f seconds.' % (finish_time - start_time))
Example #14
# assumed imports for this snippet:
# import community as louv  # python-louvain
# from networkx.algorithms.community import greedy_modularity_communities
# from networkx.algorithms.community.quality import coverage, performance
def communitiesRandomModule(graphForCommunities):
    # function for random analysis
    communitiesDict = dict()
    listCommunities = []
    # get Clauset-Newman-Moore communities
    communitiesListCNM = list(
        greedy_modularity_communities(graphForCommunities))
    # label vertices by community, then evaluate the partition
    for com, community in enumerate(communitiesListCNM):
        for vertex in community:
            communitiesDict[vertex] = com
        listCommunities.append(set(community))
    networkModularity = louv.modularity(communitiesDict, graphForCommunities)
    networkCoverage = coverage(graphForCommunities, listCommunities)
    networkPerformance = performance(graphForCommunities, listCommunities)
    # end of function
    return (networkModularity, len(listCommunities), networkCoverage,
            networkPerformance)
Example #15
def comparing_community_algorithms():
    residuals = pd.read_csv("/Users/emg/GitHub/thesis/output/2019_01/1000_residuals_output_utf8.csv")
    edges = residuals[['source','target', 'resid']]
    edges.columns = ['source','target', 'weight']

    top_edges = subset_df(edges, 'weight', q=0.95)
    G = edges_to_graph(top_edges, 'weight')
    add_partitions(G)

    membership = get_node_data(G).sort_values('community')

    from networkx.algorithms.community import greedy_modularity_communities
    c = list(greedy_modularity_communities(G, weight='weight'))

    clauset_search = {}

    for i,x in enumerate(c):
        for subreddit in x:
            clauset_search[subreddit] = i

    membership['clauset'] = membership.index.map(lambda x: clauset_search[x])

    from networkx.algorithms.community import girvan_newman
    communities_generator = girvan_newman(G)
    top_level_communities = next(communities_generator)
    next_level_communities = next(communities_generator)
    print(sorted(map(sorted, next_level_communities)))
Example #16
def communityDetection():
    """ Runs the Clauset-Newman-Moore community detection algorithm """
    nn = createNearestNeighborEpsilon(allData, metric="Cosine")
    print(nn)

    g = nx.Graph()
    for cancer in range(len(cancerNames)):
        for nodeNumber in range(startingPositions[cancer],
                                startingPositions[cancer + 1]):
            g.add_node(nodeNumber)

    for i in range(nn.shape[0]):
        for j in range(nn.shape[1]):
            if (nn[i][j]):
                g.add_edge(i, j)

    #Amount of each cancer type in each community
    communities = greedy_modularity_communities(g)
    cancerTypes = []

    print(communities)

    #The cutoff to be put in the community detection part
    percentCutoff = 0

    csvData = np.zeros((CANCER_TYPES, len(communities)))

    for j, community in enumerate(communities):
        tempCancers = {}
        for i in range(1, len(startingPositions)):
            # number of this cancer's samples falling within the community
            cancersInbetween = len([
                x for x in community
                if startingPositions[i - 1] <= x < startingPositions[i]
            ])
            tempCancers[cancerNames[i - 1]] = cancersInbetween
            csvData[i - 1, j] = int(cancersInbetween)

        totalCancers = sum(tempCancers.values())

        #Sort by prevalence
        sortedCancers = sorted(tempCancers, key=tempCancers.get, reverse=True)
        t = {}
        for cancer in sortedCancers:
            if (tempCancers[cancer] >= totalCancers * percentCutoff):
                t[cancer] = (tempCancers[cancer],
                             round(tempCancers[cancer] / totalCancers, 2))

        cancerTypes.append(t)

    with open("communityCSV.csv", "w") as w:
        for i in range(csvData.shape[0]):
            tempLine = cancerNames[i] + ","
            tempLine += ",".join(list(map(str, csvData[i])))
            tempLine += "\n"
            w.write(tempLine)

    return csvData
Example #17
    def assign_communities(self):
        """
        Assign communities once per graph for use by all analysis funcs.

        It generates communities from both the networkx community module and the
        python-louvain community package. In the code we mostly use the latter as
        `community`, so it is necessary to first pip install python-louvain.

        However, the user may switch to the networkx communities by commenting and
        uncommenting a few lines. These lines are marked by #XXX, although coloring
        the graph is only available with the python-louvain communities.
        """
        self.modularity_communitiesx = [
            list(x) for x in communityx.greedy_modularity_communities(self.G)
        ]
        self.best_parts = community.best_partition(self.G)
        com_dict = {}
        for i, com in enumerate(self.modularity_communitiesx):
            for node in com:
                com_dict[node] = i

        com_list = [[] for c in list(set(self.best_parts.values()))]
        for n, c in self.best_parts.items():
            com_list[c].append(n)
        self.modularity_communities = com_list
        self.best_parts_x = com_dict
        return
Example #18
def clusters_mod(oid, evs, gene_list):

	# create network
	logging.info("%s Create network" % oid)
	evs_e = evs[["in_gene","out_gene","branch_support"]]
	evs_n = nx.convert_matrix.from_pandas_edgelist(evs_e, source="in_gene", target="out_gene", edge_attr="branch_support")
	# find communities using modularity
	logging.info("%s Find communities in network" % oid)
	evs_n_communities = list(community.greedy_modularity_communities(evs_n))
	#evs_n_communities = list(community.girvan_newman(evs_n))
	#evs_n_communities = list(community.asyn_lpa_communities(evs_n))
	#evs_n_communities = list(community.k_clique_communities(evs_n, 5))
	#evs_n_communities = list(community.asyn_fluidc(evs_n,2))
	clus_list    = np.zeros(len(gene_list))
	for n,noi in enumerate(gene_list):
		for com in range(len(evs_n_communities)):
			if noi in evs_n_communities[com]:
				clus_list[n] = int(com)+1
	# store clusters
	clu = pd.DataFrame( { 
		"node"    : gene_list,
		"cluster" : clus_list
	}, columns=["node","cluster"])
	clu["cluster"] = clu["cluster"].astype(int).astype(str)
	logging.info("%s Num clustered genes = %i" % (oid, len(clu)))
	logging.info("%s Num clusters = %i" % (oid, len(np.unique(clus_list))))

	return clu
Example #19
def detect_communities(G, save_img_path=None):
    """
        Returns the communities detected in the given
        graph using the greedy modularity approach.

        Args:
            G (nx.Graph): graph in which communities
                are to be detected.
                
            save_img_path (str): path to save visualisation
                of the communities detected.
            
        Returns:
            communities (list): list of tuples of nodes representing
                communities detected.
    """
    # perform community detection using greedy modularity approach
    _coms = community.greedy_modularity_communities(G)
    communities = [set(c) for c in _coms]

    # save image if path given
    if save_img_path:
        colors = np.linspace(0, 1, len(communities))

        com_color_map = dict()
        for idx, com in enumerate(communities):
            for node in com:
                com_color_map[node] = colors[idx]

        pos = nx.spring_layout(G)  # pos was undefined in the original snippet
        nx.draw_networkx_labels(G, pos=pos)
        # color nodes in G.nodes() order so colors align with the right nodes
        nx.draw(G, pos, node_color=[com_color_map[n] for n in G.nodes()])
        plt.savefig(save_img_path, format="PNG")

    return communities
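
# Usage sketch (assumes the module-level imports used above: networkx as nx,
# numpy as np, matplotlib.pyplot as plt, and the networkx `community` module).
import networkx as nx

G = nx.karate_club_graph()
coms = detect_communities(G, save_img_path="karate_communities.png")
print([len(c) for c in coms])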
Example #20
def plot_feature(path, sample=False):

    dataset = path.split("/")[1]

    orig_feature = np.load("{}/orig_feature.npy".format(path))
    transformed_feature = np.load("{}/transformed_feature.npy".format(path))
    # transformed_feature = transformed_feature[0]
    scaler = MinMaxScaler(feature_range=(-1, 1))

    orig_fea_tsne = tsne(orig_feature, 2)
    trans_fea_tsne = tsne(transformed_feature, 2)
    orig_fea_tsne = scaler.fit_transform(orig_fea_tsne)
    trans_fea_tsne = scaler.fit_transform(trans_fea_tsne)

    # get community
    g = nx.read_gml("{}/{}.gml".format(path, dataset), label=None)
    partition = greedy_modularity_communities(g)
    n_nodes = g.number_of_nodes()
    n_com = len(partition)
    start_id = np.min(g.nodes)
    com_dict = dict()
    for i in range(n_com):
        for node in partition[i]:
            com_dict[node - start_id] = i  # shift ids so nodes index from 0

    # get node color (only 8 colors are listed, so this assumes the graph
    # yields at most 8 communities)
    colors = [
        'red', 'blue', 'green', 'aqua', 'yellow', 'skyblue', 'purple', 'olive'
    ]
    color_list = []
    for node in range(n_nodes):
        color_list.append(colors[com_dict[node]])
    """
    fig = plt.figure(figsize=(9, 3))
    ax = fig.add_subplot(131)
    plot_scatter(ax, orig_fea_tsne, color_list)
    ax = fig.add_subplot(132)
    plot_scatter(ax, trans_fea_tsne, color_list)

    if sample is True:

        F = np.load("{}/F.npy".format(path))
        F_tsne = tsne(F, 2)
        F_tsne = scaler.fit_transform(F_tsne)

        ax = fig.add_subplot(133)
        plot_scatter(ax, F_tsne, color_list)
    
    """

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_subplot(111)
    plot_scatter(ax, trans_fea_tsne, color_list)

    fig.tight_layout()

    plt.savefig("{}/{}_feature.pdf".format(path, dataset),
                format='pdf',
                dpi=1000)
    plt.show()
Example #21
def drawGraph(G, X, algo1="Graph"):
    g = nx.Graph(G)
    comm = community.greedy_modularity_communities(g)
    gridsize = (1, 1)
    fig = plt.figure(figsize=(8, 5))
    axIN = plt.subplot2grid(gridsize, (0, 0))
    plt.axis('off')
    axIN.set_xlim(min(X[:, 0]), max(X[:, 0]))
    axIN.set_ylim(min(X[:, 1]), max(X[:, 1]))
    linesIN = []
    e = 0
    print("Cluster:", len(comm))
    mycolors = cm.rainbow(np.linspace(0, 1, len(comm)))
    #for i,j in zip(*G.nonzero()):
    #    if i>j:
    #        linesIN.append([[X[i][0], X[i][1]], [X[j][0], X[j][1]]])
    #        e += 1
    gd = dict()
    for com in range(len(comm)):
        for node in list(comm[com]):
            gd[node] = com
            plt.scatter(X[node][0], X[node][1], s=2, color=mycolors[com])
    plt.axis('off')
    # `commm` is assumed to be the python-louvain package (import community as commm)
    modularity = commm.community_louvain.modularity(gd, g)
    print("Modularity:", algo1, "=", modularity)
    plt.savefig(algo1 + '_vis.pdf')
Example #22
    def train(self):
        G = nx.Graph()
        if self.is_weighted:
            edges, weight = (
                self.data.edge_index.t().tolist(),
                self.data.edge_attr.tolist(),
            )
            G.add_weighted_edges_from(
                # edge_attr is assumed here to have shape [1, num_edges]
                [(edges[i][0], edges[i][1], weight[0][i]) for i in range(len(edges))]
            )
        else:
            G.add_edges_from(self.data.edge_index.t().tolist())
        partition = community.greedy_modularity_communities(G)
        base_label = [0] * G.number_of_nodes()
        for i, node_set in enumerate(partition):
            for node in node_set:
                base_label[node] = i
        nmi_score = normalized_mutual_info_score(self.label, base_label)
        print("NMI score of greedy modularity optimize algorithm: ", nmi_score)
        
        embeddings = self.model.train(G)

        # Map node2id
        features_matrix = np.zeros((self.num_nodes, self.hidden_size))
        for vid, node in enumerate(G.nodes()):
            features_matrix[node] = embeddings[vid]
            
        return self._evaluate(features_matrix)
Example #23
def select_comm(graph, graph_type, mapping=None):
    if graph_type == 'Email':
        # read into community info
        with open('../data/email-Eu-core-department-labels-cc.txt',
                  'r') as fid:
            f_label = fid.readlines()
        comm_to_nodes = {}
        for item in f_label:
            nodeID, commID = [int(i) for i in item.rstrip().split()]
            if commID not in comm_to_nodes:
                comm_to_nodes[commID] = [mapping[nodeID]]
            else:
                comm_to_nodes[commID].append(mapping[nodeID])
        comm_size = sorted([(key, len(comm_to_nodes[key]))
                            for key in comm_to_nodes.keys()],
                           key=lambda x: x[1])
        selected_comm = comm_size[math.floor(len(comm_size) * 0.5)][0]
        comm = comm_to_nodes[selected_comm]

    elif graph_type == 'Airport':
        deg = list(dict(graph.degree()).items())
        deg = sorted(deg, key=lambda x: x[1])
        selected_node = deg[math.floor(len(deg) * 0.9)][0]
        comm = list(graph.neighbors(selected_node)) + [selected_node]

    elif graph_type == 'Brain':
        comm = list(range(len(graph) - 100, len(graph)))

    else:
        all_comms = list(greedy_modularity_communities(graph))
        all_comms = sorted(all_comms, key=lambda x: len(x))
        comm = list(all_comms[math.floor(len(all_comms) * 0.5)])
        assert (len(comm) != 0)

    return comm
Example #24
# assumed imports: `comm` is networkx.algorithms.community, `community` is the
# python-louvain package, and defaultdict comes from collections.
def communityDetection(g):
    # greedy modularity community detection
    greedy_communities = list(comm.greedy_modularity_communities(g))
    greedy_communities_dict = {}
    for i, c in enumerate(greedy_communities):
        for node_id in c:
            greedy_communities_dict[node_id] = i
    greedy_score = community.modularity(greedy_communities_dict, g)
    print("Greedy Number of Communities: ",
          len(set(greedy_communities_dict.values())))
    print("Greedy Modularity: ", greedy_score)
    greedy_communities_sorted = sorted(greedy_communities,
                                       key=lambda x: len(x),
                                       reverse=True)
    # sizes of the 2nd-6th largest communities (index 0, the largest, is
    # skipped, as in the original)
    for i in range(1, 6):
        print("Size of", i, "Community: ", len(greedy_communities_sorted[i]))

    louvain_communities = community.best_partition(g)
    louvain_score = community.modularity(louvain_communities, g)
    print("Louvain Number of Communities: ",
          len(set(louvain_communities.values())))
    print("Louvain Modularity: ", louvain_score)
    louvain_score_dict = defaultdict(list)
    for node_id, comm_id in louvain_communities.items():
        louvain_score_dict[comm_id].append(node_id)
    louvain_communities_list = sorted(louvain_score_dict.values(),
                                      key=lambda x: len(x),
                                      reverse=True)
    for i in range(1, 6):
        print("Size of ", i, " Community: ", len(louvain_communities_list[i]))
Example #25
def greedy_partition(graph):
    partition = greedy_modularity_communities(graph)
    res = dict()
    for i, part in enumerate(partition):
        for j in part:
            res[j] = i
    draw_graph(res, "greedy_modularity")
Example #26
def detect_communities(network: SpatioTemporalNetwork, algo, **kwargs):
    if algo == 'fluid':
        comm_iter = community.asyn_fluidc(network.to_multigraph().to_undirected(), **kwargs)
        return list(comm_iter)
    if algo == 'clm':
        comm_iter = community.greedy_modularity_communities(network.to_multigraph().to_undirected(), **kwargs)
        return list(comm_iter)
Example #27
def test_greedy_modularity_communities_relabeled():
    # Test for gh-4966
    G = nx.balanced_tree(2, 2)
    mapping = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h"}
    G = nx.relabel_nodes(G, mapping)
    expected = [frozenset({"e", "d", "a", "b"}), frozenset({"c", "f", "g"})]
    assert greedy_modularity_communities(G) == expected
Example #28
def test_greedy_modularity_communities_multidigraph_weighted():
    G = nx.MultiDiGraph()
    G.add_weighted_edges_from(
        [
            (1, 2, 5),
            (1, 2, 3),
            (3, 1, 6),
            (1, 3, 6),
            (3, 2, 4),
            (1, 4, 2),
            (1, 4, 5),
            (2, 4, 3),
            (3, 2, 8),
            (4, 2, 3),
            (4, 3, 5),
            (4, 5, 2),
            (5, 6, 3),
            (5, 6, 7),
            (6, 5, 4),
            (5, 7, 9),
            (5, 7, 9),
            (7, 6, 8),
            (7, 8, 2),
            (8, 7, 2),
            (5, 8, 6),
            (5, 8, 6),
        ]
    )
    expected = [frozenset({1, 2, 3, 4}), frozenset({5, 6, 7, 8})]
    assert greedy_modularity_communities(G, weight="weight") == expected
Example #29
    def create_g_cluster(self, word_pos):
        words = self.top_k(word_pos)[1:]

        if self.cluster_type < 4:
            pairs = self.gen_pairs(words)
            G = nx.Graph()
            G.add_weighted_edges_from(pairs)

        if self.cluster_type == 3:
            # connected_component_subgraphs was removed in networkx 2.4;
            # keep only the largest connected component
            G = G.subgraph(max(nx.connected_components(G), key=len)).copy()
            print('len_strip(G)', len(G))

        if self.cluster_type == 1:
            from networkx.algorithms.community import greedy_modularity_communities
            clusters = list(greedy_modularity_communities(G))
        elif self.cluster_type == 2:
            from chinese_whispers import chinese_whispers, aggregate_clusters
            chinese_whispers(G, iterations=20, weighting='log', seed=13)  # top, nolog, log
            clusters = aggregate_clusters(G).values()
        elif self.cluster_type == 3:
            from networkx.algorithms.community import asyn_fluidc
            if self.is_k_depends_g:
                clusters = list(asyn_fluidc(G, k=self.k - int((self.k - 8) * ((200 - len(G)) / 100))))
            else:
                clusters = list(asyn_fluidc(G, k=min(self.k, len(G))))
        elif self.cluster_type == 4:
            from collections import defaultdict
            from sklearn.cluster import KMeans

            X = [sg.emb(_) for _ in words[1:]]
            clusters = defaultdict(list)

            kmeans = KMeans(n_clusters=self.k, random_state=13)
            assigned_clusters = kmeans.fit_predict(X)

            for cl, w in zip(assigned_clusters, words): clusters[cl].append(w)
            clusters = list(clusters.values())
        elif self.cluster_type == 5:
            from collections import defaultdict
            from sklearn.cluster import DBSCAN

            X = [sg.emb(_) for _ in words[1:]]
            clusters = defaultdict(list)

            dbscan = DBSCAN(metric='l2', eps=self.min_dist_dbscan, min_samples=self.min_clust)
            assigned_clusters = dbscan.fit_predict(X)

            for cl, w in zip(assigned_clusters, words): clusters[cl].append(w)
            clusters = list(clusters.values())
        else:
            raise Exception('no cluster type', self.cluster_type)

        if self.debug:
            for i, cluster in enumerate(sorted(clusters, key=lambda e: len(e), reverse=True)):
                print('Cluster ID\tCluster Elements\n')
                print('{}\t{}\n'.format(i, cluster))
        print(word_pos, 'clusters', len(clusters))

        return clusters
Example #30
    def buildGraph(self, additional_stopwords=[], min_frequency=5):

        #call getBatches method passing any contextual stop words as an arg
        batches = self.getBatches(additional_stopwords)

        #call getEdgesNodes method, passing the minimum frequency as an arg
        self.getEdgesNodes(batches,min_frequency)

        #call the getGraph method and build the graph
        self.G = self.getGraph()
        print('Graph successfully built.')
        print('Node and Edge dataframes created.')


        """
        save a number of attributes to the instance of the class
        """
        #retain graph object adjacencies
        self.adjacencies = dict(self.G.adjacency())

        #retain graph object node betweenness centrality
        self.betweeness = nx.betweenness_centrality(self.G)

        #retain graph object clustering coefficients
        self.clustering_coeff = nx.clustering(self.G)

        """
        add these attributes as columns on the node dataframe
        """

        self.node_df['adjacency_frequency'] = self.node_df['id_code'].map(lambda x: len(self.adjacencies[x]))

        self.node_df['betweeness_centrality'] = self.node_df['id_code'].map(lambda x: self.betweeness[x])

        self.node_df['clustering_coefficient'] = self.node_df['id_code'].map(lambda x: self.clustering_coeff[x])

        #identify communities in instance of graph object and retain as attribute
        self.communities = community.greedy_modularity_communities(self.G)

        """
        assign each node to its community and add as column to node dataframe
        """
        self.communities_dict = {}

        nodes_in_community = [list(i) for i in self.communities]

        for index, nodes in enumerate(nodes_in_community):
            self.communities_dict[index] = nodes

        def community_allocation(source_val):
            for k,v in self.communities_dict.items():
                if source_val in v:
                    return k

        self.node_df['community'] = self.node_df['id_code'].map(lambda x: community_allocation(x))

        print('Communities calculated.')
        return
Example #31
    def _check_communities(self, expected):
        communities = set(greedy_modularity_communities(self.G))
        assert communities == expected