Example #1
def split_subslice_into_putative_modules(G_optimized, improvement_delta, modularity_score_objective, best_modularity):
    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity >= modularity_score_objective:
        return True, best_modularity

    # drop connected components that are too small to form a module
    for n_nodes in cur_components:
        if len(n_nodes) < 4:
            G_optimized.remove_nodes_from(n_nodes)

    cur_components = [G_optimized.subgraph(c) for c in connected_components(G_optimized)]
    if len(cur_components) == 0:
        return True, best_modularity

    optimized_connected_components = girvan_newman(G_optimized)
    cur_components = sorted(next(optimized_connected_components))
    cur_modularity = modularity(G_optimized, cur_components, weight='weight')
    if cur_modularity <= best_modularity + improvement_delta:
        return True, best_modularity

    else:
        optimal_components = cur_components

        edges_to_remove = []
        for cur_edge in G_optimized.edges:
            included = False
            for n_nodes in optimal_components:
                if cur_edge[0] in n_nodes and cur_edge[1] in n_nodes:
                    included = True
            if not included:
                edges_to_remove.append(cur_edge)

        G_optimized.remove_edges_from(edges_to_remove)

        return False, cur_modularity
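A minimal driver sketch for the function above, assuming `connected_components`, `modularity`, and `girvan_newman` come from NetworkX as the function body implies; the graph choice and threshold values are placeholders.

# Hypothetical driver loop: keep splitting until the function signals convergence.
import networkx as nx
from networkx import connected_components
from networkx.algorithms.community import girvan_newman
from networkx.algorithms.community.quality import modularity

G_optimized = nx.les_miserables_graph()  # placeholder weighted graph
best_modularity = modularity(
    G_optimized,
    [G_optimized.subgraph(c) for c in connected_components(G_optimized)],
    weight='weight')
done = False
while not done:
    done, best_modularity = split_subslice_into_putative_modules(
        G_optimized, improvement_delta=0.01,
        modularity_score_objective=0.3, best_modularity=best_modularity)
print("final modularity:", best_modularity)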
Example #2
def walktrap_algorithm(graph, t=5):

    # check that graph is connected and undirected
    assert nx.is_connected(graph), "The graph must be connected"
    assert not nx.is_directed(graph), "The graph must be undirected"

    partitions = []
    modularities = []

    #initialization
    partition = singleton_partition(graph)
    modularity = nxq.modularity(graph, partition, weight='weight') 
    partitions.append(deepcopy(partition))
    modularities.append(modularity)

    # number of iterations
    num_nodes = graph.number_of_nodes()

    # adjacency matrix
    A = nx.to_numpy_matrix(graph, dtype=int)
    A += np.diag([1 for i in range(len(A))])

    # diagonal matrix
    D = nx.laplacian_matrix(graph) + A
    Ddiag = np.diagonal(D)
    Dd = np.diag(np.power(Ddiag, (-0.5)))
    # Transition prob matrix P
    P = inv(D) @ A
    P_t = matrix_power(P, t)

    nodes2id = nodes_to_ind(graph)

    for iteration in tqdm(range(num_nodes - 1)):

        # index_to_id
        id2p = index_to_partition(partition)

        # computing distances
        dist = adjacent_communities_dist(graph, partition, P_t, nodes2id)
        (ind1, ind2) = min(dist, key=dist.get)
        C1 = id2p[ind1]
        C2 = id2p[ind2]

        # union of communities
        C3 = C1.union(C2)

        # redefine the partition
        partition.remove(C1)
        partition.remove(C2)
        partition.append(C3)

        partitions.append(deepcopy(partition))
        modularities.append(nxq.modularity(graph, partition, weight='weight'))

    return list(reversed(partitions)), list(reversed(modularities))
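A hedged usage sketch: assuming the helpers referenced above (`singleton_partition`, `nodes_to_ind`, `index_to_partition`, `adjacent_communities_dist`) and the imports (`nxq`, `tqdm`, `deepcopy`, `inv`, `matrix_power`) are available in the same module, the best level of the hierarchy is the one with the highest modularity.

# Pick the partition with maximal modularity from the walktrap hierarchy.
import numpy as np
import networkx as nx

G = nx.karate_club_graph()  # connected, undirected example graph
partitions, mods = walktrap_algorithm(G, t=4)
best_partition = partitions[int(np.argmax(mods))]
print(len(best_partition), "communities, modularity =", max(mods))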
Example #3
def _naive_greedy_modularity_communities(G):
    """Find communities in graph using the greedy modularity maximization.
    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.
    """
    # First create one community for each node
    communities = list([frozenset([u]) for u in G.nodes()])
    # Track merges
    merges = []
    # Greedily merge communities until no improvement is possible
    old_modularity = None
    new_modularity = modularity(G, communities)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison
        old_modularity = new_modularity
        # Find best pair to merge
        trial_communities = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i=j and empty communities
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Merge communities u and v
                trial_communities[j] = u | v
                trial_communities[i] = frozenset([])
                trial_modularity = modularity(G, trial_communities)
                if trial_modularity >= new_modularity:
                    # Check if strictly better or tie
                    if trial_modularity > new_modularity:
                        # Found new best, save modularity and group indexes
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif (
                        to_merge and
                        min(i, j) < min(to_merge[0], to_merge[1])
                    ):
                        # Break ties by choosing pair with lowest min id
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # If the best merge improves modularity, use it
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Remove empty communities and sort
    communities = [c for c in communities if len(c) > 0]
    for com in sorted(communities, key=lambda x: len(x), reverse=True):
        yield com
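A short usage sketch for the reference implementation above; `modularity` is assumed to be `networkx.algorithms.community.quality.modularity`, as in the surrounding examples.

# The function is a generator, so materialize it with list().
import networkx as nx
from networkx.algorithms.community.quality import modularity

G = nx.karate_club_graph()
communities = list(_naive_greedy_modularity_communities(G))
print("found", len(communities), "communities")
print("modularity:", modularity(G, communities))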
def _2ll(G, comms):
    '''
    Log-likelihood ratio test (LR test) normalized by the number of edges.
    H0 is the configuration model.
    H1 is the degree-corrected planted partition model.

    Args:
        G: input networkx graph instance
        comms: partition of the network, list of lists

    Returns:
        the log-likelihood ratio test statistic
    '''

    # number of edges
    E = G.number_of_edges()

    # win, wout: the MLE mixing parameters of PPM
    try:
        map_comm = {v: i for i, c in enumerate(comms) for v in c}
        win, wout = mle_paras(G, map_comm)  # the MLE win and wout
        gamma = (win - wout) / (np.log(win) - np.log(wout))  # the MLE gamma
    except RuntimeWarning:
        #print("RuntimeWarning", list(G.edges()), comms, win, wout)
        return 0.

    # modularity: modularity of the graph G under partition comm
    mod = modularity(G, comms, gamma)

    # constants
    B = E * (np.log(win) - np.log(wout))
    C = E * (np.log(wout) - wout)

    return 2. * (B * mod + C + E) / E  # normalized by the number of edges ???
Example #5
    def iterate(self):
        while len(self.communities) > 1:
            C1, C2 = self.choose_communities()
            self.merge_communities(C1, C2)
            self.modularities[tuple(tuple(C) for C in self.communities)] = \
                modularity(self.g, self.communities)
Example #6
def export_log(G, communities, dataset, algorithm, d_threshold, w_threshold,
               path):
    '''Export the community result to a log file for manual analysis.
    '''
    with open(path, 'w') as f:
        # write some key information first
        line = "dataset: " + dataset + "\n"
        line += "algorithm: " + algorithm + "\n"
        line += "d_threshold: " + str(d_threshold) + "\n"
        line += "w_threshold: " + str(w_threshold) + "\n"
        line += "time: " + time.asctime(time.localtime(time.time())) + "\n"
        line += "-------------------------------------\n"
        line += "communities: " + str(len(communities)) + "\n"
        line += "modularity: " + str(round(modularity(G, communities),
                                           3)) + "\n"
        line += "performance: " + str(round(performance(G, communities),
                                            3)) + "\n"
        line += "=====================================\n"
        f.write(line)

        # write community line by line
        for community in communities:
            namelist = list(community)
            line = ", ".join(namelist)
            f.write(line + '\n')

    print("[Done] export log file:", path)
Example #7
def move_node_to_other_com(graph, v, partition, initial_partition,
                           best_modularity, theta, T):
    """Computes the probabilities used for the function merge_nodes_subset and finds a new partition according to those probabilities"""

    prob = []
    new_partition = []
    ind_node = find_community_i(partition, v)

    for C, ind_com in T:
        partition_copy = deepcopy(partition)
        partition_copy = delete_from_com(partition_copy, ind_node, v)
        partition_copy = add_to_community(partition_copy, ind_com, v)
        new_com = partition_copy[ind_com]
        partition_copy = [s for s in partition_copy if s != set()]
        mod = nxq.modularity(graph, partition_copy, weight='weight')

        if mod > best_modularity and is_in_initial_partition(
                new_com, initial_partition):
            prob.append(np.exp((mod - best_modularity) / theta))
            best_modularity = mod
            best_partition = partition_copy
        else:
            prob.append(0)

        new_partition.append(partition_copy)

    return prob, new_partition
Example #8
def get_best_partition(graph, best_partition, best_mod, node, part):
    """Gets best partition by removing node to its neighbors' communities"""

    ind_node = find_community_i(part, node)
    neigh_node = graph.neighbors(node)

    # visit all neighbors of the node
    for neigh in neigh_node:

        # make copy of part to not change the initial part
        part_bis = deepcopy(part)
        ind_neigh = find_community_i(part_bis, neigh)
        part_bis = delete_from_com(part_bis, ind_node, node)
        part_bis = add_to_community(part_bis, ind_neigh, node)
        part_bis = [s for s in part_bis if s != set()]

        # compute modularity of new partition
        mod = nxq.modularity(graph, part_bis, weight='weight')

        # update modularity
        if mod > best_mod:
            best_mod = mod
            best_partition = part_bis

    return best_mod, best_partition
Example #9
def get_gw_ami(G, t, gt):
    # G  -- graph
    # t  -- heat kernel scale parameter
    # gt -- ground truth

    distribution_exponent_hk = 0.001
    distribution_offset_hk = 0

    C1 = sgw.undirected_normalized_heat_kernel(G, t)
    p1 = sgw.node_distribution(G, distribution_offset_hk,
                               distribution_exponent_hk)
    p2 = np.ravel(
        GwGt.estimate_target_distribution({0: p1.reshape(-1, 1)},
                                          dim_t=len(np.unique(gt))))
    # Note that we are inserting prior information about the number of clusters

    C2 = np.diag(p2)
    coup, log = ot.gromov.gromov_wasserstein(C1,
                                             C2,
                                             p1,
                                             p2,
                                             loss_fun='square_loss',
                                             log=True)
    est_idx = np.argmax(coup, axis=1)

    ami = metrics.adjusted_mutual_info_score(est_idx, gt, average_method='max')
    comms = [set() for v in np.unique(est_idx)]
    for idx, val in enumerate(est_idx):
        comms[val].add(idx)

    mod = modularity(G, comms)

    return ami, mod
Example #10
def move_nodes_fast(graph, partition):
    """Move nodes to its neighbors communities to maximize the modularity"""

    # randomize the order of the visited nodes
    Q = list(graph.nodes())
    random.shuffle(Q)

    # initialize best modularity and partition
    best_modularity = nxq.modularity(graph, partition, weight='weight')
    best_partition = partition

    # visit all nodes at least once
    while len(Q) > 0:

        new_partition = deepcopy(best_partition)
        # get next node and neighbors
        next_node = Q.pop(0)
        neigh_node = graph.neighbors(next_node)
        ind_node = find_community_i(new_partition, next_node)

        # visit all neighbors
        for neigh in neigh_node:

            partition_copy = deepcopy(new_partition)
            ind_neigh = find_community_i(partition_copy, neigh)
            partition_copy = delete_from_com(partition_copy, ind_node,
                                             next_node)
            partition_copy = add_to_community(partition_copy, ind_neigh,
                                              next_node)
            partition_copy = [s for s in partition_copy if s != set()]
            mod = nxq.modularity(graph, partition_copy, weight='weight')

            if mod > best_modularity:
                best_modularity = mod
                best_partition = partition_copy
                new_ind_node = find_community_i(partition_copy, next_node)
                neigh_left = get_neighbors_not_in_com(graph, new_ind_node,
                                                      partition_copy,
                                                      next_node)
                neigh_not_in_Q = [
                    neigh for neigh in neigh_left if neigh not in Q
                ]
                # add those neighbors to Q again
                Q += neigh_not_in_Q

    return best_partition, best_modularity
def detect_communities(g: nx.Graph,
                       max_it: int = 100,
                       eps: float = 0.0001,
                       reruns_if_not_conv: int = 5,
                       threshold: float = 0.005,
                       q_max: int = 7):

    # determine the optimal number of communities and run community detection
    # for a given network

    # the nodes have to be labeled from 0 to n

    modularity_0 = 0
    modularity_1 = threshold
    q = 1
    c = 2 * g.number_of_edges() / g.number_of_nodes()
    partition = ()

    # run belief propagation community detection with an increasing number of
    # communities until the modularity of the detected partition does not
    # increase by more than the given threshold

    while modularity_1 - modularity_0 >= threshold:
        old_partition = partition
        beta = compute_opt_beta(q, c)
        modularity_0 = modularity_1
        partition = run_bp_community_detection(
            g=g,
            q=q,
            beta=beta,
            max_it=max_it,
            eps=eps,
            reruns_if_not_conv=reruns_if_not_conv)

        modularity_1 = modularity(
            g, [{i
                 for i in range(len(partition[1])) if partition[1][i] == j}
                for j in set(partition[1])])

        if not partition[4]:
            curr_partition = partition
            partition = old_partition
            modularity_1 = modularity_0
            modularity_0 = modularity_1 - threshold

        if q == 1:
            modularity_0 = modularity_1 - threshold

        if q > q_max:
            break

        print(q)
        q = q + 1

    if len(old_partition) != 0:
        return q - 1, modularity_0, old_partition[1], old_partition[
            2], old_partition[3], old_partition[4]
    else:
        return q - 1, modularity_0, curr_partition[1], curr_partition[
            2], curr_partition[3], curr_partition[4]
Example #12
def projected_oxygen_graph_metrics(projected_graph):
    communities = community.greedy_modularity_communities(projected_graph)
    mod_score = modularity(projected_graph, communities)
    try:
        aspl = nx.average_shortest_path_length(projected_graph)
        wiener = nx.wiener_index(projected_graph)
    except Exception:  # e.g. the projected graph is disconnected
        aspl = -1
        wiener = -1
    return aspl, wiener, len(communities), communities, mod_score
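A usage sketch with an arbitrary stand-in graph, assuming `community` is `networkx.algorithms.community` and `modularity` is the quality-module function, as in the other examples.

import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community.quality import modularity

projected = nx.karate_club_graph()  # placeholder for the projected oxygen graph
aspl, wiener, n_comms, comms, mod_score = projected_oxygen_graph_metrics(projected)
print(n_comms, "communities, modularity =", mod_score)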
Example #13
def agglomerative_modularity(G):
    modularities = []
    # initial grouping where each node is in its own group
    best_partitions = [frozenset([n]) for n in G.nodes()]
    prev_modularity = -1000
    # modularity takes in the groupings as a set of nodes of G representing a partitioning
    curr_modularity = modularity(G, best_partitions)
    while curr_modularity > prev_modularity:
        merges = []
        prev_modularity = curr_modularity
        test_partitions = list(best_partitions)
        for i, g1 in enumerate(best_partitions):
            for j, g2 in enumerate(best_partitions):
                # Skip i=j and empty communities
                if j <= i or len(g1) == 0 or len(g2) == 0:
                    continue
                test_partitions[j] = g1 | g2
                test_partitions[i] = frozenset([])
                test_modularity = modularity(G, test_partitions)
                if test_modularity > curr_modularity:
                    curr_modularity = test_modularity
                    # add to the merging to list of merges as a tuple with delta Q as first element
                    # and the potential merge as the second element
                    merges.append((curr_modularity - prev_modularity,
                                   copy.deepcopy(test_partitions)))
                test_partitions[i] = g1
                test_partitions[j] = g2
        # in this implementation, tie breaking is first come first serve
        if len(merges) > 0:
            best_partitions = sorted(merges, key=lambda x: x[0],
                                     reverse=True)[0][1]
        else:
            best_partitions = test_partitions
        modularities.append(modularity(G, best_partitions))
    partitions = [
        frozenset(g)
        for g in sorted([g for g in best_partitions if len(g) > 0],
                        key=lambda x: len(x),
                        reverse=True)
    ]
    return partitions, modularities
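A small usage sketch; `copy` and `modularity` (from `networkx.algorithms.community.quality`) are assumed to be imported in the same module, as the function body implies.

import copy
import networkx as nx
from networkx.algorithms.community.quality import modularity

G = nx.karate_club_graph()
partitions, modularity_trace = agglomerative_modularity(G)
print("best partition has", len(partitions), "communities")
print("final modularity:", modularity_trace[-1])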
Example #14
def calc_modularity(G, communities):  # compute modularity
    """Compute the modularity of a partition.

    :param G: networkx graph
    :param communities: iterable of communities, each an iterable of nodes
    :return: the modularity value
    """
    comms = []
    for com in communities:
        comms.append(set(com))

    return modularity(G=G, communities=comms)
def girvan_newman(G, k, weight='weight', autothreshold=False):
    '''Community detection using Girvan-Newman algorithm.
    
    Parameters
    ----------
    G : networkx.graph

    k : number of communities
    
    weight : edge attribute if G is weighted or None if G is unweighted

    autothreshold : thresholding automatically according to modularity value

    Returns
    -------
    list_communities : list
        A list of k sets, and each set contains vertices in one community.
    
    Notes
    -----
    This function only deals with undirected graphs.
    '''
    # determine most_valuable_edge according to weighted or not
    mvg = None if weight is None else most_valuable_edge
    communities = community.girvan_newman(G.to_undirected(),
                                          most_valuable_edge=mvg)

    if not autothreshold:
        # k must not be larger than the number of nodes, otherwise return an empty list
        if k > len(G.nodes()):
            return []

        # get (k-1)th community partition
        for com in itertools.islice(communities, k - 1):
            list_communities = list(com)
    else:
        # find the list_communities that contributes to maximum modularity
        max_modularity = float('-inf')
        for com in itertools.islice(communities, k - 1):
            cur_list_communities = list(com)
            cur_modularity = quality.modularity(G, cur_list_communities)

            if cur_modularity > max_modularity:
                list_communities = cur_list_communities
                max_modularity = cur_modularity

    return list_communities
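A hedged usage sketch; `community` and `quality` are assumed to be the NetworkX modules, and the `most_valuable_edge` helper referenced above is only needed when `weight` is not None, so the call below avoids it.

import itertools
import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community import quality

G = nx.karate_club_graph()
two_communities = girvan_newman(G, k=2, weight=None)
print([sorted(c) for c in two_communities])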
Example #16
def calc_modularity(mdl, gtype='parameter'):
    """
        Computes graph modularity given a graph representation of model mdl.
        
        Parameters
        ----------
        mdl : model or graph
        
        Returns
        -------
        modularity : Modularity
        """
    if type(mdl) == nx.classes.graph.Graph:
        g = mdl
    else:
        g = get_graph(mdl, gtype)
    communities = list(greedy_modularity_communities(g))
    m = modularity(g, communities)
    return m
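Passing a NetworkX graph directly skips the `get_graph(mdl, gtype)` branch; `greedy_modularity_communities` and `modularity` are assumed to come from `networkx.algorithms.community`, as in the other examples.

import networkx as nx
from networkx.algorithms.community import greedy_modularity_communities
from networkx.algorithms.community.quality import modularity

print(calc_modularity(nx.karate_club_graph()))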
Example #17
    def clustering_statistics(self, community_partition, feat_name, feat_desc,
                              feat_interpret):
        """Compute quality of the community partitions."""
        compl_desc = " of the partition of " + feat_desc

        self.add_feature(
            feat_name + "_modularity",
            lambda graph: quality.modularity(graph, community_partition),
            "Modularity" + compl_desc,
            feat_interpret,
        )

        self.add_feature(
            feat_name + "_coverage",
            lambda graph: quality.coverage(graph, community_partition),
            "Coverage" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_performance",
            lambda graph: quality.performance(graph, community_partition),
            "Performance" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_inter_community_edges",
            lambda graph: quality.inter_community_edges(
                graph, community_partition),
            "Inter community edges" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_inter_community_non_edges",
            lambda graph: quality.inter_community_non_edges(
                graph, community_partition),
            "Inter community non edges" + compl_desc,
            feat_interpret,
        )
        self.add_feature(
            feat_name + "_intra_community_edges",
            lambda graph: quality.intra_community_edges(
                graph, community_partition),
            "Intra community edges" + compl_desc,
            feat_interpret,
        )
Example #18
def merge_nodes_subset(graph, partition, initial_partition, subset, theta):
    """From the initial refined partition, merges subsets only if those subsets are a subset of the communities from the initial partition"""

    R = get_connected_nodes(graph, subset)

    best_modularity = nxq.modularity(graph, partition, weight='weight')

    for v in R:

        ind_community = find_community_i(partition, v)
        if len(partition[ind_community]) == 1:
            T = get_connected_communities(graph, subset, partition)
            prob, new_partition = move_node_to_other_com(
                graph, v, partition, initial_partition, best_modularity, theta,
                T)
            # only move v when at least one candidate community has nonzero probability
            if prob.count(0) != len(prob):
                partition = random.choices(new_partition, weights=prob)[0]

    return partition
Example #19
def phase1(graph):
    """Gets the best partition by maximizing greedily the modularity function"""

    # initialize the communities: each node in a different community
    partition = singleton_partition(graph)

    # initialize the best modularity to spot convergence
    best_mod = nxq.modularity(graph, partition, weight='weight')
    best_partition = partition

    nodes = list(graph.nodes())
    random.shuffle(nodes)

    while True:
        for node in nodes:
            part = deepcopy(best_partition)
            best_mod, best_partition = get_best_partition(
                graph, best_partition, best_mod, node, part)

        if part == best_partition:
            break

    return best_partition, best_mod
def label_propagation(G, weight='weight', iterNum=6):
    '''Community detection using label propagation algorithm.
    
    Parameters
    ----------
    G : networkx.graph
    
    weight : edge attribute if G is weighted or None if G is unweighted

    iterNum : number to repeat label propagation algorithm

    Returns
    -------
    list_communities : list
        A list of sets, and each set contains vertices in one community.
    
    Notes
    -----
    This function handles both weighted and unweighted undirected graphs.
    '''
    # H is the undirected version of graph G
    H = G.to_undirected()
    max_modularity = float('-inf')
    for i in range(iterNum):
        if weight is None:
            cur_list_communities = list(
                community.label_propagation_communities(H))
        else:
            cur_list_communities = list(
                community.asyn_lpa_communities(H, weight=weight))

        cur_modularity = quality.modularity(H, cur_list_communities)
        if (cur_modularity > max_modularity):
            list_communities = cur_list_communities
            max_modularity = cur_modularity

    return list_communities
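A usage sketch, assuming `community` and `quality` are the NetworkX modules imported as in the other examples.

import networkx as nx
from networkx.algorithms import community
from networkx.algorithms.community import quality

G = nx.karate_club_graph()
comms = label_propagation(G, weight=None, iterNum=3)
print(len(comms), "communities, modularity =", quality.modularity(G, comms))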
Example #21
def main():

    # Column name
    col_name = "ALGORITHM_cmty"

    # Load data
    if path.exists("../data/cmty_nodes.csv"):
        node_upload = "../data/cmty_nodes.csv"
    elif path.exists("../data/nodes.csv"):
        node_upload = "../data/nodes.csv"
    else:
        print("NO NODES TO UPLOAD!")
        assert (False)
    pd_nodes = pd.read_csv(node_upload, sep='\t', index_col=0)

    # Data in nice form
    headers = list(pd_nodes.columns)
    nodes = np.asarray(pd_nodes)

    # Aggregate file names
    model_names = ["GAT", "GCN", "GraphSage"]
    npy_names = ["../data/" + x + "_node_embeddings.npy" for x in model_names]

    model_cmtys = []
    model_time = []
    for i in range(len(npy_names)):

        # Load embeddings
        embeddings = np.load(npy_names[i])
        print(embeddings.shape)

        # Generate node_mapping for clusters
        start = timeit.default_timer()
        ##########################################
        # CODE HERE to cluster embeddings and creating node_mapping #
        # node_mapping can either be dictionary or array #
        ##########################################

        node_mapping = np.zeros(len(nodes)).astype(int)

        ##########################################
        stop = timeit.default_timer()
        model_time.append(stop - start)

        # Convert node_mapping to cmtys and node_to_cmty array
        #num_cmtys = len(set(node_mapping.values()))
        num_cmtys = len(set(node_mapping))
        cmtys = [[] for _ in range(num_cmtys)]
        node_to_cmty = np.zeros(len(node_mapping)).astype(int)
        for j in range(len(node_to_cmty)):
            node_to_cmty[j] = node_mapping[j]
            cmtys[node_mapping[j]].append(j)
        model_cmtys.append(cmtys)

        # Add communities to nodes
        pd_nodes[model_names[i] + "_" + col_name] = node_to_cmty
        pd_nodes.to_csv("../data/cmty_nodes.csv", sep='\t')

    print("Creating Graph")
    # Load social network accordingly
    edges = pd.read_csv("../data/edges.csv", sep='\t', index_col=0)
    edges = np.asarray(edges).astype(int)
    G = nx.Graph()
    G.add_nodes_from(range(nodes.shape[0]))
    G.add_edges_from(list(map(tuple, edges)))

    print("Calculating modularity")

    for i in range(len(model_names)):
        assert (is_partition(G, model_cmtys[i]))
        modul = modularity(G, model_cmtys[i])

        print("Results from " + model_names[i] + " ALGORITHM:")
        print("Modularity:", modul)
        print("Number of clusters:", len(model_cmtys[i]))
        print("Time elapsed:", model_time[i])
def girvan_newman_best_partition(G, list_partitions):
    """ Returns the best partition on the `list_partitions`.

    Returns the best partition among those generated by the Girvan-Newman
    algorithm. The best partition is selected according to modularity,
    computed using `networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
        `G` must meet 2 conditions:
        1. `G` must contain only one connected component
        2. The nodes must be integers from 0 to (number_of_nodes - 1)

    list_partitions : list
        List of (number_of_nodes - 1) lists obtained using
        `girvan_newman_partitions`.
        Each list contains the information about the partition of that level.

    Returns
    -------
    tuple
        Tuple of 2 elements:
        First element: list with information about the best partition.
                       It is a list of sets of nodes, each set of nodes
                       is a community.
        Second element: integer, position of the partition in `list_partitions`
                        which corresponds to the best partition.

    Raises
    ------
    TypeError
        If `G` does not meet the conditions:
        1. `G` must contain only one connected component
        2. The nodes must be integers from 0 to (number_of_nodes - 1)

    Example
    --------
    To get the best partition of `G` among those detected by the
    Girvan-Newman algorithm::

        >>> G = nx.path_graph(6)
        >>> partitions = girvan_newman_partitions(G)
        >>> bp_G, index_bp_G = girvan_newman_best_partition(G, partitions)
        >>> print(bp_G)
        [{0, 1, 2}, {3, 4, 5}]
        >>> print(index_bp_G)
        0

    To plot the dendrogram of community detection performed on graph G,
    highlighting the best partition::

        >>> from scipy.cluster.hierarchy import dendrogram
        >>> # Create graph and perform community detection with Girvan-Newman
        >>> G = nx.path_graph(6)
        >>> partitions = girvan_newman_partitions(G)
        >>> # Compute the agglomerative matrix
        >>> agglomerative_mat = agglomerative_matrix(G, partitions)
        >>> # Find the best partition and its distance from the ground level
        >>> bp_G, idx_bp_G = girvan_newman_best_partition(G, partitions)
        >>> n_communities_bp = len(bp_G)
        >>> dis_bp = distance_of_partition(agglomerative_mat, n_communities_bp)
        >>> # Plot the dendrogram highlighting the best partition
        >>> dendro_bp = dendrogram(agglomerative_mat, color_threshold=dis_bp)

     """
    # Does G meet the conditions?
    if nx.number_connected_components(G) > 1:
        raise TypeError("Bad graph type: do not use a graph with more" +
                        " than one connected component")
    _nodes = nx.nodes(G)
    nn = nx.number_of_nodes(G)
    _good_nodes = np.arange(nn)
    if not set(_nodes) == set(_good_nodes):
        raise TypeError("Bad graph type: use a graph with nodes which" +
                        " are integers from 0 to (number_of_nodes - 1)")

    # Look for the best partition
    best_partition = []
    MAX_mod = -99
    c = 0
    for part in list_partitions:
        # Compute modularity
        tmp_mod = modularity(G, part)

        # If modularity increases, then update `best_partition`
        if tmp_mod > MAX_mod:
            MAX_mod = tmp_mod
            best_partition = part
            id_best_part = c

        c += 1

    return (best_partition, id_best_part)
Example #23
    def modularity(self, communities):
        return modularity(self.graph, communities)
import matplotlib.pyplot as plt
import networkx as nx
from networkx.algorithms.community.quality import modularity
from sklearn.metrics.cluster import normalized_mutual_info_score

G = nx.read_adjlist('karate_edges_77.txt')

# Let each node in the graph be in its own community
communities = list()
for i in G.nodes():
    communities.append(set([i]))

# Create a list for keeping track of all merges
tracking_merges = list()

modnew = modularity(G, communities)
print('The modularity at the beginning is', modnew)
modold = None
comtrial = []
modtrial = 0
num_of_merges = 0
num_merges = []
modularity_scores = []

# Maximizing the modularity to find the best social partition
while (modold is None or modnew > modold):
    comtrial = list(communities)
    modold = modnew
    #print('The current modularity is', modold)
    to_be_merged = None
    for i, x in enumerate(communities):
Example #25
    ##--------------------- print for label propagation result
    G_treated = label_prop(G, max_iter=100)
    labels = [G_treated.nodes[node]["label"] for node in G_treated.nodes]
    # print(labels)
    labels = list(set(labels))
    partitions = []
    for label in labels:
        partitions.append(
            set([
                node for node in G_treated.nodes
                if G_treated.nodes[node]["label"] == label
            ]))
    # start = time.time()
    print('modularity, coverage, performance : ',
          modularity(G_treated, partitions), coverage(G_treated, partitions),
          performance(G_treated, partitions))
    # end = time.time()
    # print(end-start)

    ##--------------------- print for louvain result
    # start = time.time()
    partition = community_louvain.best_partition(G)
    # print(partition)
    labels = [partition[node] for node in G.nodes]
    labels = list(set(labels))
    partitions = []
    for label in labels:
        partitions.append(
            set([node for node in G.nodes if partition[node] == label]))
    # print(modularity(partitions, G))
t = 20

cost = sgw.undirected_normalized_heat_kernel(G, t)

d_gws = []
mis = []
coverages = []
modularities = []

for j in num_clusts:
    mutual_info, d_gw, coup = process_sgwl_eu(cost, database, num_nodes, j)
    partition = get_partition(coup)
    mis.append(mutual_info)
    d_gws.append(d_gw)
    coverages.append(coverage(G, partition))
    modularities.append(modularity(G, partition))

# Estimate number of clusters
estimated_clusters_raw_sym = num_clusts[np.argmax(modularities)]
print('Number of Clusters:', estimated_clusters_raw_sym)

# Now perform modularity/coverage maximizing pipeline
ts = np.linspace(3, 10, 40)
mis, coups, d_gws, good_t_max, good_t_grad, rt = t_selection_pipeline_undirected_eu(
    G, ts, estimated_clusters_raw_sym)

coverages = []

for j in range(len(ts)):
    coup = coups[j]
    partition = get_partition(coup)
Example #27
    def modularity(self, graph):
        communities = greedy_modularity_communities(nx.Graph(graph))
        modularity = round(quality.modularity(graph, communities), 1)
        if modularity < 0.1:
            modularity = 0.1
        return modularity
Example #28
def greedy_modularity_communities(G, weight=None, resolution=1):
    """Find communities in G using greedy modularity maximization.

    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_.
    This method currently supports the Graph class.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph
    weight : string or None, optional (default=None)
       The name of an edge attribute that holds the numerical value used
       as a weight.  If None, then each edge has weight 1.
       The degree is the sum of the edge weights adjacent to the node.

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    modularity

    References
    ----------
    .. [1] M. E. J Newman "Networks: An Introduction", page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
       Detection" Phys. Rev. E74, 2006.
    """

    # Count nodes and edges
    N = len(G.nodes())
    m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)])
    q0 = 1.0 / (2.0 * m)

    # Map node labels to contiguous integers
    label_for_node = {i: v for i, v in enumerate(G.nodes())}
    node_for_label = {label_for_node[i]: i for i in range(N)}

    # Calculate degrees
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists
    communities = {i: frozenset([i]) for i in range(N)}
    merges = []

    # Initial modularity
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition, resolution=resolution)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edges within community i
    # dq_dict[i][j]: dQ for merging community i, j
    # dq_heap[i][n] : (-dq, i, j) for community i, nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    a = [k[i] * q0 for i in range(N)]
    # dQ for merging adjacent singleton communities i and j; map the integer
    # indices back to node labels before looking up the edge weight
    dq_dict = {
        i: {
            j: 2 * q0 * G.get_edge_data(label_for_node[i],
                                        label_for_node[j]).get(weight, 1.0) -
            2 * resolution * k[i] * k[j] * q0 * q0
            for j in
            [node_for_label[u] for u in G.neighbors(label_for_node[i])]
            if j != i
        }
        for i in range(N)
    }
    dq_heap = [
        MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()])
        for i in range(N)
    ]
    H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq
        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - {i, j}
        both_set = i_set & j_set
        # Merge i into j and update dQ
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                dq_jk = dq_dict[j][k] - 2.0 * resolution * a[i] * a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0 * resolution * a[j] * a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)

        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    communities = [
        frozenset([label_for_node[i] for i in c])
        for c in communities.values()
    ]
    return sorted(communities, key=len, reverse=True)
Example #29
# Conductance
sumOfCond = []

for i in range(len(cluster)):
    sumOfCond.append(conductance(new_graph, cluster[i]))

condScoreS = conductance(new_graph, S)
condScoreT = conductance(new_graph, T)
overallCond = min(sumOfCond)

# Modularity Communities
barbMod = list(greedy_modularity_communities(new_graph))

# Modularity Score
barbModScore = qu.modularity(new_graph, barbMod)

# edge_betweenness_centrality
barbedgeBetweenness = nx.edge_betweenness_centrality(new_graph, None, False)
barbaverageEdge = sum(barbedgeBetweenness.values()) / len(barbedgeBetweenness)
barbtotalEdge = sum(barbedgeBetweenness.values())

# print sets of nodes, one for each community.
print("Communities: ", barbMod)

# Modularity Score
print("Modularity: ", barbModScore)

# Conductance Score
print("Conductance for: ", S, condScoreS)
print("Conductance for: ", T, condScoreT)
Example #30
def naive_greedy_modularity_communities(G, resolution=1):
    """Find communities in G using greedy modularity maximization.

    This implementation is O(n^4), much slower than alternatives, but it is
    provided as an easy-to-understand reference implementation.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    This function maximizes the generalized modularity, where `resolution`
    is the resolution parameter, often expressed as $\gamma$.
    See :func:`~networkx.algorithms.community.quality.modularity`.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    list
        A list of sets of nodes, one for each community.
        Sorted by length with largest communities first.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    See Also
    --------
    greedy_modularity_communities
    modularity
    """
    # First create one community for each node
    communities = list([frozenset([u]) for u in G.nodes()])
    # Track merges
    merges = []
    # Greedily merge communities until no improvement is possible
    old_modularity = None
    new_modularity = modularity(G, communities, resolution=resolution)
    while old_modularity is None or new_modularity > old_modularity:
        # Save modularity for comparison
        old_modularity = new_modularity
        # Find best pair to merge
        trial_communities = list(communities)
        to_merge = None
        for i, u in enumerate(communities):
            for j, v in enumerate(communities):
                # Skip i==j and empty communities
                if j <= i or len(u) == 0 or len(v) == 0:
                    continue
                # Merge communities u and v
                trial_communities[j] = u | v
                trial_communities[i] = frozenset([])
                trial_modularity = modularity(G,
                                              trial_communities,
                                              resolution=resolution)
                if trial_modularity >= new_modularity:
                    # Check if strictly better or tie
                    if trial_modularity > new_modularity:
                        # Found new best, save modularity and group indexes
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                    elif to_merge and min(i, j) < min(to_merge[0],
                                                      to_merge[1]):
                        # Break ties by choosing pair with lowest min id
                        new_modularity = trial_modularity
                        to_merge = (i, j, new_modularity - old_modularity)
                # Un-merge
                trial_communities[i] = u
                trial_communities[j] = v
        if to_merge is not None:
            # If the best merge improves modularity, use it
            merges.append(to_merge)
            i, j, dq = to_merge
            u, v = communities[i], communities[j]
            communities[j] = u | v
            communities[i] = frozenset([])
    # Remove empty communities and sort
    return sorted((c for c in communities if len(c) > 0),
                  key=len,
                  reverse=True)
Example #31
def greedy_modularity_communities(G, weight=None):
    """Find communities in graph using Clauset-Newman-Moore greedy modularity
    maximization. This method currently supports the Graph class and does not
    consider edge weights.

    Greedy modularity maximization begins with each node in its own community
    and joins the pair of communities that most increases modularity until no
    such pair exists.

    Parameters
    ----------
    G : NetworkX graph

    Returns
    -------
    Yields sets of nodes, one for each community.

    Examples
    --------
    >>> from networkx.algorithms.community import greedy_modularity_communities
    >>> G = nx.karate_club_graph()
    >>> c = list(greedy_modularity_communities(G))
    >>> sorted(c[0])
    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]

    References
    ----------
    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
       Oxford University Press 2011.
    .. [2] Clauset, A., Newman, M. E., & Moore, C.
       "Finding community structure in very large networks."
       Physical Review E 70(6), 2004.
    """

    # Count nodes and edges
    N = len(G.nodes())
    m = sum([d.get('weight', 1) for u, v, d in G.edges(data=True)])
    q0 = 1.0 / (2.0*m)

    # Map node labels to contiguous integers
    label_for_node = dict((i, v) for i, v in enumerate(G.nodes()))
    node_for_label = dict((label_for_node[i], i) for i in range(N))

    # Calculate degrees
    k_for_label = G.degree(G.nodes(), weight=weight)
    k = [k_for_label[label_for_node[i]] for i in range(N)]

    # Initialize community and merge lists
    communities = dict((i, frozenset([i])) for i in range(N))
    merges = []

    # Initial modularity
    partition = [[label_for_node[x] for x in c] for c in communities.values()]
    q_cnm = modularity(G, partition)

    # Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
    # a[i]: fraction of edges within community i
    # dq_dict[i][j]: dQ for merging community i, j
    # dq_heap[i][n] : (-dq, i, j) for community i, nth largest dQ
    # H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
    a = [k[i]*q0 for i in range(N)]
    dq_dict = dict(
        (i, dict(
            (j, 2*q0 - 2*k[i]*k[j]*q0*q0)
            for j in [
                node_for_label[u]
                for u in G.neighbors(label_for_node[i])]
            if j != i))
        for i in range(N))
    dq_heap = [
        MappedQueue([
            (-dq, i, j)
            for j, dq in dq_dict[i].items()])
        for i in range(N)]
    H = MappedQueue([
        dq_heap[i].h[0]
        for i in range(N)
        if len(dq_heap[i]) > 0])

    # Merge communities until we can't improve modularity
    while len(H) > 1:
        # Find best merge
        # Remove from heap of row maxes
        # Ties will be broken by choosing the pair with lowest min community id
        try:
            dq, i, j = H.pop()
        except IndexError:
            break
        dq = -dq
        # Remove best merge from row i heap
        dq_heap[i].pop()
        # Push new row max onto H
        if len(dq_heap[i]) > 0:
            H.push(dq_heap[i].h[0])
        # If this element was also at the root of row j, we need to remove the
        # duplicate entry from H
        if dq_heap[j].h[0] == (-dq, j, i):
            H.remove((-dq, j, i))
            # Remove best merge from row j heap
            dq_heap[j].remove((-dq, j, i))
            # Push new row max onto H
            if len(dq_heap[j]) > 0:
                H.push(dq_heap[j].h[0])
        else:
            # Duplicate wasn't in H, just remove from row j heap
            dq_heap[j].remove((-dq, j, i))
        # Stop when change is non-positive
        if dq <= 0:
            break

        # Perform merge
        communities[j] = frozenset(communities[i] | communities[j])
        del communities[i]
        merges.append((i, j, dq))
        # New modularity
        q_cnm += dq
        # Get list of communities connected to merged communities
        i_set = set(dq_dict[i].keys())
        j_set = set(dq_dict[j].keys())
        all_set = (i_set | j_set) - set([i, j])
        both_set = i_set & j_set
        # Merge i into j and update dQ
        for k in all_set:
            # Calculate new dq value
            if k in both_set:
                dq_jk = dq_dict[j][k] + dq_dict[i][k]
            elif k in j_set:
                dq_jk = dq_dict[j][k] - 2.0*a[i]*a[k]
            else:
                # k in i_set
                dq_jk = dq_dict[i][k] - 2.0*a[j]*a[k]
            # Update rows j and k
            for row, col in [(j, k), (k, j)]:
                # Save old value for finding heap index
                if k in j_set:
                    d_old = (-dq_dict[row][col], row, col)
                else:
                    d_old = None
                # Update dict for j,k only (i is removed below)
                dq_dict[row][col] = dq_jk
                # Save old max of per-row heap
                if len(dq_heap[row]) > 0:
                    d_oldmax = dq_heap[row].h[0]
                else:
                    d_oldmax = None
                # Add/update heaps
                d = (-dq_jk, row, col)
                if d_old is None:
                    # We're creating a new nonzero element, add to heap
                    dq_heap[row].push(d)
                else:
                    # Update existing element in per-row heap
                    dq_heap[row].update(d_old, d)
                # Update heap of row maxes if necessary
                if d_oldmax is None:
                    # No entries previously in this row, push new max
                    H.push(d)
                else:
                    # We've updated an entry in this row, has the max changed?
                    if dq_heap[row].h[0] != d_oldmax:
                        H.update(d_oldmax, dq_heap[row].h[0])

        # Remove row/col i from matrix
        i_neighbors = dq_dict[i].keys()
        for k in i_neighbors:
            # Remove from dict
            dq_old = dq_dict[k][i]
            del dq_dict[k][i]
            # Remove from heaps if we haven't already
            if k != j:
                # Remove both row and column
                for row, col in [(k, i), (i, k)]:
                    # Check if replaced dq is row max
                    d_old = (-dq_old, row, col)
                    if dq_heap[row].h[0] == d_old:
                        # Update per-row heap and heap of row maxes
                        dq_heap[row].remove(d_old)
                        H.remove(d_old)
                        # Update row max
                        if len(dq_heap[row]) > 0:
                            H.push(dq_heap[row].h[0])
                    else:
                        # Only update per-row heap
                        dq_heap[row].remove(d_old)

        del dq_dict[i]
        # Mark row i as deleted, but keep placeholder
        dq_heap[i] = MappedQueue()
        # Merge i into j and update a
        a[j] += a[i]
        a[i] = 0

    communities = [
        frozenset([label_for_node[i] for i in c])
        for c in communities.values()]
    return sorted(communities, key=len, reverse=True)