Example #1
def _graph_community(G):  # unused function
    '''The 'graph_community' function is used to analyse a corpus at
       two levels of the dendrogram of the corpus coupling graph G
       so that the size of every community is <= SIZECUT.
       Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)
    
       Args:
           G (networkx object): corpus coupling graph.
        
       Returns:
           louvain_partition (dict): partition of the corpus coupling graph G. 
    
    '''

    # 3rd party import
    import community as community_louvain

    # TO DO: move SIZECUT in COUPL_GLOBAL_VALUES if _graph_community is used
    SIZECUT = 10  # Upper limit of size communities

    dendrogram, part, max_mod = _runpythonlouvain(G)
    part2 = part.copy()
    to_update = {}

    communities_id, nodes_id = set(part.values()), list(part.keys())
    for community_id in communities_id:
        list_nodes = [
            nodes for nodes in part.keys() if part[nodes] == community_id
        ]

        if len(list_nodes) > SIZECUT:  # split clusters of size > SIZECUT
            H = G.subgraph(list_nodes).copy()
            [dendo2, partfoo, mod] = _runpythonlouvain(H)
            dendo2 = community_louvain.generate_dendrogram(H, part_init=None)
            partfoo = community_louvain.partition_at_level(
                dendo2,
                len(dendo2) - 1)
            # add prefix code
            for aaa in partfoo.keys():
                partfoo[aaa] = (community_id + 1) * 1000 + partfoo[aaa]
            nb_comm = len(set(partfoo.values()))
            # "community_id" cluster ("len(list_nodes)" records) is split in nb_comm sub-clusters
            part2.update(partfoo)
        else:  # for communities of less than SIZECUT nodes, shift the com label as well
            for n in list_nodes:
                to_update[n] = ""
    for n in to_update:
        part2[n] += 1

    # ... save partitions
    louvain_partition = dict()
    for lev in range(len(dendrogram)):
        louvain_partition[lev] = community_louvain.partition_at_level(
            dendrogram, lev)
    # ... set community labels starting from 1 instead of 0 for the top level
    for k in louvain_partition[len(dendrogram) - 1].keys():
        louvain_partition[len(dendrogram) - 1][k] += 1
    louvain_partition[len(dendrogram)] = part2

    return louvain_partition
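
For reference, a minimal sketch of the generate_dendrogram / partition_at_level pattern that the helper above builds on (an illustration only; it assumes python-louvain is installed and importable as community, and uses a toy graph):

import networkx as nx
import community as community_louvain

G = nx.karate_club_graph()  # toy graph standing in for the corpus coupling graph
dendrogram = community_louvain.generate_dendrogram(G)
# Level 0 is the finest partition; the last level is the coarsest.
for level in range(len(dendrogram)):
    partition = community_louvain.partition_at_level(dendrogram, level)
    print(level, len(set(partition.values())), "communities")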
Example #2
def communityMining(G, minCommSize=10):
  """
  Find communities in the graph 'G' with more than 'minCommSize' nodes.
  """
  count = 0
  dendrogram = community.generate_dendrogram(G)
  firstPartition = community.partition_at_level(dendrogram,0)
  
  sys.stderr.write("Prune sparse clusters. ")
  #remove early small communities 
  sparseComm = set([k for k,v in Counter(firstPartition.values()).iteritems() if v<minCommSize])
  nodes = [node for node in G.nodes() if firstPartition[node] in sparseComm]
  G.remove_nodes_from(nodes)

  sys.stderr.write("Find communities. ")
  # Partition the graph again and report big communities:
  dendrogram = community.generate_dendrogram(G)
  partition = community.partition_at_level(dendrogram,len(dendrogram)-2)
  allfqdns =  set(n for n,d in G.nodes(data=True) if d['bipartite']==1)
  allHosts = set(n for n,d in G.nodes(data=True) if d['bipartite']==0)
  size = float(len(set(partition.values())))
  communities = []
  
  bigComm = [k for k,v in Counter(partition.values()).iteritems() if v>minCommSize]
  for com in bigComm :
    comfqdns = [nodes for nodes in allfqdns if partition[nodes] == com]
    comHosts = [nodes for nodes in allHosts if partition[nodes] == com]
    comm = G.subgraph(comfqdns+comHosts) 
    if comm.order() < minCommSize :
        sys.stderr.write("Remove small community (This shouldn't happen here?)\n")
        continue

    communities.append(comm)
    
  return communities 
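
The same pruning idea, sketched standalone in Python 3 on a synthetic graph (the example above expects a bipartite host/FQDN graph; minCommSize and the random graph here are illustrative only):

from collections import Counter
import community
import networkx as nx

minCommSize = 10
G = nx.erdos_renyi_graph(300, 0.02)  # synthetic stand-in graph
first = community.partition_at_level(community.generate_dendrogram(G), 0)
small = {c for c, size in Counter(first.values()).items() if size < minCommSize}
G.remove_nodes_from([n for n in list(G.nodes()) if first[n] in small])
dendrogram = community.generate_dendrogram(G)
partition = community.partition_at_level(dendrogram, len(dendrogram) - 1)
print(len(set(partition.values())), "communities remain after pruning")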
Example #3
def apply_community_louvain(G):
    start_node_id, end_node_id = get_start_and_end_nodes(G)

    partition = community_louvain.best_partition(G)

    dendo = community_louvain.generate_dendrogram(G)
    highest_partition = community_louvain.partition_at_level(
        dendo, (len(dendo) - 1))
    communities = set(highest_partition.values())

    print("Communities:")

    list_of_communities = []

    community_count = 0
    for community_number in communities:
        community_items = [
            x for x in highest_partition
            if highest_partition[x] == community_number
        ]
        if start_node_id in community_items or end_node_id in community_items:
            continue
        list_of_communities.append(community_items)
        community_count = community_count + 1
        print(f"Community number {community_count}: {community_items}")

    return list_of_communities
Example #4
def get_community_assignment(in_df, graph, dendrogram):
    '''
    Utilize dendrogram to find community clusterings at every level
    available. For each hierarchy level, a new column is added to the
    returned df with the community clustering. (e.g. cid0 -> 0,0,1,2,3)

    in_df: Dataframe. Must be indexed by user_id.
    graph: Networkx Graph. Node IDs should match user_ids in dataframe
    dendrogram: List of dictionaries, each dictionary mapping user_id to
    community_id. Each dictionary should represent a level of the clustering
    hierarchy.

    return: Tuple of Dataframe with community id assignment columns added
    and dictionary mapping each level to community modularity (float)
    '''
    df = in_df.copy()

    community_modularity = {}

    for i in range(len(dendrogram)):

        partition = partition_at_level(dendrogram, i)

        # Infrequently, the community detection algorithm will exclude (?)
        # a user ID or two. Still investigating why. For now, these will be
        # placed into partition 0.
        df['cid' + str(i)] = [partition[ind] if ind in partition else 0
                              for ind in df.index]

        community_modularity[i] = modularity(partition, graph)

    return df, community_modularity
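
A hypothetical usage sketch for get_community_assignment (the graph and user index are synthetic, and the module is assumed to import generate_dendrogram, partition_at_level and modularity from community):

import pandas as pd
import networkx as nx
from community import generate_dendrogram

graph = nx.karate_club_graph()                 # node IDs play the role of user_ids
in_df = pd.DataFrame({'dummy': 0}, index=list(graph.nodes()))
dendrogram = generate_dendrogram(graph)
df, community_modularity = get_community_assignment(in_df, graph, dendrogram)
print(df.filter(like='cid').nunique())         # communities found at each hierarchy level
print(community_modularity)                    # {level: modularity}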
Example #5
def external_ec_coarsening(graph, sfdp_path, coarsening_scheme = 2, c_type = 'original'):
    if c_type == 'louvain':
        print("Coarsening with Louvain")
        matrix = magicgraph.to_adjacency_matrix(graph)
        nx_graph = nx.from_scipy_sparse_matrix(matrix)
        dendro = community.generate_dendrogram(nx_graph)

        coarse_graphs = [DoubleWeightedDiGraph(graph)]
        merges = []
        i = 0
        for l in range(len(dendro)):
            level = community.partition_at_level(dendro, l)
            induced = community.induced_graph(level, nx_graph)
            filename = 'induced'+str(l)+'.edgelist'
            #nx.write_edgelist(induced, filename)
            # write weighted graph to file
            f = open(filename, 'w')
            for u, v, a in induced.edges.data('weight', default = 1):
                line = ' '.join([str(u), str(v), str(a)])
                f.write(line + '\n')
            f.close()
            m_graph = magicgraph.load_weighted_edgelist(filename, undirected = True)
            coarse_graphs.append(DoubleWeightedDiGraph(m_graph))
            merges.append(level)
            print('Level: ', i, 'N nodes: ', m_graph.number_of_nodes())
            i+= 1

        return coarse_graphs, merges
    elif c_type == 'original':
        return original_coarsening(graph, sfdp_path, coarsening_scheme)
Example #6
def louvain_community_detection(networkx_graph):
    """
    Do louvain community detection
    :param networkx_graph:
    :return:
    """
    return cm.partition_at_level(cm.generate_dendrogram(networkx_graph, randomize=True, weight='weight'), 0)
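
Level 0 used above is the finest split; the last dendrogram level is the coarsest one, which is what best_partition returns. A short sketch of that choice on a toy graph:

import networkx as nx
import community as cm

graph = nx.karate_club_graph()
dendrogram = cm.generate_dendrogram(graph, weight='weight')
finest = cm.partition_at_level(dendrogram, 0)
coarsest = cm.partition_at_level(dendrogram, len(dendrogram) - 1)  # what best_partition would give
print(len(set(finest.values())), "fine vs", len(set(coarsest.values())), "coarse communities")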
Example #7
    def louvianClustering(self, similarity_measure_list):
        edge_list = []
        node_list = []
        thresh = 0  #self.getThreshold(similarity_measure_list)
        for element in similarity_measure_list:
            f1, f2, val = element
            if (float(val) > thresh):
                edge_list.append((f1, f2, float(val)))
            node_list.append(f1)
            node_list.append(f2)
        node_list = list(set(node_list))
        G = nx.Graph()
        G.add_nodes_from(node_list)
        G.add_weighted_edges_from(edge_list)

        partition = community.best_partition(G)
        dendo = community.generate_dendrogram(G, None, 'weight', 1., False)
        testing = community.partition_at_level(dendo, len(dendo) - 1)
        res = community.modularity(partition, G, 'weight')

        list1 = [partition]
        cluster_set = set(val for dic in list1 for val in dic.values())
        cluster_set_elements = []
        for cluster_id in cluster_set:
            temp_elements = []
            for node, cluster in partition.iteritems():
                if (cluster == cluster_id):
                    temp_elements.append(node)
            cluster_set_elements.append(temp_elements)
        self.cluster_set = cluster_set_elements
        return cluster_set_elements
Example #8
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_matrix(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False)  # Maybe set randomize to True

    if len(dendro) - level - 1 < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, len(dendro) - level - 1)
    number_communities = max(communities, key=lambda x: communities[x]) + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = []
        for grp in communities:
            if communities[grp] == i:
                grp_list.append(grp)
        else:
            if grp_list:
                community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    return community_list, c_level_graph
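
A hedged usage sketch for calc_louvain with a tiny synthetic adjacency matrix of two obvious blocks (it assumes an older networkx where from_numpy_matrix, used above, still exists; the exact community split may vary):

import numpy as np

adj = np.array([[0, 1, 1, 0, 0, 0],
                [1, 0, 1, 0, 0, 0],
                [1, 1, 0, 1, 0, 0],
                [0, 0, 1, 0, 1, 1],
                [0, 0, 0, 1, 0, 1],
                [0, 0, 0, 1, 1, 0]], dtype=float)
community_list, c_graph = calc_louvain(adj, level=0, return_c_graph=True)
print(community_list)     # e.g. [[0, 1, 2], [3, 4, 5]]
print(c_graph.todense())  # induced community-level adjacency (self-loops hold intra-community weight)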
Example #9
 def updateLabels(self, level):
     # Louvain algorithm labels community at different level (with dendrogram).
     # Here we want the community labels at a given level.
     level = int((len(self.dendrogram) - 1) * level)
     partition = community_louvain.partition_at_level(self.dendrogram, level)
     # Convert dictionary to numpy array
     self.labels = np.array(list(partition.values()))
     return
Example #10
 def test_modularity_increase(self):
     """
     Generate a dendrogram and test that modularity is always increasing
     """
     g = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendogram(g)
     mod_prec = -1.
     mods = [co.modularity(co.partition_at_level(dendo, level), g) for level in range(len(dendo)) ]
     self.assertListEqual(mods, sorted(mods))
Example #11
 def get_covered_entities(self, entity, level):
     partition = community.partition_at_level(self.dendrogram, level)
     interesting_partition_code = partition[entity]
     covered_entities = []
     for entity, partition_code in partition.iteritems():
         if partition_code != interesting_partition_code:
             continue
         covered_entities.append(entity)
     return covered_entities
Example #12
def _runpythonlouvain(G):  # unused function
    '''The "_runpythonlouvain" function is used to analyse a corpus
       at level "len(foo_dendrogram) - 1" of the dendrogram of the corpus coupling graph G,
       (see https://buildmedia.readthedocs.org/media/pdf/python-louvain/latest/python-louvain.pdf).
       Author: Sebastian Grauwin (http://sebastian-grauwin.com/bibliomaps/)
    
       Args:
           G (networkx object): corpus coupling graph.
        
       Returns:
           results (tuple): [dendrogram, partition, modularity,] where
                dendrogram [list of dict]: a list of partitions, i.e. dictionaries
                                          where keys of the i+1 dict are the values of the i dict;
               partition (dict): Louvain partition of the corpus coupling graph G 
                                 where dict keys are the pub IDs 
                                 and the dict values are the community IDs;
               modularity [float]: modularity.
    
    '''
    # standard library imports
    from collections import namedtuple

    # 3rd party import
    import community as community_louvain

    # TO DO: move NRUNS in COUPL_GLOBAL_VALUES if _runpythonlouvain is used.
    NRUNS = 1  # number of times the louvain algorithm is run for a given network,
    # the best partition being kept.

    named_tup_results = namedtuple('results', [
        'dendrogram',
        'partition',
        'modularity',
    ])

    max_modularity = -1
    for run in range(NRUNS):
        if NRUNS > 1:
            print(f'......run {run + 1}/{NRUNS}')
        foo_dendrogram = community_louvain.generate_dendrogram(G,
                                                               part_init=None)
        partition_foo = community_louvain.partition_at_level(
            foo_dendrogram,
            len(foo_dendrogram) - 1)
        modularity = community_louvain.modularity(partition_foo, G)
        if modularity > max_modularity:
            max_modularity = modularity
            partition = partition_foo.copy()
            dendrogram = foo_dendrogram.copy()

    louvain_part = named_tup_results(
        dendrogram,
        partition,
        max_modularity,  # modularity of the kept (best) run
    )
    return louvain_part
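
A brief usage sketch for _runpythonlouvain (a toy graph stands in for the corpus coupling graph):

import networkx as nx

G = nx.karate_club_graph()
dendrogram, partition, modularity = _runpythonlouvain(G)
print(len(dendrogram), "dendrogram levels, modularity =", round(modularity, 3))
print(len(set(partition.values())), "communities")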
Example #13
 def busmap_by_louvain(network, level=-1):
     lines = network.lines.loc[:,['bus0', 'bus1']].assign(weight=1./network.lines.x).set_index(['bus0','bus1'])
     G = nx.Graph()
     G.add_nodes_from(network.buses.index)
     G.add_edges_from((u,v,dict(weight=w)) for (u,v),w in lines.itertuples())
     dendrogram = community.generate_dendrogram(G)
     if level < 0:
         level += len(dendrogram)
     return pd.Series(community.partition_at_level(dendrogram, level=level),
                      index=network.buses.index)
Example #14
 def busmap_by_louvain(network, level=-1):
     lines = network.lines.loc[:,['bus0', 'bus1']].assign(weight=1./network.lines.x).set_index(['bus0','bus1'])
     G = nx.Graph()
     G.add_nodes_from(network.buses.index)
     G.add_edges_from((u,v,dict(weight=w)) for (u,v),w in lines.itertuples())
     dendrogram = community.generate_dendrogram(G)
     if level < 0:
         level += len(dendrogram)
     return pd.Series(community.partition_at_level(dendrogram, level=level),
                      index=network.buses.index)
Example #15
 def test_modularity_increase(self):
     """
     Generate a dendrogram and test that modularity is always increasing
     """
     graph = nx.erdos_renyi_graph(1000, 0.01)
     dendo = co.generate_dendrogram(graph)
     mods = [
         co.modularity(co.partition_at_level(dendo, level), graph)
         for level in range(len(dendo))
     ]
     self.assertListEqual(mods, sorted(mods))
Example #16
def Mod(G, usebest=True, l=1):
	D = G.to_undirected()
	dendo = community.generate_dendogram(D, None)
	if usebest:
		level = len(dendo)-1
	else:
		level = l
	partition = community.partition_at_level(dendo,level)
	mod = community.modularity(partition, D)
	for n in G:
		G.node[n]['m'] = partition[n]
	return mod
Example #17
    def __init__(self, directed_graph):
        self.directed_graph = directed_graph

        dendogram = community.generate_dendogram(self.directed_graph.to_undirected())
        partitions = community.partition_at_level(dendogram, len(dendogram)-1)
        communities = self._get_communities(partitions)
        major_communities = self._get_large_communities(communities)

        self.community_graphs = self._build_community_graphs(communities,
                                      valid_communities=major_communities)

        self.community_rankings = self._pagerank_communities(self.community_graphs)
Example #18
def partition(G):
    undirected_G = G.to_undirected()
    dendo = com.generate_dendrogram(undirected_G,
                                    None,
                                    weight='weight',
                                    resolution=part_para)
    pdendo = []
    for i in range(len(dendo)):
        pdendo.append(com.partition_at_level(dendo, i))
        #print i, com.partition_at_level(dendo, i)
    outfile = open("partition.json", "w+")
    json.dump(pdendo, outfile)
    print "  Check File\"partition.json\" for the partition tree."
    sdendo = sort_partition(G, dendo)
    outfile = open("sorted_partition.json", "w+")
    pdendo = []
    for i in range(len(sdendo)):
        pdendo.append(com.partition_at_level(sdendo, i))
    json.dump(pdendo, outfile)
    print "  Check File\"sorted_partition.json\" for the sorted partition tree."
    return len(dendo)
Example #19
def louvain(graph):
    """ Louvain clustering, returns a dictionary where each key is the level of
        clustering and the values are the clusterings themselves as returned by the
        to_clusters_dict method.
    """
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    multilevel = {}
    for level in range(len(dendo) - 1):
        tmp = community.partition_at_level(dendo, level)
        # tmp is a dictionary where keys are the nodes and the values are the set it belongs to
        multilevel[level] = to_clusters_dict(tmp)

    return multilevel
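
The to_clusters_dict helper used above is not shown on this page; a plausible minimal version (an assumption, not the original code) simply inverts the node-to-community mapping:

def to_clusters_dict(partition):
    # Assumed helper: invert {node: community} into {community: [nodes]}.
    clusters = {}
    for node, com in partition.items():
        clusters.setdefault(com, []).append(node)
    return clusters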
Example #20
def modularize(edgeGraph, nodeDf, nameOfModularityColumn=u'Community_Lvl_0'):
	'''
	uses the original Louvain algorithm implementation to assign a community (modularity class) to each node of the graph
	'''
	#compute the best partition
	dendrogram = community.generate_dendrogram(edgeGraph, weight='weight')
	dendroBestPartitionDict = community.partition_at_level(dendrogram, len(dendrogram)-1) #dendroBestPartitionDict = community.best_partition(graph)
	#add a column to the node data frame so we can add the community values
	if nameOfModularityColumn not in nodeDf.columns:
		nodeDf[nameOfModularityColumn] = np.nan	
	#add the community values to the node data frame
	nodeDf[nameOfModularityColumn] = nodeDf[u'Id'].map(dendroBestPartitionDict)
	#making sure all 'modularity_class' NaN were deleted 
	return nodeDfCleaner(nodeDf), dendrogram
Example #21
def louvain(graph):
    """ Louvain clustering, returns dictionary where each key is the level of
        clustering and the values are the clusterings themselves as returned by the
        to_clusters_dict method.
    """
    community.__MIN = 1e-12
    dendo = community.generate_dendrogram(graph)
    multilevel = {}
    for level in range(len(dendo) - 1):
        tmp = community.partition_at_level(dendo, level)
        # tmp is a dictionary where keys are the nodes and the values are the set it belongs to
        multilevel[level] = to_clusters_dict(tmp)

    return multilevel
Example #22
def run_louvain(experiment_dir):
    g = nx.read_edgelist(os.path.join(experiment_dir, 'projection.txt'),
                         create_using=nx.DiGraph,
                         data=[('weight', float), ('p_prereq', float),
                               ('p_course', float)])
    g = g.to_undirected()
    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)
    for i in range(max(level_0.values()) + 1):  # +1 so the last community id is included
        print('=' * 40)
        major = []
        for class_id, partition_num in level_0.items():
            if partition_num == i:
                major.append(class_id)
        print(major)
        print('=' * 40)
Example #23
def study_dendrogram(G, filename):
  dendrogram = co.generate_dendrogram(G)
  modularity_at_level = dict()
  print("Dendrogram has {} levels".format(len(dendrogram)))
  for level in range(len(dendrogram)):
    part = co.partition_at_level(dendrogram, level)
    print("Found {} communities at level {}".format(len(set(part.values())), level))
    modularity_at_level[level] = co.modularity(part, G)

  plt.plot(list(modularity_at_level.keys()), list(modularity_at_level.values()), linestyle='dotted', marker = 'o', markersize=8)
  plt.xlabel("l - Level")
  plt.ylabel("Q - Modularity")
  if filename:
    plt.savefig("drawings/"+filename)
  plt.show()
  return dendrogram
Example #24
 def test_nodes_stay_together(self):
     """
     Test that two nodes in the same community at one level stay in the same community at the higher level
     """
     g = nx.erdos_renyi_graph(500, 0.01)
     dendo = co.generate_dendogram(g)
     parts = dict([])
     for l in range(len(dendo)) :
         parts[l] = co.partition_at_level(dendo, l)
     for l in range(len(dendo)-1) :
         p1 = parts[l]
         p2 = parts[l+1]
         coms = set(p1.values())
         for com in coms :
             comhigher = [ p2[node] for node, comnode in p1.iteritems() if comnode == com]
             self.assertEqual(len(set(comhigher)), 1)
Example #25
def add_cluster_labels_to_nodes(nodes_pdf, edges_pdf, weight_col='lift'):
    """
    Decorate node_pdf with columns marking the cluster(s) each node belongs to, using the Louvain algorithm.
    These cluster columns are added to nodes_pdf as a side effect.
    """
    import networkx as nx
    G = nx.Graph()
    elist = [(r['from'], r['to'], r[weight_col])
             for i, r in edges_pdf.iterrows()]
    G.add_weighted_edges_from(elist)
    dendro = community_louvain.generate_dendrogram(G)
    for level in range(0, len(dendro)):
        cluster_level_name = f"level_{level}_cluster"
        partition = community_louvain.partition_at_level(dendro, level)
        nodes_pdf[cluster_level_name] = [
            partition[x] for x in nodes_pdf['id']
        ]  # [partition[node_id[x]] for x in nodes_pdf['label']]
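
A hypothetical usage sketch for add_cluster_labels_to_nodes; the 'from'/'to'/'lift'/'id' columns follow the function's assumptions, and community_louvain is assumed to be imported in its module:

import pandas as pd

edges_pdf = pd.DataFrame({'from': ['a', 'a', 'b', 'c', 'd'],
                          'to':   ['b', 'c', 'c', 'd', 'e'],
                          'lift': [2.0, 1.5, 3.0, 0.5, 1.0]})
nodes_pdf = pd.DataFrame({'id': ['a', 'b', 'c', 'd', 'e']})
add_cluster_labels_to_nodes(nodes_pdf, edges_pdf)
print(nodes_pdf)  # gains level_0_cluster, level_1_cluster, ... columns as a side effect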
Example #26
    def get_communities_level(self, level, relevant_entities=None):
        communities_main_entities = {}
        partition = community.partition_at_level(self.dendrogram, level)

        for entity, partition_code in partition.iteritems():
            if (relevant_entities is not None) and\
                    (entity not in relevant_entities):
                continue
            if partition_code not in communities_main_entities:
                communities_main_entities[partition_code] = entity
            else:
                new_entity_weight = self._get_weight(entity)
                current_entity = communities_main_entities[partition_code]
                current_entity_weight = self._get_weight(current_entity)
                if current_entity_weight < new_entity_weight:
                    communities_main_entities[partition_code] = entity
        return communities_main_entities
Example #27
def run_louvain(experiment_dir):
    g = get_networkx_graph(experiment_dir)
    g = g.to_undirected()

    d = community.generate_dendrogram(g)
    level_0 = community.partition_at_level(d, 0)

    majors = []
    for i in range(max(level_0.values()) + 1):  # +1 so the last community id is included
        major = []
        for class_id, partition_num in level_0.items():
            if partition_num == i:
                major.append(class_id)
        majors.append(major)

    with open(os.path.join(experiment_dir, 'louvain.json'), 'w') as outfile:
        json.dump(majors, outfile, indent=4)
Example #28
def get_community(weight):
    """
    Perform graph clustering to detect communities.
    weight: which column to use as the edge weight
    """
    FG = nx.Graph()
    FG.add_weighted_edges_from(graph_data[['from_id', 'to_id', weight]].values)

    result = pd.DataFrame({'id': list(FG.nodes)})
    print('node number: %s' % len(result))
    dendrogram = community.generate_dendrogram(FG)

    for level in range(len(dendrogram)):
        the_partition = community.partition_at_level(dendrogram, level)
        result['%s_label_%s' % (weight, level)] = list(the_partition.values())

    return result
Example #29
def identify_clusters(graph, louvain_level=-1):
    """
    Identifies clusters in the given NetworkX Graph by Louvain partitioning.
    
    The parameter louvain_level controls the degree of partitioning.  0 is the most granular
    partition, and granularity decreases as louvain_level increases.  Since the number of
    levels can't be known a priori, negative values "count down" from the max - ie, -1
    means to use the maximum possible value and thus get the largest clusters
    """
    dendrogram = community.generate_dendrogram(graph)
    if louvain_level < 0:
        louvain_level = max(0, len(dendrogram) + louvain_level)
    if louvain_level >= len(dendrogram):
        #print("Warning [identify_clusters]: louvain_level set to {}, max allowable is {}.  Resetting".format(louvain_level, len(dendrogram)-1), file=sys.stderr)
        louvain_level = len(dendrogram) - 1
    #print("Cutting the Louvain dendrogram at level {}".format(louvain_level), file=sys.stderr)
    return community.partition_at_level(dendrogram, louvain_level)
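
A short usage sketch for identify_clusters on a toy graph: louvain_level=-1 (the default) yields the coarsest clusters, louvain_level=0 the most granular ones.

import networkx as nx

graph = nx.karate_club_graph()
coarse = identify_clusters(graph)                 # top dendrogram level
fine = identify_clusters(graph, louvain_level=0)  # most granular level
print(len(set(coarse.values())), "coarse communities,",
      len(set(fine.values())), "fine communities")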
Example #30
def extract_network_metrics(mdg, ts, team=True):
    met = {}
    dsg = extract_dpsg(mdg, ts, team)
    if team :
        pre = 'full:'
    else:
        pre = 'user:'
    met[pre+'nodes_count'] = dsg.number_of_nodes()
    met[pre+'edges_count'] = dsg.number_of_edges()
    met[pre+'density'] = nx.density(dsg)
    met[pre+'betweenness'] = nx.betweenness_centrality(dsg)
    met[pre+'avg_betweenness'] = float(sum(met[pre+'betweenness'].values()))/float(len(met[pre+'betweenness'].values()))
    met[pre+'betweenness_count'] = nx.betweenness_centrality(dsg, weight='count')
    met[pre+'avg_betweenness_count'] = float(sum(met[pre+'betweenness_count'].values()))/float(len(met[pre+'betweenness_count'].values()))
    met[pre+'betweenness_effort'] = nx.betweenness_centrality(dsg, weight='effort')
    met[pre+'avg_betweenness_effort'] = float(sum(met[pre+'betweenness_effort'].values()))/float(len(met[pre+'betweenness_effort'].values()))
    met[pre+'in_degree'] = dsg.in_degree()
    met[pre+'avg_in_degree'] = float(sum(met[pre+'in_degree'].values()))/float(len(met[pre+'in_degree'].values()))
    met[pre+'out_degree'] = dsg.out_degree()
    met[pre+'avg_out_degree'] = float(sum(met[pre+'out_degree'].values()))/float(len(met[pre+'out_degree'].values()))
    met[pre+'degree'] = dsg.degree()
    met[pre+'avg_degree'] = float(sum(met[pre+'degree'].values()))/float(len(met[pre+'degree'].values()))
    met[pre+'degree_count'] = dsg.degree(weight='count')
    met[pre+'avg_degree_count'] = float(sum(met[pre+'degree_count'].values()))/float(len(met[pre+'degree_count'].values()))
    met[pre+'degree_effort'] = dsg.degree(weight='effort')
    met[pre+'avg_degree_effort'] = float(sum(met[pre+'degree_effort'].values()))/float(len(met[pre+'degree_effort'].values()))
    usg = dsg.to_undirected()
    dendo = co.generate_dendrogram(usg)
    if len(dendo)>0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1 )
        met[pre+'partitions'] = {}
        for com in set(partition.values()):
            members = [nodes for nodes in partition.keys() if partition[nodes] == com]
            for member in members:
                met[pre+'partitions'][member] = com
        met[pre+'louvain_modularity'] = co.modularity(partition, usg)
    else:
        met[pre+'louvain_modularity'] = None
    connected_components = nx.connected_component_subgraphs(usg)
    shortest_paths = [nx.average_shortest_path_length(g) for g in connected_components if g.size()>1]
    if len(shortest_paths) > 0:
        met[pre+'avg_distance'] = max(shortest_paths)
    else:
        met[pre+'avg_distance'] = None
    return met
Example #31
def louvain(G):
    dendo = lvcm.generate_dendrogram(graph=G,
                                     weight='weight',
                                     resolution=7.,
                                     randomize=True)

    partition = lvcm.partition_at_level(dendo, len(dendo) - 1)
    #a = set(partition.values())
    print(partition)

    #partition = community_louvain.best_partition(G)
    #print(set(partition.values()))
    #print(len(set(partition.values())))
    out = defaultdict(list)
    for k, v in partition.items():
        out[v].append(k)

    print(out)
Example #32
 def test_nodes_stay_together(self):
     """
     Test that two nodes in the same community at one level stay in the same community at the higher level
     """
     g = nx.erdos_renyi_graph(500, 0.01)
     dendo = co.generate_dendrogram(g)
     parts = dict([])
     for l in range(len(dendo)):
         parts[l] = co.partition_at_level(dendo, l)
     for l in range(len(dendo) - 1):
         p1 = parts[l]
         p2 = parts[l + 1]
         coms = set(p1.values())
         for com in coms:
             comhigher = [
                 p2[node] for node, comnode in p1.items() if comnode == com
             ]
             self.assertEqual(len(set(comhigher)), 1)
Example #33
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Convert to networkx undirected graph.
    adjacency_matrix = nx.from_scipy_sparse_matrix(adjacency_matrix,
                                                   create_using=nx.Graph())

    # Call LOUVAIN algorithm to calculate a hierarchy of communities.
    tree = community.generate_dendogram(adjacency_matrix, part_init=None)

    # Embed communities
    row = list()
    col = list()
    append_row = row.append
    append_col = col.append

    community_counter = 0
    for i in range(len(tree)):
        partition = community.partition_at_level(tree, i)
        for n, c in partition.items():
            append_row(n)
            append_col(community_counter + c)

        community_counter += max(partition.values()) + 1

    row = np.array(row)
    col = np.array(col)
    data = np.ones(row.size, dtype=np.float64)

    louvain_features = sparse.coo_matrix(
        (data, (row, col)),
        shape=(len(partition.keys()), community_counter),
        dtype=np.float64)

    return louvain_features
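
A hedged usage sketch for the embedding above (it relies on the same older networkx / python-louvain APIs the function itself uses, e.g. from_scipy_sparse_matrix and generate_dendogram):

import networkx as nx

A = nx.to_scipy_sparse_matrix(nx.karate_club_graph(), format='coo')
X = louvain(A)
print(X.shape)  # (n_nodes, total number of communities summed over all dendrogram levels)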
Example #34
def calc_louvain(adj_matrix, level=0, return_c_graph=False):
    nx_G = nx.from_numpy_array(adj_matrix)
    dendro = louvain.generate_dendrogram(
        nx_G, randomize=False, random_state=0)  #Maybe set randomize True
    #print(dendro)
    #asdasd

    level = len(dendro) - level - 1

    if level < 0:
        raise Exception("The given Level is too deep. The maximum is: " +
                        str(len(dendro) - 1))

    communities = louvain.partition_at_level(dendro, level)
    number_communities = max(communities, key=lambda x: communities[x]) + 1

    # Maybe unnecessary after some code rework and unification
    community_list = []
    for i in range(number_communities):
        grp_list = []
        for grp in communities:
            if communities[grp] == i:
                grp_list.append(grp)
        else:
            if grp_list:
                community_list.append(grp_list)

    community_level_G = louvain.induced_graph(communities, nx_G)

    if return_c_graph:
        c_level_graph = nx.adjacency_matrix(community_level_G)
    else:
        c_level_graph = None

    inv_dendro = []
    for dct in dendro:
        inv_dct = {}
        for k, v in dct.items():
            inv_dct.setdefault(v, []).append(k)
        inv_dendro.append(inv_dct)

    return community_list, c_level_graph, dendro, inv_dendro
Example #35
def louvain(adjacency_matrix):
    """
    Performs community embedding using the LOUVAIN method.

    Introduced in: Blondel, V. D., Guillaume, J. L., Lambiotte, R., & Lefebvre, E. (2008).
                   Fast unfolding of communities in large networks.
                   Journal of Statistical Mechanics: Theory and Experiment, 2008(10), P10008.

    Inputs:  - A in R^(nxn): Adjacency matrix of an undirected network represented as a SciPy Sparse COOrdinate matrix.

    Outputs: - X in R^(nxC_n): The latent space embedding represented as a SciPy Sparse COOrdinate matrix.
    """
    # Convert to networkx undirected graph.
    adjacency_matrix = nx.from_scipy_sparse_matrix(adjacency_matrix, create_using=nx.Graph())

    # Call LOUVAIN algorithm to calculate a hierarchy of communities.
    tree = community.generate_dendogram(adjacency_matrix, part_init=None)

    # Embed communities
    row = list()
    col = list()
    append_row = row.append
    append_col = col.append

    community_counter = 0
    for i in range(len(tree)):
        partition = community.partition_at_level(tree, i)
        for n, c in partition.items():
            append_row(n)
            append_col(community_counter + c)

        community_counter += max(partition.values()) + 1

    row = np.array(row)
    col = np.array(col)
    data = np.ones(row.size, dtype=np.float64)

    louvain_features = sparse.coo_matrix((data, (row, col)), shape=(len(partition.keys()), community_counter),
                                         dtype=np.float64)

    return louvain_features
Example #36
def extract_louvain_modularity(g):
    met = {}
    usg = g.copy()
    isolated = list(nx.isolates(usg))  # materialise before removing nodes
    usg.remove_nodes_from(isolated)
    dendo = co.generate_dendrogram(usg)
    if len(dendo)>0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1 )
        met['partitions'] = {}
        for com in set(partition.values()):
            members = [nodes for nodes in partition.keys() if partition[nodes] == com]
            for member in members:
                met['partitions'][member] = com
        met['modularity'] = co.modularity(partition, usg)
        # for node in isolated:
        #     met['partitions'][node] = None
    else:
        met['partitions'] = None
        met['modularity'] = None
    
    return met
Example #37
def preprocess():
    data = sio.loadmat('f_data/phishing_2013_filter.mat')
    phish_data = data['phish']
    prefix_data = data['networks']
    #	computeWeightToFile('f_data/weight.mat', phish_data)
    G = genGraphFromFile('f_data/weight.mat')
    #	print 'load file success'
    # S = ComuputeSimilarity(phish_data)
    #G = nx.Graph()
    #genGraph(phish_data, G)
    # nx.write_gml(G, 'data/graph')
    # nx.draw(G)
    # partition = communityDetect(G)
    # partition = readResult("data/partition1")
    dendo = community.generate_dendrogram(G)
    # print len(dendo)
    #	print 'partition sucess', len(dendo)
    #	filename = "f_data/partition"
    for level in range(len(dendo)):
        partition = community.partition_at_level(dendo, level)
        print 'size', len(set(partition.values()))
        saveResult(filename + str(level), partition)
Example #38
def preprocess():
	data = sio.loadmat('f_data/phishing_2013_filter.mat')
	phish_data = data['phish']
	prefix_data = data['networks']
#	computeWeightToFile('f_data/weight.mat', phish_data)
	G = genGraphFromFile('f_data/weight.mat')
#	print 'load file success'
	# S = ComuputeSimilarity(phish_data)
	#G = nx.Graph()
	#genGraph(phish_data, G)
	# nx.write_gml(G, 'data/graph')
	# nx.draw(G)
	# partition = communityDetect(G)
	# partition = readResult("data/partition1")
	dendo = community.generate_dendrogram(G)
	# print len(dendo)
#	print 'partition sucess', len(dendo)
#	filename = "f_data/partition"
	for level in range(len(dendo)):
		partition = community.partition_at_level(dendo,level)
		print 'size', len(set(partition.values()))
		saveResult(filename + str(level), partition)
Example #39
def extract_louvain_modularity(g):
    met = {}
    usg = g.copy()
    isolated = list(nx.isolates(usg))  # materialise before removing nodes
    usg.remove_nodes_from(isolated)
    dendo = co.generate_dendrogram(usg)
    if len(dendo) > 0 and isinstance(dendo, list):
        partition = co.partition_at_level(dendo, len(dendo) - 1)
        met['partitions'] = {}
        for com in set(partition.values()):
            members = [
                nodes for nodes in partition.keys() if partition[nodes] == com
            ]
            for member in members:
                met['partitions'][member] = com
        met['modularity'] = co.modularity(partition, usg)
        # for node in isolated:
        #     met['partitions'][node] = None
    else:
        met['partitions'] = None
        met['modularity'] = None

    return met
Example #40
def gen_clusters(edges_file, resolution=dflt_resolution):
    with open(edges_file, "rb") as fp:
        G = nx.read_weighted_edgelist(fp)

    dendrogram = community.generate_dendrogram(G, resolution=0.25)
    len_d = len(dendrogram)
    print("{} items in dendrogram".format(len_d))

    gids2names.load_groups_file("data/groups.txt")

    for level in range(len_d):
        print()
        partition = community.partition_at_level(dendrogram, level)
        modularity = community.modularity(partition, G)
        print("partition at level {} is\n{}".format(level, pformat(partition)))
        print("modularity at level {} is {}".format(level, modularity))
        for com in set(partition.values()):
            list_nodes = sorted([nodes for nodes in partition.keys()
                            if partition[nodes] == com])
            print("nodes: {}".format(json.dumps(list_nodes)))
            print("    groups:")
            for gid, name in gids2names.generate_group_names(
                    group_ids_list=list_nodes):
                print("    {} {}".format(gid, name))
Example #41
    def predict(self):
        """Predict using community structure

        If two nodes belong to the same community, they are predicted to form
        a link. This uses the Louvain algorithm, which determines communities
        at different granularity levels: the finer grained the community, the
        higher the resulting score.

        You'll need to install Thomas Aynaud's python-louvain package from
        https://bitbucket.org/taynaud/python-louvain for this.

        """
        try:
            from community import generate_dendogram, partition_at_level
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install python-louvain from "
                              "https://bitbucket.org/taynaud/python-louvain")
        from collections import defaultdict

        res = Scoresheet()
        dendogram = generate_dendogram(self.G)

        for i in range(len(dendogram)):
            partition = partition_at_level(dendogram, i)
            communities = defaultdict(list)
            weight = len(dendogram) - i  # Lower i, smaller communities

            for n, com in six.iteritems(partition):
                communities[com].append(n)
            for nodes in six.itervalues(communities):
                for u, v in all_pairs(nodes):
                    if not self.eligible(u, v):
                        continue
                    res[(u, v)] += weight
        return res
Example #42
    def predict(self):  # pylint:disable=E0202
        """Predict using community structure

        If two nodes belong to the same community, they are predicted to form
        a link. This uses the Louvain algorithm, which determines communities
        at different granularity levels: the finer grained the community, the
        higher the resulting score.

        This needs the python-louvain package. Install linkpred as follows:

        $ pip install linkpred[community]

        """
        try:
            import community
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install linkpred as follows:\n"
                              "$ pip install linkpred[community]")

        res = Scoresheet()
        dendogram = community.generate_dendrogram(self.G)

        for i in range(len(dendogram)):
            partition = community.partition_at_level(dendogram, i)
            communities = defaultdict(list)
            weight = len(dendogram) - i  # Lower i, smaller communities

            for n, com in partition.items():
                communities[com].append(n)
            for nodes in communities.values():
                for u, v in all_pairs(nodes):
                    if not self.eligible(u, v):
                        continue
                    res[(u, v)] += weight
        return res
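
The same co-membership scoring idea, sketched standalone without the linkpred-specific Scoresheet / all_pairs / eligible machinery (an illustration, not the library's API):

from collections import defaultdict
from itertools import combinations
import community
import networkx as nx

G = nx.karate_club_graph()
dendrogram = community.generate_dendrogram(G)
scores = defaultdict(float)
for i in range(len(dendrogram)):
    partition = community.partition_at_level(dendrogram, i)
    weight = len(dendrogram) - i  # finer level -> higher weight
    groups = defaultdict(list)
    for node, com in partition.items():
        groups[com].append(node)
    for nodes in groups.values():
        for u, v in combinations(sorted(nodes), 2):
            scores[(u, v)] += weight
print(sorted(scores.items(), key=lambda kv: -kv[1])[:5])  # top-scoring candidate links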
Example #43
    def predict(self):  # pylint:disable=E0202
        """Predict using community structure

        If two nodes belong to the same community, they are predicted to form
        a link. This uses the Louvain algorithm, which determines communities
        at different granularity levels: the finer grained the community, the
        higher the resulting score.

        This needs the python-louvain package. Install linkpred as follows:

        $ pip install linkpred[community]

        """
        try:
            import community
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install linkpred as follows:\n"
                              "$ pip install linkpred[community]")

        res = Scoresheet()
        dendogram = community.generate_dendrogram(self.G)

        for i in range(len(dendogram)):
            partition = community.partition_at_level(dendogram, i)
            communities = defaultdict(list)
            weight = len(dendogram) - i  # Lower i, smaller communities

            for n, com in partition.items():
                communities[com].append(n)
            for nodes in communities.values():
                for u, v in all_pairs(nodes):
                    if not self.eligible(u, v):
                        continue
                    res[(u, v)] += weight
        return res
Example #44
    def predict(self):
        """Predict using community structure

        If two nodes belong to the same community, they are predicted to form
        a link. This uses the Louvain algorithm, which determines communities
        at different granularity levels: the finer grained the community, the
        higher the resulting score.

        You'll need to install Thomas Aynaud's python-louvain package from
        https://bitbucket.org/taynaud/python-louvain for this.

        """
        try:
            from community import generate_dendogram, partition_at_level
        except ImportError:
            raise ImportError("Module 'community' could not be found. "
                              "Please install python-louvain from "
                              "https://bitbucket.org/taynaud/python-louvain")
        from collections import defaultdict

        res = Scoresheet()
        dendogram = generate_dendogram(self.G)

        for i in range(len(dendogram)):
            partition = partition_at_level(dendogram, i)
            communities = defaultdict(list)
            weight = len(dendogram) - i  # Lower i, smaller communities

            for n, com in six.iteritems(partition):
                communities[com].append(n)
            for nodes in six.itervalues(communities):
                for u, v in all_pairs(nodes):
                    if not self.eligible(u, v):
                        continue
                    res[(u, v)] += weight
        return res
Example #45
    cluster2 = community.best_partition(graph)

    #print run time for c1
    runtimeC2 = timeit.default_timer() - startC2

    mod2 = community.modularity(cluster2, graph)



    graph = nx.read_edgelist("Data/a.data")

    #timer start c1
    startC3 = timeit.default_timer()

    tmp = community.generate_dendogram(graph)
    cluster3 = community.partition_at_level(tmp, 0)

    #print run time for c1
    runtimeC3 = timeit.default_timer() - startC3

    mod3 = community.modularity(cluster3, graph)

    print "modularity:  1:%f;  2:%f;  3:%f" % (mod1,mod2, mod3)
    nmi1 = calculate_NMI(cluster1, cluster2)
    print "nmi between cluster1 and cluster 2: %.10f" % nmi1
    nmi2 = calculate_NMI(cluster1, cluster3)
    print "nmi between cluster1 and cluster 3: %.10f" % nmi2
    nmi3 = calculate_NMI(cluster2, cluster3)
    print "nmi between cluster2 and cluster 3: %.10f" % nmi3

Example #46
def build_json(hierarchy_dict, h5_data, dataset_name, graph, json, threshold):
    # data set dict
    ds_dict = {}

    # graph dict
    g_dict = {}

    # Maximum hierarchy size
    hmax = len(hierarchy_dict["dendro"]) - 1

    # Add pseudo entry to trigger single node dict creation
    hierarchy_dict[hmax + 1] = {}
    for hidx, hdict in hierarchy_dict.items():
        if not isinstance(hidx, int):
            continue
        # Dendrogram list is sorted inversely to hierarchy dict. Therefore, the dendrogram index has to be recalculated.
        didx = hmax - hidx
        # edge dict
        e_dict = {}
        # node dict
        n_dict = {}
        # hierarchy dict
        h_dict = {}
        if didx > -1:
            # Nodes
            for com, nodes in hierarchy_dict["inv_dendro"][didx].items():
                # attribute dict
                a_dict = {}
                a_dict["index"] = com
                a_dict["name"] = "h%in%i" % (hidx, com)
                a_dict["childs"] = nodes
                a_dict["mzs"] = list(
                    h5_data.columns[hdict["communities"][com]])
                try:
                    a_dict["membership"] = hierarchy_dict["dendro"][didx +
                                                                    1][com]
                except Exception as e:
                    print(e)
                n_dict["h%in%i" % (hidx, com)] = a_dict
        else:
            # single nodes are always first entry in dendro
            for node, com in hierarchy_dict["dendro"][0].items():
                a_dict = {}
                a_dict["index"] = node
                a_dict["name"] = h5_data.columns[node]
                a_dict["membership"] = com
                a_dict["mzs"] = [h5_data.columns[node]]
                n_dict["h%in%i" % (hidx, node)] = a_dict
        # Edges
        if didx > -1:
            community = louvain.partition_at_level(hierarchy_dict["dendro"],
                                                   didx)
            edges = louvain.induced_graph(community, graph).edges(data=True)
        else:
            edges = graph.edges(data=True)
        idx = 0
        for source, target, weight in edges:
            # Include source == target for inner edge weight.
            #print(weight)
            if source != target:
                a_dict = {}
                a_dict["index"] = idx
                a_dict["name"] = "h%ie%i" % (hidx, idx)
                a_dict["source"] = "h%in%i" % (hidx, source)
                a_dict["target"] = "h%in%i" % (hidx, target)
                try:
                    count = weight["count"]
                except:
                    count = 1
                #print(count)
                a_dict["weight"] = weight["weight"] / count
                e_dict["h%ie%i" % (hidx, idx)] = a_dict
                idx += 1

        h_dict["nodes"] = n_dict
        h_dict["edges"] = e_dict
        g_dict["hierarchy%i" % (hidx)] = h_dict

    ds_dict["graph"] = g_dict
    ds_dict["dataset"] = dataset_name
    ds_dict["threshold"] = threshold

    #mzs = [x for x in np.round(h5_data.columns, 3)]
    mzs = [x for x in h5_data.columns]
    mzs_dict = {}
    for mz in mzs:
        mzs_dict[str(mz)] = {}
        for hy, vals in g_dict.items():
            for nid, props in vals["nodes"].items():
                try:
                    if mz in props["mzs"]:
                        mzs_dict[str(mz)][hy] = nid
                        break
                # Last hierarchy has no "mzs" prop
                except Exception as e:
                    print(e)
                    if mz == props["name"]:
                        mzs_dict[str(mz)][hy] = nid

    ds_dict["mzs"] = mzs_dict

    json["graphs"]["graph%i" % (hierarchy_dict["graph_idx"])] = ds_dict

    return json
Example #47
 def print_dendrogram(self):
     dendo = community.generate_dendogram(self.G)
     for level in range(len(dendo) - 1) :
         print "partition at level", level, "is", community.partition_at_level(dendo, level)
Example #48
	doResolution = float(sys.argv[2])		
sys.stderr.write("Using resolution " + str(doResolution) + ".\n")

# read data from edges input file 
G = networkx.Graph() # create a new undirected graph
G = networkx.read_edgelist(inputFile, nodetype=int, data=(('weight',int))) # read as int-weighted
# G = networkx.read_edgelist(inputFile, nodetype=int) # read as unweighted
sys.stderr.write("Done reading.\n")

# do community detection and get dendrograph of communities
dendo = community.generate_dendrogram(G, part_init=None, resolution=doResolution, weight='weight') 

# store communities at different levels
parts = {}
for level in range(0, len(dendo)):
	parts[level] = community.partition_at_level(dendo, level)
levels = len(dendo)

# just do plain community detection instead of nested variant
#levels = 1
#parts[0] = community.best_partition(G) # find communities

# output header to stdout
sys.stdout.write("Id")
communitySize = {}
for level in range(0, levels):
	sys.stdout.write("\tCommunity_Res" + str(doResolution) + "_Level" + str(level+1))
	communitySize[level] = -1
sys.stdout.write("\n")

# output nodelist with communities to stdout
Example #49
				G.add_edge(i, j, weight=w_ij)
	nx.draw_spring(G)
	dst = os.path.join(out_dir, 'CC-Network(ccthr=%d, thr=%d, ref_journal_flag=%s).png' % (ccthr, thr, ref_journal_flag))
	plt.savefig(dst)
	plt.close('all')
	
	#...
	if verbose: print "....computing communities with Louvain algo"
	dendogram = community.generate_dendogram(G, part_init=None)

	#... output infos
	print "....There are %d references in the database (contain duplicates)" % (nb_total_refs)
	print "....There are %d references in the database (contain no duplicate)" % (nb_refs)
	print "....There are %d references in the CC network\n......(ie sharing at least %d article(s) with another reference)" % (len(G.nodes()), ccthr)
	for level in range(len(dendogram)):
		part = community.partition_at_level(dendogram, level)
		mod = community.modularity(part, G)
		nb_comm = len(set(part.values()))
		size_sup10 = 0; size_sup100 = 0;  #communities_caracteristics(partition, thr, level)
		for com in set(part.values()) :
			list_nodes = [nodes for nodes in part.keys() if part[nodes] == com]
			if len(list_nodes) > 100: size_sup100 += 1
			if len(list_nodes) > 10: size_sup10 += 1
		print "....level %d: %d communities [%d with size > 10, %d with size > 100], modularity Q=%1.6f" % (level, nb_comm, size_sup10, size_sup100, mod)


	##############################
	## WHICH EXTRACTION ?
	print "..CC communities extraction"
	#
	confirm = 'n'; level = len(dendogram) - 1; thr = 10
Example #50
    k4_mod = modularity(k4_cores, graph)
    kmax_mod = modularity(kmax_cores, graph)
    print 'k4 mod', k4_mod
    print 'kmax mod', kmax_mod

    k4_wcc = wcc1(k4_cores[0], graph)
    kmax_wcc = wcc1(kmax_cores[0], graph)
    print 'k4 wcc', k4_wcc
    print 'kmax wcc', kmax_wcc

    dendro = comm.generate_dendrogram(graph)

    louvain_steps = []
    for level in range(len(dendro)):
        partition = comm.partition_at_level(dendro, level)

        clusters = {}
        for key, value in sorted(partition.iteritems()):
            clusters.setdefault(value, []).append(key)

        communities = []
        for key, value in clusters.iteritems():
            if len(value) > 0:
                communities.append(graph.subgraph(value))

        louvain_steps.append([len(set(partition.values())), modularity(communities, graph)])

    result = { 'max_core': max_k, 'num_4-cores': len(k4_cores), 'modularity_max-cores': kmax_mod, 'modularity_4-cores': k4_mod, "wcc_max-cores": kmax_wcc, "wcc_4-cores": k4_wcc, 'louvain_steps': louvain_steps }

    db.bgroups.update({ '_id': gid }, { '$set': result }, upsert = False, multi = False)
Example #51
    k4_mod = modularity(k4_cores, graph)
    kmax_mod = modularity(kmax_cores, graph)
    print 'k4 mod', k4_mod
    print 'kmax mod', kmax_mod

    k4_wcc = wcc1(k4_cores[0], graph)
    kmax_wcc = wcc1(kmax_cores[0], graph)
    print 'k4 wcc', k4_wcc
    print 'kmax wcc', kmax_wcc

    dendro = comm.generate_dendrogram(graph)

    louvain_steps = []
    for level in range(len(dendro)):
        partition = comm.partition_at_level(dendro, level)

        clusters = {}
        for key, value in sorted(partition.iteritems()):
            clusters.setdefault(value, []).append(key)

        communities = []
        for key, value in clusters.iteritems():
            if len(value) > 0:
                communities.append(graph.subgraph(value))

        louvain_steps.append(
            [len(set(partition.values())),
             modularity(communities, graph)])

    result = {
Example #52
import json
from util.read_utils import lines_per_n
import community
import networkx as nx

author_graph = nx.DiGraph()
with open('clean_data.json', 'r') as jfile:
    for chunk in lines_per_n(jfile, 9):
        hdr_data = json.loads(chunk)
        for to_addr in str(hdr_data['To']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), to_addr.strip(), style='solid', label=hdr_data['Time'])
        for cc_addr in str(hdr_data['Cc']).split(","):
            if '@' in to_addr:
                author_graph.add_edge(str(hdr_data['From']), cc_addr.strip(), style='dashed', label=hdr_data['Time'])
    jfile.close()

print("No. of Weakly Connected Components:", nx.number_weakly_connected_components(author_graph))
print("No. of Strongly Connected Components:", nx.number_strongly_connected_components(author_graph))
print("Nodes:", nx.number_of_nodes(author_graph))
print("Edges:", nx.number_of_edges(author_graph))

# The following lines of code generate a dendrogram for the above graph
dendo = community.generate_dendogram(author_graph.to_undirected())
for level in range(len(dendo)) :
    print("Partition at level", level, "is", community.partition_at_level(dendo, level))
    print("-"*10)