Example No. 1
def clustering_LDA_type_new(mygraph, mygroups, algoname, corecalculation):  # algoname selects which algorithm to use; corecalculation is whether to calculate core documents or not
    counter_matrices = []
    for k in range(1):  # clustering is greedy; ideally run it many times (e.g., 100) and keep the most frequent answer across runs; here a single run
        if algoname==0:
            myclusters = mygraph.community_fastgreedy(weights="weight").as_clustering(3)  # cluster based on max weight
        elif algoname==1:
            myclusters = louvain.find_partition(mygraph, method='Modularity', weight='weight', resolution_parameter=1)  # smaller resolution, smaller number of clusters #best
        elif algoname==2:
            myclusters= louvain.find_partition(mygraph, method='RBConfiguration', weight='weight', resolution_parameter=1)
        elif algoname ==3:
            myclusters=louvain.find_partition(mygraph, method='RBER', weight='weight', resolution_parameter=1)  # not working very well
        elif algoname==4:
            myclusters=louvain.find_partition(mygraph, method='CPM', weight='weight', resolution_parameter=0.6)  # not working very well
        cluster_list = list(myclusters)
        counter_matrix = np.zeros((len(cluster_list), len(mygroups)))
        cluster_subgraphs =[]
        for i in range(len(cluster_list)):
            cluster_subgraphs.insert(i, mygraph.subgraph(cluster_list[i]))  # creating clusters subgraph
            for v in cluster_list[i]:  # calculating matrices
                for j in range(len(mygroups)):
                    if mygraph.vs[v]['name'] in mygroups[j]:
                        counter_matrix[i][j] +=1
        counter_matrices.insert(k, counter_matrix)
    counter_matrix = most_common(counter_matrices)  # counter matrix (rows are clusters and columns are groups)
    cluster_cores =[]
    if corecalculation==1:
        for i in range(len(cluster_subgraphs)):  # finding core of each cluster subgraph
            cluster_cores.insert(i,find_core(cluster_subgraphs[i], 3))  # 3 means return the three center documents, you can set it to whatever number you wish
            print(cluster_cores[i].vs['name'])
    group_matrix = counter_matrix/counter_matrix.sum(axis=0)[None,:]  # column-normalized: probability of each cluster given each group
    cluster_matrix = counter_matrix/counter_matrix.sum(axis=1)[:,None]  # row-normalized: probability of each group given each cluster
    return cluster_matrix, cluster_list, cluster_cores
Example No. 2
def getGraphPartition(graph, resolution):
    """The returned partition is each county assigned to a community. While
    there are several methods available, RBConfiguration is tunable with a
    resolution parameter. Bigger resolutions mean smaller communities. Smaller
    resolutions mean fewer communities."""
    return louvain.find_partition(graph, method='RBConfiguration', weight='weight',
                                  resolution_parameter=resolution)
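A minimal usage sketch, assuming python-igraph plus an older louvain-igraph release that still accepts the string-based method API, and a graph carrying a 'weight' edge attribute:

import igraph as ig
import louvain

graph = ig.Graph.Famous("Zachary")
graph.es['weight'] = 1.0  # the call above expects a 'weight' edge attribute
part = getGraphPartition(graph, resolution=1.0)
print(len(part))  # number of communities found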
Example No. 3
def cluster_adjmat(xmat, resolution=1, cutoff=0.1):
    """
    Cluster the groups based on the adjacent matrix.
    Use the cutoff to discretize the matrix used to construct the adjacent graph.
    Then cluster the graph using the louvain clustering with a resolution value.
    As the adjacent matrix is binary, the default resolution value is set to 1.

    Input
    -----
    xmat: `numpy.array` or sparse matrix
        the reference matrix/normalized confusion matrix
    cutoff: `float` optional (default: 0.1)
        threshold used to binarize the reference matrix
    resolution: `float` optional (default: 1.0)
        resolution parameter for louvain clustering

    return
    -----
    new group names.
    """
    g = sc._utils.get_igraph_from_adjacency((xmat > cutoff).astype(int),
                                            directed=False)
    print(g)
    part = louvain.find_partition(g,
                                  louvain.RBConfigurationVertexPartition,
                                  resolution_parameter=resolution)
    groups = np.array(part.membership)
    return groups
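A hypothetical toy call, assuming scanpy is imported as `sc` (the function relies on scanpy's private helper `sc._utils.get_igraph_from_adjacency`) and louvain and numpy (`np`) are available; the matrix values are placeholders:

import numpy as np

xmat = np.array([[1.0, 0.8, 0.0],
                 [0.8, 1.0, 0.05],
                 [0.0, 0.05, 1.0]])
groups = cluster_adjmat(xmat, resolution=1, cutoff=0.1)
print(groups)  # e.g. [0 0 1]: rows 0 and 1 merge into one group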
Example No. 4
def cluster2dspectrumlouvain(cp, project):
    datapath = cp.get('datadir')

    realpeaks = Two_Column_List(project + os.sep + cp.get('spectruminput'))
    #print(realpeaks)
    g = Graph.Read_Edgelist(project + os.sep + 'result' + os.sep +
                            cp.get('clusteringoutput'),
                            directed=False)
    #print(g)
    louvainresult = louvain.find_partition(g,
                                           louvain.RBERVertexPartition,
                                           resolution_parameter=float(
                                               cp.get('rberresolution')))
    #print(louvainresult)
    f = open(project + os.sep + 'result' + os.sep + cp.get('louvainoutput'),
             'w')
    for cluster in louvainresult:
        if len(cluster) > 0:
            f.write('/\n')
            for peak in cluster:
                for realpeak in realpeaks:
                    if realpeak[0] == peak:
                        f.write(
                            str(realpeak[0]) + ',' + str(realpeak[1]) + ',' +
                            str(realpeak[2]) + '\n')
    f.close()
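The function reads everything through cp.get(...), so a plain dict satisfies that interface; a hypothetical config stub with the key names inferred from the calls above (values are placeholders, and Two_Column_List is a project helper not shown here):

cp = {
    'datadir': 'data',
    'spectruminput': 'peaks.txt',      # peak list read by Two_Column_List
    'clusteringoutput': 'edges.txt',   # igraph edge-list input
    'rberresolution': '0.5',           # parsed with float(...)
    'louvainoutput': 'clusters.txt',   # output file name
}
# cluster2dspectrumlouvain(cp, project='myproject')  # requires the files above to exist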
Example No. 5
def partition_gievenNumPar(G, NumPar=None, edge_weight_factors=None):
    if NumPar is None:
        if 15 <= len(G.vs) // 10 <= 30:
            NumPar = len(G.vs) // 10
        else:
            NumPar = 20

    low = 0.001
    high = 0.75
    count = 0
    thres = None
    w = G.es['weight']
    if edge_weight_factors is not None:
        w = [a * b for a, b in zip(w, edge_weight_factors)]
    partitions = None
    if NumPar <= 0 or NumPar > len(G.vs):
        print("NumPar {} is out of range".format(NumPar))
        sys.exit()
    while True:
        thres = (low + high) / 2
        partitions = louvain.find_partition(
            G,
            partition_type=louvain.CPMVertexPartition,
            weights=w,
            resolution_parameter=thres)
        count += 1
        if np.abs(len(partitions) - NumPar) == 0 or count > 30:
            break
        elif len(partitions) > NumPar:
            high = thres
            thres = (low + high) / 2
        else:
            low = thres
            thres = (low + high) / 2
    return (partitions, thres)
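A usage sketch: the function bisects the CPM resolution parameter until the partition count approaches the target (or 30 iterations elapse). This assumes igraph, louvain, numpy as np, and sys are imported as in the snippet:

import igraph as ig

G = ig.Graph.Ring(50)
G.es['weight'] = 1.0
partitions, thres = partition_gievenNumPar(G, NumPar=5)
print(len(partitions), thres)  # community count and the resolution that produced it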
Example No. 6
def identify_clusters(vlm,conn,correct_tags = False, tag_correction_list = [], method_name='ModularityVertexPartition', seed=360):
    """
    Cluster identification via the Louvain algorithm. Can be used for cluster discovery. If clusters are manually identified
    (e.g. by visualize_protein_markers()), clusters can be renumbered or combined using the tag correction list.
    Method names are any used in louvain.find_partition method.
    """

    g=ig.Graph.Adjacency(conn.todense().tolist())
    method=getattr(louvain,method_name)
    louvain.set_rng_seed(seed)
    partition=louvain.find_partition(g,method)
    tag_list = np.zeros(conn.shape[0])
    for x in range(len(partition)):
        tag_list[partition[x]]=int(x)
    if correct_tags:
        cluster_ID = [tag_correction_list[int(X)] for X in tag_list]
    else:
        cluster_ID = [int(X) for X in tag_list]
    

    num_clusters = max(cluster_ID)+1
    
    vlm.cluster_ID = cluster_ID
    vlm.num_clusters = int(num_clusters)
    return [cluster_ID, num_clusters]
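A hypothetical call with a random symmetric connectivity matrix; `vlm` only needs settable attributes, so a SimpleNamespace stands in for the velocyto-style object the function assumes (igraph as ig, louvain, and numpy as np are assumed imported at module level):

from types import SimpleNamespace
import scipy.sparse as sp

rng = sp.random(30, 30, density=0.2, format='csr')
conn = ((rng + rng.T) > 0).astype(int)  # symmetric 0/1 connectivity
cluster_ID, num_clusters = identify_clusters(SimpleNamespace(), conn)
print(num_clusters)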
Example No. 7
def run_louvain(self, scalenumber):
    sources, targets = self.scales[scalenumber].tmatrix.nonzero()
    edgelist = list(zip(sources.tolist(), targets.tolist()))
    G = ig.Graph(edgelist)
    G.es['weight'] = self.scales[scalenumber].tmatrix.data
    return louvain.find_partition(G,
                                  louvain.ModularityVertexPartition,
                                  weights=G.es['weight']).membership
Example No. 8
def main(argv):
   inputFile = ''
   outputFile = ''
   imax = 0
   jmax = 0
   theGraph = Graph()
   inputFile = sys.argv[1]
   outputFile = sys.argv[2]
   print('argv[1] is:', sys.argv[1])
   print('argv[2] is:', sys.argv[2])

   with open(inputFile, 'r', newline='') as csvfile:
      csvReader = csv.reader(csvfile, delimiter=',', quotechar='|')
      # First line is the number of distinct nodes.
      headerRows = next(csvReader)
      nNodes = int(headerRows[0])
      theGraph.add_vertices(nNodes)
      print('nNodes: ', nNodes)

      currentNodeIndex = 0
      # We build a map between the matrix we want to build and the node identifiers
      # as we read in the rows.
      thisI = 0
      thisJ = 0
      thisEdge = 0
      nodeMap = dict()

      # we also want a list that maps the indices to the node names
      indexList = list()
      # at this point, each row is an edge in the graph
      for row in csvReader:
         if (row[0] in nodeMap):
            thisI = nodeMap[row[0]]
         else:
            nodeMap[row[0]] = currentNodeIndex
            indexList.append(row[0])
            currentNodeIndex += 1

         if (row[1] in nodeMap):
            thisJ = nodeMap[row[1]]
         else:
            nodeMap[row[1]] = currentNodeIndex
            indexList.append(row[1])
            currentNodeIndex += 1

         # add this edge
         theGraph.add_edges([(thisI,thisJ)])
         theGraph.es[thisEdge]["weight"] = float(row[2])
         thisEdge += 1


   part = louvain.find_partition(theGraph, method = 'Modularity', weight = 'weight')

   with open(outputFile, 'w', newline='') as csvoutfile:
     csvWriter = csv.writer(csvoutfile, delimiter=',', quotechar='|')
     for i in range(0, nNodes):
        csvWriter.writerow([indexList[i], part.membership[i]])
Example No. 9
def louvain_method(user_interaction_graph):
    '''
    https://github.com/vtraag/louvain-igraph
    Fast unfolding of communities in large networks, Vincent D Blondel, Jean-Loup Guillaume, Renaud Lambiotte, Renaud Lefebvre, Journal of Statistical Mechanics: Theory and Experiment 2008(10), P10008 (12pp)
    :param user_interaction_graph: igraph Graph
    '''
    louvain.set_rng_seed(43)
    node_names = user_interaction_graph.vs
    return [[node_names[node]['name'] for node in community] for community in louvain.find_partition(user_interaction_graph, louvain.ModularityVertexPartition)]
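Minimal usage sketch (assumes python-igraph and louvain are installed; the function expects named vertices):

import igraph as ig

g = ig.Graph.Famous("Zachary")
g.vs['name'] = [str(i) for i in range(g.vcount())]
communities = louvain_method(g)
print(len(communities), communities[0])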
Example No. 10
def create_partition(
    card_data_df, G, resolution_parameter=1, init=None
):
    """Take a card_data_df and the graph that represents it and create
    clusters based on lv.RBERVertexPartition.
    Parameters:
    -----------
    card_data_df: pandas DataFrame containing as columns card name and
    the decks that each card belongs to as a set.

    G: igraph Graph representation of card_data_df.

    resolution_parameter: float to pass to RBERVertexPartition,
    represented by γ in the quality function
      Q = Σ_ij (A_ij − γp) δ(σ_i, σ_j).

    init: None or str; whether to specify an initial cluster
    membership for each card (if a string, a path).
    Returns:
    --------
    partition: the partition created by lv.find_partition.

    clusters: the number of clusters detected.
    See also:
    ---------
    create_card_df: function that creates card_data_df.

    create_graph: function that creates G.

    https://louvain-igraph.readthedocs.io/en/latest/reference.html#rbervertexpartition:
    information on the algorithm used.
    """
    if init:
        initial_membership = card_data_df["init"].tolist()
    else:
        initial_membership = None

    partition = lv.find_partition(
        G,
        lv.RBERVertexPartition,
        weights="weight",
        resolution_parameter=resolution_parameter,
        node_sizes=card_data_df["Count"].tolist(),
        initial_membership=initial_membership,
    )

    clusters = 0
    card_data_df["Cluster"] = [set() for _ in card_data_df.index]
    card_data_df["Hub Score"] = G.hub_score("weight")
    card_data_df["Authority Score"] = G.authority_score("weight")

    for cluster in partition:
        for card in cluster:
            card_data_df.at[card, "Cluster"].add(clusters)
        clusters += 1

    return partition, clusters
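Toy stand-ins for card_data_df and G, assuming `lv` is the louvain module and pandas/igraph are imported; real inputs come from create_card_df and create_graph:

import igraph as ig
import pandas as pd

G = ig.Graph.Full(4)
G.es['weight'] = [1.0] * G.ecount()
card_data_df = pd.DataFrame({'Count': [1, 1, 1, 1]})
partition, clusters = create_partition(card_data_df, G)
print(clusters, card_data_df['Cluster'].tolist())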
Example No. 11
def parition_igraph():
    gexf_path = os.path.join(VIS_DATA_DIR, 'song-signed.gexf')
    origin_gexf_g = nx.read_gexf(gexf_path)
    # pajek_path = os.path.join(VIS_DATA_DIR, 'song-signed.net')
    # nx.write_pajek(origin_gexf_g, pajek_path)
    graphml_path = os.path.join(VIS_DATA_DIR, 'song-signed.graphml')
    nx.write_graphml(origin_gexf_g, graphml_path)
    G = ig.Graph.Read_GraphML(graphml_path)
    partition = louvain.find_partition(G, louvain.ModularityVertexPartition)
    print(partition)
Example No. 12
def get_louvain(mknn, min_cluster_size=10, resolution_parameter=1.0, seed=0):
    g = ig.Graph(n=mknn.shape[0], edges=list(zip(mknn.row, mknn.col)), directed=False)

    # Louvain clustering over the mKNN graph
    louvain.set_rng_seed(seed)
    part = louvain.find_partition(g,
                louvain.RBConfigurationVertexPartition,
                resolution_parameter=resolution_parameter)
    
    return CellLabels(clean_labels(part.membership, min_cluster_size=min_cluster_size)) 
Example No. 13
    def optimal_modularity_community_detection(self,visual=True,name='optimal_modularity'):
        """
        Community detection Function using Louvain algorithm and maximization of modularity.
        Inputs:
            - visual: (Default = True) Visualize the communities computed
            - name: name of the .png exported file
        """
        louvain.set_rng_seed(123456)
        partition = louvain.find_partition(self.G, louvain.ModularityVertexPartition,weights=self.G.es['weight'])
        self.G.vs['community_optimal_modularity'] = partition.membership
        
        print("The estimated number of communities is",len(set(partition.membership)))
        print('\n')
        print("Communities")
        for n in range(0,len(partition)):
            print('Community number', n, '- size:', len(partition[n]))

        #Create a dictionary with keys as channels (names of our nodes) and values the community they belong to
        comm_detect = dict(zip(self.G.vs['label'],self.G.vs['community_optimal_modularity']))
        print()
        print('The communities are:')
        print()
        comms = {}

        for item in comm_detect.items():
            if item[1] not in comms.keys():
                comms[item[1]] = []

            comms[item[1]].append(item[0])
            
        comms = OrderedDict(sorted(comms.items(), key=lambda t:t[0]))

        print(comms.items())
        
        if visual:
            visual_style = {}
            visual_style["vertex_size"] = 25
            #visual_style["vertex_color"] = "white"
            visual_style["vertex_label"] = self.G.vs["label"]
            #visual_style["edge_width"] = [math.exp(weight)*0.5 for weight in self.G.es["weight"]]
            visual_style["edge_width"] = 0.2
            visual_style["layout"] = self.G.vs["coords"]
            pal = igraph.drawing.colors.ClusterColoringPalette(len(set(self.G.vs['community_optimal_modularity'])))
            visual_style["vertex_color"] = pal.get_many(self.G.vs['community_optimal_modularity'])
            self.G.es['arrow_size'] = [0.1 for edge in self.G.es]



            graph = igraph.plot(self.G,bbox=(0, 0, 600, 600), **visual_style)
            graph.save(name + '.png')
            
            return(comms,graph)
        
        return(comms)
Example No. 14
def find_partition(self, weight=True, mode='hypergeometry'):
    g = ig.Graph(list(self.graph.edges))
    # use hypergeometric test results as edge weights
    weights = []
    for u, v in self.graph.edges:
        if (u, v) in self.matrix:
            weights.append(self.matrix[(u, v)])
        elif (v, u) in self.matrix:
            weights.append(self.matrix[(v, u)])
        else:
            # use a local name so the `weight` flag parameter is not clobbered
            w = self.eu_test_single(u, v) if mode != 'hypergeometry' \
                else self.co_test_single(u, v)
            weights.append(w)
    if weight:
        g.es['weight'] = weights
        self.parts = louvain.find_partition(g,
                                            method='Modularity',
                                            weight='weight')
    else:
        self.parts = louvain.find_partition(g, method='Modularity')
Example No. 15
def singlelayer_louvain(G, gamma, return_partition=False):
    if 'weight' not in G.es.attributes():
        G.es['weight'] = [1.0] * G.ecount()

    partition = louvain.find_partition(G,
                                       louvain.RBConfigurationVertexPartition,
                                       weights='weight',
                                       resolution_parameter=gamma)

    if return_partition:
        return partition
    else:
        return tuple(partition.membership)
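Quick sketch at gamma = 1.0 (assumes igraph and louvain are importable):

import igraph as ig

G = ig.Graph.Famous("Zachary")
membership = singlelayer_louvain(G, gamma=1.0)
print(max(membership) + 1)  # number of communities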
Example No. 16
def sample_partitions():
    """Sample some partitions from the Karate club"""
    G = ig.Graph.Famous("Zachary")
    parts = []

    for gamma in SAMPLE_GAMMAS:
        sampled_partitions = [louvain.find_partition(G, louvain.RBConfigurationVertexPartition,
                                                     resolution_parameter=gamma) for _ in range(10 ** 4)]
        if gamma == 1.0:  # artificially make this partition low-quality
            parts.append(min(sampled_partitions, key=lambda p: p.quality()))
        else:
            parts.append(max(sampled_partitions, key=lambda p: p.quality()))

    return parts
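SAMPLE_GAMMAS is a module-level constant not shown in the snippet; a hypothetical value for experimentation (note that 10**4 samples per gamma is slow, so consider fewer samples when trying this out):

SAMPLE_GAMMAS = [0.5, 1.0, 2.0]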
Example No. 17
def modularity_analysis(feature_graph, opts):

    # feature_ungraph = pd.read_excel(opts['output']+'feature_graph_for_{}.xls'.format(opts['tradeday']),
    #                                 sheet_name='sheet1')
    # build the network
    length = len(feature_graph)
    edges = []
    edge_weights = []
    for i in range(length):
        # tuple(node1, node2, weight)
        edges.append(
            tuple([
                '{:0>6}'.format(int(feature_graph.loc[i][0])),
                '{:0>6}'.format(int(feature_graph.loc[i][1])),
                feature_graph.loc[i][2]
            ]))
        edge_weights.append(feature_graph.loc[i][2])

    graph = IGraph.TupleList(edges=edges, directed=False, weights=True)

    modularity_graph = louvain.find_partition(
        graph, louvain.ModularityVertexPartition, weights=edge_weights)

    # vertex_count = graph.vcount()
    graph.vs['label'] = graph.vs['name']

    mode_num = len(modularity_graph)
    modularity_index = np.zeros(len(graph.vs['name']), )
    for i in range(mode_num):
        modularity_index[modularity_graph[i]] = i
    modularity_index = list(map(int, modularity_index))
    graph.vs['modularity'] = modularity_index
    mode_list = []
    for i in range(mode_num):
        vertexes = graph.vs.select(modularity=i)
        one_mode_list = [vertexes[j]['name'] for j in range(len(vertexes))]
        mode_list.append(one_mode_list)
    with open(
            opts['output'] + 'modularity_for_{}.txt'.format(opts['tradeday']),
            'w') as f:
        for i in range(mode_num):
            f.write(','.join(mode_list[i]) + '\n')

    # plotting
    color_dict = {0: 'red', 1: 'green', 2: 'blue'}  # assumes at most three communities
    graph.vs['color'] = [color_dict[index] for index in graph.vs['modularity']]
    ig.plot(graph)

    return graph, modularity_graph
Example No. 18
def get_communities(G, mode=1):
    if mode == 2:
        print('Infomap')
        vc = G.community_infomap(edge_weights='weight')
    elif mode == 3:
        print('Louvain Modularity')
        vc = louvain.find_partition(G, louvain.ModularityVertexPartition)
    elif mode == 4:
        print('Multilevel')
        vc = G.community_multilevel(weights='weight')
    else:
        print('Newman leading eigenvector')
        vc = G.community_leading_eigenvector(weights='weight')

    return vc
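Usage sketch on a weighted graph (assumes python-igraph and louvain):

import igraph as ig

G = ig.Graph.Famous("Zachary")
G.es['weight'] = 1.0
vc = get_communities(G, mode=4)  # multilevel
print(len(vc))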
Example No. 19
def get_school_communities():
    multi_school_community_graph = louvain.find_partition(
        d_social_network_graph, louvain.CPMVertexPartition,
        resolution_parameter=0.0005)
    for idx, community in enumerate(multi_school_community_graph):
        for node in community:
            v = d_social_network_graph.vs[node]
            v["groupId"] = idx

    response_builder = ResponseBuilder()
    nodes = response_builder.return_node_list(d_social_network_graph)
    edges = response_builder.return_edge_list(d_social_network_graph)

    response = dict()
    response["nodes"] = nodes
    response['edges'] = edges

    return jsonify(response)
Example No. 20
def louvain_clus(graph):
    
    partition = louvain.find_partition(graph, louvain.ModularityVertexPartition)
    
    print(partition.summary())
    
    subgraphs = partition.subgraphs()
    subgraph_labels_df = pd.DataFrame(columns=['label','cluster'])
    index = 0
    for i in range(len(subgraphs)):
        subgraph_labels = subgraphs[i].vs['label']
        for label in subgraph_labels:
            subgraph_labels_df.loc[index] = [label,i]
            index = index + 1 
    print('Done')
    
    return partition,subgraph_labels_df
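Usage sketch; the function expects a 'label' vertex attribute, and assumes python-igraph, louvain, and pandas as pd are imported at module level:

import igraph as ig

g = ig.Graph.Famous("Zachary")
g.vs['label'] = ['node_{}'.format(i) for i in range(g.vcount())]
partition, labels_df = louvain_clus(g)
print(labels_df.head())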
Example No. 21
def run_alg(G, alg, gamma=1.0):
    '''
    Run a community detection algorithm with a resolution parameter. Right now only the
    RBConfiguration partition type is used.
    :param G: an igraph graph
    :param alg: 'louvain' or 'leiden'
    :param gamma: resolution parameter
    :return: the detected partition
    '''
    if alg == 'louvain':
        partition_type = louvain.RBConfigurationVertexPartition
        partition = louvain.find_partition(G,
                                           partition_type,
                                           resolution_parameter=gamma)
    elif alg == 'leiden':
        partition_type = leidenalg.RBConfigurationVertexPartition
        partition = leidenalg.find_partition(G,
                                             partition_type,
                                             resolution_parameter=gamma)
    # partition = sorted(partition, key=len, reverse=True)
    return partition
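Usage sketch for the louvain branch (assumes igraph, louvain, and leidenalg are importable):

import igraph as ig

G = ig.Graph.Famous("Zachary")
partition = run_alg(G, 'louvain', gamma=1.0)
print(len(partition))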
Example No. 22
def cluster_knn_louvain(data, neighbors=10):
    A = kneighbors_graph(data, neighbors, mode='connectivity', include_self=True)
    sources, targets = A.nonzero()
    weights = A[sources, targets]
    if isinstance(weights, np.matrix):
        weights = weights.A1
    g = ig.Graph(directed=False)
    g.add_vertices(A.shape[0])  # adds A.shape[0] vertices
    g.add_edges(list(zip(sources, targets)))

    g.es['weight'] = weights
    weights = np.array(g.es["weight"]).astype(np.float64)
    partition_type = louvain.RBConfigurationVertexPartition
    partition_kwargs = {}
    partition_kwargs["weights"] = weights
    part = louvain.find_partition(g, partition_type, **partition_kwargs)
    groups = np.array(part.membership)
    return groups
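Usage sketch; kneighbors_graph comes from scikit-learn and is not imported in the snippet above (igraph as ig, louvain, and numpy as np are also assumed):

import numpy as np
from sklearn.neighbors import kneighbors_graph

data = np.random.rand(100, 5)
groups = cluster_knn_louvain(data, neighbors=15)
print(np.unique(groups))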
Example No. 23
def cluster(D, metric='euclidean', n_neighbors=20, method='louvain',
            resolution=1):
    import igraph as ig
    if method == 'louvain':
        try:
            import louvain as partition_alg
        except ImportError:
            raise ImportError('package "louvain" is missing')
    else:
        try:
            import leidenalg as partition_alg
        except ImportError:
            raise ImportError('package "leidenalg" is missing')
    adj = dist_to_nn(compute_distances(D, metric), K=n_neighbors)
    g = ig.Graph.Adjacency(adj.tolist())
    partition = partition_alg.find_partition(
        g, partition_type=partition_alg.CPMVertexPartition,
        resolution_parameter=resolution)
    return np.array(partition.membership)
Example No. 24
def community_detection(G, methods=['louvain', 'infomap'], infomap_trials=100):
    """Compute communities of an igraph network and generate cluster graphs.

    Parameters:
    G (igraph graph): retweet network or hashtag network
    methods (list of str): preferred method of community detection
    infomap_trials (int, default=100): amount of trials for infomap method

    Returns:
    G (igraph graph) with node attribute '{method}_com'
    C (igraph graph): one cluster graph per method
    """        
    G.vs['weight'] = 1
    #print("Computing communities...")
    if 'louvain' in methods:            
        #print("Louvain...")
        Louvain = louvain.find_partition(G, louvain.ModularityVertexPartition)        
        cg_louv = Louvain.cluster_graph(combine_vertices=dict(weight="sum", 
                                                              followers="sum", 
                                                              friends="sum"),
                                        combine_edges=dict(weight=sum))        
    if 'infomap' in methods:
        #print("Infomap...")
        Infomap = G.community_infomap(trials=infomap_trials)
        cg_info = Infomap.cluster_graph(combine_vertices=dict(weight="sum", 
                                                              followers="sum", 
                                                              friends="sum"),
                                        combine_edges=dict(weight=sum))        
    del G.vs['weight']
    del G.es['weight']
    if 'louvain' in methods and 'infomap' in methods:
        for v in G.vs:
            v["louvain_com"]  = Louvain.membership[v.index]
            v["infomap_com"]  = Infomap.membership[v.index] 
        return G, cg_louv, cg_info
    if 'louvain' in methods and 'infomap' not in methods:
        for v in G.vs:
            v["louvain_com"]  = Louvain.membership[v.index]
        return G, cg_louv
    if 'infomap' in methods and 'louvain' not in methods:
        for v in G.vs:
            v["infomap_com"]  = Infomap.membership[v.index] 
        return G, cg_info
Example No. 25
def louvain_clusters(latent, k=10, rands=0, mutual=False):
    nn_matrix = kneighbors_graph(latent, k)
    rows, cols = nn_matrix.nonzero()
    if mutual:
        edges = [(row, col) if row < col else (col, row)
                 for row, col in zip(rows, cols)]
        edges = np.asarray(edges)
        unique_edges, edges_count = np.unique(edges,
                                              return_counts=True,
                                              axis=0)
        edges = unique_edges[edges_count == 2]
    else:
        edges = [(row, col) for row, col in zip(rows, cols)]
    g = ig.Graph()
    g.add_vertices(latent.shape[0])
    g.add_edges(edges)
    louvain.set_rng_seed(rands)
    res = louvain.find_partition(g, louvain.ModularityVertexPartition)
    clusters = np.asarray(res.membership)
    return clusters
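Usage sketch; kneighbors_graph is scikit-learn's and must be imported separately (igraph as ig, louvain, and numpy as np are assumed imported at module level):

import numpy as np
from sklearn.neighbors import kneighbors_graph

latent = np.random.rand(200, 10)
clusters = louvain_clusters(latent, k=15, rands=0)
print(np.unique(clusters))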
Example No. 26
def run_louvain(fileName):
    #Construct igraph
    g = ig.Graph.Read_Ncol(fileName, names=True, weights=True, directed=False)
    #Find clusters, using louvain. Pass in weights that's same order as edges.
    partition = louvain.find_partition(g,
                                       louvain.ModularityVertexPartition,
                                       weights=g.es["weight"])
    #print(g.vs.indices)
    #print(vars(partition))
    #Store vertices info
    vertices = g.vs
    #Get clusters
    membershipList = partition.membership
    #Get Names for vertices that matches order of membership
    verticeNames = []
    for i in range(0, len(vertices)):
        verticeNames.append(vertices[i]["name"])
    #print(membershipList)
    #print(verticeNames)
    return membershipList, verticeNames
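The NCOL input format is one whitespace-separated weighted edge per line; a tiny self-contained example (file name is hypothetical):

with open('edges.ncol', 'w') as f:
    f.write('a b 1.0\nb c 2.0\na c 0.5\nd e 1.5\n')

membership, names = run_louvain('edges.ncol')
print(dict(zip(names, membership)))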
Example No. 27
def run_louvain(graphnum):
    G = Gs[graphnum]
    parts = []
    start = time()

    for gamma_louvain in np.linspace(0, 10, 1000):
        part = louvain.find_partition(
            G,
            louvain.RBConfigurationVertexPartition,
            resolution_parameter=gamma_louvain).membership

        if num_communities(part) > 100:
            break
        else:
            parts.append(part)

    print(
        f"Running on Graph {graphnum}, n={G.vcount()}, m={G.ecount()}: "
        f"In {time() - start:.2f} s, found {len(parts)} partitions at {(time() - start) / len(parts):.2f} "
        "seconds per partition")
    return graphnum, {sorted_tuple(tuple(p)) for p in parts}
Example No. 28
    def buildGraph(self):

        print("Dataset" + str(self.dataset) +
              " ====================================")

        client = pymongo.MongoClient(host='localhost', port=27017)
        db = client[self.dbName]
        documents = db[self.dataset]

        cursor = documents.find({}, {'authors': 1, 'title': 1})

        existingTitles = []

        vertices = []
        edges = []
        weights = []
        for c in cursor:
            if c['title'] in existingTitles:
                continue
            else:
                existingTitles.append(c['title'])

            for author in c["authors"]:
                if (author not in vertices):
                    vertices.append(author)

            for pair in combinations(c["authors"], 2):
                ind = edges.index(pair) if pair in edges else -1
                if ind == -1:
                    edges.append(pair)
                    weights.append(1)
                else:
                    weights[ind] += 1

        self.g.add_vertices(vertices)
        self.g.add_edges(edges)
        self.g.es['weight'] = weights
        self.partition = louvain.find_partition(
            self.g, louvain.ModularityVertexPartition)
Example No. 29
def compute_louvain(G):
    """Compute Louvain communities of an igraph network and generate cluster graph.

    Parameters:
    G (igraph graph): retweet network or hashtag network
    
    Returns:
    G (igraph graph) with node attribute 'louvain_com'
    clustergraph (igraph graph): graph where every node is a community
    """

    import louvain

    G.vs['weight'] = 1
    partition = louvain.find_partition(G, louvain.ModularityVertexPartition)
    clustergraph = partition.cluster_graph(combine_vertices=dict(
        weight="sum", followers="sum", friends="sum"),
                                           combine_edges=dict(weight=sum))
    del G.vs['weight']
    del G.es['weight']
    for v in G.vs:
        v["louvain_com"] = partition.membership[v.index]
    return G, clustergraph
Example No. 30
def singlelayer_louvain(G, gamma, return_partition=False):
    r"""Run the Louvain modularity maximization algorithm at a single :math:`\gamma` value.

    :param G: graph of interest
    :type G: igraph.Graph
    :param gamma: gamma (resolution parameter) to run Louvain at
    :type gamma: float
    :param return_partition: if True, return a louvain partition. Otherwise, return a community membership tuple
    :type return_partition: bool
    :return: partition from louvain
    :rtype: tuple[int] or louvain.RBConfigurationVertexPartition
    """
    if 'weight' not in G.es.attributes():
        G.es['weight'] = [1.0] * G.ecount()

    partition = louvain.find_partition(G,
                                       louvain.RBConfigurationVertexPartition,
                                       weights='weight',
                                       resolution_parameter=gamma)

    if return_partition:
        return partition
    else:
        return tuple(partition.membership)
Example No. 31
    dict_reverse[id] = [row[0]]
    users.append(id)
    G.add_vertex(id)
    id = id + 1
print(len(dict_users))

for index, row in df.iterrows():
    print(index)
    rowList = str(row['friends']).split(' ')
    if rowList:
        for v in rowList:
            if v != 'nan' and int(v) in dict_users:
                G.add_edge(dict_users[row[0]], dict_users[int(v)])

# compute the best partition
partition = louvain.find_partition(G, method='Modularity')

p_dict = {}
index = 0
for i in partition.membership:
    p_dict[index] = i
    index =index + 1

forced_partitions = limit_communities(p_dict, 50)


user_partitions = pd.DataFrame({'user': users,
                    'user_community': [forced_partitions[u] for u in users]})

user_partitions['user'] = user_partitions['user'].replace(dict_reverse)
Example No. 32
def main():
    args = get_args()
    logger.info('Start')

    graphs = dict()
    dates = list()
    for network in args.networks:
        logger.debug('Loading file {}...'.format(network))
        with open(network, 'r') as infile:

            basefilename = os.path.basename(network)
            graph_date = basefilename.split('.')[-2]

            dates.append(graph_date)
            reader = csv.reader(infile, delimiter='\t')

            # skip header
            next(reader)

            edgelist = [edge for edge in reader]

        # collect the set of vertex names and then sort them into a list
        vertices = set()
        for edge in edgelist:
            # iterates on the list and add each element
            vertices.update(edge)
        vertices = sorted(vertices)

        # new graph
        G = ig.Graph()

        # add vertices to the graph
        G.add_vertices(vertices)

        # add edges to the graph
        G.add_edges(edgelist)

        graphs[graph_date] = G
        logger.debug('done!')

    logger.info('Loaded all graphs')

    logger.info('Preparing to drop empty graphs')
    graphs_copy = copy.deepcopy(graphs)
    for graph_date, G in graphs_copy.items():
        if G.vcount() == 0:
            logger.debug('Dropping empty graph {}'.format(graph_date))
            del graphs[graph_date]
    del graphs_copy
    logger.info('Dropped empty graphs')

    global_vset = set()
    for graph_date, G in graphs.items():
        vertices = [v.attributes()['name'] for v in G.vs]
        global_vset.update(vertices)

    logger.info('Building global index of vertices')
    global_vlist = sorted(global_vset)
    del global_vset
    global_vtoid = dict((vname, vid) for vid, vname in enumerate(global_vlist))
    global_idtov = dict((vid, vname) for vid, vname in enumerate(global_vlist))
    with open(os.path.join('data', 'vertex.json'), 'w+') as vertexfile:
        json.dump(global_idtov, vertexfile)
    logger.info('Global index of vertices built')

    logger.info('Calculating partitions for all snapshots')
    partitions = dict()
    for graph_date, G in graphs.items():
        logger.debug(
            'Calculating partitions for graph {}...'.format(graph_date))
        part = louvain.find_partition(G, louvain.ModularityVertexPartition)
        partitions[graph_date] = part

    logger.info('Calculated partitions for all snapshots')

    all_clusters = list()
    csv_header = ('date', 'n_partitions')
    with open(os.path.join('data', 'partitions.csv'), 'w+') as outfile:
        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(csv_header)

        for graph_date in dates:
            logger.debug('Writing clusters for snapshot {}'.format(graph_date))

            parts = partitions.get(graph_date, None)
            if parts is not None:
                writer.writerow((graph_date, len(parts)))

                en_clusters = [cl for cl in enumerate(parts.subgraphs())]
                all_clusters.append(Cluster(arrow.get(graph_date),
                                            en_clusters))

                clevoname = 'graph.{0}.clusters.csv'.format(graph_date)
                clevoutfile_path = os.path.join('data', 'partitions-evolution',
                                                clevoname)

                with open(clevoutfile_path, 'w+') as clevoutfile:
                    for idx, cluster in en_clusters:

                        nodes = set(
                            [v.attributes()['name'] for v in cluster.vs])

                        nodes_ids = sorted([global_vtoid[n] for n in nodes])
                        clevoutfile.write('{}\n'.format(' '.join(
                            str(nid) for nid in nodes_ids)))

                        clname = ('graph.{0}.cluster.{1:02}.csv'.format(
                            graph_date, idx))
                        cloutfile_path = os.path.join('data', 'partitions',
                                                      clname)

                        with open(cloutfile_path, 'w+') as cloutfile:
                            for node in nodes:
                                cloutfile.write('{}\n'.format(node))

            else:
                writer.writerow((graph_date, 0))

    logger.info('Written all clusters')
    # Iterate over all pairs of consecutive items from a given
    # list
    # https://stackoverflow.com/q/21303224/2377454
    cluster_pairs = [pair for pair in zip(all_clusters, all_clusters[1:])]

    logger.info('Comparing clusters at t and t+1')
    compare_clusters = dict()
    similarity_clusters = dict()
    for snap_t1, snap_t2 in cluster_pairs:
        t1 = str(snap_t1.date.format('YYYY-MM-DD'))
        t2 = str(snap_t2.date.format('YYYY-MM-DD'))

        assert snap_t1.date.replace(months=+1) == snap_t2.date

        # snap_t1.clusters and snap_t2.clusters are the clusters at
        # time t and t+1

        snap_t1_clusters_nodes = list()
        for idx1, cl1 in snap_t1.clusters:
            snap_t1_clusters_nodes.append(
                (idx1, [v.attributes()['name'] for v in cl1.vs]))
        del idx1, cl1

        snap_t2_clusters_nodes = list()
        for idx2, cl2 in snap_t2.clusters:
            snap_t2_clusters_nodes.append(
                (idx2, [v.attributes()['name'] for v in cl2.vs]))
        del idx2, cl2

        cluster_product = itertools.product(snap_t1_clusters_nodes,
                                            snap_t2_clusters_nodes)

        #  numpy.zeros(shape, dtype=float, order='C')
        n = len(snap_t1_clusters_nodes)
        m = len(snap_t2_clusters_nodes)
        clmatrix = np.zeros((n, m), dtype=float)
        for cl1, cl2 in cluster_product:
            ridx = cl1[0]
            cidx = cl2[0]
            logger.debug('Comparing clusters at {} and {}: ({},{})'.format(
                t1, t2, ridx, cidx))

            nodes_cl1 = set(cl1[1])
            nodes_cl2 = set(cl2[1])

            sim = jaccard_distance(nodes_cl1, nodes_cl2)

            clmatrix[ridx][cidx] = sim

        logger.debug('Compared clusters at {} and {}'.format(t1, t2))

        res = scipy.optimize.linear_sum_assignment(clmatrix)
        cluster_t1_indices = res[0].tolist()
        cluster_t2_indices = res[1].tolist()
        c1_to_c2 = dict(zip(cluster_t1_indices, cluster_t2_indices))

        compare_clusters['{}_{}'.format(t1, t2)] = c1_to_c2

        sim_c1c2 = dict()
        for c1, c2 in c1_to_c2.items():
            sim_c1c2[c1] = clmatrix[c1][c2]

        similarity_clusters['{}_{}'.format(t1, t2)] = sim_c1c2

    logger.info('Compared all clusters')

    clevo_filename = 'clusters_evolution.json'
    clevo_path = os.path.join('data', clevo_filename)
    with open(clevo_path, 'w') as clevo_out:
        json.dump(compare_clusters, clevo_out)

    evolved_clusters = defaultdict(dict)
    evolved_clusters_stable = defaultdict(dict)

    cl_date_prev = None
    cluster_no = 0
    cluster_no_stable = 0
    cluster_sizes = dict()
    for date, clusters in all_clusters:
        cl_date = date.format('YYYY-MM-DD')
        cluster_sizes[cl_date] = defaultdict(int)
        logger.info('Processing clusters for {}...'.format(cl_date))

        cl_dict = None
        if cl_date_prev is not None:
            key = '{}_{}'.format(cl_date_prev, cl_date)
            cl_dict = compare_clusters[key]
            inv_cl_dict = {v: k for k, v in cl_dict.items()}

            for cl in clusters:
                clid = cl[0]
                if clid in cl_dict.values():
                    evolved_clusters[cl_date][clid] = \
                        evolved_clusters[cl_date_prev][inv_cl_dict[clid]]

                    if similarity_clusters[key][inv_cl_dict[clid]] < 0.34:
                        evolved_clusters_stable[cl_date][clid] = \
                            evolved_clusters_stable[cl_date_prev][inv_cl_dict[clid]]
                    else:
                        evolved_clusters_stable[cl_date][
                            clid] = cluster_no_stable
                        cluster_no_stable += 1
                else:
                    evolved_clusters[cl_date][clid] = cluster_no
                    evolved_clusters_stable[cl_date][clid] = cluster_no_stable

                    cluster_no += 1
                    cluster_no_stable += 1

                cluster_sizes[cl_date][evolved_clusters[cl_date][clid]] = \
                    cl[1].vcount()

        else:
            for cl in clusters:
                clid = cl[0]
                evolved_clusters[cl_date][clid] = cluster_no
                evolved_clusters_stable[cl_date][clid] = cluster_no_stable

                cluster_no += 1
                cluster_no_stable += 1

                cluster_sizes[cl_date][evolved_clusters[cl_date][clid]] = \
                    cl[1].vcount()

        cl_date_prev = cl_date

    for i in range(cluster_no):
        clsize_path = os.path.join('data', 'cluster-sizes',
                                   'cluster_sizes.{:03}.csv'.format(i))
        with open(clsize_path, 'w+') as clsizefile:
            clsizewriter = csv.writer(clsizefile, delimiter='\t')
            for graph_date in dates:
                if graph_date in cluster_sizes:
                    cl_size = cluster_sizes[graph_date][i]
                else:
                    cl_size = 0

                clsizewriter.writerow((graph_date, cl_size))

    evcl_path = os.path.join('data', 'evolved_clusters.json')
    with open(evcl_path, 'w+') as evcl_file:
        json.dump(evolved_clusters, evcl_file)

    evclstable_path = os.path.join('data', 'evolved_clusters_stable.json')
    with open(evclstable_path, 'w+') as evclstable_file:
        json.dump(evolved_clusters_stable, evclstable_file)

    cl_date_prev = None

    logger.info('Processing vertexes in clusters')
    vertex_clusters = defaultdict(dict)
    for date, clusters in all_clusters:
        cl_date = date.format('YYYY-MM-DD')
        logger.info('Processing clusters for {}...'.format(cl_date))

        for clid, cl in clusters:
            logger.debug('Processing cluster id {} for {}...'.format(
                clid, cl_date))

            nodes = [v.attributes()['name'] for v in cl.vs]

            for node in nodes:
                vertex_clusters[node][cl_date] = evolved_clusters[cl_date][
                    clid]

    for node in global_vlist:
        node_outfilename = get_valid_filename(
            'node_evolution_{}.csv'.format(node))
        node_outfilepath = os.path.join('data', 'nodes-evolution',
                                        node_outfilename)

        with open(node_outfilepath, 'w+') as node_outfile:
            writer = csv.writer(node_outfile, delimiter='\t')
            writer.writerow(('date', 'cluster_id'))

        for graph_date in dates:
            clid = vertex_clusters[node].get(graph_date, -1)

            with open(node_outfilepath, 'a+') as node_outfile:
                writer = csv.writer(node_outfile, delimiter='\t')
                writer.writerow((graph_date, clid))

    logger.info('All done!')
Example No. 33
def louvain(graph):
    #lv.set_rng_seed(0)
    lv.set_rng_seed(random.randint(1, 100000))
    raw_partitions = lv.find_partition(graph, lv.ModularityVertexPartition)

    return raw_partitions
Example No. 34
def louvain(self, load=True, save=False):
    """Computes cluster memberships returned by the Louvain method (implemented in C++ via louvain-igraph package)."""
    self._louvain_memberships = pd.DataFrame(
        louvain.find_partition(self, method="Modularity").membership, columns=["louvainMembership"]
    )
Example No. 35
estimate_group3 = nx.karate_club_graph()
estimate_group3.remove_nodes_from(partitions[partitions.estimate != 3].node-1)

# estimate partition 4
estimate_group4 = nx.karate_club_graph()
estimate_group4.remove_nodes_from(partitions[partitions.estimate != 4].node-1)

# calculate densities
g1_dens = nx.density(ground_first_group)
g2_dens = nx.density(ground_second_group)
e1_dens = nx.density(estimate_group1)
e2_dens = nx.density(estimate_group2)
e3_dens = nx.density(estimate_group3)
e4_dens = nx.density(estimate_group4)

# igraph approach -------------------------------------------------------------
# read and format the karate data
karate = ig.Graph.Read_GraphML("../data/karate.GraphML")
#
## find some partitions with different methods
partM = louvain.find_partition(karate, method = "Modularity")
partRBConfig = louvain.find_partition(karate, method = "RBConfiguration", resolution_parameter = 0.25)
partRBER = louvain.find_partition(karate, method = "RBER")
partDens = louvain.find_partition(karate, method = "CPM", resolution_parameter = 0.25)
partSignif = louvain.find_partition(karate, method = "Significance")
partSurp = louvain.find_partition(karate, method = "Surprise")

# view partitions by printing
# print(partM)
Example No. 36
import igraph as ig
import louvain

G = ig.Graph.Erdos_Renyi(100, 0.1)
louvain.find_partition(G, "Modularity")
louvain.find_partition(G, "RBConfiguration")
louvain.find_partition(G, "Surprise")
louvain.find_partition(G, "Significance")

G.es['weight'] = 1.0
louvain.find_partition(G, "Modularity", weight='weight')
louvain.find_partition(G, "RBConfiguration", weight='weight')
louvain.find_partition(G, "Surprise", weight='weight')
Example No. 37
graph = pGraph("http://localhost:7474/db/data/")
print(graph)

query = """
MATCH (n)-[r]->(m)
RETURN id(n)as from ,id(m) as to ,r.prob as prob
"""

data = graph.cypher.execute(query)
print(data)

ig = Graph.TupleList(data, weights=True)
print(ig.is_simple())

part = louvain.find_partition(ig, method='Modularity', weight='weight')
layout = ig.layout_fruchterman_reingold(weights='weight')



visual_style = {}
visual_style["layout"] = layout
visual_style["vertex_label"] = ig.vs["name"]
visual_style["bbox"] = (3200, 3200)
visual_style["margin"] = 10
visual_style["vertex_size"] = 60
visual_style["edge_width"] = [2 + 2 * int(weight) for weight in ig.es["weight"]]
plot(part, **visual_style)


Example No. 38
def process_file(tm, year):
    ig_dpath = dpaths[tm, year, 'influenceGraph']
    ig_prefix = prefixs[tm, year, 'influenceGraph']
    gp_dpath = dpaths[tm, year, 'groupPartition']
    gp_prefix = prefixs[tm, year, 'groupPartition']
    #
    check_dir_create(gp_dpath)
    #
    gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)
    gp_original_fpath = '%s/%soriginal.pkl' % (gp_dpath, gp_prefix)
    gp_drivers_fpath = '%s/%sdrivers.pkl' % (gp_dpath, gp_prefix)
    #
    with open(gp_summary_fpath, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile, lineterminator='\n')
        writer.writerow(['groupName', 'numDrivers', 'numRelations', 'graphComplexity', 'tieStrength', 'contribution', 'benCon'])
    #
    logger.info('Start handling SP_group_dpath')
    orignal_graph = {}
    for fn in get_all_files(ig_dpath, '%s*' % ig_prefix):
        regression_graph = load_pickle_file('%s/%s' % (ig_dpath, fn))
        for i, ((did0, did1), w) in enumerate(regression_graph.items()):
            orignal_graph[did0, did1] = w
    save_pickle_file(gp_original_fpath, orignal_graph)
    #
    igid, did_igid = 0, {}
    igG = ig.Graph(directed=True)
    for i, ((did0, did1), w) in enumerate(orignal_graph.items()):
        if did0 not in did_igid:
            igG.add_vertex(did0)
            did_igid[did0] = igid
            igid += 1
        if did1 not in did_igid:
            igG.add_vertex(did1)
            did_igid[did1] = igid
            igid += 1
        igG.add_edge(did_igid[did0], did_igid[did1], weight=abs(w))
    #
    logger.info('Partitioning')
    part = louvain.find_partition(igG, method='Modularity', weight='weight')
    logger.info('Each group pickling and summary')
    gn_drivers = {}
    for i, sg in enumerate(part.subgraphs()):
        gn = 'G(%d)' % i
        group_fpath = '%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn)
        sg.write_pickle(group_fpath)
        #
        drivers = [v['name'] for v in sg.vs]
        weights = [e['weight'] for e in sg.es]
        graphComplexity = len(weights) / float(len(drivers))
        tie_strength = sum(weights) / float(len(drivers))
        contribution = sum(weights) / float(len(weights))
        benCon = tie_strength / float(len(drivers))
        with open(gp_summary_fpath, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow([gn, len(drivers), len(weights), graphComplexity, tie_strength, contribution, benCon])
        gl_img_fpath = '%s/%simg-%s.pdf' % (gp_dpath, gp_prefix, gn)
        layout = sg.layout("kk")
        if len(drivers) < 100:
            ig.plot(sg, gl_img_fpath, layout=layout, vertex_label=drivers)
        else:
            ig.plot(sg, gl_img_fpath, layout=layout)
        gn_drivers[gn] = drivers
        gc_fpath = '%s/%scoef-%s.csv' % (gp_dpath, gp_prefix, gn)
        with open(gc_fpath, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            writer.writerow(['groupName', 'did0', 'did1', 'coef'])
            for e in sg.es:
                did0, did1 = [sg.vs[nIndex]['name'] for nIndex in e.tuple]
                coef = e['weight']
                writer.writerow([gn, did0, did1, coef])
    save_pickle_file(gp_drivers_fpath, gn_drivers)
Example No. 39
def louvain(self):
    vertexCluster = louvain.find_partition(self.g, method='Modularity', weight='weight',
                                           initial_membership=range(self.g.vcount()))
    return self.igraphWrapper.getCommunities(vertexCluster)