Example #1
import time
import networkx as nx
from utils.priorityq import PriorityQueue
# betweenness(G) -> (edge_betweenness, node_betweenness) is assumed to be
# provided elsewhere in the repo's utils (see the imports in Example #7).

def betweenness_clustering(G):
    # save the starting time of the algorithm
    start = time.time()
    # compute the betweenness of each edge
    eb, nb = betweenness(G)
    # insert the edges into a priority queue, sorted by highest betweenness
    pq = PriorityQueue()
    for i in eb.keys():
        pq.add(i, -eb[i])
    graph = G.copy()
    # at each iteration remove the edge with the highest betweenness;
    # stop when there are exactly 4 clusters (connected components)
    cc = []
    while len(cc) != 4:
        edge = tuple(sorted(pq.pop()))
        graph.remove_edges_from([edge])
        cc = list(nx.connected_components(graph))

    end = time.time()
    # algorithm execution time
    print("Execution time:", end - start)
    # format the output as a dict
    label = ['first', 'second', 'third', 'fourth']
    final_cluster = {}
    for i in range(4):
        final_cluster[label[i]] = cc[i]
    return final_cluster
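
The implementation of utils.priorityq.PriorityQueue is not shown in these examples. Below is a minimal sketch (hypothetical, not the repo's actual code) of the API the snippets appear to assume: add() inserts a key or updates its priority, pop() removes and returns the key with the lowest priority, remove() deletes a key and returns the priority it had, and is_empty() reports whether the queue is drained.

class PriorityQueue:
    # Minimal stand-in for utils.priorityq.PriorityQueue (assumed API).
    def __init__(self):
        self._entries = {}  # key -> priority

    def add(self, key, priority):
        # adding an existing key simply updates its priority
        self._entries[key] = priority

    def pop(self):
        # remove and return the key with the lowest priority
        # (O(n) scan here; a heap-based version would be faster)
        key = min(self._entries, key=self._entries.get)
        del self._entries[key]
        return key

    def remove(self, key):
        # delete a key and return the priority it had
        return self._entries.pop(key)

    def is_empty(self):
        return len(self._entries) == 0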
Example #2
from utils.priorityq import PriorityQueue

def sorted_elements(dist, pq: PriorityQueue):
    # drain the queue: elements come out in increasing priority order
    sorted_list = []
    distances = []
    while not pq.is_empty():
        k = pq.pop()
        sorted_list.append(k)
        distances.append(dist[k])
    return distances, sorted_list
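
A quick sanity check with made-up distances (using the PriorityQueue sketch above):

dist = {'a': 0, 'b': 1, 'c': 3}
pq = PriorityQueue()
for node, d in dist.items():
    pq.add(node, d)
print(sorted_elements(dist, pq))  # ([0, 1, 3], ['a', 'b', 'c'])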
Example #3
from utils.priorityq import PriorityQueue

def top(G, measure, k):
    # rank the nodes by a centrality measure and return the k best
    pq = PriorityQueue()
    cen = measure(G)
    for u in G.nodes():
        # negate the value because PriorityQueue pops the lowest priority first
        pq.add(u, -cen[u])
    out = []
    for i in range(k):
        out.append(pq.pop())
    return out
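
For example, with a real NetworkX centrality function on a built-in graph (both are illustrative choices, not taken from the original code):

import networkx as nx

G = nx.karate_club_graph()
# the ten nodes with the highest degree centrality
print(top(G, nx.degree_centrality, 10))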
Example #4
import networkx as nx
from utils.priorityq import PriorityQueue
# betweenness(G) is assumed to be the same helper used in Example #1.

def bwt_cluster(G):
    # interactive variant of betweenness clustering: the user decides when to stop
    eb, nb = betweenness(G)
    pq = PriorityQueue()
    for i in eb.keys():
        pq.add(i, -eb[i])
    graph = G.copy()

    done = False
    while not done:
        # remove the edge with the highest betweenness and show the components
        edge = tuple(sorted(pq.pop()))
        graph.remove_edges_from([edge])
        print(list(nx.connected_components(graph)))
        a = input("Do you want to continue? (y/n) ")
        if a == "n":
            done = True
Example #5
import math
import networkx as nx
from utils.priorityq import PriorityQueue
# sorted_elements is the helper defined in Example #2.

def dijkstra(start, G: nx.Graph):
    frontier = PriorityQueue()  # renamed from `open` to avoid shadowing the builtin
    dist = {start: 0}
    increasing_order_dist = PriorityQueue()

    for v in G.nodes():
        if v != start:
            dist[v] = math.inf
        increasing_order_dist.add(v, dist[v])
        frontier.add(v, dist[v])

    while not frontier.is_empty():
        u = frontier.pop()
        for v in G.neighbors(u):
            # extract the current weight of the edge between u and the neighbor v
            try:
                w = G[u][v]["weight"]
            except KeyError:
                w = 1  # unweighted graph
            alt = dist[u] + w
            if alt < dist[v]:
                dist[v] = alt
                increasing_order_dist.add(v, dist[v])
                # decrease the priority of v; add() is assumed to update the
                # priority of an element that is already in the queue
                frontier.add(v, alt)
    return sorted_elements(dist, increasing_order_dist)
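
A small worked example on a hypothetical weighted graph, where the path a-b-c (cost 3) beats the direct edge a-c (cost 5):

import networkx as nx

G = nx.Graph()
G.add_weighted_edges_from([('a', 'b', 1), ('b', 'c', 2), ('a', 'c', 5)])
distances, order = dijkstra('a', G)
print(order)      # ['a', 'b', 'c']
print(distances)  # [0, 1, 3]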
Example #6
import networkx as nx
from utils.priorityq import PriorityQueue

def hierarchical(G):
    # create a priority queue with each pair of nodes, indexed by distance
    # (0 if the nodes are adjacent, 1 otherwise)
    pq = PriorityQueue()
    for u in G.nodes():
        for v in G.nodes():
            if u != v:
                # wrap each node in a singleton frozenset so clusters can be
                # merged with |; note frozenset([u]), not frozenset(u), which
                # would iterate the node itself
                if (u, v) in G.edges() or (v, u) in G.edges():
                    pq.add(frozenset([frozenset([u]), frozenset([v])]), 0)
                else:
                    pq.add(frozenset([frozenset([u]), frozenset([v])]), 1)

    # start with a singleton cluster for each node
    clusters = set(frozenset([u]) for u in G.nodes())

    done = False
    while not done:
        # merge the two closest clusters
        s = list(pq.pop())
        clusters.remove(s[0])
        clusters.remove(s[1])

        # update the distance of the other clusters from the merged cluster;
        # remove() is assumed to delete an entry and return its priority
        for w in clusters:
            e1 = pq.remove(frozenset([s[0], w]))
            e2 = pq.remove(frozenset([s[1], w]))
            if e1 == 0 or e2 == 0:
                pq.add(frozenset([s[0] | s[1], w]), 0)
            else:
                pq.add(frozenset([s[0] | s[1], w]), 1)

        clusters.add(s[0] | s[1])

        # stop when only 4 clusters remain
        if len(clusters) == 4:
            done = True
    return clusters  # added: the original never returned the clusters it built
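
Assuming the `return clusters` added above, the four resulting clusters can be inspected like this (the graph is only illustrative):

import networkx as nx

G = nx.karate_club_graph()
for cluster in hierarchical(G):
    print(sorted(cluster))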
Example #7
from utils.priorityq import PriorityQueue
from utils.es1_utils import *

if __name__ == '__main__':

    #loading the real clusters from file
    real_clusters = load_real_cluster(
        "../../facebook_large/musae_facebook_target.csv")
    #labels used when writing results to file
    label = ['first', 'second', 'third', 'fourth']
    f = open("../results/output.txt", "w")
    #select which algorithm's output file to load
    name = "../results/spectral_sampled08" + ".pkl"
    output_clusters = load_dict_from_file(name)
    #priority queue used to sort the clusters by shared-element percentage
    pq = PriorityQueue()
    #header for the file output
    f.write("\n" + name + "\n")
    f.write("----------------------------------------------------------\n\n")
    #for each output cluster
    for k in label:
        cluster_len = len(output_clusters[k])
        #some statistics
        print("Cluster {} has {} elements:".format(k, cluster_len))
        #for each real cluster
        for key in sorted(real_clusters.keys()):
            #count the elements shared by the real cluster[key] and the output cluster[k]
            intersection = len(real_clusters[key].intersection(
                output_clusters[k]))
            #compute the percentage relative to the number of elements in the output cluster
            perc = float(intersection / cluster_len)