import time

import networkx as nx
import numpy as np

from utils.priorityq import PriorityQueue


# betweenness() is assumed to be defined elsewhere in this module: it returns
# the (edge_betweenness, node_betweenness) dicts used by the clustering below.
def betweenness_clustering(G):
    # save the starting time of the algorithm
    start = time.time()
    # compute the betweenness of each edge
    eb, nb = betweenness(G)
    # insert the edges into a priority queue, sorted by highest betweenness
    pq = PriorityQueue()
    for i in eb.keys():
        pq.add(i, -eb[i])
    graph = G.copy()
    # at each iteration we remove the edge with the highest betweenness;
    # we stop when the graph has exactly 4 clusters (connected components)
    cc = []
    while len(cc) != 4:
        edge = tuple(sorted(pq.pop()))
        graph.remove_edges_from([edge])
        cc = list(nx.connected_components(graph))
    end = time.time()
    # algorithm execution time
    print("Execution time:", end - start)
    # format the output as a dict
    label = ['first', 'second', 'third', 'fourth']
    final_cluster = {}
    for i in range(4):
        final_cluster[label[i]] = cc[i]
    return final_cluster
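# None of the listings here show the PriorityQueue imported from
# utils.priorityq, so the following is only a minimal sketch, assuming a
# binary heap with lazy deletion, of the interface the code above and below
# relies on: add() inserts an element or updates its priority, pop() returns
# the element with the lowest priority value, remove() deletes an element and
# returns its priority (hierarchical() depends on that return value), and
# is_empty() reports whether any live entries remain. The real class in the
# repository may be implemented differently.
import heapq
import itertools


class PriorityQueue:
    def __init__(self):
        self._heap = []                    # entries: [priority, tiebreak, element, live]
        self._entries = {}                 # element -> its current heap entry
        self._counter = itertools.count()  # tiebreak, so unorderable elements never compare

    def add(self, element, priority):
        # if the element is already queued, invalidate its old entry (priority update)
        if element in self._entries:
            self._entries[element][3] = False
        entry = [priority, next(self._counter), element, True]
        self._entries[element] = entry
        heapq.heappush(self._heap, entry)

    def pop(self):
        # skip entries invalidated by add() or remove() until a live one surfaces
        while self._heap:
            priority, _, element, live = heapq.heappop(self._heap)
            if live:
                del self._entries[element]
                return element
        raise KeyError("pop from an empty priority queue")

    def remove(self, element):
        # lazy removal: mark the entry stale and return its priority
        entry = self._entries.pop(element)
        entry[3] = False
        return entry[0]

    def is_empty(self):
        return len(self._entries) == 0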
def sorted_elements(dist, pq: PriorityQueue):
    sorted_list = []
    distances = []
    while not pq.is_empty():
        k = pq.pop()
        sorted_list.append(k)
        distances.append(dist[k])
    return distances, sorted_list
def top(G, measure, k):
    pq = PriorityQueue()
    cen = measure(G)
    for u in G.nodes():
        # we use negated values because PriorityQueue returns the element
        # with the lowest priority value first
        pq.add(u, -cen[u])
    out = []
    for i in range(k):
        out.append(pq.pop())
    return out
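# A usage sketch for top(); the karate club graph and degree centrality are
# illustrative choices, not part of the original code. Any measure that maps
# a graph to a node -> score dict (e.g. nx.closeness_centrality) works here.
def _top_example():
    G = nx.karate_club_graph()
    return top(G, nx.degree_centrality, 5)  # the 5 highest-degree nodes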
def bwt_cluster(G):
    eb, nb = betweenness(G)
    pq = PriorityQueue()
    for i in eb.keys():
        pq.add(i, -eb[i])
    graph = G.copy()
    done = False
    # interactive variant: remove one highest-betweenness edge per iteration,
    # print the resulting components, and let the user decide when to stop
    while not done:
        edge = tuple(sorted(pq.pop()))
        graph.remove_edges_from([edge])
        print(list(nx.connected_components(graph)))
        a = input("Do you want to continue? (y/n) ")
        if a == "n":
            done = True
def dijkstra(start, G: nx.Graph):
    open_pq = PriorityQueue()  # renamed from `open` to avoid shadowing the builtin
    dist = {start: 0}
    increasing_order_dist = PriorityQueue()
    for v in G.nodes():
        if not v == start:
            dist[v] = np.inf
        increasing_order_dist.add(v, dist[v])
        open_pq.add(v, dist[v])
    while not open_pq.is_empty():
        u = open_pq.pop()
        for v in G.neighbors(u):
            # extract the current weight of the edge between u and the neighbor v
            try:
                w = G[u][v]["weight"]
            except KeyError:
                w = 1  # for unweighted graphs
            alt = dist[u] + w
            if alt < dist[v]:
                dist[v] = alt
                increasing_order_dist.add(v, dist[v])  # decrease the priority of v
                open_pq.add(v, alt)  # if the element already exists, add() updates its priority
    return sorted_elements(dist, increasing_order_dist)
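# A usage sketch for dijkstra(); the three-node weighted graph is illustrative
# only. Nodes come back sorted by distance from the start node, together with
# the distances, because sorted_elements() drains the increasing_order_dist queue.
def _dijkstra_example():
    G = nx.Graph()
    G.add_weighted_edges_from([("a", "b", 2), ("b", "c", 1), ("a", "c", 5)])
    distances, order = dijkstra("a", G)
    # distances -> [0, 2, 3], order -> ['a', 'b', 'c']
    return distances, order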
def hierarchical(G):
    # create a priority queue with each pair of singleton clusters, indexed by
    # distance; frozenset([u]) builds a singleton cluster even when the node
    # is a multi-character string or a non-iterable value
    pq = PriorityQueue()
    for u in G.nodes():
        for v in G.nodes():
            if u != v:
                if (u, v) in G.edges() or (v, u) in G.edges():
                    pq.add(frozenset([frozenset([u]), frozenset([v])]), 0)
                else:
                    pq.add(frozenset([frozenset([u]), frozenset([v])]), 1)
    # start with a cluster for each node
    clusters = set(frozenset([u]) for u in G.nodes())
    done = False
    while not done:
        # merge the two closest clusters
        s = list(pq.pop())
        clusters.remove(s[0])
        clusters.remove(s[1])
        # update the distance of the other clusters from the merged cluster
        for w in clusters:
            e1 = pq.remove(frozenset([s[0], w]))
            e2 = pq.remove(frozenset([s[1], w]))
            if e1 == 0 or e2 == 0:
                pq.add(frozenset([s[0] | s[1], w]), 0)
            else:
                pq.add(frozenset([s[0] | s[1], w]), 1)
        clusters.add(s[0] | s[1])
        if len(clusters) == 4:
            done = True
    return clusters
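# A usage sketch for hierarchical(); the path graph is illustrative only.
# Starting from singleton clusters, adjacent pairs (distance 0) are merged
# first, and the loop stops as soon as exactly 4 clusters remain.
def _hierarchical_example():
    G = nx.path_graph(8)    # nodes 0..7 connected in a line
    return hierarchical(G)  # a set of 4 frozensets partitioning the nodes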
from utils.priorityq import PriorityQueue
from utils.es1_utils import *

if __name__ == '__main__':
    # load the real clusters from file
    real_clusters = load_real_cluster(
        "../../facebook_large/musae_facebook_target.csv")
    # settings for the output file
    label = ['first', 'second', 'third', 'fourth']
    f = open("../results/output.txt", "w")
    # setting to load an algorithm's output
    name = "../results/spectral_sampled08" + ".pkl"
    output_clusters = load_dict_from_file(name)
    # start a pq in order to sort the clusters by common-element percentage
    pq = PriorityQueue()
    # just for the file output
    f.write("\n" + name + "\n")
    f.write("----------------------------------------------------------\n\n")
    # for each output cluster
    for k in label:
        cluster_len = len(output_clusters[k])
        # some statistics
        print("Cluster {} has {} elements:".format(k, cluster_len))
        # for each real cluster
        for key in sorted(real_clusters.keys()):
            # count the common elements between real_clusters[key] and output_clusters[k]
            intersection = len(real_clusters[key].intersection(
                output_clusters[k]))
            # compute the percentage over the output cluster's number of elements
            perc = float(intersection / cluster_len)