def silhouette(cls, graph, clusters):
    """
    Find the average silhouette distance for the clusters.

    For every node in the graph, ``a`` is the mean distance to the other
    members of the cluster that contains it and ``b`` is the smallest mean
    distance to the members of any cluster that does not contain it.  The
    node contributes ``(b - a) / max(a, b)`` and the result is the mean of
    those contributions over all the graph's nodes.

    Key arguments:
    graph -- the original graph
    clusters -- the clusters to analyze

    Returns 0.0 for a graph without nodes.  A node for which both ``a`` and
    ``b`` are zero contributes 0 instead of raising ZeroDivisionError, and
    empty clusters are ignored.
    """
    nodes = graph.nodes
    node_count = len(nodes)
    if node_count == 0:
        # Nothing to average over.
        return 0.0

    paths = graph.paths()

    # Calculate the distances for all pairs of nodes, indexed by node value
    # in both directions.
    dists = {}
    for node1, node2 in combinations(nodes, 2):
        dist = graph.dist(node1, node2, paths)
        dists.setdefault(node1.value, {})[node2.value] = dist
        dists.setdefault(node2.value, {})[node1.value] = dist

    total = 0.0
    for node in nodes:
        value = node.value

        # Find a and b.
        a = 0.0
        b = float('inf')
        for cluster in clusters:
            members = cluster.nodes
            size = len(members)
            if cluster.node(value):
                # The node's own cluster: average over the *other* members.
                if size > 1:
                    a = sum(dists[value][other.value]
                            for other in members
                            if other.value != value) / float(size - 1)
                else:
                    a = 0.0
            elif size > 0:
                # A foreign, non-empty cluster: keep the closest one.
                b = min(b, sum(dists[value][other.value]
                               for other in members) / float(size))

        # No foreign cluster was seen at all.
        if b == float('inf'):
            b = 0.0

        # When a and b are both zero the ratio is undefined; such a node
        # contributes nothing to the total.
        scale = max(a, b)
        if scale != 0:
            total += (b - a) / scale

    return total / node_count
def davies_bouldin(cls, graph, clusters):
    """
    Return the davies bouldin index for the clusters.

    Each cluster's diameter is the largest distance between two of its
    nodes, measured inside the cluster.  The separation of two clusters is
    the average distance, measured in the original graph, over every pair
    made of one node from each.  For every cluster the largest value of
    ``(diameter_i + diameter_j) / separation_ij`` is taken, and the index
    is the mean of those maxima.

    Key arguments:
    graph -- the original graph
    clusters -- the clusters to analyze

    Returns inf when fewer than two non-empty clusters are given, when
    every cluster is a singleton, or when two clusters have zero
    separation.  Empty clusters are ignored.
    """
    # An empty cluster has neither a diameter nor a distance to anything,
    # and would only cause a division by zero below.
    clusters = [cluster for cluster in clusters if len(cluster.nodes) > 0]

    # If we only have one cluster, then return inf!
    if len(clusters) < 2:
        return float('inf')

    # Special case, if all the clusters are singletons, return inf.
    if all(len(cluster.nodes) <= 1 for cluster in clusters):
        return float('inf')

    # Calculate the diameters for each cluster.
    diams = {}
    for cluster in clusters:
        diam = 0.0
        if len(cluster.nodes) > 1:
            cluster_paths = cluster.paths()
            for node1, node2 in combinations(cluster.nodes, 2):
                diam = max(diam, cluster.dist(node1, node2, cluster_paths))

            # If it weren't for directed graphs, the combinations above
            # would be enough; walk every ordered pair that has a path so
            # both directions are taken into account.
            for node1 in cluster_paths:
                for node2 in cluster_paths[node1]:
                    if node1 != node2 and cluster_paths[node1][node2]:
                        diam = max(diam,
                                   cluster.dist(node1, node2, cluster_paths))
        diams[cluster] = diam

    # Find all the graphs paths.
    graph_paths = graph.paths()

    # Calculate the distances between each cluster.
    dists = {}
    for cluster1, cluster2 in combinations(clusters, 2):
        # Find the average cluster distance between cluster i and j.
        pair_count = len(cluster1.nodes) * len(cluster2.nodes)
        dist = sum(graph.dist(node1, node2, graph_paths)
                   for node1, node2
                   in product(cluster1.nodes, cluster2.nodes)) / float(pair_count)
        dists.setdefault(cluster1, {})[cluster2] = dist
        dists.setdefault(cluster2, {})[cluster1] = dist

    total = 0.0
    for cluster1 in clusters:
        max_db = 0.0
        for cluster2 in clusters:
            if cluster1 != cluster2:
                separation = dists[cluster1][cluster2]
                if separation == 0:
                    # The two clusters are not separated at all; report
                    # inf like the other degenerate cases instead of
                    # dividing by zero.
                    return float('inf')
                max_db = max(max_db,
                             (diams[cluster1] + diams[cluster2]) / separation)
        total += max_db

    return total / len(clusters)