def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Computes the clustering-wide overlap measurements and stores them in 'analysis'.

    Keys written into 'analysis':
      - "overlap": clustering overlap of all cluster types merged together.
      - "mixed_overlap": clustering overlap using only the "mixed" clusters.
      - "num_<type>" / "num_<type>_elements": cluster and element counts per cluster type.
      - "total_num_clusters" / "total_num_elements": the sums over every cluster type.

    'separated_decomposed_clusters' is indexed by cluster type and then by cluster id, and
    must contain a "mixed" entry. Nothing is returned; 'analysis' is modified in place.
    """
    analysis["total_num_clusters"] = 0
    analysis["total_num_elements"] = 0

    all_clusters = mergeSeparatedClusters(separated_decomposed_clusters)
    analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(
        all_clusters, distance_matrix)

    only_mixed = mergeSeparatedClusters(
        {"mixed": separated_decomposed_clusters["mixed"]})
    analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(
        only_mixed, distance_matrix)

    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        count_key = "num_" + cluster_type
        elements_key = "num_" + cluster_type + "_elements"

        # Number of clusters of this type.
        analysis[count_key] = len(clusters_of_type)
        analysis["total_num_clusters"] += analysis[count_key]

        # Number of elements held by the clusters of this type.
        cluster_sizes = []
        for dc_id in clusters_of_type:
            cluster_sizes.append(len(getAllElements(clusters_of_type[dc_id])))
        analysis[elements_key] = numpy.sum(cluster_sizes)
        analysis["total_num_elements"] += analysis[elements_key]
def test_get_cluster_min_max_distances(self):
    """
    Checks the per-element minimum and maximum distances returned by
    OverlapCalculator.get_cluster_min_max_distances for two hand-written scenarios.
    """
    # Each scenario: (condensed matrix values, decomposed cluster, expected minima, expected maxima)
    scenarios = (
        # First test
        ([1., 0.7, 0.3],
         {"traj_0": [0], "traj_1": [1], "traj_2": [2]},
         [0.7, 0.3, 0.3],
         [1., 1., 0.7]),
        # Second test
        ([1., 0.7, 2., 0.3, 1., 0.7],
         {"traj_0": [0, 3], "traj_1": [1], "traj_2": [2]},
         [0.7, 0.7, 0.3, 0.3],
         [1., 1., 1., 0.7]),
    )

    for matrix_values, decomposed_cluster, expected_min_d, expected_max_d in scenarios:
        distance_matrix = CondensedMatrix(matrix_values)
        min_d, max_d = OverlapCalculator.get_cluster_min_max_distances(
            decomposed_cluster, distance_matrix)
        numpy.testing.assert_array_almost_equal(min_d, expected_min_d, 8)
        numpy.testing.assert_array_almost_equal(max_d, expected_max_d, 8)
def test_calculate_global_overlap(self):
    """
    Checks that OverlapCalculator.calculate_global_overlap yields 0 for two different
    decompositions of the same four elements.
    """
    distance_matrix = CondensedMatrix([1., 0.7, 2., 0.3, 1., 0.7])

    clusterings = (
        # Two clusters, each with one element from each trajectory.
        [{"traj_0": [0], "traj_1": [1]}, {"traj_0": [2], "traj_1": [3]}],
        # One two-trajectory cluster plus two single-trajectory clusters.
        [{"traj_0": [0], "traj_1": [1]}, {"traj_0": [2]}, {"traj_1": [3]}],
    )

    for decomposed_clusters in clusterings:
        global_overlap = OverlapCalculator.calculate_global_overlap(
            decomposed_clusters, distance_matrix, 1, 1)
        self.assertEqual(0., global_overlap)
def test_calculate_cluster_overlap(self):
    """
    Checks OverlapCalculator.calculate_cluster_overlap against reference values for a
    three-trajectory and a two-trajectory cluster, with 1 and 2 as its leading argument.
    """
    distance_matrix = CondensedMatrix([1., 0.7, 0.3])

    # Three trajectories, one element each.
    three_trajectories = {"traj_0": [0], "traj_1": [1], "traj_2": [2]}
    for leading_arg, expected_overlap in ((1, 0.481481488022), (2, 0.4761904843)):
        overlap = OverlapCalculator.calculate_cluster_overlap(
            leading_arg, three_trajectories, distance_matrix)
        self.assertAlmostEqual(expected_overlap, overlap, 12)

    # Two trajectories, one element each: the expected value is 1 in both cases.
    two_trajectories = {"traj_0": [0], "traj_1": [1]}
    for leading_arg in (1, 2):
        overlap = OverlapCalculator.calculate_cluster_overlap(
            leading_arg, two_trajectories, distance_matrix)
        self.assertAlmostEqual(1., overlap, 12)
def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Calculates the global (whole clustering) measurements and writes them into 'analysis'.

    It stores the overlap of the merged clustering ("overlap"), the overlap of the "mixed"
    clusters alone ("mixed_overlap"), and for each cluster type the number of clusters
    ("num_<type>") and of elements ("num_<type>_elements"), accumulating both into
    "total_num_clusters" and "total_num_elements".

    The input mapping goes cluster type -> cluster id -> decomposed cluster and needs a
    "mixed" key. The function returns nothing.
    """
    analysis["total_num_clusters"] = 0
    analysis["total_num_elements"] = 0

    merged_clustering = mergeSeparatedClusters(separated_decomposed_clusters)
    analysis["overlap"] = OverlapCalculator.calculate_clustering_overlap(
        merged_clustering, distance_matrix)

    mixed_subset = {"mixed": separated_decomposed_clusters["mixed"]}
    analysis["mixed_overlap"] = OverlapCalculator.calculate_clustering_overlap(
        mergeSeparatedClusters(mixed_subset), distance_matrix)

    for cluster_type in separated_decomposed_clusters:
        typed_clusters = separated_decomposed_clusters[cluster_type]
        num_key = "num_" + cluster_type
        num_elements_key = "num_" + cluster_type + "_elements"

        # Cluster count for this type, added to the running total.
        analysis[num_key] = len(typed_clusters)
        analysis["total_num_clusters"] += analysis[num_key]

        # Element count for this type, added to the running total.
        sizes = []
        for dc_id in typed_clusters:
            sizes.append(len(getAllElements(typed_clusters[dc_id])))
        analysis[num_elements_key] = numpy.sum(sizes)
        analysis["total_num_elements"] += analysis[num_elements_key]
def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Calculates the global (whole clustering) measurements and writes them into 'analysis'.

    Keys written into 'analysis':
      - "overlap": global overlap of all cluster types merged together.
      - "num_<type>" / "num_<type>_elements": cluster and element counts per cluster type.
      - "total_num_clusters" / "total_num_elements": the sums over every cluster type.

    'separated_decomposed_clusters' is indexed by cluster type and then by cluster id.

    Returns the last cluster type that was visited, or None if there were no cluster types.
    """
    analysis["total_num_clusters"] = 0
    analysis["total_num_elements"] = 0
    analysis["overlap"] = OverlapCalculator.calculate_global_overlap(
        mergeSeparatedClusters(separated_decomposed_clusters),
        distance_matrix, 2, 1)

    # The loop variable is returned below; bind it first so that an empty mapping
    # returns None instead of failing on an unassigned name.
    cluster_type = None
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        count_key = "num_" + cluster_type
        elements_key = "num_" + cluster_type + "_elements"

        # Number of clusters of this type.
        analysis[count_key] = len(clusters_of_type)
        analysis["total_num_clusters"] += analysis[count_key]

        # Number of elements held by the clusters of this type.
        analysis[elements_key] = numpy.sum([
            len(getAllElements(clusters_of_type[dc_id]))
            for dc_id in clusters_of_type
        ])
        analysis["total_num_elements"] += analysis[elements_key]

    return cluster_type
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the per-cluster analysis and stores one entry per cluster id in 'analysis'.

    Each entry holds:
      - "components": list with the trajectory ids present in the cluster.
      - "global": "mean", "std" and "max" as returned by calculate_distance_stats for all
        the elements of the cluster, "num_elements", and one sub-dictionary per trajectory
        id with the same four values restricted to the elements of that trajectory.
      - Only for clusters of type "mixed": "centers_mean_diff" and "global"/"overlap".

    'separated_decomposed_clusters' is indexed by cluster type and then by cluster id.
    Nothing is returned; 'analysis' is modified in place.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            # Store a list snapshot of the ids; a bare keys() view would stay tied to the
            # cluster dictionary under Python 3.
            global_stats = {}
            analysis[cluster_id] = {
                "components": list(decomposed_cluster.keys()),
                "global": global_stats
            }

            # Statistics over every element of the cluster (elements gathered only once).
            all_elements = getAllElements(decomposed_cluster)
            (global_stats["mean"],
             global_stats["std"],
             global_stats["max"]) = calculate_distance_stats(all_elements, distance_matrix)
            global_stats["num_elements"] = len(all_elements)

            # Same statistics, restricted to the elements of each trajectory.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                (traj_stats["mean"],
                 traj_stats["std"],
                 traj_stats["max"]) = calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["num_elements"] = len(traj_elements)

            if cluster_type == "mixed":
                analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                global_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                    2, decomposed_cluster, distance_matrix)
def analyze_clustering(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the overlap analysis of a clustering (calculates global measurements).

    'analysis' receives the global overlap of the merged clustering ("overlap"), the
    number of clusters and of elements of each cluster type ("num_<type>" and
    "num_<type>_elements") and their totals ("total_num_clusters", "total_num_elements").

    'separated_decomposed_clusters' goes cluster type -> cluster id -> decomposed cluster.

    Returns the last cluster type that was visited, or None if there were no cluster types.
    """
    analysis["total_num_clusters"] = 0
    analysis["total_num_elements"] = 0

    merged_clustering = mergeSeparatedClusters(separated_decomposed_clusters)
    analysis["overlap"] = OverlapCalculator.calculate_global_overlap(
        merged_clustering, distance_matrix, 2, 1)

    # A 'for' target stays unassigned when there is nothing to iterate, so give the
    # returned name a value beforehand (None means "no cluster types").
    cluster_type = None
    for cluster_type in separated_decomposed_clusters:
        typed_clusters = separated_decomposed_clusters[cluster_type]
        num_key = "num_" + cluster_type
        num_elements_key = "num_" + cluster_type + "_elements"

        # Cluster count for this type, added to the running total.
        analysis[num_key] = len(typed_clusters)
        analysis["total_num_clusters"] += analysis[num_key]

        # Element count for this type, added to the running total.
        analysis[num_elements_key] = numpy.sum([
            len(getAllElements(typed_clusters[dc_id]))
            for dc_id in typed_clusters
        ])
        analysis["total_num_elements"] += analysis[num_elements_key]

    return cluster_type
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the overlap analysis of separated clusters.

    One entry per cluster id is stored in 'analysis', holding:
      - "components": list with the trajectory ids present in the cluster.
      - "global": "mean", "std" and "max" as returned by calculate_distance_stats for all
        the elements of the cluster, "num_elements", and one sub-dictionary per trajectory
        id with the same four values restricted to the elements of that trajectory.
      - Only for clusters of type "mixed": "centers_mean_diff" and "global"/"overlap"
        (stored inverted, see the comment in the code).

    'separated_decomposed_clusters' is indexed by cluster type and then by cluster id.
    Nothing is returned; 'analysis' is modified in place.
    """
    for cluster_type in separated_decomposed_clusters:
        clusters_of_type = separated_decomposed_clusters[cluster_type]
        for cluster_id in clusters_of_type:
            decomposed_cluster = clusters_of_type[cluster_id]

            # Store a list snapshot of the ids; a bare keys() view would stay tied to the
            # cluster dictionary under Python 3.
            global_stats = {}
            analysis[cluster_id] = {
                "components": list(decomposed_cluster.keys()),
                "global": global_stats
            }

            # Statistics over every element of the cluster (elements gathered only once).
            all_elements = getAllElements(decomposed_cluster)
            (global_stats["mean"],
             global_stats["std"],
             global_stats["max"]) = calculate_distance_stats(all_elements, distance_matrix)
            global_stats["num_elements"] = len(all_elements)

            # Same statistics, restricted to the elements of each trajectory.
            for traj_id in decomposed_cluster:
                traj_elements = decomposed_cluster[traj_id]
                traj_stats = {}
                global_stats[traj_id] = traj_stats
                (traj_stats["mean"],
                 traj_stats["std"],
                 traj_stats["max"]) = calculate_distance_stats(traj_elements, distance_matrix)
                traj_stats["num_elements"] = len(traj_elements)

            if cluster_type == "mixed":
                analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # The overlap ranges between 0 and 1, being 0 the best value. We invert it in
                # order to get a more understandable range (1 is the best value and 0 the worst).
                global_stats["overlap"] = 1 - OverlapCalculator.calculate_cluster_overlap(
                    decomposed_cluster, distance_matrix)
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the overlap analysis of separated clusters.

    For every cluster id, 'analysis' receives a dictionary with:
      - "components": the trajectory ids of the cluster, as a list.
      - "global": the "mean", "std" and "max" values returned by calculate_distance_stats
        for the whole cluster, its "num_elements", and a nested dictionary per trajectory
        id with those same four values for the elements of that trajectory alone.
      - "centers_mean_diff" and "global"/"overlap", written only when the cluster type
        is "mixed". The overlap is stored as 1 minus the calculated value.

    The input mapping goes cluster type -> cluster id -> decomposed cluster. The function
    returns nothing.
    """
    for cluster_type in separated_decomposed_clusters:
        typed_clusters = separated_decomposed_clusters[cluster_type]
        for cluster_id in typed_clusters:
            decomposed_cluster = typed_clusters[cluster_id]

            # list(...) keeps "components" a plain list on Python 3, where keys() alone
            # is a view that follows later changes of the cluster dictionary.
            cluster_stats = {}
            analysis[cluster_id] = {
                "components": list(decomposed_cluster.keys()),
                "global": cluster_stats
            }

            # Whole-cluster statistics; the element list is built once and reused.
            cluster_elements = getAllElements(decomposed_cluster)
            mean, std, maximum = calculate_distance_stats(cluster_elements, distance_matrix)
            cluster_stats["mean"] = mean
            cluster_stats["std"] = std
            cluster_stats["max"] = maximum
            cluster_stats["num_elements"] = len(cluster_elements)

            # Per-trajectory statistics.
            for traj_id in decomposed_cluster:
                elements_of_traj = decomposed_cluster[traj_id]
                cluster_stats[traj_id] = {}
                mean, std, maximum = calculate_distance_stats(elements_of_traj, distance_matrix)
                cluster_stats[traj_id]["mean"] = mean
                cluster_stats[traj_id]["std"] = std
                cluster_stats[traj_id]["max"] = maximum
                cluster_stats[traj_id]["num_elements"] = len(elements_of_traj)

            if cluster_type == "mixed":
                analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                # The overlap ranges between 0 and 1, being 0 the best value. We invert it in
                # order to get a more understandable range (1 is the best value and 0 the worst).
                cluster_stats["overlap"] = 1 - OverlapCalculator.calculate_cluster_overlap(
                    decomposed_cluster, distance_matrix)
def analyze_clusters(cls, separated_decomposed_clusters, distance_matrix, analysis):
    """
    Performs the per-cluster analysis of separated clusters.

    For every cluster id, 'analysis' receives a dictionary with:
      - "components": the trajectory ids of the cluster, as a list.
      - "global": the "mean", "std" and "max" values returned by calculate_distance_stats
        for the whole cluster, its "num_elements", and a nested dictionary per trajectory
        id with those same four values for the elements of that trajectory alone.
      - "centers_mean_diff" and "global"/"overlap", written only when the cluster type
        is "mixed".

    The input mapping goes cluster type -> cluster id -> decomposed cluster. The function
    returns nothing.
    """
    for cluster_type in separated_decomposed_clusters:
        typed_clusters = separated_decomposed_clusters[cluster_type]
        for cluster_id in typed_clusters:
            decomposed_cluster = typed_clusters[cluster_id]

            # list(...) keeps "components" a plain list on Python 3, where keys() alone
            # is a view that follows later changes of the cluster dictionary.
            cluster_stats = {}
            analysis[cluster_id] = {
                "components": list(decomposed_cluster.keys()),
                "global": cluster_stats
            }

            # Whole-cluster statistics; the element list is built once and reused.
            cluster_elements = getAllElements(decomposed_cluster)
            mean, std, maximum = calculate_distance_stats(cluster_elements, distance_matrix)
            cluster_stats["mean"] = mean
            cluster_stats["std"] = std
            cluster_stats["max"] = maximum
            cluster_stats["num_elements"] = len(cluster_elements)

            # Per-trajectory statistics.
            for traj_id in decomposed_cluster:
                elements_of_traj = decomposed_cluster[traj_id]
                cluster_stats[traj_id] = {}
                mean, std, maximum = calculate_distance_stats(elements_of_traj, distance_matrix)
                cluster_stats[traj_id]["mean"] = mean
                cluster_stats[traj_id]["std"] = std
                cluster_stats[traj_id]["max"] = maximum
                cluster_stats[traj_id]["num_elements"] = len(elements_of_traj)

            if cluster_type == "mixed":
                analysis[cluster_id]["centers_mean_diff"] = calculate_mean_center_differences(
                    decomposed_cluster, distance_matrix)
                cluster_stats["overlap"] = OverlapCalculator.calculate_cluster_overlap(
                    2, decomposed_cluster, distance_matrix)