def test_one_element_predicted_clusters(self):
    y_true = [0, 0, 0, 1, 1, 1]
    y_pred = [0, 1, 2, 3, 4, 5]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 1.0)

    y_true = [0, 0]
    y_pred = [0, 1]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 1.0)
def test_differing_clusters(self):
    y_true = [0, 0, 1]
    y_pred = [1, 0, 0]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 2 / 3)

    y_true = [0, 1, 2, 1]
    y_pred = [0, 1, 1, 2]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 0.75)

    y_true = [0, 1]
    y_pred = [0, 0]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 0.5)
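# A minimal reference sketch of average_cluster_purity, consistent with the
# expected values in the tests above. This is an assumed definition for
# illustration, not the project's actual implementation (hence the
# hypothetical _sketch name): each predicted cluster contributes the count of
# its majority true label, normalized by the total number of samples.
import numpy as np

def _average_cluster_purity_sketch(y_true, y_pred):
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    majority_hits = 0
    for cluster in np.unique(y_pred):
        # True labels of all samples assigned to this predicted cluster
        labels = y_true[y_pred == cluster]
        # The majority label's count is the number of "pure" samples here
        majority_hits += np.bincount(labels).max()
    return majority_hits / len(y_true)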
def _calculate_analysis_values(predicted_clusters, true_cluster, times):
    """
    Calculates the analysis values for the predicted clusters.

    :param predicted_clusters: The candidate clusterings predicted by the network.
    :param true_cluster: The validation (ground truth) clustering.
    :param times: The utterance durations used by the diarization error rate.
    :return: The results of all metrics as a list of arrays, where index i selects
        the metric and index j the candidate clustering.
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Calculate scores')

    # Initialize output: one score per candidate clustering, preset to the
    # worst possible value of each metric
    metric_results = [None] * len(metric_names)
    for m, min_value in enumerate(metric_worst_values):
        if min_value == 1:
            metric_results[m] = np.ones(len(predicted_clusters))
        else:
            metric_results[m] = np.zeros(len(predicted_clusters))

    # Loop over all candidate clusterings
    for i, predicted_cluster in enumerate(
            tqdm(predicted_clusters, ncols=100, desc='Calculate scores for predicted clusters...')):
        # Calculate the individual metrics
        metric_results[0][i] = misclassification_rate(true_cluster, predicted_cluster)
        metric_results[1][i] = average_cluster_purity(true_cluster, predicted_cluster)
        metric_results[2][i] = adjusted_rand_index(true_cluster, predicted_cluster)
        metric_results[3][i] = diarization_error_rate(true_cluster, predicted_cluster, times)

    return metric_results
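# Hypothetical usage sketch for _calculate_analysis_values. The candidate
# clusterings, labels, and durations below are made up for illustration;
# metric_names, metric_worst_values, and the four metric functions are assumed
# to be defined at module level, as in the surrounding code.
candidate_clusterings = [
    [0, 0, 1, 1],  # one entry per candidate clustering of the utterances
    [0, 1, 1, 1],
    [0, 1, 2, 2],
    [0, 1, 2, 3],
]
true_labels = [0, 0, 1, 1]
utterance_times = [120, 90, 110, 100]  # durations consumed by diarization_error_rate

scores = _calculate_analysis_values(candidate_clusterings, true_labels, utterance_times)
best_mr = scores[0].min()   # lowest misclassification rate over all candidates
best_acp = scores[1].max()  # highest average cluster purity over all candidates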
def test_equal_clusters(self):
    y_true = [0, 1, 1, 0]
    y_pred = [1, 0, 0, 1]
    self.assertAlmostEqual(average_cluster_purity(y_true, y_pred), 1.0)