def test_precision_recall_and_f_calculation(self):
    """Precision/recall and F-measure over a 3-cluster confusion matrix.

    Two of the 18 points are misassigned (one from cluster 0 to 1, one
    from cluster 2 to 0) and one point moves from cluster 1 to 2, so the
    per-cluster F-scores should sum to 2.5.
    """
    ede = ClusteringEvaluator()
    targets = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
    predictions = [0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 2]
    confusion_matrix = ede.create_confusion_matrix(targets, predictions, 3)
    rp_rates = ede.calculate_precision_recall(confusion_matrix)
    fs = ede.calculate_f_measure(rp_rates)
    # BUG FIX: the original asserted sum(fs) == [2.5] — a float compared
    # against a list — which can never hold. Compare against the scalar,
    # with a tolerance since the F-scores are floats. Also replaced the
    # deprecated assertEquals alias.
    self.assertAlmostEqual(sum(fs), 2.5, places=7)
def test_confusion_matrix_creation(self):
    """A perfect clustering must yield a diagonal confusion matrix.

    Targets and predictions are identical, so each of the three clusters
    contributes 6 points on the diagonal and 0 elsewhere.
    """
    ede = ClusteringEvaluator()
    targets = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2]
    predictions = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2]
    confusion_matrix = ede.create_confusion_matrix(targets, predictions, 3)
    expected = numpy.array([[6., 0., 0.], [0., 6., 0.], [0., 0., 6.]])
    # BUG FIX: the original summed (confusion_matrix - expected) along each
    # row and checked the sums were zero. Opposite-signed differences in
    # the same row cancel (e.g. +1 and -1), so an unequal matrix could
    # still pass. Element-wise equality is what the test actually needs.
    self.assertTrue(numpy.array_equal(confusion_matrix, expected))
def test_bcubed_calculation(self):
    """B-Cubed precision, recall and F on a fixed (label, cluster) set.

    Expected values were precomputed for this 18-point assignment of
    3 gold labels across 3 clusters.
    """
    ede = ClusteringEvaluator()
    documents_labels_clusters = [
        (0, 0), (0, 1), (0, 0), (0, 0), (0, 1), (0, 0),
        (1, 1), (1, 1), (1, 2), (1, 1), (1, 1), (1, 1),
        (2, 1), (2, 0), (2, 2), (2, 0), (2, 2), (2, 2),
    ]
    precision, recall, f = ede.calculate_bcubed_measures(documents_labels_clusters)
    # Direct two-argument form: assertAlmostEqual(a, b, places=7) checks
    # round(a - b, 7) == 0, exactly what the original spelled out as
    # assertAlmostEqual(const - value, 0, places=7).
    self.assertAlmostEqual(precision, 0.532407407407, places=7)
    self.assertAlmostEqual(recall, 0.555555555556, places=7)
    self.assertAlmostEqual(f, 0.543735224586, places=7)
'''
Created on 21 Mar 2012

@author: george

Script: runs a clustering evaluation end-to-end — loads evaluation tweets
from the warehouse, clusters them with k-means, and prints the B-Cubed
precision, recall and F scores.
'''
from database.warehouse import WarehouseServer
from database.model.tweets import EvaluationTweet
from analysis.clustering.kmeans import OrangeKmeansClusterer
from evaluation.evaluators import ClusteringEvaluator

# Fetch every stored EvaluationTweet document from the warehouse.
ws = WarehouseServer()
documents = ws.get_all_documents(type=EvaluationTweet)

# k=35 clusters; ngram=1 presumably selects unigram features — TODO confirm
# against OrangeKmeansClusterer's signature.
oc = OrangeKmeansClusterer(k=35, ngram=1)

# Evaluate the clusterer on the loaded documents; evaluate() returns the
# three B-Cubed measures (precision, recall, F).
ebe = ClusteringEvaluator(documents)
bcubed_precision, bcubed_recall, bcubed_f = ebe.evaluate(clusterer=oc)
print bcubed_precision, bcubed_recall, bcubed_f