def test_sample_doc_clustering_with_online(self):
    '''
    Feed the sample documents one at a time to an OnlineClusterer and
    print the documents held by each resulting cluster.

    The test makes no assertions; it only exercises the clusterer and
    dumps the outcome for manual inspection.
    '''
    clusterer = OnlineClusterer(N=2, window=3)
    sample_documents = get_orange_clustering_test_data()

    for sample in sample_documents:
        # The value returned by add_document() is stored but never read.
        doc_index = clusterer.add_document(sample)
        clusterer.cluster(sample)

    # Never compared against anything below.
    # NOTE(review): presumably the intended cluster label per sample
    # document -- confirm and turn it into an assertion.
    expected_labels = [0, 0, 0, 1, 1, 1]

    for group in clusterer.clusters:
        # Single parenthesised argument: prints the same text as the
        # bare print statement.
        print(group.document_dict)
'''
Created on 13 Nov 2011

@author: george

Unit tests for the analysis.clustering package.
'''
import datetime
import unittest

from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_orange_clustering_test_data

###########################################
# GLOBALS                                 #
###########################################
# Instantiated at import time; not referenced by the test below.
ws = WarehouseServer()
sample_docs = get_orange_clustering_test_data()

# Module-level clusterer, loaded once with every sample document and
# then used by the test case below.
oc = OrangeKmeansClusterer(k=2)
for s in sample_docs:
    oc.add_document(s)


class TestOrangeClustering(unittest.TestCase):
    '''
    Tests for OrangeKmeansClusterer run against the sample documents.
    '''

    ###########################################
    # ORANGE TESTS                            #
    ###########################################
    def test_orange_sample_doc_kmeans(self):
        '''
        Run the module-level clusterer and compare the clusters it
        reports with the expected assignment for the six samples.
        '''
        km = oc.run("orange_clustering_test")
        expected = [0, 0, 0, 1, 1, 1]
        self.assertEqual(expected, km.clusters)
'''
Created on 27 Nov 2011

@author: george
'''
import unittest
import numpy

from analysis.summarization.summarization import CentroidSummarizer
from tests.test_document import get_orange_clustering_test_data

test_documents = get_orange_clustering_test_data()

# Map a running integer id (0, 1, 2, ...) to the content of each sample
# document; this dictionary is what the summarizer is constructed from.
doc_dict = {}
for doc_id, doc in enumerate(test_documents):
    doc_dict[doc_id] = doc.content


class TestSummarizationFunctions(unittest.TestCase):
    '''
    Tests for CentroidSummarizer run against the sample documents.
    '''

    def test_fv_and_centroid_creation(self):
        '''
        Check every document's feature vector (``fv``) against the
        reference values, element by element.
        '''
        cs = CentroidSummarizer(doc_dict)
        # The return value is not needed; the loop below reads
        # cs.documents. Presumably summarize() is what computes each
        # document's fv -- confirm against CentroidSummarizer.
        cs.summarize()

        expected = [
            [ 0.0866434, 0., 0., 0., 0.0866434, 0., 0., 0.0866434, 0., 0.,
              0., 0., 0.0866434, 0., 0., 0., 0., 0., 0.13732654, 0.0866434,
              0., 0.0866434, 0., 0., 0., 0., 0., 0., 0., 0.22396993, 0.,
              0., 0.],
            [ 0.0866434, 0., 0., 0., 0.0866434, 0., 0., 0.0866434, 0., 0.,
              0., 0., 0.0866434, 0., 0., 0., 0., 0.13732654, 0., 0.0866434,
              0., 0.0866434, 0., 0., 0., 0., 0., 0., 0., 0., 0.22396993,
              0., 0.],
            [ 0.06931472, 0.17917595, 0., 0.35835189, 0.06931472, 0., 0.,
              0.06931472, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
              0.06931472, 0., 0.06931472, 0.17917595, 0., 0.17917595, 0.,
              0., 0., 0., 0., 0., 0., 0.],
            [ 0., 0., 0., 0., 0., 0.19908439, 0., 0., 0., 0.12206803,
              0.12206803, 0., 0.07701635, 0., 0., 0., 0.07701635, 0.,
              0.12206803, 0., 0., 0., 0., 0.19908439, 0., 0.19908439, 0.,
              0., 0., 0., 0., 0.12206803, 0.],
            [ 0., 0., 0.13782765, 0., 0., 0., 0., 0., 0.13782765, 0., 0.,
              0.13782765, 0., 0.13782765, 0.13782765, 0.13782765,
              0.05331901, 0., 0., 0., 0.13782765, 0., 0., 0., 0., 0.,
              0.13782765, 0.13782765, 0.13782765, 0., 0., 0.08450864,
              0.13782765],
            [ 0., 0., 0., 0., 0., 0., 0.35835189, 0., 0., 0.21972246,
              0.21972246, 0., 0., 0., 0., 0., 0.13862944, 0.21972246, 0.,
              0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        ]

        # NOTE(review): relies on cs.documents.values() yielding the
        # documents in the same order as the rows of `expected` -- confirm.
        for i, doc in enumerate(cs.documents.values()):
            # Use the largest absolute element-wise difference. A plain
            # sum of signed differences lets errors cancel out: rows 0
            # and 1 above hold the same values in different positions,
            # so they have equal sums and would be indistinguishable.
            max_error = numpy.max(numpy.abs(doc.fv - expected[i]))
            self.assertAlmostEqual(max_error, 0)