Unit tests for the analysis.clustering package.
'''
import datetime, unittest 
from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_orange_clustering_test_data

###########################################
# GLOBALS                                #
###########################################
# Module-level fixtures: everything below runs once, when this module is
# imported, and the resulting objects are shared by all tests in the file.
# NOTE(review): `ws` is not used by any code visible in this snippet --
# presumably the tweet-based test queries it; confirm before removing.
ws = WarehouseServer()
sample_docs = get_orange_clustering_test_data()

# A single clusterer (k=2 -- presumably the number of clusters, TODO confirm)
# is fed every sample document; the tests below call methods on this instance.
oc = OrangeKmeansClusterer(k=2)
for s in sample_docs:
    oc.add_document(s)

class TestOrangeClustering(unittest.TestCase):
    
    ###########################################
    # ORANGE TESTS                            #
    ###########################################       
    def test_orange_sample_doc_kmeans(self):
        '''
        Runs the shared clusterer on the sample documents and checks the
        cluster label reported for each of the six documents.
        '''
        result = oc.run("orange_clustering_test")
        # First three documents are expected in cluster 0, last three in 1.
        self.assertEqual([0] * 3 + [1] * 3, result.clusters)

    def test_orange_with_tweets_kmeans(self):
        import time
        start = time.time()            
        from_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
Exemplo n.º 2
0
'''
Created on 26 Jan 2012

@author: george
'''
import unittest, numpy
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_test_documents
###########################################
# GLOBALS                                #
###########################################
# get_test_documents() is unpacked into three values; only the third one is
# used here (the first two are bound to `ignore` and never read).
ignore, ignore, samples = get_test_documents()

# Module-level fixture built once at import time: a single clusterer
# (k=2 -- presumably the number of clusters, TODO confirm) loaded with every
# sample document and shared by the tests below.
oc = OrangeKmeansClusterer(k=2)
for sample in samples:
    oc.add_document(sample)


class Test(unittest.TestCase):
    def test_orange_cluster_term_document_matrix(self):
        '''
        Builds the term-document matrix for the shared sample documents and
        compares it, element by element, with the expected weights.

        Fails with an AssertionError (reported by unittest as a test failure)
        if the shapes differ or any element is off by more than the tolerance.
        '''
        oc.construct_term_doc_matrix()
        calculated = oc.td_matrix
        expected = numpy.array(
            [[0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
             [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
             [0, 0, 0, 0.54930614, 0, 0.549306140, 0]])

        # ndarray.all() collapses a whole matrix into one boolean ("is every
        # element non-zero?"). Comparing expected.all() with calculated.all()
        # therefore passes for ANY two matrices that each contain a zero.
        # Compare the actual values instead; the absolute tolerance absorbs
        # the rounding of the 8-decimal literals above.
        numpy.testing.assert_allclose(calculated, expected, rtol=0, atol=1e-6)

    def test_orange_save_matrix_to_tab_file(self):
        oc.construct_term_doc_matrix()
Exemplo n.º 3
0
'''
Created on 26 Jan 2012

@author: george
'''
import unittest, numpy
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_test_documents
###########################################
# GLOBALS                                #
###########################################
# get_test_documents() is unpacked into three values; only the third one is
# used here (the first two are bound to `ignore` and never read).
ignore, ignore, samples  =  get_test_documents()

# Module-level fixture built once at import time: a single clusterer
# (k=2 -- presumably the number of clusters, TODO confirm) loaded with every
# sample document and shared by the tests below.
oc = OrangeKmeansClusterer(k=2)
for sample in samples:
    oc.add_document(sample)

class Test(unittest.TestCase):
    '''
    Tests for the term-document matrix produced by the module-level
    OrangeKmeansClusterer instance `oc`.
    '''

    def test_orange_cluster_term_document_matrix(self):
        '''
        Builds the term-document matrix for the shared sample documents and
        compares it, element by element, with the expected weights.

        Fails with an AssertionError (reported by unittest as a test failure)
        if the shapes differ or any element is off by more than the tolerance.
        '''
        oc.construct_term_doc_matrix()
        calculated = oc.td_matrix
        expected = numpy.array(
            [[0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
             [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
             [0, 0, 0, 0.54930614, 0, 0.549306140, 0]])

        # ndarray.all() collapses a whole matrix into one boolean ("is every
        # element non-zero?"). Comparing expected.all() with calculated.all()
        # therefore passes for ANY two matrices that each contain a zero.
        # Compare the actual values instead; the absolute tolerance absorbs
        # the rounding of the 8-decimal literals above.
        numpy.testing.assert_allclose(calculated, expected, rtol=0, atol=1e-6)

    def test_orange_save_matrix_to_tab_file(self):
        '''
        Rebuilds the term-document matrix and asks the clusterer to save it
        under the name "sample_table_orange".

        There are no assertions: the test passes as long as neither call
        raises.
        '''
        oc.construct_term_doc_matrix()
        oc.save_table("sample_table_orange")
'''
Unit tests for the analysis.clustering package.
'''
import datetime, unittest
from database.warehouse import WarehouseServer
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_orange_clustering_test_data

###########################################
# GLOBALS                                #
###########################################
# Module-level fixtures: everything below runs once, when this module is
# imported, and the resulting objects are shared by all tests in the file.
# NOTE(review): `ws` is not used by any code visible in this snippet --
# presumably the tweet-based test queries it; confirm before removing.
ws = WarehouseServer()
sample_docs = get_orange_clustering_test_data()

# A single clusterer (k=2 -- presumably the number of clusters, TODO confirm)
# is fed every sample document; the tests below call methods on this instance.
oc = OrangeKmeansClusterer(k=2)
for s in sample_docs:
    oc.add_document(s)


class TestOrangeClustering(unittest.TestCase):

    ###########################################
    # ORANGE TESTS                            #
    ###########################################
    def test_orange_sample_doc_kmeans(self):
        '''
        Clusters the shared sample documents and verifies the per-document
        cluster labels against the known grouping.
        '''
        # Six documents: the first half belongs to cluster 0, the rest to 1.
        expected_labels = [0, 0, 0, 1, 1, 1]
        clustering = oc.run("orange_clustering_test")
        self.assertEqual(expected_labels, clustering.clusters)

    def test_orange_with_tweets_kmeans(self):
        import time
        start = time.time()