def test_orange_with_tweets_kmeans(self):
    """Run OrangeKmeansClusterer (k=20, ngram=1) over a one-day document window.

    Requests documents from ``ws.get_documents_by_date`` for the window
    2011-01-25 00:00:00 to 2011-01-26 00:00:00 (passing ``limit=1000``),
    adds them to the clusterer and runs it with ``pca=True``.  The elapsed
    wall-clock seconds for fetching plus clustering are printed, after which
    the growth-timeline plot, the scatter plot and the cluster dump are
    requested from the clusterer.

    The method makes no assertions; its output is meant to be inspected
    by hand.
    """
    # Kept as a function-local import, exactly as in the original snippet.
    import time

    start = time.time()
    from_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
    to_date = datetime.datetime(2011, 1, 26, 0, 0, 0)
    items = ws.get_documents_by_date(from_date, to_date, limit=1000)

    oc = OrangeKmeansClusterer(k=20, ngram=1)
    oc.add_documents(items)
    oc.run("orange_clustering_test", pca=True)

    # Parenthesised so the line also parses on Python 3; with a single
    # argument Python 2 prints the same value as the old print statement.
    print(time.time() - start)

    oc.plot_growth_timeline(cumulative=True)
    oc.plot_scatter()
    oc.dump_clusters_to_file("kmeans_with_tweets_orange")
# Example no. 2
# 0
'''
Created on 26 Jan 2012

@author: george
'''
import unittest, numpy
from analysis.clustering.kmeans import OrangeKmeansClusterer
from tests.test_document import get_test_documents
###########################################
# GLOBALS                                #
###########################################
# Shared fixture, built once when this module is imported.
# get_test_documents() is unpacked into three values; only the third one
# (``samples``) is used here, the first two land in the throwaway name
# ``ignore``.
ignore, ignore, samples = get_test_documents()

# A single clusterer instance that the test methods of ``Test`` operate on.
# NOTE(review): k=2 is presumably the number of clusters - confirm against
# OrangeKmeansClusterer.
oc = OrangeKmeansClusterer(k=2)
for sample in samples:
    oc.add_document(sample)


class Test(unittest.TestCase):
    def test_orange_cluster_term_document_matrix(self):
        """Check the term-document matrix built from the shared samples.

        Calls ``construct_term_doc_matrix()`` on the module-level clusterer
        ``oc`` and compares the resulting ``oc.td_matrix`` element by element
        with the hard-coded 3x7 matrix of expected weights.
        """
        oc.construct_term_doc_matrix()
        calculated = oc.td_matrix
        expected = numpy.array(
            [[0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
             [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
             [0, 0, 0, 0.54930614, 0, 0.549306140, 0]])

        # The previous check, assertEqual(expected.all(), calculated.all()),
        # only compared two booleans ("is every entry non-zero?"), so any
        # matrix containing a zero passed.  Compare the values themselves.
        # The expected literals are rounded to 8 decimals, hence the small
        # tolerances instead of exact equality; a shape mismatch also fails.
        numpy.testing.assert_allclose(
            calculated, expected, rtol=1e-6, atol=1e-8)

    def test_orange_save_matrix_to_tab_file(self):
        oc.construct_term_doc_matrix()