Пример #1
0
 def test_sample_doc_clustering_with_online(self):
     """Run the online clusterer over the sample documents and dump clusters.

     Each document returned by ``get_orange_clustering_test_data()`` is
     added to an ``OnlineClusterer(N=2, window=3)`` and then clustered;
     afterwards the ``document_dict`` of every cluster is printed.
     """
     oc = OnlineClusterer(N=2, window=3)
     samples = get_orange_clustering_test_data()
     for document in samples:
         # The return value of add_document() was never used, so it is
         # no longer bound to a local.
         oc.add_document(document)
         oc.cluster(document)
     # NOTE(review): the intended assignment is recorded here but nothing
     # asserts it yet -- the accessor for per-document cluster labels is
     # not visible from this test. TODO: compare it against the result.
     expected = [0, 0, 0, 1, 1, 1]
     for cluster in oc.clusters:
         # Parenthesised single-argument form prints the same text on
         # Python 2 and is valid syntax on Python 3.
         print(cluster.document_dict)
Пример #2
0
 def test_sample_doc_clustering_with_online(self):
     """Cluster the sample documents online and print every cluster.

     Documents come from ``get_orange_clustering_test_data()``; each one
     is passed to ``add_document`` and ``cluster`` on an
     ``OnlineClusterer(N=2, window=3)``. The ``document_dict`` of each
     resulting cluster is then written to stdout.
     """
     oc = OnlineClusterer(N=2, window=3)
     samples = get_orange_clustering_test_data()
     for document in samples:
         # add_document()'s return value is unused, so it is not stored.
         oc.add_document(document)
         oc.cluster(document)
     # NOTE(review): this expected labelling is never checked; the way to
     # read back per-document labels is not visible here. TODO: assert it.
     expected = [0, 0, 0, 1, 1, 1]
     for cluster in oc.clusters:
         # Works as a statement on Python 2 and as a call on Python 3.
         print(cluster.document_dict)
Пример #3
0
    def test_cluster_term_document_matrix(self):
        """Check the term-document matrix produced while clustering.

        Every document in ``samples`` is added to an
        ``OnlineClusterer(N=2, window=3)`` and clustered; ``oc.td_matrix``
        is then compared element-wise with hard-coded reference values.
        """
        oc = OnlineClusterer(N=2, window=3)
        for document in samples:
            # add_document()'s return value is not needed here.
            oc.add_document(document)
            oc.cluster(document)

        calculated = oc.td_matrix
        expected = numpy.array(
            [[0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
             [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
             [0, 0, 0, 0.54930614, 0, 0.54930614, 0]])

        # ``expected.all() == calculated.all()`` only compared two booleans
        # ("are all entries non-zero?"), so it passed for nearly any matrix
        # containing a zero. Compare shape first (allclose would otherwise
        # broadcast), then values with a floating-point tolerance because
        # the reference numbers are rounded to eight decimals.
        self.assertEqual(expected.shape, calculated.shape)
        self.assertTrue(
            numpy.allclose(calculated, expected),
            "td_matrix differs from reference:\n%s" % (calculated,))
Пример #4
0
    def test_cluster_term_document_matrix(self):
        """Verify ``td_matrix`` after clustering all of ``samples``.

        Each document in ``samples`` is fed through ``add_document`` and
        ``cluster`` on an ``OnlineClusterer(N=2, window=3)``; the resulting
        ``oc.td_matrix`` must match the reference matrix below.
        """
        oc = OnlineClusterer(N=2, window=3)
        for document in samples:
            # The value returned by add_document() is unused.
            oc.add_document(document)
            oc.cluster(document)

        calculated = oc.td_matrix
        expected = numpy.array([
            [0.31388923, 0.11584717, 0, 0, 0, 0, 0.47083384],
            [0, 0.13515504, 0.3662041, 0, 0.3662041, 0, 0],
            [0, 0, 0, 0.54930614, 0, 0.54930614, 0],
        ])

        # The previous check compared ``expected.all()`` with
        # ``calculated.all()`` -- two booleans -- which does not compare
        # the matrices at all. Check the shape explicitly (allclose would
        # broadcast) and then the values within floating-point tolerance,
        # since the reference values are rounded.
        self.assertEqual(expected.shape, calculated.shape)
        self.assertTrue(
            numpy.allclose(calculated, expected),
            "td_matrix differs from reference:\n%s" % (calculated,))