def test_levels(self):
    """Test multiple level indexes.

    Builds a sparse 1000 x 20000 binary feature matrix (each entry is 1
    with probability 0.01), indexes it with a small ``matrix_size`` and
    checks that searching with the first ten rows returns, for each row,
    the record stored for that same row.
    """
    # Draw from a locally seeded generator so the test input is identical
    # on every run (failures are reproducible) and the global NumPy RNG
    # state is left untouched.
    rng = np.random.RandomState(0)
    features = rng.binomial(1, 0.01, size=(1000, 20000))
    features = csr_matrix(features)

    # build the search index!
    data_to_return = range(1000)

    # matrix size smaller - this forces the index to have multiple levels
    cluster_index = cp.ClusterIndex(features, data_to_return,
                                    matrix_size=10)

    ret = cluster_index.search(features[0:10], k=1, k_clusters=1,
                               return_distance=False)

    # Record i was stored for row i, so each query row should come back
    # as a single-element result list holding its own record.
    self.assertEqual([[x] for x in data_to_return[:10]], ret)
def test_euclidean(self):
    """Do a quick basic test for index/search functionality.

    Indexes four short documents using ``SlowEuclideanDistance`` and
    checks that searching with each document's own feature vector
    (``k=1``) returns that same document.
    """
    data = [
        'hello world',
        'oh hello there',
        'Play it',
        'Play it again Sam',
    ]

    # One {token: 1} mapping per document, split on whitespace.
    features = [{token: 1 for token in text.split()} for text in data]
    features = DictVectorizer().fit_transform(features)

    cluster_index = cp.ClusterIndex(features, data, SlowEuclideanDistance)

    ret = cluster_index.search(features, k=1, k_clusters=1,
                               return_distance=False)

    # Each query is expected to yield a one-element list with its own text.
    self.assertEqual([[d] for d in data], ret)