def setUp(self):
    """Prepare the fixture: the neighbour count and the algorithm under test."""
    # Number of neighbours
    self.neighbours_count = 2
    # Algorithm instance constructed with euclidian_distance as its argument
    self.algorithm = CoverTreeAlgorithm(euclidian_distance)
class TestCoverTreeAlgorithm(unittest.TestCase):
    """Tests CoverTreeAlgorithm against a brute-force neighbour search."""

    def setUp(self):
        """Create the algorithm under test and set the neighbour count."""
        self.algorithm = CoverTreeAlgorithm(euclidian_distance)
        # Number of neighbours requested from every query
        self.neighbours_count = 2

    def build_object_description(self, number):
        """Return an ObjectDescription holding one TestFeature for `number`."""
        features = (TestFeature(number),)
        return ObjectDescription(features)

    def generate_numbers_and_queries(self, numbers_cnt, queries_cnt):
        """Fill self.objects and self.query_objects with random descriptions.

        `numbers_cnt` objects and `queries_cnt` query objects are built, each
        from a random float in [0, 100000).
        """
        # Draw all object values first, then all query values, so the order
        # of random draws stays numbers-then-queries.
        numbers = [random.random() * 100000 for _ in range(numbers_cnt)]
        self.objects = [self.build_object_description(n) for n in numbers]

        queries = [random.random() * 100000 for _ in range(queries_cnt)]
        self.query_objects = [self.build_object_description(q)
                              for q in queries]

    def check_result(self, query, neighbours):
        """Check the furthest returned neighbour against a naive search.

        The distance from `query` to the last element of `neighbours` must
        match the distance of the same rank found by scanning every object
        in self.objects.
        """
        # An empty answer would otherwise surface as a bare IndexError.
        self.assertTrue(len(neighbours) > 0, "query returned no neighbours")

        found_distance = euclidian_distance(query, neighbours[-1])
        distances = sorted(euclidian_distance(query, obj)
                           for obj in self.objects)
        # The k-th nearest object sits at index k - 1 of the sorted list.
        # NOTE(review): assumes `neighbours` is ordered nearest-first, so its
        # last element is the furthest one -- confirm against the algorithm.
        naive_distance = distances[len(neighbours) - 1]

        # assertTrue(a, b) treats b as the failure message and never compares
        # the two values; compare them explicitly instead.
        self.assertAlmostEqual(found_distance, naive_distance)

    def test_covertree(self):
        """
        Tests the correctness of the algorithm. We are generating a small
        number of queries and numbers to be able to do a cross check with
        a naive search
        """
        self.generate_numbers_and_queries(100, 100)

        # Inserting objects in covertree
        for obj in self.objects:
            self.algorithm.insert(obj)

        # Running queries
        results = []
        for query in self.query_objects:
            query_result = self.algorithm.query(query, self.neighbours_count)
            results.append((query, query_result))

        # Checking results
        for query, neighbours in results:
            self.check_result(query, neighbours)

    def test_covertree_performance(self):
        """
        Test the performance of the covertree algorithm. We are going to
        generate a high number of queries and numbers here but we are not
        going to check the results anymore.
        """
        self.generate_numbers_and_queries(50000, 50000)

        # Inserting objects in covertree
        start = time.time()
        for obj in self.objects:
            self.algorithm.insert(obj)
        # Single parenthesised argument: prints the same text on Python 2
        # and Python 3.
        print("Inserted %d objects in %f seconds" % (len(self.objects),
                                                     time.time() - start))

        # Running queries; results are discarded, only the timing matters
        start = time.time()
        for query in self.query_objects:
            self.algorithm.query(query, self.neighbours_count)
        print("Ran %d queries in %f seconds" % (len(self.query_objects),
                                                time.time() - start))