Пример #1
0
    def setUp(self):
        """ Initialise the neighbour count and the algorithm instance """
        # How many neighbours are requested per query
        self.neighbours_count = 2

        # Algorithm instance, constructed with euclidian_distance
        self.algorithm = CoverTreeAlgorithm(euclidian_distance)
Пример #2
0
class TestCoverTreeAlgorithm(unittest.TestCase):
    """ Tests for CoverTreeAlgorithm: a cross check of query results against
    a naive linear scan, and a timing run on a larger data set
    """

    def setUp(self):
        """ Create a fresh algorithm instance for every test """
        self.algorithm = CoverTreeAlgorithm(euclidian_distance)

        # Number of neighbours
        self.neighbours_count = 2

    def build_object_description(self, number):
        """ Wrap `number` in an ObjectDescription holding a single
        TestFeature
        """
        features = (TestFeature(number),)
        return ObjectDescription(features)

    def generate_numbers_and_queries(self, numbers_cnt, queries_cnt):
        """ Populate self.objects with `numbers_cnt` random objects and
        self.query_objects with `queries_cnt` random query objects. Values
        are drawn as random.random() * 100000
        """
        # Objects that get inserted in the tree
        self.objects = [
            self.build_object_description(random.random() * 100000)
            for _ in range(numbers_cnt)
        ]

        # Objects used as query points
        self.query_objects = [
            self.build_object_description(random.random() * 100000)
            for _ in range(queries_cnt)
        ]

    def check_result(self, query, neighbours):
        """ Do the naive query and check the distance to the furthest neighbour
        is the same

        `neighbours` is the result returned by the algorithm for `query`; its
        last element is taken to be the furthest neighbour.
        NOTE(review): assumes the query returns exactly
        self.neighbours_count neighbours - confirm against the algorithm.
        """
        found_distance = euclidian_distance(query, neighbours[-1])

        naive_distances = sorted(
            euclidian_distance(query, obj) for obj in self.objects)
        # The k-th nearest object sits at index k - 1 of the sorted list
        expected_distance = naive_distances[self.neighbours_count - 1]

        # assertTrue(a, b) would treat `b` as the failure message and pass
        # for any non-zero `a`; compare the two values for real.
        self.assertEqual(found_distance, expected_distance)

    def test_covertree(self):
        """ Tests the correctness of the algorithm. We are generating a small
        number of queries and numbers to be able to do a cross check with a
        naive search
        """
        self.generate_numbers_and_queries(100, 100)

        # Inserting objects in covertree
        for obj in self.objects:
            self.algorithm.insert(obj)

        # Running queries
        results = [
            (query, self.algorithm.query(query, self.neighbours_count))
            for query in self.query_objects
        ]

        # Checking results
        for query, neighbours in results:
            self.check_result(query, neighbours)

    def test_covertree_performance(self):
        """ Test the performance of the covertree algorithm. We are going to
        generate a high number of queries and numbers here but we are not
        going to check the results anymore.
        """
        self.generate_numbers_and_queries(50000, 50000)

        # Inserting objects in covertree
        start = time.time()
        for obj in self.objects:
            self.algorithm.insert(obj)
        # Single-argument print(...) behaves the same as a print statement
        print("Inserted %d objects in %f seconds" % (len(self.objects),
                                                     time.time() - start))

        # Running queries; results are discarded, only the timing matters
        start = time.time()
        for query in self.query_objects:
            self.algorithm.query(query, self.neighbours_count)
        print("Ran %d queries in %f seconds" % (len(self.query_objects),
                                                time.time() - start))