def test_build_many_nodes_1(self):
    '''
    Many-nodes tests, first dataset (five points).

    Rationale for this series of tests: if every test drew its own random
    dataset, two of them could overlap and end up exercising the same
    thing. To reach more corner cases, the series instead starts from one
    non-trivial dataset and alters it slightly for each new test.
    '''
    points = [(2, 7), (5, 4), (9, 6), (4, 3), (8, 1)]
    result = build_kd_tree(self.coords_to_df(points))

    # Assemble the expected tree bottom-up: subtrees first, then the root.
    left_subtree = KdNode(
        coords=(2, 7),
        left_child=KdNode((4, 3), None, None),
        right_child=None,
    )
    right_subtree = KdNode(
        coords=(9, 6),
        left_child=KdNode((8, 1), None, None),
        right_child=None,
    )
    expected = KdNode(
        coords=(5, 4),
        left_child=left_subtree,
        right_child=right_subtree,
    )

    self.assertEqual(result, expected)
def test_build_many_nodes_4(self):
    '''
    Many-nodes tests, eight-point dataset: one more node than the
    previous test in the series.
    '''
    points = [
        (2, 7), (5, 4), (9, 6), (4, 3),
        (8, 1), (7, 2), (10, 3), (11, 0),
    ]
    result = build_kd_tree(self.coords_to_df(points))

    # Assemble the expected tree bottom-up: subtrees first, then the root.
    left_subtree = KdNode(
        coords=(5, 4),
        left_child=KdNode(
            coords=(7, 2),
            left_child=KdNode((4, 3), None, None),
            right_child=None,
        ),
        right_child=KdNode((2, 7), None, None),
    )
    right_subtree = KdNode(
        coords=(10, 3),
        left_child=KdNode((11, 0), None, None),
        right_child=KdNode((9, 6), None, None),
    )
    expected = KdNode(
        coords=(8, 1),
        left_child=left_subtree,
        right_child=right_subtree,
    )

    self.assertEqual(result, expected)
def test_build_single_node(self):
    '''A one-point dataset yields a single KdNode at (0, 0) with no children.'''
    df = self.coords_to_df([(0, 0)])
    tree = build_kd_tree(df)
    expected = KdNode((0, 0), None, None)
    # assertEqual still compares with ==, but unlike assertTrue(a == b) it
    # shows both operands when the comparison fails.
    self.assertEqual(tree, expected)
def test_inspection_of_both_sides_of_splitting_plane(self):
    '''
    Guard against a search that never looks across the splitting plane.

    As find_closest descends the kd-tree, it picks a node's left_child or
    right_child depending on which side of that node's splitting plane
    the ref_location lies. When the ref_location is nearer to the
    splitting plane than to the best candidate found so far, the true
    nearest neighbor may sit on the opposite side of the plane.

    An implementation that gets *everything else* right but skips that
    second look (when it is needed) would answer "D" for this dataset
    instead of the correct "E".
    '''
    named_points = [
        ('A', 1.00, 1.00),
        ('B', 1.20, 2.00),
        ('C', 2.50, 3.50),
        ('D', 3.50, 2.80),
        ('E', 3.70, 0.80),
        ('F', 4.25, 1.80),
        ('G', 6.00, 5.60),
    ]
    query = (3.36, 0.9)

    kd_tree = build_kd_tree(TestFindClosest.coords_to_df(named_points))
    nearest = find_closest(query, kd_tree)

    self.assertEqual(nearest.name, "E")
    self.assertEqual(nearest.coordinates, (3.7, 0.8))
def test_1000_random_10D_points(self):
    '''
    Cross-check find_closest against a brute-force search on random data.

    Each run builds a kd-tree from 1000 randomly named 10-dimensional
    points, draws a random reference location, and asserts that
    find_closest reports the same name and coordinates as
    find_closest_with_brute_force.
    '''
    total_runs = 5
    num_dimensions = 10
    num_points = 1000

    def rand_val():
        # Reciprocal of a non-zero integer drawn from [-1000, 1000], so
        # the value lies between -1 and 1 and is never 0.
        r = random.randint(-1000, 1000)
        while r == 0:
            r = random.randint(-1000, 1000)
        return 1. / r

    # NOTE(review): the random generator is not seeded here, so a failing
    # run cannot be replayed from this test alone.
    for run_num in range(1, total_runs + 1):
        print(
            "Testing %d random %dD points, run %d/%d"
            % (num_points, num_dimensions, run_num, total_runs)
        )

        # Make 1000 random points, each with its own random name.
        neighbors_coords = [
            (uuid.uuid4().hex,)
            + tuple(rand_val() for _ in range(num_dimensions))
            for _ in range(num_points)
        ]
        neighbors_df = TestFindClosest.coords_to_df(neighbors_coords)

        # Generate a random reference location.
        ref_location = tuple(rand_val() for _ in range(num_dimensions))

        tree = build_kd_tree(neighbors_df)
        result = find_closest(ref_location, tree)
        expected_name, expected_coords = find_closest_with_brute_force(
            neighbors_df, ref_location
        )

        self.assertEqual(result.name, expected_name)
        self.assertEqual(result.coordinates, expected_coords)
def test_build_three_nodes(self):
    '''Three points on a diagonal: the middle one is the root, the others its children.'''
    points = [(0, 0), (1, 1), (2, 2)]
    result = build_kd_tree(self.coords_to_df(points))

    lower = KdNode((0, 0), None, None)
    upper = KdNode((2, 2), None, None)
    expected = KdNode(coords=(1, 1), left_child=lower, right_child=upper)

    self.assertEqual(result, expected)
def test_basic(self):
    '''Three named 2D neighbors; "B" is the one nearest the reference location.'''
    named_points = [
        ('A', -6, 3),
        ('B', 2, 2.2),
        ('C', 3, -6),
    ]
    query = (-1, -2)

    kd_tree = build_kd_tree(TestFindClosest.coords_to_df(named_points))
    nearest = find_closest(query, kd_tree)

    self.assertEqual(nearest.name, "B")
    self.assertEqual(nearest.coordinates, (2, 2.2))
def test_all_neighbors_equal(self):
    '''Two neighbors share the same coordinates; only the coordinates are checked.'''
    named_points = [
        ('A', 1.1, 1.3),
        ('B', 1.1, 1.3),
    ]
    query = (-1, -2)

    kd_tree = build_kd_tree(TestFindClosest.coords_to_df(named_points))
    nearest = find_closest(query, kd_tree)

    # The name is deliberately left unchecked: the algorithm makes no
    # promise of a stable traversal order, so either "A" or "B" may win.
    self.assertEqual(nearest.coordinates, (1.1, 1.3))
def test_build_empty_list(self):
    '''Building from no data (None is what gets passed in) produces no tree.'''
    self.assertIsNone(build_kd_tree(None))