def test5_filter_finite(self):
    """Check hac on input that mixes finite points with inf/-inf/NaN pairs.

    The result must have 7 rows and match ``linkage`` run on only the
    eight pairs whose coordinates are both finite.
    """
    raw_points = [
        (0, 0), (1, 1),
        (math.inf, 9), (9, math.inf),
        (-math.inf, 2), (2, -math.inf),
        (3.2, -20),
        (float("nan"), 0), (0, float("nan")),
        (7, 7),
        (float("inf"), 100), (100, float("inf")),
        (4, 0),
        (float("-inf"), 1), (1, float("inf")),
        (4, 0), (0.0, 0.1), (-3.9, 27),
    ]
    # The same list with every pair containing inf, -inf or NaN removed.
    finite_points = [
        (0, 0), (1, 1), (3.2, -20), (7, 7),
        (4, 0), (4, 0), (0.0, 0.1), (-3.9, 27),
    ]

    result = hac(raw_points)

    # hac should return a numpy array or matrix of the right shape.
    is_array_like = (
        isinstance(result, np.ndarray) or isinstance(result, np.matrix)
    )
    self.assertTrue(is_array_like)
    self.assertEqual(np.shape(result), (7, 4))

    merges = np.array(result)

    # Merge distances (third column) must never decrease row to row.
    distances = merges[:, 2]
    for earlier, later in zip(distances[:-1], distances[1:]):
        self.assertGreaterEqual(later, earlier)

    # hac must agree with linkage on the finite points alone.
    reference = linkage(finite_points)
    self.assertTrue(np.allclose(merges, reference))
def test_tiebreak(self):
    """Check the exact merge rows hac produces for tiebreak_csv_file.

    Row ``i`` of the result must list cluster indices ``2 * i`` and
    ``2 * i + 1``, a merge distance of 0, and the cluster size given by
    ``expected_cluster_sizes[i]``.
    """
    x_y_pairs = get_x_y_pairs(tiebreak_csv_file)
    computed = hac(x_y_pairs)
    expected_cluster_sizes = [
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 12, 20,
    ]

    # Pin the row count first: the per-row loop would otherwise pass
    # vacuously when hac returns too few (or zero) rows.
    self.assertEqual(np.shape(computed)[0], len(expected_cluster_sizes))

    for i, expected_size in enumerate(expected_cluster_sizes):
        # Flatten so that a row taken from an np.matrix is indexable
        # with a single subscript.
        row = np.array(computed[i, :]).flatten()
        self.assertEqual(row[0], 2 * i)
        self.assertEqual(row[1], 2 * i + 1)
        self.assertEqual(row[2], 0)
        self.assertEqual(row[3], expected_size)
def test5_tiebreak(self):
    """Check the exact merge rows hac produces for tiebreak_csv_file.

    Row ``i`` of the result must list cluster indices ``2 * i`` and
    ``2 * i + 1``, a merge distance of 0, and the cluster size given by
    ``expected_cluster_sizes[i]``.
    """
    x_y_pairs = get_x_y_pairs(tiebreak_csv_file)
    computed = hac(x_y_pairs)
    expected_cluster_sizes = [
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 8, 8, 12, 20,
    ]

    # Pin the row count first: the per-row loop would otherwise pass
    # vacuously when hac returns too few (or zero) rows.
    self.assertEqual(np.shape(computed)[0], len(expected_cluster_sizes))

    # Tie-breaking rule under test:
    # choose the lowest cluster index for the first position;
    # if still tied, choose the lowest cluster index for the second position.
    for i, expected_size in enumerate(expected_cluster_sizes):
        # Flatten so that a row taken from an np.matrix is indexable
        # with a single subscript.
        row = np.array(computed[i, :]).flatten()
        self.assertEqual(row[0], 2 * i)
        self.assertEqual(row[1], 2 * i + 1)
        self.assertEqual(row[2], 0)
        self.assertEqual(row[3], expected_size)
def test_randomized(self):
    """Compare hac with SciPy's linkage on the points in random_csv_file."""
    n_merges = 19
    points = get_x_y_pairs(random_csv_file)
    merges = hac(points)

    # hac should return a numpy array of the right shape.
    self.assertIsInstance(merges, np.ndarray)
    self.assertEqual(np.shape(merges), (n_merges, 4))

    # Merge distances (third column) must never decrease row to row.
    for row in range(1, n_merges):
        self.assertGreaterEqual(merges[row, 2], merges[row - 1, 2])

    # hac must produce the same rows as linkage does.
    reference = scipy.cluster.hierarchy.linkage(points)
    self.assertTrue(np.allclose(merges, reference))
def test7_more_than_20(self):
    """Run hac on 80 points of the form (x**2, x**2) and check every row.

    The first row must be ``[0, 1, 2**.5, 2]``. For each later row,
    numbered from ``i = 0``, the expected values are: first index
    ``i + 2``, second index ``i + n_points``, size ``i + 3``, and the
    distance given by the closed-form expression in the loop below.
    """
    n_points = 80
    points = [(k**2, k**2) for k in range(n_points)]
    merges = np.array(hac(points))

    # Merge distances (third column) must never decrease row to row.
    for idx in range(1, n_points - 1):
        self.assertGreaterEqual(merges[idx, 2], merges[idx - 1, 2])

    # First row.
    first_row_expected = [0, 1, 2**.5, 2]
    self.assertTrue(np.allclose(merges[0], first_row_expected))

    # Every row after the first.
    for offset, row in enumerate(merges[1:, :]):
        expected_distance = (
            (offset + offset + 2)**2 + (offset + offset + 4)**2 - 2
        )**0.5
        self.assertEqual(row[0], offset + 2)
        self.assertEqual(row[1], offset + n_points)
        self.assertTrue(np.isclose(row[2], expected_distance))
        self.assertEqual(row[3], offset + 3)
def test3_pokemon_csv(self):
    """Compare hac with linkage on the points in pokemon_csv_file.

    Rows are compared after sorting by the first column, and again
    after sorting by the second column, to give leeway for tie-breaking.
    """
    n_merges = 19
    points = get_x_y_pairs(pokemon_csv_file)
    merges = hac(points)

    # hac should return a numpy array of the right shape.
    self.assertIsInstance(merges, np.ndarray)
    self.assertEqual(np.shape(merges), (n_merges, 4))

    # Merge distances (third column) must never decrease row to row.
    for row in range(1, n_merges):
        self.assertGreaterEqual(merges[row, 2], merges[row - 1, 2])

    # hac must match linkage once both are put in the same row order.
    reference = linkage(points)
    for column in (0, 1):
        merges_sorted = merges[merges[:, column].argsort()]
        reference_sorted = reference[reference[:, column].argsort()]
        self.assertTrue(np.allclose(merges_sorted, reference_sorted))