def test_compute_distance_matrix(self):
    """Check the default distance matrix against squared Euclidean norms.

    Draws seeded random train/test points, then verifies that
    ``utils.compute_distance_matrix`` returns a matrix of shape
    ``(test_samples, train_samples)`` whose entry ``[i, j]`` matches the
    squared L2 norm of ``x_train[j] - x_test[i]`` to 5 decimal places.
    """
    np.random.seed(seed=self.random_seed)
    n_train, n_test = self.train_samples, self.test_samples
    train_points = np.random.rand(n_train, self.dim)
    test_points = np.random.rand(n_test, self.dim)

    distances = utils.compute_distance_matrix(train_points, test_points)
    self.assertEqual(distances.shape, (n_test, n_train))

    # Rows index test points, columns index train points.
    for test_idx, train_idx in np.ndindex(n_test, n_train):
        offset = train_points[train_idx, :] - test_points[test_idx, :]
        expected = np.linalg.norm(offset) ** 2
        self.assertAlmostEqual(
            expected, distances[test_idx, train_idx], places=5
        )
def test_compute_distance_matrix_cosine(self):
    """Check the ``measure="cosine"`` distance matrix against ``spdist.cosine``.

    Skipped unless TensorFlow is executing eagerly. Draws seeded random
    train/test points, then verifies that the returned matrix has shape
    ``(test_samples, train_samples)`` and that entry ``[i, j]`` matches
    ``spdist.cosine(x_test[i], x_train[j])`` to 5 decimal places.
    """
    if not tf.executing_eagerly():
        self.skipTest("Test requires eager mode.")

    np.random.seed(seed=self.random_seed)
    n_train, n_test = self.train_samples, self.test_samples
    train_points = np.random.rand(n_train, self.dim)
    test_points = np.random.rand(n_test, self.dim)

    distances = utils.compute_distance_matrix(
        train_points, test_points, measure="cosine"
    )
    self.assertEqual(distances.shape, (n_test, n_train))

    # Rows index test points, columns index train points.
    for test_idx, train_idx in np.ndindex(n_test, n_train):
        expected = spdist.cosine(
            test_points[test_idx, :], train_points[train_idx, :]
        )
        self.assertAlmostEqual(
            expected, distances[test_idx, train_idx], places=5
        )
def knn_errorrate(self, k):
    """Compare ``utils.knn_errorrate`` with a ``KNeighborsClassifier`` baseline.

    Draws random points and integer labels, computes the error rate from the
    distance matrix via ``utils.knn_errorrate``, and asserts that
    ``1.0 - error`` matches the accuracy of a ``KNeighborsClassifier`` fitted
    on the same data, to 5 decimal places.

    This helper does not seed the NumPy RNG itself.
    NOTE(review): presumably callers seed before invoking it - confirm.

    Args:
        k: Number of neighbours, passed both to ``utils.knn_errorrate`` and
            to ``KNeighborsClassifier(n_neighbors=k)``.
    """
    # The order of the random draws is kept fixed so that a seeded caller
    # sees the same data: train points, test points, test labels, train labels.
    train_points = np.random.rand(self.train_samples, self.dim)
    test_points = np.random.rand(self.test_samples, self.dim)
    distances = utils.compute_distance_matrix(train_points, test_points)
    test_labels = np.random.randint(self.classes, size=self.test_samples)
    train_labels = np.random.randint(self.classes, size=self.train_samples)

    err = utils.knn_errorrate(distances, train_labels, test_labels, k=k)

    # Reference accuracy from the baseline classifier on the raw points.
    classifier = KNeighborsClassifier(n_neighbors=k)
    predicted_labels = classifier.fit(train_points, train_labels).predict(
        test_points
    )
    reference_accuracy = metrics.accuracy_score(test_labels, predicted_labels)

    self.assertAlmostEqual(1.0 - err, reference_accuracy, places=5)
def test_knn_errorrate_multik(self):
    """Check that a list-valued ``k`` agrees with one call per ``k``.

    Skipped unless TensorFlow is executing eagerly. Calls
    ``utils.knn_errorrate`` once with ``k=[5, 1, 5, 3]`` and once each for
    ``k`` in ``[5, 3, 1]``, then asserts both results have the same length
    and agree element by element to 5 decimal places.
    """
    if not tf.executing_eagerly():
        self.skipTest("Test requires eager mode.")

    np.random.seed(seed=self.random_seed)
    train_points = np.random.rand(self.train_samples, self.dim)
    test_points = np.random.rand(self.test_samples, self.dim)
    distances = utils.compute_distance_matrix(train_points, test_points)
    test_labels = np.random.randint(self.classes, size=self.test_samples)
    train_labels = np.random.randint(self.classes, size=self.train_samples)

    # The request holds a duplicate and is unordered; the expected results
    # line up with its unique values in descending order.
    # NOTE(review): presumably the utility deduplicates and sorts - confirm.
    requested_ks = [5, 1, 5, 3]
    expected_ks = [5, 3, 1]

    expected_errors = [
        utils.knn_errorrate(distances, train_labels, test_labels, k=single_k)
        for single_k in expected_ks
    ]
    batched_errors = utils.knn_errorrate(
        distances, train_labels, test_labels, k=requested_ks
    )

    self.assertEqual(len(expected_errors), len(batched_errors))
    for batched, expected in zip(batched_errors, expected_errors):
        self.assertAlmostEqual(batched, expected, places=5)