Пример #1
0
 def test_dis_sim_global(self):
     """Check that `dis_sim_global` lowers hubness and raises k-NN accuracy.

     The test passes only if the hubness ratio (original / transformed)
     exceeds 2 and the accuracy gain exceeds 0.07 on the fixture data.
     """
     # Baseline measurements on the untransformed distances.
     hubness_before = hubness(self.distance)[0]
     accuracy_before = score(self.distance, self.target)[0][0, 0]

     # Same measurements on the distances returned by dis_sim_global.
     transformed = dis_sim_global(self.vectors)
     hubness_after = hubness(transformed)[0]
     accuracy_after = score(transformed, self.target)[0][0, 0]

     hubness_reduced = hubness_before / hubness_after > 2
     accuracy_gained = accuracy_after - accuracy_before > 0.07
     return self.assertTrue(hubness_reduced & accuracy_gained)
Пример #2
0
 def test_localized_centering(self):
     """Check that `localized_centering` lowers hubness and raises accuracy.

     The centered result is evaluated with ``metric='similarity'``. The
     test passes only if the hubness ratio (original / centered) exceeds
     1.5 and the accuracy gain exceeds 0.03.
     """
     # Baseline measurements on the untransformed distances.
     hubness_before = hubness(self.distance)[0]
     accuracy_before = score(self.distance, self.target)[0][0, 0]

     centered = localized_centering(self.vectors, kappa=20, gamma=1.)
     hubness_after = hubness(centered, metric='similarity')[0]
     accuracy_after = score(
         centered, self.target, metric='similarity')[0][0, 0]

     hubness_reduced = hubness_before / hubness_after > 1.5
     accuracy_gained = accuracy_after - accuracy_before > 0.03
     return self.assertTrue(hubness_reduced & accuracy_gained)
Пример #3
0
 def test_dis_sim_local(self):
     """Check that `dis_sim_local` (k=50) lowers hubness and raises accuracy.

     The test passes only if the hubness ratio (original / transformed)
     exceeds 10 and the accuracy gain exceeds 0.03.
     """
     # Baseline measurements on the untransformed distances.
     hubness_before = hubness(self.distance)[0]
     accuracy_before = score(self.distance, self.target)[0][0, 0]

     transformed = dis_sim_local(self.vectors, k=50)
     hubness_after = hubness(transformed)[0]
     accuracy_after = score(transformed, self.target)[0][0, 0]

     hubness_reduced = hubness_before / hubness_after > 10
     accuracy_gained = accuracy_after - accuracy_before > 0.03
     return self.assertTrue(hubness_reduced & accuracy_gained)
 def test_ls_dist_equals_sim(self):
     """Local scaling should rank equally for distances and similarities.

     The scaled values themselves differ (LS_dist != 1 - LS_sim), so
     hubness and k-NN accuracy are compared as proxies for equal ranks.
     """
     self.setUpMod('rnd')

     scaled_dist = local_scaling(self.dist, metric='distance')
     scaled_sim = local_scaling(1 - self.dist, metric='similarity')

     hub_dist, _, _ = hubness(scaled_dist, metric='distance')
     hub_sim, _, _ = hubness(scaled_sim, metric='similarity')

     knn_dist, _, _ = score(scaled_dist, self.label, metric='distance')
     knn_sim, _, _ = score(scaled_sim, self.label, metric='similarity')

     # Accuracies are only compared when the hubness values already agree.
     if np.allclose(hub_dist, hub_sim):
         proxies_agree = np.allclose(knn_dist, knn_sim)
     else:
         proxies_agree = False
     return self.assertTrue(proxies_agree)
Пример #5
0
 def test_knn_score_equal_sklearn_loocv_score(self):
     """Compare `score` at k=5 with scikit-learn leave-one-out k-NN.

     First the accuracies are compared; if they are close, the
     per-point correctness vectors must match as well.
     """
     all_acc, all_correct, all_cmat = score(
         self.distance, self.label, k=5, metric='distance')
     # Only one k value was scored, so keep just the first entries.
     hub_acc = all_acc[0, 0]
     hub_correct = all_correct[0]
     hub_cmat = all_cmat[0]

     sk_knn = KNeighborsClassifier(
         n_neighbors=5, algorithm='brute', metric='precomputed')
     n_samples = self.distance.shape[0]  # for LOO-CV
     try:  # sklearn < 0.18 takes the number of samples
         splitter = LeaveOneOut(n_samples)
     except TypeError:
         splitter = LeaveOneOut()
     sk_predicted = cross_val_predict(
         sk_knn, self.distance, self.label, cv=splitter)
     sk_acc = accuracy_score(self.label, sk_predicted)

     if np.allclose(hub_acc, sk_acc):
         # Accuracies agree: check each data point individually.
         sk_correct = sk_predicted == self.label
         same_predictions = np.all(hub_correct == sk_correct)
         msg = """Accuracies of hub toolbox k-NN and sklearn-kNN are almost 
                  equal, but the predictions per data point are not."""
         return self.assertTrue(same_predictions, msg)
     # Accuracies differ: let the assertion report both values.
     return self.assertAlmostEqual(hub_acc, sk_acc, places=7)
 def _calc_knn_accuracy(self, k: int = 5):
     """Compute the `k`-NN accuracy and store it in `self.knn_accuracy[k]`.

     Calls `score` on `self.secondary_distance` with `self.classes` as
     targets and `self.metric` as metric. Returns `self`.
     """
     score_args = {
         'D': self.secondary_distance,
         'target': self.classes,
         'k': k,
         'metric': self.metric,
     }
     # score returns three values; only the first (accuracy) is kept.
     accuracy, _, _ = score(**score_args)
     self.knn_accuracy[k] = accuracy
     return self
Пример #7
0
 def test_knn_score_matches_correct_prediction_fraction(self):
     """Accuracy per k must equal the fraction of correct predictions.

     For each k in (1, 5, 20), the accuracy returned by `score` is
     compared with the sum of the correctness vector divided by `self.n`.
     """
     k = np.array([1, 5, 20])
     acc, correct, _ = score(self.distance, self.label, k=k)
     matches = [
         np.allclose(acc[idx], correct[idx].sum() / self.n)
         for idx in range(k.size)
     ]
     return self.assertTrue(np.all(matches))
Пример #8
0
 def test_knn_score_matches_confusion_matrix(self):
     """Accuracy per k must equal the value derived from the confusion matrix.

     For each k in (1, 5, 20), accuracy is recomputed from the four
     entries of the top-left 2x2 part of the confusion matrix.
     """
     k = np.array([1, 5, 20])
     acc, _, cmat = score(self.distance, self.label, k=k)

     def accuracy_from(matrix):
         # Diagonal entries over the sum of all four entries.
         diagonal = matrix[0, 0] + matrix[1, 1]
         total = matrix[0, 0] + matrix[0, 1] + matrix[1, 0] + matrix[1, 1]
         return diagonal / total

     matches = [
         np.allclose(acc[idx], accuracy_from(cmat[idx]))
         for idx in range(k.size)
     ]
     return self.assertTrue(np.all(matches))
Пример #9
0
 def test_sample_knn(self):
     """Check k-NN scoring on sampled distances for a tiny fixed data set.

     The test fails early if `sample_distance` does not return the
     expected sample indices, since the expected accuracy depends on them.
     """
     # TODO create a stricter test
     X = np.array([[1., 2.], [2., 2.], [2., 3.], [3., .5], [4., 1.5]])
     y = np.array([0, 1, 0, 1, 1])
     D, sample_idx = sample_distance(X, y, 2, random_state=1234)

     expected_sample_idx = np.array([4, 2])
     expected_acc = 0.4

     # The two index sets must match regardless of order.
     n_unexpected = np.setdiff1d(sample_idx, expected_sample_idx).size
     n_missing = np.setdiff1d(expected_sample_idx, sample_idx).size
     if n_unexpected != 0 or n_missing != 0:
         return self.fail("Test implementation broken: wrong sample.")

     acc, _, _ = score(
         D=D, target=y, k=2, metric='distance', sample_idx=sample_idx)
     return self.assertEqual(expected_acc, acc[0, 0])
Пример #10
0
 def test_knn_sparse_equal_dense(self):
     """`score` must give equal accuracy for dense and CSR similarities.

     The similarity matrix is built as ``1 - self.distance`` and scored
     once as is and once wrapped in `csr_matrix`.
     """
     dense = 1 - self.distance
     sparse = csr_matrix(dense)
     dense_accuracy, _, _ = score(dense, self.label, metric='similarity')
     sparse_accuracy, _, _ = score(sparse, self.label, metric='similarity')
     return self.assertEqual(dense_accuracy, sparse_accuracy)
Пример #11
0
 def test_knn_sparse_does_not_error(self):
     """Smoke test: `score` runs on a random sparse similarity matrix.

     Does not test correctness of the result; it only checks that every
     returned accuracy is non-negative for k in (1, 5, 10).
     """
     # Arguments are presumably size and density -- confirm against
     # random_sparse_matrix.
     sim = random_sparse_matrix(100, 0.1)
     y = np.random.randint(0, 2, 100)  # random binary labels
     acc, _, _ = score(sim, y, k=[1, 5, 10], metric='similarity')
     # np.alltrue was removed in NumPy 2.0; np.all is the equivalent.
     return self.assertTrue(np.all(acc >= 0.))
        return D_shi
    else:
        # only return test-train-distances (there are no self distances here)
        return D_shi[test_ind]


if __name__ == '__main__':
    # Demo script: load the dexter data, build three secondary distance
    # matrices (simhub without labels, simhub with labels, shared nearest
    # neighbors) and print hubness and 5-NN results for each.
    from hub_toolbox.hubness import hubness
    from hub_toolbox.knn_classification import score

    D, y, X = io.load_dexter()
    for array_name, array in (("D", D), ("y", y), ("X", X)):
        print(array_name, array.shape)

    # simhub without labels (SHI) and shared nearest neighbors (SNN).
    dist_shi = simhub(D, y=None)
    dist_snn = shared_nearest_neighbors(D, k=50)
    hub_shi = hubness(dist_shi, k=5)
    hub_snn = hubness(dist_snn, k=5)
    knn_shi = score(dist_shi, y, 5)
    knn_snn = score(dist_snn, y, 5)

    # simhub with labels (SH).
    dist_sh = simhub(D=D, y=y)
    hub_sh = hubness(dist_sh, k=5)
    knn_sh = score(dist_sh, y, 5)

    # (tag, hubness result, k-NN result) in the order they are printed.
    report = (
        ("SNN", hub_snn, knn_snn),
        ("SHI", hub_shi, knn_shi),
        ("SH ", hub_sh, knn_sh),
    )
    for tag, hub_result, _ in report:
        print("hubness {}:".format(tag), hub_result[0])
    for tag, _, knn_result in report:
        print("kNN {}:".format(tag), knn_result[0][0, 0])