Exemplo n.º 1
0
 def test_dis_sim_local(self):
     """Test whether hubness and k-NN accuracy improve for dexter.

     Hubness and accuracy are measured on the original distances
     (``self.distance``) and on the distances returned by
     ``dis_sim_local(self.vectors, k=50)``. The test passes only if
     hubness shrinks by more than a factor of 10 and accuracy rises by
     more than 0.03.
     """
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     dist_dsl = dis_sim_local(self.vectors, k=50)
     h_dsl = hubness(dist_dsl)[0]
     acc_dsl = score(dist_dsl, self.target)[0][0, 0]
     # Check each criterion on its own so a failure names the criterion
     # that was missed and shows the offending value.
     self.assertGreater(h_orig / h_dsl, 10,
                        "hubness reduction factor too small")
     self.assertGreater(acc_dsl - acc_orig, 0.03,
                        "k-NN accuracy gain too small")
Exemplo n.º 2
0
 def test_dis_sim_global(self):
     """Test whether hubness and k-NN accuracy improve for dexter.

     Hubness and accuracy are measured on the original distances
     (``self.distance``) and on the distances returned by
     ``dis_sim_global(self.vectors)``. The test passes only if hubness
     shrinks by more than a factor of 2 and accuracy rises by more than
     0.07.
     """
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     dist_dsg = dis_sim_global(self.vectors)
     h_dsg = hubness(dist_dsg)[0]
     acc_dsg = score(dist_dsg, self.target)[0][0, 0]
     # Check each criterion on its own so a failure names the criterion
     # that was missed and shows the offending value.
     self.assertGreater(h_orig / h_dsg, 2,
                        "hubness reduction factor too small")
     self.assertGreater(acc_dsg - acc_orig, 0.07,
                        "k-NN accuracy gain too small")
Exemplo n.º 3
0
 def test_localized_centering(self):
     """Test whether hubness and k-NN accuracy improve for dexter.

     Hubness and accuracy are measured on the original distances
     (``self.distance``) and on the similarities returned by
     ``localized_centering(self.vectors, "cosine", 20, 1)``, which are
     evaluated with ``metric="similarity"``. The test passes only if
     hubness shrinks by more than a factor of 1.5 and accuracy rises by
     more than 0.03.
     """
     h_orig = hubness(self.distance)[0]
     acc_orig = score(self.distance, self.target)[0][0, 0]
     sim_lcent = localized_centering(self.vectors, "cosine", 20, 1)
     h_lcent = hubness(sim_lcent, metric="similarity")[0]
     acc_lcent = score(sim_lcent, self.target, metric="similarity")[0][0, 0]
     # Check each criterion on its own so a failure names the criterion
     # that was missed and shows the offending value.
     self.assertGreater(h_orig / h_lcent, 1.5,
                        "hubness reduction factor too small")
     self.assertGreater(acc_lcent - acc_orig, 0.03,
                        "k-NN accuracy gain too small")
Exemplo n.º 4
0
 def test_ls_dist_equals_sim(self):
     """Test for equal RANKS using dist. vs. sim. (LS_dist != 1-LS_sim).

     Hubness and k-NN accuracy are used as proxies: local scaling is
     applied once to ``self.dist`` as distances and once to
     ``1 - self.dist`` as similarities, and both variants must yield
     (numerically close) identical hubness and accuracy.
     """
     self.setUpMod('rnd')
     ls_dist = local_scaling(self.dist, metric='distance')
     ls_sim = local_scaling(1 - self.dist, metric='similarity')
     h_dist, _, _ = hubness(ls_dist, metric='distance')
     h_sim, _, _ = hubness(ls_sim, metric='similarity')
     acc_dist, _, _ = score(ls_dist, self.label, metric='distance')
     acc_sim, _, _ = score(ls_sim, self.label, metric='similarity')
     # Assert the two proxies separately so a failure tells which one
     # diverged; np.allclose keeps the original comparison tolerances.
     self.assertTrue(np.allclose(h_dist, h_sim),
                     "hubness differs between distance and similarity")
     self.assertTrue(np.allclose(acc_dist, acc_sim),
                     "k-NN accuracy differs between distance and similarity")
Exemplo n.º 5
0
    def _calc_hubness(self, k: int = 5):
        """Compute hubness statistics for neighborhood size `k` and store them.

        Calls ``hubness`` on ``self.secondary_distance`` with
        ``self.metric`` and records the following, keyed by `k`:

        - ``self.hubness``: hubness (skewness of `k`-occurrence),
        - ``self.anti_hubs``: percentage of anti hubs
          (`k`-occurrence == 0),
        - ``self.max_hub_k_occurence``: percentage of k-NN lists the
          largest hub occurs in.

        Returns ``self``.
        """
        skewness, _, k_occurrence = hubness(
            D=self.secondary_distance, metric=self.metric, k=k)
        self.hubness[k] = skewness

        # Both shares are expressed as a percentage of ``self.n``.
        n_anti_hubs = (k_occurrence == 0).sum()
        self.anti_hubs[k] = 100 * n_anti_hubs / self.n

        largest_k_occurrence = k_occurrence.max()
        self.max_hub_k_occurence[k] = 100 * largest_k_occurrence / self.n
        return self
  # Demo: print hubness (and, for dexter, k-NN accuracy) for the distance
  # and the similarity form of the same data, before and after empirical
  # mutual proximity.
  # Data set selector; only 'random' and 'dexter' are handled below.
  do = 'dexter'
  if do == 'random':
      print("RANDOM DATA:")
      print("------------")
      # Random sparse 1000x1000 matrix reduced to its part above the main
      # diagonal, then mirrored via `S += S.T` so that S is symmetric.
      # NOTE(review): presumably scipy.sparse `rand`/`triu` (density 0.05,
      # CSR, float32, seed 43) -- confirm against the file's imports.
      S = triu(rand(1000, 1000, 0.05, 'csr', np.float32, 43), 1)
      S += S.T
      # Dense distance matrix derived as 1 - similarity.
      D = 1. - S.toarray()
  elif do == 'dexter':
      print("DEXTER:")
      print("-------")
      # D is used as the distance matrix and c is handed to `score` as its
      # second argument (presumably class labels -- confirm); v is not used
      # in the visible code.
      D, c, v = load_dexter()
      acc_d, _, _ = score(D, c, [5], 'distance')
      # Same data expressed as a sparse similarity matrix (1 - distance).
      S = csr_matrix(1 - D)
      acc_s, _, _ = score(S, c, [5], 'similarity')
 
  # Hubness of the unmodified data, once per representation.
  # NOTE(review): the positional 5 is presumably `k` -- confirm against the
  # `hubness` signature.
  Sn_d, _, _ = hubness(D, 5, 'distance')
  Sn_s, _, _ = hubness(S, 5, 'similarity')
  print("Orig. dist. hubness:", Sn_d)
  print("Orig. sim.  hubness:", Sn_s)
  # Accuracies (and the labels `c`) are only assigned in the 'dexter'
  # branch above, hence the guard.
  if do == 'dexter':
      print("Orig. dist. k-NN accuracy:", acc_d)
      print('Orig. sim.  k-NN accuracy:', acc_s)
      
  # Apply empirical mutual proximity to both representations and measure
  # hubness again.
  D_mp_emp_d = mutual_proximity_empiric(D)
  D_mp_emp_s = mutual_proximity_empiric(S, 'similarity')
  Sn_mp_emp_d, _, _ = hubness(D_mp_emp_d, 5)
  Sn_mp_emp_s, _, _ = hubness(D_mp_emp_s, 5, 'similarity')
  print("MP emp dist. hubness:", Sn_mp_emp_d)
  print("MP emp sim.  hubness:", Sn_mp_emp_s)
  if do == 'dexter':
      acc_mp_emp_d, _, _ = score(D_mp_emp_d, c, [5], 'distance')