def test_run2():
    """Build an index incrementally, then compare kNN search results for
    different ``checks`` budgets.

    Prints the ids/distances found with ``checks=1`` versus ``checks=100``
    so the effect of the node-check budget on search quality can be
    inspected manually.  (The old docstring documented a ``checks``
    parameter, but this function takes no arguments — ``checks`` is an
    argument of ``Index.knn_search_points``.)
    """
    n = 100
    k = 3
    ops = 10
    test_n = 1

    x = random_vectors(n)
    test_points = random_vectors(test_n)

    index = Index(x, w=(0.5, 0.5))

    # Insert all n points, `ops` incremental operations at a time.
    for i in range(n // ops):
        ur = index.run(ops)
        print(ur)

    ids1, dists1 = index.knn_search_points(test_points, k, checks=1)
    ids3, dists3 = index.knn_search_points(test_points, k, checks=100)

    print("1: ", ids1)
    print("1: ", dists1)
    print("3: ", ids3)
    print("3: ", dists3)
    print(index.size())
def test_run():
    """Incrementally add points to the tree structure with ``Index.run``.

    Each ``run(ops)`` call spends ``ops`` operations, split between adding
    points and updating the index.  The returned report contains:

    - numPointsInserted: total number of points inserted so far
    - addPointOps: number of ops allocated to add
    - updateIndexOps: number of ops allocated to update index
    - addPointResult: number of added points
    - updateIndexResult: result of the index-update step (semantics TBD)
    - addPointElapsed: time elapsed for add
    - updateIndexElapsed: time elapsed for update index
    """
    data = random_vectors(n=30, d=3)
    index = Index(data, w=(0.5, 0.5))

    batch = 6
    for _ in range(data.shape[0] // batch):
        report = index.run(batch)
        print("===========")
        # index.size grows as we run iteratively
        print("index.size(): ", index.size())
        print(report)
class KNNKernelDensity:
    """Approximate Gaussian kernel density estimation backed by a kNN index.

    The density at a query point is estimated from the Gaussian kernel
    applied to the distances of its k nearest neighbors only, rather than
    all training points.
    """

    # Normalization constant of the Gaussian kernel: sqrt(2*pi).
    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X: np.ndarray[Any, Any], online: Optional[bool] = False):
        self.X = X
        self.index = Index(X)
        if not online:
            # Offline mode: index every point up front instead of
            # inserting them incrementally via run()/run_ids().
            self.index.add_points(len(X))

    def run(self, ops: int) -> Any:
        """Spend `ops` incremental build operations on the index."""
        return self.index.run(ops)

    def run_ids(self, ids: Iterable[int]) -> Any:
        """Insert the points identified by `ids` into the index."""
        return self.index.run_ids(ids)

    def score_samples(
        self, X: np.ndarray[Any, Any], k: int = 10, bandwidth: float = 0.2
    ) -> np.ndarray[Any, Any]:
        """Return the per-sample density score for each row of X.

        Note: this returns an array of shape (len(X),), not a scalar —
        the original ``-> float`` annotation was wrong.
        """
        _, dists = self.index.knn_search_points(X, k=k)
        scores = self._gaussian_score(dists, bandwidth) / k
        return scores

    def _gaussian_score(
        self, dists: np.ndarray[Any, Any], bandwidth: float
    ) -> np.ndarray[Any, Any]:
        """Sum of Gaussian kernel values over the neighbor axis (axis 1).

        `dists` is the (n_samples, k) matrix of neighbor distances; the
        original ``dists: float`` annotation was wrong.
        """
        logg = -0.5 * (dists / bandwidth) ** 2
        g = np.exp(logg) / bandwidth / self.SQRT2PI
        return g.sum(axis=1)  # type: ignore
class KNNKernelDensity():
    """Kernel density estimation approximated with k nearest neighbors.

    Density scores come from the Gaussian kernel evaluated on the
    distances to the k nearest neighbors served by an incrementally
    built ``Index``.
    """

    SQRT2PI = np.sqrt(2 * np.pi)

    def __init__(self, X, online=False):
        self.X = X
        self.index = Index(X)
        # Offline mode: index every training point immediately.
        if not online:
            self.index.add_points(len(X))

    def run(self, ops):
        """Spend `ops` incremental build operations on the index."""
        return self.index.run(ops)

    def run_ids(self, ids):
        """Insert the points identified by `ids` into the index."""
        return self.index.run_ids(ids)

    def score_samples(self, X, k=10, bandwidth=0.2):
        """Return the per-sample density score for each row of X."""
        _, neighbor_dists = self.index.knn_search_points(X, k=k)
        return self._gaussian_score(neighbor_dists, bandwidth) / k

    def _gaussian_score(self, dists, bandwidth):
        """Sum of Gaussian kernel values over the neighbor axis."""
        scaled = dists / bandwidth
        logg = -0.5 * scaled ** 2
        kernel = np.exp(logg) / bandwidth / self.SQRT2PI
        return kernel.sum(axis=1)
def test_incremental_run1(self):
    """Each run(ops) call must insert exactly `ops` points into the index."""
    data = random_vectors()
    index = Index(data, w=(0.5, 0.5))
    self.assertTrue(index.is_using_pyarray)

    batch = 20
    for step in range(data.shape[0] // batch):
        report = index.run(batch)
        # The index grows by exactly one batch per call.
        self.assertEqual(index.size(), (step + 1) * batch)
        self.assertEqual(report['addPointResult'], batch)
class KNNRegressor():
    """k-nearest-neighbors regressor backed by an incrementally built Index.

    Parameters
    ----------
    X: training points, one row per sample
    y: target values, shape (n,) or (n, n_outputs)
    n_neighbors: number of neighbors used in predict
    weights: 'uniform' for plain averaging, anything else for
        inverse-distance weighting
    online: when False (offline), all points are indexed immediately
    """

    def __init__(self, X, y, n_neighbors=5, weights='uniform', online=False):
        self.X = X
        self.y = y
        self.index = Index(X)
        self.n_neighbors = n_neighbors
        self.weights = weights
        if not online:  # if offline
            self.index.add_points(len(X))

    def run(self, ops):
        """Spend `ops` incremental build operations on the index."""
        return self.index.run(ops)

    def predict(self, X):
        """Predict targets for each row of X from its k nearest neighbors.

        Returns shape (len(X),) for 1-D training targets, otherwise
        (len(X), n_outputs).
        """
        indices, dists = self.index.knn_search_points(X, k=self.n_neighbors)
        weights = self._get_weights(dists)

        if self.weights == 'uniform':
            y_pred = np.mean(self.y[indices], axis=1)
        else:
            # View 1-D targets as a single-output column so the weighted
            # average works for both 1-D and 2-D y.  (The original code
            # crashed on `self.y.shape[1]` for 1-D targets here, even
            # though the ravel() below shows 1-D y is supported.)
            y2d = self.y if self.y.ndim > 1 else self.y[:, np.newaxis]
            y_pred = np.empty((X.shape[0], y2d.shape[1]))
            denom = np.sum(weights, axis=1)
            for j in range(y2d.shape[1]):
                num = np.sum(y2d[indices, j] * weights, axis=1)
                y_pred[:, j] = num / denom

        if self.y.ndim == 1:
            y_pred = y_pred.ravel()
        return y_pred

    def _get_weights(self, dists):
        """Return None for uniform weighting, else inverse-distance weights.

        A row containing an exact-zero distance gets weight 1 on the
        coincident neighbor(s) and 0 elsewhere.  Note `dists` is modified
        in place and returned.
        """
        if self.weights == 'uniform':
            return None
        for i, dist in enumerate(dists):
            if 0. in dist:
                # Exact match: all weight on the zero-distance neighbors.
                dists[i] = dist == 0.
            else:
                dists[i] = 1. / dist
        return dists
def test_updates_after_all_points_added(self):
    """Once every point is indexed, run() spends all ops on index updates."""
    np.random.seed(10)
    n = 10000
    w = (0.5, 0.5)
    x = random_vectors(n)
    ops = 1000

    index = Index(x, w=w)
    self.assertTrue(index.is_using_pyarray)

    index.add_points(n)  # add all points

    # accumulate losses
    for _ in range(1000):
        index.knn_search_points(random_vectors(100), 10)

    for _ in range(10):
        report = index.run(ops)
        # Nothing left to insert, so every op goes to the index update.
        self.assertEqual(report['addPointResult'], 0)
        self.assertEqual(report['updateIndexResult'], ops)
def test_incremental_run2(self):
    """A larger `checks` budget never yields worse kNN distances."""
    n = 1000
    k = 20
    ops = 100
    test_n = 30

    x = random_vectors(n)
    queries = random_vectors(test_n)

    index = Index(x)
    self.assertTrue(index.is_using_pyarray)

    for _ in range(n // ops):
        index.run(ops)

    _, dists_few = index.knn_search_points(queries, k, checks=100)
    _, dists_many = index.knn_search_points(queries, k, checks=1000)

    # This always holds: the later search checks a larger number of nodes
    # and the search process is deterministic, so each distance found with
    # checks=100 is >= the corresponding distance found with checks=1000.
    self.assertEqual(np.sum(dists_few >= dists_many), test_n * k)