def fit(x_train):
    """Prepare a batched brute-force K-NN query function for ``x_train``.

    The training points are converted with ``tensor`` and their squared
    norms are pre-computed once. Queries are then answered in batches so
    that a single call to ``KNN_torch_fun`` stays within a fixed memory
    budget.

    Args:
        x_train: Training points; must convert to a 2-D tensor, since the
            shape is unpacked as ``(Ntrain, D)``.

    Returns:
        A pair ``(f, elapsed)`` where ``elapsed`` is the time spent on the
        squared-norm pre-computation and ``f(x_test)`` returns
        ``(indices, elapsed)`` for a set of query points.
    """
    # Setup the K-NN estimator:
    x_train = tensor(x_train)
    Ntrain, D = x_train.shape

    start = timer()
    # The "training" time here should be negligible:
    x_train_norm = (x_train ** 2).sum(-1)
    elapsed = timer() - start

    def f(x_test):
        """Query the K nearest training points for every row of ``x_test``.

        Returns:
            ``(indices, elapsed)``: the neighbor indices as a NumPy array
            and the time spent in the batched query loop.
        """
        x_test = tensor(x_test)

        # Estimate the largest reasonable batch size:
        Ntest = x_test.shape[0]
        av_mem = int(5e8)  # 500 Mb of GPU memory per batch
        # Remember that a vector of D float32 number takes up 4*D bytes.
        # The outer max(1, ...) keeps the batch size strictly positive even
        # when the query set is empty (Ntest == 0); otherwise the ceiling
        # division below would divide by zero. For Ntest >= 1 the value is
        # identical to min(max(1, budget), Ntest).
        Ntest_loop = max(1, min(av_mem // (4 * D * Ntrain), Ntest))
        # Ceiling division: number of batches needed to cover all queries
        # (0 when there are no queries).
        Nloop = (Ntest - 1) // Ntest_loop + 1
        out = int_tensor(Ntest, K)

        start = timer()
        # Actual K-NN query, one batch of at most Ntest_loop rows at a time:
        for k in range(Nloop):
            lo = Ntest_loop * k
            hi = Ntest_loop * (k + 1)
            x_test_k = x_test[lo:hi, :]
            out[lo:hi, :] = KNN_torch_fun(
                x_train, x_train_norm, x_test_k, K, metric
            )
        elapsed = timer() - start

        # The conversion to NumPy is deliberately left out of the timing.
        indices = out.cpu().numpy()
        return indices, elapsed

    return f, elapsed
def f(x_test):
    """Run a timed ``KNN.kneighbors`` query on ``x_test``.

    Returns:
        ``(indices, elapsed)``: the neighbor indices converted to a NumPy
        array, and the time spent inside ``KNN.kneighbors`` only (the
        input conversion and the final copy are not timed).
    """
    queries = tensor(x_test)

    t0 = timer()
    neighbors = KNN.kneighbors(queries)
    elapsed = timer() - t0

    return neighbors.cpu().numpy(), elapsed
def f(x_test):
    """Run a timed, unbatched ``KNN_torch_fun`` query on ``x_test``.

    Uses ``x_train``, ``x_train_norm``, ``K`` and ``metric`` from the
    enclosing scope.

    Returns:
        ``(indices, elapsed)``: the query result converted to a NumPy
        array, and the time spent inside ``KNN_torch_fun`` only.
    """
    queries = tensor(x_test)

    t0 = timer()
    # Actual K-NN query, performed in a single call:
    result = KNN_torch_fun(x_train, x_train_norm, queries, K, metric)
    elapsed = timer() - t0

    return result.cpu().numpy(), elapsed
def f(x_test):
    """Run a timed ``KNN_fun`` query of ``x_test`` against ``x_train``.

    ``KNN_fun`` and ``x_train`` come from the enclosing scope.

    Returns:
        ``(indices, elapsed)``: the query result converted to a NumPy
        array, and the time spent inside ``KNN_fun`` only.
    """
    queries = tensor(x_test)

    t0 = timer()
    # Actual K-NN query (query points first, training points second):
    neighbors = KNN_fun(queries, x_train)
    elapsed = timer() - t0

    return neighbors.cpu().numpy(), elapsed
def ground_truth(x_train, x_test, K, metric):
    """Compute reference K-NN indices with a symbolic KeOps distance matrix.

    Args:
        x_train: Training points, converted with ``tensor``.
        x_test: Query points, converted with ``tensor``.
        K: Number of neighbors requested from ``argKmin``.
        metric: One of ``"euclidean"``, ``"manhattan"``, ``"angular"`` or
            ``"hyperbolic"``.

    Returns:
        The result of ``argKmin(K, dim=1)`` moved to the CPU and converted
        to a NumPy array. Query points lie along axis 0 of the symbolic
        matrix and training points along axis 1, so the reduction runs over
        the training points.

    Raises:
        NotImplementedError: If ``metric`` is not one of the supported
            names. This is checked before any tensor conversion so an
            invalid metric fails fast with a clear message instead of an
            ``UnboundLocalError`` further down.
    """
    supported_metrics = ("euclidean", "manhattan", "angular", "hyperbolic")
    if metric not in supported_metrics:
        raise NotImplementedError(f"The '{metric}' distance is not supported.")

    # Setup the K-NN estimator:
    x_train = tensor(x_train)
    x_test = tensor(x_test)

    # Encoding as KeOps LazyTensors:
    X_i = LazyTensor(x_test[:, None, :])
    X_j = LazyTensor(x_train[None, :, :])

    # Symbolic distance matrix:
    if metric == "euclidean":
        D_ij = ((X_i - X_j) ** 2).sum(-1)
    elif metric == "manhattan":
        D_ij = (X_i - X_j).abs().sum(-1)
    elif metric == "angular":
        # Negated scalar product: a larger dot product ranks as "closer".
        D_ij = -(X_i | X_j)
    elif metric == "hyperbolic":
        # NOTE(review): X_i[0] / X_j[0] presumably select the first
        # coordinate of each point - confirm against KeOps indexing rules.
        D_ij = ((X_i - X_j) ** 2).sum(-1) / (X_i[0] * X_j[0])

    # K-NN query:
    indices = D_ij.argKmin(K, dim=1)
    return indices.cpu().numpy()
def fit(x_train):
    """Fit the enclosing ``KNN`` estimator and return a timed query function.

    ``KNN``, ``clusters`` and ``a`` are taken from the enclosing scope.

    Args:
        x_train: Training points, converted with ``tensor``.

    Returns:
        ``(f, elapsed)`` where ``elapsed`` is the time spent in
        ``KNN.fit`` and ``f(x_test)`` returns ``(indices, elapsed)``.
    """
    train_points = tensor(x_train)

    fit_start = timer()
    KNN.fit(train_points, clusters=clusters, a=a)
    elapsed = timer() - fit_start

    def f(x_test):
        """Query ``KNN.kneighbors`` and time only that call."""
        queries = tensor(x_test)

        query_start = timer()
        neighbors = KNN.kneighbors(queries)
        query_time = timer() - query_start

        return neighbors.cpu().numpy(), query_time

    return f, elapsed
def fit(x_train):
    """Build a symbolic KeOps ``argKmin`` operator and return a query function.

    ``K`` and ``metric`` are taken from the enclosing scope.

    Args:
        x_train: Training points, converted with ``tensor``; the size of
            axis 1 is used as the dimension of the symbolic variables.

    Returns:
        ``(f, elapsed)`` where ``elapsed`` is the time spent building the
        symbolic operator and ``f(x_test)`` returns ``(indices, elapsed)``.

    Raises:
        NotImplementedError: If ``metric`` is not ``"euclidean"``,
            ``"manhattan"``, ``"angular"`` or ``"hyperbolic"``.
    """
    # Setup the K-NN estimator:
    train_points = tensor(x_train)
    build_start = timer()

    # Purely symbolic "i" and "j" variables, without any data array:
    dim = train_points.shape[1]
    sym_i = Vi(0, dim)
    sym_j = Vj(1, dim)

    # Symbolic distance matrix:
    if metric == "euclidean":
        dist_ij = ((sym_i - sym_j) ** 2).sum(-1)
    elif metric == "manhattan":
        dist_ij = (sym_i - sym_j).abs().sum(-1)
    elif metric == "angular":
        dist_ij = -(sym_i | sym_j)
    elif metric == "hyperbolic":
        dist_ij = ((sym_i - sym_j) ** 2).sum(-1) / (sym_i[0] * sym_j[0])
    else:
        raise NotImplementedError(f"The '{metric}' distance is not supported.")

    # K-NN query operator:
    knn_query = dist_ij.argKmin(K, dim=1)

    # N.B.: The "training" time here should be negligible.
    elapsed = timer() - build_start

    def f(x_test):
        """Apply the symbolic operator to ``x_test`` and time that call."""
        queries = tensor(x_test)

        query_start = timer()
        # Actual K-NN query (query points first, training points second):
        neighbors = knn_query(queries, train_points)
        query_time = timer() - query_start

        return neighbors.cpu().numpy(), query_time

    return f, elapsed
def fit(x_train):
    """Pre-compute squared norms of ``x_train`` and return a query function.

    ``K`` and ``metric`` are taken from the enclosing scope.

    Args:
        x_train: Training points, converted with ``tensor``.

    Returns:
        ``(f, elapsed)`` where ``elapsed`` is the time spent on the
        squared-norm pre-computation and ``f(x_test)`` returns
        ``(indices, elapsed)`` from one unbatched ``KNN_torch_fun`` call.
    """
    # Setup the K-NN estimator:
    train_points = tensor(x_train)

    prep_start = timer()
    # The "training" time here should be negligible:
    train_sq_norms = (train_points ** 2).sum(-1)
    elapsed = timer() - prep_start

    def f(x_test):
        """Run ``KNN_torch_fun`` on all queries at once and time that call."""
        queries = tensor(x_test)

        query_start = timer()
        # Actual K-NN query:
        result = KNN_torch_fun(train_points, train_sq_norms, queries, K, metric)
        query_time = timer() - query_start

        return result.cpu().numpy(), query_time

    return f, elapsed