def sparse_init_from_random(
    n_neighbors,
    inds,
    indptr,
    data,
    query_inds,
    query_indptr,
    query_data,
    heap,
    rng_state,
    sparse_dist,
):
    """Seed the heap row of every sparse query with random candidates.

    For each query row, ``n_neighbors`` row numbers are drawn with
    ``rejection_sample`` from the ``indptr.shape[0] - 1`` rows of the
    reference matrix. The distance between each drawn row and the query
    row is computed with ``sparse_dist`` and pushed onto ``heap``.

    Both matrices are read row-wise through the
    ``values[ptr[k]:ptr[k + 1]]`` pattern (CSR-style layout).

    Parameters
    ----------
    n_neighbors
        Number of candidates requested from ``rejection_sample``.
    inds, indptr, data
        Column indices, row pointers and values of the reference matrix.
    query_inds, query_indptr, query_data
        Column indices, row pointers and values of the query matrix.
    heap
        Heap structure updated in place through ``heap_push``.
    rng_state
        Random state forwarded to ``rejection_sample``.
    sparse_dist
        Callable invoked as
        ``sparse_dist(from_inds, from_data, to_inds, to_data)``.

    Returns
    -------
    None
        The result is the mutation of ``heap``.
    """
    n_queries = query_indptr.shape[0] - 1
    n_rows = indptr.shape[0] - 1

    for query in range(n_queries):
        sampled = rejection_sample(n_neighbors, n_rows, rng_state)

        # The query row is the same for every candidate: slice it once.
        q_start = query_indptr[query]
        q_stop = query_indptr[query + 1]
        to_inds = query_inds[q_start:q_stop]
        to_data = query_data[q_start:q_stop]

        for candidate in sampled:
            # Negative entries are treated as "no sample" and ignored.
            if candidate < 0:
                continue

            r_start = indptr[candidate]
            r_stop = indptr[candidate + 1]
            from_inds = inds[r_start:r_stop]
            from_data = data[r_start:r_stop]

            d = sparse_dist(from_inds, from_data, to_inds, to_data)
            # Trailing 1 is the flag argument of heap_push
            # (presumably "new candidate" - confirm against heap_push).
            heap_push(heap, query, d, candidate, 1)
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
):
    """Build a neighbor graph for ``data`` via random init plus descent.

    Steps performed, in order:

    1. Create a heap with ``data.shape[0]`` rows of ``n_neighbors`` slots
       and fill it with randomly sampled pairs, pushed in both directions.
       Every evaluated pair is recorded in a ``tried`` set.
    2. If ``rp_tree_init`` is true, call ``init_rp_tree`` with
       ``leaf_array`` and the ``tried`` set.
    3. Run the low-memory or the high-memory internal descent routine,
       chosen by ``low_memory``. Only the high-memory routine receives
       ``tried``.
    4. Return ``deheap_sort`` applied to the heap.

    Parameters
    ----------
    data
        Indexable by row, with a ``shape`` attribute; ``data[i]`` is
        handed to ``dist``.
    n_neighbors
        Heap width and number of random samples per point.
    rng_state
        Random state forwarded to ``rejection_sample`` and the descent.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded unchanged to the internal descent routine.
    dist
        Callable invoked as ``dist(a, b)`` on two rows of ``data``.
    rp_tree_init
        Whether to run ``init_rp_tree`` after the random initialisation.
    leaf_array
        Passed to ``init_rp_tree``; unused when ``rp_tree_init`` is false.
    low_memory
        Selects ``nn_descent_internal_low_memory`` when true.

    Returns
    -------
    The value returned by ``deheap_sort(current_graph)``.
    """
    n_points = data.shape[0]

    # The set starts with a dummy pair rather than empty (presumably so a
    # JIT compiler can infer the element type - confirm before changing).
    tried = set([(-1, -1)])
    current_graph = make_heap(n_points, n_neighbors)

    for point in range(n_points):
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for k in range(sampled.shape[0]):
            other = sampled[k]
            d = dist(data[point], data[other])

            # Insert the edge symmetrically and remember both orientations.
            heap_push(current_graph, point, d, other, 1)
            heap_push(current_graph, other, d, point, 1)
            tried.add((point, other))
            tried.add((other, point))

    if rp_tree_init:
        init_rp_tree(data, dist, current_graph, leaf_array, tried=tried)

    if low_memory:
        nn_descent_internal_low_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )
    else:
        nn_descent_internal_high_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            dist=dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )

    return deheap_sort(current_graph)
def init_current_graph(data, dist, n_neighbors, rng_state):
    """Create a neighbor heap for ``data`` filled with random pairs.

    A heap with ``data.shape[0]`` rows and ``n_neighbors`` slots is made
    with ``make_heap``. For every point, ``n_neighbors`` indices are drawn
    with ``rejection_sample``; the distance to each drawn point is pushed
    onto the heap row of both endpoints.

    Parameters
    ----------
    data
        Indexable by row, with a ``shape`` attribute.
    dist
        Callable invoked as ``dist(a, b)`` on two rows of ``data``.
    n_neighbors
        Heap width and number of random samples per point.
    rng_state
        Random state forwarded to ``rejection_sample``.

    Returns
    -------
    The heap produced by ``make_heap`` after the random pushes.
    """
    n_points = data.shape[0]
    graph = make_heap(n_points, n_neighbors)

    for point in range(n_points):
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for k in range(sampled.shape[0]):
            other = sampled[k]
            d = dist(data[point], data[other])
            # Push in both directions so the edge appears in each row.
            heap_push(graph, point, d, other, 1)
            heap_push(graph, other, d, point, 1)

    return graph
def init_from_random(n_neighbors, data, query_points, heap, rng_state, dist):
    """Seed the heap row of every query point with random candidates.

    For each row of ``query_points``, ``n_neighbors`` indices into
    ``data`` are drawn with ``rejection_sample``. The distance from each
    drawn data row to the query row is pushed onto ``heap``.

    Parameters
    ----------
    n_neighbors
        Number of candidates requested from ``rejection_sample``.
    data
        Reference points; indexable by row, with a ``shape`` attribute.
    query_points
        Query points; indexable by row, with a ``shape`` attribute.
    heap
        Heap structure updated in place through ``heap_push``.
    rng_state
        Random state forwarded to ``rejection_sample``.
    dist
        Callable invoked as ``dist(data_row, query_row)``.

    Returns
    -------
    None
        The result is the mutation of ``heap``.
    """
    n_points = data.shape[0]

    for query in range(query_points.shape[0]):
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for candidate in sampled:
            # Negative entries are treated as "no sample" and ignored.
            if candidate < 0:
                continue
            d = dist(data[candidate], query_points[query])
            heap_push(heap, query, d, candidate, 1)
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=Jvis.sparse.sparse_euclidean,
    n_iters=10,
    delta=0.001,
    rho=0.5,
    low_memory=False,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build a neighbor graph for a sparse matrix via random init plus descent.

    The matrix is read row-wise through the
    ``values[indptr[k]:indptr[k + 1]]`` pattern (CSR-style layout).

    Steps performed, in order:

    1. Create a heap with ``n_vertices`` rows of ``n_neighbors`` slots and
       fill it with randomly sampled pairs, pushed in both directions.
       Every evaluated pair is recorded in a ``tried`` set.
    2. If ``rp_tree_init`` is true, call ``sparse_init_rp_tree`` with
       ``leaf_array`` and the ``tried`` set.
    3. Run the low-memory or the high-memory internal sparse descent
       routine, chosen by ``low_memory``. Only the high-memory routine
       receives ``tried``.
    4. Return ``deheap_sort`` applied to the heap.

    Parameters
    ----------
    inds, indptr, data
        Column indices, row pointers and values of the sparse matrix.
    n_vertices
        Number of rows to process; also the sampling pool size.
    n_neighbors
        Heap width and number of random samples per vertex.
    rng_state
        Random state forwarded to ``rejection_sample`` and the descent.
    max_candidates, n_iters, delta, rho, verbose
        Forwarded unchanged to the internal descent routine.
    sparse_dist
        Callable invoked as
        ``sparse_dist(from_inds, from_data, to_inds, to_data)``.
    low_memory
        Selects ``sparse_nn_descent_internal_low_memory`` when true.
    rp_tree_init
        Whether to run ``sparse_init_rp_tree`` after the random init.
    leaf_array
        Passed to ``sparse_init_rp_tree``; unused when ``rp_tree_init``
        is false.

    Returns
    -------
    The value returned by ``deheap_sort(current_graph)``.
    """
    # The set starts with a dummy pair rather than empty (presumably so a
    # JIT compiler can infer the element type - confirm before changing).
    tried = set([(-1, -1)])
    current_graph = make_heap(n_vertices, n_neighbors)

    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # Row i does not change across the sampled neighbors, so slice it
        # once per vertex instead of once per (vertex, neighbor) pair.
        row_start = indptr[i]
        row_stop = indptr[i + 1]
        from_inds = inds[row_start:row_stop]
        from_data = data[row_start:row_stop]

        for j in range(indices.shape[0]):
            neighbor = indices[j]

            nbr_start = indptr[neighbor]
            nbr_stop = indptr[neighbor + 1]
            to_inds = inds[nbr_start:nbr_stop]
            to_data = data[nbr_start:nbr_stop]

            d = sparse_dist(from_inds, from_data, to_inds, to_data)

            # Insert the edge symmetrically and remember both orientations.
            heap_push(current_graph, i, d, neighbor, 1)
            heap_push(current_graph, neighbor, d, i, 1)
            tried.add((i, neighbor))
            tried.add((neighbor, i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            current_graph,
            leaf_array,
            tried=tried,
        )

    if low_memory:
        sparse_nn_descent_internal_low_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )
    else:
        sparse_nn_descent_internal_high_memory(
            current_graph,
            inds,
            indptr,
            data,
            n_vertices,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            sparse_dist=sparse_dist,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
        )

    return deheap_sort(current_graph)