def initialized_nnd_search(
    data, indptr, indices, initialization, query_points, dist, dist_args
):
    """Refine per-query heaps by walking a compressed-row neighbor graph.

    For every query row, the vertex reported by ``smallest_flagged`` is
    expanded: each of its graph neighbors that has not been tried yet is
    scored against the query and pushed onto that query's heap. Expansion
    stops for a query once ``smallest_flagged`` returns -1.

    Parameters
    ----------
    data : array
        Indexed points; ``data[v]`` is handed to ``dist``.
    indptr, indices : arrays
        Adjacency in compressed-row form: the neighbors of vertex ``v`` are
        ``indices[indptr[v]:indptr[v + 1]]``. Entries equal to -1 are skipped.
    initialization : heap
        Heap structure updated in place. ``initialization[0, i]`` seeds the
        set of already-tried vertices for query ``i``.
    query_points : array
        One query per row.
    dist : callable
        Invoked as ``dist(point, query, *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.

    Returns
    -------
    The ``initialization`` object that was passed in, after the updates.
    """
    n_queries = query_points.shape[0]
    for i in numba.prange(n_queries):
        query = query_points[i]
        seen = set(initialization[0, i])

        # Find smallest flagged vertex; -1 signals that nothing is left.
        # NOTE(review): presumably smallest_flagged also clears the flag of
        # the vertex it returns, otherwise this loop would not terminate.
        vertex = smallest_flagged(initialization, i)
        while vertex != -1:
            start = indptr[vertex]
            stop = indptr[vertex + 1]
            neighbors = indices[start:stop]
            for j in range(neighbors.shape[0]):
                cand = neighbors[j]
                # Skip self-loops, -1 entries and vertices already scored.
                if cand != vertex and cand != -1 and cand not in seen:
                    d = dist(data[cand], query, *dist_args)
                    unchecked_heap_push(initialization, i, d, cand, 1)
                    seen.add(cand)
            vertex = smallest_flagged(initialization, i)

    return initialization
def initialize_heaps(data, n_neighbors, leaf_array, dist=dist.euclidean, dist_args=()):
    """Seed a graph heap and a search heap from points that share a leaf.

    Every unordered pair of indices found in the same row of ``leaf_array``
    is scored once with ``dist`` and pushed, in both directions, onto both
    heaps.

    Parameters
    ----------
    data : array
        Points; ``data.shape[0]`` sizes both heaps and ``data[idx]`` is
        passed to ``dist``.
    n_neighbors : int
        The search heap is created with ``n_neighbors * 2`` slots per point.
    leaf_array : 2-D integer array
        One leaf per row. A negative entry ends the scan of that row.
    dist : callable
        Invoked as ``dist(data[a], data[b], *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.

    Returns
    -------
    (graph_heap, search_heap)
        ``graph_heap`` is created with 10 slots per point, ``search_heap``
        with ``n_neighbors * 2``.
    """
    n_points = data.shape[0]
    graph_heap = make_heap(n_points, 10)
    search_heap = make_heap(n_points, n_neighbors * 2)

    # Seeded with a dummy pair that never equals a pair of valid
    # (non-negative) indices.
    # NOTE(review): presumably present so the set's element type can be
    # inferred under Numba - confirm before simplifying.
    seen_pairs = set([(-1, -1)])

    n_leaves = leaf_array.shape[0]
    leaf_width = leaf_array.shape[1]
    for leaf in range(n_leaves):
        for i in range(leaf_width):
            a = leaf_array[leaf, i]
            if a < 0:
                break
            for j in range(i + 1, leaf_width):
                b = leaf_array[leaf, j]
                if b < 0:
                    break
                if (a, b) not in seen_pairs:
                    d = dist(data[a], data[b], *dist_args)
                    unchecked_heap_push(graph_heap, a, d, b, 1)
                    unchecked_heap_push(graph_heap, b, d, a, 1)
                    unchecked_heap_push(search_heap, a, d, b, 1)
                    unchecked_heap_push(search_heap, b, d, a, 1)
                    seen_pairs.add((a, b))
                    seen_pairs.add((b, a))

    return graph_heap, search_heap
def sparse_initialized_nnd_search(
    inds,
    indptr,
    data,
    search_indptr,
    search_inds,
    initialization,
    query_inds,
    query_indptr,
    query_data,
    sparse_dist,
    dist_args,
):
    """Refine per-query heaps by walking a neighbor graph over sparse rows.

    Sparse counterpart of the dense search: indexed points and queries are
    both given in compressed-row form, and distances are computed from the
    (indices, values) slices of the two rows.

    Parameters
    ----------
    inds, indptr, data : arrays
        Compressed-row storage of the indexed points; row ``v`` occupies
        ``indptr[v]:indptr[v + 1]`` of ``inds`` and ``data``.
    search_indptr, search_inds : arrays
        Compressed-row adjacency of the search graph: neighbors of vertex
        ``v`` are ``search_inds[search_indptr[v]:search_indptr[v + 1]]``.
        Entries equal to -1 are skipped.
    initialization : heap
        Heap structure updated in place. ``initialization[0, i]`` seeds the
        set of already-tried vertices for query ``i``.
    query_inds, query_indptr, query_data : arrays
        Compressed-row storage of the queries; there are
        ``query_indptr.shape[0] - 1`` queries.
    sparse_dist : callable
        Invoked as
        ``sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``sparse_dist``.

    Returns
    -------
    The ``initialization`` object that was passed in, after the updates.
    """
    n_queries = query_indptr.shape[0] - 1
    for i in numba.prange(n_queries):
        seen = set(initialization[0, i])

        q_start = query_indptr[i]
        q_stop = query_indptr[i + 1]
        to_inds = query_inds[q_start:q_stop]
        to_data = query_data[q_start:q_stop]

        # Find smallest flagged vertex; -1 signals that nothing is left.
        # NOTE(review): presumably smallest_flagged also clears the flag of
        # the vertex it returns, otherwise this loop would not terminate.
        vertex = smallest_flagged(initialization, i)
        while vertex != -1:
            neighbors = search_inds[search_indptr[vertex]:search_indptr[vertex + 1]]
            for j in range(neighbors.shape[0]):
                cand = neighbors[j]
                # Skip self-loops, -1 entries and vertices already scored.
                if cand != vertex and cand != -1 and cand not in seen:
                    row_start = indptr[cand]
                    row_stop = indptr[cand + 1]
                    from_inds = inds[row_start:row_stop]
                    from_data = data[row_start:row_stop]
                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )
                    unchecked_heap_push(initialization, i, d, cand, 1)
                    seen.add(cand)
            vertex = smallest_flagged(initialization, i)

    return initialization
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
    seed_per_row=False,
):
    """Build a neighbor graph for dense data by iterative local refinement.

    The graph heap is first filled from random samples (and, optionally,
    from ``init_rp_tree``). Then, for up to ``n_iters`` rounds, candidate
    lists are built for every vertex and every untried pair drawn from them
    is scored and pushed onto the heap in both directions.

    Parameters
    ----------
    data : array
        Points, one per row.
    n_neighbors : int
        Heap width per vertex and number of random samples drawn per vertex.
    rng_state : array
        Random state passed to ``seed``, ``rejection_sample`` and
        ``new_build_candidates``.
    max_candidates : int
        Number of candidate slots scanned per vertex; forwarded to
        ``new_build_candidates``.
    dist : callable
        Invoked as ``dist(data[p], data[q], *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.
    n_iters : int
        Maximum number of refinement rounds.
    delta : float
        A round that records at most ``delta * n_neighbors * n_vertices``
        updates ends the refinement early.
    rho : float
        Forwarded to ``new_build_candidates``.
    rp_tree_init : bool
        When true, ``init_rp_tree`` is called with ``leaf_array`` before
        refinement starts.
    leaf_array : array or None
        Forwarded to ``init_rp_tree``.
    verbose : bool
        When true, prints the round counter at the start of every round.
    seed_per_row : bool
        When true, ``seed(rng_state, i)`` is called before sampling for row
        ``i``; also forwarded to ``new_build_candidates``.

    Returns
    -------
    The result of ``deheap_sort`` applied to the final graph heap.
    """
    n_vertices = data.shape[0]

    # Seeded with a dummy pair that never equals a pair of valid indices.
    # NOTE(review): presumably present so the set's element type can be
    # inferred under Numba - confirm before simplifying.
    seen_pairs = set([(-1, -1)])

    current_graph = make_heap(n_vertices, n_neighbors)

    # Random initialisation: link every vertex to a sample of other rows.
    for i in range(n_vertices):
        if seed_per_row:
            seed(rng_state, i)
        sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
        for j in range(sampled.shape[0]):
            other = sampled[j]
            d = dist(data[i], data[other], *dist_args)
            heap_push(current_graph, i, d, other, 1)
            heap_push(current_graph, other, d, i, 1)
            seen_pairs.add((i, other))
            seen_pairs.add((other, i))

    if rp_tree_init:
        init_rp_tree(
            data, dist, dist_args, current_graph, leaf_array, tried=seen_pairs
        )

    for iteration in range(n_iters):
        if verbose:
            print("\t", iteration, " / ", n_iters)

        new_cands, old_cands = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            seed_per_row,
        )

        # Running total of unchecked_heap_push return values for this round.
        n_updates = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_cands[0, i, j])
                if p < 0:
                    continue

                # New-vs-new pairs; starting at j avoids revisiting (k, j).
                for k in range(j, max_candidates):
                    q = int(new_cands[0, i, k])
                    if q >= 0 and (p, q) not in seen_pairs:
                        d = dist(data[p], data[q], *dist_args)
                        n_updates += unchecked_heap_push(current_graph, p, d, q, 1)
                        seen_pairs.add((p, q))
                        if p != q:
                            n_updates += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            seen_pairs.add((q, p))

                # New-vs-old pairs.
                for k in range(max_candidates):
                    q = int(old_cands[0, i, k])
                    if q >= 0 and (p, q) not in seen_pairs:
                        d = dist(data[p], data[q], *dist_args)
                        n_updates += unchecked_heap_push(current_graph, p, d, q, 1)
                        seen_pairs.add((p, q))
                        if p != q:
                            n_updates += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            seen_pairs.add((q, p))

        # Too few updates in this round: treat the graph as converged.
        if n_updates <= delta * n_neighbors * n_vertices:
            break

    return deheap_sort(current_graph)
def sparse_nn_descent_internal_high_memory(
    current_graph,
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    tried,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    verbose=False,
):
    """Run the refinement rounds of NN-descent on sparse (compressed-row) data.

    For up to ``n_iters`` rounds, candidate lists are built for every vertex
    and every pair drawn from them that is not yet in ``tried`` is scored
    with ``sparse_dist`` and pushed onto ``current_graph`` in both
    directions. Both ``current_graph`` and ``tried`` are updated in place.

    Parameters
    ----------
    current_graph : heap
        Graph heap to refine; updated in place.
    inds, indptr, data : arrays
        Compressed-row storage of the points; row ``v`` occupies
        ``indptr[v]:indptr[v + 1]`` of ``inds`` and ``data``.
    n_vertices : int
        Number of rows to process.
    n_neighbors : int
        Forwarded to ``new_build_candidates`` and used in the stop threshold.
    rng_state : array
        Random state forwarded to ``new_build_candidates``.
    tried : set of (int, int)
        Pairs that were already scored; extended in place.
    max_candidates : int
        Number of candidate slots scanned per vertex.
    sparse_dist : callable
        Invoked as
        ``sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``sparse_dist``.
    n_iters : int
        Maximum number of refinement rounds.
    delta : float
        A round that records at most ``delta * n_neighbors * n_vertices``
        updates ends the refinement early.
    rho : float
        Forwarded to ``new_build_candidates``.
    verbose : bool
        When true, prints the round counter at the start of every round.

    Returns
    -------
    None
    """
    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors, old_candidate_neighbors) = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            False,
        )

        # Running total of unchecked_heap_push return values for this round.
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # Row p is the same for every q below, so slice it once
                # instead of once per inner iteration.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                # New-vs-new pairs; starting at j avoids revisiting (k, j).
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # New-vs-old pairs.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Too few updates in this round: treat the graph as converged.
        if c <= delta * n_neighbors * n_vertices:
            return
def nn_descent_internal_high_memory(
    current_graph,
    data,
    n_neighbors,
    rng_state,
    tried,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    verbose=False,
    seed_per_row=False,
):
    """Run the refinement rounds of NN-descent on dense data.

    For up to ``n_iters`` rounds, candidate lists are built for every vertex
    and every pair drawn from them that is not yet in ``tried`` is scored
    with ``dist`` and pushed onto ``current_graph`` in both directions. Both
    ``current_graph`` and ``tried`` are updated in place.

    Parameters
    ----------
    current_graph : heap
        Graph heap to refine; updated in place.
    data : array
        Points, one per row.
    n_neighbors : int
        Forwarded to ``new_build_candidates`` and used in the stop threshold.
    rng_state : array
        Random state forwarded to ``new_build_candidates``.
    tried : set of (int, int)
        Pairs that were already scored; extended in place.
    max_candidates : int
        Number of candidate slots scanned per vertex.
    dist : callable
        Invoked as ``dist(data[p], data[q], *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.
    n_iters : int
        Maximum number of refinement rounds.
    delta : float
        A round that records at most ``delta * n_neighbors * n_vertices``
        updates ends the refinement early.
    rho : float
        Forwarded to ``new_build_candidates``.
    verbose : bool
        When true, prints the round counter at the start of every round.
    seed_per_row : bool
        Forwarded to ``new_build_candidates``.

    Returns
    -------
    None
    """
    n_vertices = data.shape[0]

    for iteration in range(n_iters):
        if verbose:
            print("\t", iteration, " / ", n_iters)

        new_cands, old_cands = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            seed_per_row,
        )

        # Running total of unchecked_heap_push return values for this round.
        n_updates = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_cands[0, i, j])
                if p < 0:
                    continue

                # New-vs-new pairs; starting at j avoids revisiting (k, j).
                for k in range(j, max_candidates):
                    q = int(new_cands[0, i, k])
                    if q >= 0 and (p, q) not in tried:
                        d = dist(data[p], data[q], *dist_args)
                        n_updates += unchecked_heap_push(current_graph, p, d, q, 1)
                        tried.add((p, q))
                        if p != q:
                            n_updates += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            tried.add((q, p))

                # New-vs-old pairs.
                for k in range(max_candidates):
                    q = int(old_cands[0, i, k])
                    if q >= 0 and (p, q) not in tried:
                        d = dist(data[p], data[q], *dist_args)
                        n_updates += unchecked_heap_push(current_graph, p, d, q, 1)
                        tried.add((p, q))
                        if p != q:
                            n_updates += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            tried.add((q, p))

        # Too few updates in this round: treat the graph as converged.
        if n_updates <= delta * n_neighbors * n_vertices:
            return
def sparse_nn_descent(
    inds,
    indptr,
    data,
    n_vertices,
    n_neighbors,
    rng_state,
    max_candidates=50,
    sparse_dist=sparse_euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
):
    """Build a neighbor graph for sparse (compressed-row) data.

    The graph heap is first filled from random samples (and, optionally,
    from ``sparse_init_rp_tree``). Then, for up to ``n_iters`` rounds,
    candidate lists are built for every vertex and every untried pair drawn
    from them is scored with ``sparse_dist`` and pushed onto the heap in
    both directions.

    Parameters
    ----------
    inds, indptr, data : arrays
        Compressed-row storage of the points; row ``v`` occupies
        ``indptr[v]:indptr[v + 1]`` of ``inds`` and ``data``.
    n_vertices : int
        Number of rows.
    n_neighbors : int
        Heap width per vertex and number of random samples drawn per vertex.
    rng_state : array
        Random state passed to ``rejection_sample`` and
        ``new_build_candidates``.
    max_candidates : int
        Number of candidate slots scanned per vertex; forwarded to
        ``new_build_candidates``.
    sparse_dist : callable
        Invoked as
        ``sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``sparse_dist``.
    n_iters : int
        Maximum number of refinement rounds.
    delta : float
        A round that records at most ``delta * n_neighbors * n_vertices``
        updates ends the refinement early.
    rho : float
        Forwarded to ``new_build_candidates``.
    rp_tree_init : bool
        When true, ``sparse_init_rp_tree`` is called with ``leaf_array``
        before refinement starts.
    leaf_array : array or None
        Forwarded to ``sparse_init_rp_tree``.
    verbose : bool
        When true, prints the round counter at the start of every round.

    Returns
    -------
    The result of ``deheap_sort`` applied to the final graph heap.
    """
    # Seeded with a dummy pair that never equals a pair of valid indices.
    # NOTE(review): presumably present so the set's element type can be
    # inferred under Numba - confirm before simplifying.
    tried = set([(-1, -1)])

    current_graph = make_heap(n_vertices, n_neighbors)

    # Random initialisation: link every vertex to a sample of other rows.
    for i in range(n_vertices):
        indices = rejection_sample(n_neighbors, n_vertices, rng_state)

        # Row i is the same for every sampled partner, so slice it once.
        from_inds = inds[indptr[i]:indptr[i + 1]]
        from_data = data[indptr[i]:indptr[i + 1]]

        for j in range(indices.shape[0]):
            to_inds = inds[indptr[indices[j]]:indptr[indices[j] + 1]]
            to_data = data[indptr[indices[j]]:indptr[indices[j] + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            heap_push(current_graph, i, d, indices[j], 1)
            heap_push(current_graph, indices[j], d, i, 1)
            tried.add((i, indices[j]))
            tried.add((indices[j], i))

    if rp_tree_init:
        sparse_init_rp_tree(
            inds,
            indptr,
            data,
            sparse_dist,
            dist_args,
            current_graph,
            leaf_array,
            tried=tried,
        )

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors, old_candidate_neighbors) = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            False,
        )

        # Running total of unchecked_heap_push return values for this round.
        c = 0
        for i in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, i, j])
                if p < 0:
                    continue

                # Row p is the same for every q below, so slice it once
                # instead of once per inner iteration.
                from_inds = inds[indptr[p]:indptr[p + 1]]
                from_data = data[indptr[p]:indptr[p + 1]]

                # New-vs-new pairs; starting at j avoids revisiting (k, j).
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

                # New-vs-old pairs.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, i, k])
                    if q < 0 or (p, q) in tried:
                        continue

                    to_inds = inds[indptr[q]:indptr[q + 1]]
                    to_data = data[indptr[q]:indptr[q + 1]]

                    d = sparse_dist(
                        from_inds, from_data, to_inds, to_data, *dist_args
                    )
                    c += unchecked_heap_push(current_graph, p, d, q, 1)
                    tried.add((p, q))
                    if p != q:
                        c += unchecked_heap_push(current_graph, q, d, p, 1)
                        tried.add((q, p))

        # Too few updates in this round: treat the graph as converged.
        if c <= delta * n_neighbors * n_vertices:
            break

    return deheap_sort(current_graph)