def sparse_current_graph_map_jit(
    heap,
    rows,
    n_neighbors,
    inds,
    indptr,
    data,
    rng_state,
    seed_per_row,
    sparse_dist,
):
    """Top up under-filled neighbor heaps with randomly drawn sparse rows.

    For every row ``i`` in ``rows`` whose first slot ``heap[0, i, 0]`` is
    negative, one random row is drawn for each slot of ``heap[0, i]`` that is
    not ``>= 0``. The distance between row ``i`` and the drawn row is pushed
    onto row ``i``'s heap via ``heap_push`` with flag ``1``.

    Parameters
    ----------
    heap : array
        Indexed as ``heap[0, row, slot]``; negative values in ``heap[0]`` are
        treated as unfilled slots. Modified through ``heap_push``.
    rows : iterable of int
        Row indices to process.
    n_neighbors : int
        Target number of filled slots per row.
    inds, indptr, data : arrays
        Compressed-row layout: row ``r`` owns ``inds[indptr[r]:indptr[r + 1]]``
        and ``data[indptr[r]:indptr[r + 1]]``.
    rng_state : array
        Random state; it is copied, so the caller's array is not modified.
    seed_per_row : bool
        When true, ``seed`` is called on the local state for every row.
    sparse_dist : callable
        Called as ``sparse_dist(from_inds, from_data, to_inds, to_data)``.

    Returns
    -------
    bool
        Always ``True``.
    """
    rng_state_local = rng_state.copy()

    # ``data`` stores one entry per stored value, not one per row, so the
    # number of rows has to come from ``indptr`` (which has n_rows + 1
    # entries). Sampling modulo ``data.shape[0]`` could yield an ``idx`` past
    # the last row and index ``indptr`` out of range.
    n_rows = len(indptr) - 1

    for i in rows:
        if seed_per_row:
            seed(rng_state_local, i)

        if heap[0, i, 0] < 0.0:
            # The source row does not change while its heap is topped up.
            from_inds = inds[indptr[i]:indptr[i + 1]]
            from_data = data[indptr[i]:indptr[i + 1]]

            for _ in range(n_neighbors - np.sum(heap[0, i] >= 0.0)):
                idx = np.abs(tau_rand_int(rng_state_local)) % n_rows

                to_inds = inds[indptr[idx]:indptr[idx + 1]]
                to_data = data[indptr[idx]:indptr[idx + 1]]

                d = sparse_dist(from_inds, from_data, to_inds, to_data)
                heap_push(heap, i, d, idx, 1)

    return True
def candidates_map_jit(
    rows, n_neighbors, current_graph, heap_updates, offset, rng_state, seed_per_row
):
    """Write candidate update records for every filled slot of a graph chunk.

    For each row ``i`` in ``rows`` and each of its ``n_neighbors`` slots whose
    entry ``current_graph[0, i - offset, slot]`` is not negative, two records
    are written to consecutive rows of ``heap_updates``:
    ``(i, d, idx, isn)`` and ``(idx, d, i, isn)``, where ``idx`` is the slot's
    entry in ``current_graph[0]``, ``isn`` the matching entry in
    ``current_graph[2]`` and ``d`` a fresh draw from ``tau_rand``.

    Parameters
    ----------
    rows : iterable of int
        Row indices to process; ``offset`` is subtracted before indexing
        ``current_graph``.
    n_neighbors : int
        Number of slots inspected per row.
    current_graph : array
        Indexed as ``current_graph[layer, row, slot]``; layers 0 and 2 are
        read.
    heap_updates : array
        Output buffer with at least four columns per record; must have room
        for two records per filled slot.
    offset : int
        Value subtracted from each row index to address ``current_graph``.
    rng_state : array
        Random state; copied so the caller's array is left untouched.
    seed_per_row : bool
        When true, ``seed`` is called on the local state for every row.

    Returns
    -------
    int
        Number of records written to ``heap_updates``.
    """
    local_rng = rng_state.copy()
    n_written = 0

    for row in rows:
        if seed_per_row:
            seed(local_rng, row)

        local_row = row - offset
        for slot in range(n_neighbors):
            neighbor = current_graph[0, local_row, slot]
            if neighbor < 0:
                continue

            flag = current_graph[2, local_row, slot]
            # Every filled slot produces records (no sampling threshold is
            # applied here); the flag travels with each record so the
            # consumer can tell old from new entries.
            priority = tau_rand(local_rng)

            forward = heap_updates[n_written]
            forward[0] = row
            forward[1] = priority
            forward[2] = neighbor
            forward[3] = flag

            backward = heap_updates[n_written + 1]
            backward[0] = neighbor
            backward[1] = priority
            backward[2] = row
            backward[3] = flag

            n_written += 2

    return n_written
def current_graph_map_jit(
    rows,
    n_vertices,
    n_neighbors,
    data,
    heap_updates,
    rng_state,
    seed_per_row,
    dist,
    dist_args,
):
    """Write random-initialisation update records for a chunk of rows.

    For each row ``i`` in ``rows`` a set of indices is obtained from
    ``rejection_sample(n_neighbors, n_vertices, rng)``. For every sampled
    index ``s`` the distance ``dist(data[i], data[s], *dist_args)`` is
    computed and two records are written to consecutive rows of
    ``heap_updates``: ``(i, d, s, 1)`` and ``(s, d, i, 1)``.

    Parameters
    ----------
    rows : iterable of int
        Row indices to process.
    n_vertices : int
        Passed to ``rejection_sample`` as the sampling range.
    n_neighbors : int
        Passed to ``rejection_sample`` as the number of samples.
    data : array
        Indexed by row to obtain the vectors handed to ``dist``.
    heap_updates : array
        Output buffer with at least four columns per record.
    rng_state : array
        Random state; copied so the caller's array is left untouched.
    seed_per_row : bool
        When true, ``seed`` is called on the local state for every row.
    dist : callable
        Distance function.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.

    Returns
    -------
    int
        Number of records written to ``heap_updates``.
    """
    local_rng = rng_state.copy()
    n_written = 0

    for row in rows:
        if seed_per_row:
            seed(local_rng, row)

        sampled = rejection_sample(n_neighbors, n_vertices, local_rng)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            separation = dist(data[row], data[other], *dist_args)

            # Record the edge in both directions.
            forward = heap_updates[n_written]
            forward[0] = row
            forward[1] = separation
            forward[2] = other
            forward[3] = 1

            backward = heap_updates[n_written + 1]
            backward[0] = other
            backward[1] = separation
            backward[2] = row
            backward[3] = 1

            n_written += 2

    return n_written
def current_graph_map_jit(
    heap, rows, n_neighbors, data, rng_state, seed_per_row, dist, dist_args
):
    """Top up under-filled neighbor heaps with randomly drawn rows of ``data``.

    For every row ``i`` in ``rows`` whose first slot ``heap[0, i, 0]`` is
    negative, one random row index (modulo ``data.shape[0]``) is drawn for
    each slot of ``heap[0, i]`` that is not ``>= 0``. The distance
    ``dist(data[i], data[idx], *dist_args)`` is pushed onto row ``i``'s heap
    via ``heap_push`` with flag ``1``.

    Parameters
    ----------
    heap : array
        Indexed as ``heap[0, row, slot]``; negative values in ``heap[0]`` are
        treated as unfilled slots. Modified through ``heap_push``.
    rows : iterable of int
        Row indices to process.
    n_neighbors : int
        Target number of filled slots per row.
    data : array
        Indexed by row to obtain the vectors handed to ``dist``.
    rng_state : array
        Random state; copied so the caller's array is left untouched.
    seed_per_row : bool
        When true, ``seed`` is called on the local state for every row.
    dist : callable
        Distance function.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.

    Returns
    -------
    bool
        Always ``True``.
    """
    local_rng = rng_state.copy()
    n_points = data.shape[0]

    for row in rows:
        if seed_per_row:
            seed(local_rng, row)

        # Only rows whose first slot is still negative need topping up.
        if not (heap[0, row, 0] < 0.0):
            continue

        n_missing = n_neighbors - np.sum(heap[0, row] >= 0.0)
        for _ in range(n_missing):
            pick = np.abs(tau_rand_int(local_rng)) % n_points
            separation = dist(data[row], data[pick], *dist_args)
            heap_push(heap, row, separation, pick, 1)

    return True
def init_current_graph(
    data, dist, dist_args, n_neighbors, rng_state, seed_per_row=False
):
    """Build a neighbor heap seeded with randomly sampled neighbors.

    A heap is created with ``make_heap(data.shape[0], n_neighbors)``. For
    each row ``i`` a set of indices is drawn with ``rejection_sample`` and,
    for every sampled index ``s``, the distance
    ``dist(data[i], data[s], *dist_args)`` is pushed onto the heaps of both
    ``i`` and ``s`` with flag ``1``.

    Parameters
    ----------
    data : array
        Indexed by row to obtain the vectors handed to ``dist``.
    dist : callable
        Distance function.
    dist_args : tuple
        Extra positional arguments forwarded to ``dist``.
    n_neighbors : int
        Heap width and number of samples requested per row.
    rng_state : array
        Random state; used directly (not copied) by ``seed`` and
        ``rejection_sample``.
    seed_per_row : bool, optional
        When true, ``seed(rng_state, i)`` is called before sampling row ``i``.

    Returns
    -------
    The heap produced by ``make_heap`` after all pushes.
    """
    n_points = data.shape[0]
    graph = make_heap(n_points, n_neighbors)

    for row in range(n_points):
        if seed_per_row:
            seed(rng_state, row)

        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            separation = dist(data[row], data[other], *dist_args)
            # Register the pair on both endpoints' heaps.
            heap_push(graph, row, separation, other, 1)
            heap_push(graph, other, separation, row, 1)

    return graph
def sparse_current_graph_map_jit(
    rows,
    n_vertices,
    n_neighbors,
    inds,
    indptr,
    data,
    heap_updates,
    rng_state,
    seed_per_row,
    sparse_dist,
    dist_args,
):
    """Write random-initialisation update records for a chunk of sparse rows.

    For each row ``i`` in ``rows`` a set of indices is obtained from
    ``rejection_sample(n_neighbors, n_vertices, rng)``. For every sampled
    index ``s`` the sparse distance between rows ``i`` and ``s`` is computed
    and two records are written to consecutive rows of ``heap_updates``:
    ``(i, d, s, 1)`` and ``(s, d, i, 1)``.

    Parameters
    ----------
    rows : iterable of int
        Row indices to process.
    n_vertices : int
        Passed to ``rejection_sample`` as the sampling range.
    n_neighbors : int
        Passed to ``rejection_sample`` as the number of samples.
    inds, indptr, data : arrays
        Compressed-row layout: row ``r`` owns ``inds[indptr[r]:indptr[r + 1]]``
        and ``data[indptr[r]:indptr[r + 1]]``.
    heap_updates : array
        Output buffer with at least four columns per record.
    rng_state : array
        Random state; copied so the caller's array is left untouched.
    seed_per_row : bool
        When true, ``seed`` is called on the local state for every row.
    sparse_dist : callable
        Called as
        ``sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)``.
    dist_args : tuple
        Extra positional arguments forwarded to ``sparse_dist``.

    Returns
    -------
    int
        Number of records written to ``heap_updates``.
    """
    rng_state_local = rng_state.copy()
    count = 0

    for i in rows:
        if seed_per_row:
            seed(rng_state_local, i)

        indices = rejection_sample(n_neighbors, n_vertices, rng_state_local)

        # The source row is the same for every sampled neighbor, so slice it
        # once per row instead of once per sample.
        from_inds = inds[indptr[i]:indptr[i + 1]]
        from_data = data[indptr[i]:indptr[i + 1]]

        for j in range(indices.shape[0]):
            target = indices[j]
            to_inds = inds[indptr[target]:indptr[target + 1]]
            to_data = data[indptr[target]:indptr[target + 1]]

            d = sparse_dist(from_inds, from_data, to_inds, to_data, *dist_args)

            # Record the edge in both directions.
            hu = heap_updates[count]
            hu[0] = i
            hu[1] = d
            hu[2] = target
            hu[3] = 1
            count += 1

            hu = heap_updates[count]
            hu[0] = target
            hu[1] = d
            hu[2] = i
            hu[3] = 1
            count += 1

    return count
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    verbose=False,
    seed_per_row=False,
):
    """Iteratively refine a randomly initialised neighbor heap.

    The heap is first filled from random samples (both directions of each
    sampled pair are pushed), optionally refined by ``init_rp_tree``, and then
    improved for up to ``n_iters`` rounds. In each round candidate lists are
    built with ``new_build_candidates``; every not-yet-tried pair formed from
    a "new" candidate and another new or old candidate of the same vertex has
    its distance computed and is pushed onto both endpoints' heaps. Iteration
    stops early once the number of successful pushes in a round is at most
    ``delta * n_neighbors * data.shape[0]``.

    Parameters
    ----------
    data : array
        Indexed by row to obtain the vectors handed to ``dist``.
    n_neighbors : int
        Heap width and number of random samples per row.
    rng_state : array
        Random state; used directly by ``seed``, ``rejection_sample`` and
        ``new_build_candidates``.
    max_candidates : int, optional
        Number of candidate slots scanned per vertex.
    dist : callable, optional
        Distance function.
    dist_args : tuple, optional
        Extra positional arguments forwarded to ``dist``.
    n_iters : int, optional
        Maximum number of refinement rounds.
    delta : float, optional
        Early-stopping factor (see above).
    rho : float, optional
        Forwarded to ``new_build_candidates``.
    rp_tree_init : bool, optional
        When true, ``init_rp_tree`` is run after random initialisation.
    leaf_array : optional
        Forwarded to ``init_rp_tree``.
    verbose : bool, optional
        When true, the round counter is printed each iteration.
    seed_per_row : bool, optional
        When true, ``seed(rng_state, i)`` is called before sampling row ``i``;
        also forwarded to ``new_build_candidates``.

    Returns
    -------
    The result of ``deheap_sort`` applied to the refined heap.
    """
    n_vertices = data.shape[0]

    # Pairs whose distance has already been evaluated. The set starts with a
    # dummy pair (presumably so its element type is known up front -- TODO
    # confirm).
    tried = set([(-1, -1)])

    current_graph = make_heap(n_vertices, n_neighbors)

    # --- Random initialisation -------------------------------------------
    for row in range(n_vertices):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_vertices, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            d = dist(data[row], data[other], *dist_args)
            heap_push(current_graph, row, d, other, 1)
            heap_push(current_graph, other, d, row, 1)
            tried.add((row, other))
            tried.add((other, row))

    if rp_tree_init:
        init_rp_tree(data, dist, dist_args, current_graph, leaf_array, tried=tried)

    # --- Refinement rounds -----------------------------------------------
    stop_threshold = delta * n_neighbors * n_vertices

    for n in range(n_iters):
        if verbose:
            print("\t", n, " / ", n_iters)

        (new_candidate_neighbors, old_candidate_neighbors) = new_build_candidates(
            current_graph,
            n_vertices,
            n_neighbors,
            max_candidates,
            rng_state,
            rho,
            seed_per_row,
        )

        n_changes = 0
        for vertex in range(n_vertices):
            for j in range(max_candidates):
                p = int(new_candidate_neighbors[0, vertex, j])
                if p < 0:
                    continue

                # New candidate against new candidates at positions >= j.
                for k in range(j, max_candidates):
                    q = int(new_candidate_neighbors[0, vertex, k])
                    if q >= 0 and (p, q) not in tried:
                        d = dist(data[p], data[q], *dist_args)
                        n_changes += unchecked_heap_push(current_graph, p, d, q, 1)
                        tried.add((p, q))
                        if p != q:
                            n_changes += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            tried.add((q, p))

                # New candidate against every old candidate.
                for k in range(max_candidates):
                    q = int(old_candidate_neighbors[0, vertex, k])
                    if q >= 0 and (p, q) not in tried:
                        d = dist(data[p], data[q], *dist_args)
                        n_changes += unchecked_heap_push(current_graph, p, d, q, 1)
                        tried.add((p, q))
                        if p != q:
                            n_changes += unchecked_heap_push(
                                current_graph, q, d, p, 1
                            )
                            tried.add((q, p))

        # Stop once a round changes too few heap entries.
        if n_changes <= stop_threshold:
            break

    return deheap_sort(current_graph)
def nn_descent(
    data,
    n_neighbors,
    rng_state,
    max_candidates=50,
    dist=dist.euclidean,
    dist_args=(),
    n_iters=10,
    delta=0.001,
    rho=0.5,
    rp_tree_init=True,
    leaf_array=None,
    low_memory=False,
    verbose=False,
    seed_per_row=False,
):
    """Initialise a neighbor heap and refine it with the selected strategy.

    The heap is filled from random samples (both directions of each sampled
    pair are pushed and recorded in a ``tried`` set), optionally refined by
    ``init_rp_tree``, and then handed to either
    ``nn_descent_internal_low_memory`` or ``nn_descent_internal_high_memory``
    depending on ``low_memory``. Only the high-memory routine receives the
    ``tried`` set.

    Parameters
    ----------
    data : array
        Indexed by row to obtain the vectors handed to ``dist``.
    n_neighbors : int
        Heap width and number of random samples per row.
    rng_state : array
        Random state; used directly by ``seed`` and ``rejection_sample`` and
        forwarded to the internal routine.
    max_candidates, n_iters, delta, rho : optional
        Forwarded unchanged to the internal routine.
    dist : callable, optional
        Distance function.
    dist_args : tuple, optional
        Extra positional arguments forwarded to ``dist``.
    rp_tree_init : bool, optional
        When true, ``init_rp_tree`` is run after random initialisation.
    leaf_array : optional
        Forwarded to ``init_rp_tree``.
    low_memory : bool, optional
        Selects the low-memory internal routine when true, the high-memory
        one otherwise.
    verbose : bool, optional
        Forwarded to the internal routine.
    seed_per_row : bool, optional
        When true, ``seed(rng_state, i)`` is called before sampling row ``i``;
        also forwarded to the internal routine.

    Returns
    -------
    The result of ``deheap_sort`` applied to the refined heap.
    """
    n_points = data.shape[0]

    # Pairs whose distance has already been evaluated. The set starts with a
    # dummy pair (presumably so its element type is known up front -- TODO
    # confirm).
    tried = set([(-1, -1)])

    current_graph = make_heap(n_points, n_neighbors)

    # Random initialisation: push each sampled pair in both directions.
    for row in range(n_points):
        if seed_per_row:
            seed(rng_state, row)
        sampled = rejection_sample(n_neighbors, n_points, rng_state)
        for pos in range(sampled.shape[0]):
            other = sampled[pos]
            d = dist(data[row], data[other], *dist_args)
            heap_push(current_graph, row, d, other, 1)
            heap_push(current_graph, other, d, row, 1)
            tried.add((row, other))
            tried.add((other, row))

    if rp_tree_init:
        init_rp_tree(data, dist, dist_args, current_graph, leaf_array, tried=tried)

    if not low_memory:
        nn_descent_internal_high_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            tried,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )
    else:
        nn_descent_internal_low_memory(
            current_graph,
            data,
            n_neighbors,
            rng_state,
            max_candidates=max_candidates,
            dist=dist,
            dist_args=dist_args,
            n_iters=n_iters,
            delta=delta,
            rho=rho,
            verbose=verbose,
            seed_per_row=seed_per_row,
        )

    return deheap_sort(current_graph)