Exemplo n.º 1
0
def search_init(
    query_inds,
    query_data,
    k,
    inds,
    indptr,
    data,
    forest,
    n_neighbors,
    tried,
    sparse_dist,
    rng_state,
):
    """Seed a size-``k`` candidate heap for a single sparse query.

    Each tree in ``forest`` is searched with ``search_sparse_flat_tree`` and
    every point it returns is scored against the query and offered to the
    heap.  If fewer than ``min(k, n_neighbors)`` distinct points were seeded
    that way, the shortfall is made up with randomly drawn points.

    Unlike a naive per-tree loop, points already recorded in ``tried`` are
    skipped, so a point that sits in the leaves of several trees is pushed
    only once, and the random top-up is computed from the number of distinct
    points seeded over *all* trees rather than from the size of the last
    tree's leaf alone.

    Parameters
    ----------
    query_inds, query_data
        Sparse representation of the query; passed through unchanged to
        ``search_sparse_flat_tree`` and ``sparse_dist``.
    k
        Length of the returned heap arrays.
    inds, indptr, data
        Sparse rows of the indexed points: row ``c`` is
        ``inds[indptr[c]:indptr[c + 1]]`` / ``data[indptr[c]:indptr[c + 1]]``.
    forest
        Iterable of trees exposing ``hyperplanes``, ``offsets``, ``children``
        and ``indices`` attributes.
    n_neighbors
        Together with ``k`` bounds the number of initial candidates wanted
        (``min(k, n_neighbors)``).
    tried
        Visited-set structure understood by ``has_been_visited`` and
        ``mark_visited``; it is updated in place.  Assumed to carry no marks
        for points the caller still wants considered -- TODO confirm against
        callers.
    sparse_dist
        Callable ``sparse_dist(inds_a, data_a, inds_b, data_b)`` returning
        the distance between two sparse rows.
    rng_state
        State consumed by ``tau_rand_int`` and ``search_sparse_flat_tree``.

    Returns
    -------
    heap_priorities, heap_indices
        ``float32`` and ``int32`` arrays of length ``k``, initialised to
        ``inf`` and ``-1`` respectively and then filled through
        ``simple_heap_push``.
    """
    heap_priorities = np.float32(np.inf) + np.zeros(k, dtype=np.float32)
    heap_indices = np.int32(-1) + np.zeros(k, dtype=np.int32)
    n_samples = indptr.shape[0] - 1

    # Number of distinct points pushed from the trees so far.
    n_seeded = 0

    for tree in forest:
        leaf_indices = search_sparse_flat_tree(
            query_inds,
            query_data,
            tree.hyperplanes,
            tree.offsets,
            tree.children,
            tree.indices,
            rng_state,
        )

        for j in range(leaf_indices.shape[0]):
            candidate = leaf_indices[j]

            # Indices are distinct within one leaf, but the same point can
            # show up in the leaves of different trees; pushing it again
            # would put a duplicate entry in the heap.
            if has_been_visited(tried, candidate) != 0:
                continue
            mark_visited(tried, candidate)

            from_inds = inds[indptr[candidate] : indptr[candidate + 1]]
            from_data = data[indptr[candidate] : indptr[candidate + 1]]

            d = sparse_dist(from_inds, from_data, query_inds, query_data)

            simple_heap_push(heap_priorities, heap_indices, d, candidate)
            n_seeded += 1

    # Top up with random points; a non-positive count makes the range empty.
    n_random_samples = min(k, n_neighbors) - n_seeded
    for i in range(n_random_samples):
        candidate = np.abs(tau_rand_int(rng_state)) % n_samples
        if has_been_visited(tried, candidate) == 0:
            from_inds = inds[indptr[candidate] : indptr[candidate + 1]]
            from_data = data[indptr[candidate] : indptr[candidate + 1]]

            d = sparse_dist(from_inds, from_data, query_inds, query_data)

            simple_heap_push(heap_priorities, heap_indices, d, candidate)
            mark_visited(tried, candidate)

    return heap_priorities, heap_indices
Exemplo n.º 2
0
def search_from_init(
    query_inds,
    query_data,
    inds,
    indptr,
    data,
    search_indptr,
    search_inds,
    heap_priorities,
    heap_indices,
    epsilon,
    tried,
    sparse_dist,
    dist_args,
):
    """Refine an initialised candidate heap by walking the search graph.

    Every slot of the incoming heap is used as a seed.  Seeds are expanded in
    order of increasing distance: the neighbours of the current seed (read
    from ``search_inds`` via ``search_indptr``) that have not been tried yet
    are scored against the query, and those closer than the current bound are
    pushed both into the result heap and into the seed queue.  The walk ends
    when the closest remaining seed is not below the bound or no seeds are
    left.

    Parameters
    ----------
    query_inds, query_data
        Sparse representation of the query, forwarded to ``sparse_dist``.
    inds, indptr, data
        Sparse rows of the indexed points: row ``c`` is
        ``inds[indptr[c]:indptr[c + 1]]`` / ``data[indptr[c]:indptr[c + 1]]``.
    search_indptr, search_inds
        Adjacency of the search graph: the neighbours of vertex ``v`` are
        ``search_inds[search_indptr[v]:search_indptr[v + 1]]``.
    heap_priorities, heap_indices
        Heap arrays to refine; updated in place through ``simple_heap_push``.
        ``heap_priorities[0]`` is read as the current bound (presumably the
        largest retained distance -- TODO confirm against the heap helper).
    epsilon
        Slack on the bound: candidates and seeds are accepted while their
        distance is below ``(1 + epsilon) * heap_priorities[0]``.
    tried
        Visited-set structure understood by ``has_been_visited`` and
        ``mark_visited``; updated in place.
    sparse_dist
        Distance callable, invoked as
        ``sparse_dist(row_inds, row_data, query_inds, query_data, *dist_args)``.
    dist_args
        Extra positional arguments unpacked into ``sparse_dist``.

    Returns
    -------
    heap_priorities, heap_indices
        The same two arrays that were passed in.
    """
    bound_factor = 1.0 + epsilon
    distance_bound = bound_factor * heap_priorities[0]

    # Turn the current heap contents into a min-ordered queue of seeds.
    seed_set = [
        (heap_priorities[slot], heap_indices[slot])
        for slot in range(heap_priorities.shape[0])
    ]
    heapq.heapify(seed_set)

    # Start from the closest seed.
    d_vertex, vertex = heapq.heappop(seed_set)

    while d_vertex < distance_bound:

        for j in range(search_indptr[vertex], search_indptr[vertex + 1]):
            candidate = search_inds[j]

            if has_been_visited(tried, candidate) != 0:
                continue
            mark_visited(tried, candidate)

            row_start = indptr[candidate]
            row_end = indptr[candidate + 1]
            d = sparse_dist(
                inds[row_start:row_end],
                data[row_start:row_end],
                query_inds,
                query_data,
                *dist_args
            )

            if not (d < distance_bound):
                continue

            simple_heap_push(heap_priorities, heap_indices, d, candidate)
            heapq.heappush(seed_set, (d, candidate))
            # The heap root may have changed, so refresh the bound.
            distance_bound = bound_factor * heap_priorities[0]

        # Move on to the next closest seed, if any remain.
        if len(seed_set) == 0:
            break
        d_vertex, vertex = heapq.heappop(seed_set)

    return heap_priorities, heap_indices
Exemplo n.º 3
0
    def custom_search_closure(query_points, candidate_indices, k, epsilon,
                              visited):
        """Run a graph search for every row of ``query_points``.

        For each query the same ``candidate_indices`` are scored and used as
        seeds, then the graph described by the enclosing scope's ``indptr`` /
        ``indices`` is walked outward from the closest seed while seeds stay
        below ``(1 + epsilon)`` times the heap's root priority.

        When the enclosing ``dist`` is ``alternative_dot`` or
        ``alternative_cosine`` the query is divided by its Euclidean norm
        first; a query whose norm is not positive is skipped and its heap row
        is left as ``make_heap`` created it.

        Parameters
        ----------
        query_points
            2-D array-like of queries, one per row.
        candidate_indices
            1-D array of starting point indices, shared by all queries.
        k
            Heap width handed to ``make_heap``.
        epsilon
            Slack applied to the distance bound.
        visited
            Visited-set buffer; cleared with ``visited[:] = 0`` at the start
            of each query and updated through ``mark_visited``.

        Returns
        -------
        The structure returned by ``make_heap``; ``result[0][i]`` is used as
        the index row and ``result[1][i]`` as the priority row of query ``i``.
        """
        result = make_heap(query_points.shape[0], k)
        bound_factor = 1.0 + epsilon

        for i in range(query_points.shape[0]):
            visited[:] = 0

            if dist == alternative_dot or dist == alternative_cosine:
                norm = np.sqrt((query_points[i]**2).sum())
                if not (norm > 0.0):
                    # Nothing sensible to search for with a zero-norm query.
                    continue
                query_vec = query_points[i] / norm
            else:
                query_vec = query_points[i]

            heap_priorities = result[1][i]
            heap_indices = result[0][i]
            # Empty list built from a typed template element; presumably this
            # fixes the element type for a JIT compiler -- TODO confirm.
            seed_set = [(np.float32(np.inf), np.int32(-1)) for j in range(0)]

            # ---- Initialisation: score and seed the given candidates ----
            for j in range(candidate_indices.shape[0]):
                candidate = np.int32(candidate_indices[j])
                d = dist(data[candidate], query_vec)
                # The candidate indices are guaranteed to be distinct.
                simple_heap_push(heap_priorities, heap_indices, d, candidate)
                heapq.heappush(seed_set, (d, candidate))
                mark_visited(visited, candidate)

            # ---- Search: expand seeds in order of increasing distance ----
            distance_bound = bound_factor * heap_priorities[0]

            # Start from the closest seed.
            d_vertex, vertex = heapq.heappop(seed_set)

            while d_vertex < distance_bound:

                for j in range(indptr[vertex], indptr[vertex + 1]):
                    candidate = indices[j]

                    if has_been_visited(visited, candidate) != 0:
                        continue
                    mark_visited(visited, candidate)

                    d = dist(data[candidate], query_vec)
                    if not (d < distance_bound):
                        continue

                    simple_heap_push(heap_priorities, heap_indices, d,
                                     candidate)
                    heapq.heappush(seed_set, (d, candidate))
                    # The heap root may have changed, so refresh the bound.
                    distance_bound = bound_factor * heap_priorities[0]

                # Move on to the next closest seed, if any remain.
                if len(seed_set) == 0:
                    break
                d_vertex, vertex = heapq.heappop(seed_set)

        return result