Пример #1
0
def knn_ground_truth(xq, db_iterator, k, metric_type=faiss.METRIC_L2):
    """Compute exact k-nearest-neighbor results for a database read in blocks.

    The database possibly does not fit in RAM, so it is consumed through an
    iterator that returns it block by block. Each block is added to a flat
    index, searched, merged into a running result heap, and then dropped
    again before the next block is loaded.

    Args:
        xq: query matrix; ``xq.shape`` is unpacked as ``(nq, d)``.
        db_iterator: iterable yielding database blocks. Each block is passed
            to ``index.add`` and its ``shape[0]`` is taken as the number of
            vectors in the block.
        k: number of neighbors to keep per query.
        metric_type: faiss metric used to build the flat index.

    Returns:
        The ``(D, I)`` arrays of the finalized result heap. Ids in ``I`` are
        positions in the overall stream of vectors: every block's local ids
        are shifted by the number of vectors seen in earlier blocks.
    """
    LOG.info("knn_ground_truth queries size %s k=%d" % (xq.shape, k))
    t0 = time.time()
    nq, d = xq.shape

    # Inner product is a similarity: larger values are better matches, so the
    # heap has to keep the k largest values. With the default (keep the k
    # smallest) an inner-product search would return the k worst matches.
    keep_max = metric_type == faiss.METRIC_INNER_PRODUCT
    rh = faiss.ResultHeap(nq, k, keep_max=keep_max)

    index = faiss.IndexFlat(d, metric_type)
    if faiss.get_num_gpus():
        LOG.info('running on %d GPUs' % faiss.get_num_gpus())
        index = faiss.index_cpu_to_all_gpus(index)

    # compute ground-truth by blocks, and add to heaps
    i0 = 0  # number of database vectors consumed before the current block
    for xbi in db_iterator:
        ni = xbi.shape[0]
        index.add(xbi)
        D, I = index.search(xq, k)
        # the index only holds the current block, so its ids are block-local
        I += i0
        rh.add_result(D, I)
        # drop the block again so that memory use stays bounded
        index.reset()
        i0 += ni
        LOG.info("%d db elements, %.3f s" % (i0, time.time() - t0))

    rh.finalize()
    LOG.info("GT time: %.3f s (%d vectors)" % (time.time() - t0, i0))

    return rh.D, rh.I
Пример #2
0
def search_single_scan(index, xq, k, bs=128):
    """Search so that the inverted lists are accessed sequentially.

    The inverted lists are visited in blocks of ``bs`` consecutive list
    numbers. For every block, the query/probe pairs that fall outside the
    block are masked out and one ``search_preassigned`` call accumulates the
    hits of that block into a shared result heap.

    Args:
        index: index exposing ``nprobe``, ``nlist``, ``quantizer``,
            ``parallel_mode`` and ``search_preassigned``, or a
            ``faiss.IndexPreTransform`` wrapping such an index.
        xq: query vectors; ``len(xq)`` is the number of queries.
        k: number of results to keep per query.
        bs: number of consecutive inverted lists handled per pass.

    Returns:
        The ``(D, I)`` arrays of the finalized result heap.
    """

    # handle pretransform
    if isinstance(index, faiss.IndexPreTransform):
        xq = index.apply_py(xq)
        index = faiss.downcast_index(index.index)

    # coarse assignment (never ask the quantizer for more lists than exist)
    nprobe = min(index.nprobe, index.nlist)
    coarse_dis, assign = index.quantizer.search(xq, nprobe)
    nlist = index.nlist
    assign_buckets = assign // bs
    nq = len(xq)

    # For inner product the best results are the largest values, so the heap
    # must keep maxima; the default heap keeps the k smallest values.
    keep_max = index.metric_type == faiss.METRIC_INNER_PRODUCT
    rh = faiss.ResultHeap(nq, k, keep_max=keep_max)

    # Results of successive passes are accumulated directly in rh.D / rh.I,
    # so the index must not re-initialize these buffers on each call. The
    # flag is set only for the duration of this function: leaving it on the
    # caller's index would affect every later search made with it.
    saved_parallel_mode = index.parallel_mode
    index.parallel_mode = (
        saved_parallel_mode | index.PARALLEL_MODE_NO_HEAP_INIT
    )
    try:
        for l0 in range(0, nlist, bs):
            bucket_no = l0 // bs
            # keep only the probes that land in the current block of lists;
            # every other probe is replaced by -1
            sub_assign = assign.copy()
            sub_assign[assign_buckets != bucket_no] = -1

            index.search_preassigned(nq, faiss.swig_ptr(xq), k,
                                     faiss.swig_ptr(sub_assign),
                                     faiss.swig_ptr(coarse_dis),
                                     faiss.swig_ptr(rh.D),
                                     faiss.swig_ptr(rh.I),
                                     False, None)
    finally:
        index.parallel_mode = saved_parallel_mode

    rh.finalize()

    return rh.D, rh.I
    def search(self, x, k: int):
        """Query every sub-index with ``x`` and merge the partial results.

        Each entry of ``self.sub_indexes`` is searched for ``k`` results
        through ``self.pool.imap``; the per-index ``(D, I)`` pairs are folded
        into one result heap with one row per row of ``x``.

        Returns:
            The ``(D, I)`` arrays of the finalized result heap.
        """

        def query_sub_index(sub_index):
            return sub_index.search(x, k)

        merged = faiss.ResultHeap(x.shape[0], k)

        partial_results = self.pool.imap(query_sub_index, self.sub_indexes)
        for distances, labels in partial_results:
            merged.add_result(distances, labels)

        merged.finalize()
        return merged.D, merged.I
Пример #4
0
    def run_test(self, keep_max):
        """Check that a result heap gives the same answer in 1 or 3 steps.

        A table of distances and a table of ids are generated once. They are
        then fed to a ``faiss.ResultHeap`` either as a whole or as three
        column slices. After ``finalize`` every row of ``D`` must be sorted
        (non-increasing when ``keep_max`` is set, non-decreasing otherwise),
        and both ways of feeding the heap must produce equal ``D`` and ``I``.
        """
        nq, nb = 100, 1000
        top_k = 10
        # second argument of faiss.rand / faiss.randint is presumably the
        # random seed, 10000 presumably the id range -- TODO confirm
        distances = faiss.rand((nq, nb), 123)
        labels = faiss.randint((nq, nb), 1324, 10000)

        heaps = {}
        for nstep in (1, 3):
            heap = faiss.ResultHeap(nq, top_k, keep_max=keep_max)

            # column boundaries splitting the nb columns into nstep slices
            bounds = [step * nb // nstep for step in range(nstep + 1)]
            for lo, hi in zip(bounds[:-1], bounds[1:]):
                D = distances[:, lo:hi].copy()
                I = labels[:, lo:hi].copy()
                heap.add_result(D, I)
            heap.finalize()

            earlier, later = heap.D[:, :-1], heap.D[:, 1:]
            if keep_max:
                assert np.all(earlier >= later)
            else:
                assert np.all(earlier <= later)
            heaps[nstep] = heap

        single_pass, three_pass = heaps[1], heaps[3]
        np.testing.assert_equal(single_pass.D, three_pass.D)
        np.testing.assert_equal(single_pass.I, three_pass.I)