Пример #1
0
def compute_GT():
    """Compute brute-force L2 ground-truth neighbours on GPU, block by block.

    Works on names that are not parameters of this function: ``xb`` and
    ``xq`` (database and query arrays), ``nq_gt`` (number of queries that
    are evaluated) and ``gt_sl`` (number of neighbours kept per query), plus
    the helpers ``sanitize``, ``make_vres_vdev``, ``dataset_iterator`` and
    ``IdentPreproc``.

    The database is streamed in blocks of ``bs`` vectors.  Each block is
    loaded into a flat L2 GPU index, searched, and its results are merged
    into a per-query max-heap array, so only one block has to be resident
    in the index at a time.

    Returns:
        The ``(nq_gt, gt_sl)`` int64 array of neighbour ids that backs the
        heap array.
    """
    print("compute GT")
    t0 = time.time()

    # Result buffers.  The heap array below writes into them through raw
    # pointers, so they are the actual storage of the heaps.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10**5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # The index was emptied before this block was added, so the ids it
        # returns are block-local; i0 is added to shift them
        # (presumably i0 is the block's offset in xb -- confirm against
        # dataset_iterator).
        I += i0
        heaps.addn_with_ids(gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # "\r" rewrites the same console line; flush because no newline is
        # emitted and the text would otherwise stay in the output buffer.
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0),
              end=' ', flush=True)
    print()
    # Final pass over the heaps before the result arrays are handed out
    # (faiss documents reorder() as "reorder all the heaps").
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
Пример #2
0
def search_knn(xq, xb, k, distance_type=faiss.METRIC_L2):
    """Brute-force k-nearest-neighbour search without building an index.

    Thin wrapper around ``faiss.knn_L2sqr`` / ``faiss.knn_inner_product``.

    Args:
        xq: query vectors, 2-D array-like of shape ``(nq, d)``.
        xb: database vectors, 2-D array-like of shape ``(nb, d)``.
        k: number of results returned per query.
        distance_type: ``faiss.METRIC_L2`` (default) or
            ``faiss.METRIC_INNER_PRODUCT``.

    Returns:
        ``(D, I)``: a float32 array and an int64 array, both of shape
        ``(nq, k)``, filled in by the selected faiss routine.

    Raises:
        ValueError: if ``distance_type`` is not one of the two supported
            metrics (previously uninitialised buffers were returned).
        AssertionError: if the two inputs do not have the same dimension.
    """
    # The faiss routines receive raw float pointers obtained via swig_ptr,
    # so make sure the buffers really are C-contiguous float32.  These calls
    # return the input unchanged when it already complies.
    xq = np.ascontiguousarray(xq, dtype='float32')
    xb = np.ascontiguousarray(xb, dtype='float32')

    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2

    # L2 results are collected in max-heaps, inner-product results in
    # min-heaps; apart from that both code paths are identical.
    if distance_type == faiss.METRIC_L2:
        heaps = faiss.float_maxheap_array_t()
        knn_function = faiss.knn_L2sqr
    elif distance_type == faiss.METRIC_INNER_PRODUCT:
        heaps = faiss.float_minheap_array_t()
        knn_function = faiss.knn_inner_product
    else:
        raise ValueError(
            "unsupported distance_type: %r" % (distance_type,))

    I = np.empty((nq, k), dtype='int64')
    D = np.empty((nq, k), dtype='float32')

    # The heap array uses D and I as its storage.
    heaps.k = k
    heaps.nh = nq
    heaps.val = faiss.swig_ptr(D)
    heaps.ids = faiss.swig_ptr(I)

    knn_function(faiss.swig_ptr(xq), faiss.swig_ptr(xb), d, nq, nb, heaps)
    return D, I
Пример #3
0
def compute_GT_CPU(xb, xq, gt_sl):
    """Compute brute-force L2 ground truth on CPU, one database block at a time.

    Args:
        xb: database vectors; its second dimension sets the index dimension.
        xq: query vectors; every row is searched.
        gt_sl: number of neighbours kept per query.

    Returns:
        ``(gt_I, gt_D)``: int64 ids and float32 distances, both of shape
        ``(number of queries, gt_sl)``.
    """
    nq_gt, _ = xq.shape
    print("compute GT CPU")
    started = time.time()

    # Output buffers; the heap array below uses them as its storage.
    result_shape = (nq_gt, gt_sl)
    neighbour_ids = np.zeros(result_shape, dtype=np.int64)
    neighbour_dists = np.zeros(result_shape, dtype=np.float32)

    result_heaps = faiss.float_maxheap_array_t()
    result_heaps.nh = nq_gt
    result_heaps.k = gt_sl
    result_heaps.ids = faiss.swig_ptr(neighbour_ids)
    result_heaps.val = faiss.swig_ptr(neighbour_dists)
    result_heaps.heapify()

    block_size = 10 ** 5
    _, dim = xb.shape
    queries = sanitize(xq[:nq_gt])
    flat_index = faiss.IndexFlatL2(dim)

    # Search every block on its own and merge its results into the heaps.
    for offset, block in dataset_iterator(xb, IdentPreproc(dim), block_size):
        flat_index.add(block)
        block_dists, block_ids = flat_index.search(queries, gt_sl)
        # Ids come back relative to the freshly filled index; shift them
        # by the value the iterator yielded for this block.
        block_ids += offset
        dists_ptr = faiss.swig_ptr(block_dists)
        ids_ptr = faiss.swig_ptr(block_ids)
        result_heaps.addn_with_ids(gt_sl, dists_ptr, ids_ptr, gt_sl)
        flat_index.reset()
    result_heaps.reorder()

    elapsed = time.time() - started
    print("GT CPU time: {} s".format(elapsed))
    return neighbour_ids, neighbour_dists
Пример #4
0
 def __init__(self, nq, k):
     """Allocate the result buffers and the max-heap array built on them.

     nq: number of query vectors, k: number of results per query
     """
     self.nq, self.k = nq, k

     # Ids (int64) and distances (float32), one row of k entries per query.
     buffer_shape = (nq, k)
     self.I = np.zeros(buffer_shape, dtype=np.int64)
     self.D = np.zeros(buffer_shape, dtype=np.float32)

     # The heap array points straight at self.D / self.I.
     heap_array = faiss.float_maxheap_array_t()
     heap_array.nh = nq
     heap_array.k = k
     heap_array.ids = faiss.swig_ptr(self.I)
     heap_array.val = faiss.swig_ptr(self.D)
     heap_array.heapify()
     self.heaps = heap_array
Пример #5
0
def compute_GT_GPU(xb, xq, gt_sl, tempmem=3500 * 1024 * 1024):
    """Compute brute-force L2 ground truth on all visible GPUs, block by block.

    Args:
        xb: database vectors; its second dimension sets the index dimension.
        xq: query vectors; every row is searched.
        gt_sl: number of neighbours kept per query.
        tempmem: value passed to ``setTempMemory`` on every GPU resource
            object (the default, 3500 MiB, is the former hard-coded value).
            Adjust it to the memory size of your GPUs.

    Returns:
        ``(gt_I, gt_D)``: int64 ids and float32 distances, both of shape
        ``(number of queries, gt_sl)``.

    Raises:
        RuntimeError: if faiss reports that no GPU is available.
    """
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    # Result buffers; the heap array below uses them as its storage.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    _, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    ngpu = faiss.get_num_gpus()
    if ngpu == 0:
        # Fail early with a clear message instead of handing empty device
        # lists to faiss.
        raise RuntimeError("compute_GT_GPU: no GPU available")

    # gpu_resources holds a Python reference to every resource object for
    # the duration of the function (presumably so they are not collected
    # while the GPU index still uses them).
    gpu_resources = []
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)
        vdev.push_back(i)
        vres.push_back(res)

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # Ids come back relative to the freshly filled index; shift them
        # by the value the iterator yielded for this block.
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
Пример #6
0
def compute_GT():
    """Compute brute-force L2 ground-truth neighbours on GPU, block by block.

    Works on names that are not parameters of this function: ``xb`` and
    ``xq`` (database and query arrays), ``nq_gt`` (number of queries that
    are evaluated) and ``gt_sl`` (number of neighbours kept per query), plus
    the helpers ``sanitize``, ``make_vres_vdev``, ``dataset_iterator`` and
    ``IdentPreproc``.

    The database is streamed in blocks of ``bs`` vectors.  Each block is
    loaded into a flat L2 GPU index, searched, and its results are merged
    into a per-query max-heap array, so only one block has to be resident
    in the index at a time.

    Returns:
        The ``(nq_gt, gt_sl)`` int64 array of neighbour ids that backs the
        heap array.
    """
    print("compute GT")
    t0 = time.time()

    # Result buffers.  The heap array below writes into them through raw
    # pointers, so they are the actual storage of the heaps.
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # The index was emptied before this block was added, so the ids it
        # returns are block-local; i0 is added to shift them
        # (presumably i0 is the block's offset in xb -- confirm against
        # dataset_iterator).
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # "\r" rewrites the same console line; flush because no newline is
        # emitted and the text would otherwise stay in the output buffer.
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0),
              end=' ', flush=True)
    print()
    # Final pass over the heaps before the result arrays are handed out
    # (faiss documents reorder() as "reorder all the heaps").
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
Пример #7
0
def _knn_search(queries, data, k, return_neighbours=False, res=None):
    """Brute-force L2 k-NN of ``queries`` against ``data``.

    Args:
        queries: 2-D collection of query vectors, shape ``(num_queries, dim)``.
        data: 2-D collection of database vectors.
        k: number of neighbours per query.
        return_neighbours: when true, also return the neighbour vectors
            gathered from ``data``, reshaped to ``(-1, k, dim)``.
        res: forwarded as first argument to ``faiss.bruteForceKnn``.  When
            ``None`` the ``faiss.knn_L2sqr`` path is taken instead and the
            outputs are NumPy arrays; otherwise ``queries`` and ``data`` are
            accessed through the torch tensor API and the outputs are torch
            tensors on ``queries.device``.

    Returns:
        ``(dists, idxs)`` or, with ``return_neighbours``,
        ``(dists, idxs, neighbours)``.
    """
    num_queries, dim = queries.shape

    if res is not None:
        target_device = queries.device
        dists = torch.empty(
            num_queries, k, dtype=torch.float32, device=target_device)
        idxs = torch.empty(
            num_queries, k, dtype=torch.int64, device=target_device)

        def float_ptr(tensor):
            # Address of the tensor's first element, counting 4 bytes per
            # storage element (assumes float32 -- true for dists; TODO
            # confirm callers pass float32 queries/data).
            address = tensor.storage().data_ptr() + tensor.storage_offset() * 4
            return faiss.cast_integer_to_float_ptr(address)

        def long_ptr(tensor):
            # Same as above with 8 bytes per element (idxs is int64).
            address = tensor.storage().data_ptr() + tensor.storage_offset() * 8
            return faiss.cast_integer_to_long_ptr(address)

        data_ptr = float_ptr(data)
        data_contiguous = data.is_contiguous()
        num_data = data.shape[0]
        queries_ptr = float_ptr(queries)
        queries_contiguous = queries.is_contiguous()
        faiss.bruteForceKnn(
            res, faiss.METRIC_L2,
            data_ptr, data_contiguous, num_data,
            queries_ptr, queries_contiguous, num_queries,
            dim, k,
            float_ptr(dists), long_ptr(idxs))
    else:
        result_shape = (num_queries, k)
        dists = np.empty(result_shape, dtype=np.float32)
        idxs = np.empty(result_shape, dtype=np.int64)

        # The heap array uses dists / idxs as its storage.
        heaps = faiss.float_maxheap_array_t()
        heaps.k = k
        heaps.nh = num_queries
        heaps.val = faiss.swig_ptr(dists)
        heaps.ids = faiss.swig_ptr(idxs)
        faiss.knn_L2sqr(faiss.swig_ptr(queries), faiss.swig_ptr(data), dim,
                        num_queries, data.shape[0], heaps)

    if not return_neighbours:
        return dists, idxs

    flat_ids = idxs.reshape(-1)
    neighbours = data[flat_ids].reshape(-1, k, dim)
    return dists, idxs, neighbours
Пример #8
0
    def test_doxygen_comments(self):
        # The wrapped heap-array object is expected to expose this
        # description through its __doc__ attribute.
        expected_text = "a template structure for a set of [min|max]-heaps"
        heap_array = faiss.float_maxheap_array_t()
        documentation = heap_array.__doc__

        self.assertTrue(expected_text in documentation)