def query_distance(self, query_img_path, ref_path_list, embedding_weights):
        """Compute per-layer distances between a query image and reference images.

        The query image is embedded with ``self._embed_image``; only the
        layers of ``self.layer_names`` that appear in ``embedding_weights``
        are kept, and ``self._build_query_gram_dict`` turns those embeddings
        into one query vector per layer.  Each reference path is translated
        to an index id through the inverse of ``self.file_mapping`` (a path
        missing from that mapping raises ``KeyError``).

        NOTE(review): the visible text of this function ends after
        ``dist_dict`` is filled, without a ``return`` and without using
        ``start`` -- the snippet looks truncated; confirm against the
        original source before relying on it.
        """
        q_emb_list = self._embed_image(query_img_path)
        # keep only the layers the caller supplied a weight for
        q_emb_dict = {
            layer: q_emb_list[i]
            for i, layer in enumerate(self.layer_names)
            if layer in embedding_weights
        }
        query_gram_dict = self._build_query_gram_dict(q_emb_dict)

        start = dt.datetime.now()
        dist_dict = {}
        # file_mapping maps key -> path; invert it to look ids up by path
        rev_file_mapping = {v: k for k, v in self.file_mapping.items()}
        ref_indices = [rev_file_mapping[path] for path in ref_path_list]
        for layer_name, gram in query_gram_dict.items():
            # labels is a 2-row array: row 0 holds the reference ids, row 1
            # the numbers 1..len(ref_indices).
            # NOTE(review): with 1 query and k = len(ref_indices) the call
            # below presumably consumes only row 0 -- TODO confirm.
            labels_iter_range = list(range(1, len(ref_indices) + 1))
            labels = np.array([list(ref_indices), labels_iter_range])
            distances = np.empty((1, len(ref_indices)), dtype='float32')
            self.index_dict[layer_name].compute_distance_subset(
                1, faiss.swig_ptr(gram), len(ref_indices),
                faiss.swig_ptr(distances), faiss.swig_ptr(labels))
            distances = distances.flatten()
            # reference id -> distance to the query for this layer
            dist_dict[layer_name] = {
                idx: distances[i]
                for i, idx in enumerate(ref_indices)
            }
Пример #2
0
 def add_batch_result(self, D, I, i0):
     """Feed one batch of results into ``self.heaps``.

     Both ``D`` and ``I`` must have shape ``(self.nq, self.k)``.  ``i0``
     is added to ``I`` in place, so the caller's array is modified.
     """
     expected_shape = (self.nq, self.k)
     assert D.shape == expected_shape
     assert I.shape == expected_shape
     # in-place shift of every id by the batch offset
     I += i0
     dis_ptr = faiss.swig_ptr(D)
     ids_ptr = faiss.swig_ptr(I)
     self.heaps.addn_with_ids(self.k, dis_ptr, ids_ptr, self.k)
Пример #3
0
    def avg_interclass_centroid_dist_dict(self):
        """Return, for every class, the mean score of its centroid against
        the centroids of all other classes.

        Centroids are taken from ``self.V_mean_norm_dict`` and stored in a
        ``faiss.IndexFlatIP``; the values that are averaged are whatever
        ``compute_distance_subset`` returns for that index.

        Returns:
            dict mapping each class to the mean of its scores against the
            other classes.  An empty input gives ``{}``; with a single
            class there is nothing to compare against and the value is NaN.
        """
        # Should I use norm_mean or mean_norm for centroid?
        # TODO: TEMPORARILY CHANGED TO V_mean_norm
        class_list = list(self.V_mean_norm_dict)
        if not class_list:
            return {}

        # faiss needs C-contiguous float32 memory behind swig_ptr
        centroids = np.ascontiguousarray(
            np.stack([self.V_mean_norm_dict[c] for c in class_list]),
            dtype='float32')
        n_classes = len(class_list)
        dim = centroids.shape[1]
        index = faiss.IndexFlatIP(dim)
        index.add(centroids)

        # Every centroid is compared with the n_classes - 1 others.  This
        # count must be the k handed to faiss: it reads k labels and writes
        # k outputs, so passing n_classes would overrun both buffers.
        n_others = n_classes - 1
        d = {}
        for i, class_ in enumerate(class_list):
            if n_others == 0:
                d[class_] = float('nan')
                continue
            others = np.array(
                [j for j in range(n_classes) if j != i], dtype='int64')
            query = centroids[i:i + 1]  # (1, dim) view, still contiguous
            distances = np.empty((1, n_others), dtype='float32')
            index.compute_distance_subset(1, faiss.swig_ptr(query),
                                          n_others,
                                          faiss.swig_ptr(distances),
                                          faiss.swig_ptr(others))
            distances = distances.flatten()
            print(f'centroid distances: {distances}')
            d[class_] = np.mean(distances)
        return d
Пример #4
0
    def do_test_codec(self, nbit):
        """Check that a ProductQuantizer with ``nbit`` bits per code
        round-trips exactly and that encoding through an external
        assignment index yields the same codes."""
        n_vectors = 100
        ksub = 1 << nbit
        pq = faiss.ProductQuantizer(16, 2, nbit)

        # stand-in for training: random centroids written into the PQ
        rs = np.random.RandomState(123)
        centroids = rs.rand(2, ksub, 8).astype('float32')
        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)

        # vectors glued together from centroids, so encoding is lossless
        idx = rs.randint(ksub, size=(n_vectors, 2))
        first_half = centroids[0, idx[:, 0]]
        second_half = centroids[1, idx[:, 1]]
        x = np.hstack((first_half, second_half))

        # encode then decode
        codes = pq.compute_codes(x)
        xr = pq.decode(codes)
        assert np.all(xr == x)

        # encode again, this time through an external index; the index is
        # held in a local so the Python object stays referenced during use
        assign_index = faiss.IndexFlatL2(8)
        pq.assign_index = assign_index
        codes2 = np.empty((n_vectors, pq.code_size), dtype='uint8')
        x_ptr = faiss.swig_ptr(x)
        codes2_ptr = faiss.swig_ptr(codes2)
        pq.compute_codes_with_assign_index(x_ptr, codes2_ptr, n_vectors)
        assert np.all(codes == codes2)
Пример #5
0
def bitvec_shuffle(a, order):
    """Call ``faiss.bitvec_shuffle`` on the rows of ``a``.

    ``a`` is a 2-D array and ``order`` a 1-D array.  The result is a new
    uint8 array with one row per row of ``a`` and ``len(order) // 8``
    columns, filled by faiss.
    """
    n_rows, n_cols = a.shape
    (n_out,) = order.shape
    shuffled = np.empty((n_rows, n_out // 8), dtype='uint8')
    faiss.bitvec_shuffle(
        n_rows, n_cols * 8, n_out,
        faiss.swig_ptr(order),
        faiss.swig_ptr(a),
        faiss.swig_ptr(shuffled),
    )
    return shuffled
Пример #6
0
    def test_clipping(self):
        """A residual quantizer clipped from a full RQ must produce the
        same code prefix and suffix as the full RQ."""
        ds = datasets.SyntheticDataset(32, 1000, 100, 0)

        full_rq = faiss.ResidualQuantizer(ds.d, 5, 4)
        full_rq.train_type = faiss.ResidualQuantizer.Train_default
        full_rq.max_beam_size = 5
        full_rq.train(ds.get_train())

        # the codes are not the same for a large beam size, so use 1
        full_rq.max_beam_size = 1
        full_codes = full_rq.compute_codes(ds.get_database())

        # first 2 stages of the full quantizer
        prefix_rq = faiss.ResidualQuantizer(ds.d, 2, 4)
        prefix_rq.initialize_from(full_rq)
        self.assertEqual(prefix_rq.M, 2)
        prefix_codes = prefix_rq.compute_codes(ds.get_database())

        # remaining 3 stages, applied to what the prefix leaves over
        suffix_rq = faiss.ResidualQuantizer(ds.d, 3, 4)
        suffix_rq.initialize_from(full_rq, 2)
        self.assertEqual(suffix_rq.M, 3)
        residual = ds.get_database() - prefix_rq.decode(prefix_codes)
        suffix_codes = suffix_rq.compute_codes(residual)

        # read the full code in two pieces and compare each piece
        for i in range(ds.nb):
            print(i, ds.nb)
            full_reader = faiss.BitstringReader(
                faiss.swig_ptr(full_codes[i]), full_rq.code_size)
            prefix_reader = faiss.BitstringReader(
                faiss.swig_ptr(prefix_codes[i]), prefix_rq.code_size)
            self.assertEqual(full_reader.read(prefix_rq.tot_bits),
                             prefix_reader.read(prefix_rq.tot_bits))
            suffix_reader = faiss.BitstringReader(
                faiss.swig_ptr(suffix_codes[i]), suffix_rq.code_size)
            self.assertEqual(full_reader.read(suffix_rq.tot_bits),
                             suffix_reader.read(suffix_rq.tot_bits))
Пример #7
0
    def test_decode(self):
        """Test LSQ decode"""
        d, n, M, nbits = 16, 500, 4, 6
        K = 1 << nbits

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)
        codes = rs.randint(0, K, (n, M)).astype(np.int32)
        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.train(x)

        # decode through faiss: pack the codes, then decode the packed form
        packed = np.zeros((n, lsq.code_size), dtype=np.uint8)
        decoded = np.zeros((n, d), dtype=np.float32)
        lsq.pack_codes(n, faiss.swig_ptr(codes), faiss.swig_ptr(packed))
        lsq.decode_c(faiss.swig_ptr(packed), faiss.swig_ptr(decoded), n)

        # decode with the Python reference on a copy of the codebooks
        flat_codebooks = faiss.vector_float_to_array(lsq.codebooks)
        codebooks = flat_codebooks.reshape(M, K, d).copy()
        expected = decode_ref(x, codebooks, codes)

        np.testing.assert_allclose(decoded, expected, rtol=1e-6)
Пример #8
0
def get_neighbors(hnsw, i, level):
    """Return the entries of ``hnsw.neighbors`` for node ``i`` at ``level``.

    ``hnsw.neighbor_range`` writes the begin and end positions into a
    two-element uint64 buffer; every entry of ``hnsw.neighbors`` in that
    half-open range is returned as a list.
    """
    n_nodes = hnsw.levels.size()
    assert i < n_nodes
    n_levels = hnsw.levels.at(i)
    assert level < n_levels

    # bounds[0] receives the begin position, bounds[1] the end position
    bounds = np.empty(2, 'uint64')
    begin_ptr = faiss.swig_ptr(bounds)
    end_ptr = faiss.swig_ptr(bounds[1:])
    hnsw.neighbor_range(i, level, begin_ptr, end_ptr)

    begin, end = bounds
    neighbor_table = hnsw.neighbors
    return [neighbor_table.at(j) for j in range(begin, end)]
Пример #9
0
    def test_update_codebooks(self):
        """Test the codebook update against the Python reference."""
        d, n, M, nbits = 16, 500, 4, 6
        K = 1 << nbits

        # a larger lambd makes the updating process more stable
        lambd = 1e-2

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)
        codes = rs.randint(0, K, (n, M)).astype(np.int32)

        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.lambd = lambd
        lsq.train(x)  # only so that memory for the codebooks is allocated

        def snapshot_codebooks():
            # copy of the current codebooks reshaped to (M, K, d)
            flat = faiss.vector_float_to_array(lsq.codebooks)
            return flat.reshape(M, K, d).copy()

        # taken before the update; not compared below
        codebooks = snapshot_codebooks()

        lsq.update_codebooks(faiss.swig_ptr(x), faiss.swig_ptr(codes), n)
        new_codebooks = snapshot_codebooks()

        ref_codebooks = update_codebooks_ref(x, codes, K, lambd)

        np.testing.assert_allclose(new_codebooks, ref_codebooks, atol=1e-3)
Пример #10
0
def compute_GT_CPU(xb, xq, gt_sl):
    """Search ``xq`` against ``xb`` block by block with a flat L2 index.

    ``xb`` is walked through ``dataset_iterator`` in blocks of 10**5 rows.
    Each block is added to a ``faiss.IndexFlatL2``, searched for ``gt_sl``
    results per query, merged into a heap array, and then the index is
    reset.

    Returns:
        (gt_I, gt_D): int64 ids and float32 distances, each of shape
        ``(number of queries, gt_sl)``.
    """
    nq_gt, _ = xq.shape
    print("compute GT CPU")
    t0 = time.time()

    # result buffers; the heap structure below works directly on them
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k, heaps.nh = gt_sl, nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    block_size = 10 ** 5

    _, d = xb.shape
    queries = sanitize(xq[:nq_gt])

    block_index = faiss.IndexFlatL2(d)

    # one block at a time: add, search, shift ids by the block start, merge
    for i0, block in dataset_iterator(xb, IdentPreproc(d), block_size):
        block_index.add(block)
        D, I = block_index.search(queries, gt_sl)
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        block_index.reset()
    heaps.reorder()

    print("GT CPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
Пример #11
0
def search_single_scan(index, xq, k, bs=128):
    """performs a search so that the inverted lists are accessed
    sequentially by blocks of size bs

    Args:
        index: an IVF index, optionally wrapped in ``faiss.IndexPreTransform``
        xq: query vectors, one per row
        k: number of results per query
        bs: number of consecutive inverted lists visited per pass

    Returns:
        (D, I): the arrays held by the ``faiss.ResultHeap`` after all
        passes, i.e. ``rh.D`` and ``rh.I``.

    The index is temporarily switched to ``PARALLEL_MODE_NO_HEAP_INIT``
    so that successive passes accumulate into the same heap; the previous
    ``parallel_mode`` is restored before returning, even on error.
    """
    # handle pretransform
    if isinstance(index, faiss.IndexPreTransform):
        xq = index.apply_py(xq)
        index = faiss.downcast_index(index.index)

    # swig_ptr below needs C-contiguous float32 memory
    xq = np.ascontiguousarray(xq, dtype='float32')

    # coarse assignment
    nprobe = min(index.nprobe, index.nlist)
    coarse_dis, assign = index.quantizer.search(xq, nprobe)
    nlist = index.nlist
    assign_buckets = assign // bs
    nq = len(xq)

    rh = faiss.ResultHeap(nq, k)

    # Do not leak the modified mode to the caller's index: remember the
    # previous value and put it back once the scan is over.
    saved_parallel_mode = index.parallel_mode
    index.parallel_mode = (
        saved_parallel_mode | index.PARALLEL_MODE_NO_HEAP_INIT)
    try:
        for l0 in range(0, nlist, bs):
            bucket_no = l0 // bs
            # keep only the assignments that fall in the current bucket
            sub_assign = assign.copy()
            sub_assign[assign_buckets != bucket_no] = -1

            index.search_preassigned(nq, faiss.swig_ptr(xq), k,
                                     faiss.swig_ptr(sub_assign),
                                     faiss.swig_ptr(coarse_dis),
                                     faiss.swig_ptr(rh.D),
                                     faiss.swig_ptr(rh.I),
                                     False, None)
    finally:
        index.parallel_mode = saved_parallel_mode

    rh.finalize()

    return rh.D, rh.I
Пример #12
0
def get_invlist(invlists, l):
    """Copy the content of inverted list ``l`` into numpy arrays.

    Returns a pair ``(list_ids, list_codes)``.  ``list_ids`` is an int64
    array with one entry per list element.  ``list_codes`` is a uint8
    array shaped ``(list size, code_size)``, or, when the inverted lists
    report ``INVALID_CODE_SIZE``, shaped by blocks as
    ``(n blocks, block_size // n_per_block, n_per_block)``.
    """
    invlists = faiss.downcast_InvertedLists(invlists)
    n_entries = invlists.list_size(l)
    list_ids = np.zeros(n_entries, dtype='int64')
    ids_ptr = None
    codes_ptr = None
    try:
        ids_ptr = invlists.get_ids(l)
        if n_entries > 0:
            faiss.memcpy(faiss.swig_ptr(list_ids), ids_ptr, list_ids.nbytes)

        codes_ptr = invlists.get_codes(l)
        if invlists.code_size == faiss.InvertedLists.INVALID_CODE_SIZE:
            # it's a BlockInvertedLists
            per_block = invlists.n_per_block
            block_bytes = invlists.block_size
            n_blocks = (n_entries + per_block - 1) // per_block
            codes_shape = (n_blocks, block_bytes // per_block, per_block)
        else:
            codes_shape = (n_entries, invlists.code_size)
        list_codes = np.zeros(codes_shape, dtype='uint8')
        if n_entries > 0:
            faiss.memcpy(
                faiss.swig_ptr(list_codes), codes_ptr, list_codes.nbytes)
    finally:
        # hand back whatever was obtained, even if a copy failed
        if ids_ptr is not None:
            invlists.release_ids(l, ids_ptr)
        if codes_ptr is not None:
            invlists.release_codes(l, codes_ptr)
    return list_ids, list_codes
Пример #13
0
    def test_icm_encode_step(self):
        """Compare ``LocalSearchQuantizer.icm_encode_step`` with the
        Python reference ``icm_encode_step_ref``."""
        d, n, M, nbits = 16, 500, 4, 6
        K = 1 << nbits

        rs = np.random.RandomState(123)

        # random codes, then unary terms, then binary terms
        codes = rs.randint(0, K, (n, M)).astype(np.int32)
        unaries = rs.rand(n, M, K).astype(np.float32)
        binaries = rs.rand(M, M, K, K).astype(np.float32)

        # faiss gets its own copy; `codes` is kept for the reference
        new_codes = codes.copy()
        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.icm_encode_step(
            faiss.swig_ptr(unaries),
            faiss.swig_ptr(binaries),
            faiss.swig_ptr(new_codes),
            n,
        )

        # same step computed in Python from the same terms
        expected_codes = icm_encode_step_ref(unaries, binaries, codes)
        np.testing.assert_array_equal(new_codes, expected_codes)
Пример #14
0
def compute_GT():
    """Search the queries against the database block by block on GPU.

    Works on module-level names that are not defined in this function:
    ``xb`` (database), ``xq`` (queries), ``nq_gt`` (number of queries
    used) and ``gt_sl`` (results per query), plus the helpers
    ``sanitize``, ``make_vres_vdev``, ``dataset_iterator`` and
    ``IdentPreproc``.

    Returns:
        gt_I: int64 array of shape ``(nq_gt, gt_sl)`` holding the ids left
        in the heap array after all blocks were merged.
    """
    print("compute GT")
    t0 = time.time()

    # result buffers; the heap structure below works directly on them
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10**5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # shift the ids of this block by the block start
        I += i0
        heaps.addn_with_ids(gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # progress on a single line; flushed because there is no newline
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0),
              end=' ', flush=True)
    print()
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I
Пример #15
0
    def test_size_t_ptr(self):
        """Pass pointers into a uint64 numpy buffer to
        ``hnsw.neighbor_range`` (issue 1064); the call just has to
        succeed."""
        dim, n_vectors = 10, 100
        index = faiss.IndexHNSWFlat(dim, 32)

        hnsw = index.hnsw
        vectors = np.random.rand(n_vectors, dim).astype('float32')
        index.add(vectors)

        # two uint64 slots: one for the begin value, one for the end value
        bounds = np.empty(2, 'uint64')
        begin_ptr = faiss.swig_ptr(bounds)
        end_ptr = faiss.swig_ptr(bounds[1:])
        hnsw.neighbor_range(23, 0, begin_ptr, end_ptr)
Пример #16
0
    def evaluation_faiss(X, Y, Kset, args):
        """Evaluate embeddings ``X`` with labels ``Y`` using faiss GPU search.

        Reads ``data_name``, ``max_r``, ``test_class_dict``, ``do_nmi`` and,
        for the 'inshop' dataset, ``gallery_labels`` / ``query_labels``
        from ``args``.

        Returns:
            (nmi, recallK, RP, MAP).  ``nmi`` is 0.0 when ``args.do_nmi`` is
            false; ``RP`` and ``MAP`` are 0 for 'inshop'.
        """
        is_inshop = args.data_name.lower() == 'inshop'

        # search K
        if is_inshop:
            kmax = np.max(Kset)
        else:
            kmax = np.max(Kset + [args.max_r])

        test_class_dict = args.test_class_dict

        # compute NMI
        nmi = 0.0
        if args.do_nmi:
            classN = np.max(Y) + 1
            kmeans = KMeans(n_clusters=classN).fit(X)
            nmi = normalized_mutual_info_score(
                Y, kmeans.labels_, average_method='arithmetic')

        if is_inshop:
            # first len(gallery_labels) rows are the gallery, the rest queries
            offset = 0
            len_gallery = len(args.gallery_labels)
            X_gallery = X[:len_gallery, :]
            X_query = X[len_gallery:, :]
            Y_query = args.query_labels
            Y_gallery = args.gallery_labels
        else:
            # queries and gallery are the same set; one extra result is
            # requested and the first column dropped below (presumably the
            # query itself -- TODO confirm)
            offset = 1
            X_query = X_gallery = X
            Y_query = Y_gallery = Y

        nq, d = X_query.shape
        ng, d = X_gallery.shape
        n_results = kmax + offset
        I = np.empty([nq, n_results], dtype='int64')
        D = np.empty([nq, n_results], dtype='float32')
        res = faiss.StandardGpuResources()
        res.setDefaultNullStreamAllDevices()
        faiss.bruteForceKnn(res, faiss.METRIC_INNER_PRODUCT,
                            faiss.swig_ptr(X_gallery), True, ng,
                            faiss.swig_ptr(X_query), True, nq, d,
                            int(n_results), faiss.swig_ptr(D),
                            faiss.swig_ptr(I))

        # labels of the retrieved gallery items, minus the offset column
        YNN = Y_gallery[I[:, offset:]]

        recallK = get_recallK(Y_query, YNN, Kset)

        if is_inshop:
            RP, MAP = 0, 0
        else:
            RP, MAP = get_Rstat(Y_query, YNN, test_class_dict)

        return nmi, recallK, RP, MAP
Пример #17
0
def to_binary(x):
    """Pack a real-valued matrix into bytes, 8 columns per byte.

    Args:
        x: 2-D array of shape ``(n, d)`` with ``d`` a multiple of 8.

    Returns:
        uint8 array of shape ``(n, d // 8)``.  The result is uint8 on both
        code paths (with or without faiss).

    In the numpy fallback, column ``8 * j + i`` sets bit ``i`` of byte ``j``
    when its value is ``>= 0``.
    NOTE(review): confirm that ``faiss.real_to_binary`` uses the same
    threshold for values that are exactly 0.
    """
    n, d = x.shape
    assert d % 8 == 0
    if faiss is None:
        bit_weights = (1 << np.arange(8)).astype('uint8')
        bits = (x >= 0).reshape(n, d // 8, 8)
        # np.sum widens small unsigned ints by default; pin uint8 so the
        # fallback returns the same dtype as the faiss path (max is 255)
        return (bits * bit_weights).sum(2, dtype='uint8')
    # swig_ptr needs C-contiguous float32 memory
    x = np.ascontiguousarray(x, dtype='float32')
    y = np.empty((n, d // 8), dtype='uint8')
    faiss.real_to_binary(n * d, faiss.swig_ptr(x), faiss.swig_ptr(y))
    return y
Пример #18
0
def eval_intersection_measure(gt_I, I):
    """ measure intersection measure (used for knngraph)

    For each of the first ``nq_gt`` rows (``nq_gt`` is a module-level
    name), counts with ``faiss.ranklist_intersection_size`` how many of
    the first ``rank`` ids of ``gt_I`` also appear in ``I``, where ``rank``
    is the number of columns of ``I``.

    Returns:
        float: total intersection size divided by ``rank * nq_gt``.
    """
    rank = I.shape[1]
    assert gt_I.shape[1] >= rank
    # Convert once and keep named references: swig_ptr does not keep the
    # array alive, so a pointer taken on a temporary `.astype()` result
    # could dangle by the time faiss reads it.
    gt_I = np.ascontiguousarray(gt_I, dtype='int64')
    I = np.ascontiguousarray(I, dtype='int64')
    inter = 0
    for q in range(nq_gt):
        inter += faiss.ranklist_intersection_size(
            rank, faiss.swig_ptr(gt_I[q]),
            rank, faiss.swig_ptr(I[q]))
    return inter / float(rank * nq_gt)
Пример #19
0
def eval_intersection_measure(gt_I, I):
    """ measure intersection measure (used for knngraph)

    For each of the first ``nq_gt`` rows (``nq_gt`` is a module-level
    name), counts with ``faiss.ranklist_intersection_size`` how many of
    the first ``rank`` ids of ``gt_I`` also appear in ``I``, where ``rank``
    is the number of columns of ``I``.

    Returns:
        float: total intersection size divided by ``rank * nq_gt``.
    """
    rank = I.shape[1]
    assert gt_I.shape[1] >= rank
    # Convert once and keep named references: swig_ptr does not keep the
    # array alive, so a pointer taken on a temporary `.astype()` result
    # could dangle by the time faiss reads it.
    gt_I = np.ascontiguousarray(gt_I, dtype='int64')
    I = np.ascontiguousarray(I, dtype='int64')
    inter = 0
    for q in range(nq_gt):
        inter += faiss.ranklist_intersection_size(
            rank, faiss.swig_ptr(gt_I[q]), rank,
            faiss.swig_ptr(I[q]))
    return inter / float(rank * nq_gt)
Пример #20
0
 def __init__(self, nq, k):
     """Allocate result buffers and a max-heap array over them.

     nq: number of query vectors, k: number of results per query
     """
     self.nq, self.k = nq, k
     self.D = np.zeros((nq, k), dtype='float32')
     self.I = np.zeros((nq, k), dtype='int64')

     # the heap array works directly on the memory of self.D / self.I
     heap_array = faiss.float_maxheap_array_t()
     heap_array.nh, heap_array.k = nq, k
     heap_array.val = faiss.swig_ptr(self.D)
     heap_array.ids = faiss.swig_ptr(self.I)
     heap_array.heapify()
     self.heaps = heap_array
Пример #21
0
def search_knn(xq, xb, k, distance_type=faiss.METRIC_L2):
    """ wrapper around the faiss knn functions without index

    Args:
        xq: query vectors, shape ``(nq, d)``
        xb: database vectors, shape ``(nb, d)``
        k: number of results per query
        distance_type: ``faiss.METRIC_L2`` (uses ``faiss.knn_L2sqr``) or
            ``faiss.METRIC_INNER_PRODUCT`` (uses ``faiss.knn_inner_product``)

    Returns:
        (D, I): float32 and int64 arrays of shape ``(nq, k)``.

    Raises:
        ValueError: for any other ``distance_type``; the result buffers
            would otherwise be returned uninitialised.
    """
    nq, d = xq.shape
    nb, d2 = xb.shape
    assert d == d2

    # pick the heap type and the knn routine that go together
    if distance_type == faiss.METRIC_L2:
        heaps = faiss.float_maxheap_array_t()
        knn = faiss.knn_L2sqr
    elif distance_type == faiss.METRIC_INNER_PRODUCT:
        heaps = faiss.float_minheap_array_t()
        knn = faiss.knn_inner_product
    else:
        raise ValueError(
            "unsupported distance_type: %r" % (distance_type,))

    # swig_ptr needs C-contiguous float32 memory
    xq = np.ascontiguousarray(xq, dtype='float32')
    xb = np.ascontiguousarray(xb, dtype='float32')

    I = np.empty((nq, k), dtype='int64')
    D = np.empty((nq, k), dtype='float32')

    # the heap array writes its results straight into D and I
    heaps.k = k
    heaps.nh = nq
    heaps.val = faiss.swig_ptr(D)
    heaps.ids = faiss.swig_ptr(I)
    knn(faiss.swig_ptr(xq), faiss.swig_ptr(xb), d, nq, nb, heaps)
    return D, I
Пример #22
0
def compute_GT_GPU(xb, xq, gt_sl):
    """Search ``xq`` against ``xb`` block by block on all visible GPUs.

    ``xb`` is walked through ``dataset_iterator`` in blocks of 10**5 rows.
    Each block is added to a flat L2 index cloned to every GPU reported
    by ``faiss.get_num_gpus()``, searched for ``gt_sl`` results per query,
    merged into a heap array, and then the index is reset.

    Returns:
        (gt_I, gt_D): int64 ids and float32 distances, each of shape
        ``(number of queries, gt_sl)``.
    """
    nq_gt, _ = xq.shape
    print("compute GT GPU")
    t0 = time.time()

    # result buffers; the heap structure below works directly on them
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k, heaps.nh = gt_sl, nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    block_size = 10 ** 5
    # Please change this based on your GPU memory size.
    tempmem = 3500*1024*1024

    _, d = xb.shape
    queries = sanitize(xq[:nq_gt])

    # one resource object per GPU; the list keeps them referenced
    gpu_resources = []
    vres = faiss.GpuResourcesVector()
    vdev = faiss.IntVector()
    for device in range(faiss.get_num_gpus()):
        res = faiss.StandardGpuResources()
        res.setTempMemory(tempmem)
        gpu_resources.append(res)
        vdev.push_back(device)
        vres.push_back(res)

    db_gt = faiss.IndexFlatL2(d)
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, db_gt)

    # one block at a time: add, search, shift ids by the block start, merge
    for i0, block in dataset_iterator(xb, IdentPreproc(d), block_size):
        db_gt_gpu.add(block)
        D, I = db_gt_gpu.search(queries, gt_sl)
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
    heaps.reorder()

    print("GT GPU time: {} s".format(time.time() - t0))
    return gt_I, gt_D
Пример #23
0
 def __init__(self, nq, k):
     """Allocate result buffers and a min-heap array over them.

     nq: number of query vectors, k: number of results per query
     """
     self.nq, self.k = nq, k
     self.D = np.zeros((nq, k), dtype='float32')
     self.I = np.zeros((nq, k), dtype='int64')

     # A minheap is used here rather than a maxheap: with cosine
     # similarity the most similar (closest) vectors score 1, whereas
     # with distances the closest score is 0.
     heap_array = faiss.float_minheap_array_t()
     heap_array.nh, heap_array.k = nq, k
     heap_array.val = faiss.swig_ptr(self.D)
     heap_array.ids = faiss.swig_ptr(self.I)
     heap_array.heapify()
     self.heaps = heap_array
Пример #24
0
def add_preassigned(index_ivf, x, a, ids=None):
    """
    Add the rows of ``x`` to an IVF index using the assignment ``a``
    that was already computed.

    ``a`` (and ``ids`` when given) must be 1-D with one entry per row of
    ``x``.  The number of columns of ``x`` must equal ``index_ivf.d``, or
    ``index_ivf.d / 8`` for a ``faiss.IndexBinaryIVF``.
    """
    n, n_cols = x.shape
    per_row = (n, )
    assert a.shape == per_row

    # for binary indexes the column count is multiplied by 8 before the
    # comparison (presumably d is in bits while x is in bytes)
    d = n_cols * 8 if isinstance(index_ivf, faiss.IndexBinaryIVF) else n_cols
    assert d == index_ivf.d

    if ids is None:
        ids_ptr = None
    else:
        assert ids.shape == per_row
        ids_ptr = faiss.swig_ptr(ids)

    index_ivf.add_core(n, faiss.swig_ptr(x), ids_ptr, faiss.swig_ptr(a))
Пример #25
0
 def ivf_search_preassigned(self, xq, list_nos, coarse_dis, k):
     """Run ``search_preassigned`` on the IVF index inside ``self.index``.

     ``list_nos`` and ``coarse_dis`` must share one shape, with one row
     per query and ``nprobe`` columns.  Returns ``(D, I)``, float32 and
     int64 arrays of shape ``(number of queries, k)``.
     """
     index_ivf = faiss.extract_index_ivf(self.index)
     n_queries, dim = xq.shape
     assert dim == index_ivf.d
     n_rows, n_cols = list_nos.shape
     assert list_nos.shape == coarse_dis.shape
     assert n_rows == n_queries
     assert n_cols == index_ivf.nprobe

     # output buffers, filled by faiss
     out_shape = (n_queries, k)
     D = np.empty(out_shape, dtype='float32')
     I = np.empty(out_shape, dtype='int64')
     index_ivf.search_preassigned(
         n_queries, faiss.swig_ptr(xq), k,
         faiss.swig_ptr(list_nos),
         faiss.swig_ptr(coarse_dis),
         faiss.swig_ptr(D),
         faiss.swig_ptr(I),
         False)
     return D, I
Пример #26
0
    def update(self, features_vectors: list, image_ids: list) -> str:
        """
        Replace the vectors stored under the given image ids.

        The ids are first removed from ``self.index``, then the new
        vectors are added through ``self.insert(..., is_updating=True)``.

        :param features_vectors: one features vector per image id
        :param image_ids: ids whose vectors are replaced
        :return: a validation message when the input is rejected,
                 otherwise the status returned by ``self.insert``
        """
        # ids are mandatory
        if image_ids is None:
            return messages.NO_IDS_SPECIFIED

        # exactly one id per vector
        if len(image_ids) != len(features_vectors):
            return messages.DIMENSION_MISMATCH

        # every vector must have the index dimension
        if any(len(vector) != self.dimension for vector in features_vectors):
            return messages.DIMENSION_ERROR

        # ids as an int64 numpy array, e.g. 17 becomes [17]
        ids_to_replace = numpy.array(image_ids, dtype=numpy.int64)

        # drop the current entries for these ids
        selector = IDSelectorBatch(ids_to_replace.shape[0],
                                   faiss.swig_ptr(ids_to_replace))
        self.index.remove_ids(selector)

        # add the new values
        return self.insert(features_vectors, image_ids, is_updating=True)
Пример #27
0
    def test_progressive_dim(self):
        """Progressive-dimension clustering must reach a lower final
        objective than plain k-means, both directly and through the
        Kmeans wrapper."""
        d, n, k = 32, 10000, 50
        xt, _, _ = get_dataset_2(d, n, 0, 0)

        # baseline: basic kmeans
        baseline = faiss.Kmeans(d, k)
        baseline.train(xt)

        clus = faiss.ProgressiveDimClustering(d, k)
        # each attribute is read once (bare expression) before being set
        clus.verbose
        clus.verbose = True
        clus.progressive_dim_steps
        clus.progressive_dim_steps = 5
        fac = faiss.ProgressiveDimIndexFactory()
        clus.train(n, faiss.swig_ptr(xt), fac)

        iteration_stats = clus.iteration_stats
        objectives = np.array([
            iteration_stats.at(i).obj
            for i in range(iteration_stats.size())
        ])
        # clustering objective should be a tad better
        self.assertLess(objectives[-1], baseline.obj[-1])

        # same test w/ Kmeans wrapper
        wrapped = faiss.Kmeans(d, k, progressive_dim_steps=5)
        wrapped.train(xt)
        self.assertLess(wrapped.obj[-1], baseline.obj[-1])
Пример #28
0
def run_kmeans(x, nmb_clusters, verbose=False, init_cents=None):
    """Run faiss k-means on ``x`` and assign every point to a centroid.

    Training uses ``faiss.Clustering`` (20 iterations) with a
    ``faiss.IndexFlatL2`` as assignment index.

    Args:
        x: data, 2-D array with one point per row
        nmb_clusters (int): number of clusters
        verbose (bool): print the loss values after training
        init_cents: optional initial centroids; converted to contiguous
            float32 before being copied into the clustering object
    Returns:
        tuple: ``(assignments, last_loss, centroids)`` where
        ``assignments`` is a list with the cluster id of every row of
        ``x``, ``last_loss`` is the last entry of the loss vector and
        ``centroids`` has shape ``(nmb_clusters, d)``.
    """
    _, d = x.shape

    # faiss implementation of k-means
    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000

    if init_cents is not None:
        # The destination is a float vector: force float32 and copy
        # exactly the array's byte size rather than assuming 4 bytes per
        # element, which would corrupt the centroids for e.g. float64.
        init_cents = np.ascontiguousarray(init_cents, dtype='float32')
        clus.centroids.resize(init_cents.size)
        faiss.memcpy(clus.centroids.data(), faiss.swig_ptr(init_cents),
                     init_cents.nbytes)

    index = faiss.IndexFlatL2(d)

    # perform the training
    clus.train(x, index)
    # after training the index is searched for the closest entry per point
    _, I = index.search(x, 1)
    losses = faiss.vector_to_array(clus.obj)
    if verbose:
        print('k-means loss evolution: {0}'.format(losses))

    centroids = faiss.vector_to_array(clus.centroids).reshape(
        (nmb_clusters, d))

    return [int(n[0]) for n in I], losses[-1], centroids
Пример #29
0
 def do_test_array_type(self, dtype):
     """ tests swig_ptr and rev_swig_ptr for this type of array """
     n_items = 12
     original = np.arange(n_items).astype(dtype)
     # array -> pointer -> array must give back the same values
     ptr = faiss.swig_ptr(original)
     print(ptr)
     round_trip = faiss.rev_swig_ptr(ptr, n_items)
     np.testing.assert_array_equal(original, round_trip)
Пример #30
0
def update_index(image_id, image_vector, index_faiss=None):
    """Replace the vector stored under ``image_id`` in ``index_faiss``.

    The id is removed from the index, then ``image_vector`` is added
    again under the same id.  ``index_faiss`` is used unconditionally, so
    it has to be supplied despite its ``None`` default.
    """
    ids = np.array([image_id], dtype=np.int64)

    # remove the entry currently stored under this id
    selector = IDSelectorBatch(ids.shape[0], faiss.swig_ptr(ids))
    index_faiss.remove_ids(selector)

    # add the new vector under the same id
    vectors = np.array([image_vector], dtype=np.float32)
    index_faiss.add_with_ids(vectors, ids)
Пример #31
0
    def get_cluster_ids(self, list_num: int) -> np.ndarray:
        """
        Return the ids stored in one inverted list of the merged index.

        The merged index is read from ``self.tempdir / self.MERGED_INDEX_NAME``
        on every call.  When ``self.multi_id`` is set, the ids are passed
        through ``self._invert_cantor_pairing_vec`` before being returned.

        :param list_num: number of the inverted list to read
        :return: the ids of that list (empty array for an empty list)
        """

        # TODO: assert IVF
        assert self.is_trained

        # This fixes problem with SWIG and numpy int
        list_num = int(list_num)

        index = faiss.read_index(str(self.tempdir / self.MERGED_INDEX_NAME))

        # Get the IVF from potentially opaque index
        invlists = faiss.extract_index_ivf(index).invlists
        list_size = invlists.list_size(list_num)
        list_ids = np.zeros(list_size, dtype=np.int64)
        temp_ids = invlists.get_ids(list_num)
        try:
            # Need to copy since memory will be deallocated along with the
            # invlist.  Nothing to copy for an empty list.
            if list_size > 0:
                faiss.memcpy(
                    faiss.swig_ptr(list_ids), temp_ids, list_ids.nbytes)
        finally:
            # always hand the ids back, even if the copy failed
            invlists.release_ids(list_num, temp_ids)

        if self.multi_id:
            list_ids = self._invert_cantor_pairing_vec(list_ids)

        return list_ids
Пример #32
0
def compute_populated_index_2(preproc):
    """Fill a trained IVF index, doing the coarse assignment on GPU.

    Relies on module-level names not defined here: ``xb``,
    ``add_batch_size``, ``prepare_trained_index``, ``dataset_iterator``,
    ``make_vres_vdev`` and ``rate_limited_imap``.

    Returns:
        ``(None, indexall)`` where ``indexall`` is the populated index.

    Raises:
        AssertionError: if the trained index is neither exactly a
            ``faiss.IndexIVFPQ`` nor exactly a ``faiss.IndexIVFFlat``.
    """

    indexall = prepare_trained_index(preproc)

    # set up a 3-stage pipeline that does:
    # - stage 1: load + preproc
    # - stage 2: assign on GPU
    # - stage 3: add to index

    stage1 = dataset_iterator(xb, preproc, add_batch_size)

    vres, vdev = make_vres_vdev()
    coarse_quantizer_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, indexall.quantizer)

    def quantize(batch):
        # stage 1 yields (i0, xs) pairs; tuple parameters in the signature
        # are Python 2 only, so unpack explicitly
        i0, xs = batch
        _, assign = coarse_quantizer_gpu.search(xs, 1)
        return i0, xs, assign.ravel()

    stage2 = rate_limited_imap(quantize, stage1)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]

    for i0, xs, assign in stage2:
        n_batch = xs.shape[0]
        # exact class match on purpose: the two classes take different
        # add_core variants
        if indexall.__class__ == faiss.IndexIVFPQ:
            indexall.add_core_o(n_batch, faiss.swig_ptr(xs),
                                None, None, faiss.swig_ptr(assign))
        elif indexall.__class__ == faiss.IndexIVFFlat:
            indexall.add_core(n_batch, faiss.swig_ptr(xs), None,
                              faiss.swig_ptr(assign))
        else:
            # raised explicitly so the check survives `python -O`
            raise AssertionError(
                "unsupported index class: %s" % indexall.__class__.__name__)

        print('\r%d/%d (%.3f s)  ' % (
            i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    return None, indexall
Пример #33
0
def compute_GT():
    """Search the queries against the database block by block on GPU.

    Works on module-level names that are not defined in this function:
    ``xb`` (database), ``xq`` (queries), ``nq_gt`` (number of queries
    used) and ``gt_sl`` (results per query), plus the helpers
    ``sanitize``, ``make_vres_vdev``, ``dataset_iterator`` and
    ``IdentPreproc``.

    Returns:
        gt_I: int64 array of shape ``(nq_gt, gt_sl)`` holding the ids left
        in the heap array after all blocks were merged.
    """
    print("compute GT")
    t0 = time.time()

    # result buffers; the heap structure below works directly on them
    gt_I = np.zeros((nq_gt, gt_sl), dtype='int64')
    gt_D = np.zeros((nq_gt, gt_sl), dtype='float32')
    heaps = faiss.float_maxheap_array_t()
    heaps.k = gt_sl
    heaps.nh = nq_gt
    heaps.val = faiss.swig_ptr(gt_D)
    heaps.ids = faiss.swig_ptr(gt_I)
    heaps.heapify()
    bs = 10 ** 5

    n, d = xb.shape
    xqs = sanitize(xq[:nq_gt])

    db_gt = faiss.IndexFlatL2(d)
    vres, vdev = make_vres_vdev()
    db_gt_gpu = faiss.index_cpu_to_gpu_multiple(
        vres, vdev, db_gt)

    # compute ground-truth by blocks of bs, and add to heaps
    for i0, xsl in dataset_iterator(xb, IdentPreproc(d), bs):
        db_gt_gpu.add(xsl)
        D, I = db_gt_gpu.search(xqs, gt_sl)
        # shift the ids of this block by the block start
        I += i0
        heaps.addn_with_ids(
            gt_sl, faiss.swig_ptr(D), faiss.swig_ptr(I), gt_sl)
        db_gt_gpu.reset()
        # progress on a single line; flushed because there is no newline
        print("\r   %d/%d, %.3f s" % (i0, n, time.time() - t0),
              end=' ', flush=True)
    print()
    heaps.reorder()

    print("GT time: %.3f s" % (time.time() - t0))
    return gt_I