示例#1
0
    def test_shards(self):
        d = 32
        nq = 100
        nb = 200

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)

        Dref, Iref = index_ref.search(xq, 10)

        nrep = 5
        index = faiss.IndexBinaryShards(d)
        for i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            sub_idx.add(xb[i * nb // nrep:(i + 1) * nb // nrep])
            index.add_shard(sub_idx)

        D, I = index.search(xq, 10)

        compare_binary_result_lists(Dref, Iref, D, I)

        index2 = faiss.IndexBinaryShards(d)
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            index2.add_shard(sub_idx)

        index2.add(xb)
        D2, I2 = index2.search(xq, 10)

        compare_binary_result_lists(Dref, Iref, D2, I2)
示例#2
0
    def test_replicas(self):
        d = 32
        nq = 100
        nb = 200

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)

        Dref, Iref = index_ref.search(xq, 10)

        nrep = 5
        index = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            sub_idx.add(xb)
            index.addIndex(sub_idx)

        D, I = index.search(xq, 10)

        self.assertTrue((Dref == D).all())
        self.assertTrue((Iref == I).all())

        index2 = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            index2.addIndex(sub_idx)

        index2.add(xb)
        D2, I2 = index2.search(xq, 10)

        self.assertTrue((Dref == D2).all())
        self.assertTrue((Iref == I2).all())
示例#3
0
    def __init__(self, *args, **kwargs):
        unittest.TestCase.__init__(self, *args, **kwargs)
        d = 32
        nt = 0
        nb = 1500
        nq = 500

        (_, self.xb, self.xq) = make_binary_dataset(d, nt, nb, nq)
示例#4
0
    def __init__(self, *args, **kwargs):
        unittest.TestCase.__init__(self, *args, **kwargs)
        d = 32
        nt = 200
        nb = 1500
        nq = 500

        (self.xt, self.xb, self.xq) = make_binary_dataset(d, nt, nb, nq)
        index = faiss.IndexBinaryFlat(d)
        index.add(self.xb)
        Dref, Iref = index.search(self.xq, 10)
        self.Dref = Dref
示例#5
0
    def test_encode_to_binary(self):
        d = 256
        nt = 256
        nb = 1500
        nq = 500
        (xt, xb, xq) = make_binary_dataset(d, nt, nb, nq)
        pq = faiss.ProductQuantizer(d, int(d / 8), 8)

        centroids = binary_to_float(
            np.tile(np.arange(256), int(d / 8)).astype('uint8').reshape(-1, 1))

        faiss.copy_array_to_vector(centroids.ravel(), pq.centroids)
        pq.is_trained = True

        codes = pq.compute_codes(binary_to_float(xb))

        assert np.all(codes == xb)

        indexpq = faiss.IndexPQ(d, int(d / 8), 8)
        indexpq.pq = pq
        indexpq.is_trained = True

        indexpq.add(binary_to_float(xb))
        D, I = indexpq.search(binary_to_float(xq), 3)

        for i in range(nq):
            for j, dj in zip(I[i], D[i]):
                ref_dis = binary_dis(xq[i], xb[j])
                assert 4 * ref_dis == dj

        nlist = 32
        quantizer = faiss.IndexFlatL2(d)
        # pretext class for training
        iflat = faiss.IndexIVFFlat(quantizer, d, nlist)
        iflat.train(binary_to_float(xt))

        indexivfpq = faiss.IndexIVFPQ(quantizer, d, nlist, int(d / 8), 8)

        indexivfpq.pq = pq
        indexivfpq.is_trained = True
        indexivfpq.by_residual = False

        indexivfpq.add(binary_to_float(xb))
        indexivfpq.nprobe = 4

        D, I = indexivfpq.search(binary_to_float(xq), 3)

        for i in range(nq):
            for j, dj in zip(I[i], D[i]):
                ref_dis = binary_dis(xq[i], xb[j])
                assert 4 * ref_dis == dj
示例#6
0
    def test_hash_and_multihash(self):
        d = 128
        nq = 100
        nb = 2000

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)
        k = 10
        Dref, Iref = index_ref.search(xq, k)

        nfound = {}
        for nh in 0, 1, 3, 5:

            for nbit in 4, 7:
                if nh == 0:
                    index = faiss.IndexBinaryHash(d, nbit)
                else:
                    index = faiss.IndexBinaryMultiHash(d, nh, nbit)
                index.add(xb)
                index.nflip = 2
                Dnew, Inew = index.search(xq, k)
                nf = 0
                for i in range(nq):
                    ref = Iref[i]
                    new = Inew[i]
                    snew = set(new)
                    # no duplicates
                    self.assertTrue(len(new) == len(snew))
                    nf += len(set(ref) & snew)
                print('nfound', nh, nbit, nf)
                nfound[(nh, nbit)] = nf
            self.assertGreater(nfound[(nh, 4)], nfound[(nh, 7)])

            # test serialization
            index2 = faiss.deserialize_index_binary(
                faiss.serialize_index_binary(index))

            D2, I2 = index2.search(xq, k)
            np.testing.assert_array_equal(Inew, I2)
            np.testing.assert_array_equal(Dnew, D2)

        print('nfound=', nfound)
        self.assertGreater(3, abs(nfound[(0, 7)] - nfound[(1, 7)]))
        self.assertGreater(nfound[(3, 7)], nfound[(1, 7)])
        self.assertGreater(nfound[(5, 7)], nfound[(3, 7)])
示例#7
0
    def subtest_result_order(self, nh):

        d = 128
        nq = 10
        nb = 200

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        nbit = 10
        if nh == 0:
            index = faiss.IndexBinaryHash(d, nbit)
        else:
            index = faiss.IndexBinaryMultiHash(d, nh, nbit)
        index.add(xb)
        index.nflip = 5
        k = 10
        Do, Io = index.search(xq, k)
        self.assertTrue(np.all(Do[:, 1:] >= Do[:, :-1]))
示例#8
0
    def test_replicas(self):
        d = 32
        nq = 100
        nb = 200

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)

        Dref, Iref = index_ref.search(xq, 10)

        # there is a OpenMP bug in this configuration, so disable threading
        if sys.platform == "darwin" and "Clang 12" in sys.version:
            nthreads = faiss.omp_get_max_threads()
            faiss.omp_set_num_threads(1)
        else:
            nthreads = None

        nrep = 5
        index = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            sub_idx.add(xb)
            index.addIndex(sub_idx)

        D, I = index.search(xq, 10)

        self.assertTrue((Dref == D).all())
        self.assertTrue((Iref == I).all())

        index2 = faiss.IndexBinaryReplicas()
        for _i in range(nrep):
            sub_idx = faiss.IndexBinaryFlat(d)
            index2.addIndex(sub_idx)

        index2.add(xb)
        D2, I2 = index2.search(xq, 10)

        if nthreads is not None:
            faiss.omp_set_num_threads(nthreads)

        self.assertTrue((Dref == D2).all())
        self.assertTrue((Iref == I2).all())
示例#9
0
    def test_multihash(self):
        d = 128
        nq = 100
        nb = 2000

        (_, xb, xq) = make_binary_dataset(d, 0, nb, nq)

        index_ref = faiss.IndexBinaryFlat(d)
        index_ref.add(xb)

        radius = 55

        Lref, Dref, Iref = index_ref.range_search(xq, radius)

        print("nb res: ", Lref[-1])

        nfound = []
        ndis = []

        for nh in 1, 3, 5:
            index = faiss.IndexBinaryMultiHash(d, nh, 10)
            index.add(xb)
            # index.display()
            stats = faiss.cvar.indexBinaryHash_stats
            index.nflip = 2
            stats.reset()
            Lnew, Dnew, Inew = index.range_search(xq, radius)
            for i in range(nq):
                ref = Iref[Lref[i]:Lref[i + 1]]
                new = Inew[Lnew[i]:Lnew[i + 1]]
                snew = set(new)
                # no duplicates
                self.assertTrue(len(new) == len(snew))
                # subset of real results
                self.assertTrue(snew <= set(ref))
            nfound.append(Lnew[-1])
            ndis.append(stats.ndis)
        print('nfound=', nfound)
        print('ndis=', ndis)
        nfound = np.array(nfound)
        # self.assertTrue(nfound[-1] == Lref[-1])
        self.assertTrue(np.all(nfound[1:] >= nfound[:-1]))