Пример #1
0
    def test_clipping(self):
        """Verify that a clipped residual quantizer gives the same
        code prefix + suffix as the full RQ.

        A 5-step quantizer ``rq`` is trained first. ``rq2`` (2 steps) is
        initialized from it and encodes the database directly; ``rq3``
        (3 steps) is initialized with ``initialize_from(rq, 2)`` and encodes
        what is left after subtracting the ``rq2`` reconstruction. The bits
        of the full code are then compared with the bits of both clipped
        codes, vector by vector.
        """
        ds = datasets.SyntheticDataset(32, 1000, 100, 0)

        rq = faiss.ResidualQuantizer(ds.d, 5, 4)
        rq.train_type = faiss.ResidualQuantizer.Train_default
        rq.max_beam_size = 5
        rq.train(ds.get_train())

        # encode greedily: the codes are not the same for a large beam size
        rq.max_beam_size = 1
        codes = rq.compute_codes(ds.get_database())

        # clipped quantizer covering the beginning of the full code
        rq2 = faiss.ResidualQuantizer(ds.d, 2, 4)
        rq2.initialize_from(rq)
        self.assertEqual(rq2.M, 2)
        codes2 = rq2.compute_codes(ds.get_database())

        # clipped quantizer for the rest of the code, applied to the
        # residual that rq2 leaves behind
        rq3 = faiss.ResidualQuantizer(ds.d, 3, 4)
        rq3.initialize_from(rq, 2)
        self.assertEqual(rq3.M, 3)
        codes3 = rq3.compute_codes(ds.get_database() - rq2.decode(codes2))

        # verify that prefixes and suffixes are the same
        for i in range(ds.nb):
            br = faiss.BitstringReader(faiss.swig_ptr(codes[i]), rq.code_size)
            br2 = faiss.BitstringReader(faiss.swig_ptr(codes2[i]),
                                        rq2.code_size)
            self.assertEqual(br.read(rq2.tot_bits), br2.read(rq2.tot_bits))
            br3 = faiss.BitstringReader(faiss.swig_ptr(codes3[i]),
                                        rq3.code_size)
            # NOTE(review): this relies on br.read() advancing its position,
            # so that the second read returns the bits following the prefix.
            self.assertEqual(br.read(rq3.tot_bits), br3.read(rq3.tot_bits))
Пример #2
0
    def test_training(self):
        """Check that the RQ error is in the same ballpark as the PQ error.

        Both quantizers are trained on the same training set and scored with
        ``eval_codec`` on the same database vectors.
        """
        ds = datasets.SyntheticDataset(32, 3000, 1000, 0)

        xt = ds.get_train()
        xb = ds.get_database()

        rq = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq.verbose = True
        rq.train_type = faiss.ResidualQuantizer.Train_default
        # rq.cp.verbose = True
        rq.train(xt)
        err_rq = eval_codec(rq, xb)

        pq = faiss.ProductQuantizer(ds.d, 4, 6)
        pq.train(xt)
        err_pq = eval_codec(pq, xb)

        # In practice RQ is often better than PQ, but that is not the case
        # here, so just check that we are within some factor.
        print(err_pq, err_rq)
        self.assertLess(err_rq, err_pq * 1.2)
Пример #3
0
    def test_precomp(self):
        """Cross-check the precomputed codebook tables and the table-based
        beam search against their reference counterparts."""
        ds = datasets.SyntheticDataset(32, 1000, 1000, 0)
        beam_size = 7

        # make sure it works with a varying number of bits per step
        nbits = faiss.UInt64Vector()
        for step_bits in (5, 6, 7):
            nbits.push_back(step_bits)

        rq = faiss.ResidualQuantizer(ds.d, nbits)
        rq.train_type = faiss.ResidualQuantizer.Train_default
        rq.train(ds.get_train())

        codebooks = get_additive_quantizer_codebooks(rq)
        precomp = precomp_codebooks(codebooks)
        cross_prods_blocks, norms_ref = precomp

        # check the C++ precomputed tables: assemble the reference blocks
        # into a single matrix that can be compared with the C++ one
        cross_prods_ref = np.hstack(
            [np.vstack(block) for block in cross_prods_blocks])

        rq.compute_codebook_tables()
        cross_prods = faiss.vector_to_array(rq.codebook_cross_products)
        cross_prods = cross_prods.reshape(
            rq.total_codebook_size, rq.total_codebook_size)
        norms = faiss.vector_to_array(rq.cent_norms)

        np.testing.assert_array_almost_equal(
            cross_prods, cross_prods_ref, decimal=5)
        np.testing.assert_array_almost_equal(
            np.hstack(norms_ref), norms, decimal=5)

        # validate that the Python table-based encoding works
        xb = ds.get_database()
        ref_codes, _, _ = beam_search_encoding_ref(codebooks, xb, beam_size)
        tab_codes, _ = beam_search_encoding_tab(
            codebooks, xb, beam_size, precomp)
        np.testing.assert_array_equal(ref_codes, tab_codes)

        # validate the C++ beam_search_encode_step_tab function
        beam_search_encoding_tab(
            codebooks, xb, beam_size, precomp, implem="ref cpp")

        # check the implementation with residuals: pack the reference codes
        # and compare them with what the quantizer itself produces
        n = ref_codes.shape[0]
        packed_ref = np.zeros((n, rq.code_size), dtype='uint8')
        codes_i32 = ref_codes.astype('int32')
        rq.pack_codes(
            n,
            faiss.swig_ptr(codes_i32),
            faiss.swig_ptr(packed_ref),
            rq.M * ref_codes.shape[1],
        )

        rq.max_beam_size = beam_size
        codes_residuals = rq.compute_codes(xb)
        np.testing.assert_array_equal(packed_ref, codes_residuals)

        # same encoding, now with the beam LUT switched on
        rq.use_beam_LUT = 1
        codes_lut = rq.compute_codes(xb)
        np.testing.assert_array_equal(codes_residuals, codes_lut)
Пример #4
0
    def test_beam_size(self):
        """Check that a larger beam gives a lower error."""
        ds = datasets.SyntheticDataset(32, 3000, 1000, 0)
        xt = ds.get_train()
        xb = ds.get_database()

        # train and score one quantizer per beam size, narrow beam first
        errors = []
        for beam_size in (2, 10):
            rq = faiss.ResidualQuantizer(ds.d, 4, 6)
            rq.train_type = faiss.ResidualQuantizer.Train_default
            rq.max_beam_size = beam_size
            rq.train(xt)
            errors.append(eval_codec(rq, xb))

        err_narrow, err_wide = errors
        self.assertLess(err_wide, err_narrow)
Пример #5
0
    def test_with_gpu(self):
        """Check that we get the same results (within 10%) with a GPU
        quantizer and a CPU quantizer."""
        d, nt, nb = 32, 3000, 1000
        xt, xb, _ = get_dataset_2(d, nt, nb, 0)

        # CPU baseline
        cpu_rq = faiss.ResidualQuantizer(d, 4, 6)
        cpu_rq.train(xt)
        err_cpu = eval_codec(cpu_rq, xb)

        # identical quantizer, with a GPU index factory plugged in
        gpu_rq = faiss.ResidualQuantizer(d, 4, 6)
        factory = faiss.GpuProgressiveDimIndexFactory(1)
        gpu_rq.assign_index_factory = factory
        gpu_rq.train(xt)
        # the factory call counter must move during training ...
        self.assertGreater(factory.ncall, 0)
        calls_after_train = factory.ncall
        err_gpu = eval_codec(gpu_rq, xb)
        # ... and move again during evaluation
        self.assertGreater(factory.ncall, calls_after_train)

        print(err_cpu, err_gpu)

        lower_bound = 0.9 * err_cpu
        upper_bound = 1.1 * err_cpu
        self.assertTrue(lower_bound < err_gpu < upper_bound)
Пример #6
0
    def test_training_with_limited_mem(self):
        """Make sure a different batch size gives the same result.

        Two identically configured quantizers are trained on the same data;
        the second one has ``max_mem_distances`` set explicitly. The
        resulting codebooks must be exactly equal.
        """
        nt = 3000
        ds = datasets.SyntheticDataset(32, nt, 1000, 0)

        xt = ds.get_train()

        rq0 = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq0.train_type = faiss.ResidualQuantizer.Train_default
        rq0.max_beam_size = 5
        # rq0.verbose = True
        rq0.train(xt)
        cb0 = get_additive_quantizer_codebooks(rq0)

        rq1 = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq1.train_type = faiss.ResidualQuantizer.Train_default
        rq1.max_beam_size = 5
        # NOTE(review): presumably this budget is small enough to force
        # training to proceed in several batches -- confirm against the
        # Faiss documentation of max_mem_distances.
        rq1.max_mem_distances = nt * ds.d * 4 * 3
        # rq1.verbose = True
        rq1.train(xt)
        cb1 = get_additive_quantizer_codebooks(rq1)

        # zip() stops at the shorter input, so compare the lengths first
        self.assertEqual(len(cb0), len(cb1))
        for c0, c1 in zip(cb0, cb1):
            np.testing.assert_array_equal(c0, c1)
Пример #7
0
    def test_with_rq(self):
        """Compare with RQ when nsplits = 1: the errors must be equal."""
        ds = datasets.SyntheticDataset(32, 3000, 3000, 0)
        xt, xb = ds.get_train(), ds.get_database()

        M, nbits = 4, 4

        def trained_error(quantizer):
            # train on xt, then score on xb
            quantizer.train(xt)
            return eval_codec(quantizer, xb)

        err_prq = trained_error(
            faiss.ProductResidualQuantizer(ds.d, 1, M, nbits))
        err_rq = trained_error(faiss.ResidualQuantizer(ds.d, M, nbits))

        print(err_prq, err_rq)
        self.assertEqual(err_prq, err_rq)
Пример #8
0
    print(f"===== PRQ{nsplits}x{Msub}x{nbits}")
    prq = faiss.ProductResidualQuantizer(d, nsplits, Msub, nbits)
    variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)]
    eval_quantizer(prq, xq, xb, gt, xt, variants=variants)

if 'plsq' in todo:
    # Product local-search quantizer, evaluated once per entry of `variants`
    # (here: different values of encode_ils_iters).
    print(f"===== PLSQ{nsplits}x{Msub}x{nbits}")
    plsq = faiss.ProductLocalSearchQuantizer(d, nsplits, Msub, nbits)
    ils_iters_to_try = (2, 3, 4, 8, 16)
    variants = [
        ("encode_ils_iters", n_iters) for n_iters in ils_iters_to_try
    ]
    eval_quantizer(plsq, xq, xb, gt, xt, variants=variants)

if 'rq' in todo:
    # Plain residual quantizer. `variants` lists the max_beam_size values
    # handed to eval_quantizer.
    print("===== RQ")
    rq = faiss.ResidualQuantizer(d, M, nbits)
    rq.max_beam_size = 30  # for compatibility with older runs
    # rq.train_type = faiss.ResidualQuantizer.Train_default
    # rq.verbose = True
    variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)]
    eval_quantizer(rq, xq, xb, gt, xt, variants=variants)

if 'rq_lut' in todo:
    print("===== RQ")
    rq = faiss.ResidualQuantizer(
        d,
        M,
        nbits,