def test_update_codebooks(self):
        """Test codebooks updatation."""
        d = 16
        n = 500
        M = 4
        nbits = 6
        K = (1 << nbits)

        # set a larger value to make the updating process more stable
        lambd = 1e-2

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)
        codes = rs.randint(0, K, (n, M)).astype(np.int32)

        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.lambd = lambd
        lsq.train(x)  # just for allocating memory for codebooks

        codebooks = faiss.vector_float_to_array(lsq.codebooks)
        codebooks = codebooks.reshape(M, K, d).copy()

        lsq.update_codebooks(sp(x), sp(codes), n)
        new_codebooks = faiss.vector_float_to_array(lsq.codebooks)
        new_codebooks = new_codebooks.reshape(M, K, d).copy()

        ref_codebooks = update_codebooks_ref(x, codes, K, lambd)

        np.testing.assert_allclose(new_codebooks, ref_codebooks, atol=1e-3)
    def test_icm_encode(self):
        d = 16
        n = 500
        M = 4
        nbits = 4
        K = (1 << nbits)

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)

        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.train(x)  # just for allocating memory for codebooks

        # compute binary terms
        binaries = np.zeros((M, M, K, K)).astype(np.float32)
        lsq.compute_binary_terms(sp(binaries))

        # compute unary terms
        unaries = np.zeros((M, n, K)).astype(np.float32)
        lsq.compute_unary_terms(sp(x), sp(unaries), n)

        # randomly generate codes
        codes = rs.randint(0, K, (n, M)).astype(np.int32)
        new_codes = codes.copy()

        # do icm encoding given binary and unary terms
        lsq.icm_encode_step(sp(new_codes), sp(unaries), sp(binaries), n, 1)

        # do icm encoding without pre-computed unary and bianry terms in Python
        codebooks = faiss.vector_float_to_array(lsq.codebooks)
        codebooks = codebooks.reshape(M, K, d).copy()
        ref_codes = icm_encode_ref(x, codebooks, codes)

        np.testing.assert_array_equal(new_codes, ref_codes)
    def test_decode(self):
        """Test LSQ decode"""
        d = 16
        n = 500
        M = 4
        nbits = 6
        K = (1 << nbits)

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)
        codes = rs.randint(0, K, (n, M)).astype(np.int32)
        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.train(x)

        # decode x
        pack_codes = np.zeros((n, lsq.code_size)).astype(np.uint8)
        decoded_x = np.zeros((n, d)).astype(np.float32)
        lsq.pack_codes(n, sp(codes), sp(pack_codes))
        lsq.decode_c(sp(pack_codes), sp(decoded_x), n)

        # decode in Python
        codebooks = faiss.vector_float_to_array(lsq.codebooks)
        codebooks = codebooks.reshape(M, K, d).copy()
        decoded_x_ref = decode_ref(x, codebooks, codes)

        np.testing.assert_allclose(decoded_x, decoded_x_ref, rtol=1e-6)
示例#4
0
    def test_icm_encode_step(self):
        d = 16
        n = 500
        M = 4
        nbits = 6
        K = (1 << nbits)

        rs = np.random.RandomState(123)

        # randomly generate codes, binary terms and unary terms
        codes = rs.randint(0, K, (n, M)).astype(np.int32)
        new_codes = codes.copy()
        unaries = rs.rand(n, M, K).astype(np.float32)
        binaries = rs.rand(M, M, K, K).astype(np.float32)

        # do icm encoding given binary and unary terms
        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.icm_encode_step(
            faiss.swig_ptr(unaries),
            faiss.swig_ptr(binaries),
            faiss.swig_ptr(new_codes), n)

        # do icm encoding given binary and unary terms in Python
        ref_codes = icm_encode_step_ref(unaries, binaries, codes)
        np.testing.assert_array_equal(new_codes, ref_codes)
示例#5
0
    def subtest_gpu_encoding(self, ngpus):
        """check that the error is in the same as cpu."""
        ds = datasets.SyntheticDataset(32, 1000, 1000, 0)

        xt = ds.get_train()
        xb = ds.get_database()

        M = 4
        nbits = 8

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.train(xt)
        err_cpu = self.eval_codec(lsq, xb)

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.train(xt)
        lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
        err_gpu = self.eval_codec(lsq, xb)

        # 13804.411 vs 13814.794, 1 gpu
        print(err_gpu, err_cpu)
        self.assertLess(err_gpu, err_cpu * 1.05)
    def test_update_codebooks_with_double(self):
        """If the data is not zero-centering, it would be more accurate to
        use double-precision floating-point numbers."""
        ds = datasets.SyntheticDataset(16, 1000, 1000, 0)

        xt = ds.get_train() + 1000
        xb = ds.get_database() + 1000

        M = 4
        nbits = 4

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.train(xt)
        err_double = eval_codec(lsq, xb)

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.update_codebooks_with_double = False
        lsq.train(xt)
        err_float = eval_codec(lsq, xb)

        # 6533.377 vs 25457.99
        print(err_double, err_float)
        self.assertLess(err_double, err_float)
    def test_with_lsq(self):
        """compare with LSQ when nsplits = 1"""
        ds = datasets.SyntheticDataset(32, 3000, 3000, 0)

        xt = ds.get_train()
        xb = ds.get_database()

        M = 4
        nbits = 4

        plsq = faiss.ProductLocalSearchQuantizer(ds.d, 1, M, nbits)
        plsq.train(xt)
        err_plsq = eval_codec(plsq, xb)

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.train(xt)
        err_lsq = eval_codec(lsq, xb)

        print(err_plsq, err_lsq)
        self.assertEqual(err_plsq, err_lsq)
    def test_training(self):
        """check that the error is in the same ballpark as PQ."""
        ds = datasets.SyntheticDataset(32, 3000, 3000, 0)

        xt = ds.get_train()
        xb = ds.get_database()

        M = 4
        nbits = 4

        lsq = faiss.LocalSearchQuantizer(ds.d, M, nbits)
        lsq.train(xt)
        err_lsq = eval_codec(lsq, xb)

        pq = faiss.ProductQuantizer(ds.d, M, nbits)
        pq.train(xt)
        err_pq = eval_codec(pq, xb)

        print(err_lsq, err_pq)
        self.assertLess(err_lsq, err_pq)
    def test_compute_unary_terms(self):
        d = 16
        n = 500
        M = 4
        nbits = 6
        K = (1 << nbits)

        rs = np.random.RandomState(123)
        x = rs.rand(n, d).astype(np.float32)
        unaries = np.zeros((M, n, K)).astype(np.float32)

        lsq = faiss.LocalSearchQuantizer(d, M, nbits)
        lsq.train(x)  # just for allocating memory for codebooks

        lsq.compute_unary_terms(sp(x), sp(unaries), n)

        codebooks = faiss.vector_float_to_array(lsq.codebooks)
        codebooks = codebooks.reshape(M, K, d).copy()
        ref_unaries = compute_unary_terms_ref(codebooks, x)

        np.testing.assert_allclose(unaries, ref_unaries, atol=1e-4)
示例#10
0
print(f"eval on {M}x{nbits} maxtrain={maxtrain}")

xq = ds.get_queries()
xb = ds.get_database()
gt = ds.get_groundtruth()

xt = ds.get_train(maxtrain=maxtrain)

nb, d = xb.shape
nq, d = xq.shape
nt, d = xt.shape

# fastest to slowest

if 'lsq-gpu' in todo:
    lsq = faiss.LocalSearchQuantizer(d, M, nbits)
    ngpus = faiss.get_num_gpus()
    lsq.icm_encoder_factory = faiss.GpuIcmEncoderFactory(ngpus)
    lsq.verbose = True
    eval_quantizer(lsq, xb, xt, 'lsq-gpu')

if 'pq' in todo:
    pq = faiss.ProductQuantizer(d, M, nbits)
    print("===== PQ")
    eval_quantizer(pq, xq, xb, gt, xt)

if 'opq' in todo:
    d2 = ((d + M - 1) // M) * M
    print("OPQ d2=", d2)
    opq = faiss.OPQMatrix(d, M, d2)
    opq.train(xt)