def test_clipping(self):
    """Verify that a clipped residual quantizer gives the same code
    prefix + suffix as the full RQ.

    A 5-step RQ is trained, then 2-step and 3-step RQs are initialized
    from its codebooks; the codes they produce must be exact prefixes /
    suffixes of the full RQ's codes.
    """
    ds = datasets.SyntheticDataset(32, 1000, 100, 0)
    rq = faiss.ResidualQuantizer(ds.d, 5, 4)
    rq.train_type = faiss.ResidualQuantizer.Train_default
    rq.max_beam_size = 5
    rq.train(ds.get_train())

    # greedy encoding from here on: prefixes are not the same for a
    # larger beam size
    rq.max_beam_size = 1

    codes = rq.compute_codes(ds.get_database())

    # first 2 quantization steps only
    rq2 = faiss.ResidualQuantizer(ds.d, 2, 4)
    rq2.initialize_from(rq)
    self.assertEqual(rq2.M, 2)
    # verify that the beginning of the codes are the same
    codes2 = rq2.compute_codes(ds.get_database())

    # last 3 quantization steps, starting at step 2 of the full RQ
    rq3 = faiss.ResidualQuantizer(ds.d, 3, 4)
    rq3.initialize_from(rq, 2)
    self.assertEqual(rq3.M, 3)
    # encode the residual left over after the first 2 steps
    codes3 = rq3.compute_codes(ds.get_database() - rq2.decode(codes2))

    # verify that prefixes are the same
    for i in range(ds.nb):
        br = faiss.BitstringReader(faiss.swig_ptr(codes[i]), rq.code_size)
        br2 = faiss.BitstringReader(faiss.swig_ptr(codes2[i]), rq2.code_size)
        self.assertEqual(br.read(rq2.tot_bits), br2.read(rq2.tot_bits))
        br3 = faiss.BitstringReader(faiss.swig_ptr(codes3[i]), rq3.code_size)
        self.assertEqual(br.read(rq3.tot_bits), br3.read(rq3.tot_bits))
def test_training(self):
    """Check that the RQ reconstruction error is in the same ballpark
    as a PQ with the same total number of bits (4x6)."""
    ds = datasets.SyntheticDataset(32, 3000, 1000, 0)
    xt = ds.get_train()
    xb = ds.get_database()

    rq = faiss.ResidualQuantizer(ds.d, 4, 6)
    rq.verbose = True
    # rq.train_type = faiss.ResidualQuantizer.Train_default
    # rq.cp.verbose = True
    rq.train(xt)
    err_rq = eval_codec(rq, xb)

    pq = faiss.ProductQuantizer(ds.d, 4, 6)
    pq.train(xt)
    err_pq = eval_codec(pq, xb)

    # in practice RQ is often better than PQ, but that is not the case
    # here, so just check that we are within some factor.
    print(err_pq, err_rq)
    self.assertLess(err_rq, err_pq * 1.2)
def test_precomp(self): ds = datasets.SyntheticDataset(32, 1000, 1000, 0) # make sure it work with varying nb of bits nbits = faiss.UInt64Vector() nbits.push_back(5) nbits.push_back(6) nbits.push_back(7) rq = faiss.ResidualQuantizer(ds.d, nbits) rq.train_type = faiss.ResidualQuantizer.Train_default rq.train(ds.get_train()) codebooks = get_additive_quantizer_codebooks(rq) precomp = precomp_codebooks(codebooks) codebook_cross_prods_ref, cent_norms_ref = precomp # check C++ precomp tables codebook_cross_prods_ref = np.hstack([ np.vstack(c) for c in codebook_cross_prods_ref]) rq.compute_codebook_tables() codebook_cross_prods = faiss.vector_to_array( rq.codebook_cross_products) codebook_cross_prods = codebook_cross_prods.reshape( rq.total_codebook_size, rq.total_codebook_size) cent_norms = faiss.vector_to_array(rq.cent_norms) np.testing.assert_array_almost_equal( codebook_cross_prods, codebook_cross_prods_ref, decimal=5) np.testing.assert_array_almost_equal( np.hstack(cent_norms_ref), cent_norms, decimal=5) # validate that the python tab-based encoding works xb = ds.get_database() ref_codes, _, _ = beam_search_encoding_ref(codebooks, xb, 7) new_codes, _ = beam_search_encoding_tab(codebooks, xb, 7, precomp) np.testing.assert_array_equal(ref_codes, new_codes) # validate the C++ beam_search_encode_step_tab function beam_search_encoding_tab(codebooks, xb, 7, precomp, implem="ref cpp") # check implem w/ residuals n = ref_codes.shape[0] sp = faiss.swig_ptr ref_codes_packed = np.zeros((n, rq.code_size), dtype='uint8') ref_codes_int32 = ref_codes.astype('int32') rq.pack_codes( n, sp(ref_codes_int32), sp(ref_codes_packed), rq.M * ref_codes.shape[1] ) rq.max_beam_size = 7 codes_ref_residuals = rq.compute_codes(xb) np.testing.assert_array_equal(ref_codes_packed, codes_ref_residuals) rq.use_beam_LUT = 1 codes_new = rq.compute_codes(xb) np.testing.assert_array_equal(codes_ref_residuals, codes_new)
def test_beam_size(self):
    """Check that a larger beam gives a lower error."""
    ds = datasets.SyntheticDataset(32, 3000, 1000, 0)
    xt = ds.get_train()
    xb = ds.get_database()

    # train one quantizer per beam size and record its error
    err_by_beam = {}
    for beam_size in (2, 10):
        rq = faiss.ResidualQuantizer(ds.d, 4, 6)
        rq.train_type = faiss.ResidualQuantizer.Train_default
        rq.max_beam_size = beam_size
        rq.train(xt)
        err_by_beam[beam_size] = eval_codec(rq, xb)

    # the wider beam must reconstruct better
    self.assertLess(err_by_beam[10], err_by_beam[2])
def test_with_gpu(self):
    """Check that we get the same results with a GPU quantizer
    and a CPU quantizer."""
    d, nt, nb = 32, 3000, 1000
    xt, xb, _ = get_dataset_2(d, nt, nb, 0)

    # CPU reference
    rq_cpu = faiss.ResidualQuantizer(d, 4, 6)
    rq_cpu.train(xt)
    err_cpu = eval_codec(rq_cpu, xb)

    # same quantizer, assignment indexes built on GPU
    rq_gpu = faiss.ResidualQuantizer(d, 4, 6)
    fac = faiss.GpuProgressiveDimIndexFactory(1)
    rq_gpu.assign_index_factory = fac
    rq_gpu.train(xt)
    # the GPU factory must actually have been used during training
    self.assertGreater(fac.ncall, 0)
    ncall_train = fac.ncall
    err_gpu = eval_codec(rq_gpu, xb)
    # ... and again during encoding
    self.assertGreater(fac.ncall, ncall_train)

    print(err_cpu, err_gpu)
    # GPU and CPU training are not bitwise identical; allow 10% slack
    self.assertTrue(0.9 * err_cpu < err_gpu < 1.1 * err_cpu)
def test_training_with_limited_mem(self):
    """Make sure a different batch size gives the same result.

    Trains the same RQ twice, once unconstrained and once with a memory
    budget that forces batched training, and checks the codebooks are
    bit-identical.
    """
    ds = datasets.SyntheticDataset(32, 3000, 1000, 0)
    xt = ds.get_train()

    rq0 = faiss.ResidualQuantizer(ds.d, 4, 6)
    rq0.train_type = faiss.ResidualQuantizer.Train_default
    rq0.max_beam_size = 5
    # rq0.verbose = True
    rq0.train(xt)
    cb0 = get_additive_quantizer_codebooks(rq0)

    rq1 = faiss.ResidualQuantizer(ds.d, 4, 6)
    rq1.train_type = faiss.ResidualQuantizer.Train_default
    rq1.max_beam_size = 5
    # limit the memory available for the distance matrices so that
    # training has to proceed in batches
    rq1.max_mem_distances = 3000 * ds.d * 4 * 3
    # rq1.verbose = True
    rq1.train(xt)
    cb1 = get_additive_quantizer_codebooks(rq1)

    # batched and unbatched training must give identical codebooks
    for c0, c1 in zip(cb0, cb1):
        self.assertTrue(np.all(c0 == c1))
def test_with_rq(self):
    """Compare with RQ when nsplits = 1."""
    ds = datasets.SyntheticDataset(32, 3000, 3000, 0)
    xt = ds.get_train()
    xb = ds.get_database()
    M, nbits = 4, 4

    # product residual quantizer with a single split ...
    prq = faiss.ProductResidualQuantizer(ds.d, 1, M, nbits)
    prq.train(xt)
    err_prq = eval_codec(prq, xb)

    # ... must behave exactly like a plain residual quantizer
    rq = faiss.ResidualQuantizer(ds.d, M, nbits)
    rq.train(xt)
    err_rq = eval_codec(rq, xb)

    print(err_prq, err_rq)
    self.assertEqual(err_prq, err_rq)
print(f"===== PRQ{nsplits}x{Msub}x{nbits}") prq = faiss.ProductResidualQuantizer(d, nsplits, Msub, nbits) variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)] eval_quantizer(prq, xq, xb, gt, xt, variants=variants) if 'plsq' in todo: print(f"===== PLSQ{nsplits}x{Msub}x{nbits}") plsq = faiss.ProductLocalSearchQuantizer(d, nsplits, Msub, nbits) variants = [("encode_ils_iters", i) for i in (2, 3, 4, 8, 16)] eval_quantizer(plsq, xq, xb, gt, xt, variants=variants) if 'rq' in todo: print("===== RQ") rq = faiss.ResidualQuantizer( d, M, nbits, ) rq.max_beam_size rq.max_beam_size = 30 # for compatibility with older runs # rq.train_type = faiss.ResidualQuantizer.Train_default # rq.verbose = True variants = [("max_beam_size", i) for i in (1, 2, 4, 8, 16, 32)] eval_quantizer(rq, xq, xb, gt, xt, variants=variants) if 'rq_lut' in todo: print("===== RQ") rq = faiss.ResidualQuantizer( d, M, nbits,