def test_6bit_equiv(self):
    rs = np.random.RandomState(123)
    for d in 3, 6, 8, 16, 36:
        trainset = np.zeros((2, d), dtype='float32')
        trainset[0, :] = 0
        trainset[1, :] = 63
        index = faiss.IndexScalarQuantizer(
            d, faiss.ScalarQuantizer.QT_6bit)
        index.train(trainset)
        print('cs=', index.code_size)

        x = rs.randint(64, size=(100, d)).astype('float32')

        # verify encoder / decoder
        index.add(x)
        x2 = index.reconstruct_n(0, x.shape[0])
        assert np.all(x == x2 - 0.5)

        # verify AVX decoder (used only for search)
        y = 63 * rs.rand(20, d).astype('float32')

        D, I = index.search(y, 10)
        for i in range(20):
            for j in range(10):
                dis = ((y[i] - x2[I[i, j]]) ** 2).sum()
                # print(dis, D[i, j])
                assert abs(D[i, j] - dis) / dis < 1e-5
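
# Standalone sketch (not part of the test suite above): QT_6bit packs each
# dimension into 6 bits, so code_size should be ceil(6 * d / 8) bytes, and
# for integer data trained on the full [0, 63] range the reconstruction is
# offset by half a quantization step, as the test asserts. Assumes faiss
# and numpy imports as in the tests.
import numpy as np
import faiss

d = 16
index = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_6bit)
trainset = np.zeros((2, d), dtype='float32')
trainset[1, :] = 63            # span the full [0, 63] integer range
index.train(trainset)
assert index.code_size == (6 * d + 7) // 8

x = np.random.RandomState(0).randint(64, size=(10, d)).astype('float32')
codes = index.sa_encode(x)                         # packed 6-bit codes
assert np.all(index.sa_decode(codes) == x + 0.5)   # half-step offset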
def test_encoded(self):
    d = 32
    k = 5
    xt, xb, xq = get_dataset_2(d, 1000, 0, 0)

    # make sure that training on a compressed then decompressed
    # dataset gives the same result as decompressing on-the-fly

    codec = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_4bit)
    codec.train(xt)
    codes = codec.sa_encode(xt)

    xt2 = codec.sa_decode(codes)

    clus = faiss.Clustering(d, k)
    # clus.verbose = True
    clus.niter = 0
    index = faiss.IndexFlatL2(d)
    clus.train(xt2, index)
    ref_centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)

    _, ref_errs = index.search(xt2, 1)

    clus = faiss.Clustering(d, k)
    # clus.verbose = True
    clus.niter = 0
    clus.decode_block_size = 120
    index = faiss.IndexFlatL2(d)
    clus.train_encoded(codes, codec, index)
    new_centroids = faiss.vector_to_array(clus.centroids).reshape(-1, d)

    _, new_errs = index.search(xt2, 1)

    # It's the same operation, so should be bit-exact the same
    self.assertTrue(np.all(ref_centroids == new_centroids))
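
# Minimal usage sketch of the pattern the test exercises (function name and
# parameters are illustrative): Clustering.train_encoded decodes blocks of
# decode_block_size codes on the fly, which bounds memory when the
# decompressed training set would not fit in RAM.
def kmeans_on_codes(codes, codec, d, k, niter=20):
    clus = faiss.Clustering(d, k)
    clus.niter = niter
    clus.decode_block_size = 2 ** 15   # decode 32768 vectors at a time
    index = faiss.IndexFlatL2(d)
    clus.train_encoded(codes, codec, index)
    return faiss.vector_to_array(clus.centroids).reshape(k, d)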
def test_4variants(self):
    d = 32
    nt = 2500
    nq = 400
    nb = 5000

    (xt, xb, xq) = get_dataset(d, nb, nt, nq)

    index_gt = faiss.IndexFlatL2(d)
    index_gt.add(xb)
    D_ref, I_ref = index_gt.search(xq, 10)

    nok = {}

    for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform QT_fp16".split():
        qtype = getattr(faiss.ScalarQuantizer, qname)
        index = faiss.IndexScalarQuantizer(d, qtype, faiss.METRIC_L2)
        index.train(xt)
        index.add(xb)
        D, I = index.search(xq, 10)
        nok[qname] = (I[:, 0] == I_ref[:, 0]).sum()
    print(nok, nq)

    self.assertGreaterEqual(nok['QT_8bit'], nq * 0.9)
    self.assertGreaterEqual(nok['QT_8bit'], nok['QT_4bit'])
    self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform'])
    self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform'])
    self.assertGreaterEqual(nok['QT_fp16'], nok['QT_8bit'])
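
# Helper sketch (name is illustrative): the counters in `nok` above are raw
# top-1 hit counts against the exact index; expressing them as a fraction
# gives the usual 1-recall@1 metric.
def recall_at_1(I, I_ref):
    """Fraction of queries whose top-1 result matches the exact top-1."""
    return (I[:, 0] == I_ref[:, 0]).mean()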
def test_downcast_Refine(self):
    index = faiss.IndexRefineFlat(
        faiss.IndexScalarQuantizer(10, faiss.ScalarQuantizer.QT_8bit))

    # serialize and deserialize
    index2 = faiss.deserialize_index(faiss.serialize_index(index))

    assert isinstance(index2, faiss.IndexRefineFlat)
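
# Related sketch (assumes the `index2` deserialized above): when holding a
# plain faiss.Index reference, faiss.downcast_index recovers the concrete
# subclass, e.g. the scalar-quantizer index wrapped by the refiner.
base = faiss.downcast_index(index2.base_index)
assert isinstance(base, faiss.IndexScalarQuantizer)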
def subtest_8bit_direct(self, metric_type, d):
    xt, xb, xq = get_dataset_2(d, 500, 1000, 30)

    # rescale everything to get integer
    tmin, tmax = xt.min(), xt.max()

    def rescale(x):
        x = np.floor((x - tmin) * 256 / (tmax - tmin))
        x[x < 0] = 0
        x[x > 255] = 255
        return x

    xt = rescale(xt)
    xb = rescale(xb)
    xq = rescale(xq)

    gt_index = faiss.IndexFlat(d, metric_type)
    gt_index.add(xb)
    Dref, Iref = gt_index.search(xq, 10)

    index = faiss.IndexScalarQuantizer(
        d, faiss.ScalarQuantizer.QT_8bit_direct, metric_type)
    index.add(xb)
    D, I = index.search(xq, 10)

    assert np.all(I == Iref)
    assert np.all(D == Dref)

    # same, with IVF
    nlist = 64
    quantizer = faiss.IndexFlat(d, metric_type)

    gt_index = faiss.IndexIVFFlat(quantizer, d, nlist, metric_type)
    gt_index.nprobe = 4
    gt_index.train(xt)
    gt_index.add(xb)
    Dref, Iref = gt_index.search(xq, 10)

    index = faiss.IndexIVFScalarQuantizer(
        quantizer, d, nlist,
        faiss.ScalarQuantizer.QT_8bit_direct, metric_type)
    index.nprobe = 4
    index.by_residual = False
    index.train(xt)
    index.add(xb)
    D, I = index.search(xq, 10)

    assert np.all(I == Iref)
    assert np.all(D == Dref)
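
# Standalone sketch of why the test can demand bit-identical results:
# QT_8bit_direct casts float values in [0, 255] directly to uint8, so it
# has no trained parameters and is lossless for integer-valued data.
# Assumes numpy/faiss imports as above; data here is synthetic.
import numpy as np
import faiss

d = 32
xb = np.floor(np.random.rand(100, d) * 256).astype('float32')  # ints in [0, 255]
index = faiss.IndexScalarQuantizer(
    d, faiss.ScalarQuantizer.QT_8bit_direct, faiss.METRIC_L2)
index.add(xb)   # no train() needed: the direct codec has no parameters
assert np.all(index.sa_decode(index.sa_encode(xb)) == xb)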
def test_sa_encode_decode(self):
    d = 16
    index = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_8bit)
    xb = torch.rand(1000, d, dtype=torch.float32)
    index.train(xb)

    # torch cpu as ground truth
    nq = 10
    xq = torch.rand(nq, d, dtype=torch.float32)
    encoded_torch = index.sa_encode(xq)

    # numpy cpu
    encoded_np = index.sa_encode(xq.numpy())
    self.assertTrue(np.array_equal(encoded_torch.numpy(), encoded_np))

    decoded_torch = index.sa_decode(encoded_torch)
    decoded_np = index.sa_decode(encoded_np)
    self.assertTrue(
        torch.equal(decoded_torch, torch.from_numpy(decoded_np)))

    # torch cpu as output parameter
    encoded_torch_param = torch.zeros(nq, d, dtype=torch.uint8)
    index.sa_encode(xq, encoded_torch_param)
    self.assertTrue(torch.equal(encoded_torch, encoded_torch_param))

    decoded_torch_param = torch.zeros(nq, d, dtype=torch.float32)
    index.sa_decode(encoded_torch, decoded_torch_param)
    self.assertTrue(torch.equal(decoded_torch, decoded_torch_param))

    # np as output parameter
    encoded_np_param = np.zeros((nq, d), dtype=np.uint8)
    index.sa_encode(xq.numpy(), encoded_np_param)
    self.assertTrue(np.array_equal(encoded_torch.numpy(),
                                   encoded_np_param))

    decoded_np_param = np.zeros((nq, d), dtype=np.float32)
    index.sa_decode(encoded_np_param, decoded_np_param)
    self.assertTrue(np.array_equal(decoded_np, decoded_np_param))
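
# Context note as code: passing torch tensors to index.sa_encode /
# index.sa_decode as above only works once faiss.contrib.torch_utils has
# been imported; it monkey-patches the index methods to accept torch
# tensors alongside numpy arrays. The import is assumed to sit at the top
# of the test file.
import faiss.contrib.torch_utils  # noqa: F401  (side-effect import)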
import time

import faiss
import numpy

from recall_data import recall_data

# basic parameters
d = 300             # vector dimension
data_size = 10000   # database size
k = 50
qname = "QT_4bit"

# generate the test data
numpy.random.seed(13)
data = numpy.random.random(size=(data_size, d)).astype('float32')
test_data = recall_data

# create the index and add vectors
qtype = getattr(faiss.ScalarQuantizer, qname)
index = faiss.IndexScalarQuantizer(d, qtype, faiss.METRIC_L2)

# train on the data
start_time = time.time()
assert not index.is_trained
index.train(data)
assert index.is_trained
print("Train Index Used %.2f sec." % (time.time() - start_time))

# add the data (adding to the index can be a bit slow)
start_time = time.time()
index.add(data)
print("Add vector Used %.2f sec." % (time.time() - start_time))

start_time = time.time()
D, I = index.search(data[:50], k)  # search the k nearest neighbors of each vector
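
# Hedged continuation sketch: the script ends right after the search, and
# `recall_data` is imported but never used in the shown code. Assuming it
# holds the exact top-k neighbor ids for the same 50 queries, recall could
# be computed like this (the assumption is not confirmed by the source):
print("Search Used %.2f sec." % (time.time() - start_time))
hits = sum(len(set(I[i]) & set(test_data[i])) for i in range(len(I)))
print("Recall@%d: %.4f" % (k, hits / float(len(I) * k)))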
def build_index_streaming(
    cached_embeddings_path,
    output_path,
    hnsw=False,
    sq8_quantization=False,
    fp16_quantization=False,
    store_n=256,
    ef_search=32,
    ef_construction=80,
    sample_fraction=0.1,
    indexing_batch_size=5000000,
):
    vector_size = get_vectors_dim(cached_embeddings_path)

    if hnsw:
        if sq8_quantization:
            index = faiss.IndexHNSWSQ(
                vector_size + 1, faiss.ScalarQuantizer.QT_8bit, store_n)
        elif fp16_quantization:
            index = faiss.IndexHNSWSQ(
                vector_size + 1, faiss.ScalarQuantizer.QT_fp16, store_n)
        else:
            index = faiss.IndexHNSWFlat(vector_size + 1, store_n)

        index.hnsw.efSearch = ef_search
        index.hnsw.efConstruction = ef_construction
    else:
        if sq8_quantization:
            index = faiss.IndexScalarQuantizer(
                vector_size, faiss.ScalarQuantizer.QT_8bit, faiss.METRIC_L2)
        elif fp16_quantization:
            index = faiss.IndexScalarQuantizer(
                vector_size, faiss.ScalarQuantizer.QT_fp16, faiss.METRIC_L2)
        else:
            # faiss has no IndexIP class; a flat inner-product index is the
            # non-HNSW, non-quantized equivalent (vectors are not augmented
            # on this path, so the dimension is vector_size)
            index = faiss.IndexFlatIP(vector_size)

    vector_sample, max_phi, N = get_vector_sample(
        cached_embeddings_path, sample_fraction)

    if hnsw:
        vector_sample = augment_vectors(vector_sample, max_phi)

    if sq8_quantization or fp16_quantization:  # index requires training
        vs = vector_sample.numpy()
        logging.info(f'Training Quantizer with matrix of shape {vs.shape}')
        index.train(vs)
        del vs
    del vector_sample

    chunks_to_add = []
    added = 0
    for vector_chunk in parse_vectors_from_directory(
            cached_embeddings_path, as_chunks=True):
        if hnsw:
            vector_chunk = augment_vectors(vector_chunk, max_phi)

        chunks_to_add.append(vector_chunk)

        if sum(c.shape[0] for c in chunks_to_add) > indexing_batch_size:
            # concatenate first so to_add is defined before logging uses it
            to_add = torch.cat(chunks_to_add).numpy()
            chunks_to_add = []
            logging.info(
                f'Adding Vectors {added} -> {added + to_add.shape[0]} of {N}')
            index.add(to_add)
            added += to_add.shape[0]  # count vectors, not batches

    if len(chunks_to_add) > 0:
        to_add = torch.cat(chunks_to_add).numpy()
        logging.info(
            f'Adding Vectors {added} -> {added + to_add.shape[0]} of {N}')
        index.add(to_add)

    logging.info(f'Index Built, writing index to {output_path}')
    faiss.write_index(index, output_path)
    logging.info('Index dumped')
    return index
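
# Hypothetical invocation sketch (paths and flag choices are illustrative,
# not from the source): build an SQ8-compressed HNSW index from a directory
# of cached embedding shards.
if __name__ == '__main__':
    build_index_streaming(
        cached_embeddings_path='embeddings/',   # hypothetical path
        output_path='index.faiss',
        hnsw=True,
        sq8_quantization=True,
        sample_fraction=0.1,
    )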