def test_exception_2(self):
    """An unparsable factory string must raise a RuntimeError.

    The key 'IVF256,Flat,PQ8' is handed to ``faiss.index_factory``; the test
    fails if no RuntimeError is raised or if its message does not contain
    'could not parse'.
    """
    caught = None
    try:
        faiss.index_factory(12, 'IVF256,Flat,PQ8')
    except RuntimeError as err:
        # keep a reference: the ``as`` name is unbound once the handler exits
        caught = err

    if caught is None:
        assert False, 'exception did not fire???'
    assert 'could not parse' in str(caught)
def test_factory_3(self):
    """Check that ParameterSpace can set ``nprobe`` on factory-built indexes.

    Covers a plain "IVF10,PQ4" index and a "PCAR8,IVF10,PQ4" index, where the
    value is read back from the downcast sub-index.
    """
    n_probe = 3

    plain = faiss.index_factory(12, "IVF10,PQ4")
    faiss.ParameterSpace().set_index_parameter(plain, "nprobe", n_probe)
    assert plain.nprobe == 3

    wrapped = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
    faiss.ParameterSpace().set_index_parameter(wrapped, "nprobe", n_probe)
    inner = faiss.downcast_index(wrapped.index)
    assert inner.nprobe == 3
def test_white(self):
    """Whitening should bring skewed data closer to the original neighbours.

    Neighbours are computed on normally distributed data (reference), then on
    a skewed and linearly mixed copy with plain L2 search, and finally on that
    copy through a 'PCAW<d>,Flat' index. The whitened results must overlap the
    reference more than twice as much as the plain L2 results.
    """
    dim = 4
    n_train, n_base, n_query = 1000, 200, 200
    n_total = n_train + n_base + n_query
    top_k = 5

    # normal distribution
    x = faiss.randn(n_total * dim, 1234).reshape(n_total, dim)

    # NN search on the normal distribution (reference result)
    reference = faiss.index_factory(dim, 'Flat')
    reference.add(x[n_train:-n_query])
    _, Io = reference.search(x[-n_query:], top_k)

    # make the distribution very skewed, then mix the axes with the Q factor
    # of a QR decomposition of a random matrix
    x *= [10, 4, 1, 0.5]
    q_factor, _ = np.linalg.qr(faiss.randn(dim * dim).reshape(dim, dim))
    x = np.dot(x, q_factor).astype('float32')
    xt = x[:n_train]
    xb = x[n_train:-n_query]
    xq = x[-n_query:]

    # L2 search on the skewed distribution
    plain = faiss.index_factory(dim, 'Flat')
    plain.add(xb)
    _, Il2 = plain.search(xq, top_k)

    # whiten + L2 search on the skewed distribution
    whitened = faiss.index_factory(dim, 'PCAW%d,Flat' % dim)
    whitened.train(xt)
    whitened.add(xb)
    _, Iw = whitened.search(xq, top_k)

    # make sure correlation of whitened results with original
    # results is much better than simple L2 distances
    # should be 961 vs. 264
    overlap_whitened = faiss.eval_intersection(Io, Iw)
    overlap_plain = faiss.eval_intersection(Io, Il2)
    assert (overlap_whitened > 2 * overlap_plain)
def get_trained_index():
    """Return a trained index for ``index_key``, cached on disk.

    Reads ``tmpdir``, ``dbname``, ``index_key``, ``d``, ``xt`` and
    ``choose_train_size`` from the enclosing scope. If the cache file already
    exists it is loaded; otherwise a new index is built with
    ``faiss.index_factory``, trained on the first ``n_train`` vectors of
    ``xt`` and written to the cache file.

    Returns:
        The loaded or freshly trained faiss index.
    """
    filename = "%s/%s_%s_trained.index" % (tmpdir, dbname, index_key)

    if os.path.exists(filename):
        # single-argument print(...) behaves the same on Python 2 and 3
        print("loading %s" % filename)
        return faiss.read_index(filename)

    index = faiss.index_factory(d, index_key)

    n_train = choose_train_size(index_key)
    xtsub = xt[:n_train]
    print("Keeping %d train vectors" % xtsub.shape[0])
    # make sure the data is actually in RAM and in float
    xtsub = xtsub.astype('float32').copy()

    # verbose only for the duration of the training
    index.verbose = True
    t0 = time.time()
    index.train(xtsub)
    index.verbose = False
    print("train done in %.3f s" % (time.time() - t0))

    print("storing %s" % filename)
    faiss.write_index(index, filename)
    return index
def fit(self, X):
    """Build, train and fill a GPU index from the rows of ``X``.

    ``X`` is converted to float32, an "IVF<n_bits>,PQ64" index is created and
    cloned to GPU 0 with float16 enabled in the cloner options, then trained
    on and filled with the same vectors. The number of probes is set from
    ``self._n_probes``.
    """
    vectors = X.astype(numpy.float32)
    dim = len(vectors[0])
    factory_key = "IVF%d,PQ64" % self._n_bits
    self._index = faiss.index_factory(dim, factory_key)

    cloner_options = faiss.GpuClonerOptions()
    cloner_options.useFloat16 = True
    self._index = faiss.index_cpu_to_gpu(
        self._res, 0, self._index, cloner_options)

    self._index.train(vectors)
    self._index.add(vectors)
    self._index.setNumProbes(self._n_probes)
def do_cpu_to_gpu(self, index_key):
    """Check that a CPU index copied to GPU returns the same neighbours.

    Builds the index described by ``index_key`` on the small dataset, searches
    it on CPU, copies it to one GPU and (when more than one GPU is present) to
    two GPUs with and without sharding, and asserts that the returned ids are
    identical to the CPU reference.

    Args:
        index_key: factory string passed to ``faiss.index_factory``.
    """
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case;
    # cast explicitly to int64 because numpy's default integer type is
    # int32 on Windows
    ids = (np.arange(nb) * 3 + 12345).astype('int64')
    index.add_with_ids(xb, ids)
    ts.append(time.time())

    index.nprobe = 4
    _, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    gpu_index.setNumProbes(4)
    _, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())

    # single-argument print(...) gives the same output on Python 2 and 3
    print('times: %s' % ([t - ts[0] for t in ts],))

    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        _, Inew = gpu_index.search(xq, 10)

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)
def test_factory_1(self):
    """Check the effect of the 'np' suffix on PQ factory keys.

    Keys without the suffix must yield ``do_polysemous_training`` set, keys
    with it must not. Building "PQ4" with dimension 10 must raise a
    RuntimeError.
    """
    expectations = [
        ("IVF10,PQ4", True),
        ("IVF10,PQ4np", False),
        ("PQ4", True),
        ("PQ4np", False),
    ]
    for key, polysemous in expectations:
        index = faiss.index_factory(12, key)
        if polysemous:
            assert index.do_polysemous_training
        else:
            assert not index.do_polysemous_training

    # presumably rejected because 10 is not divisible by the 4 PQ
    # sub-quantizers -- confirm against the faiss documentation
    rejected = False
    try:
        index = faiss.index_factory(10, "PQ4")
    except RuntimeError:
        rejected = True
    assert rejected, "should do a runtime error"
def test_chain(self):
    """Compare a chained pre-transform index with the same steps done by hand.

    An "L2norm,PCA2,L2norm,Flat" index is built, its chain is inspected, and
    its search results are compared against an ``IndexFlatL2(2)`` fed with
    vectors transformed manually (normalize, PCA, normalize).
    """
    dim = 4
    n_train, n_base, n_query = 1000, 200, 200
    n_total = n_train + n_base + n_query

    # normal distribution
    x = faiss.randn(n_total * dim, 1234).reshape(n_total, dim)

    # make the distribution very skewed, then mix the axes with the Q factor
    # of a QR decomposition of a random matrix
    x *= [10, 4, 1, 0.5]
    q_factor, _ = np.linalg.qr(faiss.randn(dim * dim).reshape(dim, dim))
    x = np.dot(x, q_factor).astype('float32')

    xt, xb, xq = x[:n_train], x[n_train:-n_query], x[-n_query:]

    index = faiss.index_factory(dim, "L2norm,PCA2,L2norm,Flat")

    assert index.chain.size() == 3
    first_norm = faiss.downcast_VectorTransform(index.chain.at(0))
    assert first_norm.norm == 2
    pca = faiss.downcast_VectorTransform(index.chain.at(1))
    assert not pca.is_trained

    index.train(xt)
    assert pca.is_trained

    index.add(xb)
    _, I = index.search(xq, 5)

    # do the computation manually and check if we get the same result
    def apply_chain_by_hand(vectors):
        out = vectors.copy()  # normalize_L2 works in place; keep the input intact
        faiss.normalize_L2(out)
        out = pca.apply_py(out)
        faiss.normalize_L2(out)
        return out

    flat = faiss.IndexFlatL2(2)
    flat.add(apply_chain_by_hand(xb))
    _, I2 = flat.search(apply_chain_by_hand(xq), 5)

    assert np.all(I == I2)
def test_remove_id_map_2(self):
    """Removing ids from an 'IDMap,Flat' index between two add calls.

    Five vectors are added, ids 10 and 30 are removed, five more are added.
    Searching each vector for its nearest neighbour must return its own id
    unless that id was removed.
    """
    # from https://github.com/facebookresearch/faiss/issues/255
    rng = np.random.RandomState(1234)
    vectors = rng.randn(10, 10).astype(np.float32)
    all_ids = np.array([0, 10, 20, 30, 40, 5, 15, 25, 35, 45], np.int64)
    removed = np.array([10, 30], dtype=np.int64)

    index = faiss.index_factory(10, 'IDMap,Flat')
    index.add_with_ids(vectors[:5, :], all_ids[:5])
    index.remove_ids(removed)
    index.add_with_ids(vectors[5:, :], all_ids[5:])

    print(index.search(vectors, 1))

    for row, own_id in enumerate(all_ids):
        _, found = index.search(vectors[row:row + 1, :], 1)
        if own_id in removed:
            assert found[0] != own_id
        else:
            assert found[0] == own_id
def test_update(self):
    """``update_vectors`` must behave like a permutation of the stored vectors.

    The first 200 vectors of an "IVF64,Flat" index are overwritten with the
    same vectors in reverse order. Reconstructions, search distances and
    (after mapping through the permutation) search ids must be unchanged.
    """
    d = 64
    nb = 1000
    nt = 1500
    nq = 100
    k = 5

    # the draw order (xb, xt, xq) determines the data for the fixed seed
    np.random.seed(123)
    xb = np.random.random(size=(nb, d)).astype('float32')
    xt = np.random.random(size=(nt, d)).astype('float32')
    xq = np.random.random(size=(nq, d)).astype('float32')

    index = faiss.index_factory(d, "IVF64,Flat")
    index.train(xt)
    index.add(xb)
    index.nprobe = 32
    D, I = index.search(xq, k)

    index.make_direct_map()

    def reconstruct_all():
        return np.vstack([index.reconstruct(i) for i in range(nb)])

    recons_before = reconstruct_all()

    # revert order of the 200 first vectors
    nu = 200
    reversed_head = xb[nu - 1::-1].copy()
    index.update_vectors(np.arange(nu), reversed_head)

    recons_after = reconstruct_all()

    # make sure reconstructions remain the same
    diff_recons = recons_before[:nu] - recons_after[nu - 1::-1]
    assert np.abs(diff_recons).max() == 0

    D2, I2 = index.search(xq, k)

    assert np.all(D == D2)

    # id i < nu now holds the vector that used to have id nu - 1 - i
    gt_map = np.arange(nb)
    gt_map[:nu] = np.arange(nu - 1, -1, -1)
    eqs = I.ravel() == gt_map[I2.ravel()]

    assert np.all(eqs)
if isinstance(index, faiss.IndexPreTransform): index_hnsw = faiss.downcast_index(index.index) vec_transform = index.chain.at(0).apply_py else: index_hnsw = index vec_transform = lambda x: x hnsw = index_hnsw.hnsw hnsw_stats = faiss.cvar.hnsw_stats else: print "build index, key=", args.indexkey index = faiss.index_factory(d, args.indexkey) if isinstance(index, faiss.IndexPreTransform): index_hnsw = faiss.downcast_index(index.index) vec_transform = index.chain.at(0).apply_py else: index_hnsw = index vec_transform = lambda x: x hnsw = index_hnsw.hnsw hnsw.efConstruction = args.efConstruction hnsw_stats = faiss.cvar.hnsw_stats index.verbose = True index_hnsw.verbose = True index_hnsw.storage.verbose = True
def test_factory_2(self):
    """An "SQ8" index of dimension 12 must report a code size of 12."""
    assert faiss.index_factory(12, "SQ8").code_size == 12
D, I = index.search(xq, k) t1 = time.time() # the recall should be 1 at all times recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(nq) print "k=%d %.3f s, R@1 %.4f" % ( k, t1 - t0, recall_at_1) ################################################################# # Approximate search experiment ################################################################# print "============ Approximate search" index = faiss.index_factory(d, "IVF4096,PQ64") # faster, uses more memory # index = faiss.index_factory(d, "IVF16384,Flat") co = faiss.GpuClonerOptions() # here we are using a 64-byte PQ, so we must set the lookup tables to # 16 bit float (this is due to the limited temporary memory). co.useFloat16 = True index = faiss.index_cpu_to_gpu(res, 0, index, co) print "train" index.train(xt)
def fvecs_read(fname):
    """Read ``fname`` with ``ivecs_read`` and reinterpret the result as float32."""
    return ivecs_read(fname).view('float32')


#################################################################
#  Main program
#################################################################

# the stage to run is selected by the first command-line argument
stage = int(sys.argv[1])

tmpdir = '/tmp/'

if stage == 0:
    # train the index
    xt = fvecs_read("sift1M/sift_learn.fvecs")
    index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
    print("training index")
    index.train(xt)
    print("write " + tmpdir + "trained.index")
    faiss.write_index(index, tmpdir + "trained.index")


if 1 <= stage <= 4:
    # add 1/4 of the database to 4 independent indexes
    bno = stage - 1
    xb = fvecs_read("sift1M/sift_base.fvecs")
    i0 = int(bno * xb.shape[0] / 4)
    i1 = int((bno + 1) * xb.shape[0] / 4)
    index = faiss.read_index(tmpdir + "trained.index")
    print("adding vectors %d:%d" % (i0, i1))
    # ids are the positions in the full database; request int64 explicitly
    # because numpy's default integer type is int32 on Windows
    index.add_with_ids(xb[i0:i1], np.arange(i0, i1, dtype='int64'))
    print("write " + tmpdir + "block_%d.index" % bno)
    faiss.write_index(index, tmpdir + "block_%d.index" % bno)
def __init__(self, view):
    """Index the rows of ``view`` in a flat inner-product index.

    ``view`` is stored on the instance, passed to ``faiss.normalize_L2`` and
    then added to the index.
    """
    self.view = view
    dim = view.shape[-1]
    self.index = faiss.index_factory(
        dim, "Flat", faiss.METRIC_INNER_PRODUCT)
    # the return value is not used: normalize_L2 is relied on to modify the
    # array in place, so the caller's array is what gets added below
    faiss.normalize_L2(view)
    self.index.add(view)
k = 1 << lk t0 = time.time() D, I = index.search(xq, k) t1 = time.time() # the recall should be 1 at all times recall_at_1 = (I[:, :1] == gt[:, :1]).sum() / float(nq) print "k=%d %.3f s, R@1 %.4f" % (k, t1 - t0, recall_at_1) ################################################################# # Approximate search experiment ################################################################# print "============ Approximate search" index = faiss.index_factory(d, "IVF4096,PQ64") # faster, uses more memory # index = faiss.index_factory(d, "IVF16384,Flat") co = faiss.GpuClonerOptions() # here we are using a 64-byte PQ, so we must set the lookup tables to # 16 bit float (this is due to the limited temporary memory). co.useFloat16 = True index = faiss.index_cpu_to_gpu(res, 0, index, co) print "train" index.train(xt)
def do_cpu_to_gpu(self, index_key):
    """Check that a CPU index copied to GPU returns (almost) the same ids.

    Builds the index described by ``index_key`` on the small dataset, searches
    it on CPU, copies it to one GPU and (when more than one GPU is present) to
    two GPUs with and without sharding. It also validates the layout of the
    GPU resources' memory info.

    Args:
        index_key: factory string passed to ``faiss.index_factory``.
    """
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case;
    # those need to be cast to idx_t(= int64_t), because
    # on windows the numpy int default is int32
    ids = (np.arange(nb) * 3 + 12345).astype('int64')
    index.add_with_ids(xb, ids)
    ts.append(time.time())

    index.nprobe = 4
    _, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    # Validate the layout of the memory info
    mem_info = res.getMemoryInfo()
    flat_data = mem_info[0]['FlatData']
    assert isinstance(mem_info, dict)
    assert isinstance(flat_data, tuple)
    assert isinstance(flat_data[0], int)
    assert isinstance(flat_data[1], int)

    gpu_index.setNumProbes(4)

    _, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())
    print('times:', [t - ts[0] for t in ts])

    # Give us some margin of error
    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size - 50)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for i in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        _, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def test_sq_cpu_to_gpu(self):
    """An "SQfp16" CPU index copied to GPU must be a ``GpuIndexFlat``."""
    resources = faiss.StandardGpuResources()
    cpu_index = faiss.index_factory(32, "SQfp16")
    vectors = np.random.rand(1000, 32).astype(np.float32)
    cpu_index.add(vectors)
    on_gpu = faiss.index_cpu_to_gpu(resources, 0, cpu_index)
    self.assertIsInstance(on_gpu, faiss.GpuIndexFlat)
"FAISS was not compiled with GPU support, or loading _swigfaiss_gpu.so failed" res = faiss.StandardGpuResources() dev_no = 0 # remember results from other index types op_per_key = [] # keep track of optimal operating points seen so far op = faiss.OperatingPoints() for index_key in keys_to_test: print "============ key", index_key # make the index described by the key index = faiss.index_factory(d, index_key) if use_gpu: # transfer to GPU (may be partial) index = faiss.index_cpu_to_gpu(res, dev_no, index) params = faiss.GpuParameterSpace() else: params = faiss.ParameterSpace() params.initialize(index) print "[%.3f s] train & add" % (time.time() - t0) index.train(xt) index.add(xb)
def run(spark_session, cfg):
    """Compute the top-N most similar users per user and write them to Hive.

    Phase 1 loads the score vectors bucket by bucket into a faiss index cloned
    to all GPUs. Phase 2 reloads the vectors bucket by bucket, searches the
    index for the ``top_n`` neighbours of each vector, converts the result to
    a Spark dataframe and writes it to ``similarity_table`` (first write with
    mode 'overwrite', later ones with 'append'). Timing statistics are printed
    at the end.

    Args:
        spark_session: Spark session used to load vectors and build dataframes.
        cfg: configuration dict; reads
            ``cfg['score_vector_rebucketing']['score_vector_alpha_table']``
            and several keys under ``cfg['top_n_similarity']``.
    """
    score_vector_table = cfg['score_vector_rebucketing'][
        'score_vector_alpha_table']
    similarity_cfg = cfg['top_n_similarity']
    similarity_table = similarity_cfg['similarity_table']
    top_n_value = similarity_cfg['top_n']
    aid_bucket_size = similarity_cfg['aid_bucket_size']
    load_bucket_step = similarity_cfg['load_bucket_step']
    search_bucket_step = similarity_cfg['search_bucket_step']
    index_factory_string = similarity_cfg['index_factory_string']

    def as_duration(seconds):
        # human-readable H:MM:SS rendering of a number of seconds
        return str(datetime.timedelta(seconds=seconds))

    # If the number of GPUs is 0, uninstall faiss-cpu.
    num_gpus = faiss.get_num_gpus()
    assert num_gpus != 0
    print('Number of GPUs available: {}'.format(num_gpus))

    start_time = time.time()

    # ---- Phase 1: load the score vectors into the index.
    aid_list = []
    for aid_bucket in range(0, aid_bucket_size, load_bucket_step):
        last_bucket = aid_bucket + load_bucket_step - 1
        print('Loading alpha buckets {} - {} of {}'.format(
            aid_bucket, last_bucket, aid_bucket_size))
        (aids, score_vectors, _) = load_score_vectors(
            spark_session, score_vector_table, aid_bucket,
            load_bucket_step, aid_bucket_size)

        if aid_bucket == 0:
            # First iteration: start the aid list and create the FAISS index.
            aid_list = aids
            cpu_index = faiss.index_factory(score_vectors.shape[1],
                                            index_factory_string)
            gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)
        else:
            # Keep track of the aids.
            aid_list = np.concatenate((aid_list, aids))

        # Train only if the index reports that it is not trained yet.
        if not gpu_index.is_trained:
            gpu_index.train(score_vectors)

        # Add the vectors to the index.
        gpu_index.add(score_vectors)
    load_time = time.time()

    # ---- Phase 2: find the top N by bucket step.
    start_load = time.time()
    mode = 'overwrite'
    total_search_time = 0
    total_load_time = 0
    total_format_time = 0
    total_write_time = 0
    for aid_bucket in range(0, aid_bucket_size, search_bucket_step):
        last_bucket = aid_bucket + search_bucket_step - 1
        print('Searching alpha buckets {} - {} of {}'.format(
            aid_bucket, last_bucket, aid_bucket_size))

        # Load the users to perform the search with.
        print('Loading users from Hive')
        (aids, score_vectors, buckets) = load_score_vectors(
            spark_session, score_vector_table, aid_bucket,
            search_bucket_step, aid_bucket_size)
        end_load = time.time()
        total_load_time += end_load - start_load

        # Search for the top N similar users for bucket.
        print('Performing the search')
        top_n_distances, top_n_indices = gpu_index.search(
            score_vectors, top_n_value)
        end_search = time.time()
        total_search_time += end_search - end_load

        # Get the top N aids from the top N indexes.
        top_n_aids = aid_list[top_n_indices]

        # Format the data for a Spark dataframe in order to write to Hive.
        # One row per user: (aid, [(neighbour aid, score), ...], aid bucket)
        print('Formatting the output')
        data = []
        for aid, neighbour_aids, neighbour_scores, bucket in zip(
                aids, top_n_aids, top_n_distances, buckets):
            neighbours = [
                (str(n_aid), float(distance))
                for n_aid, distance in zip(neighbour_aids, neighbour_scores)
            ]
            data.append((str(aid), neighbours, int(bucket)))

        # Output dataframe schema.
        neighbour_type = StructType([
            StructField('aid', StringType(), False),
            StructField('score', FloatType(), False)
        ])
        schema = StructType([
            StructField("aid", StringType(), True),
            StructField("top_n_similar_user",
                        ArrayType(neighbour_type, True)),
            StructField("aid_bucket", IntegerType(), True)
        ])

        # Create the output dataframe with the similar users for each user.
        rdd = spark_session.sparkContext.parallelize(data)
        df = spark_session.createDataFrame(rdd, schema)
        end_format = time.time()
        total_format_time += end_format - end_search

        # Write the output dataframe to Hive.
        print('Writing output to Hive')
        # note: the partition argument is a plain string, not a tuple
        write_to_table_with_partition(df,
                                      similarity_table,
                                      partition='aid_bucket',
                                      mode=mode)
        mode = 'append'
        end_write = time.time()
        total_write_time += end_write - end_format
        # the next bucket's load time is measured from the end of this write
        start_load = end_write

    search_time = time.time()

    print('Index size:', gpu_index.ntotal)
    print(gpu_index.d)
    print(4 * gpu_index.d * gpu_index.ntotal, 'bytes (uncompressed)')
    print('Total time: ', as_duration(search_time - start_time))
    print(' Index load time: ', as_duration(load_time - start_time))
    print(' Overall search time: ', as_duration(search_time - load_time))
    print(' Total load time: ', as_duration(total_load_time))
    print(' Total search time: ', as_duration(total_search_time))
    print(' Total format time: ', as_duration(total_format_time))
    print(' Total write time: ', as_duration(total_write_time))
def test_exception_2(self):
    """An unparsable factory string must raise a RuntimeError.

    The key 'IVF256,Flat,PQ8' is handed to ``faiss.index_factory``; the test
    fails if no RuntimeError is raised or if its message does not contain
    'could not parse'.
    """
    try:
        faiss.index_factory(12, 'IVF256,Flat,PQ8')
    except RuntimeError as e:
        # ``as`` is valid on Python 2.6+ and Python 3, unlike ``except X, e``
        assert 'could not parse' in str(e)
    else:
        # without this branch the test would pass silently when the factory
        # accepts the string
        assert False, 'exception did not fire???'
faiss库 - 海量高维域名相似度计算 author : h-j-13 time : 2018-6-25 """ import time import numpy import faiss # 基本参数 d = 300 # 向量维数 data_size = 500000 # 数据库大小 k = 50 # 构建索引 index = faiss.index_factory(d, "OPQ8_64,IVF2000,PQ8") # 生成测试数据 numpy.random.seed(13) data = numpy.random.random(size=(data_size, d)).astype('float32') # 训练数据 start_time = time.time() index.train(data) print "Train Index Used %.2f sec." % (time.time() - start_time) for i in xrange(250): # 添加数据 data = numpy.random.random(size=(data_size, d)).astype('float32') start_time = time.time()
def test_factory_4(self):
    """An "IVF10,FlatDedup" index must expose a non-None ``instances``."""
    dedup_index = faiss.index_factory(12, "IVF10,FlatDedup")
    instances = dedup_index.instances
    assert instances is not None
def test_set_gpu_param(self):
    """Setting 'nprobe' via GpuParameterSpace on a GPU copy must not raise.

    There is no assertion; the test passes when the calls complete.
    """
    cpu_index = faiss.index_factory(12, "PCAR8,IVF10,PQ4")
    resources = faiss.StandardGpuResources()
    on_gpu = faiss.index_cpu_to_gpu(resources, 0, cpu_index)
    param_space = faiss.GpuParameterSpace()
    param_space.set_index_parameter(on_gpu, "nprobe", 3)
def build(self, config):
    '''
    Build a faiss index from scratch, or append to / remove from an
    existing one, and store it together with its id map.

    Two files are kept in config["index_dir"]:
        vector.index: the faiss index file
        id_map.pkl:   pickled dict mapping each id to its image doc

    Args:
        config: dict read for "index_operation" ("new", "append" or "remove",
            default "new"), "data_file", "image_root", "delimiter",
            "index_dir", "dist_type", "embedding_size" and "index_method"
            (default "HNSW32").

    Raises:
        AssertionError: if the operation is unknown, if an existing index
            file or id map is required but missing, or if their sizes
            disagree.
        RuntimeError: if "remove" is requested with index method HNSW32.
    '''
    operation_method = config.get("index_operation", "new").lower()
    # validate first so that a bad operation fails before the (expensive)
    # feature extraction below
    assert operation_method in [
        "new", "remove", "append"
    ], "Only append, remove and new operation are supported"

    gallery_images, gallery_docs = split_datafile(
        config['data_file'], config['image_root'], config['delimiter'])

    # when removing data from the index, features do not need to be extracted
    if operation_method != "remove":
        gallery_features = self._extract_features(gallery_images, config)

    index_dir = config["index_dir"]
    index_path = os.path.join(index_dir, "vector.index")
    id_map_path = os.path.join(index_dir, "id_map.pkl")
    is_hamming = config["dist_type"] == "hamming"
    # the same default is used everywhere the index method is consulted
    configured_method = config.get("index_method", "HNSW32")

    if operation_method in ["remove", "append"]:
        # if remove or append, vector.index and id_map.pkl must exist;
        # check the files themselves (a joined path string is always truthy)
        assert os.path.exists(
            index_path
        ), "The vector.index dose not exist in {} when 'index_operation' is not None".format(
            index_dir)
        assert os.path.exists(
            id_map_path
        ), "The id_map.pkl dose not exist in {} when 'index_operation' is not None".format(
            index_dir)
        index = faiss.read_index(index_path)
        # NOTE: pickle.load can execute arbitrary code; id_map.pkl must come
        # from a trusted index_dir
        with open(id_map_path, 'rb') as fd:
            ids = pickle.load(fd)
        assert index.ntotal == len(ids.keys(
        )), "data number in index is not equal in in id_map"
    else:
        os.makedirs(index_dir, exist_ok=True)
        index_method = configured_method

        # if IVF method, calculate the number of ivf lists automatically
        if index_method == "IVF":
            index_method = index_method + str(
                min(int(len(gallery_images) // 8), 65536)) + ",Flat"

        # for a binary index, add B at the head of index_method
        if is_hamming:
            index_method = "B" + index_method

        # dist_type
        dist_type = faiss.METRIC_INNER_PRODUCT if config[
            "dist_type"] == "IP" else faiss.METRIC_L2

        # build index
        if is_hamming:
            index = faiss.index_binary_factory(config["embedding_size"],
                                               index_method)
        else:
            index = faiss.index_factory(config["embedding_size"],
                                        index_method, dist_type)
            index = faiss.IndexIDMap2(index)
        ids = {}

    if configured_method == "HNSW32":
        logger.warning(
            "The HNSW32 method dose not support 'remove' operation")

    if operation_method != "remove":
        # calculate ids for the new data, continuing after the largest
        # existing id
        start_id = max(ids.keys()) + 1 if ids else 0
        ids_now = (
            np.arange(0, len(gallery_images)) + start_id).astype(np.int64)

        # only train when building a new index file
        if operation_method == "new":
            if is_hamming:
                index.add(gallery_features)
            else:
                index.train(gallery_features)

        if not is_hamming:
            index.add_with_ids(gallery_features, ids_now)

        for i, d in zip(list(ids_now), gallery_docs):
            ids[i] = d
    else:
        if configured_method == "HNSW32":
            raise RuntimeError(
                "The index_method: HNSW32 dose not support 'remove' operation"
            )
        # remove ids in id_map, remove index data in faiss index
        ids_to_remove = [k for k in ids if ids.get(k) in gallery_docs]
        # force int64 so that an empty selection does not become a float
        # array
        index.remove_ids(np.asarray(ids_to_remove, dtype=np.int64))
        for k in ids_to_remove:
            del ids[k]

    # store faiss index file and id_map file
    if is_hamming:
        faiss.write_index_binary(index, index_path)
    else:
        faiss.write_index(index, index_path)

    with open(id_map_path, 'wb') as fd:
        pickle.dump(ids, fd)
def fvecs_read(fname):
    """Read ``fname`` with ``ivecs_read`` and reinterpret the result as float32."""
    as_int = ivecs_read(fname)
    return as_int.view('float32')


#################################################################
#  Main program
#################################################################

# the stage to run is selected by the first command-line argument
stage = int(sys.argv[1])

tmpdir = '/tmp/'

if stage == 0:
    # train the index
    xt = fvecs_read("sift1M/sift_learn.fvecs")
    index = faiss.index_factory(xt.shape[1], "IVF4096,Flat")
    print("training index")
    index.train(xt)
    print("write " + tmpdir + "trained.index")
    faiss.write_index(index, tmpdir + "trained.index")


if 1 <= stage <= 4:
    # add 1/4 of the database to 4 independent indexes
    bno = stage - 1
    xb = fvecs_read("sift1M/sift_base.fvecs")
    i0 = int(bno * xb.shape[0] / 4)
    i1 = int((bno + 1) * xb.shape[0] / 4)
    index = faiss.read_index(tmpdir + "trained.index")
    print("adding vectors %d:%d" % (i0, i1))
    # NOTE(review): plain add() is given no explicit ids, so the ids in each
    # block presumably do not reflect positions i0..i1 -- confirm this is
    # intended
    index.add(xb[i0:i1])
    # NOTE(review): the write_index call matching this message is not visible
    # in this excerpt -- confirm it follows
    print("write " + tmpdir + "block_%d.index" % bno)
dev_no = 0 # remember results from other index types op_per_key = [] # keep track of optimal operating points seen so far op = faiss.OperatingPoints() for index_key in keys_to_test: print "============ key", index_key # make the index described by the key index = faiss.index_factory(d, index_key) if use_gpu: # transfer to GPU (may be partial) index = faiss.index_cpu_to_gpu(res, dev_no, index) params = faiss.GpuParameterSpace() else: params = faiss.ParameterSpace() params.initialize(index) print "[%.3f s] train & add" % (time.time() - t0) index.train(xt) index.add(xb)
bowDiction = cv2.BOWImgDescriptorExtractor(sift, cv2.BFMatcher(cv2.NORM_L2)) bowDiction.setVocabulary(dictionary) print "bow dictionary", np.shape(dictionary) # returns descriptor of image at pth def feature_extract(pth): im = cv2.imread(pth, 1) gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) return bowDiction.compute(gray, sift.detect(gray)) # ------------------- train faiss index # prepare index index = faiss.index_factory(bow_num_words, INDEX_KEY) # index = faiss.IndexIDMap(index) if USE_GPU: print("Use GPU...") res = faiss.StandardGpuResources() index = faiss.index_cpu_to_gpu(res, 0, index) # prepare ids ids_count = 1 index_dict = {} ids = [] features = np.matrix([]) for file_name in images_list: print ids_count dsc = feature_extract(file_name)