def do_cpu_to_gpu(self, index_key):
    """Compare search results of a CPU index with its GPU clones.

    An index is built from the ``index_key`` factory string, trained and
    filled on CPU, then searched with nprobe = 4.  The same index is cloned
    to a single GPU (results must match exactly) and, when more than one
    GPU is reported, to two GPUs both replicated and sharded (at least 99%
    of the returned ids must match).  Elapsed times of each stage are
    printed.
    """
    timestamps = [time.time()]

    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # cheaper training settings so that the test runs faster
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    timestamps.append(time.time())

    index.train(xt)
    timestamps.append(time.time())

    # use explicit, non-contiguous ids: there used to be a bug in this case
    custom_ids = np.arange(nb).astype(np.int64) * 3 + 12345
    index.add_with_ids(xb, custom_ids)
    timestamps.append(time.time())

    # reference result on CPU
    index.nprobe = 4
    _, Iref = index.search(xq, 10)
    timestamps.append(time.time())

    # single-GPU clone
    single_res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(single_res, 0, index)
    timestamps.append(time.time())

    gpu_index.setNumProbes(4)
    _, Inew = gpu_index.search(xq, 10)
    timestamps.append(time.time())

    origin = timestamps[0]
    print('times:', [stamp - origin for stamp in timestamps])

    n_same = (Iref == Inew).sum()
    self.assertGreaterEqual(n_same, Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in (False, True):
        # test on just 2 GPUs
        multi_res = [faiss.StandardGpuResources() for _ in range(2)]
        cloner_opts = faiss.GpuMultipleClonerOptions()
        cloner_opts.shard = shard
        gpu_index = faiss.index_cpu_to_gpu_multiple_py(
            multi_res, index, cloner_opts)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        _, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        n_same = (Iref == Inew).sum()
        self.assertGreaterEqual(n_same, Iref.size * 0.99)
def do_cpu_to_gpu(self, index_key):
    """Check that cloning a CPU index to GPU preserves search results.

    Builds the index described by the ``index_key`` factory string, trains
    it, adds vectors with custom ids and searches it on CPU with
    nprobe = 4.  The index is then copied to one GPU, where the returned
    ids must be identical, and (if more than one GPU is reported) to two
    GPUs in replicated and sharded mode.  The time spent in each stage is
    printed relative to the start of the test.
    """
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case;
    # cast explicitly to int64 because the default numpy integer type is
    # not 64-bit on every platform
    ids = (np.arange(nb) * 3 + 12345).astype('int64')
    index.add_with_ids(xb, ids)
    ts.append(time.time())

    index.nprobe = 4
    _, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    gpu_index.setNumProbes(4)
    _, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())

    # print() call form so that the module also parses on Python 3
    print('times:', [t - ts[0] for t in ts])

    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for _ in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        _, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def move_index_to_gpu(index, shard=False):
    """Clone ``index`` onto all GPUs reported by faiss and return the clone.

    One ``StandardGpuResources`` object is created per GPU.  The cloner
    options request float16 (``useFloat16 = True``), use ``shard`` to pick
    between sharding and the non-sharded mode, and set ``shard_type`` to 1.
    Progress and elapsed time are printed.

    Args:
        index: CPU index to clone.
        shard: value assigned to the cloner's ``shard`` option.

    Returns:
        The index returned by ``faiss.index_cpu_to_gpu_multiple_py``.
    """
    n_devices = faiss.get_num_gpus()
    resources = [faiss.StandardGpuResources() for _ in range(n_devices)]

    cloner_opts = faiss.GpuMultipleClonerOptions()
    cloner_opts.useFloat16 = True
    cloner_opts.shard = shard
    cloner_opts.shard_type = 1

    print(" moving to %d GPUs" % n_devices)
    started = time.time()
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(
        resources, index, cloner_opts)
    # Attach the resource objects to the returned index; judging by the
    # attribute name this is meant to keep them alive as long as the index.
    gpu_index.dont_dealloc_me = resources
    print(" done in %.3f s" % (time.time() - started))
    return gpu_index
def gpux4_allpair_similarity(ds, prefix):
    """Compute top-100 inner-product neighbours against the index set.

    If ``load_cached_result(prefix)`` returns something, that value is
    returned directly.  Otherwise ``ds.feats_index`` is added to a flat
    inner-product index sharded over four GPU resource objects, and both
    ``ds.feats_index`` and ``ds.feats_test`` are searched against it with
    k = 100.  The four result arrays are saved as ``.npy`` files under
    ``prefix`` and returned in an ``edict`` with keys ``ti_sims``,
    ``ti_ids``, ``ii_sims`` and ``ii_ids``.
    """
    cached = load_cached_result(prefix)
    if cached is not None:
        return cached

    # Sharded multi-GPU clone of a flat inner-product index
    cloner_opts = faiss.GpuMultipleClonerOptions()
    cloner_opts.shard = True
    gpu_resources = [faiss.StandardGpuResources() for _ in range(4)]

    dim = ds.feats_index.shape[1]
    flat_index = faiss.IndexFlatIP(dim)
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(
        gpu_resources, flat_index, cloner_opts)
    gpu_index.add(ds.feats_index)

    out_dir = Path(prefix)

    # original author's timing note: 177sec
    with timer('Prepare all-pair similarity on index dataset'):
        ii_sims, ii_ids = gpu_index.search(x=ds.feats_index, k=100)

    with timer('Save results (index-index)'):
        ids_path = out_dir / "index19_vs_index19_ids.npy"
        ids_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(str(ids_path), ii_ids)
        np.save(str(out_dir / "index19_vs_index19_sims.npy"), ii_sims)

    with timer('Prepare all-pair similarity on test-index dataset'):
        ti_sims, ti_ids = gpu_index.search(x=ds.feats_test, k=100)

    with timer('Save results (test-index)'):
        np.save(str(out_dir / "test19_vs_index19_ids.npy"), ti_ids)
        np.save(str(out_dir / "test19_vs_index19_sims.npy"), ti_sims)

    results = {
        'ti_sims': ti_sims,
        'ti_ids': ti_ids,
        'ii_sims': ii_sims,
        'ii_ids': ii_ids,
    }
    return edict(results)
def voronoi_gpu():
    """Time IVF-Flat searches on all visible GPUs for nprobe = 1..20.

    A CPU ``IndexIVFFlat`` with 100 lists is trained on the vectors loaded
    from ``fout``, cloned in sharded mode onto every GPU faiss reports, and
    filled with the first 20,000,000 vectors.  The first ``test_size``
    vectors are used as queries.  For each nprobe value the wall-clock time
    of one ``search(xq, 2)`` call is printed.

    Relies on the module-level names ``ncols``, ``fout``, ``test_size`` and
    ``tools``.
    """
    # Only needed by the accuracy check, which is disabled in the loop
    # below; the load is kept so the function still touches the same files.
    test_index = tools.load_vector('../data/adamskij/test_index.bin', 'L')

    nlist = 100
    quantizer = faiss.IndexFlatL2(ncols)
    cpu_index = faiss.IndexIVFFlat(quantizer, ncols, nlist)

    xb = tools.load_2d_vec(fout, ncols, typecode='f')
    xq = np.copy(xb[:test_size])

    cpu_index.train(xb)

    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)

    ress = []
    for i in range(ngpus):
        res = faiss.StandardGpuResources()
        if i in (2, 3, 4, 5):
            # these devices get no temporary memory reservation
            res.noTempMemory()
            res.initializeForDevice(i)
        ress.append(res)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(ress, cpu_index, co)

    gpu_index.add(xb[:20_000_000])

    # Set nprobe through GpuParameterSpace: with several GPUs the clone is
    # a wrapper around per-GPU indexes, and assigning ``gpu_index.nprobe``
    # directly would not reach the sub-indexes.
    params = faiss.GpuParameterSpace()
    for nprobe in range(1, 21):  # default nprobe is 1, try a few more
        params.set_index_parameter(gpu_index, 'nprobe', nprobe)
        start_time = time.time()
        gpu_index.search(xq, 2)
        secs = time.time() - start_time
        # accuracy check (disabled): (I[:, 1] == test_index).sum()
        print(nprobe, secs)
def gpux4_euclidsearch_from_dataset(ds, fn_npy, lhs='test', rhs='index', topk=100):
    """Run an L2 nearest-neighbour search on 4 GPUs and save the ranks.

    ``ds['feats_<rhs>']`` is added to a flat L2 index sharded over four GPU
    resource objects, ``ds['feats_<lhs>']`` is searched against it, and the
    returned id matrix is saved with ``np.save`` to ``fn_npy`` (parent
    directories are created).  For the default test-vs-index combination a
    submission file is additionally written next to it via
    ``save_sub_from_top100ranks``, named after ``fn_npy`` with the ``.npy``
    suffix replaced by ``.csv.gz``.

    Args:
        ds: mapping that provides the ``feats_<lhs>`` / ``feats_<rhs>`` arrays.
        fn_npy: output path of the rank matrix.
        lhs: name suffix of the query feature set.
        rhs: name suffix of the database feature set.
        topk: number of neighbours to retrieve per query.
    """
    # Search with GpuMultiple
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True

    vres = [faiss.StandardGpuResources() for _ in range(4)]

    rhs_feats = ds[f'feats_{rhs}']
    cpu_index = faiss.IndexFlatL2(rhs_feats.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(rhs_feats)

    _, all_ranks = gpu_index.search(x=ds[f'feats_{lhs}'], k=topk)

    Path(fn_npy).parent.mkdir(parents=True, exist_ok=True)
    np.save(fn_npy, all_ranks)

    if lhs == 'test' and rhs == 'index':
        # Retrieval task.
        # Remove the literal '.npy' suffix only: str.rstrip('.npy') would
        # strip any trailing run of the characters '.', 'n', 'p', 'y' and
        # mangle names such as 'happy.npy'.
        base = str(fn_npy)
        if base.endswith('.npy'):
            base = base[:-len('.npy')]
        fn_sub = base + '.csv.gz'
        save_sub_from_top100ranks(ds, all_ranks, fn_sub, topk=topk)
weights, normalize=True) # ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True) co = faiss.GpuMultipleClonerOptions() co.shard = True # co.float16 = False vres = [] for _ in range(n_gpus): res = faiss.StandardGpuResources() vres.append(res) print('build index...') cpu_index = faiss.IndexFlatL2(feats_index.shape[1]) gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co) gpu_index.add(feats_index) dists, topk_idx = gpu_index.search(x=feats_test, k=topk) print('query search done.') retrieval_result = pd.DataFrame(ids_test, columns=['id']) retrieval_result['images'] = np.apply_along_axis(' '.join, axis=1, arr=ids_index[topk_idx]) output_name = f'../output/{setting}.csv.gz' retrieval_result.to_csv(output_name, compression='gzip', index=False) print('saved to ' + output_name) cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" ' print(cmd) subprocess.run(cmd, shell=True)
def do_cpu_to_gpu(self, index_key):
    """Check that cloning a CPU index to GPU preserves search results.

    Builds the index described by the ``index_key`` factory string, trains
    it, adds vectors with custom ids and searches it on CPU with
    nprobe = 4.  The index is then copied to one GPU, where the layout of
    ``getMemoryInfo()`` is validated and at most 50 returned ids may differ
    from the CPU result.  If more than one GPU is reported, the index is
    also copied to two GPUs in replicated and sharded mode, where at least
    99% of the ids must match.  The time spent in each stage is printed
    relative to the start of the test.
    """
    ts = []
    ts.append(time.time())
    (xt, xb, xq) = self.get_dataset(small_one=True)
    nb, d = xb.shape

    index = faiss.index_factory(d, index_key)
    if index.__class__ == faiss.IndexIVFPQ:
        # speed up test
        index.pq.cp.niter = 2
        index.do_polysemous_training = False
    ts.append(time.time())

    index.train(xt)
    ts.append(time.time())

    # adding some ids because there was a bug in this case;
    # those need to be cast to idx_t(= int64_t), because
    # on windows the numpy int default is int32
    ids = (np.arange(nb) * 3 + 12345).astype('int64')
    index.add_with_ids(xb, ids)
    ts.append(time.time())

    index.nprobe = 4
    _, Iref = index.search(xq, 10)
    ts.append(time.time())

    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
    ts.append(time.time())

    # Validate the layout of the memory info.  unittest assertions are used
    # instead of bare ``assert`` so the checks still run under ``python -O``
    # and report the offending value on failure.
    mem_info = res.getMemoryInfo()
    self.assertIsInstance(mem_info, dict)
    flat_data = mem_info[0]['FlatData']
    self.assertIsInstance(flat_data, tuple)
    self.assertIsInstance(flat_data[0], int)
    self.assertIsInstance(flat_data[1], int)

    gpu_index.setNumProbes(4)
    _, Inew = gpu_index.search(xq, 10)
    ts.append(time.time())

    print('times:', [t - ts[0] for t in ts])

    # Give us some margin of error
    self.assertGreaterEqual((Iref == Inew).sum(), Iref.size - 50)

    if faiss.get_num_gpus() == 1:
        return

    for shard in False, True:
        # test on just 2 GPUs
        res = [faiss.StandardGpuResources() for _ in range(2)]
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard

        gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

        faiss.GpuParameterSpace().set_index_parameter(
            gpu_index, 'nprobe', 4)

        _, Inew = gpu_index.search(xq, 10)

        # 0.99: allow some tolerance in results otherwise test
        # fails occasionally (not reproducible)
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def ban_final():
    """Zero the score of test images that are close to "banned" images.

    Steps, as performed by the code below:

    1. Load the weighted, normalized ensemble features of the test set from
       six experiment directories.
    2. Read an existing submission and split its ``landmarks`` column into
       ``landmark_id`` and ``score``.
    3. Landmark ids predicted for more than ``ban_thresh`` (30) rows are
       "banned"; test images predicted as one of them form the query set,
       all remaining test images form the database.
    4. Search the database for the ``topk`` nearest neighbours of every
       banned image with a flat L2 index sharded over 4 GPU resources.
    5. Every database image returned with a reported distance below 0.5
       gets its ``landmarks`` entry rewritten to ``"<landmark_id> 0"``.
    6. Write the ``id`` / ``landmarks`` columns to
       ``../output/l2dist_0.5.csv.gz``.
    """
    import faiss
    import numpy as np
    import pandas as pd

    from src import utils

    topk = 100
    ROOT = '/opt/landmark/'
    test_dirs = [
        ROOT + 'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT + 'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT + 'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT + 'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT + 'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]
    # per-model ensemble weights, chosen by intuition (original author's note)
    weights = [
        0.5,
        1.0,
        1.0,
        0.5,
        1.0,
        1.0,
    ]

    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs, weights, normalize=True)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True

    vres = [faiss.StandardGpuResources() for _ in range(4)]

    # The submission is loaded and parsed once; it is only read (never
    # modified) until the final rewrite of the 'landmarks' column, so the
    # same frame serves both the ban detection and the output.
    subm = pd.read_csv(
        '../output/stage2_submit_banthresh30_ens3_top3_DBAx1_v44r7.csv.gz')
    # 'landmarks' holds "<landmark_id> <score>"; split it into two columns
    subm['landmark_id'], subm['score'] = list(
        zip(*subm['landmarks'].apply(lambda x: str(x).split(' '))))
    subm['score'] = subm['score'].astype(np.float32)
    subm = subm.sort_values('score', ascending=False).set_index('id')

    # Landmark ids that occur in more than ban_thresh rows are banned
    ban_thresh = 30
    freq = subm['landmark_id'].value_counts()
    ban_lids = freq[freq > ban_thresh].index

    is_ban = np.isin(ids_test, subm[subm['landmark_id'].isin(ban_lids)].index)
    not_ban_ids_test = ids_test[~is_ban]
    ban_feats_test = feats_test[is_ban]
    not_ban_feats_test = feats_test[~is_ban]

    print('build index...')
    cpu_index = faiss.IndexFlatL2(not_ban_feats_test.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(not_ban_feats_test)
    dists, topk_idx = gpu_index.search(x=ban_feats_test, k=topk)
    print('query search done.')

    # Non-banned images that lie within the distance threshold of any
    # banned image are treated as banned as well.
    new_ban_ids = np.unique(not_ban_ids_test[topk_idx[dists < 0.5]])
    subm.loc[new_ban_ids, 'landmarks'] = subm.loc[new_ban_ids, 'landmark_id'] + ' 0'

    output_filename = '../output/l2dist_0.5.csv.gz'
    subm.reset_index()[['id', 'landmarks']].to_csv(output_filename, index=False, compression='gzip')