示例#1
0
    def do_cpu_to_gpu(self, index_key):
        """Check that GPU clones of a CPU index return the same neighbors.

        The index described by ``index_key`` is built from the dataset
        returned by ``self.get_dataset(small_one=True)``, searched on CPU,
        then cloned to GPU 0 and searched again. The method returns at that
        point when faiss reports exactly one GPU; otherwise the comparison
        is repeated on 2-GPU clones, without and with sharding.
        """
        timestamps = [time.time()]
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        cpu_index = faiss.index_factory(d, index_key)
        if cpu_index.__class__ == faiss.IndexIVFPQ:
            # fewer iterations and no polysemous training: faster test
            cpu_index.pq.cp.niter = 2
            cpu_index.do_polysemous_training = False
        timestamps.append(time.time())

        cpu_index.train(xt)
        timestamps.append(time.time())

        # explicit, non-contiguous ids: there was a bug in this case
        custom_ids = np.arange(nb).astype(np.int64) * 3 + 12345
        cpu_index.add_with_ids(xb, custom_ids)
        timestamps.append(time.time())

        cpu_index.nprobe = 4
        _, Iref = cpu_index.search(xq, 10)
        timestamps.append(time.time())

        single_res = faiss.StandardGpuResources()
        single_gpu_index = faiss.index_cpu_to_gpu(single_res, 0, cpu_index)
        timestamps.append(time.time())

        single_gpu_index.setNumProbes(4)

        _, Inew = single_gpu_index.search(xq, 10)
        timestamps.append(time.time())
        start = timestamps[0]
        print('times:', [stamp - start for stamp in timestamps])

        # every returned label must match the CPU reference
        n_matching = (Iref == Inew).sum()
        self.assertGreaterEqual(n_matching, Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for use_shards in (False, True):

            # only 2 GPUs are used for this part
            multi_res = [faiss.StandardGpuResources() for _ in range(2)]
            cloner_options = faiss.GpuMultipleClonerOptions()
            cloner_options.shard = use_shards

            multi_gpu_index = faiss.index_cpu_to_gpu_multiple_py(
                multi_res, cpu_index, cloner_options)

            param_space = faiss.GpuParameterSpace()
            param_space.set_index_parameter(multi_gpu_index, 'nprobe', 4)

            _, Inew = multi_gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            n_matching = (Iref == Inew).sum()
            self.assertGreaterEqual(n_matching, Iref.size * 0.99)
示例#2
0
    def do_cpu_to_gpu(self, index_key):
        """Check that GPU clones of a CPU index return the same neighbors.

        Builds the index described by ``index_key`` from the dataset
        returned by ``self.get_dataset(small_one=True)``, searches it on
        CPU, then searches a clone on GPU 0 and requires identical labels.
        Returns there when faiss reports exactly one GPU; otherwise the
        same check is run on 2-GPU clones, without and with sharding.
        """
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case; cast them
        # explicitly to int64 because numpy's default integer type is
        # int32 on some platforms (e.g. Windows)
        ids = (np.arange(nb) * 3 + 12345).astype(np.int64)
        index.add_with_ids(xb, ids)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        # print() call rather than the Python 2 print statement, which is
        # a syntax error under Python 3
        print('times:', [t - ts[0] for t in ts])

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)
def move_index_to_gpu(index, shard=False):
    """Clone ``index`` onto all GPUs reported by faiss and return the clone.

    Args:
        index: CPU index to copy.
        shard: value assigned to the ``shard`` cloner option.

    Returns:
        The multi-GPU index. The list of GPU resource objects is stored on
        it as ``dont_dealloc_me`` (presumably so that the resources stay
        referenced for as long as the index is).
    """
    ngpu = faiss.get_num_gpus()
    resources = [faiss.StandardGpuResources() for _ in range(ngpu)]

    cloner_options = faiss.GpuMultipleClonerOptions()
    cloner_options.useFloat16 = True
    cloner_options.shard = shard
    cloner_options.shard_type = 1

    print("   moving to %d GPUs" % ngpu)
    started = time.time()
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(
        resources, index, cloner_options)
    gpu_index.dont_dealloc_me = resources
    elapsed = time.time() - started
    print("      done in %.3f s" % elapsed)
    return gpu_index
示例#4
0
def gpux4_allpair_similarity(ds, prefix):
    """Compute top-100 inner-product neighbors against the index features.

    A result returned by ``load_cached_result(prefix)`` is used as-is when
    it is not None. Otherwise ``ds.feats_index`` is added to a sharded
    inner-product index spread over 4 GPU resource objects, and both
    ``ds.feats_index`` and ``ds.feats_test`` are searched against it. The
    four result arrays are saved as ``.npy`` files under ``prefix``.

    Returns:
        An ``edict`` with keys ``ti_sims``, ``ti_ids``, ``ii_sims`` and
        ``ii_ids`` (or the cached object).
    """
    cached = load_cached_result(prefix)
    if cached is not None:
        return cached

    # Sharded index over 4 GPU resource objects
    cloner_options = faiss.GpuMultipleClonerOptions()
    cloner_options.shard = True
    gpu_resources = [faiss.StandardGpuResources() for _ in range(4)]

    dim = ds.feats_index.shape[1]
    flat_index = faiss.IndexFlatIP(dim)
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(
        gpu_resources, flat_index, cloner_options)
    gpu_index.add(ds.feats_index)

    # original author's timing note: 177sec
    with timer('Prepare all-pair similarity on index dataset'):
        ii_sims, ii_ids = gpu_index.search(x=ds.feats_index, k=100)

    with timer('Save results (index-index)'):
        out_dir = Path(prefix)
        ii_ids_path = out_dir / "index19_vs_index19_ids.npy"
        ii_sims_path = out_dir / "index19_vs_index19_sims.npy"
        ii_ids_path.parent.mkdir(parents=True, exist_ok=True)
        np.save(str(ii_ids_path), ii_ids)
        np.save(str(ii_sims_path), ii_sims)

    with timer('Prepare all-pair similarity on test-index dataset'):
        ti_sims, ti_ids = gpu_index.search(x=ds.feats_test, k=100)

    with timer('Save results (test-index)'):
        ti_ids_path = Path(prefix) / "test19_vs_index19_ids.npy"
        ti_sims_path = Path(prefix) / "test19_vs_index19_sims.npy"
        np.save(str(ti_ids_path), ti_ids)
        np.save(str(ti_sims_path), ti_sims)

    results = {
        'ti_sims': ti_sims,
        'ti_ids': ti_ids,
        'ii_sims': ii_sims,
        'ii_ids': ii_ids,
    }
    return edict(results)
示例#5
0
def voronoi_gpu():
    """Time sharded multi-GPU IVF-flat searches for nprobe = 1..20.

    Trains an ``IndexIVFFlat`` (100 lists, L2 quantizer) on the vectors
    loaded from ``fout``, clones it sharded onto every GPU reported by
    faiss, adds the first 20,000,000 vectors and then prints, for each
    nprobe from 1 to 20, the nprobe value and the wall-clock time of one
    2-nearest-neighbor search of the first ``test_size`` vectors.

    Relies on the module-level names ``ncols``, ``fout``, ``test_size``
    and ``tools``.
    """
    # Only referenced by the disabled accuracy computation in the loop
    # below; the load is kept so that re-enabling it needs no other change.
    test_index = tools.load_vector('../data/adamskij/test_index.bin', 'L')

    nlist = 100
    quantizer = faiss.IndexFlatL2(ncols)
    cpu_index = faiss.IndexIVFFlat(quantizer, ncols, nlist)

    xb = tools.load_2d_vec(fout, ncols, typecode='f')
    xq = np.copy(xb[:test_size])
    cpu_index.train(xb)

    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)

    ress = []
    for i in range(ngpus):
        res = faiss.StandardGpuResources()
        if i in (2, 3, 4, 5):
            # no temporary memory is reserved on these devices
            res.noTempMemory()
        res.initializeForDevice(i)
        ress.append(res)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(ress, cpu_index, co)
    # gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co)

    gpu_index.add(xb[:20_000_000])
    # for xb in it:
    #     gpu_index.add(xb)

    # nprobe must be set through the parameter space: assigning
    # `gpu_index.nprobe` on the multi-GPU wrapper only creates a Python
    # attribute and does not reach the per-GPU sub-indexes, so every
    # iteration would be timed with the same setting.
    params = faiss.GpuParameterSpace()
    for nprobe in range(1, 21):  # default nprobe is 1, try a few more
        params.set_index_parameter(gpu_index, 'nprobe', nprobe)
        start_time = time.time()

        D, I = gpu_index.search(xq, 2)

        secs = time.time() - start_time
        # acc = (I[:, 1] == test_index).sum()
        print(nprobe, secs)
示例#6
0
def gpux4_euclidsearch_from_dataset(ds,
                                    fn_npy,
                                    lhs='test',
                                    rhs='index',
                                    topk=100):
    """Search ``feats_{lhs}`` against ``feats_{rhs}`` with L2 and save ranks.

    Args:
        ds: mapping providing ``feats_<lhs>`` and ``feats_<rhs>`` arrays.
        fn_npy: output path (a string) for the rank array.
        lhs: name suffix of the query feature set.
        rhs: name suffix of the indexed feature set.
        topk: number of neighbors retrieved per query.

    The ``rhs`` features are added to a sharded flat L2 index spread over
    4 GPU resource objects, the ``lhs`` features are searched against it,
    and the resulting label array is saved to ``fn_npy``. For the
    test-vs-index case a submission file is also written next to it via
    ``save_sub_from_top100ranks``, named after ``fn_npy`` with ``.npy``
    replaced by ``.csv.gz``.
    """
    # Search with GpuMultiple
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    vres = [faiss.StandardGpuResources() for _ in range(4)]

    rhs_feats = ds[f'feats_{rhs}']
    cpu_index = faiss.IndexFlatL2(rhs_feats.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(rhs_feats)

    _, all_ranks = gpu_index.search(x=ds[f'feats_{lhs}'], k=topk)
    Path(fn_npy).parent.mkdir(parents=True, exist_ok=True)
    np.save(fn_npy, all_ranks)

    if lhs == 'test' and rhs == 'index':  # Retrieval task
        # Remove the exact '.npy' suffix only. str.rstrip('.npy') strips
        # any trailing run of the characters '.', 'n', 'p', 'y', which
        # mangles names such as 'happy.npy' (-> 'ha').
        suffix = '.npy'
        stem = fn_npy[:-len(suffix)] if fn_npy.endswith(suffix) else fn_npy
        fn_sub = stem + '.csv.gz'
        save_sub_from_top100ranks(ds, all_ranks, fn_sub, topk=topk)
                                                       weights,
                                                       normalize=True)
    # ids_train, feats_train = utils.prepare_ids_and_feats(train_dirs, weights, normalize=True)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    # co.float16 = False

    vres = []
    for _ in range(n_gpus):
        res = faiss.StandardGpuResources()
        vres.append(res)

    print('build index...')
    cpu_index = faiss.IndexFlatL2(feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(feats_index)
    dists, topk_idx = gpu_index.search(x=feats_test, k=topk)
    print('query search done.')

    retrieval_result = pd.DataFrame(ids_test, columns=['id'])
    retrieval_result['images'] = np.apply_along_axis(' '.join,
                                                     axis=1,
                                                     arr=ids_index[topk_idx])
    output_name = f'../output/{setting}.csv.gz'
    retrieval_result.to_csv(output_name, compression='gzip', index=False)
    print('saved to ' + output_name)

    cmd = f'kaggle c submit -c landmark-retrieval-2019 -f {output_name} -m "" '
    print(cmd)
    subprocess.run(cmd, shell=True)
示例#8
0
    def do_cpu_to_gpu(self, index_key):
        """Check that GPU clones of a CPU index return (nearly) the same ids.

        Builds the index described by ``index_key`` from the dataset
        returned by ``self.get_dataset(small_one=True)``, searches it on
        CPU, clones it to GPU 0, validates the layout of the resource
        object's memory info and compares the GPU search labels with the
        CPU ones (at most 50 mismatches allowed). Returns there when faiss
        reports exactly one GPU; otherwise the comparison is repeated on
        2-GPU clones, without and with sharding, with a 1% tolerance.
        """
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case;
        # those need to be cast to idx_t(= int64_t), because
        # on windows the numpy int default is int32
        ids = (np.arange(nb) * 3 + 12345).astype('int64')
        index.add_with_ids(xb, ids)
        ts.append(time.time())

        index.nprobe = 4
        Dref, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        # Validate the layout of the memory info.
        # unittest assertions are used instead of bare `assert` statements:
        # they are not stripped under `python -O` and they report the
        # offending value on failure. assertIs(type(x), T) keeps the
        # original exact-type check.
        mem_info = res.getMemoryInfo()

        self.assertIs(type(mem_info), dict)
        flat_data = mem_info[0]['FlatData']
        self.assertIs(type(flat_data), tuple)
        self.assertIs(type(flat_data[0]), int)
        self.assertIs(type(flat_data[1]), int)

        gpu_index.setNumProbes(4)

        Dnew, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        # Give us some margin of error
        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size - 50)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            Dnew, Inew = gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
def ban_final():
    """Zero the score of test images that lie close to "banned" test images.

    Steps, as implemented below:

    1. Load weighted, normalized test features from six experiment
       directories via ``utils.prepare_ids_and_feats``.
    2. Read an existing submission CSV and split its ``landmarks`` column
       into ``landmark_id`` and ``score``.
    3. Call a landmark "banned" when it is predicted for more than
       ``ban_thresh`` test images; test images predicted as a banned
       landmark form the banned set.
    4. Index the non-banned test features (flat L2, sharded over 4 GPU
       resource objects) and search the banned features against them.
    5. Every non-banned image returned with a distance below 0.5 gets its
       ``landmarks`` entry rewritten to ``"<landmark_id> 0"``.
    6. Write the ``id`` / ``landmarks`` columns to
       ``../output/l2dist_0.5.csv.gz``.
    """
    # NOTE(review): argparse, os, subprocess, tqdm and Counter are not
    # referenced in the visible part of this function; the imports are
    # left in place in case code outside this view relies on them.
    import argparse
    import faiss
    import numpy as np
    import pandas as pd
    import os
    import subprocess
    import tqdm
    from collections import Counter
    from src import utils

    topk = 100
    ROOT = '/opt/landmark/'

    test_dirs = [
        ROOT +
        'experiments/v19c/feats_test19_ms_L2_ep4_scaleup_ep3_freqthresh-2_loss-cosface_pooling-G,G,G,G_verifythresh-30/',
        ROOT +
        'experiments/v20c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT +
        'experiments/v21c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-arcface_verifythresh-30/',
        ROOT +
        'experiments/v22c/feats_test19_ms_L2_ep4_scaleup_ep3_base_margin-0.4_freqthresh-2_verifythresh-30/',
        ROOT +
        'experiments/v23c/feats_test19_ms_L2_ep6_scaleup_ep5_augmentation-middle_epochs-7_freqthresh-3_verifythresh-30/',
        ROOT +
        'experiments/v24c/feats_test19_ms_L2_ep5_augmentation-middle_epochs-7_freqthresh-3_loss-cosface_verifythresh-30/',
    ]

    weights = [
        0.5,
        1.0,
        1.0,
        0.5,
        1.0,
        1.0,
    ]  # intuition

    ids_test, feats_test = utils.prepare_ids_and_feats(test_dirs,
                                                       weights,
                                                       normalize=True)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    # co.float16 = False

    vres = []
    for _ in range(4):
        res = faiss.StandardGpuResources()
        vres.append(res)

    # The submission is loaded and parsed once. Nothing between here and
    # the final update mutates `subm`, so the same frame is reused instead
    # of re-reading and re-parsing the identical file a second time.
    subm = pd.read_csv(
        '../output/stage2_submit_banthresh30_ens3_top3_DBAx1_v44r7.csv.gz')
    # 'landmarks' holds "<landmark_id> <score>"; split it into two columns
    subm['landmark_id'], subm['score'] = list(
        zip(*subm['landmarks'].apply(lambda x: str(x).split(' '))))
    subm['score'] = subm['score'].astype(np.float32)
    subm = subm.sort_values('score', ascending=False).set_index('id')

    # landmarks predicted for more than `ban_thresh` test images
    ban_thresh = 30
    freq = subm['landmark_id'].value_counts()
    ban_lids = freq[freq > ban_thresh].index

    is_ban = np.isin(ids_test, subm[subm['landmark_id'].isin(ban_lids)].index)
    ban_ids_test = ids_test[is_ban]
    not_ban_ids_test = ids_test[~is_ban]
    ban_feats_test = feats_test[is_ban]
    not_ban_feats_test = feats_test[~is_ban]

    print('build index...')
    cpu_index = faiss.IndexFlatL2(not_ban_feats_test.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(not_ban_feats_test)
    # use the named constant rather than repeating the literal 100
    dists, topk_idx = gpu_index.search(x=ban_feats_test, k=topk)
    print('query search done.')

    # non-banned images returned with a distance below 0.5 from some
    # banned image get their score replaced by 0
    new_ban_ids = np.unique(not_ban_ids_test[topk_idx[dists < 0.5]])
    subm.loc[new_ban_ids,
             'landmarks'] = subm.loc[new_ban_ids, 'landmark_id'] + ' 0'

    output_filename = '../output/l2dist_0.5.csv.gz'
    subm.reset_index()[['id', 'landmarks']].to_csv(output_filename,
                                                   index=False,
                                                   compression='gzip')