Example #1
def create_faiss_index(vecs, method, n_gpu):
    """
    Create FAISS index on GPU(s).
    To create a GPU index with FAISS, one first builds the index on the CPU and then copies it to the GPU(s).
    Note that a "flat" index means that it is brute-force, with no approximation techniques.
    """
    # Build flat CPU index given the chosen method.
    if method == 'l2':
        index = faiss.IndexFlatL2(vecs.shape[1])  # Exact search for L2
    elif method == 'ip':
        index = faiss.IndexFlatIP(vecs.shape[1])  # Exact search for inner product
    elif method == 'cos':
        # Cosine similarity comes down to L2-normalizing the embeddings beforehand and then applying inner product (requires sklearn's preprocessing module).
        vecs = preprocessing.normalize(vecs, norm='l2')
        index = faiss.IndexFlatIP(vecs.shape[1])
    else:
        print("Error: Please choose between L2 Distance ('l2'), Inner Product Distance ('ip') or Cosine Distance ('cos') as brute-force method for exact search. Exiting...")
        sys.exit(0)
    
    # Convert to flat GPU index.
    if n_gpu > 0:
        co = faiss.GpuMultipleClonerOptions()  # If using multiple GPUs, enable sharding so that the dataset is divided across the GPUs rather than replicated.
        co.shard = True
        index = faiss.index_cpu_to_all_gpus(index, co=co, ngpu=n_gpu)  # Convert CPU index to GPU index.
    
    # Add vectors to GPU index.
    index.add(vecs)
    
    # Convert back to CPU index (needed for saving it to disk).
    if n_gpu > 0:
        index = faiss.index_gpu_to_cpu(index)

    return index
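A minimal usage sketch for the function above (illustrative data and file name; assumes faiss-gpu and numpy are installed, plus scikit-learn for the 'cos' method):

import numpy as np
import faiss

xs = np.random.rand(10000, 128).astype('float32')
index = create_faiss_index(xs, method='l2', n_gpu=faiss.get_num_gpus())
faiss.write_index(index, 'flat_l2.index')  # serializable because it was moved back to CPU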
Example #2
    def __init__(self,
                 xt_path="/home/wenqingfu/sift1b/bigann_learn.bvecs",
                 xb_path="/home/wenqingfu/sift1b/bigann_base.bvecs",
                 ngpu=3,
                 db_start=0,    # database slice, in millions of vectors
                 db_end=1,      # (module-level globals in the original script)
                 tempmem=-1):   # per-GPU scratch memory in bytes; -1 keeps the default
        self.db_start, self.db_end = db_start, db_end
        self.xt = self.mmap_bvecs(xt_path)
        self.xb = self.mmap_bvecs(xb_path)
        self.xt = self.sanitize(self.xt[:1000000])
        self.xb = self.sanitize(self.xb[self.db_start * 1000 *
                                        1000:self.db_end * 1000 * 1000])
        self.gpu_resources = []
        for i in range(0, ngpu):
            res = faiss.StandardGpuResources()
            if tempmem >= 0:
                res.setTempMemory(tempmem)
                print("set tempemm to %d" % tempmem)
            self.gpu_resources.append(res)
        self.vres = faiss.GpuResourcesVector()
        self.vdev = faiss.IntVector()

        for i in range(0, ngpu):
            self.vdev.push_back(i)
            self.vres.push_back(self.gpu_resources[i])

        self.co = faiss.GpuMultipleClonerOptions()
        self.co.useFloat16 = True
        self.co.useFloat16CoarseQuantizer = False
        self.co.usePrecomputed = False
        self.co.indicesOptions = 0
        self.co.verbose = True
        self.co.shard = True

        self.ps = faiss.GpuParameterSpace()
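A hedged sketch of how the parameter space created above is typically used (mirrors Example #10 below; index layout and values are illustrative):

import faiss

gpu_index = faiss.index_cpu_to_all_gpus(faiss.index_factory(64, 'IVF64,Flat'))
ps = faiss.GpuParameterSpace()
ps.initialize(gpu_index)                         # discover tunable parameters
ps.set_index_parameter(gpu_index, 'nprobe', 32)  # search-time probes, as in Example #10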
Example #3
    def _set_mips_index(self):
        """
        Create a Faiss Flat index with inner product as the metric
        to search against
        """
        try:
            import faiss
        except ImportError:
            raise Exception(
                "Error: Please install faiss to use FaissMIPSIndex")

        if mpu.is_unitialized() or mpu.get_data_parallel_rank() == 0:
            print("\n> Building index", flush=True)

        cpu_index = faiss.IndexFlatIP(self.embed_size)

        if self.use_gpu:
            # create resources and config for GpuIndex
            config = faiss.GpuMultipleClonerOptions()
            config.shard = True
            config.useFloat16 = True
            gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co=config)
            self.mips_index = faiss.IndexIDMap(gpu_index)
            if mpu.is_unitialized() or mpu.get_data_parallel_rank() == 0:
                print(">> Initialized index on GPU", flush=True)
        else:
            # CPU index supports IDs so wrap with IDMap
            self.mips_index = faiss.IndexIDMap(cpu_index)
            if mpu.is_unitialized() or mpu.get_data_parallel_rank() == 0:
                print(">> Initialized index on CPU", flush=True)

        # if we were constructed with a BlockData, then automatically load it
        # when the FAISS structure is built
        if self.embed_data is not None:
            self.add_embed_data(self.embed_data)
Example #4
def convert_index_to_gpu(index, faiss_gpu_index, useFloat16=False):
    if isinstance(faiss_gpu_index, list) and len(faiss_gpu_index) == 1:
        faiss_gpu_index = faiss_gpu_index[0]
    if isinstance(faiss_gpu_index, int):
        res = faiss.StandardGpuResources()
        res.setTempMemory(512 * 1024 * 1024)
        co = faiss.GpuClonerOptions()
        co.useFloat16 = useFloat16
        index = faiss.index_cpu_to_gpu(res, faiss_gpu_index, index, co)
    else:
        global gpu_resources
        if len(gpu_resources) == 0:
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(256 * 1024 * 1024)
                gpu_resources.append(res)

        assert isinstance(faiss_gpu_index, list)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        co.useFloat16 = useFloat16
        for i in faiss_gpu_index:
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index
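A usage sketch for convert_index_to_gpu (illustrative; the multi-GPU branch needs torch, at least two GPUs, and the module-level gpu_resources list it appends to):

import faiss

gpu_resources = []  # filled lazily by the multi-GPU branch above
cpu_index = faiss.IndexFlatIP(256)
gpu_index = convert_index_to_gpu(cpu_index, faiss_gpu_index=[0, 1], useFloat16=True)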
Example #5
def get_gpu_index(cpu_index):
    gpu_resources = []
    ngpu = faiss.get_num_gpus()
    tempmem = -1
    for i in range(ngpu):
        res = faiss.StandardGpuResources()
        if tempmem >= 0:
            res.setTempMemory(tempmem)
        gpu_resources.append(res)

    def make_vres_vdev(i0=0, i1=-1):
        " return vectors of device ids and resources useful for gpu_multiple"
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        if i1 == -1:
            i1 = ngpu
        for i in range(i0, i1):
            vdev.push_back(i)
            vres.push_back(gpu_resources[i])
        return vres, vdev

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_vector_resources, gpu_devices_vector = make_vres_vdev(0, ngpu)
    gpu_index = faiss.index_cpu_to_gpu_multiple(gpu_vector_resources,
                                                gpu_devices_vector, cpu_index,
                                                co)
    gpu_index.referenced_objects = gpu_resources  # keep the GPU resources alive as long as the index (see Example #21)
    return gpu_index
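A usage sketch for get_gpu_index (illustrative sizes; assumes numpy, faiss-gpu and at least one GPU):

import numpy as np
import faiss

gpu_index = get_gpu_index(faiss.IndexFlatL2(64))
gpu_index.add(np.random.rand(1000, 64).astype('float32'))
D, I = gpu_index.search(np.random.rand(5, 64).astype('float32'), 10)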
Example #6
def faiss_knn(feats_train, targets_train, feats_val, targets_val, k):
    feats_train = feats_train.numpy()
    targets_train = targets_train.numpy()
    feats_val = feats_val.numpy()
    targets_val = targets_val.numpy()

    d = feats_train.shape[-1]

    index = faiss.IndexFlatL2(d)  # build the index
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = True
    gpu_index = faiss.index_cpu_to_all_gpus(index, co)
    gpu_index.add(feats_train)

    D, I = gpu_index.search(feats_val, k)

    pred = np.zeros(I.shape[0])
    for i in range(I.shape[0]):
        votes = list(Counter(targets_train[I[i]]).items())
        shuffle(votes)
        pred[i] = max(votes, key=lambda x: x[1])[0]

    acc = 100.0 * (pred == targets_val).mean()

    return acc
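A usage sketch for faiss_knn (illustrative shapes; the helper takes torch tensors and assumes numpy, faiss, collections.Counter and random.shuffle are imported):

import torch

feats_train, targets_train = torch.randn(1000, 128), torch.randint(0, 10, (1000,))
feats_val, targets_val = torch.randn(100, 128), torch.randint(0, 10, (100,))
acc = faiss_knn(feats_train, targets_train, feats_val, targets_val, k=5)
print('k-NN voting accuracy: %.2f%%' % acc)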
Example #7
def compute_populated_index(preproc):
    """Add elements to a sharded index. Return the index and if available
    a sharded gpu_index that contains the same data. """
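    # NB: use_float16, use_precomputed_tables, xb, max_add, add_batch_size,
    # ngpu and make_vres_vdev are module-level names defined elsewhere in
    # the benchmark script this snippet comes from.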

    indexall = prepare_trained_index(preproc)

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = max_add if max_add > 0 else xb.shape[0]
    co.shard = True
    assert co.shard_type in (0, 1, 2)
    vres, vdev = make_vres_vdev()
    gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, indexall, co)

    print("add...")
    t0 = time.time()
    nb = xb.shape[0]
    for i0, xs in dataset_iterator(xb, preproc, add_batch_size):
        i1 = i0 + xs.shape[0]
        gpu_index.add_with_ids(xs, np.arange(i0, i1))
        if max_add > 0 and gpu_index.ntotal > max_add:
            print("Flush indexes to CPU")
            for i in range(ngpu):
                index_src_gpu = faiss.downcast_index(gpu_index.at(i))
                index_src = faiss.index_gpu_to_cpu(index_src_gpu)
                print("  index %d size %d" % (i, index_src.ntotal))
                index_src.copy_subset_to(indexall, 0, 0, nb)
                index_src_gpu.reset()
                index_src_gpu.reserveMemory(max_add)
            gpu_index.sync_with_shard_indexes()

        print('\r%d/%d (%.3f s)  ' % (i0, nb, time.time() - t0), end=' ')
        sys.stdout.flush()
    print("Add time: %.3f s" % (time.time() - t0))

    print("Aggregate indexes to CPU")
    t0 = time.time()

    if hasattr(gpu_index, 'at'):
        # it is a sharded index
        for i in range(ngpu):
            index_src = faiss.index_gpu_to_cpu(gpu_index.at(i))
            print("  index %d size %d" % (i, index_src.ntotal))
            index_src.copy_subset_to(indexall, 0, 0, nb)
    else:
        # simple index
        index_src = faiss.index_gpu_to_cpu(gpu_index)
        index_src.copy_subset_to(indexall, 0, 0, nb)

    print("  done in %.3f s" % (time.time() - t0))

    if max_add > 0:
        # it does not contain all the vectors
        gpu_index = None

    return gpu_index, indexall
Example #8
    def __init__(self,
                 target,
                 nprobe=128,
                 index_factory_str=None,
                 verbose=False,
                 mode='proxy',
                 using_gpu=True):
        self._res_list = []

        num_gpu = faiss.get_num_gpus()
        print('[faiss gpu] #GPU: {}'.format(num_gpu))

        size, dim = target.shape
        assert size > 0, "size: {}".format(size)
        index_factory_str = "IVF{},PQ{}".format(
            min(8192, 16 * round(np.sqrt(size))),
            32) if index_factory_str is None else index_factory_str
        cpu_index = faiss.index_factory(dim, index_factory_str)
        cpu_index.nprobe = nprobe

        if mode == 'proxy':
            co = faiss.GpuClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False

            index = faiss.IndexProxy()
            for i in range(num_gpu):
                res = faiss.StandardGpuResources()
                self._res_list.append(res)
                sub_index = faiss.index_cpu_to_gpu(
                    res, i, cpu_index, co) if using_gpu else cpu_index
                index.addIndex(sub_index)
        elif mode == 'shard':
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            co.usePrecomputed = False
            co.shard = True
            index = faiss.index_cpu_to_all_gpus(cpu_index, co, ngpu=num_gpu)
        else:
            raise KeyError("Unknown index mode")

        index = faiss.IndexIDMap(index)
        index.verbose = verbose

        # get nlist to decide how many samples used for training
        nlist = int(
            float([
                item for item in index_factory_str.split(",") if 'IVF' in item
            ][0].replace("IVF", "")))

        # training
        if not index.is_trained:
            indexes_sample_for_train = np.random.randint(0, size, nlist * 256)
            index.train(target[indexes_sample_for_train])

        # add with ids
        target_ids = np.arange(0, size)
        index.add_with_ids(target, target_ids)
        self.index = index
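A standalone sketch of the 'shard' branch above, with illustrative sizes (assumes numpy, faiss-gpu and more than one GPU):

import numpy as np
import faiss

dim, size = 256, 100000
target = np.random.rand(size, dim).astype('float32')
cpu_index = faiss.index_factory(dim, "IVF256,PQ32")
cpu_index.nprobe = 64
co = faiss.GpuMultipleClonerOptions()
co.useFloat16 = True
co.shard = True
index = faiss.IndexIDMap(faiss.index_cpu_to_all_gpus(cpu_index, co))
index.train(target[np.random.randint(0, size, 256 * 256)])  # sample a training set
index.add_with_ids(target, np.arange(size))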
Example #9
def range_ground_truth(xq,
                       db_iterator,
                       threshold,
                       metric_type=faiss.METRIC_L2,
                       shard=False,
                       ngpu=-1):
    """Computes the range-search search results for a dataset that possibly
    does not fit in RAM but for which we have an iterator that
    returns it block by block.
    """
    nq, d = xq.shape
    t0 = time.time()
    xq = np.ascontiguousarray(xq, dtype='float32')

    index = faiss.IndexFlat(d, metric_type)
    if ngpu == -1:
        ngpu = faiss.get_num_gpus()
    if ngpu:
        LOG.info('running on %d GPUs' % ngpu)
        co = faiss.GpuMultipleClonerOptions()
        co.shard = shard
        index_gpu = faiss.index_cpu_to_all_gpus(index, co=co, ngpu=ngpu)

    # compute ground-truth by blocks
    i0 = 0
    D = [[] for _i in range(nq)]
    I = [[] for _i in range(nq)]
    all_lims = []
    for xbi in db_iterator:
        ni = xbi.shape[0]
        if ngpu > 0:
            index_gpu.add(xbi)
            lims_i, Di, Ii = range_search_gpu(xq, threshold, index_gpu, xbi)
            index_gpu.reset()
        else:
            index.add(xbi)
            lims_i, Di, Ii = index.range_search(xq, threshold)
            index.reset()
        Ii += i0
        for j in range(nq):
            l0, l1 = lims_i[j], lims_i[j + 1]
            if l1 > l0:
                D[j].append(Di[l0:l1])
                I[j].append(Ii[l0:l1])
        i0 += ni
        LOG.info("%d db elements, %.3f s" % (i0, time.time() - t0))

    empty_I = np.zeros(0, dtype='int64')
    empty_D = np.zeros(0, dtype='float32')
    # import pdb; pdb.set_trace()
    D = [(np.hstack(i) if i != [] else empty_D) for i in D]
    I = [(np.hstack(i) if i != [] else empty_I) for i in I]
    sizes = [len(i) for i in I]
    assert len(sizes) == nq
    lims = np.zeros(nq + 1, dtype="uint64")
    lims[1:] = np.cumsum(sizes)
    return lims, np.hstack(D), np.hstack(I)
Example #10
    def do_cpu_to_gpu(self, index_key):
        ts = []
        ts.append(time.time())
        (xt, xb, xq) = self.get_dataset(small_one=True)
        nb, d = xb.shape

        index = faiss.index_factory(d, index_key)
        if index.__class__ == faiss.IndexIVFPQ:
            # speed up test
            index.pq.cp.niter = 2
            index.do_polysemous_training = False
        ts.append(time.time())

        index.train(xt)
        ts.append(time.time())

        # adding some ids because there was a bug in this case
        index.add_with_ids(xb, np.arange(nb).astype(np.int64) * 3 + 12345)
        ts.append(time.time())

        index.nprobe = 4
        D, Iref = index.search(xq, 10)
        ts.append(time.time())

        res = faiss.StandardGpuResources()
        gpu_index = faiss.index_cpu_to_gpu(res, 0, index)
        ts.append(time.time())

        gpu_index.setNumProbes(4)

        D, Inew = gpu_index.search(xq, 10)
        ts.append(time.time())
        print('times:', [t - ts[0] for t in ts])

        self.assertGreaterEqual((Iref == Inew).sum(), Iref.size)

        if faiss.get_num_gpus() == 1:
            return

        for shard in False, True:

            # test on just 2 GPUs
            res = [faiss.StandardGpuResources() for i in range(2)]
            co = faiss.GpuMultipleClonerOptions()
            co.shard = shard

            gpu_index = faiss.index_cpu_to_gpu_multiple_py(res, index, co)

            faiss.GpuParameterSpace().set_index_parameter(
                gpu_index, 'nprobe', 4)

            D, Inew = gpu_index.search(xq, 10)

            # 0.99: allow some tolerance in results otherwise test
            # fails occasionally (not reproducible)
            self.assertGreaterEqual((Iref == Inew).sum(), Iref.size * 0.99)
Example #11
def get_populated_index(preproc):

    if not index_cachefile or not os.path.exists(index_cachefile):
        if not altadd:
            gpu_index, indexall = compute_populated_index(preproc)
        else:
            gpu_index, indexall = compute_populated_index_2(preproc)
        if index_cachefile:
            print "store", index_cachefile
            faiss.write_index(indexall, index_cachefile)
    else:
        print "load", index_cachefile
        indexall = faiss.read_index(index_cachefile)
        gpu_index = None

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = use_float16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = use_precomputed_tables
    co.indicesOptions = 0
    co.verbose = True
    co.shard = True    # the replicas will be made "manually"
    t0 = time.time()
    print "CPU index contains %d vectors, move to GPU" % indexall.ntotal
    if replicas == 1:

        if not gpu_index:
            print "copying loaded index to GPUs"
            vres, vdev = make_vres_vdev()
            index = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
        else:
            index = gpu_index

    else:
        del gpu_index # We override the GPU index

        print "Copy CPU index to %d sharded GPU indexes" % replicas

        index = faiss.IndexProxy()

        for i in range(replicas):
            gpu0 = ngpu * i // replicas
            gpu1 = ngpu * (i + 1) // replicas
            vres, vdev = make_vres_vdev(gpu0, gpu1)

            print "   dispatch to GPUs %d:%d" % (gpu0, gpu1)

            index1 = faiss.index_cpu_to_gpu_multiple(
                vres, vdev, indexall, co)
            index1.this.disown()
            index.addIndex(index1)
        index.own_fields = True
    del indexall
    print "move to GPU done in %.3f s" % (time.time() - t0)
    return index
Example #12
def faiss_knn(feats_train, targets_train, feats_val, targets_val,
              feats_val_poisoned, targets_val_poisoned, k):
    feats_train = feats_train.numpy()
    targets_train = targets_train.numpy()
    feats_val = feats_val.numpy()
    targets_val = targets_val.numpy()

    d = feats_train.shape[-1]

    index = faiss.IndexFlatL2(d)  # build the index
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = True
    gpu_index = faiss.index_cpu_to_all_gpus(index, co)
    gpu_index.add(feats_train)

    # Val clean
    D, I = gpu_index.search(feats_val, k)

    # create confusion matrix ROWS ground truth COLUMNS pred
    conf_matrix_clean = np.zeros(
        (int(targets_val.max()) + 1, int(targets_val.max()) + 1))

    pred = np.zeros(I.shape[0])
    for i in range(I.shape[0]):
        votes = list(Counter(targets_train[I[i]]).items())
        shuffle(votes)
        pred[i] = max(votes, key=lambda x: x[1])[0]
        # update confusion matrix
        conf_matrix_clean[targets_val[i], int(pred[i])] += 1

    acc = 100.0 * (pred == targets_val).mean()

    # Val poisoned
    feats_val_poisoned = feats_val_poisoned.numpy()
    targets_val_poisoned = targets_val_poisoned.numpy()

    D, I = gpu_index.search(feats_val_poisoned, k)

    # create confusion matrix ROWS ground truth COLUMNS pred
    conf_matrix_poisoned = np.zeros((int(targets_val_poisoned.max()) + 1,
                                     int(targets_val_poisoned.max()) + 1))

    pred_poisoned = np.zeros(I.shape[0])
    for i in range(I.shape[0]):
        votes = list(Counter(targets_train[I[i]]).items())
        shuffle(votes)
        pred_poisoned[i] = max(votes, key=lambda x: x[1])[0]
        # update confusion matrix
        conf_matrix_poisoned[targets_val_poisoned[i],
                             int(pred_poisoned[i])] += 1

    acc_poisoned = 100.0 * (pred_poisoned == targets_val_poisoned).mean()

    return acc, conf_matrix_clean, acc_poisoned, conf_matrix_poisoned
Example #13
    def to_gpu(self):
        if faiss.get_num_gpus() == 1:
            res = faiss.StandardGpuResources()
            self.index = faiss.index_cpu_to_gpu(res, 0, self.index)
        else:
            cloner_options = faiss.GpuMultipleClonerOptions()
            cloner_options.shard = True
            self.index = faiss.index_cpu_to_all_gpus(self.index,
                                                     co=cloner_options)

        return self.index
Example #14
    def _create_co(self, use_float16, use_float16_quantizer,
                   use_precomputed_codes) -> 'faiss.GpuMultipleClonerOptions':
        """
        Build the cloner options used when copying the index to the GPU(s).
        """
        co = faiss.GpuMultipleClonerOptions()
        co.useFloat16 = use_float16
        co.useFloat16CoarseQuantizer = use_float16_quantizer
        co.usePrecomputed = use_precomputed_codes
        return co
Example #15
    def moveCPUtoGPU(self):
        co = faiss.GpuMultipleClonerOptions()
        co.useFloat16 = self.use_float16
        co.useFloat16CoarseQuantizer = False
        co.usePrecomputed = self.use_precomputed_tables
        co.indicesOptions = faiss.INDICES_CPU
        co.verbose = True
        co.reserveVecs = self.max_add
        co.shard = True
        vres, vdev = indexfunctions.make_vres_vdev(self.gpu_resources,
                                                   ngpu=self.ngpu)
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(
            vres, vdev, self.index, co)
Example #16
def make_index(sx, preproc=ident):
    N, p = sx.shape
    ngpu = faiss.get_num_gpus()

    # Heuristic size thresholds for picking an index type.
    if N < 1000:
        indextype = 'Flat'
    elif N < 10**6:
        indextype = 'GPUFlat'
    elif N < 10**7:
        indextype = 'GPUIVFFlat'
    else:
        indextype = 'GPUIVFFlatShards'

    if (indextype == 'IVFFlat' or indextype == 'GPUIVFFlat'
            or indextype == 'GPUIVFFlatShards'):
        ncentroids = int(4 * np.floor(np.sqrt(N)))
        nprobe = 256
        print("using IndexIVFFlat with %d/%d centroids" % (nprobe, ncentroids))
        q = faiss.IndexFlatL2(p)
        index = faiss.IndexIVFFlat(q, p, ncentroids,
                                   faiss.METRIC_INNER_PRODUCT)
        if nprobe >= ncentroids * 3 / 4:
            nprobe = int(ncentroids * 3 / 4)
            print("  forcing nprobe to %d" % nprobe)
        index.nprobe = nprobe
        index.quantizer_no_dealloc = q
        if indextype.startswith('GPU') and ngpu > 0:
            index = move_index_to_gpu(index, indextype == 'GPUIVFFlatShards')
        ntrain = min(ncentroids * 100, N)
        print("prepare train set, size=%d" % ntrain)
        trainset = sx[:ntrain]
        trainset.max()  # force move to RAM
        print("train")
        index.train(trainset)

    elif indextype == 'GPUFlat' or indextype == 'Flat':
        index = faiss.IndexFlatIP(p)
        if indextype.startswith('GPU') and ngpu > 0:
            co = faiss.GpuMultipleClonerOptions()
            co.useFloat16 = True
            index = faiss.index_cpu_to_all_gpus(index, co)
    else:
        assert False

    bs = 16384
    for i0, i1, block in dataset_iterator(sx, preproc, bs):
        print("   add %d:%d / %d\r" % (i0, i1, N), end=' ')
        sys.stdout.flush()
        index.add(block)

    return index
Example #17
def copyToGpu(index_cpu):

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = useFloat16
    co.useFloat16CoarseQuantizer = False
    co.usePrecomputed = usePrecomputed
    co.indicesOptions = faiss.INDICES_CPU
    co.verbose = True
    co.reserveVecs = N
    co.shard = True
    assert co.shard_type in (0, 1, 2)
    vres, vdev = make_vres_vdev()
    index_gpu = faiss.index_cpu_to_gpu_multiple(vres, vdev, index_cpu, co)

    return index_gpu
Example #18
def move_index_to_gpu(index, shard=False):
    ngpu = faiss.get_num_gpus()
    gpu_resources = [faiss.StandardGpuResources() for i in range(ngpu)]

    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = shard
    co.shard_type = 1

    print("   moving to %d GPUs" % ngpu)
    t0 = time.time()
    index = faiss.index_cpu_to_gpu_multiple_py(gpu_resources, index, co)
    index.dont_dealloc_me = gpu_resources
    print("      done in %.3f s" % (time.time() - t0))
    return index
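A short usage sketch for move_index_to_gpu (illustrative; sharding only pays off with more than one GPU):

import time  # used inside move_index_to_gpu
import faiss

index = move_index_to_gpu(faiss.IndexFlatIP(128), shard=True)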
Example #19
def enrich_vcr_with_omcs(args):
    omcs_embs, omcs_index = load_omcs(args)
    LOG.info('Loaded faiss index with OMCS embeddings, ntotal={}'.format(
        omcs_index.ntotal))

    co = faiss.GpuMultipleClonerOptions()
    co.shard = False  # Replica mode (dataparallel) instead of shard mode
    omcs_index = faiss.index_cpu_to_all_gpus(omcs_index, co)

    vcr_h5 = h5py.File(args.vcr_h5, 'r')
    LOG.info('Loaded VCR embeddings from {}, found {} entities'.format(
        args.vcr_h5, len(vcr_h5)))

    outfile = os.path.basename(args.vcr_h5).split('.')[0] + '_omcs.h5'
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    outfile = os.path.join(args.outdir, outfile)
    LOG.info('Writing output OMCS embeddings to {}'.format(outfile))
    output_h5 = h5py.File(outfile, 'w')
    for i in range(len(vcr_h5)):
        output_h5.create_group(f'{i}')

    for i in tqdm(range(len(vcr_h5))):
        grp = vcr_h5[str(i)]
        out_grp = output_h5[str(i)]

        # Each key has embeddings of dim (num_words, d). Batch over all keys.
        items = {k: np.array(v, dtype=np.float32) for k, v in grp.items()}
        vcr_embs = np.vstack(items.values())
        vcr_omcs_embs = get_omcs_embeddings_for_vcr(omcs_embs, omcs_index, vcr_embs, args)
        assert vcr_embs.shape == vcr_omcs_embs.shape

        # Convert back to float16 to match BERT VCR format
        vcr_omcs_embs = vcr_omcs_embs.numpy().astype(np.float16)

        # Unbatch based on word counts
        word_counts = [v.shape[0] for v in items.values()]
        vcr_omcs_embs = np.split(vcr_omcs_embs, np.cumsum(word_counts)[:-1])
        assert len(vcr_omcs_embs) == len(items)

        # Write in the same format as vcr_h5 file
        for key, data in zip(items.keys(), vcr_omcs_embs):
            out_grp.create_dataset(key, data=data)

    LOG.info('Success!')
Example #20
    def adding_initialize(self, index):
        """
        The index should be owned by caller.
        """

        assert self.ngpu > 0
        print_message('Adding initialize...')
        self.co = faiss.GpuMultipleClonerOptions()
        self.co.useFloat16 = True
        self.co.useFloat16CoarseQuantizer = False
        self.co.usePrecomputed = False
        self.co.indicesOptions = faiss.INDICES_CPU
        self.co.verbose = True
        self.co.reserveVecs = self.max_add
        self.co.shard = True
        assert self.co.shard_type in (0, 1, 2)

        self.vres, self.vdev = self._make_vres_vdev()
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(
            self.vres, self.vdev, index, self.co)
Example #21
    def __loadIndex(self):
        assert self.dbs != [], "You should load db before load index, use self.loadDB() ..."
        d = self.dbs[0].shape[-1]
        ngpu = faiss.get_num_gpus()
        index = faiss.IndexFlatL2(d)
        vres = faiss.GpuResourcesVector()
        vdev = faiss.IntVector()
        gpu_resources = []

        for i in range(0, ngpu):
            res = faiss.StandardGpuResources()
            gpu_resources.append(res)
            vdev.push_back(i)
            vres.push_back(res)

        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        self.gpu_index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)
        self.gpu_index.referenced_objects = gpu_resources
        self.gpu_index.add(np.vstack(self.dbs))  # dbs is a list of arrays; add() expects one contiguous array
Example #22
def gpux4_allpair_similarity(ds, prefix):
    # Use cache
    cache_data = load_cached_result(prefix)
    if cache_data is not None:
        return cache_data

    # Search with GpuMultiple
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    vres = []
    for _ in range(4):
        res = faiss.StandardGpuResources()
        vres.append(res)

    cpu_index = faiss.IndexFlatIP(ds.feats_index.shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(ds.feats_index)

    # 177sec
    with timer('Prepare all-pair similarity on index dataset'):
        ii_sims, ii_ids = gpu_index.search(x=ds.feats_index, k=100)

    with timer('Save results (index-index)'):
        fn_out = Path(prefix) / "index19_vs_index19_ids.npy"
        fn_out.parent.mkdir(parents=True, exist_ok=True)
        np.save(str(fn_out), ii_ids)
        np.save(str(Path(prefix) / "index19_vs_index19_sims.npy"), ii_sims)

    with timer('Prepare all-pair similarity on test-index dataset'):
        ti_sims, ti_ids = gpu_index.search(x=ds.feats_test, k=100)

    with timer('Save results (test-index)'):
        np.save(str(Path(prefix) / "test19_vs_index19_ids.npy"), ti_ids)
        np.save(str(Path(prefix) / "test19_vs_index19_sims.npy"), ti_sims)

    return edict({
        'ti_sims': ti_sims,
        'ti_ids': ti_ids,
        'ii_sims': ii_sims,
        'ii_ids': ii_ids,
    })
Example #23
def faiss_kmeans(train_feats, val_feats, nmb_clusters):
    train_feats = train_feats.numpy()
    val_feats = val_feats.numpy()

    d = train_feats.shape[-1]

    clus = faiss.Clustering(d, nmb_clusters)
    clus.niter = 20
    clus.max_points_per_centroid = 10000000

    index = faiss.IndexFlatL2(d)
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = True
    index = faiss.index_cpu_to_all_gpus(index, co)

    clus.train(train_feats, index)
    _, train_a = index.search(train_feats, 1)
    _, val_a = index.search(val_feats, 1)

    return list(train_a[:, 0]), list(val_a[:, 0])
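A usage sketch for faiss_kmeans (illustrative shapes; inputs are torch tensors, as in the k-NN helpers above):

import torch

train_feats, val_feats = torch.randn(5000, 64), torch.randn(500, 64)
train_assign, val_assign = faiss_kmeans(train_feats, val_feats, nmb_clusters=10)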
Example #24
def voronoi_gpu():
    test_index = tools.load_vector('../data/adamskij/test_index.bin', 'L')

    nlist = 100
    quantizer = faiss.IndexFlatL2(ncols)
    cpu_index = faiss.IndexIVFFlat(quantizer, ncols, nlist)

    xb = tools.load_2d_vec(fout, ncols, typecode='f')
    xq = np.copy(xb[:test_size])
    cpu_index.train(xb)

    ngpus = faiss.get_num_gpus()
    print("number of GPUs:", ngpus)

    ress = []
    for i in range(ngpus):
        res = faiss.StandardGpuResources()
        if i in (2, 3, 4, 5):
            res.noTempMemory()
        res.initializeForDevice(i)
        ress.append(res)

    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(ress, cpu_index, co)
    # gpu_index = faiss.index_cpu_to_all_gpus(cpu_index, co)

    gpu_index.add(xb[:20_000_000])
    # for xb in it:
    #     gpu_index.add(xb)

    for i in range(20):
        gpu_index.nprobe = i + 1  # default nprobe is 1, try a few more
        start_time = time.time()

        D, I = gpu_index.search(xq, 2)

        secs = time.time() - start_time
        # acc = (I[:, 1] == test_index).sum()
        print(i + 1, secs)
Example #25
    def test_sharded(self):
        d = 32
        nb = 1000
        nq = 200
        k = 10
        rs = np.random.RandomState(123)
        xb = rs.rand(nb, d).astype('float32')
        xq = rs.rand(nq, d).astype('float32')

        index_cpu = faiss.IndexFlatL2(d)

        assert faiss.get_num_gpus() > 1

        co = faiss.GpuMultipleClonerOptions()
        co.shard = True
        index = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)

        index.add(xb)
        D, I = index.search(xq, k)

        index_cpu.add(xb)
        D_ref, I_ref = index_cpu.search(xq, k)

        assert np.all(I == I_ref)

        del index
        index2 = faiss.index_cpu_to_all_gpus(index_cpu, co, ngpu=2)
        D2, I2 = index2.search(xq, k)

        assert np.all(I2 == I_ref)

        try:
            index2.add(xb)
        except RuntimeError:
            pass
        else:
            assert False, "this call should fail!"
Example #26
def load_index(passage_embeddings, index_path, faiss_gpu_index, use_gpu):
    dim = passage_embeddings.shape[1]
    if index_path is None:
        index = faiss.index_factory(dim, "Flat", faiss.METRIC_INNER_PRODUCT)
        index.add(passage_embeddings)
    else:
        index = faiss.read_index(index_path)
    if faiss_gpu_index and use_gpu:
        if len(faiss_gpu_index) == 1:
            res = faiss.StandardGpuResources()
            res.setTempMemory(1024 * 1024 * 1024)
            co = faiss.GpuClonerOptions()
            if index_path:
                co.useFloat16 = True
            else:
                co.useFloat16 = False
            index = faiss.index_cpu_to_gpu(res, faiss_gpu_index[0], index, co)  # single device id, not the list
        else:
            assert not index_path  # Only need one GPU for compressed index
            global gpu_resources
            import torch
            for i in range(torch.cuda.device_count()):
                res = faiss.StandardGpuResources()
                res.setTempMemory(128 * 1024 * 1024)
                gpu_resources.append(res)

            assert isinstance(faiss_gpu_index, list)
            vres = faiss.GpuResourcesVector()
            vdev = faiss.IntVector()
            co = faiss.GpuMultipleClonerOptions()
            co.shard = True
            for i in faiss_gpu_index:
                vdev.push_back(i)
                vres.push_back(gpu_resources[i])
            index = faiss.index_cpu_to_gpu_multiple(vres, vdev, index, co)

    return index
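A usage sketch for load_index (illustrative shapes; gpu_resources must exist at module level for the multi-GPU branch):

import numpy as np
import faiss

gpu_resources = []
passages = np.random.rand(10000, 768).astype('float32')
index = load_index(passages, index_path=None, faiss_gpu_index=[0], use_gpu=True)
scores, ids = index.search(passages[:5], 10)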
Example #27
def faiss_knn(feats_train, targets_train, feats_val, targets_val, k):
    feats_train = feats_train.numpy()
    targets_train = targets_train.numpy()
    feats_val = feats_val.numpy()
    targets_val = targets_val.numpy()

    d = feats_train.shape[-1]

    index = faiss.IndexFlatL2(d)  # build the index
    co = faiss.GpuMultipleClonerOptions()
    co.useFloat16 = True
    co.shard = True
    gpu_index = faiss.index_cpu_to_all_gpus(index, co)
    gpu_index.add(feats_train)

    D, I = gpu_index.search(feats_val, k)

    pred = np.zeros(I.shape[0], dtype=int)
    conf_mat = np.zeros((1000, 1000), dtype=int)
    for i in range(I.shape[0]):
        votes = list(Counter(targets_train[I[i]]).items())
        shuffle(votes)
        pred[i] = max(votes, key=lambda x: x[1])[0]
        conf_mat[targets_val[i], pred[i]] += 1

    acc = 100.0 * (pred == targets_val).mean()
    assert acc == (100.0 * (np.trace(conf_mat) / np.sum(conf_mat)))

    # per_cat_acc = 100.0 * (np.diag(conf_mat) / np.sum(conf_mat, axis=1))
    # sparse_cats = [58, 155, 356, 747, 865, 234, 268, 384, 385, 491, 498, 538, 646, 650, 726, 860, 887, 15, 170, 231]
    # s = ' '.join('{}'.format(c) for c in sparse_cats)
    # print('==> cats: {}'.format(s))
    # s = ' '.join('{:.1f}'.format(a) for a in per_cat_acc[sparse_cats])
    # print('==> acc/cat: {}'.format(s))
    # print('==> mean acc: {}'.format(per_cat_acc[sparse_cats].mean()))

    return acc
Example #28
def gpux4_euclidsearch_from_dataset(ds,
                                    fn_npy,
                                    lhs='test',
                                    rhs='index',
                                    topk=100):
    # Search with GpuMultiple
    co = faiss.GpuMultipleClonerOptions()
    co.shard = True
    vres = []
    for _ in range(4):
        res = faiss.StandardGpuResources()
        vres.append(res)

    cpu_index = faiss.IndexFlatL2(ds[f'feats_{rhs}'].shape[1])
    gpu_index = faiss.index_cpu_to_gpu_multiple_py(vres, cpu_index, co)
    gpu_index.add(ds[f'feats_{rhs}'])

    _, all_ranks = gpu_index.search(x=ds[f'feats_{lhs}'], k=topk)
    Path(fn_npy).parent.mkdir(parents=True, exist_ok=True)
    np.save(fn_npy, all_ranks)

    if lhs == 'test' and rhs == 'index':  # Retrieval task
        fn_sub = fn_npy[:-len('.npy')] + '.csv.gz'
        save_sub_from_top100ranks(ds, all_ranks, fn_sub, topk=topk)
Example #29
    def load_omcs(self, use_sentence_embs=True):
        omcs_h5_file = os.path.join(VCR_ANNOTS_DIR, 'omcs', 'bert_da_omcs.h5')
        # Embeddings are stored as float16, but faiss requires float32
        _, sentence_embs, word_embs = load_omcs_embeddings(omcs_h5_file,
                                                           dtype=np.float32)

        if use_sentence_embs:
            embs = np.vstack(sentence_embs)
            index_file = 'bert_da_omcs_sentences.faissindex'
        else:
            embs = np.vstack(word_embs)
            index_file = 'bert_da_omcs_words.faissindex'
        index_file = os.path.join(VCR_ANNOTS_DIR, 'omcs', index_file)

        index = faiss.read_index(index_file)
        assert len(embs) == index.ntotal
        assert embs.shape[1] == index.d
        LOG.info('Loaded faiss index with OMCS embeddings from {}, ntotal={}'.
                 format(index_file, index.ntotal))

        self.co = faiss.GpuMultipleClonerOptions()
        self.co.shard = False  # Replica mode (dataparallel) instead of shard mode
        index = faiss.index_cpu_to_all_gpus(index, self.co)
        return torch.from_numpy(embs), index
Example #30
        codes = faiss.vector_to_array(rfn.codes)
        np.save(args.neigh_recons_codes, codes)

######################################################
# Exhaustive evaluation
######################################################

if args.exhaustive:
    print "exhaustive evaluation"
    xq_tr = vec_transform(sanitize(xq))
    index2 = faiss.IndexFlatL2(index_hnsw.d)
    accu_recons_error = 0.0

    if faiss.get_num_gpus() > 0:
        print "do eval on GPU"
        co = faiss.GpuMultipleClonerOptions()
        co.shard = False
        index2 = faiss.index_cpu_to_all_gpus(index2, co)

    # process in batches in case the dataset does not fit in RAM
    rh = datasets.ResultHeap(xq_tr.shape[0], 100)
    t0 = time.time()
    bs = 500000
    for i0 in range(0, nb, bs):
        i1 = min(nb, i0 + bs)
        print('  handling batch %d:%d' % (i0, i1))

        xb_recons = np.empty((i1 - i0, index_hnsw.d), dtype='float32')
        rfn.reconstruct_n(i0, i1 - i0, faiss.swig_ptr(xb_recons))

        accu_recons_error += ((vec_transform(sanitize(xb[i0:i1])) -