Пример #1
0
    def test_indices_ivfpq(self):
        """Check returned IDs under default and 32-bit GPU index storage.

        Vectors are added with IDs offset by 2**32. With the default config
        the nearest neighbour of each query must come back with its full
        64-bit ID; with ``INDICES_32_BIT`` the assertion expects the offset
        to be gone, i.e. the un-offset base ID is returned.
        """
        gpu_res = faiss.StandardGpuResources()
        dim, n_vectors = 128, 5000
        n_lists, n_subq, code_bits = 10, 4, 8

        rng = np.random.RandomState(567)
        vectors = rng.rand(n_vectors, dim).astype('float32')
        base_ids = np.arange(n_vectors, dtype=np.int64)

        # Shift every ID by 2**32 so none of them fits in an int32
        wide_ids = (base_ids + 4294967296).astype('int64')

        # The queries are rows 10..19 of the database itself, so the best
        # match for each one is expected to be that same row
        probe = slice(10, 20)
        cfg = faiss.GpuIndexIVFPQConfig()

        def build_populated_index():
            # Construct from the current state of `cfg`, train, then add
            # the vectors together with their wide IDs
            gpu_idx = faiss.GpuIndexIVFPQ(gpu_res, dim, n_lists, n_subq,
                                          code_bits, faiss.METRIC_L2, cfg)
            gpu_idx.train(vectors)
            gpu_idx.add_with_ids(vectors, wide_ids)
            return gpu_idx

        # Default index storage: the full 64-bit IDs must round-trip
        idx = build_populated_index()
        _, found = idx.search(vectors[probe], 5)
        self.assertTrue(np.array_equal(wide_ids[probe], found[:, 0]))

        # 32-bit index storage: the high part of each ID is stripped, so
        # the base IDs are what comes back
        cfg.indicesOptions = faiss.INDICES_32_BIT
        idx = build_populated_index()
        _, found = idx.search(vectors[probe], 5)
        self.assertTrue(np.array_equal(base_ids[probe], found[:, 0]))
    def test_IndexIVFPQ(self):
        """Train an IVFPQ index, mirror it on the GPU, and check recall.

        Ground truth comes from an exact ``GpuIndexFlatL2`` search with
        k=1. The IVFPQ index is then searched with k=10, and the number of
        matches against the ground-truth neighbour must exceed 20% of the
        number of queries.
        """
        (xt, xb, xq) = self.get_dataset()
        d = xt.shape[1]

        dev_no = 0
        usePrecomputed = True

        res = faiss.StandardGpuResources()

        # Exact search provides the reference nearest neighbour per query
        gt_index = faiss.GpuIndexFlatL2(res, dev_no, d, False)
        gt_index.add(xb)
        _, gt_nns = gt_index.search(xq, 1)

        coarse_quantizer = faiss.IndexFlatL2(d)
        ncentroids = int(np.sqrt(xb.shape[0])) * 4

        index = faiss.IndexIVFPQ(coarse_quantizer, d, ncentroids, 32, 8)
        # add implemented on GPU but not train
        index.train(xt)
        gpuIndex = faiss.GpuIndexIVFPQ(res, dev_no, faiss.INDICES_64_BIT,
                                       False, index)
        gpuIndex.setPrecomputedCodes(usePrecomputed)
        gpuIndex.setNumProbes(64)
        # NOTE(review): the vectors are added to, and the search is run on,
        # the CPU `index`; `gpuIndex` is built and configured but never
        # queried -- confirm whether that is intended.
        index.add(xb)

        _, nns = index.search(xq, 10)
        n_ok = (nns == gt_nns).sum()
        nq = xq.shape[0]
        # Formatted print call: valid on Python 3 and prints the same text
        # as the old `print a, b, c` statement did on Python 2
        print("%d %d %d" % (ncentroids, n_ok, nq))

        self.assertGreater(n_ok, nq * 0.2)
Пример #3
0
    def test_ivfpq(self):
        """Build a GPU IVFPQ index from a CPU-trained one and add vectors.

        There are no assertions; the test passes if construction from the
        trained CPU index and the subsequent ``add`` complete without
        raising.
        """
        coarse = faiss.IndexFlatL2(self.d)
        cpu_ivfpq = faiss.IndexIVFPQ(coarse, self.d, self.nlist, 2, 8)

        # Keep training cheap: two clustering iterations for the PQ and no
        # polysemous training
        cpu_ivfpq.pq.cp.niter = 2
        cpu_ivfpq.do_polysemous_training = False
        cpu_ivfpq.train(self.xb)

        gpu_res = faiss.StandardGpuResources()
        gpu_ivfpq = faiss.GpuIndexIVFPQ(gpu_res, cpu_ivfpq)
        gpu_ivfpq.add(self.xb)
Пример #4
0
    def test_serialize(self):
        """Round-trip several GPU index types through CPU serialization.

        Each index (Flat, IVFFlat, IVFSQ, IVFPQ) is trained, populated and
        searched, then copied to the CPU, serialized, deserialized and
        copied back to GPU 0. The restored index must return exactly the
        same distances and IDs, and must still accept new vectors.
        """
        gpu_res = faiss.StandardGpuResources()

        dim = 32
        top_k = 10
        train_vecs = make_t(10000, dim)
        add_vecs = make_t(10000, dim)
        query_vecs = make_t(10, dim)

        # Number of inverted lists shared by the IVF variants
        n_lists = 5

        gpu_indexes = [
            # Flat
            faiss.GpuIndexFlatL2(gpu_res, dim),
            # IVFFlat
            faiss.GpuIndexIVFFlat(gpu_res, dim, n_lists, faiss.METRIC_L2),
            # IVFSQ
            faiss.GpuIndexIVFScalarQuantizer(
                gpu_res, dim, n_lists, faiss.ScalarQuantizer.QT_fp16),
            # IVFPQ
            faiss.GpuIndexIVFPQ(gpu_res, dim, n_lists, 4, 8,
                                faiss.METRIC_L2),
        ]

        for gpu_index in gpu_indexes:
            gpu_index.train(train_vecs)
            gpu_index.add(add_vecs)

            dist_before, ids_before = gpu_index.search(query_vecs, top_k)

            # GPU -> CPU -> bytes -> CPU -> GPU
            blob = faiss.serialize_index(faiss.index_gpu_to_cpu(gpu_index))
            cpu_copy = faiss.deserialize_index(blob)
            restored = faiss.index_cpu_to_gpu(gpu_res, 0, cpu_copy)

            dist_after, ids_after = restored.search(query_vecs, top_k)

            self.assertTrue(np.array_equal(dist_before, dist_after))
            self.assertTrue(np.array_equal(ids_before, ids_after))

            # The restored index must still be usable: adding more vectors
            # should not raise
            restored.add(query_vecs)
Пример #5
0
    def test_copy_to_gpu(self):
        """Copy a populated CPU IVFPQ index into an interleaved GPU index.

        For each PQ code width in 4, 5, 6 and 8 bits, a CPU ``IndexIVFPQ``
        is trained and filled, copied into a ``GpuIndexIVFPQ`` configured
        with ``interleavedLayout``, and both are searched with the same
        ``nprobe``. At least 90% of the returned IDs must agree and the
        distances must be close, first without and then with precomputed
        codes enabled on the GPU index.
        """
        res = faiss.StandardGpuResources()

        # The data set does not depend on the code width, so it is built
        # once; the fixed seed makes it identical to per-iteration creation
        d = 128
        nb = 10000
        nq = 20

        rs = np.random.RandomState(567)
        xb = rs.rand(nb, d).astype('float32')
        xq = rs.rand(nq, d).astype('float32')

        nlist = int(math.sqrt(nb))
        sub_q = 16
        nprobe = 4

        # The loop variable is used as-is. It was previously overwritten
        # with 8 inside the loop body, so only the 8-bit case ever ran.
        for bits_per_code in [4, 5, 6, 8]:
            config = faiss.GpuIndexIVFPQConfig()
            config.interleavedLayout = True
            idx_gpu = faiss.GpuIndexIVFPQ(res, d, nlist, sub_q, bits_per_code,
                                          faiss.METRIC_L2, config)
            q = faiss.IndexFlatL2(d)
            idx_cpu = faiss.IndexIVFPQ(q, d, nlist, sub_q, bits_per_code,
                                       faiss.METRIC_L2)

            idx_cpu.train(xb)
            idx_cpu.add(xb)

            idx_gpu.copyFrom(idx_cpu)

            idx_gpu.nprobe = nprobe
            idx_cpu.nprobe = nprobe

            # First pass: without precomputed codes.
            # Second pass: with precomputed codes (different kernel).
            for use_precomputed in (False, True):
                if use_precomputed:
                    idx_gpu.setPrecomputedCodes(True)

                d_g, i_g = idx_gpu.search(xq, 10)
                d_c, i_c = idx_cpu.search(xq, 10)
                self.assertGreaterEqual((i_g == i_c).sum(), i_g.size * 0.9)
                self.assertTrue(np.allclose(d_g, d_c))
Пример #6
0
    def test_indices_ivfpq(self):
        """Check that invalid ``k`` / ``nprobe`` values are rejected.

        After building a GPU IVFPQ index with 64-bit IDs, the test expects
        an ``AssertionError`` for a negative ``k``, a ``RuntimeError`` for
        non-positive probe counts (via ``setNumProbes`` or via a search
        after assigning ``nprobe`` directly), and finally a correct result
        once valid parameters are restored.
        """
        gpu_res = faiss.StandardGpuResources()
        dim, n_vectors = 128, 5000
        n_lists, n_subq, code_bits = 10, 4, 8

        rng = np.random.RandomState(567)
        vectors = rng.rand(n_vectors, dim).astype('float32')
        base_ids = np.arange(n_vectors, dtype=np.int64)

        # Shift every ID by 2**32 so none of them fits in an int32
        wide_ids = (base_ids + 4294967296).astype('int64')

        cfg = faiss.GpuIndexIVFPQConfig()
        idx = faiss.GpuIndexIVFPQ(gpu_res, dim, n_lists, n_subq, code_bits,
                                  faiss.METRIC_L2, cfg)
        idx.train(vectors)
        idx.add_with_ids(vectors, wide_ids)

        # invalid k (should be > 0)
        idx.setNumProbes(3)
        with self.assertRaises(AssertionError):
            idx.search(vectors[10:20], -5)

        # invalid nprobe (should be > 0)
        for bad_nprobe in (0, -3):
            with self.assertRaises(RuntimeError):
                idx.setNumProbes(bad_nprobe)

        # Assigning the attribute directly is not rejected here; the test
        # expects the failure to surface at search time instead
        idx.nprobe = -3
        with self.assertRaises(RuntimeError):
            idx.search(vectors[10:20], 5)

        # valid params
        idx.setNumProbes(3)
        _, found = idx.search(vectors[10:20], 5)
        self.assertTrue(np.array_equal(wide_ids[10:20], found[:, 0]))
Пример #7
0
    def test_IndexIVFPQ(self):
        """Train an IVFPQ index, mirror it on the GPU, and check recall.

        Ground truth comes from an exact ``GpuIndexFlatL2`` search with
        k=1. The IVFPQ index is then searched with k=10, and the number of
        matches against the ground-truth neighbour must exceed 20% of the
        number of queries.
        """
        (xt, xb, xq) = self.get_dataset()
        d = xt.shape[1]

        dev_no = 0
        usePrecomputed = True

        res = faiss.StandardGpuResources()

        flat_config = faiss.GpuIndexFlatConfig()
        flat_config.device = dev_no

        # Exact search provides the reference nearest neighbour per query
        gt_index = faiss.GpuIndexFlatL2(res, d, flat_config)
        gt_index.add(xb)
        _, gt_nns = gt_index.search(xq, 1)

        coarse_quantizer = faiss.IndexFlatL2(d)
        ncentroids = int(np.sqrt(xb.shape[0])) * 4

        index = faiss.IndexIVFPQ(coarse_quantizer, d, ncentroids, 32, 8)
        # add implemented on GPU but not train
        index.train(xt)

        ivfpq_config = faiss.GpuIndexIVFPQConfig()
        ivfpq_config.device = dev_no
        ivfpq_config.usePrecomputedTables = usePrecomputed

        gpuIndex = faiss.GpuIndexIVFPQ(res, index, ivfpq_config)
        gpuIndex.setNumProbes(64)
        # NOTE(review): the vectors are added to, and the search is run on,
        # the CPU `index`; `gpuIndex` is built and configured but never
        # queried -- confirm whether that is intended.
        index.add(xb)

        _, nns = index.search(xq, 10)
        n_ok = (nns == gt_nns).sum()
        nq = xq.shape[0]
        # Formatted print call: valid on Python 3 and prints the same text
        # as the old `print a, b, c` statement did on Python 2
        print("%d %d %d" % (ncentroids, n_ok, nq))

        self.assertGreater(n_ok, nq * 0.2)
Пример #8
0
# Convert the collected features and labels to NumPy arrays, timing the step
start = time.time()
mfcc_features = np.array(mfcc_features, dtype=np.float32)
labels = np.array(labels)
print("formatting labels took : ", time.time() - start, " seconds")

### Build the FAISS index - in this case, IVFPQ

# Vector dimensionality handed to the index constructor below
# NOTE(review): presumably this must equal mfcc_features.shape[1] -- confirm
d = 16
print('starting training')
start = time.time()

res = faiss.StandardGpuResources()  # use a single GPU

# Number of coarse centroids: 4 * sqrt(number of labels), which the original
# author describes as the low end of FAISS guidance
ncentroids = int(4 * math.sqrt(len(labels)))
# Use `d` rather than repeating the literal 16, so the dimensionality is
# defined in exactly one place
gpu_index = faiss.GpuIndexIVFPQ(res, d, ncentroids, 8, 8, faiss.METRIC_L2)

gpu_index.train(mfcc_features)
# Adding vectors to the index with ids means that the index handles label
# lookups
# NOTE(review): add_with_ids presumably expects integer ids -- confirm the
# dtype of `labels`
gpu_index.add_with_ids(mfcc_features, labels)
print("building index took : ", time.time() - start, " seconds")

# Save the index to file (it is copied to the CPU first)
index = faiss.index_gpu_to_cpu(gpu_index)
faiss.write_index(index, "/home/sir/voice/faiss_index_librispeech360.ind")

### Read the test files into GPU memory (and do not bring them to the CPU) to take advantage of GPU querying of torch tensors
start = time.time()

calc_mfcc = torchaudio.transforms.MFCC(n_mfcc=13,