Python eval_intersection примеры, faiss.eval_intersection Python примеры использования

Пример #1

0

Показать файл

Файл: test_index_accuracy.py Проект: mlzxy/faiss

    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 1000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for by_residual in True, False:

            index = faiss.IndexIVFPQ(quantizer, d, nlist, 4, 8)
            index.metric_type = mt
            index.by_residual = by_residual
            if by_residual:
                # perform cheap polysemous training
                index.do_polysemous_training = True
                pt = faiss.PolysemousTraining()
                pt.n_iter = 50000
                pt.n_redo = 1
                index.polysemous_training = pt

            index.train(xt)
            index.add(xb)
            index.nprobe = 4
            D, I = index.search(xq, 10)

            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, by_residual, ninter))

            assert ninter >= self.ref_results[mt, by_residual] - 2

            index.use_precomputed_table = 0
            D2, I2 = index.search(xq, 10)
            assert np.all(I == I2)

            if by_residual:

                index.use_precomputed_table = 1
                index.polysemous_ht = 20
                D, I = index.search(xq, 10)
                ninter = faiss.eval_intersection(I, gt_I)
                print('(%d, %s, %d): %d, ' %
                      (mt, by_residual, index.polysemous_ht, ninter))

                # polysemous behaves bizarrely on ARM
                assert (
                    ninter >=
                    self.ref_results[mt, by_residual, index.polysemous_ht] - 4)

Пример #2

0

Показать файл

    def test_white(self):

        # generate data
        d = 4
        nt = 1000
        nb = 200
        nq = 200

        # normal distribition
        x = faiss.randn((nt + nb + nq) * d, 1234).reshape(nt + nb + nq, d)

        index = faiss.index_factory(d, 'Flat')

        xt = x[:nt]
        xb = x[nt:-nq]
        xq = x[-nq:]

        # NN search on normal distribution
        index.add(xb)
        Do, Io = index.search(xq, 5)

        # make distribution very skewed
        x *= [10, 4, 1, 0.5]
        rr, _ = np.linalg.qr(faiss.randn(d * d).reshape(d, d))
        x = np.dot(x, rr).astype('float32')

        xt = x[:nt]
        xb = x[nt:-nq]
        xq = x[-nq:]

        # L2 search on skewed distribution
        index = faiss.index_factory(d, 'Flat')

        index.add(xb)
        Dl2, Il2 = index.search(xq, 5)

        # whiten + L2 search on L2 distribution
        index = faiss.index_factory(d, 'PCAW%d,Flat' % d)

        index.train(xt)
        index.add(xb)
        Dw, Iw = index.search(xq, 5)

        # make sure correlation of whitened results with original
        # results is much better than simple L2 distances
        # should be 961 vs. 264
        assert (faiss.eval_intersection(Io, Iw) >
                2 * faiss.eval_intersection(Io, Il2))

Пример #3

0

Показать файл

Файл: test_index_composite.py Проект: mrlocker/faiss

    def test_white(self):

        # generate data
        d = 4
        nt = 1000
        nb = 200
        nq = 200

        # normal distribition
        x = faiss.randn((nt + nb + nq) * d, 1234).reshape(nt + nb + nq, d)

        index = faiss.index_factory(d, 'Flat')

        xt = x[:nt]
        xb = x[nt:-nq]
        xq = x[-nq:]

        # NN search on normal distribution
        index.add(xb)
        Do, Io = index.search(xq, 5)

        # make distribution very skewed
        x *= [10, 4, 1, 0.5]
        rr, _ = np.linalg.qr(faiss.randn(d * d).reshape(d, d))
        x = np.dot(x, rr).astype('float32')

        xt = x[:nt]
        xb = x[nt:-nq]
        xq = x[-nq:]

        # L2 search on skewed distribution
        index = faiss.index_factory(d, 'Flat')

        index.add(xb)
        Dl2, Il2 = index.search(xq, 5)

        # whiten + L2 search on L2 distribution
        index = faiss.index_factory(d, 'PCAW%d,Flat' % d)

        index.train(xt)
        index.add(xb)
        Dw, Iw = index.search(xq, 5)

        # make sure correlation of whitened results with original
        # results is much better than simple L2 distances
        # should be 961 vs. 264
        assert (faiss.eval_intersection(Io, Iw) >
                2 * faiss.eval_intersection(Io, Il2))

Пример #4

0

Показать файл

Файл: test_local_search_quantizer.py Проект: ifeherva/faiss

    def eval_index_accuracy(self, factory_key):
        # just do a single test, most search functions are already stress
        # tested in test_residual_quantizer.py
        ds = datasets.SyntheticDataset(32, 3000, 1000, 100)
        index = faiss.index_factory(ds.d, factory_key)

        index.train(ds.get_train())
        index.add(ds.get_database())

        inters = []
        for nprobe in 1, 2, 5, 10, 20, 50:
            index.nprobe = nprobe
            D, I = index.search(ds.get_queries(), 10)
            inter = faiss.eval_intersection(I, ds.get_groundtruth(10))
            # print("nprobe=", nprobe, "inter=", inter)
            inters.append(inter)

        inters = np.array(inters)
        # in fact the results should be the same for the decoding and the
        # reconstructing versions
        self.assertTrue(np.all(inters[1:] >= inters[:-1]))

        # do a little I/O test
        index2 = faiss.deserialize_index(faiss.serialize_index(index))
        D2, I2 = index2.search(ds.get_queries(), 10)
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)

Пример #5

0

Показать файл

Файл: test_index_accuracy.py Проект: xiongziqi/faiss

    def test_IVFPQ_non8bit(self):
        d = 16
        xt, xb, xq = get_dataset_2(d, 10000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)

        quantizer = faiss.IndexFlat(d)
        ninter = {}
        for v in '2x8', '8x2':
            if v == '8x2':
                index = faiss.IndexIVFPQ(
                    quantizer, d, nlist, 2, 8)
            else:
                index = faiss.IndexIVFPQ(
                    quantizer, d, nlist, 8, 2)
            index.train(xt)
            index.add(xb)
            index.npobe = 16

            D, I = index.search(xq, 10)
            ninter[v] = faiss.eval_intersection(I, gt_I)
        print('ninter=', ninter)
        # this should be the case but we don't observe
        # that... Probavly too few test points
        #  assert ninter['2x8'] > ninter['8x2']
        # ref numbers on 2019-11-02
        assert abs(ninter['2x8'] - 458) < 4
        assert abs(ninter['8x2'] - 465) < 4

Пример #6

0

Показать файл

Файл: test_local_search_quantizer.py Проект: ifeherva/faiss

    def test_IndexLocalSearchQuantizer(self):
        ds = datasets.SyntheticDataset(32, 1000, 200, 100)
        gt = ds.get_groundtruth(10)

        ir = faiss.IndexLocalSearchQuantizer(ds.d, 4, 5)
        ir.train(ds.get_train())
        ir.add(ds.get_database())
        Dref, Iref = ir.search(ds.get_queries(), 10)
        inter_ref = faiss.eval_intersection(Iref, gt)

        # 467
        self.assertGreater(inter_ref, 460)

        AQ = faiss.AdditiveQuantizer
        ir2 = faiss.IndexLocalSearchQuantizer(ds.d, 4, 5, faiss.METRIC_L2,
                                              AQ.ST_norm_float)

        ir2.train(ds.get_train())  # just to set flags properly
        ir2.lsq.codebooks = ir.lsq.codebooks

        ir2.add(ds.get_database())
        D2, I2 = ir2.search(ds.get_queries(), 10)
        np.testing.assert_array_almost_equal(Dref, D2, decimal=5)
        self.assertLess((Iref != I2).sum(), Iref.size * 0.01)

        # test I/O
        ir3 = faiss.deserialize_index(faiss.serialize_index(ir))
        D3, I3 = ir3.search(ds.get_queries(), 10)
        np.testing.assert_array_equal(Iref, I3)
        np.testing.assert_array_equal(Dref, D3)

Пример #7

0

Показать файл

Файл: test_index_accuracy.py Проект: PhilipBAdams/faiss-learned-termination-prior-weighted

    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 1000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for qname in '8bit 4bit 8bit_uniform 4bit_uniform fp16'.split():
            qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname)
            index = faiss.IndexIVFScalarQuantizer(quantizer, d, nlist, qtype,
                                                  mt)
            index.train(xt)
            index.add(xb)
            index.nprobe = 4  # hopefully more robust than 1
            D, I = index.search(xq, 10)
            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
            assert abs(ninter - self.ref_results[(mt, qname)]) <= 9

            D2, I2 = self.subtest_add2col(xb, xq, index, qname)

            assert np.all(I2 == I)

            # also test range search

            if mt == faiss.METRIC_INNER_PRODUCT:
                radius = float(D[:, -1].max())
            else:
                radius = float(D[:, -1].min())
            print('radius', radius)

            lims, D3, I3 = index.range_search(xq, radius)
            ntot = ndiff = 0
            for i in range(len(xq)):
                l0, l1 = lims[i], lims[i + 1]
                Inew = set(I3[l0:l1])
                if mt == faiss.METRIC_INNER_PRODUCT:
                    mask = D2[i] > radius
                else:
                    mask = D2[i] < radius
                Iref = set(I2[i, mask])
                ndiff += len(Inew ^ Iref)
                ntot += len(Iref)
            print('ndiff %d / %d' % (ndiff, ntot))
            assert ndiff < ntot * 0.01

            for pm in 1, 2:
                print('parallel_mode=%d' % pm)
                index.parallel_mode = pm
                lims4, D4, I4 = index.range_search(xq, radius)
                print('sizes', lims4[1:] - lims4[:-1])
                for qno in range(len(lims) - 1):
                    Iref = I3[lims[qno]:lims[qno + 1]]
                    Inew = I4[lims4[qno]:lims4[qno + 1]]
                    assert set(Iref) == set(
                        Inew), "q %d ref %s new %s" % (qno, Iref, Inew)

Пример #8

0

Показать файл

    def do_test_accuracy(self, by_residual, st):
        ds = datasets.SyntheticDataset(32, 3000, 1000, 100)

        quantizer = faiss.IndexFlatL2(ds.d)

        index = faiss.IndexIVFResidualQuantizer(
            quantizer, ds.d, 100, 3, 4,
            faiss.METRIC_L2, st
        )
        index.by_residual = by_residual

        index.rq.train_type
        index.rq.train_type = faiss.ResidualQuantizer.Train_default
        index.rq.max_beam_size = 30

        index.train(ds.get_train())
        index.add(ds.get_database())

        inters = []
        for nprobe in 1, 2, 5, 10, 20, 50:
            index.nprobe = nprobe
            D, I = index.search(ds.get_queries(), 10)
            inter = faiss.eval_intersection(I, ds.get_groundtruth(10))
            # print(st, "nprobe=", nprobe, "inter=", inter)
            inters.append(inter)

        # do a little I/O test
        index2 = faiss.deserialize_index(faiss.serialize_index(index))
        D2, I2 = index2.search(ds.get_queries(), 10)
        np.testing.assert_array_equal(I2, I)
        np.testing.assert_array_equal(D2, D)

        inters = np.array(inters)

        if by_residual:
            # check that we have increasing intersection measures with
            # nprobe
            self.assertTrue(np.all(inters[1:] >= inters[:-1]))
        else:
            self.assertTrue(np.all(inters[1:3] >= inters[:2]))
            # check that we have the same result as the flat residual quantizer
            iflat = faiss.IndexResidualQuantizer(
                ds.d, 3, 4, faiss.METRIC_L2, st)
            iflat.rq.train_type
            iflat.rq.train_type = faiss.ResidualQuantizer.Train_default
            iflat.rq.max_beam_size = 30
            iflat.train(ds.get_train())
            iflat.rq.codebooks = index.rq.codebooks

            iflat.add(ds.get_database())
            Dref, Iref = iflat.search(ds.get_queries(), 10)

            index.nprobe = 100
            D2, I2 = index.search(ds.get_queries(), 10)
            np.testing.assert_array_almost_equal(Dref, D2, decimal=5)
            # there are many ties because the codes are so short
            self.assertLess((Iref != I2).sum(), Iref.size * 0.2)

Пример #9

0

Показать файл

Файл: test_contrib.py Проект: ifeherva/faiss

    def test_float(self):
        ds = datasets.SyntheticDataset(128, 2000, 2000, 200)

        d = ds.d
        xt = ds.get_train()
        xq = ds.get_queries()
        xb = ds.get_database()

        # define alternative quantizer on the 20 first dims of vectors
        km = faiss.Kmeans(20, 50)
        km.train(xt[:, :20].copy())
        alt_quantizer = km.index

        index = faiss.index_factory(d, "IVF50,PQ16np")
        index.by_residual = False

        # (optional) fake coarse quantizer
        fake_centroids = np.zeros((index.nlist, index.d), dtype="float32")
        index.quantizer.add(fake_centroids)

        # train the PQ part
        index.train(xt)

        # add elements xb
        a = alt_quantizer.search(xb[:, :20].copy(), 1)[1].ravel()
        ivf_tools.add_preassigned(index, xb, a)

        # search elements xq, increase nprobe, check 4 first results w/ groundtruth
        prev_inter_perf = 0
        for nprobe in 1, 10, 20:

            index.nprobe = nprobe
            a = alt_quantizer.search(xq[:, :20].copy(), index.nprobe)[1]
            D, I = ivf_tools.search_preassigned(index, xq, 4, a)
            inter_perf = faiss.eval_intersection(I,
                                                 ds.get_groundtruth()[:, :4])
            self.assertTrue(inter_perf >= prev_inter_perf)
            prev_inter_perf = inter_perf

        # test range search

        index.nprobe = 20

        a = alt_quantizer.search(xq[:, :20].copy(), index.nprobe)[1]

        # just to find a reasonable radius
        D, I = ivf_tools.search_preassigned(index, xq, 4, a)
        radius = D.max() * 1.01

        lims, DR, IR = ivf_tools.range_search_preassigned(index, xq, radius, a)

        # with that radius the k-NN results are a subset of the range search results
        for q in range(len(xq)):
            l0, l1 = lims[q], lims[q + 1]
            self.assertTrue(set(I[q]) <= set(IR[l0:l1]))

Пример #10

0

Показать файл

Файл: test_index_accuracy.py Проект: xiongziqi/faiss

    def test_sh(self):
        d = 32
        xt, xb, xq = get_dataset_2(d, 2000, 1000, 200)
        nlist, nprobe = 1, 1

        gt_index = faiss.IndexFlatL2(d)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)

        for nbit in 32, 64, 128:
            quantizer = faiss.IndexFlatL2(d)

            index_lsh = faiss.IndexLSH(d, nbit, True)
            index_lsh.add(xb)
            D, I = index_lsh.search(xq, 10)
            ninter = faiss.eval_intersection(I, gt_I)

            print('LSH baseline: %d' % ninter)

            for period in 10.0, 1.0:

                for tt in 'global centroid centroid_half median'.split():
                    index = faiss.IndexIVFSpectralHash(quantizer, d, nlist,
                                                       nbit, period)
                    index.nprobe = nprobe
                    index.threshold_type = getattr(
                        faiss.IndexIVFSpectralHash,
                        'Thresh_' + tt
                    )

                    index.train(xt)
                    index.add(xb)
                    D, I = index.search(xq, 10)

                    ninter = faiss.eval_intersection(I, gt_I)
                    key = (nbit, tt, period)

                    print('(%d, %s, %g): %d, ' % (nbit, repr(tt), period, ninter))
                    assert abs(ninter - self.ref_results[key]) <= 12

Пример #11

0

Показать файл

    def test_search_L2(self):
        ds = datasets.SyntheticDataset(32, 1000, 200, 100)

        xt = ds.get_train()
        xb = ds.get_database()
        xq = ds.get_queries()
        gt = ds.get_groundtruth(10)

        ir = faiss.IndexResidualQuantizer(ds.d, 3, 4)
        ir.rq.train_type = faiss.ResidualQuantizer.Train_default
        ir.rq.max_beam_size = 30
        ir.train(xt)

        # reference run w/ decoding
        ir.add(xb)
        Dref, Iref = ir.search(xq, 10)

        # 388
        inter_ref = faiss.eval_intersection(Iref, gt)

        AQ = faiss.AdditiveQuantizer
        for st in AQ.ST_norm_float, AQ.ST_norm_qint8, AQ.ST_norm_qint4, \
                AQ.ST_norm_cqint8, AQ.ST_norm_cqint4:

            ir2 = faiss.IndexResidualQuantizer(ds.d, 3, 4, faiss.METRIC_L2, st)
            ir2.rq.max_beam_size = 30
            ir2.train(xt)   # to get the norm bounds
            ir2.rq.codebooks = ir.rq.codebooks    # fake training
            ir2.add(xb)

            D2, I2 = ir2.search(xq, 10)

            if st == AQ.ST_norm_float:
                np.testing.assert_array_almost_equal(Dref, D2, decimal=5)
                self.assertLess((Iref != I2).sum(), Iref.size * 0.05)
            else:
                inter_2 = faiss.eval_intersection(I2, gt)
                self.assertGreater(inter_ref, inter_2)

Пример #12

0

Показать файл

Файл: test_build_blocks.py Проект: ifeherva/faiss

 def test_rand_vector(self):
     """ test if the smooth_vectors function is reasonably compressible with
     a small PQ """
     x = faiss.rand_smooth_vectors(1300, 32)
     xt = x[:1000]
     xb = x[1000:1200]
     xq = x[1200:]
     _, gt = faiss.knn(xq, xb, 10)
     index = faiss.IndexPQ(32, 4, 4)
     index.train(xt)
     index.add(xb)
     D, I = index.search(xq, 10)
     ninter = faiss.eval_intersection(I, gt)
     # 445 for SyntheticDataset
     self.assertGreater(ninter, 420)
     self.assertLess(ninter, 460)

Пример #13

0

Показать файл

    def do_test_accuracy_IP(self, by_residual):
        ds = datasets.SyntheticDataset(32, 3000, 1000, 100, "IP")

        quantizer = faiss.IndexFlatIP(ds.d)

        index = faiss.IndexIVFResidualQuantizer(
            quantizer, ds.d, 100, 3, 4,
            faiss.METRIC_INNER_PRODUCT, faiss.AdditiveQuantizer.ST_decompress
        )
        index.cp.spherical = True
        index.by_residual = by_residual

        index.rq.train_type
        index.rq.train_type = faiss.ResidualQuantizer.Train_default
        index.train(ds.get_train())

        index.add(ds.get_database())

        inters = []
        for nprobe in 1, 2, 5, 10, 20, 50:
            index.nprobe = nprobe
            index.rq.search_type = faiss.AdditiveQuantizer.ST_decompress
            D, I = index.search(ds.get_queries(), 10)
            index.rq.search_type = faiss.AdditiveQuantizer.ST_LUT_nonorm
            D2, I2 = index.search(ds.get_queries(), 10)
            # print(D[:5] - D2[:5])
            # print(I[:5])
            np.testing.assert_array_almost_equal(D, D2, decimal=5)
            # there are many ties because the codes are so short
            self.assertLess((I != I2).sum(), I.size * 0.1)

            # D2, I2 = index2.search(ds.get_queries(), 10)
            # print(D[:5])
            # print(D2[:5])

            inter = faiss.eval_intersection(I, ds.get_groundtruth(10))
            # print("nprobe=", nprobe, "inter=", inter)
            inters.append(inter)
        self.assertTrue(np.all(inters[1:4] >= inters[:3]))

Пример #14

0

Показать файл

Файл: test_local_search_quantizer.py Проект: ifeherva/faiss

    def test_coarse_quantizer(self):
        ds = datasets.SyntheticDataset(32, 5000, 1000, 100)
        gt = ds.get_groundtruth(10)

        quantizer = faiss.LocalSearchCoarseQuantizer(ds.d, 2, 4)
        quantizer.lsq.nperts
        quantizer.lsq.nperts = 2

        index = faiss.IndexIVFFlat(quantizer, ds.d, 256)
        index.quantizer_trains_alone = True

        index.train(ds.get_train())

        index.add(ds.get_database())
        index.nprobe = 4

        Dref, Iref = index.search(ds.get_queries(), 10)

        inter_ref = faiss.eval_intersection(Iref, gt)

        # 249
        self.assertGreater(inter_ref, 235)

Пример #15

0

Показать файл

Файл: test_index_accuracy.py Проект: mlzxy/faiss

    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 1000, 2000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for qname in '8bit 4bit 8bit_uniform 4bit_uniform fp16'.split():
            qtype = getattr(faiss.ScalarQuantizer, 'QT_' + qname)
            index = faiss.IndexIVFScalarQuantizer(quantizer, d, nlist, qtype,
                                                  mt)
            index.train(xt)
            index.add(xb)
            index.nprobe = 4  # hopefully more robust than 1
            D, I = index.search(xq, 10)
            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, repr(qname), ninter))
            assert ninter >= self.ref_results[(mt, qname)] - 4

            D2, I2 = self.subtest_add2col(xb, xq, index, qname)

            assert np.all(I2 == I)

Пример #16

0

Показать файл

Файл: test_contrib.py Проект: ifeherva/faiss

    def test_binary(self):
        ds = datasets.SyntheticDataset(128, 2000, 2000, 200)

        d = ds.d
        xt = ds.get_train()
        xq = ds.get_queries()
        xb = ds.get_database()

        # define alternative quantizer on the 20 first dims of vectors (will be in float)
        km = faiss.Kmeans(20, 50)
        km.train(xt[:, :20].copy())
        alt_quantizer = km.index

        binarizer = faiss.index_factory(d, "ITQ,LSHt")
        binarizer.train(xt)

        xb_bin = binarizer.sa_encode(xb)
        xq_bin = binarizer.sa_encode(xq)

        index = faiss.index_binary_factory(d, "BIVF200")

        fake_centroids = np.zeros((index.nlist, index.d // 8), dtype="uint8")
        index.quantizer.add(fake_centroids)
        index.is_trained = True

        # add elements xb
        a = alt_quantizer.search(xb[:, :20].copy(), 1)[1].ravel()
        ivf_tools.add_preassigned(index, xb_bin, a)

        # recompute GT in binary
        k = 15
        ib = faiss.IndexBinaryFlat(128)
        ib.add(xb_bin)
        Dgt, Igt = ib.search(xq_bin, k)

        # search elements xq, increase nprobe, check 4 first results w/ groundtruth
        prev_inter_perf = 0
        for nprobe in 1, 10, 20:

            index.nprobe = nprobe
            a = alt_quantizer.search(xq[:, :20].copy(), index.nprobe)[1]
            D, I = ivf_tools.search_preassigned(index, xq_bin, k, a)
            inter_perf = faiss.eval_intersection(I, Igt)
            self.assertGreaterEqual(inter_perf, prev_inter_perf)
            prev_inter_perf = inter_perf

        # test range search

        index.nprobe = 20

        a = alt_quantizer.search(xq[:, :20].copy(), index.nprobe)[1]

        # just to find a reasonable radius
        D, I = ivf_tools.search_preassigned(index, xq_bin, 4, a)
        radius = int(D.max() + 1)

        lims, DR, IR = ivf_tools.range_search_preassigned(
            index, xq_bin, radius, a)

        # with that radius the k-NN results are a subset of the range search results
        for q in range(len(xq)):
            l0, l1 = lims[q], lims[q + 1]
            self.assertTrue(set(I[q]) <= set(IR[l0:l1]))

Пример #17

0

Показать файл

Файл: test_index_accuracy.py Проект: xiongziqi/faiss

    def subtest(self, mt):
        d = 32
        xt, xb, xq = get_dataset_2(d, 2000, 1000, 200)
        nlist = 64

        gt_index = faiss.IndexFlat(d, mt)
        gt_index.add(xb)
        gt_D, gt_I = gt_index.search(xq, 10)
        quantizer = faiss.IndexFlat(d, mt)
        for by_residual in True, False:

            index = faiss.IndexIVFPQ(
                quantizer, d, nlist, 4, 8)
            index.metric_type = mt
            index.by_residual = by_residual
            if by_residual:
                # perform cheap polysemous training
                index.do_polysemous_training = True
                pt = faiss.PolysemousTraining()
                pt.n_iter = 50000
                pt.n_redo = 1
                index.polysemous_training = pt

            index.train(xt)
            index.add(xb)
            index.nprobe = 4
            D, I = index.search(xq, 10)

            ninter = faiss.eval_intersection(I, gt_I)
            print('(%d, %s): %d, ' % (mt, by_residual, ninter))

            assert abs(ninter - self.ref_results[mt, by_residual]) <= 3

            index.use_precomputed_table = 0
            D2, I2 = index.search(xq, 10)
            assert np.all(I == I2)

            if by_residual:

                index.use_precomputed_table = 1
                index.polysemous_ht = 20
                D, I = index.search(xq, 10)
                ninter = faiss.eval_intersection(I, gt_I)
                print('(%d, %s, %d): %d, ' % (
                    mt, by_residual, index.polysemous_ht, ninter))

                # polysemous behaves bizarrely on ARM
                assert (ninter >= self.ref_results[
                    mt, by_residual, index.polysemous_ht] - 4)

            # also test range search

            if mt == faiss.METRIC_INNER_PRODUCT:
                radius = float(D[:, -1].max())
            else:
                radius = float(D[:, -1].min())
            print('radius', radius)

            lims, D3, I3 = index.range_search(xq, radius)
            ntot = ndiff = 0
            for i in range(len(xq)):
                l0, l1 = lims[i], lims[i + 1]
                Inew = set(I3[l0:l1])
                if mt == faiss.METRIC_INNER_PRODUCT:
                    mask = D2[i] > radius
                else:
                    mask = D2[i] < radius
                Iref = set(I2[i, mask])
                ndiff += len(Inew ^ Iref)
                ntot += len(Iref)
            print('ndiff %d / %d' % (ndiff, ntot))
            assert ndiff < ntot * 0.02

Python eval_intersection примеры использования