Пример #1
0
    def do_read_callback(self, bsz):
        """Round-trip a flat L2 index through a Python read callback.

        Builds an ``IndexFlatL2`` over random float32 vectors, writes it to a
        temporary file, reads it back via ``faiss.PyCallbackIOReader`` fed by
        the file object's ``read`` method, and checks that the dimension and
        the stored vectors match. Finally checks that a reader built from a
        non-callable makes ``faiss.read_index`` raise.

        Args:
            bsz: When positive, the callback reader is wrapped in a
                ``faiss.BufferedIOReader`` constructed with this value;
                otherwise the callback reader is used directly.
        """
        dim, count = 32, 1000
        vectors = np.random.uniform(size=(count, dim)).astype('float32')
        original = faiss.IndexFlatL2(dim)
        original.add(vectors)

        # Only the path is needed; release the descriptor immediately.
        handle, path = tempfile.mkstemp()
        os.close(handle)
        try:
            faiss.write_index(original, path)

            with open(path, 'rb') as stream:
                # 1234 is passed as the second constructor argument
                # (presumably the callback read size -- confirm with faiss).
                source = faiss.PyCallbackIOReader(stream.read, 1234)
                reader = (
                    faiss.BufferedIOReader(source, bsz) if bsz > 0 else source
                )
                restored = faiss.read_index(reader)

            self.assertEqual(original.d, restored.d)
            expected = faiss.vector_to_array(original.xb)
            actual = faiss.vector_to_array(restored.xb)
            np.testing.assert_array_equal(expected, actual)

            # A string is not callable: reading through it should raise.
            bad_reader = faiss.PyCallbackIOReader("blabla")
            self.assertRaises(Exception, faiss.read_index, bad_reader)
        finally:
            if os.path.exists(path):
                os.unlink(path)
Пример #2
0
def load_faiss_index(path_to_faiss="models/lex_similar_sentences.index"):
    """Load and deserialize the Faiss index.

    The serialized index is streamed over HTTPS from a fixed S3 URL and
    handed to ``faiss.read_index`` through a ``faiss.PyCallbackIOReader``
    that pulls bytes from the HTTP response's ``read`` method.

    Args:
        path_to_faiss: Not used by this function; retained so that existing
            callers passing it keep working.

    Returns:
        The index object returned by ``faiss.read_index``.
    """
    url = (
        "https://podcast-search-scify.s3.amazonaws.com/"
        "lex_similar_sentences_distil.index"
    )
    # Use the response as a context manager so the connection is closed once
    # the index has been read, including when deserialization raises.
    with urllib.request.urlopen(url) as data:
        reader = faiss.PyCallbackIOReader(data.read)
        index = faiss.read_index(reader)

    return index
Пример #3
0
 def load_faiss_index(self):
     """Assemble ``self.faiss_index`` from per-partition index files.

     Creates a ``faiss.IndexShards`` sized by ``self.item_embedding_size``,
     then, for every rank in ``range(self.get_partition_count())``, reads
     ``part_<count>_<rank>.dat`` from the ``faiss/item_index/`` directory
     under ``self.model_in_path`` and adds it as a shard. Prints the
     resulting ``ntotal`` when done.
     """
     from .url_utils import use_s3

     shard_dir = use_s3('%sfaiss/item_index/' % self.model_in_path)
     # The two flags are passed positionally (presumably threaded=True,
     # successive_ids=False -- confirm against faiss's IndexShards signature).
     self.faiss_index = faiss.IndexShards(
         self.item_embedding_size, True, False)

     num_parts = self.get_partition_count()
     for part in range(num_parts):
         shard_path = '%spart_%d_%d.dat' % (shard_dir, num_parts, part)
         stream = _mindalpha.InputStream(shard_path)
         # faiss pulls the serialized shard through the stream's read method.
         reader = faiss.PyCallbackIOReader(stream.read)
         shard = faiss.read_index(reader)
         self.faiss_index.add_shard(shard)

     print('faiss index ntotal: %d' % self.faiss_index.ntotal)
Пример #4
0
    def test_buf_read(self):
        """Check that a buffered callback reader returns the file's raw bytes.

        Writes 20 random values to a temporary file with ``ndarray.tofile``,
        reads them back into a zeroed array of the same shape and dtype by
        calling a ``faiss.BufferedIOReader`` that wraps a
        ``faiss.PyCallbackIOReader``, and asserts that both arrays are equal.
        """
        x = np.random.uniform(size=20)

        # mkstemp returns an open descriptor along with the path; close it
        # right away so it is not leaked (only the path is used below).
        fd, fname = tempfile.mkstemp()
        os.close(fd)
        try:
            x.tofile(fname)

            y = np.zeros_like(x)
            # Keep the file open only while the reader is in use, so it is
            # closed before the temporary file is unlinked.
            with open(fname, 'rb') as f:
                reader = faiss.PyCallbackIOReader(f.read, 1234)

                bsz = 123
                reader = faiss.BufferedIOReader(reader, bsz)

                print('nbytes=', y.nbytes)
                # Read one item of y.nbytes bytes straight into y's buffer.
                reader(faiss.swig_ptr(y), y.nbytes, 1)

            np.testing.assert_array_equal(x, y)
        finally:
            if os.path.exists(fname):
                os.unlink(fname)
0
 def index_from_pipe():
     """Deserialize a faiss index from the file descriptor ``rf``.

     ``rf`` is not defined in this function; it is resolved from an
     enclosing or module-level scope when the callback runs.

     Returns:
         The index object returned by ``faiss.read_index``.
     """
     def read_chunk(size):
         # Fetch up to ``size`` bytes from the descriptor for faiss.
         return os.read(rf, size)

     pipe_reader = faiss.PyCallbackIOReader(read_chunk)
     return faiss.read_index(pipe_reader)