Exemplo n.º 1
0
    def set_bytes(self, b: bytes) -> None:
        """
        Write the given bytes as the content of the mapped Girder file.

        Not every implementation supports writing; check the ``writable``
        method first.

        :param b: Byte content to upload.

        :raises ReadOnlyError: This data element can only be read from / does
            not support writing.
        """
        if not self.writable():
            raise ReadOnlyError(
                'Unauthorized access to write to Girder file %s' % self.file_id
            )

        try:
            # noinspection PyTypeChecker
            self.gc.uploadFileContents(self.file_id, BytesIO(b), len(b))
        except girder_client.HttpError as e:
            # A 401 from Girder means we lack write permission on the file.
            if e.status != 401:
                raise e
            raise ReadOnlyError(
                'Unauthorized access to write to Girder file %s' % self.file_id
            )
Exemplo n.º 2
0
 def _remove_from_index(self, uids: Iterable[Hashable]) -> None:
     with self._model_lock:
         if self._read_only:
             raise ReadOnlyError("Cannot modify container attributes due "
                                 "to being in read-only mode.")
         self._descriptor_set.remove_many_descriptors(uids)
         self.build_index(self._descriptor_set)
Exemplo n.º 3
0
    def remove_many_descriptors(self, uuids: Iterable[Hashable]) -> None:
        """
        Remove descriptors associated to given descriptor UUIDs from this set.

        :param uuids: Iterable of descriptor UUIDs to remove.

        :raises KeyError: A given UUID doesn't associate with a
            DescriptorElement in this set.
        :raises ReadOnlyError: This set is read-only.
        """
        if self.read_only:
            raise ReadOnlyError("Cannot remove from a read-only set.")

        query = self.DELETE_MANY_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )
        uid_strs = {str(u) for u in uuids}
        params = {'uuid_tuple': tuple(uid_strs)}

        def _exec(cursor: psycopg2.extensions.cursor) -> None:
            cursor.execute(query, params)

            # NOTE(review): assumes DELETE_MANY_TMPL contains a RETURNING
            # clause so deleted UUIDs come back as rows — confirm template.
            removed = {row[0] for row in cursor.fetchall()}
            for uid_str in uid_strs:
                if uid_str not in removed:
                    raise KeyError(uid_str)

        # Drain the generator so the statement actually executes.
        list(self.psql_helper.single_execute(_exec))
Exemplo n.º 4
0
    def add_descriptor(self, descriptor: DescriptorElement) -> None:
        """
        Add a descriptor to this set.

        Adding the same descriptor multiple times should not add multiple copies
        of the descriptor in the set (based on UUID). Added descriptors
        overwrite set descriptors based on UUID.

        :param descriptor: Descriptor to set.

        :raises ReadOnlyError: This set is read-only and cannot be added to.
        """
        if self.read_only:
            # Fixed error message: this method adds, it does not clear.
            raise ReadOnlyError("Cannot add to a read-only set.")

        q = self.UPSERT_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
            element_col=self.element_col,
        )
        v = {
            'uuid_val': str(descriptor.uuid()),
            'element_val': psycopg2.Binary(
                pickle.dumps(descriptor, self.pickle_protocol)
            ),
        }

        def exec_hook(cur: psycopg2.extensions.cursor) -> None:
            cur.execute(q, v)

        # Drain the generator so the statement actually executes.
        list(self.psql_helper.single_execute(exec_hook))
Exemplo n.º 5
0
    def remove_descriptor(self, uuid: Hashable) -> None:
        """
        Remove a descriptor from this set by the given UUID.

        :param uuid: UUID of the DescriptorElement to remove.

        :raises KeyError: The given UUID doesn't associate to a
            DescriptorElement in this set.
        :raises ReadOnlyError: This set is read-only.
        """
        if self.read_only:
            raise ReadOnlyError("Cannot remove from a read-only set.")

        query = self.DELETE_LIKE_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )
        params = {'uuid_like': str(uuid)}

        def _exec(cursor: psycopg2.extensions.cursor) -> None:
            cursor.execute(query, params)
            # rowcount is 0 when no row matched; 1 when a row was deleted.
            if not cursor.rowcount:
                raise KeyError(uuid)

        # Drain the generator so the statement actually executes.
        list(self.psql_helper.single_execute(_exec))
Exemplo n.º 6
0
 def _update_index(self, descriptors: Iterable[DescriptorElement]) -> None:
     """
     Additively update this index by rebuilding it over the union of the
     current descriptor set and the newly supplied descriptors.

     :param descriptors: Descriptor elements to add to this index.

     :raises ReadOnlyError: This index is in read-only mode.
     """
     with self._model_lock:
         if self._read_only:
             raise ReadOnlyError(
                 "Cannot modify container attributes due "
                 "to being in read-only mode."
             )
         LOG.debug("Updating index by rebuilding with union. ")
         combined = chain(self._descriptor_set, descriptors)
         self.build_index(combined)
Exemplo n.º 7
0
    def matrix(self, m: numpy.ndarray) -> None:
        """
        Replace the contained matrix with a new ndarray.

        :param m: New ndarray instance to set as the contained matrix.

        :raises ReadOnlyError: This data element can only be read from / does
            not support writing.
        """
        if self.writable():
            self._matrix = numpy.asarray(m)
        else:
            raise ReadOnlyError("This %s element is read only." % self)
Exemplo n.º 8
0
    def _update_index(self, descriptors: Iterable[DescriptorElement]) -> None:
        """
        Internal method to be implemented by sub-classes to additively update
        the current index with the one or more descriptor elements given.

        If no index exists yet, a new one should be created using the given
        descriptors.

        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        :param descriptors: Iterable of descriptor elements to add to this
            index.

        """
        with self._model_lock:
            if self.read_only:
                raise ReadOnlyError("Cannot modify container attributes due "
                                    "to being in read-only mode.")
            # tee out iterable for use in adding to index as well as hash code
            # generation.
            # NOTE: the first branch is fully consumed before the second is
            # touched, so tee buffers the entire sequence in memory here.
            d_for_index, d_for_hashing = itertools.tee(descriptors, 2)

            LOG.debug("Updating descriptor index.")
            self.descriptor_set.add_many_descriptors(d_for_index)

            LOG.debug("Generating hash codes for new descriptors")
            prog_reporter = ProgressReporter(LOG.debug, 1.0).start()
            # for updating hash_index
            hash_vectors: Deque[numpy.ndarray] = collections.deque()
            # for updating kv-store after collecting new hash codes
            # NOTE: Mapping type apparently not yet covariant in the key type.
            kvstore_update: Dict[Hashable, Set[Hashable]] = {}
            for d in d_for_hashing:
                h_vec = self.lsh_functor.get_hash(d.vector())
                hash_vectors.append(h_vec)
                h_int = bit_vector_to_int_large(h_vec)
                # Get, update and reinsert hash UUID set object.
                # Existing UUID sets are fetched from the KVS only once per
                # distinct hash code; subsequent hits update the local copy.
                if h_int not in kvstore_update:
                    #: :type: set
                    kvstore_update[h_int] = \
                        self.hash2uuids_kvstore.get(h_int, set())
                kvstore_update[h_int] |= {d.uuid()}
                prog_reporter.increment_report()
            prog_reporter.report()

            # Push accumulated hash->UUID-set relations in one batch call.
            LOG.debug("Updating kv-store with new hash codes")
            self.hash2uuids_kvstore.add_many(kvstore_update)
            del kvstore_update

            if self.hash_index is not None:
                LOG.debug("Updating hash index structure.")
                self.hash_index.update_index(hash_vectors)
Exemplo n.º 9
0
    def clear(self) -> None:
        """
        Remove all descriptor rows from this set's backing table.

        :raises ReadOnlyError: This set is read-only.
        """
        if self.read_only:
            raise ReadOnlyError("Cannot clear a read-only set.")

        query = self.DELETE_LIKE_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )

        def _exec(cursor: psycopg2.extensions.cursor) -> None:
            # '%' LIKE-matches every UUID, deleting all rows.
            cursor.execute(query, {'uuid_like': '%'})

        # Drain the generator so the statement actually executes.
        list(self.psql_helper.single_execute(_exec))
Exemplo n.º 10
0
    def cache(self) -> None:
        """
        Cache the current table if a cache has been configured.

        :raises ReadOnlyError: The configured cache element is read-only.
        """
        if not self.cache_element:
            # No cache configured; nothing to do.
            return
        if self.cache_element.is_read_only():
            raise ReadOnlyError("Cache element (%s) is read-only." %
                                self.cache_element)

        with self._element_map_lock:
            with SimpleTimer("Caching memory data-set table", LOG.debug):
                self.cache_element.set_bytes(
                    pickle.dumps(self._element_map, self.pickle_protocol))
Exemplo n.º 11
0
    def set_bytes(self, b: bytes) -> None:
        """
        Unconditionally refuse to write: HBase-backed elements are read-only.

        :param b: bytes that would be set (ignored).

        :raises ReadOnlyError: Always; this data element can only be read
            from and does not support writing.
        """
        raise ReadOnlyError("HBase elements cannot write data.")
Exemplo n.º 12
0
    def _build_index(self, descriptors: Iterable[DescriptorElement]) -> None:
        """
        Internal method to be implemented by sub-classes to build the index
        with the given descriptor data elements.

        Subsequent calls to this method should rebuild the current index.  This
        method shall not add to the existing index nor raise an exception to as
        to protect the current index.

        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        :param descriptors: Iterable of descriptor elements to build index
            over.

        """
        with self._model_lock:
            if self.read_only:
                raise ReadOnlyError("Cannot modify container attributes due "
                                    "to being in read-only mode.")

            LOG.debug("Clearing and adding new descriptor elements")
            self.descriptor_set.clear()
            self.descriptor_set.add_many_descriptors(descriptors)

            LOG.debug("Generating hash codes")
            hash_vectors: Deque[numpy.ndarray] = collections.deque()
            self.hash2uuids_kvstore.clear()
            prog_reporter = ProgressReporter(LOG.debug, 1.0).start()
            # We just cleared the previous store, so aggregate new kv-mapping
            # in ``kvstore_update`` for single update after loop.
            # NOTE: Mapping type apparently not yet covariant in the key type.
            kvstore_update: Dict[Hashable, Set[Hashable]] = collections.defaultdict(set)
            # Iterate the descriptor set (not the input iterable) so hashing
            # covers exactly what was persisted above.
            for d in self.descriptor_set:
                h_vec = self.lsh_functor.get_hash(d.vector())
                hash_vectors.append(h_vec)
                h_int = bit_vector_to_int_large(h_vec)
                kvstore_update[h_int] |= {d.uuid()}
                prog_reporter.increment_report()
            prog_reporter.report()
            self.hash2uuids_kvstore.add_many(kvstore_update)
            del kvstore_update

            if self.hash_index is not None:
                LOG.debug(f"Clearing and building hash index of type {type(self.hash_index)}")
                # a build is supposed to clear previous state.
                self.hash_index.build_index(hash_vectors)
Exemplo n.º 13
0
    def set_bytes(self, b: bytes) -> None:
        """
        Set bytes to this in-memory data element.

        Previous content type value is maintained.

        :param b: bytes to set.

        :raises ReadOnlyError: This data element can only be read from / does
            not support writing.
        """
        if self._readonly:
            raise ReadOnlyError("This memory element cannot be written to.")
        self._assert_is_bytes(b)
        self._bytes = b
Exemplo n.º 14
0
    def add_many(self, d: Mapping[Hashable, Any]) -> "KeyValueStore":
        """
        Add multiple key-value pairs at a time into this store as represented
        in the provided dictionary `d`.

        :param d: Dictionary of key-value pairs to add to this store.

        :raises ReadOnlyError: If this instance is marked as read-only.

        :return: Self.
        """
        # Keys are guaranteed hashable already: they arrive inside a mapping.
        if self.is_read_only():
            raise ReadOnlyError("Cannot add to read-only instance %s." % self)
        return self
Exemplo n.º 15
0
    def clear(self) -> "KeyValueStore":
        """
        Clear this key-value store.

        *NOTE:* **Implementing sub-classes should call this super-method. This
        super method should not be considered a critical section for thread
        safety.**

        :raises ReadOnlyError: If this instance is marked as read-only.

        :return: Self.
        """
        if self.is_read_only():
            raise ReadOnlyError("Cannot clear a read-only %s instance." %
                                type(self).__name__)
        return self
Exemplo n.º 16
0
    def set_bytes(self, b: bytes) -> None:
        """
        Write the given bytes out as the content of the backing file.

        Not all implementations may support setting bytes (writing). See the
        ``writable`` method.

        :param b: bytes to set.

        :raises ReadOnlyError: This data element can only be read from / does
            not support writing.
        """
        if self._readonly:
            raise ReadOnlyError("This file element is read only.")
        safe_file_write(self._filepath, b)
Exemplo n.º 17
0
    def remove(self, key: Hashable) -> "KeyValueStore":
        """
        Remove a single key-value entry.

        :param key: Key to remove.

        :raises ReadOnlyError: If this instance is marked as read-only.
        :raises KeyError: The given key is not present in this store and no
            default value given.

        :return: Self.
        """
        if self.is_read_only():
            raise ReadOnlyError("Cannot remove from read-only instance %s." %
                                self)
        return self
Exemplo n.º 18
0
    def set_bytes(self, b: bytes) -> None:
        """
        Set bytes to this data element.

        Not all implementations may support setting bytes (check ``writable``
        method return).

        This base abstract method should be called by sub-class implementations
        first. We check for mutability based on ``writable()`` method return.

        :param b: bytes to set.

        :raises ReadOnlyError: This data element can only be read from / does
            not support writing.
        """
        if self.writable():
            return
        raise ReadOnlyError("This %s element is read only." % self)
Exemplo n.º 19
0
    def _build_index(self, descriptors: Iterable[DescriptorElement]) -> None:
        """
        Build a new MRPT index over the given descriptors, replacing any
        previously indexed content.

        :param descriptors: Iterable of descriptor elements to build the
            index over.

        :raises ReadOnlyError: This index is in read-only mode and cannot be
            modified.
        """
        with self._model_lock:
            if self._read_only:
                raise ReadOnlyError(
                    "Cannot modify container attributes due to "
                    "being in read-only mode.")

            LOG.info("Building new MRPT index")

            LOG.debug("Clearing and adding new descriptor elements")
            # NOTE: It may be the case for some DescriptorSet implementations,
            # this clear may interfere with iteration when part of the input
            # iterator of descriptors was this index's previous descriptor-set,
            # as is the case with ``update_index``.
            self._descriptor_set.clear()
            self._descriptor_set.add_many_descriptors(descriptors)

            LOG.debug('Building MRPT index')
            self._build_multiple_trees()

            # Persist the freshly built model while still holding the lock.
            self._save_mrpt_model()
Exemplo n.º 20
0
    def remove_many(self, keys: Iterable[Hashable]) -> "KeyValueStore":
        """
        Remove multiple keys and associated values.

        :param keys: Iterable of keys to remove.  If this is empty this method
            does nothing.

        :raises ReadOnlyError: If this instance is marked as read-only.
        :raises KeyError: The given key is not present in this store and no
            default value given.  The store is not modified if any key is
            invalid.

        :return: Self.
        """
        if self.is_read_only():
            raise ReadOnlyError("Cannot remove from read-only instance %s." %
                                self)
        return self
Exemplo n.º 21
0
    def add_many_descriptors(self,
                             descriptors: Iterable[DescriptorElement]) -> None:
        """
        Add multiple descriptors at one time.

        Adding the same descriptor multiple times should not add multiple copies
        of the descriptor in the set (based on UUID). Added descriptors
        overwrite set descriptors based on UUID.

        :param descriptors: Iterable of descriptor instances to add to this
            set.

        :raises ReadOnlyError: This set is read-only and cannot be added to.
        """
        if self.read_only:
            # Fixed error message: this method adds, it does not clear.
            raise ReadOnlyError("Cannot add to a read-only set.")

        q = self.UPSERT_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
            element_col=self.element_col,
        )

        # Lazily transform input descriptors into query parameter dicts so
        # batch execution can stream without materializing everything at once.
        def elements() -> Generator[Dict[str, Any], None, None]:
            for d in descriptors:
                yield {
                    'uuid_val': str(d.uuid()),
                    'element_val': psycopg2.Binary(
                        pickle.dumps(d, self.pickle_protocol)
                    ),
                }

        def exec_hook(cur: psycopg2.extensions.cursor,
                      batch: Sequence[Dict[str, Any]]) -> None:
            cur.executemany(q, batch)

        LOG.debug("Adding many descriptors")
        # Drain the generator so the batched statements actually execute.
        list(
            self.psql_helper.batch_execute(elements(), exec_hook,
                                           self.multiquery_batch_size))
Exemplo n.º 22
0
    def add(self, key: Hashable, value: Any) -> "KeyValueStore":
        """
        Add a key-value pair to this store.

        *NOTE:* **Implementing sub-classes should call this super-method. This
        super method should not be considered a critical section for thread
        safety unless ``is_read_only`` is not thread-safe.**

        :param key: Key for the value. Must be hashable.

        :param value: Python object to store.

        :raises ReadOnlyError: If this instance is marked as read-only.

        :return: Self.
        """
        if self.is_read_only():
            raise ReadOnlyError("Cannot add to read-only instance %s." % self)
        return self
Exemplo n.º 23
0
    def _remove_from_index(self, uids: Iterable[Hashable]) -> None:
        """
        Internal method to be implemented by sub-classes to partially remove
        descriptors from this index associated with the given UIDs.

        :param uids: Iterable of UIDs of descriptors to remove from this index.

        :raises KeyError: One or more UIDs provided do not match any stored
            descriptors.
        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        """
        # NOTE(review): the read-only flag is checked *before* acquiring
        # ``_model_lock``, unlike sibling implementations that check inside
        # the lock — confirm this ordering is intentional.
        if self.read_only:
            raise ReadOnlyError("Cannot modify read-only index.")

        with self._model_lock:
            if self._faiss_index is None:
                # No index built, so anything is a key error.
                # We can also only be here if hashes was non-zero in size.
                raise KeyError(next(iter(uids)))

            # Check that provided IDs are present in uid2idx mapping.
            # Collecting into a list validates every UID before any mutation,
            # so a bad UID leaves the index untouched.
            uids_d = []
            for uid in uids:
                if uid not in self._uid2idx_kvs:
                    raise KeyError(uid)
                uids_d.append(uid)

            # Remove elements from structures
            # - faiss remove_ids requires a np.ndarray of int64 type.
            rm_idxs = np.asarray([self._uid2idx_kvs[uid] for uid in uids_d],
                                 dtype=np.int64)
            self._faiss_index.remove_ids(rm_idxs)
            self._descriptor_set.remove_many_descriptors(uids_d)
            self._uid2idx_kvs.remove_many(uids_d)
            self._idx2uid_kvs.remove_many(rm_idxs)
            # Persist the updated model while still holding the lock.
            self._save_faiss_model()
Exemplo n.º 24
0
    def _remove_from_index(self, uids: Iterable[Hashable]) -> None:
        """
        Remove descriptors from this index associated with the given UIDs.

        :param uids: Iterable of UIDs of descriptors to remove from this index.

        :raises KeyError: One or more UIDs provided do not match any stored
            descriptors.  The index should not be modified.
        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        """
        with self._model_lock:
            if self.read_only:
                raise ReadOnlyError("Cannot modify container attributes due "
                                    "to being in read-only mode.")

            # Materialize so the input can be iterated multiple times below
            # (hashing, zip pairing, and final removal).
            uids = list(uids)

            # Remove UIDs from our hash2uid-kvs
            # - get the hash for each input UID's descriptor, remove UID from
            #   recorded association set.
            # - `get_many_descriptors` fails when bad UIDs are provided
            #   (KeyError).
            LOG.debug("Removing hash2uid entries for UID's descriptors")
            h_vectors: Deque[numpy.ndarray] = collections.deque()
            h_ints: Deque[int] = collections.deque()
            for d in self.descriptor_set.get_many_descriptors(uids):
                h_vec = self.lsh_functor.get_hash(d.vector())
                h_vectors.append(h_vec)
                h_int = bit_vector_to_int_large(h_vec)
                h_ints.append(h_int)

            # If we're here, then all given UIDs mapped to an indexed
            # descriptor.  Proceed with removal from hash2uids kvs.  If a hash
            # no longer maps anything, remove that key from the KVS.
            hashes_for_removal: Deque[numpy.ndarray] = collections.deque()
            # store key-value pairs to update after loop in batch call
            # NOTE: Mapping type apparently not yet covariant in the key type.
            kvs_update: Dict[Hashable, Set[Hashable]] = {}
            # store keys to remove after loop in batch-call
            kvs_remove = set()
            for uid, h_int, h_vec in zip(uids, h_ints, h_vectors):
                if h_int not in kvs_update:
                    # First time seeing key, cache current value
                    kvs_update[h_int] = \
                        self.hash2uuids_kvstore.get(h_int, set())
                # Discard this UID from the hash's association set.
                kvs_update[h_int] -= {uid}
                # If the resolves UID set is empty, flag the key for removal.
                if not kvs_update[h_int]:
                    del kvs_update[h_int]
                    kvs_remove.add(h_int)
                    hashes_for_removal.append(h_vec)
            LOG.debug("Updating hash2uuids: modified relations")
            self.hash2uuids_kvstore.add_many(kvs_update)
            LOG.debug("Updating hash2uuids: removing empty hash keys")
            self.hash2uuids_kvstore.remove_many(kvs_remove)
            del kvs_update, kvs_remove

            # call remove-from-index on hash-index if we have one and there are
            # hashes to be removed.
            if self.hash_index and hashes_for_removal:
                self.hash_index.remove_from_index(hashes_for_removal)

            # Remove descriptors from our set matching the given UIDs.
            self.descriptor_set.remove_many_descriptors(uids)
0
    def _update_index(self, descriptors: Iterable[DescriptorElement]) -> None:
        """
        Internal method to be implemented by sub-classes to additively update
        the current index with the one or more descriptor elements given.

        If no index exists yet, a new one should be created using the given
        descriptors.

        If any descriptors have already been added, they will be not be
        re-inserted, but a warning will be raised.

        :param descriptors: Iterable of descriptor elements to add to this
            index.

        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        """
        # NOTE(review): the read-only flag and the no-index fallback are
        # checked *before* acquiring ``_model_lock`` — confirm this ordering
        # is intentional.
        if self.read_only:
            raise ReadOnlyError("Cannot modify read-only index.")

        if self._faiss_index is None:
            # No existing index: delegate to a full build.
            self._build_index(descriptors)
            return

        LOG.debug('Updating FAISS index')

        with self._model_lock:
            # Remove any uids which have already been indexed. This gracefully
            # handles the unusual case that the underlying FAISS index and the
            # SMQTK descriptor set have fallen out of sync due to an unexpected
            # external failure.
            desc_list = []
            for descriptor_ in descriptors:
                if descriptor_.uuid() in self._uid2idx_kvs:
                    warnings.warn(
                        "Descriptor with UID {} already present in this"
                        " index".format(descriptor_.uuid()))
                else:
                    desc_list.append(descriptor_)
            if not desc_list:
                LOG.info("No new descriptors provided not already present in "
                         "this index. No update necessary.")
                return
            data, new_uuids = self._descriptors_to_matrix(desc_list)

            n, d = data.shape

            old_ntotal = self.count()

            # Assign monotonically increasing FAISS ids to the new vectors.
            next_next_index = self._next_index + n
            new_ids = np.arange(self._next_index, next_next_index)
            self._next_index = next_next_index

            assert self._faiss_index.d == d, \
                "FAISS index dimension doesn't match data dimension"
            # noinspection PyArgumentList
            self._faiss_index.add_with_ids(data, new_ids)
            assert self._faiss_index.ntotal == old_ntotal + n, \
                "New FAISS index size doesn't match old + data size"
            LOG.info(f"FAISS index has been updated with {n} new vectors")

            LOG.debug("Adding new descriptor elements")
            self._descriptor_set.add_many_descriptors(desc_list)
            assert len(self._descriptor_set) == old_ntotal + n, \
                "New descriptor set size doesn't match old + data size"

            # Convert numpy.int64 type values into python integer values.
            # This is for compatibility with storing in some KVS impls.
            new_ids = new_ids.astype(object)

            self._uid2idx_kvs.add_many(dict(zip(new_uuids, new_ids)))
            assert len(self._uid2idx_kvs) == old_ntotal + n, \
                "New uid2idx kvs size doesn't match old + new data size."

            self._idx2uid_kvs.add_many(dict(zip(new_ids, new_uuids)))
            assert len(self._idx2uid_kvs) == old_ntotal + n, \
                "New idx2uid kvs size doesn't match old + new data size."

            # Persist the updated model while still holding the lock.
            self._save_faiss_model()
Exemplo n.º 26
0
    def _build_index(self, descriptors: Iterable[DescriptorElement]) -> None:
        """
        Internal method to be implemented by sub-classes to build the index
        with the given descriptor data elements.

        Subsequent calls to this method should rebuild the current index.
        This method shall not add to the existing index nor raise an exception
        to as to protect the current index.

        :param descriptors: Iterable of descriptor elements to build index
            over.

        :raises ReadOnlyError: This index is set to be read-only and cannot be
            modified.

        """
        if self.read_only:
            raise ReadOnlyError("Cannot modify read-only index.")

        LOG.info("Building new FAISS index")

        # We need to fork the iterator, so stick the elements in a list
        desc_list = list(descriptors)
        data, new_uuids = self._descriptors_to_matrix(desc_list)
        n, d = data.shape
        idx_ids = np.arange(n)  # restart IDs from 0.

        # Build a faiss index but don't internalize it until we have a lock.
        # Training and vector insertion are done before taking the lock so the
        # heavy work does not block other operations on this index.
        faiss_index = self._index_factory_wrapper(d, self.factory_string,
                                                  self._metric_type_const)
        LOG.info("Training FAISS index")
        # noinspection PyArgumentList
        faiss_index.train(data)
        # TODO(john.moeller): This will raise an exception on flat indexes.
        # There's a solution which involves wrapping the index in an
        # IndexIDMap, but it doesn't work because of a bug in FAISS. So for
        # now we don't support flat indexes.
        LOG.info("Adding data to index")
        # noinspection PyArgumentList
        faiss_index.add_with_ids(data, idx_ids)

        assert faiss_index.d == d, \
            "FAISS index dimension doesn't match data dimension"
        assert faiss_index.ntotal == n, \
            "FAISS index size doesn't match data size"

        with self._model_lock:
            self._faiss_index = faiss_index
            LOG.info(f"FAISS index has been constructed with {n} vectors")

            LOG.debug("Clearing and adding new descriptor elements")
            self._descriptor_set.clear()
            self._descriptor_set.add_many_descriptors(desc_list)
            assert len(self._descriptor_set) == n, \
                "New descriptor set size doesn't match data size"
            # Convert numpy.int64 type values into python integer values.
            # This is for compatibility with storing in some KVS impls.
            idx_ids = idx_ids.astype(object)

            self._uid2idx_kvs.clear()
            self._uid2idx_kvs.add_many(dict(zip(new_uuids, idx_ids)))
            assert len(self._uid2idx_kvs) == n, \
                "New uid2idx map size doesn't match data size."

            self._idx2uid_kvs.clear()
            self._idx2uid_kvs.add_many(dict(zip(idx_ids, new_uuids)))
            assert len(self._idx2uid_kvs) == n, \
                "New idx2uid map size doesn't match data size."

            # Future updates continue numbering after the rebuilt range.
            self._next_index = n

            # Persist the new model while still holding the lock.
            self._save_faiss_model()