Exemplo n.º 1
0
    def remove_many_descriptors(self, uuids):
        """
        Remove descriptors associated to given descriptor UUIDs from this index.

        UUIDs are matched by their ``str`` form and duplicates in the input
        are collapsed before the query is issued. If the iterable yields no
        UUIDs this method returns without executing any query.

        :param uuids: Iterable of descriptor UUIDs to remove.
        :type uuids: collections.Iterable[collections.Hashable]

        :raises ReadOnlyError: This index is set to read-only.
        :raises KeyError: A given UUID doesn't associate with a
            DescriptorElement in this index. The key carried by the error is
            the ``str`` form of the missing UUID.

        """
        if self.read_only:
            raise ReadOnlyError("Cannot remove from a read-only index.")

        str_uuid_set = {str(uid) for uid in uuids}
        if not str_uuid_set:
            # An empty tuple would be rendered as ``IN ()``, which is not
            # valid SQL, and there is nothing to delete anyway.
            return

        q = self.DELETE_MANY_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )
        v = {'uuid_tuple': tuple(str_uuid_set)}

        def execute(c):
            c.execute(q, v)

            # The query is expected to return the UUID of every deleted row
            # in its first column; any requested UUID that is absent from
            # that result was not in the index.
            # NOTE(review): raising here presumably relies on
            # ``_single_execute`` to roll the delete back -- confirm.
            deleted_uuid_set = {r[0] for r in c.fetchall()}
            for uid in str_uuid_set:
                if uid not in deleted_uuid_set:
                    raise KeyError(uid)

        # Consume the returned iterable so the hook is actually run.
        list(self._single_execute(execute))
Exemplo n.º 2
0
    def build_index(self, descriptors):
        """
        Build a fresh MRPT index over the given descriptor elements.

        The parent class's ``build_index`` is invoked first. After that the
        backing descriptor set is cleared and re-populated from
        ``descriptors``, the trees are built via ``_build_multiple_trees`` and
        the result is persisted via ``_save_mrpt_model``.

        Calling this again rebuilds the index rather than extending it.

        NOTE(review): ``descriptors`` is handed to both the parent
        ``build_index`` and ``add_many_descriptors``; a one-shot iterator may
        already be exhausted by the first -- confirm against the parent
        implementation.

        :param descriptors: Iterable of descriptor elements to build index
            over.
        :type descriptors:
            collections.Iterable[smqtk.representation.DescriptorElement]

        :raises ReadOnlyError: This index is in read-only mode.
        :raises ValueError: No data available in the given iterable
            (not raised directly here; presumably comes from one of the
            calls made below).

        """
        if self._read_only:
            raise ReadOnlyError(
                "Cannot modify container attributes due to being in "
                "read-only mode."
            )

        super(MRPTNearestNeighborsIndex, self).build_index(descriptors)

        log = self._log
        log.info("Building new MRPT index")

        # Swap the stored descriptors for the new ones.
        log.debug("Clearing and adding new descriptor elements")
        descriptor_set = self._descriptor_set
        descriptor_set.clear()
        descriptor_set.add_many_descriptors(descriptors)

        # Build the trees, then persist the model.
        log.debug('Building MRPT index')
        self._build_multiple_trees()
        self._save_mrpt_model()
Exemplo n.º 3
0
    def remove_descriptor(self, uuid):
        """
        Remove a descriptor from this index by the given UUID.

        The UUID is matched by its ``str`` form. Because the delete query
        binds the value as a ``LIKE`` pattern, any ``%``, ``_`` or backslash
        in the UUID is escaped first so that it only matches itself.

        :param uuid: UUID of the DescriptorElement to remove.
        :type uuid: collections.Hashable

        :raises ReadOnlyError: This index is set to read-only.
        :raises KeyError: The given UUID doesn't associate to a
            DescriptorElement in this index.

        """
        if self.read_only:
            raise ReadOnlyError("Cannot remove from a read-only index.")

        q = self.DELETE_LIKE_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )
        # Escape LIKE metacharacters (backslash first, so the escapes we add
        # are not themselves re-escaped). Without this a UUID such as "%"
        # would delete every row and "_" would match any single character.
        # Backslash is the default LIKE escape character in PostgreSQL.
        uuid_pattern = (
            str(uuid)
            .replace('\\', '\\\\')
            .replace('%', '\\%')
            .replace('_', '\\_')
        )
        v = {'uuid_like': uuid_pattern}

        def execute(c):
            c.execute(q, v)
            # Nothing deleted if rowcount == 0
            # (otherwise 1 when deleted a thing)
            if c.rowcount == 0:
                raise KeyError(uuid)

        # Consume the returned iterable so the hook is actually run.
        list(self._single_execute(execute))
Exemplo n.º 4
0
    def add_descriptor(self, descriptor):
        """
        Add a descriptor to this index.

        Adding the same descriptor multiple times should not add multiple copies
        of the descriptor in the index (based on UUID). Added descriptors
        overwrite indexed descriptors based on UUID.

        The descriptor is pickled with ``self.pickle_protocol`` and bound to
        the upsert query as a binary value, alongside the ``str`` form of its
        UUID.

        :param descriptor: Descriptor to index.
        :type descriptor: smqtk.representation.DescriptorElement

        :raises ReadOnlyError: This index is set to read-only.

        """
        if self.read_only:
            raise ReadOnlyError("Cannot add to a read-only index.")

        q = self.UPSERT_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
            element_col=self.element_col,
        )
        v = {
            'uuid_val': str(descriptor.uuid()),
            'element_val': psycopg2.Binary(
                cPickle.dumps(descriptor, self.pickle_protocol)
            )
        }

        def exec_hook(cur):
            cur.execute(q, v)

        # Consume the returned iterable so the hook is actually run.
        list(self._single_execute(exec_hook))
Exemplo n.º 5
0
    def clear(self):
        """
        Remove every entry from this descriptor index.

        Runs the delete-by-pattern query with the pattern ``%``.

        :raises ReadOnlyError: This index is set to read-only.
        """
        if self.read_only:
            raise ReadOnlyError("Cannot clear a read-only index.")

        delete_sql = self.DELETE_LIKE_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
        )

        def delete_all(cursor):
            # A lone '%' is the pattern bound for "all rows".
            match_everything = {'uuid_like': '%'}
            cursor.execute(delete_sql, match_everything)

        # Drain the returned iterable so the hook is actually run.
        for _ in self._single_execute(delete_all):
            pass
Exemplo n.º 6
0
    def add_many_descriptors(self, descriptors):
        """
        Add multiple descriptors at one time.

        Adding the same descriptor multiple times should not add multiple copies
        of the descriptor in the index (based on UUID). Added descriptors
        overwrite indexed descriptors based on UUID.

        Descriptors are converted lazily, one at a time, into parameter
        dictionaries for the upsert query, so the input iterable is only
        consumed as ``_batch_execute`` pulls from it.

        :param descriptors: Iterable of descriptor instances to add to this
            index.
        :type descriptors:
            collections.Iterable[smqtk.representation.DescriptorElement]

        :raises ReadOnlyError: This index is set to read-only.

        """
        if self.read_only:
            raise ReadOnlyError("Cannot add to a read-only index.")

        q = self.UPSERT_TMPL.format(
            table_name=self.table_name,
            uuid_col=self.uuid_col,
            element_col=self.element_col,
        )

        # Transform input into the parameter dictionaries bound to the upsert
        # query: the str form of the UUID plus the pickled element as binary.
        def iter_elements():
            for d in descriptors:
                yield {
                    'uuid_val': str(d.uuid()),
                    'element_val': psycopg2.Binary(
                        cPickle.dumps(d, self.pickle_protocol)
                    )
                }

        def exec_hook(cur, batch):
            cur.executemany(q, batch)

        self._log.debug("Adding many descriptors")
        # Consume the returned iterable so the batches are actually run.
        list(self._batch_execute(iter_elements(), exec_hook))
Exemplo n.º 7
0
    def build_index(self, descriptors):
        """
        Build the index over the descriptor data elements. This in turn builds
        the configured hash index if one is set.

        Subsequent calls to this method should rebuild the index, not add to
        it, or raise an exception to as to protect the current index.

        The descriptor index is cleared and re-populated from ``descriptors``,
        a new hash-to-UUID map is generated via
        ``build_from_descriptor_index`` and swapped in under
        ``_hash2uuid_lock``. When ``hash2uuid_cache_filepath`` is set, the new
        map is also pickled to that path.

        :raises ReadOnlyError: This index is set to read-only.
        :raises ValueError: No data available in the given iterable
            (not raised directly here; presumably comes from one of the
            calls made below).

        :param descriptors: Iterable of descriptor elements to build index
            over.
        :type descriptors:
            collections.Iterable[smqtk.representation.DescriptorElement]

        """
        if self.read_only:
            raise ReadOnlyError("Cannot modify container attributes due to "
                                "being in read-only mode.")

        self._log.debug("Clearing and adding new descriptor elements")
        self.descriptor_index.clear()
        self.descriptor_index.add_many_descriptors(descriptors)

        self._log.debug("Generating hash codes")
        new_hash2uuid = \
            self.build_from_descriptor_index(self.descriptor_index,
                                             self.hash_index,
                                             self.lsh_functor)

        with self._hash2uuid_lock:
            self._hash2uuid = new_hash2uuid

            if self.hash2uuid_cache_filepath:
                self._log.debug("Writing out hash2uuid map: %s",
                                self.hash2uuid_cache_filepath)
                # Protocol -1 selects a binary pickle format, so the file
                # must be opened in binary mode; text mode can corrupt the
                # stream (newline translation) or fail outright on bytes.
                with open(self.hash2uuid_cache_filepath, 'wb') as f:
                    cPickle.dump(self._hash2uuid, f, -1)