示例#1
0
def is_valid_element(data_element: DataElement,
                     valid_content_types: Optional[Iterable] = None,
                     check_image: bool = False) -> bool:
    """
    Determine whether a given data element is valid.

    An element is reported valid when all of the following hold:

    - it is a ``DataElement`` instance,
    - its content type is contained in ``valid_content_types`` (only checked
      when that argument is not None),
    - ``is_loadable_image`` accepts it (only checked when ``check_image`` is
      set).

    :param data_element: Data element to check. Any object that is not a
        ``DataElement`` instance is reported as invalid.
    :type data_element: DataElement

    :param valid_content_types: Collection of valid content types, or None to
        skip content type checking.
    :type valid_content_types: iterable | None

    :param check_image: Whether or not to additionally require that
        ``is_loadable_image`` returns True for the element.
    :type check_image: bool

    :return: Whether or not the data element is valid
    :rtype: bool

    """
    # Type guard goes first. It used to be the final statement, which meant
    # the element's methods had already been called by the time it ran: a
    # non-DataElement raised AttributeError instead of being reported invalid.
    if not isinstance(data_element, DataElement):
        return False

    if valid_content_types is not None:
        # Read the content type once and reuse it for the log message.
        content_type = data_element.content_type()
        if content_type not in valid_content_types:
            logging.getLogger(__name__).debug(
                "Skipping file (invalid content) type for "
                "descriptor generator (data_element='%s', ct=%s)",
                data_element, content_type)
            return False

    if check_image and not is_loadable_image(data_element):
        return False

    return True
    def raise_valid_element(self,
                            data_element: DataElement,
                            exception_type: type = ValueError,
                            message: Optional[str] = None) -> DataElement:
        """
        Hand back the given data element if ``is_valid_element`` accepts it,
        otherwise raise an exception of the requested type.

        :param smqtk.representation.DataElement data_element:
             Data element instance to check.
        :param type exception_type:
            Exception class to instantiate and raise when the element is
            rejected. ``ValueError`` is used by default.
        :param str message:
            Message the exception is constructed with. When left as None, a
            generic message is composed that reports the element's content
            type together with the list of valid content types.

        :return: The unmodified input data element.
        :rtype: smqtk.representation.DataElement
        """
        # Happy path first: a valid element is passed straight through.
        if self.is_valid_element(data_element):
            return data_element

        if message is None:
            message = (
                "Data element does not match a content type "
                "reported as valid. Given: \"{}\". Valid types: {}."
            ).format(data_element.content_type(),
                     list(self.valid_content_types()))

        # noinspection PyCallingNonCallable
        # - Whether the given class can actually be constructed from a single
        #   message argument is the caller's responsibility.
        raise exception_type(message)
示例#3
0
def load_dataset_tempfile(data_element: DataElement) -> "gdal.Dataset":
    """
    Yield a GDAL Dataset opened from a temporary-file copy of an element.

    The element is written out with ``write_temp`` and the resulting path is
    passed to ``gdal.Open``. This function is a generator that yields exactly
    once; ``clean_temp`` is invoked on the element when the generator is
    resumed or closed, including when an exception is thrown into it.

    :param smqtk.representation.DataElement data_element:
        Element to load dataset from.

    :return: GDAL Dataset (the value returned by ``gdal.Open``).
    :rtype: gdal.Dataset

    """
    temp_path = data_element.write_temp()
    try:
        yield gdal.Open(temp_path)
    finally:
        # Temporary files must not outlive the consumer of the dataset.
        data_element.clean_temp()
示例#4
0
    def from_config(cls: Type[T],
                    config_dict: Dict,
                    merge_default: bool = True) -> T:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        The ``cache_element`` entry is translated before delegating to the
        parent implementation: a missing entry, a None entry, or an entry
        whose ``type`` is None becomes None; anything else is constructed
        via ``from_config_dict``. The input dictionary is copied at its top
        level first, so the caller's dictionary is not modified.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``. Forwarded to the
            parent ``from_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: MemoryKeyValueStore

        """
        # Copy top-level of config in order to not modify input instance.
        c = config_dict.copy()
        cache_config = c.get('cache_element')
        # Simplify specification for "no cache element"
        if cache_config is None or cache_config['type'] is None:
            c['cache_element'] = None
        else:
            # Create from nested config.
            c['cache_element'] = from_config_dict(cache_config,
                                                  DataElement.get_impls())
        # ``merge_default`` used to be dropped here, so the parent always
        # merged defaults even when the caller asked for no merge.
        return super(MemoryKeyValueStore, cls).from_config(
            c, merge_default=merge_default)
示例#5
0
    def from_config(cls: Type[T_IF],
                    config_dict: Dict,
                    merge_default: bool = True) -> T_IF:
        """
        Instantiate a new instance of this class given the JSON-compliant
        configuration dictionary encapsulating initialization arguments.

        The ``mean_vec_cache`` and ``rotation_cache`` sub-configurations are
        each replaced by an instance built with ``from_config_dict``, or by
        None when the sub-configuration is empty or has no ``type`` set.
        Those replacements are made on the merged result or on a shallow
        copy, never on the dictionary object passed in by the caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.

        :return: Constructed instance from the provided config.

        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)
        else:
            # Without a merge, the assignments below would land in the
            # caller's dictionary and leave instances where JSON-compliant
            # sub-configs used to be. Work on a shallow copy instead.
            config_dict = dict(config_dict)

        data_element_impls = DataElement.get_impls()
        # Mean vector cache element first, then rotation matrix cache element.
        for cache_key in ('mean_vec_cache', 'rotation_cache'):
            cache_config = config_dict[cache_key]
            if cache_config and cache_config['type']:
                config_dict[cache_key] = from_config_dict(cache_config,
                                                          data_element_impls)
            else:
                config_dict[cache_key] = None

        return super(ItqFunctor, cls).from_config(config_dict, False)
示例#6
0
    def get_default_config(cls) -> Dict[str, Any]:
        """
        Build the default configuration dictionary for this class.

        Starts from the parent class's default configuration and fills in
        default sub-configurations (from ``make_default_config``) for the
        nested components:

        - ``index_element`` and ``index_param_element``: ``DataElement``
          implementations,
        - ``descriptor_set``: ``DescriptorSet`` implementations,
        - ``idx2uid_kvs`` and ``uid2idx_kvs``: ``KeyValueStore``
          implementations.

        Keys that share a source sub-configuration receive independent deep
        copies so that editing one does not affect the other.

        It is not guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.

        """
        config = super(FaissNearestNeighborsIndex, cls).get_default_config()

        element_config = make_default_config(DataElement.get_impls())
        config['index_element'] = element_config
        config['index_param_element'] = deepcopy(element_config)

        config['descriptor_set'] = \
            make_default_config(DescriptorSet.get_impls())

        store_config = make_default_config(KeyValueStore.get_impls())
        config['idx2uid_kvs'] = store_config
        config['uid2idx_kvs'] = deepcopy(store_config)

        return config
示例#7
0
    def from_config(
        cls: Type[MDS],
        config_dict: Dict,
        merge_default: bool = True
    ) -> MDS:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        The ``cache_element`` sub-configuration is replaced by an instance
        built with ``from_config_dict``, or by None when it is empty or has
        no ``type`` set. That replacement is made on the merged result or on
        a shallow copy, never on the dictionary object passed in by the
        caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.

        :return: Constructed instance from the provided config.

        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)
        else:
            # No merge means ``config_dict`` is still the caller's object;
            # copy its top level so the assignment below does not replace
            # the caller's sub-config with an instance.
            config_dict = dict(config_dict)

        # Optionally construct cache element from sub-config.
        cache_config = config_dict['cache_element']
        if cache_config and cache_config['type']:
            config_dict['cache_element'] = \
                from_config_dict(cache_config, DataElement.get_impls())
        else:
            config_dict['cache_element'] = None

        return super(MemoryDescriptorSet, cls).from_config(config_dict, False)
示例#8
0
def load_dataset_vsimem(data_element: DataElement) -> "gdal.Dataset":
    """
    Yield a GDAL Dataset opened from an in-memory virtual file holding the
    element's bytes.

    The bytes are registered under a freshly named ``/vsimem/`` path with
    ``gdal.FileFromMemBuffer`` and that path is handed to ``gdal.Open``.
    This function is a generator that yields exactly once; the virtual file
    is unlinked when the generator is resumed or closed.

    Requires GDAL major version 2 or greater.

    :param smqtk.representation.DataElement data_element:
        Element to load dataset from.

    :raises RuntimeError: ``gdal.Unlink`` returned a non-zero code while
        removing the virtual file.

    :return: GDAL Dataset (the value returned by ``gdal.Open``).
    :rtype: gdal.Dataset

    """
    # noinspection PyProtectedMember
    name_source = tempfile._get_candidate_names  # type: ignore
    # Unguarded next() call is OK in this case because the generator returned
    # by ``_get_candidate_names()`` does not terminate.
    vsimem_path = '/vsimem/{}'.format(
        next(name_source())  # lgtm[py/unguarded-next-in-generator]
    )
    gdal.FileFromMemBuffer(vsimem_path, data_element.get_bytes())
    try:
        yield gdal.Open(vsimem_path)
    finally:
        # A non-zero return code from Unlink is treated as a hard failure.
        if gdal.Unlink(vsimem_path) != 0:
            raise RuntimeError("Failed to gdal.Unlink virtual file '{}' "
                               "containing bytes from {}.".format(
                                   vsimem_path, data_element))
示例#9
0
    def from_config(cls: Type[T],
                    config_dict: Dict,
                    merge_default: bool = True) -> T:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        The ``network_prototxt`` and ``network_model`` sub-configurations are
        always translated into instances via ``from_config_dict``. The
        ``image_mean`` sub-configuration is optional: None, or a dictionary
        whose ``type`` is missing or None, results in None. These
        replacements are made on the merged result or on a shallow copy,
        never on the dictionary object passed in by the caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.

        :return: Constructed instance from the provided config.

        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)
        else:
            # Keep the caller's dictionary intact: the assignments below
            # would otherwise swap its sub-configs for instances.
            config_dict = dict(config_dict)

        data_elem_impl_set = DataElement.get_impls()

        # Translate prototext and model sub-configs into DataElement instances.
        for required_key in ('network_prototxt', 'network_model'):
            config_dict[required_key] = \
                from_config_dict(config_dict[required_key],
                                 data_elem_impl_set)

        # Translate optionally provided image mean sub-config into a
        # DataElement instance. May have been provided as ``None`` or a
        # configuration dictionary with type ``None``.
        # None, dict[type=None], dict[type=str]
        image_mean_config = config_dict['image_mean']
        if image_mean_config is None \
                or image_mean_config.get('type', None) is None:
            config_dict['image_mean'] = None
        else:
            config_dict['image_mean'] = \
                from_config_dict(image_mean_config, data_elem_impl_set)

        return super(CaffeDescriptorGenerator,
                     cls).from_config(config_dict, merge_default=False)
示例#10
0
def is_loadable_image(data_element: DataElement) -> bool:
    """
    Report whether PIL can open the bytes of a data element as an image.

    The element's bytes are wrapped in an in-memory buffer and passed to
    ``PIL.Image.open``. An ``IOError`` from that call is logged at debug
    level and reported as False; other exceptions propagate.

    :param data_element: A data element to check
    :type data_element: DataElement

    :return: Whether or not the image is loadable
    :rtype: bool

    """
    log = logging.getLogger(__name__)

    try:
        PIL.Image.open(io.BytesIO(data_element.get_bytes()))
    except IOError as ex:
        log.debug(
            "Failed to convert '%s' bytes into an image "
            "(error: %s). Skipping", data_element, str(ex))
        return False
    return True
示例#11
0
    def _load_as_matrix(
        self, data_element: DataElement,
            pixel_crop: Optional[AxisAlignedBoundingBox] = None) \
            -> numpy.ndarray:
        """
        Load an image from the given data element with PIL and return it as
        a numpy array, optionally cropped and converted to an explicit mode.

        Pre-conditions:
            - ``pixel_crop`` has a non-zero volume and is composed of integer
              types.

        :param smqtk.representation.DataElement data_element:
            DataElement to load image data from.
        :param None|smqtk.representation.AxisAlignedBoundingBox pixel_crop:
            Optional pixel crop region to load from the given data.  If this
            is provided it must represent a valid sub-region within the loaded
            image, otherwise a RuntimeError is raised.

        :raises IOError: PIL could not open the element's bytes as an image.
        :raises RuntimeError: A crop region was specified but did not specify a
            valid sub-region of the image.

        :return: Numpy ndarray of the image data, as produced by
            ``numpy.asarray`` on the PIL image.
        :rtype: numpy.ndarray

        """
        # We may have to add a mode where we use write_temp and load from that
        # if loading large images straight from bytes-in-memory is a problem
        # and that approach actually alleviates anything.

        try:
            #: :type: PIL.Image.Image
            image = PIL.Image.open(BytesIO(data_element.get_bytes()))
        except IOError as ex:
            if 'cannot identify image file' not in str(ex):
                # Anything other than an identification failure is passed
                # through untouched.
                raise
            # Re-raise with a message that names the offending element.
            raise IOError("Failed to identify image from bytes provided "
                          "by {}".format(data_element))

        if pixel_crop:
            if not crop_in_bounds(pixel_crop, *image.size):
                raise RuntimeError("Crop provided not within input image. "
                                   "Image shape: {}, crop: {}".format(
                                       image.size, pixel_crop))
            crop_box = (pixel_crop.min_vertex.tolist() +
                        pixel_crop.max_vertex.tolist())
            image = image.crop(crop_box)

        # Convert only when an explicit mode is configured and the loaded
        # image is not already in it.
        target_mode = self._explicit_mode
        if target_mode and image.mode != target_mode:
            image = image.convert(mode=target_mode)

        # noinspection PyTypeChecker
        return numpy.asarray(image)
示例#12
0
    def from_config(cls: Type[T_FNNI],
                    config_dict: Dict,
                    merge_default: bool = True) -> T_FNNI:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        This method should not be called via super unless an instance of the
        class is desired.

        The ``descriptor_set``, ``uid2idx_kvs`` and ``idx2uid_kvs``
        sub-configurations are always translated into instances via
        ``from_config_dict``. ``index_element`` and ``index_param_element``
        are optional: each becomes None when its sub-configuration is empty
        or has no ``type`` set. These replacements are made on the merged
        defaults or on a shallow copy, never on the dictionary object passed
        in by the caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.

        :return: Constructed instance from the provided config.

        """
        if merge_default:
            cfg = cls.get_default_config()
            merge_dict(cfg, config_dict)
        else:
            # Previously ``cfg`` aliased the caller's dictionary here, so the
            # assignments below overwrote the caller's sub-configs with
            # instances. A shallow copy is enough: only top-level keys are
            # reassigned.
            cfg = dict(config_dict)

        # Required nested components.
        for key, interface in (('descriptor_set', DescriptorSet),
                               ('uid2idx_kvs', KeyValueStore),
                               ('idx2uid_kvs', KeyValueStore)):
            cfg[key] = from_config_dict(cfg[key], interface.get_impls())

        # Optional data elements.
        for key in ('index_element', 'index_param_element'):
            element_config = cfg[key]
            if element_config and element_config['type']:
                cfg[key] = from_config_dict(element_config,
                                            DataElement.get_impls())
            else:
                cfg[key] = None

        return super(FaissNearestNeighborsIndex, cls).from_config(cfg, False)
示例#13
0
 def __setstate__(self, state: Mapping[str, Any]) -> None:
     """
     Restore this instance from a state mapping.

     All entries of ``state`` are first copied onto the instance as
     attributes. The ``network_prototxt`` and ``network_model`` entries, and
     ``image_mean`` when it is not None, are then replaced by instances built
     with ``from_config_dict``. Finally ``_setup_network`` is called.

     :param state: Mapping of attribute names to values, with nested
         element configurations for the keys named above.
     """
     # This ``__dict__.update`` works because configuration parameters
     # exactly match up with instance attributes currently.
     self.__dict__.update(state)
     # Translate nested Configurable instance configurations into actual
     # object instances.
     for attr_name in ("network_prototxt", "network_model"):
         # noinspection PyTypeChecker
         setattr(self, attr_name,
                 from_config_dict(state[attr_name], DataElement.get_impls()))
     image_mean_config = state["image_mean"]
     if image_mean_config is not None:
         # noinspection PyTypeChecker
         self.image_mean = from_config_dict(image_mean_config,
                                            DataElement.get_impls())
     self._setup_network()
示例#14
0
 def get_config(self) -> Dict[str, Any]:
     """
     Return this instance's configuration dictionary.

     :return: Dictionary with the single key ``cache_element``. Its value is
         the cache element's own configuration (via ``to_config_dict``) when
         one is set, otherwise the default ``DataElement`` configuration
         from ``make_default_config``.
     """
     cache_element = self._cache_element
     if cache_element is None:
         # No cache element: emit the default element configuration.
         return {
             'cache_element': make_default_config(DataElement.get_impls()),
         }
     # Recursively get config from the data element we have.
     return {'cache_element': to_config_dict(cache_element)}
示例#15
0
    def get_default_config(cls) -> Dict[str, Any]:
        """
        Build the default configuration dictionary for this class.

        Starts from the parent class's default configuration and sets
        ``network_prototxt``, ``network_model`` and ``image_mean`` to default
        ``DataElement`` sub-configurations.

        :return: Default configuration dictionary for the class.
        """
        config = super(CaffeDescriptorGenerator, cls).get_default_config()

        data_elem_impl_set = DataElement.get_impls()
        # Each key gets its own ``make_default_config`` result so that
        # changes to one sub-config do not affect the others.
        for key in ('network_prototxt', 'network_model', 'image_mean'):
            config[key] = make_default_config(data_elem_impl_set)

        return config
    def is_valid_element(self, data_element: DataElement) -> bool:
        """
        Report whether the content type of the given DataElement is among
        those returned by ``valid_content_types``.

        :param data_element:
             Data element instance to check.

        :return: True if the element's content type is contained in the
            result of ``valid_content_types``, and False if not.
        """
        element_type = data_element.content_type()
        return element_type in self.valid_content_types()
示例#17
0
    def get_default_config(cls) -> Dict[str, Any]:
        """
        Build the default configuration dictionary for this class.

        Starts from the parent class's default configuration and sets
        ``mean_vec_cache`` and ``rotation_cache`` to default ``DataElement``
        sub-configurations.

        :return: Default configuration dictionary for the class.
        """
        config = super(ItqFunctor, cls).get_default_config()

        # Cache element parameters need to be split out into
        # sub-configurations.
        element_config = make_default_config(DataElement.get_impls())
        config.update(
            mean_vec_cache=element_config,
            # Deep copy so that modifying one sub-config is not reflected in
            # the other.
            rotation_cache=deepcopy(element_config),
        )

        return config
示例#18
0
    def from_config(cls: Type[T],
                    config_dict: Dict,
                    merge_default: bool = True) -> T:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        The ``cache_element`` sub-configuration is replaced by an instance
        built with ``from_config_dict``, or by None when it is empty or has
        no ``type`` set. That replacement is made on the merged result or on
        a shallow copy, never on the dictionary object passed in by the
        caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.

        :return: Constructed instance from the provided config.
        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)
        else:
            # Avoid writing the constructed element back into the caller's
            # dictionary when no merge produced a fresh one.
            config_dict = dict(config_dict)

        cache_config = config_dict['cache_element']
        if cache_config and cache_config['type']:
            config_dict['cache_element'] = \
                from_config_dict(cache_config, DataElement.get_impls())
        else:
            config_dict['cache_element'] = None

        return super(DataMemorySet, cls).from_config(config_dict, False)
示例#19
0
    def get_default_config(cls) -> Dict[str, Any]:
        """
        Build the default configuration dictionary for this class.

        Starts from the parent class's default configuration and sets
        ``cache_element`` to the default ``DataElement`` sub-configuration
        produced by ``make_default_config``.

        It is not guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.
        :rtype: dict

        """
        config = super(MemoryKeyValueStore, cls).get_default_config()
        element_config = make_default_config(DataElement.get_impls())
        config['cache_element'] = element_config
        return config
示例#20
0
    def get_default_config(cls) -> Dict[str, Any]:
        """
        Build the default configuration dictionary for this class.

        The parent class's default configuration is taken as the base, and
        its ``cache_element`` entry is set to the default ``DataElement``
        sub-configuration produced by ``make_default_config``.

        It is not guaranteed that the configuration dictionary returned
        from this method is valid for construction of an instance of this
        class.

        :return: Default configuration dictionary for the class.

        """
        defaults = super(MemoryDescriptorSet, cls).get_default_config()
        defaults.update(
            cache_element=make_default_config(DataElement.get_impls()),
        )
        return defaults
示例#21
0
    def from_config(
        cls: Type[T],
        config_dict: Dict,
        merge_default: bool = True
    ) -> T:
        """
        Instantiate a new instance of this class given the configuration
        JSON-compliant dictionary encapsulating initialization arguments.

        This method should not be called via super unless an instance of the
        class is desired.

        The ``cache_element`` sub-configuration is replaced by an instance
        built with ``from_config_dict``, or by None when it is empty or has
        no ``type`` set. That replacement is made on the merged result or on
        a shallow copy, never on the dictionary object passed in by the
        caller.

        :param config_dict: JSON compliant dictionary encapsulating
            a configuration.
        :type config_dict: dict

        :param merge_default: Merge the given configuration on top of the
            default provided by ``get_default_config``.
        :type merge_default: bool

        :return: Constructed instance from the provided config.
        :rtype: SkLearnBallTreeHashIndex

        """
        if merge_default:
            config_dict = merge_dict(cls.get_default_config(), config_dict)
        else:
            # With no merge, ``config_dict`` is the caller's own object.
            # Copy its top level so the caller keeps a JSON-compliant
            # ``cache_element`` entry after this call.
            config_dict = dict(config_dict)

        # Parse ``cache_element`` configuration if set.
        cache_config = config_dict['cache_element']
        if cache_config and cache_config['type']:
            config_dict['cache_element'] = \
                from_config_dict(cache_config, DataElement.get_impls())
        else:
            config_dict['cache_element'] = None

        return super(SkLearnBallTreeHashIndex, cls).from_config(config_dict,
                                                                False)
示例#22
0
    def _load_as_matrix(
            self,
            data_element: DataElement,
            pixel_crop: AxisAlignedBoundingBox = None) -> np.ndarray:
        """
        Load an image from the given data element through GDAL and return it
        as an image matrix.

        The dataset is opened with the context manager selected by
        ``self._load_method``. When ``self._channel_order`` is set, only the
        bands matching ``self._channel_order_gci`` are read, in that order;
        otherwise the whole dataset is read.

        Pre-conditions:
            - ``pixel_crop`` has a non-zero volume and is composed of integer
              types.

        :param smqtk.representation.DataElement data_element:
            DataElement to load image data from.
        :param None|smqtk.representation.AxisAlignedBoundingBox pixel_crop:
            Optional pixel crop region to load from the given data.  If this
            is provided it must represent a valid sub-region within the loaded
            image, otherwise a RuntimeError is raised.

        :raises ValueError: The given data element is empty.
        :raises RuntimeError: A crop region was specified but did not specify a
            valid sub-region of the image, or a channel order was requested
            that the dataset's bands cannot satisfy.

        :return: Numpy ndarray of the image data. Specific return format is
            implementation dependent.
        :rtype: np.ndarray

        """
        if data_element.is_empty():
            raise ValueError(
                "GdalImageReader cannot load 0-sized data (no bytes in {}).".
                format(data_element))
        # Pick the loader registered for the configured load method; it is
        # used as a context manager that provides the opened dataset.
        load_cm = self.LOAD_METHOD_CONTEXTMANAGERS[self._load_method]
        with load_cm(data_element) as gdal_ds:  # type: gdal.Dataset
            img_width = gdal_ds.RasterXSize
            img_height = gdal_ds.RasterYSize

            # GDAL wants [x, y, width, height] as the first 4 positional
            # arguments to ``ReadAsArray``.
            # Default window is the full image.
            xywh = [0, 0, img_width, img_height]
            if pixel_crop:
                if not crop_in_bounds(pixel_crop, img_width, img_height):
                    raise RuntimeError("Crop provided not within input image. "
                                       "Image shape: {}, crop: {}".format(
                                           (img_width, img_height),
                                           pixel_crop))
                # This is testing faster than ``np.concatenate``.
                # NOTE(review): presumably ``deltas`` is the crop's
                # (width, height) extent -- confirm against
                # AxisAlignedBoundingBox.
                xywh = \
                    pixel_crop.min_vertex.tolist() + pixel_crop.deltas.tolist()

            # Select specific channels if they are present in this dataset, or
            # just get all of them
            if self._channel_order is not None:
                assert self._channel_order_gci is not None, (
                    "When a channel-order is set, the GCI equivalent should "
                    "also be set.")
                # Map raster bands from CI value to band index.
                # - GDAL uses 1-based indexing.
                # - If several bands report the same color interpretation,
                #   the highest band index is the one kept in the mapping.
                band_ci_to_idx = {
                    gdal_ds.GetRasterBand(b_i).GetColorInterpretation(): b_i
                    for b_i in range(1, gdal_ds.RasterCount + 1)
                }
                # Requested color interpretations that no band provides.
                gci_diff = (set(
                    self._channel_order_gci).difference(band_ci_to_idx))
                if gci_diff:
                    raise RuntimeError(
                        "Data element did not provide channels required to "
                        "satisfy requested channel order {}.  "
                        "Data had bands: {} (missing {}).".format(
                            map_gci_list_to_names(self._channel_order_gci),
                            map_gci_list_to_names(band_ci_to_idx),
                            map_gci_list_to_names(gci_diff)))
                # Initialize a matrix to read band image data into
                # TODO: Handle when there are no bands?
                # The output dtype is taken from band 1 only.
                # NOTE(review): assumes all requested bands share that data
                # type -- confirm.
                band_dtype = gdal_array.GDALTypeCodeToNumericTypeCode(
                    gdal_ds.GetRasterBand(1).DataType)
                if len(self._channel_order_gci) > 1:
                    # Output buffer is [height, width, channel]; each band is
                    # read directly into its channel slice via ``buf_obj``.
                    img_mat = np.ndarray(
                        [xywh[3], xywh[2],
                         len(self._channel_order_gci)],
                        dtype=band_dtype)
                    for i, gci in enumerate(self._channel_order_gci):
                        #: :type: gdal.Band
                        b = gdal_ds.GetRasterBand(band_ci_to_idx[gci])
                        b.ReadAsArray(*xywh, buf_obj=img_mat[:, :, i])
                else:
                    # Single requested channel: 2D [height, width] output.
                    img_mat = np.ndarray([xywh[3], xywh[2]], dtype=band_dtype)
                    gci = self._channel_order_gci[0]
                    b = gdal_ds.GetRasterBand(band_ci_to_idx[gci])
                    b.ReadAsArray(*xywh, buf_obj=img_mat)
            else:
                # No channel order requested: read everything in the window.
                img_mat = gdal_ds.ReadAsArray(*xywh)
                if img_mat.ndim > 2:
                    # Transpose into [height, width, channel] format.
                    img_mat = img_mat.transpose(1, 2, 0)

        return img_mat