Python IdentRegexChecker примеры использования

Язык программирования: Python

Пространство имен/Пакет: loris.identifiers

Класс/Тип: IdentRegexChecker

Примеров на hotexamples.com: 6

Python IdentRegexChecker — найдено 6 примеров. Это лучшие примеры кода на Python для loris.identifiers.IdentRegexChecker, полученные из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

IdentRegexChecker(4)

is_allowed(4)

Основные методы

IdentRegexChecker (4)

is_allowed (4)

Пример #1

Показать файл

Файл: s3resolver.py Проект: nmaekawa/hxloris

    def __init__(self, config):
        """Configure the resolver from ``config``.

        Reads ``default_format``, ``ident_regex`` and the mandatory
        ``cache_root`` from ``self.config``, picks up the optional
        ``bucket_map`` from ``config``, and creates the boto3 s3 resource.

        Raises:
            ResolverException: when ``cache_root`` is not configured.
        """
        super(S3Resolver, self).__init__(config)
        self.default_format = self.config.get("default_format")

        ident_pattern = self.config.get("ident_regex")
        self._ident_regex_checker = IdentRegexChecker(
            ident_regex=ident_pattern)
        self._cache_namer = CacheNamer()

        # cache_root is mandatory: bail out early when it is absent.
        if "cache_root" not in self.config:
            message = ("Server Side Error: Configuration incomplete and "
                       "cannot resolve. Missing setting for cache_root.")
            logger.error(message)
            raise ResolverException(message)
        self.cache_root = self.config["cache_root"]

        # The bucket map is optional; remember whether one was supplied.
        self.has_bucket_map = "bucket_map" in config
        if self.has_bucket_map:
            self.bucket_map = config["bucket_map"]
            logger.debug("s3 bucket_map: {}".format(self.bucket_map))

        # boto3: if not in us-east-1, set envvar AWS_DEFAULT_REGION to avoid
        # extra requests when downloading from s3.
        # A dedicated session is created here; see the boto3 guide on
        # multithreading:
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html#multithreading-multiprocessing
        self.s3 = boto3.session.Session().resource("s3")

        logger.info("loaded s3 resolver with config: {}".format(config))

Пример #2

Показать файл

Файл: resolver.py Проект: fitnycdigitalinitiatives/loris

    def __init__(self, config):
        '''
        Load the resolver settings from the configuration and validate them.

        Raises ResolverException when `cache_root` is missing, or when
        `uri_resolvable` is not set and `source_prefix` is empty.
        '''
        super(SimpleHTTPResolver, self).__init__(config)

        # (setting name, fallback) pairs; each one becomes an attribute of
        # the same name, assigned in this order.
        optional_settings = (
            ('source_prefix', ''),
            ('source_suffix', ''),
            ('default_format', None),
            ('head_resolvable', False),
            ('uri_resolvable', False),
            ('user', None),
            ('pw', None),
            ('cert', None),
            ('key', None),
            ('ssl_check', True),
        )
        for setting, fallback in optional_settings:
            setattr(self, setting, self.config.get(setting, fallback))

        ident_pattern = self.config.get('ident_regex')
        self._ident_regex_checker = IdentRegexChecker(
            ident_regex=ident_pattern)
        self._cache_namer = CacheNamer()

        # cache_root is mandatory.
        if 'cache_root' not in self.config:
            message = 'Server Side Error: Configuration incomplete and cannot resolve. Missing setting for cache_root.'
            logger.error(message)
            raise ResolverException(message)
        self.cache_root = self.config['cache_root']

        # Without a source prefix, identifiers must be resolvable as URIs.
        if self.source_prefix == '' and not self.uri_resolvable:
            message = ('Server Side Error: Configuration incomplete and cannot resolve. Must either set uri_resolvable'
                       ' or source_prefix settings.')
            logger.error(message)
            raise ResolverException(message)

Пример #3

Показать файл

Файл: resolver.py Проект: mtrekels/loris

class SimpleHTTPResolver(_AbstractResolver):
    '''
    Example resolver that one might use if image files were coming from
    an http image store (like Fedora Commons). The first call to `resolve()`
    copies the source image into a local cache; subsequent calls use local
    copy from the cache.

    The config dictionary MUST contain
     * `cache_root`, which is the absolute path to the directory where source images
        should be cached.

    The config dictionary MAY contain
     * `source_prefix`, the url up to the identifier.
     * `source_suffix`, the url after the identifier (if applicable).
     * `default_format`, the format of images (will use content-type of response if not specified).
     * `head_resolvable` with value True, whether to make HEAD requests to verify object existence (don't set if using
        Fedora Commons prior to 3.8).
     * `uri_resolvable` with value True, allows one to use full uri's to resolve to an image.
     * `user`, the username to make the HTTP request as.
     * `pw`, the password to make the HTTP request as.
     * `ssl_check`, whether to check the validity of the origin server's HTTPS
     certificate. Set to False if you are using an origin server with a
     self-signed certificate.
     * `cert`, path to an SSL client certificate to use for authentication. If `cert` and `key` are both present, they take precedence over `user` and `pw` for authentication.
     * `key`, path to an SSL client key to use for authentication.
    '''
    def __init__(self, config):
        '''
        Read the resolver settings from the configuration and validate them.

        Raises ResolverException when `cache_root` is missing, or when
        `uri_resolvable` is not set and `source_prefix` is empty.
        '''
        super(SimpleHTTPResolver, self).__init__(config)

        self.source_prefix = self.config.get('source_prefix', '')

        self.source_suffix = self.config.get('source_suffix', '')

        self.default_format = self.config.get('default_format', None)

        self.head_resolvable = self.config.get('head_resolvable', False)

        self.uri_resolvable = self.config.get('uri_resolvable', False)

        self.user = self.config.get('user', None)

        self.pw = self.config.get('pw', None)

        self.cert = self.config.get('cert', None)

        self.key = self.config.get('key', None)

        self.ssl_check = self.config.get('ssl_check', True)

        self._ident_regex_checker = IdentRegexChecker(
            ident_regex=self.config.get('ident_regex'))
        self._cache_namer = CacheNamer()

        if 'cache_root' in self.config:
            self.cache_root = self.config['cache_root']
        else:
            message = 'Server Side Error: Configuration incomplete and cannot resolve. Missing setting for cache_root.'
            logger.error(message)
            raise ResolverException(message)

        if not self.uri_resolvable and self.source_prefix == '':
            message = 'Server Side Error: Configuration incomplete and cannot resolve. Must either set uri_resolvable' \
                      ' or source_prefix settings.'
            logger.error(message)
            raise ResolverException(message)

    def request_options(self):
        '''
        Build the keyword arguments passed to every HEAD and GET request.

        Returns a dict that always contains `verify`, plus `cert` and/or
        `auth` when the matching pair of settings is configured.
        '''
        # NOTE(review): when both cert/key and user/pw are configured, both
        # `cert` and `auth` are passed to requests -- confirm this matches
        # the "take precedence" wording in the class docstring.
        options = {}
        if self.cert is not None and self.key is not None:
            options['cert'] = (self.cert, self.key)
        if self.user is not None and self.pw is not None:
            options['auth'] = (self.user, self.pw)
        options['verify'] = self.ssl_check
        return options

    def is_resolvable(self, ident):
        '''
        Tell whether `ident` can be resolved to an image.

        Returns False when the identifier is rejected by the ident regex,
        when no valid URL can be built for it, or when the request to the
        origin fails or is answered with a non-ok status. Returns True when
        the cache directory for the identifier already exists or the origin
        answers ok.
        '''
        ident = unquote(ident)

        if not self._ident_regex_checker.is_allowed(ident):
            return False

        if exists(self.cache_dir_path(ident=ident)):
            return True

        try:
            (url, options) = self._web_request_url(ident)
        except ResolverException:
            return False

        try:
            if self.head_resolvable:
                response = requests.head(url, **options)
                return response.ok
            with closing(requests.get(url, stream=True,
                                      **options)) as response:
                return response.ok
        except requests.RequestException:
            # Any failed request (connection error, timeout, invalid URL,
            # too many redirects, ...) means "not resolvable" rather than an
            # error escaping from a boolean predicate.
            return False

    def get_format(self, ident, potential_format):
        '''
        Choose the image format: the configured `default_format` first, then
        `potential_format`, and finally whatever `format_from_ident` derives
        from `ident`.
        '''
        if self.default_format is not None:
            return self.default_format
        elif potential_format is not None:
            return potential_format
        else:
            return self.format_from_ident(ident)

    def _web_request_url(self, ident):
        '''
        Build the source URL for `ident` and pair it with the request options.

        Returns a `(url, options)` tuple. Raises ResolverException when the
        resulting URL does not start with http:// or https://.
        '''
        if ident.startswith(('http://', 'https://')) and self.uri_resolvable:
            url = ident
        else:
            url = self.source_prefix + ident + self.source_suffix
        if not url.startswith(('http://', 'https://')):
            logger.warning('Bad URL request at %s for identifier: %s.', url,
                           ident)
            raise ResolverException(
                "Bad URL request made for identifier: %r." % ident)
        return (url, self.request_options())

    def cache_dir_path(self, ident):
        '''Return the cache directory for `ident` below `cache_root`.'''
        return os.path.join(self.cache_root,
                            CacheNamer.cache_directory_name(ident=ident))

    def raise_404_for_ident(self, ident):
        '''Raise a ResolverException stating that `ident` was not found.'''
        raise ResolverException("Image not found for identifier: %r." % ident)

    def cached_file_for_ident(self, ident):
        '''
        Return the path of the cached source image for `ident`, or None when
        nothing is cached yet.
        '''
        cache_dir = self.cache_dir_path(ident)
        if exists(cache_dir):
            files = glob.glob(join(cache_dir, 'loris_cache.*'))
            # copy_to_cache() may store a rules file named
            # "loris_cache.<auth_rules_ext>" in the same directory; it also
            # matches the glob and must not be mistaken for the image.
            rules_ext = getattr(self, 'auth_rules_ext', None)
            if rules_ext is not None:
                rules_name = 'loris_cache.' + rules_ext
                files = [fp for fp in files
                         if os.path.basename(fp) != rules_name]
            if files:
                return files[0]
        return None

    def cache_file_extension(self, ident, response):
        '''
        Work out the extension for the cached file, using the response's
        content-type header when it maps to a known format.
        '''
        if 'content-type' in response.headers:
            try:
                extension = self.get_format(
                    ident, constants.FORMATS_BY_MEDIA_TYPE[
                        response.headers['content-type']])
            except KeyError:
                logger.warning(
                    'Your server may be responding with incorrect content-types. Reported %s for ident %s.',
                    response.headers['content-type'], ident)
                # Attempt without the content-type
                extension = self.get_format(ident, None)
        else:
            extension = self.get_format(ident, None)
        return extension

    def copy_to_cache(self, ident):
        '''
        Download the source image for `ident` into the cache and return the
        path of the cached file.

        Also makes a best-effort attempt to fetch a rules file located next
        to the image; a failure there is logged and otherwise ignored.

        Raises ResolverException when no valid URL can be built or the
        origin does not answer with an ok status.
        '''
        ident = unquote(ident)

        # get source image and write to temporary file
        (source_url, options) = self._web_request_url(ident)

        cache_dir = self.cache_dir_path(ident)
        mkdir_p(cache_dir)

        with closing(requests.get(source_url, stream=True,
                                  **options)) as response:
            if not response.ok:
                logger.warning(
                    "Source image not found at %s for identifier: %s. "
                    "Status code returned: %s.", source_url, ident,
                    response.status_code)
                raise ResolverException(
                    "Source image not found for identifier: %s. "
                    "Status code returned: %s." %
                    (ident, response.status_code))

            extension = self.cache_file_extension(ident, response)
            local_fp = join(cache_dir, "loris_cache." + extension)

            with tempfile.NamedTemporaryFile(dir=cache_dir,
                                             delete=False) as tmp_file:
                try:
                    for chunk in response.iter_content(2048):
                        tmp_file.write(chunk)
                except Exception:
                    # The temp file is created with delete=False, so a
                    # partial download has to be removed explicitly.
                    tmp_file.close()
                    remove(tmp_file.name)
                    raise

        # Now rename the temp file to the desired file name if it still
        # doesn't exist (another process could have created it).
        #
        # Note: This is purely an optimisation; if the file springs into
        # existence between the existence check and the copy, it will be
        # overridden.
        if exists(local_fp):
            logger.info('Another process downloaded src image %s', local_fp)
            remove(tmp_file.name)
        else:
            safe_rename(tmp_file.name, local_fp)
            logger.info("Copied %s to %s", source_url, local_fp)

        # Check for rules file associated with image file
        # These files are < 2k in size, so fetch in one go.
        # Assumes that the rules will be next to the image
        # cache_dir is image specific, so this is easy

        bits = split(source_url)
        fn = bits[1].rsplit('.', 1)[0] + "." + self.auth_rules_ext
        rules_url = bits[0] + '/' + fn
        try:
            # Use the same auth/cert/verify options as for the image itself;
            # the rules file is assumed to live on the same origin server.
            resp = requests.get(rules_url, **options)
            if resp.status_code == 200:
                local_rules_fp = join(cache_dir,
                                      "loris_cache." + self.auth_rules_ext)
                if not exists(local_rules_fp):
                    with open(local_rules_fp, 'w') as fh:
                        fh.write(resp.text)
        except Exception as e:
            # Fetching the rules file is best effort (e.g. no connection
            # available): log the reason instead of hiding it.
            logger.warning('Ignoring rules file %s for identifier %s: %s',
                           rules_url, ident, e)

        return local_fp

    def resolve(self, app, ident, base_uri):
        '''
        Return the ImageInfo for `ident`, copying the source image into the
        cache first when it is not cached yet.
        '''
        # NOTE(review): the cache lookup uses `ident` as given, while
        # copy_to_cache() and is_resolvable() unquote it first -- confirm
        # callers pass an already-unquoted identifier.
        cached_file_path = self.cached_file_for_ident(ident)
        if not cached_file_path:
            cached_file_path = self.copy_to_cache(ident)
        format_ = self.get_format(cached_file_path, None)
        uri = self.fix_base_uri(base_uri)
        if self.use_extra_info:
            extra = self.get_extra_info(ident, cached_file_path)
        else:
            extra = {}
        return ImageInfo(app, uri, cached_file_path, format_, extra)

Пример #4

Показать файл

Файл: s3resolver.py Проект: nmaekawa/hxloris

class S3Resolver(_AbstractResolver):
    """Resolver for image files stored on aws s3 buckets.

    The first call to `resolve()` copies the source image into a local cache;
    subsequent calls use local copy from the cache.

    A config example:

        [resolver]
        impl = 'hxloris.s3resolver.S3Resolver'

        # absolute path to dir where source images are downloaded from s3
        # mandatory
        cache_root = '/var/loris_cache_root'

        # subsection to define mappings from :ident to an s3 bucket/key
        # optional
        [[bucket_map]]
          [[[site1]]]
            bucket = 'bucket-for-site1'
            key_prefix = 'loris/images'

          [[[site2]]]
            bucket = 'bucket-for-site2'
            key_prefix = 'loris/other-images'

        ...

    an incoming request url and its corresponding s3 bucket/prefix:
        http://localhost/site1/this/that/image.jpg/:region/:size/:rotation/default.jpg
        s3://bucket-for-site1/loris/images/this/that/image.jpg
    or
        http://localhost/site2/blah/image3.jpg/:region/:size/:rotation/default.jpg
        s3://bucket-for-site2/loris/other-images/blah/image3.jpg

    `bucket_map` is optional (as is `key_prefix`), but will always require a
    `bucket` to be in the request url. For example, the url below is invalid:
        http://localhost/image.jpg

    If it looks too similar to loris.resolver.SimpleHTTPResolver... you're right!
    """
    def __init__(self, config):
        """Configure the resolver from ``config``.

        Reads ``default_format``, ``ident_regex`` and the mandatory
        ``cache_root`` from ``self.config``, picks up the optional
        ``bucket_map`` from ``config``, and creates the boto3 s3 resource.

        Raises:
            ResolverException: when ``cache_root`` is not configured.
        """
        super(S3Resolver, self).__init__(config)
        self.default_format = self.config.get("default_format", None)

        self._ident_regex_checker = IdentRegexChecker(
            ident_regex=self.config.get("ident_regex"))
        self._cache_namer = CacheNamer()

        if "cache_root" in self.config:
            self.cache_root = self.config["cache_root"]
        else:
            message = ("Server Side Error: Configuration incomplete and "
                       "cannot resolve. Missing setting for cache_root.")
            logger.error(message)
            raise ResolverException(message)

        self.has_bucket_map = False
        if "bucket_map" in config:
            self.bucket_map = config["bucket_map"]
            self.has_bucket_map = True
            logger.debug("s3 bucket_map: {}".format(self.bucket_map))

        # boto3: if not in us-east-1, set envvar AWS_DEFAULT_REGION to avoid extra
        # requests when downloading from s3
        # thread safe:
        # https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html#multithreading-multiprocessing
        session = boto3.session.Session()
        self.s3 = session.resource("s3")

        logger.info("loaded s3 resolver with config: {}".format(config))

    def raise_404_for_ident(self, ident):
        """Log and raise a ResolverException stating ``ident`` was not found."""
        message = "Source image not found for identifier: %s." % (ident, )
        logger.warning(message)
        raise ResolverException(message)

    def is_resolvable(self, ident):
        """checks if ident contains a readable s3 object.

        this generates a head request for the s3 object

        Returns False when the identifier is rejected by the ident regex,
        cannot be parsed into bucket/key, or the s3 object is inaccessible
        or empty; True when the cache directory already exists or the
        object has a positive content length.
        """
        ident = unquote(ident)

        if not self._ident_regex_checker.is_allowed(ident):
            return False

        if os.path.exists(self.cache_dir_path(ident=ident)):
            return True

        try:
            (bucketname, keyname) = self.s3bucket_from_ident(ident)
        except ResolverException as e:
            logger.warning(e)
            return False

        # check that we can get to this object on s3
        # access to s3obj prop generates a head request or 404
        try:
            s3obj = self.s3.Object(bucketname, keyname)
            content_length = s3obj.content_length
        except Exception as e:
            logger.error("unable to access s3 object ({}:{}): {}".format(
                bucketname, keyname, e))
            return False

        if content_length > 0:
            return True

        logger.warning("empty s3 object ({}:{})".format(
            bucketname, keyname))
        return False

    def get_format(self, ident, potential_format):
        """Choose the image format.

        The configured ``default_format`` wins, then ``potential_format``,
        and finally whatever ``format_from_ident`` derives from ``ident``.
        """
        if self.default_format is not None:
            return self.default_format
        elif potential_format is not None:
            return potential_format
        else:
            return self.format_from_ident(ident)

    def s3bucket_from_ident(self, ident):
        """ returns tuple(bucketname, keyname) parsed from ident.

        The part of ``ident`` before the first "/" is the bucket (or a
        ``bucket_map`` alias); the remainder is the key, prefixed with the
        alias' ``key_prefix`` when one is configured.

        Raises:
            ResolverException: when ``ident`` contains no "/".
        """
        key_parts = ident.split("/", 1)
        if len(key_parts) == 2:
            (bucket, partial_key) = key_parts
        else:
            raise ResolverException(
                "Invalid identifier. Expected bucket/ident; got {}".format(
                    key_parts))

        # check if bucketname actually means something different
        if self.has_bucket_map and bucket in self.bucket_map:
            bucketname = self.bucket_map[bucket]["bucket"]
            if "key_prefix" in self.bucket_map[bucket]:
                keyname = os.path.join(self.bucket_map[bucket]["key_prefix"],
                                       partial_key)
            else:
                keyname = partial_key
            return (bucketname, keyname)

        else:  # what came in ident is the actual bucketname
            return (bucket, partial_key)

    def cache_dir_path(self, ident):
        """Return the cache directory for ``ident`` below ``cache_root``."""
        # build dir path for ident file in cache
        return os.path.join(self.cache_root,
                            CacheNamer.cache_directory_name(ident=ident))

    def cached_file_for_ident(self, ident):
        """Return the cached source image path for ``ident``, or None."""
        # recover filepath for ident in cache
        cache_dir = self.cache_dir_path(ident)
        if os.path.exists(cache_dir):
            files = glob.glob(os.path.join(cache_dir, "loris_cache.*"))
            # copy_to_cache() may store a rules file named
            # "loris_cache.<auth_rules_ext>" in the same directory; it also
            # matches the glob and must not be mistaken for the image.
            rules_ext = getattr(self, "auth_rules_ext", None)
            if rules_ext is not None:
                rules_name = "loris_cache." + rules_ext
                files = [fp for fp in files
                         if os.path.basename(fp) != rules_name]
            if files:
                return files[0]
        return None

    def cache_file_extension(self, ident, content_type=None):
        """Work out the extension for the cached file.

        Uses ``content_type`` when it maps to a known format, otherwise
        falls back to ``get_format(ident, None)``.
        """
        if content_type is not None:
            try:
                extension = self.get_format(
                    ident, constants.FORMATS_BY_MEDIA_TYPE[content_type])
            except KeyError:
                # logging interpolates lazily with %-style placeholders;
                # "{}" placeholders would make the record fail to format.
                logger.warning(
                    "wonky s3 resource content-type(%s) for ident(%s)",
                    content_type,
                    ident,
                )
                # Attempt without the content-type
                extension = self.get_format(ident, None)
        else:
            extension = self.get_format(ident, None)
        return extension

    def copy_to_cache(self, ident):
        """ downloads image source file from s3, if not in cache already.

        Also makes a best-effort attempt to download a rules file stored
        next to the image; a failure there is logged and otherwise ignored.

        Returns the path of the cached image file.

        Raises:
            ResolverException: when ``ident`` cannot be parsed, or the s3
                object cannot be accessed or saved.
        """
        ident = unquote(ident)

        # get source image and write to temporary file
        (bucketname, keyname) = self.s3bucket_from_ident(ident)

        try:
            s3obj = self.s3.Object(bucketname, keyname)
            content_type = s3obj.content_type
        except Exception as e:
            msg = "no content_type for s3 object ({}:{}): {}".format(
                bucketname, keyname, e)
            logger.error(msg)
            raise ResolverException(msg)

        extension = self.cache_file_extension(ident, content_type)
        cache_dir = self.cache_dir_path(ident)
        os.makedirs(cache_dir, exist_ok=True)
        local_fp = os.path.join(cache_dir, "loris_cache." + extension)
        with tempfile.NamedTemporaryFile(dir=cache_dir,
                                         delete=False) as tmp_file:
            try:
                self.s3.Bucket(bucketname).download_fileobj(keyname, tmp_file)
            except Exception as e:
                # The temp file is created with delete=False, so a failed
                # download has to be removed explicitly.
                tmp_file.close()
                os.remove(tmp_file.name)
                msg = "unable to access or save s3 object ({}:{}): {}".format(
                    bucketname, keyname, e)
                logger.error(msg)
                raise ResolverException(msg)

        # Now rename the temp file to the desired file name if it still
        # doesn't exist (another process could have created it).
        #
        # Note: This is purely an optimisation; if the file springs into
        # existence between the existence check and the copy, it will be
        # overridden.
        if os.path.exists(local_fp):
            logger.info(
                "Another process downloaded src image {}".format(local_fp))
            os.remove(tmp_file.name)
        else:
            safe_rename(tmp_file.name, local_fp)
            logger.info("Copied {}:{} to {}".format(bucketname, keyname,
                                                    local_fp))

        # Check for rules file associated with image file
        # These files are < 2k in size, so fetch in one go.
        # Assumes that the rules will be next to the image
        # cache_dir is image specific, so this is easy
        key_dir, key_file = os.path.split(keyname)  # === bash dirname/basename
        # Strip only the last extension, so "a.b.jpg" maps to "a.b.<ext>".
        rules_file = key_file.rsplit(".", 1)[0] + "." + self.auth_rules_ext
        # A key at the bucket root has no directory part; do not prepend "/".
        if key_dir:
            rules_keyname = key_dir + "/" + rules_file
        else:
            rules_keyname = rules_file
        local_rules_fp = os.path.join(cache_dir,
                                      "loris_cache." + self.auth_rules_ext)
        try:
            self.s3.Object(bucketname,
                           rules_keyname).download_file(local_rules_fp)
        except Exception as e:
            # no connection available?
            msg = "ignoring rules file({}/{}) for ident({}): {}".format(
                bucketname, rules_keyname, ident, e)
            logger.warning(msg)

        return local_fp

    def resolve(self, app, ident, base_uri):
        """Return the ImageInfo for ``ident``.

        Raises a ResolverException (via ``raise_404_for_ident``) when the
        identifier is not resolvable; otherwise copies the source image
        into the cache first when it is not cached yet.
        """
        if not self.is_resolvable(ident):
            self.raise_404_for_ident(ident)
        # NOTE(review): the cache lookup uses `ident` as given, while
        # copy_to_cache() and is_resolvable() unquote it first -- confirm
        # callers pass an already-unquoted identifier.
        cached_file_path = self.cached_file_for_ident(ident)
        if not cached_file_path:
            cached_file_path = self.copy_to_cache(ident)
        format_ = self.get_format(cached_file_path, None)
        auth_rules = self.get_auth_rules(ident, cached_file_path)
        return ImageInfo(
            app=app,
            src_img_fp=cached_file_path,
            src_format=format_,
            auth_rules=auth_rules,
        )

Пример #5

Показать файл

Файл: identifiers_t.py Проект: onbcst/loris

 def test_checker_has_correct_is_allowed(self, ident_regex, ident,
                                         expected_is_allowed):
     """The checker built from ``ident_regex`` gives the expected verdict."""
     verdict = IdentRegexChecker(ident_regex=ident_regex).is_allowed(
         ident=ident)
     assert verdict is expected_is_allowed

Пример #6

Показать файл

Файл: identifiers_t.py Проект: onbcst/loris

 def test_any_ident_is_allowed_if_regex_is_none(self, ident):
     """A checker built with ``ident_regex=None`` allows the given ident."""
     unrestricted = IdentRegexChecker(ident_regex=None)
     outcome = unrestricted.is_allowed(ident=ident)
     assert outcome is True