Пример #1
0
def storage_uri(uri_str, default_scheme='file', debug=False):
    """Instantiate a StorageUri from a URI string.

    :type uri_str: string
    :param uri_str: URI naming bucket + optional object.
    :type default_scheme: string
    :param default_scheme: default scheme for scheme-less URIs.

    :rtype: :class:`boto.StorageUri` subclass
    :return: StorageUri subclass for given URI.

    uri_str must be one of the following formats:
        gs://bucket/name
        s3://bucket/name
        gs://bucket
        s3://bucket
        filename
    The last example uses the default scheme ('file', unless overridden)
    """

    # Manually parse URI components instead of using urlparse.urlparse because
    # what we're calling URIs don't really fit the standard syntax for URIs
    # (the latter includes an optional host/net location part).
    end_scheme_idx = uri_str.find('://')
    if end_scheme_idx == -1:
        scheme = default_scheme.lower()
        path = uri_str
    else:
        scheme = uri_str[0:end_scheme_idx].lower()
        path = uri_str[end_scheme_idx + 3:]

    if scheme not in ['file', 's3', 'gs']:
        raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    if scheme == 'file':
        # For file URIs we have no bucket name, and use the complete path
        # (minus 'file://') as the object name.
        return FileStorageUri(path, debug)
    else:
        path_parts = path.split('/', 1)
        bucket_name = path_parts[0]
        # Ensure the bucket name is valid, to avoid possibly confusing other
        # parts of the code. (For example if we didn't catch bucket names
        # containing ':', when a user tried to connect to the server with that
        # name they might get a confusing error about non-integer port numbers.)
        if (bucket_name and not re.match(
                '^[a-z0-9][a-z0-9\._-]{1,253}[a-z0-9]$', bucket_name)):
            raise InvalidUriError('Invalid bucket name in URI "%s"' % uri_str)
        object_name = ''
        if len(path_parts) > 1:
            object_name = path_parts[1]
        return BucketStorageUri(scheme, bucket_name, object_name, debug)
Пример #2
0
def storage_uri(uri_str, default_scheme='file', debug=0, validate=True,
                bucket_storage_uri_class=BucketStorageUri,
                suppress_consec_slashes=True, is_latest=False):
    """
    Instantiate a StorageUri from a URI string.

    :type uri_str: string
    :param uri_str: URI naming bucket + optional object.
    :type default_scheme: string
    :param default_scheme: default scheme for scheme-less URIs.
    :type debug: int
    :param debug: debug level to pass in to boto connection (range 0..2).
    :type validate: bool
    :param validate: whether to check for bucket name validity.
    :type bucket_storage_uri_class: BucketStorageUri interface.
    :param bucket_storage_uri_class: Allows mocking for unit tests.
    :param suppress_consec_slashes: If provided, controls whether
        consecutive slashes will be suppressed in key paths.
    :type is_latest: bool
    :param is_latest: whether this versioned object represents the
        current version.

    We allow validate to be disabled to allow caller
    to implement bucket-level wildcarding (outside the boto library;
    see gsutil).

    :rtype: :class:`boto.StorageUri` subclass
    :return: StorageUri subclass for given URI.

    ``uri_str`` must be one of the following formats:

    * gs://bucket/name
    * gs://bucket/name#ver
    * s3://bucket/name
    * gs://bucket
    * s3://bucket
    * filename (which could be a Unix path like /a/b/c or a Windows path like
      C:\a\b\c)

    The last example uses the default scheme ('file', unless overridden).
    """
    version_id = None
    generation = None

    # Manually parse URI components instead of using urlparse because
    # what we're calling URIs don't really fit the standard syntax for URIs
    # (the latter includes an optional host/net location part).
    end_scheme_idx = uri_str.find('://')
    if end_scheme_idx == -1:
        scheme = default_scheme.lower()
        path = uri_str
    else:
        scheme = uri_str[0:end_scheme_idx].lower()
        path = uri_str[end_scheme_idx + 3:]

    if scheme not in ['file', 's3', 'gs']:
        raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    if scheme == 'file':
        # For file URIs we have no bucket name, and use the complete path
        # (minus 'file://') as the object name.
        is_stream = False
        if path == '-':
            is_stream = True
        return FileStorageUri(path, debug, is_stream)
    else:
        path_parts = path.split('/', 1)
        bucket_name = path_parts[0]
        object_name = ''
        # If validate enabled, ensure the bucket name is valid, to avoid
        # possibly confusing other parts of the code. (For example if we didn't
        # catch bucket names containing ':', when a user tried to connect to
        # the server with that name they might get a confusing error about
        # non-integer port numbers.)
        if (validate and bucket_name and
            (not BUCKET_NAME_RE.match(bucket_name)
             or TOO_LONG_DNS_NAME_COMP.search(bucket_name))):
            raise InvalidUriError('Invalid bucket name in URI "%s"' % uri_str)
        if scheme == 'gs':
            match = GENERATION_RE.search(path)
            if match:
                md = match.groupdict()
                versionless_uri_str = md['versionless_uri_str']
                path_parts = versionless_uri_str.split('/', 1)
                generation = int(md['generation'])
        elif scheme == 's3':
            match = VERSION_RE.search(path)
            if match:
                md = match.groupdict()
                versionless_uri_str = md['versionless_uri_str']
                path_parts = versionless_uri_str.split('/', 1)
                version_id = md['version_id']
        else:
            raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
        if len(path_parts) > 1:
            object_name = path_parts[1]
        return bucket_storage_uri_class(
            scheme, bucket_name, object_name, debug,
            suppress_consec_slashes=suppress_consec_slashes,
            version_id=version_id, generation=generation, is_latest=is_latest)
Пример #3
0
def storage_uri(uri_str, default_scheme='file', debug=0, validate=True,
                bucket_storage_uri_class=BucketStorageUri,
                suppress_consec_slashes=True):
    """
    Instantiate a StorageUri from a URI string.

    :type uri_str: string
    :param uri_str: URI naming bucket + optional object.
    :type default_scheme: string
    :param default_scheme: default scheme for scheme-less URIs.
    :type debug: int
    :param debug: debug level to pass in to boto connection (range 0..2).
    :type validate: bool
    :param validate: whether to check for bucket name validity.
    :type bucket_storage_uri_class: BucketStorageUri interface.
    :param bucket_storage_uri_class: Allows mocking for unit tests.
    :param suppress_consec_slashes: If provided, controls whether
        consecutive slashes will be suppressed in key paths.

    We allow validate to be disabled to allow caller
    to implement bucket-level wildcarding (outside the boto library;
    see gsutil).

    :rtype: :class:`boto.StorageUri` subclass
    :return: StorageUri subclass for given URI.

    ``uri_str`` must be one of the following formats:

    * gs://bucket/name
    * s3://bucket/name
    * gs://bucket
    * s3://bucket
    * filename (which could be a Unix path like /a/b/c or a Windows path like
      C:\a\b\c)

    The last example uses the default scheme ('file', unless overridden)
    """

    # Manually parse URI components instead of using urlparse.urlparse because
    # what we're calling URIs don't really fit the standard syntax for URIs
    # (the latter includes an optional host/net location part).
    end_scheme_idx = uri_str.find('://')
    if end_scheme_idx == -1:
        # Check for common error: user specifies gs:bucket instead
        # of gs://bucket. Some URI parsers allow this, but it can cause
        # confusion for callers, so we don't.
        colon_pos = uri_str.find(':')
        if colon_pos != -1:
            # Allow Windows path names including drive letter (C: etc.)
            drive_char = uri_str[0].lower()
            if not (platform.system().lower().startswith('windows')
                    and colon_pos == 1
                    and drive_char >= 'a' and drive_char <= 'z'):
              raise InvalidUriError('"%s" contains ":" instead of "://"' % uri_str)
        scheme = default_scheme.lower()
        path = uri_str
    else:
        scheme = uri_str[0:end_scheme_idx].lower()
        path = uri_str[end_scheme_idx + 3:]

    if scheme not in ['file', 's3', 'gs']:
        raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    if scheme == 'file':
        # For file URIs we have no bucket name, and use the complete path
        # (minus 'file://') as the object name.
        is_stream = False
        if path == '-':
            is_stream = True
        return FileStorageUri(path, debug, is_stream)
    else:
        path_parts = path.split('/', 1)
        bucket_name = path_parts[0]
        if (validate and bucket_name and
            # Disallow buckets violating charset or not [3..255] chars total.
            (not re.match('^[a-z0-9][a-z0-9\._-]{1,253}[a-z0-9]$', bucket_name)
            # Disallow buckets with individual DNS labels longer than 63.
             or re.search('[-_a-z0-9]{64}', bucket_name))):
            raise InvalidUriError('Invalid bucket name in URI "%s"' % uri_str)
        # If enabled, ensure the bucket name is valid, to avoid possibly
        # confusing other parts of the code. (For example if we didn't
        # catch bucket names containing ':', when a user tried to connect to
        # the server with that name they might get a confusing error about
        # non-integer port numbers.)
        object_name = ''
        if len(path_parts) > 1:
            object_name = path_parts[1]
        return bucket_storage_uri_class(
            scheme, bucket_name, object_name, debug,
            suppress_consec_slashes=suppress_consec_slashes)
Пример #4
0
def storage_uri(uri_str, default_scheme='file', debug=0, validate=True):
    """Instantiate a StorageUri from a URI string.

    :type uri_str: string
    :param uri_str: URI naming bucket + optional object.
    :type default_scheme: string
    :param default_scheme: default scheme for scheme-less URIs.
    :type debug: int
    :param debug: debug level to pass in to boto connection (range 0..2).
    :type validate: bool
    :param validate: whether to check for bucket name validity.

    We allow validate to be disabled to allow caller
    to implement bucket-level wildcarding (outside the boto library;
    see gsutil).

    :rtype: :class:`boto.StorageUri` subclass
    :return: StorageUri subclass for given URI.

    uri_str must be one of the following formats:
        gs://bucket/name
        s3://bucket/name
        gs://bucket
        s3://bucket
        filename
    The last example uses the default scheme ('file', unless overridden)
    """

    # Manually parse URI components instead of using urlparse.urlparse because
    # what we're calling URIs don't really fit the standard syntax for URIs
    # (the latter includes an optional host/net location part).
    end_scheme_idx = uri_str.find('://')
    if end_scheme_idx == -1:
        # Check for common error: user specifies gs:bucket instead
        # of gs://bucket. Some URI parsers allow this, but it can cause
        # confusion for callers, so we don't.
        if uri_str.find(':') != -1:
            raise InvalidUriError('"%s" contains ":" instead of "://"' %
                                  uri_str)
        scheme = default_scheme.lower()
        path = uri_str
    else:
        scheme = uri_str[0:end_scheme_idx].lower()
        path = uri_str[end_scheme_idx + 3:]

    if scheme not in ['file', 's3', 'gs']:
        raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    if scheme == 'file':
        # For file URIs we have no bucket name, and use the complete path
        # (minus 'file://') as the object name.
        return FileStorageUri(path, debug)
    else:
        path_parts = path.split('/', 1)
        bucket_name = path_parts[0]
        if (validate and bucket_name and not re.match(
                '^[a-z0-9][a-z0-9\._-]{1,253}[a-z0-9]$', bucket_name)):
            raise InvalidUriError('Invalid bucket name in URI "%s"' % uri_str)
        # If enabled, ensure the bucket name is valid, to avoid possibly
        # confusing other parts of the code. (For example if we didn't
        # catch bucket names containing ':', when a user tried to connect to
        # the server with that name they might get a confusing error about
        # non-integer port numbers.)
        object_name = ''
        if len(path_parts) > 1:
            object_name = path_parts[1]
        return BucketStorageUri(scheme, bucket_name, object_name, debug)