def storage_uri(uri_str, default_scheme='file', debug=False):
    """Instantiate a StorageUri from a URI string.

    :type uri_str: string
    :param uri_str: URI naming bucket + optional object.
    :type default_scheme: string
    :param default_scheme: default scheme for scheme-less URIs.
    :type debug: bool
    :param debug: whether to enable debugging on the returned StorageUri.

    :rtype: :class:`boto.StorageUri` subclass
    :return: StorageUri subclass for given URI.

    ``uri_str`` must be one of the following formats:

    * gs://bucket/name
    * s3://bucket/name
    * gs://bucket
    * s3://bucket
    * filename

    The last example uses the default scheme ('file', unless overridden)
    """
    # Manually parse URI components instead of using urlparse.urlparse because
    # what we're calling URIs don't really fit the standard syntax for URIs
    # (the latter includes an optional host/net location part).
    end_scheme_idx = uri_str.find('://')
    if end_scheme_idx == -1:
        scheme = default_scheme.lower()
        path = uri_str
    else:
        scheme = uri_str[0:end_scheme_idx].lower()
        path = uri_str[end_scheme_idx + 3:]
    if scheme not in ['file', 's3', 'gs']:
        raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    if scheme == 'file':
        # For file URIs we have no bucket name, and use the complete path
        # (minus 'file://') as the object name.
        return FileStorageUri(path, debug)
    else:
        path_parts = path.split('/', 1)
        bucket_name = path_parts[0]
        # Ensure the bucket name is valid, to avoid possibly confusing other
        # parts of the code. (For example if we didn't catch bucket names
        # containing ':', when a user tried to connect to the server with
        # that name they might get a confusing error about non-integer port
        # numbers.)
        # Fixed: use a raw string for the pattern so '\.' is a literal regex
        # escape rather than an invalid Python string escape sequence.
        if (bucket_name and not re.match(
                r'^[a-z0-9][a-z0-9\._-]{1,253}[a-z0-9]$', bucket_name)):
            raise InvalidUriError('Invalid bucket name in URI "%s"' % uri_str)
        object_name = ''
        if len(path_parts) > 1:
            object_name = path_parts[1]
        return BucketStorageUri(scheme, bucket_name, object_name, debug)
def set_acl(self, acl_or_str, key_name='', validate=True, headers=None,
            version_id=None):
    """Set the ACL on the bucket (or, via key_name, a key) named by this
    URI.

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        raise InvalidUriError('set_acl on bucket-less URI (%s)' % self.uri)
    bucket = self.get_bucket(validate, headers)
    bucket.set_acl(acl_or_str, key_name, headers, version_id)
def add_group_email_grant(self, permission, email_address, recursive=False,
                          validate=False, headers=None):
    """Add a grant for a group (identified by email address) to the key
    or bucket this URI names.

    Only supported for 'gs' URIs; raises ValueError otherwise, and raises
    ValueError if recursive=True is combined with a key-ful URI.
    """
    self._check_bucket_uri('add_group_email_grant')
    if self.scheme != 'gs':
        raise ValueError('add_group_email_grant() not supported for %s '
                         'URIs.' % self.scheme)
    if self.object_name:
        # Key-level grant; recursion is meaningless for a single key.
        if recursive:
            raise ValueError(
                'add_group_email_grant() on key-ful URI cannot '
                'specify recursive=True')
        target = self.get_key(validate, headers)
        self.check_response(target, 'key', self.uri)
        target.add_group_email_grant(permission, email_address, headers)
        return
    if self.bucket_name:
        self.get_bucket(validate, headers).add_group_email_grant(
            permission, email_address, recursive, headers)
        return
    raise InvalidUriError('add_group_email_grant() on bucket-less URI '
                          '%s' % self.uri)
def connect(self, access_key_id=None, secret_access_key=None, **kwargs):
    """
    Opens a connection to appropriate provider, depending on provider
    portion of URI. Requires Credentials defined in boto config file
    (see boto/pyami/config.py).

    @type storage_uri: StorageUri
    @param storage_uri: StorageUri specifying a bucket or a bucket+object

    @rtype: L{AWSAuthConnection<boto.gs.connection.AWSAuthConnection>}
    @return: A connection to storage service provider of the given URI.
    """
    # Lazily create the connection on first use, then cache it on self.
    if not self.connection:
        scheme = self.scheme
        if scheme == 's3':
            from boto.s3.connection import S3Connection
            self.connection = S3Connection(access_key_id,
                                           secret_access_key, **kwargs)
        elif scheme == 'gs':
            from boto.gs.connection import GSConnection
            self.connection = GSConnection(access_key_id,
                                           secret_access_key, **kwargs)
        elif scheme == 'file':
            from boto.file.connection import FileConnection
            self.connection = FileConnection(self)
        else:
            raise InvalidUriError('Unrecognized scheme "%s"' % scheme)
    self.connection.debug = self.debug
    return self.connection
def get_acl(self, validate=True, headers=None):
    """Return the ACL for the bucket or object named by this URI.

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        raise InvalidUriError('get_acl on bucket-less URI (%s)' % self.uri)
    # Works for both bucket- and object-level ACLs: for the former,
    # object_name is passed through as the (empty) key_name.
    return self.get_bucket(validate, headers).get_acl(self.object_name,
                                                      headers)
def enable_logging(self, target_bucket, target_prefix=None, validate=True,
                   headers=None, version_id=None):
    """Enable server access logging for the bucket named by this URI,
    delivering logs to target_bucket (optionally under target_prefix).

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        # Fixed: error message previously said 'disable_logging'.
        raise InvalidUriError(
            'enable_logging on bucket-less URI (%s)' % self.uri)
    bucket = self.get_bucket(validate, headers)
    bucket.enable_logging(target_bucket, target_prefix, headers=headers)
def check_response(self, resp, level, uri):
    """Raise InvalidUriError if resp is None (i.e., the lookup for the
    given level -- 'bucket', 'key', etc. -- came back empty); otherwise
    do nothing.
    """
    if resp is not None:
        return
    message = ('Attempt to get %s for "%s" failed. This can happen if '
               'the URI refers to a non-existent object or if you meant to '
               'operate on a directory (e.g., leaving off -R option on gsutil '
               'cp, mv, or ls of a bucket)' % (level, uri))
    raise InvalidUriError('\n'.join(textwrap.wrap(message, 80)))
def get_key(self, validate=True, headers=None, version_id=None):
    """Fetch and return the key named by this URI.

    Raises InvalidUriError if this URI names no object, or (via
    check_response) if the key lookup fails.
    """
    if not self.object_name:
        raise InvalidUriError('get_key on object-less URI (%s)' % self.uri)
    key = self.get_bucket(validate, headers).get_key(self.object_name,
                                                     headers, version_id)
    self.check_response(key, 'key', self.uri)
    return key
def set_cors(self, cors, validate=True, headers=None):
    """sets or updates a bucket's CORS XML"""
    if not self.bucket_name:
        raise InvalidUriError('set_cors on bucket-less URI (%s)' % self.uri)
    # cors is serialized to XML before being sent to the service.
    self.get_bucket(validate, headers).set_cors(cors.to_xml(), headers)
def exists(self, headers=None):
    """Returns True if the object exists or False if it doesn't"""
    if not self.object_name:
        raise InvalidUriError('exists on object-less URI (%s)' % self.uri)
    key = self.get_bucket().get_key(self.object_name, headers=headers)
    return bool(key)
def delete_key(self, validate=True, headers=None, version_id=None,
               mfa_token=None):
    """Delete the key named by this URI and return the provider's result.

    Raises InvalidUriError if this URI names no object.
    """
    if not self.object_name:
        raise InvalidUriError('delete_key on object-less URI (%s)' %
                              self.uri)
    return self.get_bucket(validate, headers).delete_key(
        self.object_name, headers, version_id, mfa_token)
def set_def_acl(self, acl_or_str, key_name='', validate=True, headers=None,
                version_id=None):
    """sets or updates a bucket's default object acl

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        # Fixed: error message previously said 'set_acl'.
        raise InvalidUriError(
            'set_def_acl on bucket-less URI (%s)' % self.uri)
    bucket = self.get_bucket(validate, headers)
    # Note: version_id is accepted for signature parity but not forwarded,
    # matching the underlying set_def_acl call.
    bucket.set_def_acl(acl_or_str, key_name, headers)
def check_response(self, resp, level, uri):
    """Raise InvalidUriError if resp is None (i.e., the lookup for the
    given level -- 'bucket', 'key', etc. -- came back empty); otherwise
    do nothing.
    """
    if resp is not None:
        return
    raise InvalidUriError(
        'Attempt to get %s for "%s" failed.\nThis '
        'can happen if the URI refers to a non-'
        'existent object or if you meant to\noperate '
        'on a directory (e.g., leaving off -R option '
        'on gsutil cp, mv, or ls of a\nbucket)' % (level, uri))
def set_canned_acl(self, acl_str, validate=True, headers=None,
                   version_id=None):
    """Apply a predefined (canned) ACL to the key named by this URI.

    Raises InvalidUriError if this URI names no object.
    """
    if not self.object_name:
        raise InvalidUriError('set_canned_acl on object-less URI (%s)' %
                              self.uri)
    target = self.get_key(validate, headers)
    self.check_response(target, 'key', self.uri)
    target.set_canned_acl(acl_str, headers, version_id)
def canned_acls(self):
    """Return the provider's collection of supported canned ACLs.

    Raises InvalidUriError if this URI names no bucket.
    """
    if self.bucket_name is None:
        raise InvalidUriError('canned_acls on bucket-less URI (%s)' %
                              self.uri)
    acls = self.connect().provider.canned_acls
    self.check_response(acls, 'canned_acls', self.uri)
    return acls
def get_bucket(self, validate=True, headers=None):
    """Fetch and return the bucket named by this URI.

    Raises InvalidUriError if this URI names no bucket, or (via
    check_response) if the bucket lookup fails.
    """
    if self.bucket_name is None:
        raise InvalidUriError('get_bucket on bucket-less URI (%s)' %
                              self.uri)
    bucket = self.connect().get_bucket(self.bucket_name, validate, headers)
    self.check_response(bucket, 'bucket', self.uri)
    return bucket
def acl_class(self):
    """Return the provider-specific ACL class for this URI's scheme.

    Raises InvalidUriError if this URI names no bucket.
    """
    if self.bucket_name is None:
        raise InvalidUriError('acl_class on bucket-less URI (%s)' %
                              self.uri)
    klass = self.connect().provider.acl_class
    self.check_response(klass, 'acl_class', self.uri)
    return klass
def set_subresource(self, subresource, value, validate=True, headers=None,
                    version_id=None):
    """Set the named subresource to value on the bucket or object this
    URI refers to.

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        raise InvalidUriError(
            'set_subresource on bucket-less URI (%s)' % self.uri)
    self.get_bucket(validate, headers).set_subresource(
        subresource, value, self.object_name, headers, version_id)
def get_acl(self, validate=True, headers=None, version_id=None):
    """Return the ACL for the bucket or object named by this URI.

    Raises InvalidUriError if this URI names no bucket, or (via
    check_response) if the ACL lookup fails.
    """
    if not self.bucket_name:
        raise InvalidUriError('get_acl on bucket-less URI (%s)' % self.uri)
    # Works for both bucket- and object-level ACLs (the former passes
    # key_name=None).
    acl = self.get_bucket(validate, headers).get_acl(self.object_name,
                                                     headers, version_id)
    self.check_response(acl, 'acl', self.uri)
    return acl
def connect(self, access_key_id=None, secret_access_key=None, **kwargs):
    """
    Opens a connection to appropriate provider, depending on provider
    portion of URI. Requires Credentials defined in boto config file
    (see boto/pyami/config.py).

    @type storage_uri: StorageUri
    @param storage_uri: StorageUri specifying a bucket or a bucket+object

    @rtype: L{AWSAuthConnection<boto.gs.connection.AWSAuthConnection>}
    @return: A connection to storage service provider of the given URI.
    """
    # Assemble connection kwargs: start from any args stored on self,
    # honor suppress_consec_slashes unless the caller overrode it, then
    # let explicit kwargs win.
    conn_args = dict(self.connection_args or ())
    if (hasattr(self, 'suppress_consec_slashes') and
            'suppress_consec_slashes' not in conn_args):
        conn_args['suppress_consec_slashes'] = (
            self.suppress_consec_slashes)
    conn_args.update(kwargs)
    if not self.connection:
        if self.scheme in self.provider_pool:
            # Reuse a previously-built connection for this provider.
            self.connection = self.provider_pool[self.scheme]
        elif self.scheme == 's3':
            from boto.s3.connection import S3Connection
            self.connection = S3Connection(access_key_id,
                                           secret_access_key,
                                           **conn_args)
            self.provider_pool[self.scheme] = self.connection
        elif self.scheme == 'gs':
            from boto.gs.connection import GSConnection
            # Use OrdinaryCallingFormat instead of boto's default
            # SubdomainCallingFormat because the latter changes the
            # hostname that's checked during cert validation for HTTPS
            # connections, which will fail cert validation (when cert
            # validation is enabled).
            #
            # The same is not true for S3's HTTPS certificates. In fact,
            # we don't want to do this for S3 because S3 requires the
            # subdomain to match the location of the bucket. If the
            # proper subdomain is not used, the server will return a 301
            # redirect with no Location header.
            #
            # Note: the following import can't be moved up to the start
            # of this file else it causes a config import failure when
            # run from the resumable upload/download tests.
            from boto.s3.connection import OrdinaryCallingFormat
            conn_args['calling_format'] = OrdinaryCallingFormat()
            self.connection = GSConnection(access_key_id,
                                           secret_access_key,
                                           **conn_args)
            self.provider_pool[self.scheme] = self.connection
        elif self.scheme == 'file':
            from boto.file.connection import FileConnection
            self.connection = FileConnection(self)
        else:
            raise InvalidUriError('Unrecognized scheme "%s"' %
                                  self.scheme)
    self.connection.debug = self.debug
    return self.connection
def get_cors(self, validate=True, headers=None):
    """returns a bucket's CORS XML"""
    if not self.bucket_name:
        raise InvalidUriError('get_cors on bucket-less URI (%s)' % self.uri)
    cors = self.get_bucket(validate, headers).get_cors(headers)
    self.check_response(cors, 'cors', self.uri)
    return cors
def set_canned_acl(self, acl_str, validate=True, headers=None,
                   version_id=None):
    """sets or updates a bucket's acl to a predefined (canned) value"""
    if not self.object_name:
        raise InvalidUriError('set_canned_acl on object-less URI (%s)' %
                              self.uri)
    target = self.get_key(validate, headers)
    self.check_response(target, 'key', self.uri)
    target.set_canned_acl(acl_str, headers, version_id)
def _set_tracker_uri(self, uri):
    """
    Called when we start a new resumable upload or get a new tracker
    URI for the upload. Saves URI and resets upload state.

    Raises InvalidUriError if URI is syntactically invalid.
    """
    parsed = urlparse.urlparse(uri)
    # The tracker URI must be http(s), with a host and a query string.
    if (parsed.scheme.lower() not in ['http', 'https'] or
            not parsed.netloc or not parsed.query):
        raise InvalidUriError('Invalid tracker URI (%s)' % uri)
    # The query string must carry an upload_id parameter.
    qdict = cgi.parse_qs(parsed.query)
    if not qdict or 'upload_id' not in qdict:
        raise InvalidUriError('Invalid tracker URI (%s)' % uri)
    self.tracker_uri = uri
    self.tracker_uri_host = parsed.netloc
    self.tracker_uri_path = '/?%s' % parsed.query
    # Reset upload state: nothing confirmed on the server yet.
    self.server_has_bytes = 0
def get_contents_as_string(self, validate=True, headers=None, cb=None,
                           num_cb=10, torrent=False, version_id=None):
    """Return the contents of the key named by this URI as a string.

    Raises InvalidUriError if this URI names no object.
    """
    if not self.object_name:
        raise InvalidUriError('get_contents_as_string on object-less URI '
                              '(%s)' % self.uri)
    key = self.get_key(validate, headers)
    self.check_response(key, 'key', self.uri)
    return key.get_contents_as_string(headers, cb, num_cb, torrent,
                                      version_id)
def check_response(self, resp, level, uri):
    """Raise InvalidUriError if resp is None (i.e., the lookup for the
    given level -- 'bucket', 'key', etc. -- came back empty); otherwise
    do nothing.
    """
    if resp is not None:
        return
    raise InvalidUriError('Attempt to get %s for "%s" failed.\nThis '
                          'can happen if the URI refers to a non-'
                          'existent object or if you meant to\noperate '
                          'on a directory (e.g., leaving off -R option '
                          'on gsutil cp, mv, or ls of a\nbucket). If a '
                          'version-ful object was specified, you may '
                          'have neglected to\nuse a -v flag.' % (level, uri))
def get_contents_as_string(self, validate=True, headers=None, cb=None,
                           num_cb=10, torrent=False):
    """Return the contents of the key named by this URI as a string.

    Raises InvalidUriError if this URI names no object.
    """
    if not self.object_name:
        raise InvalidUriError('get_contents_as_string on object-less URI '
                              '(%s)' % self.uri)
    return self.get_key(validate, headers).get_contents_as_string(
        headers, cb, num_cb, torrent)
def storage_uri_for_key(key):
    """Returns a StorageUri for the given key.

    :type key: :class:`boto.s3.key.Key` or subclass
    :param key: URI naming bucket + optional object.
    """
    if not isinstance(key, boto.s3.key.Key):
        raise InvalidUriError('Requested key (%s) is not a subclass of '
                              'boto.s3.key.Key' % str(type(key)))
    # Rebuild a scheme://bucket/object URI from the key's provider and
    # location, then delegate to storage_uri() for construction.
    provider_name = key.bucket.connection.provider.get_provider_name()
    return storage_uri('%s://%s/%s' % (provider_name, key.bucket.name,
                                       key.name))
def add_user_grant(self, permission, user_id, recursive=False,
                   validate=True, headers=None):
    """Add a grant for the given user id to the bucket or key this URI
    names.

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        raise InvalidUriError('add_user_grant on bucket-less URI (%s)' %
                              self.uri)
    if self.object_name:
        # Key-level grant.
        key = self.get_key(validate, headers)
        self.check_response(key, 'key', self.uri)
        key.add_user_grant(permission, user_id)
        return
    # Bucket-level grant (optionally applied recursively by the provider).
    self.get_bucket(validate, headers).add_user_grant(
        permission, user_id, recursive, headers)
def clone_replace_name(self, new_name):
    """Build a new BucketStorageUri identical to this one except for the
    object name.

    @type new_name: string
    @param new_name: new object name
    """
    if not self.bucket_name:
        raise InvalidUriError(
            'clone_replace_name() on bucket-less URI %s' % self.uri)
    return BucketStorageUri(self.scheme, self.bucket_name, new_name,
                            self.debug)
def add_email_grant(self, permission, email_address, recursive=False,
                    validate=True, headers=None):
    """Add a grant for the user identified by email_address to the bucket
    or key this URI names.

    Raises InvalidUriError if this URI names no bucket.
    """
    if not self.bucket_name:
        raise InvalidUriError('add_email_grant on bucket-less URI (%s)' %
                              self.uri)
    if self.object_name:
        # Key-level grant.
        self.get_key(validate, headers).add_email_grant(permission,
                                                        email_address)
        return
    # Bucket-level grant (optionally applied recursively by the provider).
    self.get_bucket(validate, headers).add_email_grant(
        permission, email_address, recursive, headers)