def _copy(self, src, dst, recursive, use_src_basename):
  src_st = self.stats(src)
  if src_st.isDir and not recursive:
    return  # omitting directory

  dst = s3.abspath(src, dst)
  dst_st = self._stats(dst)
  if src_st.isDir and dst_st and not dst_st.isDir:
    raise S3FileSystemException("Cannot overwrite non-directory '%s' with directory '%s'" % (dst, src))

  src_bucket, src_key = s3.parse_uri(src)[:2]
  dst_bucket, dst_key = s3.parse_uri(dst)[:2]

  keep_src_basename = use_src_basename and dst_st and dst_st.isDir
  src_bucket = self._get_bucket(src_bucket)
  dst_bucket = self._get_bucket(dst_bucket)

  if keep_src_basename:
    cut = len(posixpath.dirname(src_key))  # cut off the parent directory name
    if cut:
      cut += 1
  else:
    cut = len(src_key)
    if not src_key.endswith('/'):
      cut += 1

  for key in src_bucket.list(prefix=src_key):
    if not key.name.startswith(src_key):
      raise S3FileSystemException(_("Invalid key to transform: %s") % key.name)
    dst_name = posixpath.normpath(s3.join(dst_key, key.name[cut:]))

    if self.isdir(normpath(self.join(S3A_ROOT, key.bucket.name, key.name))):
      dst_name = self._append_separator(dst_name)

    key.copy(dst_bucket, dst_name)
def _copy(self, src, dst, recursive, use_src_basename):
  src_st = self.stats(src)
  if src_st.isDir and not recursive:
    return  # omitting directory

  dst = s3.abspath(src, dst)
  dst_st = self._stats(dst)
  if src_st.isDir and dst_st and not dst_st.isDir:
    raise IOError(errno.EEXIST, "Cannot overwrite non-directory '%s' with directory '%s'" % (dst, src))

  src_bucket, src_key = s3.parse_uri(src)[:2]
  dst_bucket, dst_key = s3.parse_uri(dst)[:2]

  keep_src_basename = use_src_basename and dst_st and dst_st.isDir
  src_bucket = self._get_bucket(src_bucket)
  dst_bucket = self._get_bucket(dst_bucket)

  if keep_src_basename:
    cut = len(posixpath.dirname(src_key))  # cut off the parent directory name
    if cut:
      cut += 1
  else:
    cut = len(src_key)
    if not src_key.endswith('/'):
      cut += 1

  for key in src_bucket.list(prefix=src_key):
    if not key.name.startswith(src_key):
      raise RuntimeError("Invalid key to transform: %s" % key.name)
    dst_name = posixpath.normpath(s3.join(dst_key, key.name[cut:]))
    key.copy(dst_bucket, dst_name)
def _copy(self, src, dst, recursive, use_src_basename):
  src_st = self.stats(src)
  if src_st.isDir and not recursive:
    return  # omitting directory

  dst = s3.abspath(src, dst)
  dst_st = self._stats(dst)
  if src_st.isDir and dst_st and not dst_st.isDir:
    raise IOError(errno.EEXIST, "Cannot overwrite non-directory '%s' with directory '%s'" % (dst, src))

  src_bucket, src_key = s3.parse_uri(src)[:2]
  dst_bucket, dst_key = s3.parse_uri(dst)[:2]

  keep_src_basename = use_src_basename and dst_st and dst_st.isDir
  src_bucket = self._get_bucket(src_bucket)
  dst_bucket = self._get_bucket(dst_bucket)

  if keep_src_basename:
    cut = len(posixpath.dirname(src_key))  # cut off the parent directory name
    if cut:
      cut += 1
  else:
    cut = len(src_key)
    if not src_key.endswith('/'):
      cut += 1

  for key in src_bucket.list(prefix=src_key):
    if not key.name.startswith(src_key):
      raise RuntimeError(_("Invalid key to transform: %s") % key.name)
    dst_name = posixpath.normpath(s3.join(dst_key, key.name[cut:]))
    key.copy(dst_bucket, dst_name)
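# Hedged standalone sketch (not from the snippets above): illustrates the `cut`
# arithmetic the _copy variants use to map source keys onto destination keys.
# It uses posixpath.join as a stand-in for s3.join, and the key names below are
# made-up assumptions for illustration only.
import posixpath

def rebase_key(src_key, dst_key, key_name, keep_src_basename):
  if keep_src_basename:
    cut = len(posixpath.dirname(src_key))  # keep the source basename, drop its parent prefix
    if cut:
      cut += 1  # also drop the '/' that follows the parent prefix
  else:
    cut = len(src_key)
    if not src_key.endswith('/'):
      cut += 1  # drop the '/' separating src_key from its children
  return posixpath.normpath(posixpath.join(dst_key, key_name[cut:]))

# e.g. copying s3://b/a/dir into s3://b/dst while keeping the source basename:
assert rebase_key('a/dir', 'dst', 'a/dir/file.txt', True) == 'dst/dir/file.txt'
# ...and without keeping it (contents land directly under dst):
assert rebase_key('a/dir', 'dst', 'a/dir/file.txt', False) == 'dst/file.txt'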
def rmtree(self, path, skipTrash=True):
  if not skipTrash:
    raise NotImplementedError(_('Moving to trash is not implemented for S3'))

  bucket_name, key_name = s3.parse_uri(path)[:2]
  if bucket_name and not key_name:
    raise NotImplementedError(_('Deleting a bucket is not implemented for S3'))

  key = self._get_key(path, validate=False)
  if key.exists():
    to_delete = iter([key])
  else:
    to_delete = iter([])

  if self.isdir(path):
    # add `/` to prevent removing of `s3://b/a_new` trying to remove `s3://b/a`
    prefix = self._append_separator(key.name)
    keys = key.bucket.list(prefix=prefix)
    to_delete = itertools.chain(keys, to_delete)
  result = key.bucket.delete_keys(to_delete)
  if result.errors:
    msg = "%d errors occurred during deleting '%s':\n%s" % (len(result.errors), path, '\n'.join(map(repr, result.errors)))
    LOG.error(msg)
    raise IOError(msg)
def listdir_stats(self, path, glob=None):
  if glob is not None:
    raise NotImplementedError(_("Option `glob` is not implemented"))

  if S3FileSystem.isroot(path):
    try:
      return sorted(
        [S3Stat.from_bucket(b, self.fs) for b in self._s3_connection.get_all_buckets(headers=self.header_values)],
        key=lambda x: x.name
      )
    except S3FileSystemException as e:
      raise e
    except S3ResponseError as e:
      if 'Forbidden' in str(e) or (hasattr(e, 'status') and e.status == 403):
        raise S3ListAllBucketsException(
          _('You do not have permissions to list all buckets. Please specify a bucket name you have access to.'))
      else:
        raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e.reason)
    except Exception as e:
      raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e)

  bucket_name, prefix = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  prefix = self._append_separator(prefix)
  res = []
  for item in bucket.list(prefix=prefix, delimiter='/', headers=self.header_values):
    if isinstance(item, Prefix):
      res.append(S3Stat.from_key(Key(item.bucket, item.name), is_dir=True, fs=self.fs))
    else:
      if item.name == prefix:
        continue
      res.append(self._stats_key(item, self.fs))
  return res
def rmtree(self, path, skipTrash=True):
  if not skipTrash:
    raise NotImplementedError(_('Moving to trash is not implemented for S3'))

  bucket_name, key_name = s3.parse_uri(path)[:2]
  if bucket_name and not key_name:
    self._delete_bucket(bucket_name)
  else:
    if self.isdir(path):
      path = self._append_separator(path)  # Really need to make sure we end with a '/'

    key = self._get_key(path, validate=False)
    if key.exists():
      to_delete = [key]
      dir_keys = []

      if self.isdir(path):
        dir_keys = key.bucket.list(prefix=path)
        to_delete = itertools.chain(dir_keys, to_delete)

      if not dir_keys:
        # Avoid Raz bulk delete issue
        deleted_key = key.delete()
        if deleted_key.exists():
          raise S3FileSystemException('Could not delete key %s' % deleted_key)
      else:
        result = key.bucket.delete_keys(to_delete)
        if result.errors:
          msg = "%d errors occurred while attempting to delete the following S3 paths:\n%s" % (
            len(result.errors), '\n'.join(['%s: %s' % (error.key, error.message) for error in result.errors])
          )
          LOG.error(msg)
          raise S3FileSystemException(msg)
def _get_key(self, path, validate=True):
  bucket_name, key_name = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)

  try:
    return bucket.get_key(key_name, validate=validate)
  except BotoClientError as e:
    raise S3FileSystemException(_('Failed to access path at "%s": %s') % (path, e.reason))
def rmtree(self, path, skipTrash=True):
  if not skipTrash:
    raise NotImplementedError(_('Moving to trash is not implemented for S3'))

  bucket_name, key_name = s3.parse_uri(path)[:2]
  if bucket_name and not key_name:
    self._delete_bucket(bucket_name)
  else:
    key = self._get_key(path, validate=False)
    if key.exists():
      to_delete = iter([key])
    else:
      to_delete = iter([])

    if self.isdir(path):
      # add `/` to prevent removing of `s3://b/a_new` trying to remove `s3://b/a`
      prefix = self._append_separator(key.name)
      keys = key.bucket.list(prefix=prefix)
      to_delete = itertools.chain(keys, to_delete)
    result = key.bucket.delete_keys(to_delete)
    if result.errors:
      msg = "%d errors occurred while attempting to delete the following S3 paths:\n%s" % (
        len(result.errors), '\n'.join(['%s: %s' % (error.key, error.message) for error in result.errors])
      )
      LOG.error(msg)
      raise S3FileSystemException(msg)
def mkdir(self, path, *args, **kwargs):
  """
  Creates a directory and any parent directory if necessary.

  Actually it creates an empty object: s3://[bucket]/[path]/
  """
  bucket_name, key_name = s3.parse_uri(path)[:2]
  if not BUCKET_NAME_PATTERN.match(bucket_name):
    raise S3FileSystemException(_('Invalid bucket name: %s') % bucket_name)

  try:
    self._get_or_create_bucket(bucket_name)
  except S3FileSystemException as e:
    raise e
  except S3ResponseError as e:
    raise S3FileSystemException(_('Failed to create S3 bucket "%s": %s: %s') % (bucket_name, e.reason, e.body))
  except Exception as e:
    raise S3FileSystemException(_('Failed to create S3 bucket "%s": %s') % (bucket_name, e))

  stats = self._stats(path)
  if stats:
    if stats.isDir:
      return None
    else:
      raise S3FileSystemException("'%s' already exists and is not a directory" % path)

  path = self._append_separator(path)  # folder-key should end with '/'
  self.create(path)  # create an empty object
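# Hedged sketch (not part of the snippets above): what the mkdir variants boil down
# to at the boto 2 level. The bucket and key names are illustrative assumptions; an
# S3 "directory" here is just a zero-byte object whose key ends with '/'.
import boto

conn = boto.connect_s3()               # credentials resolved from the environment / boto config
bucket = conn.get_bucket('my-bucket')  # assumes the bucket already exists
dir_key = bucket.new_key('some/dir/')  # trailing '/' marks the key as a folder placeholder
dir_key.set_contents_from_string('')   # empty body; the object only exists to model the directory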
def listdir_stats(self, path, glob=None):
  if glob is not None:
    raise NotImplementedError(_("Option `glob` is not implemented"))

  if s3.is_root(path):
    try:
      return sorted([S3Stat.from_bucket(b) for b in self._s3_connection.get_all_buckets()], key=lambda x: x.name)
    except S3FileSystemException as e:
      raise e
    except S3ResponseError as e:
      raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e.reason)
    except Exception as e:
      raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e)

  bucket_name, prefix = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  prefix = self._append_separator(prefix)
  res = []
  for item in bucket.list(prefix=prefix, delimiter='/'):
    if isinstance(item, Prefix):
      res.append(S3Stat.from_key(Key(item.bucket, item.name), is_dir=True))
    else:
      if item.name == prefix:
        continue
      res.append(self._stats_key(item))
  return res
def rmtree(self, path, skipTrash=False):
  if not skipTrash:
    raise NotImplementedError(_('Moving to trash is not implemented for S3'))

  bucket_name, key_name = s3.parse_uri(path)[:2]
  if bucket_name and not key_name:
    raise NotImplementedError(_('Deleting a bucket is not implemented for S3'))

  key = self._get_key(path, validate=False)
  if key.exists():
    to_delete = iter([key])
  else:
    to_delete = iter([])

  if self.isdir(path):
    # add `/` to prevent removing of `s3://b/a_new` trying to remove `s3://b/a`
    prefix = self._append_separator(key.name)
    keys = key.bucket.list(prefix=prefix)
    to_delete = itertools.chain(keys, to_delete)
  result = key.bucket.delete_keys(to_delete)
  if result.errors:
    msg = "%d errors occurred during deleting '%s':\n%s" % (len(result.errors), path, '\n'.join(map(repr, result.errors)))
    LOG.error(msg)
    raise IOError(msg)
def test_full_copy(self):
  src_path = self.get_test_path('test_full_copy_src')
  dst_path = self.get_test_path('test_full_copy_dst')

  src_file_path = join(src_path, 'file.txt')
  dst_file_path = join(dst_path, 'file.txt')

  with self.cleaning(src_path, dst_path):
    self.fs.mkdir(src_path)
    self.fs.mkdir(dst_path)

    data = "To boldly go where no one has gone before\n" * 2000
    self.fs.create(src_file_path, data=data)

    # File to directory copy.
    self.fs.copy(src_file_path, dst_path)
    assert_true(self.fs.exists(dst_file_path))

    # Directory to directory copy.
    self.fs.copy(src_path, dst_path, True)
    base_name = parse_uri(src_path)[2]
    dst_folder_path = join(dst_path, base_name)
    assert_true(self.fs.exists(dst_folder_path))
    assert_true(self.fs.exists(join(dst_folder_path, 'file.txt')))

    # Copy directory to file should fail.
    assert_raises(S3FileSystemException, self.fs.copy, src_path, dst_file_path, True)
def test_full_copy(self):
  src_path = self.get_test_path('test_full_copy_src')
  dst_path = self.get_test_path('test_full_copy_dst')

  src_file_path = join(src_path, 'file.txt')
  dst_file_path = join(dst_path, 'file.txt')

  with self.cleaning(src_path, dst_path):
    self.fs.mkdir(src_path)
    self.fs.mkdir(dst_path)

    data = "To boldly go where no one has gone before\n" * 2000
    self.fs.create(src_file_path, data=data)

    # File to directory copy.
    self.fs.copy(src_file_path, dst_path)
    assert_true(self.fs.exists(dst_file_path))

    # Directory to directory copy.
    self.fs.copy(src_path, dst_path, True)
    base_name = parse_uri(src_path)[2]
    dst_folder_path = join(dst_path, base_name)
    assert_true(self.fs.exists(dst_folder_path))
    assert_true(self.fs.exists(join(dst_folder_path, 'file.txt')))

    # Copy directory to file should fail.
    assert_raises(IOError, self.fs.copy, src_path, dst_file_path, True)
def rmtree(self, path, skipTrash=True):
  if not skipTrash:
    raise NotImplementedError(_('Moving to trash is not implemented for S3'))

  bucket_name, key_name = s3.parse_uri(path)[:2]
  if bucket_name and not key_name:
    self._delete_bucket(bucket_name)
  else:
    key = self._get_key(path, validate=False)
    if key.exists():
      to_delete = iter([key])
    else:
      to_delete = iter([])

    if self.isdir(path):
      # add `/` to prevent removing of `s3://b/a_new` trying to remove `s3://b/a`
      prefix = self._append_separator(key.name)
      keys = key.bucket.list(prefix=prefix)
      to_delete = itertools.chain(keys, to_delete)
    result = key.bucket.delete_keys(to_delete)
    if result.errors:
      msg = "%d errors occurred while attempting to delete the following S3 paths:\n%s" % (
        len(result.errors), '\n'.join(['%s: %s' % (error.key, error.message) for error in result.errors])
      )
      LOG.error(msg)
      raise S3FileSystemException(msg)
def _get_key(self, path, validate=True):
  bucket_name, key_name = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)

  try:
    return bucket.get_key(key_name, validate=validate)
  except:
    e, exc, tb = sys.exc_info()
    raise ValueError(e)
def check_access(self, path, permission='READ'):
  permission = permission.upper()
  bucket_name, key_name = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  acp = bucket.get_acl()

  for grant in acp.acl.grants:
    if grant.permission == permission or grant.permission == 'FULL_CONTROL':
      # TODO: Check grant.uri for user list too
      return True
  return False
def _get_key(self, path, validate=True):
  bucket_name, key_name = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)

  try:
    return bucket.get_key(key_name, validate=validate)
  except S3ResponseError as e:
    if e.status == 301:
      raise S3FileSystemException(
        _('Failed to access path: "%s" '
          'Check that you have access to read this bucket and that the region is correct.') % path)
    else:
      raise S3FileSystemException(e.message or e.reason)
def parent_path(path):
  parent_dir = S3FileSystem._append_separator(path)
  if not s3.is_root(parent_dir):
    bucket_name, key_name, basename = s3.parse_uri(path)
    if not basename:  # bucket is top-level, so return root
      parent_dir = S3_ROOT
    else:
      bucket_path = '%s%s' % (S3_ROOT, bucket_name)
      key_path = '/'.join(key_name.split('/')[:-1])
      parent_dir = s3.abspath(bucket_path, key_path)
  return parent_dir
def parent_path(path):
  parent_dir = S3FileSystem._append_separator(path)
  if not s3.is_root(parent_dir):
    bucket_name, key_name, basename = s3.parse_uri(path)
    if not basename:  # bucket is top-level, so return root
      parent_dir = S3A_ROOT
    else:
      bucket_path = '%s%s' % (S3A_ROOT, bucket_name)
      key_path = '/'.join(key_name.split('/')[:-1])
      parent_dir = s3.abspath(bucket_path, key_path)
  return parent_dir
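# Hedged sketch (assumptions, not the module's own helpers): what the two parent_path
# variants compute, reduced to plain string handling on s3a:// URIs. The scheme prefix
# and the example paths below are illustrative assumptions.
import posixpath

S3A_ROOT = 's3a://'  # assumed scheme prefix, matching the second variant above

def naive_parent_path(path):
  rest = path[len(S3A_ROOT):].rstrip('/')
  if '/' not in rest:  # bucket (or root) is top-level, so the parent is the root
    return S3A_ROOT
  bucket, key = rest.split('/', 1)
  parent_key = posixpath.dirname(key)
  return S3A_ROOT + bucket + ('/' + parent_key if parent_key else '/')

# e.g. naive_parent_path('s3a://bucket/dir/file.txt') == 's3a://bucket/dir'
#      naive_parent_path('s3a://bucket/file.txt')     == 's3a://bucket/'
#      naive_parent_path('s3a://bucket')              == 's3a://'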
def mkdir(self, path, *args, **kwargs):
  """
  Creates a directory and any parent directory if necessary.

  Actually it creates an empty object: s3://[bucket]/[path]/
  """
  bucket_name, key_name = s3.parse_uri(path)[:2]
  if not BUCKET_NAME_PATTERN.match(bucket_name):
    raise S3FileSystemException(_('Invalid bucket name: %s') % bucket_name)

  try:
    self._get_or_create_bucket(bucket_name)
  except S3FileSystemException as e:
    raise e
def mkdir(self, path, *args, **kwargs):
  """
  Creates a directory and any parent directory if necessary.

  Actually it creates an empty object: s3://[bucket]/[path]/
  """
  bucket_name, key_name = s3.parse_uri(path)[:2]
  if not BUCKET_NAME_PATTERN.match(bucket_name):
    raise S3FileSystemException(_('Invalid bucket name: %s') % bucket_name)

  try:
    self._get_or_create_bucket(bucket_name)
  except S3ResponseError as e:
    raise S3FileSystemException(_('Failed to create S3 bucket "%s": %s') % (bucket_name, e.reason))
def _get_key(self, path, validate=True):
  bucket_name, key_name = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)

  try:
    return bucket.get_key(key_name, validate=validate)
  except BotoClientError as e:
    raise S3FileSystemException(_('Failed to access path at "%s": %s') % (path, e.reason))
  except S3ResponseError as e:
    if e.status in (301, 400):
      raise S3FileSystemException(
        _('Failed to access path: "%s" '
          'Check that you have access to read this bucket and that the region is correct: %s') % (path, e.message or e.reason))
    elif e.status == 403:
      raise S3FileSystemException(_('User is not authorized to access path at "%s".' % path))
    else:
      raise S3FileSystemException(e.message or e.reason)
def __init__(self, request):
  super(S3FileUploadHandler, self).__init__(request)
  self.chunk_size = DEFAULT_WRITE_SIZE
  self.destination = request.GET.get('dest', None)  # GET param avoids infinite looping
  self.target_path = None
  self.file = None
  self._request = request
  self._fs = self._get_s3fs(request)
  self._mp = None
  self._part_num = 1

  if self._is_s3_upload():
    self.bucket_name, self.key_name = parse_uri(self.destination)[:2]
    # Verify that the path exists
    self._fs._stats(self.destination)
    self._bucket = self._fs._get_bucket(self.bucket_name)
def mkdir(self, path, *args, **kwargs):
  """
  Creates a directory and any parent directory if necessary.

  Actually it creates an empty object: s3://[bucket]/[path]/
  """
  bucket_name, key_name = s3.parse_uri(path)[:2]
  self._get_or_create_bucket(bucket_name)

  stats = self._stats(path)
  if stats:
    if stats.isDir:
      return None
    else:
      raise IOError(errno.ENOTDIR, "'%s' already exists and is not a directory" % path)

  path = self._append_separator(path)  # folder-key should end with '/'
  self.create(path)  # create an empty object
def mkdir(self, path, *args, **kwargs):
  """
  Creates a directory and any parent directory if necessary.

  Actually it creates an empty object: s3://[bucket]/[path]/
  """
  bucket_name, key_name = s3.parse_uri(path)[:2]
  self._get_or_create_bucket(bucket_name)

  stats = self._stats(path)
  if stats:
    if stats.isDir:
      return None
    else:
      raise IOError(errno.ENOTDIR, "'%s' already exists and is not a directory" % path)

  path = self._append_separator(path)  # folder-key should end with '/'
  self.create(path)  # create an empty object
def __init__(self, request):
  super(S3FileUploadHandler, self).__init__(request)
  self.chunk_size = DEFAULT_WRITE_SIZE
  self.destination = request.GET.get('dest', None)  # GET param avoids infinite looping
  self.target_path = None
  self.file = None
  self._request = request
  self._fs = self._get_s3fs(request)
  self._mp = None
  self._part_num = 1

  if self._is_s3_upload():
    # Check access permissions before attempting upload
    if not self._fs.check_access(self.destination, permission='WRITE'):
      raise S3FileUploadError(_('Insufficient permissions to write to S3 path "%s".') % self.destination)

    self.bucket_name, self.key_name = parse_uri(self.destination)[:2]
    # Verify that the path exists
    self._fs._stats(self.destination)
    self._bucket = self._fs._get_bucket(self.bucket_name)
def listdir_stats(self, path, glob=None):
  if glob is not None:
    raise NotImplementedError(_("Option `glob` is not implemented"))

  if s3.is_root(path):
    self._init_bucket_cache()
    return sorted([S3Stat.from_bucket(b) for b in self._bucket_cache.values()], key=lambda x: x.name)

  bucket_name, prefix = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  prefix = self._append_separator(prefix)
  res = []
  for item in bucket.list(prefix=prefix, delimiter='/'):
    if isinstance(item, Prefix):
      res.append(S3Stat.from_key(Key(item.bucket, item.name), is_dir=True))
    else:
      if item.name == prefix:
        continue
      res.append(self._stats_key(item))
  return res
def listdir_stats(self, path, glob=None):
  if glob is not None:
    raise NotImplementedError(_("Option `glob` is not implemented"))

  if s3.is_root(path):
    self._init_bucket_cache()
    return [S3Stat.from_bucket(b) for b in self._bucket_cache.values()]

  bucket_name, prefix = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  prefix = self._append_separator(prefix)
  res = []
  for item in bucket.list(prefix=prefix, delimiter='/'):
    if isinstance(item, Prefix):
      res.append(S3Stat.from_key(Key(item.bucket, item.name), is_dir=True))
    else:
      if item.name == prefix:
        continue
      res.append(self._stats_key(item))
  return res
def __init__(self, request):
  super(S3FileUploadHandler, self).__init__(request)
  self.chunk_size = DEFAULT_WRITE_SIZE
  self.destination = request.GET.get('dest', None)  # GET param avoids infinite looping
  self.target_path = None
  self.file = None
  self._request = request
  self._fs = self._get_s3fs(request)
  self._mp = None
  self._part_num = 1

  if self._is_s3_upload():
    # Check access permissions before attempting upload
    if not self._fs.check_access(self.destination, permission='WRITE'):
      raise S3FileUploadError(_('Insufficient permissions to write to S3 path "%s".') % self.destination)

    self.bucket_name, self.key_name = parse_uri(self.destination)[:2]
    # Verify that the path exists
    self._fs._stats(self.destination)
    self._bucket = self._fs._get_bucket(self.bucket_name)
def listdir(self, path, glob=None):
  return [s3.parse_uri(x.path)[2] for x in self.listdir_stats(path, glob)]
def get_key(cls, path, validate=False):
  bucket_name, key_name = parse_uri(path)[:2]
  bucket = cls.s3_connection.get_bucket(bucket_name)
  return bucket.get_key(key_name, validate=validate)
  try:
    return sorted([S3Stat.from_bucket(b) for b in self._s3_connection.get_all_buckets()], key=lambda x: x.name)
  except S3FileSystemException as e:
    raise e
  except S3ResponseError as e:
    raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e.reason)
  except Exception as e:
    raise S3FileSystemException(_('Failed to retrieve buckets: %s') % e)

  bucket_name, prefix = s3.parse_uri(path)[:2]
  bucket = self._get_bucket(bucket_name)
  prefix = self._append_separator(prefix)
  res = []
  for item in bucket.list(prefix=prefix, delimiter='/'):
    if isinstance(item, Prefix):
      res.append(S3Stat.from_key(Key(item.bucket, item.name), is_dir=True))
    else:
      if item.name == prefix:
        continue
      res.append(self._stats_key(item))
  return res

@translate_s3_error
def listdir(self, path, glob=None):
def listdir(self, path, glob=None):
  return [s3.parse_uri(x.path)[2] for x in self.listdir_stats(path, glob)]