def _path_open(
        self,
        path,
        mode,
        mime_type='application/octet-stream',
        compression_type=CompressionTypes.AUTO):
    """Open the file at ``path`` in ``mode``, wrapping it when compressed.

    Args:
      path: string path of the file to open.
      mode: mode string, handed unchanged to ``S3IO.open``.
      mime_type: MIME type requested by the caller; the value actually used is
        whatever ``CompressionTypes.mime_type`` returns for the resolved
        compression type.
      compression_type: requested compression; resolved through
        ``FileSystem._get_compression_type`` before use.

    Returns:
      The object returned by ``S3IO.open`` when the resolved compression type
      is ``CompressionTypes.UNCOMPRESSED``; otherwise that object wrapped in a
      ``CompressedFile``.
    """
    compression_type = FileSystem._get_compression_type(path, compression_type)
    mime_type = CompressionTypes.mime_type(compression_type, mime_type)
    # Build the client from this filesystem's options, exactly as exists(),
    # size(), copy() etc. do, so that opening a file does not silently fall
    # back to a default-configured client.
    s3_client = s3io.S3IO(options=self._options)
    raw_file = s3_client.open(path, mode, mime_type=mime_type)
    if compression_type == CompressionTypes.UNCOMPRESSED:
        return raw_file
    return CompressedFile(raw_file, compression_type=compression_type)
def setUp(self):
    """Prepare ``self.client`` and ``self.aws`` for the tests.

    With ``USE_MOCK`` set, a ``FakeS3Client`` is created, the bucket parsed
    from ``TEST_DATA_PATH`` is registered in its ``known_buckets``, and the
    ``S3IO`` wrapper is built around it. Otherwise a default ``S3IO`` is
    created and its own client is reused.
    """
    # These tests can be run locally against a mock S3 client, or as
    # integration tests against the real S3 client.
    self.USE_MOCK = True

    # If you're running integration tests with S3, set this variable to be an
    # s3 path that you have access to where test data can be written. If
    # you're just running tests against the mock, this can be any s3 path. It
    # should end with a '/'.
    self.TEST_DATA_PATH = 's3://random-data-sets/beam_tests/'

    if not self.USE_MOCK:
        # Integration mode: let S3IO build its own client and share it.
        self.aws = s3io.S3IO()
        self.client = self.aws.client
        return

    # Mock mode: the fake client has to know the test bucket up front.
    self.client = fake_client.FakeS3Client()
    bucket_name, _ = s3io.parse_s3_path(self.TEST_DATA_PATH)
    self.client.known_buckets.add(bucket_name)
    self.aws = s3io.S3IO(self.client)
def exists(self, path):
    """Report whether ``path`` exists, as answered by ``S3IO.exists``.

    Args:
      path: string path to check.

    Returns:
      The result of ``S3IO.exists`` for ``path``.

    Raises:
      ``BeamIOError``: if the underlying call raises any exception; the
        original exception is attached under ``path``.
    """
    try:
        s3_client = s3io.S3IO(options=self._options)
        return s3_client.exists(path)
    except Exception as err:  # pylint: disable=broad-except
        raise BeamIOError("exists() operation failed", {path: err})
def delete(self, paths):
    """Delete the objects at ``paths`` through ``S3IO.delete_paths``.

    Args:
      paths: list of paths to delete; passed unchanged to
        ``S3IO.delete_paths``.

    Raises:
      ``BeamIOError``: if ``delete_paths`` reports a non-``None`` error for at
        least one path. The error carries a dict mapping each failed path to
        its error.
    """
    # Use this filesystem's options, consistent with the other operations in
    # this class, rather than a default-configured client.
    s3_client = s3io.S3IO(options=self._options)
    results = s3_client.delete_paths(paths)

    # Keep only the entries whose error slot is populated.
    exceptions = {
        path: error
        for path, error in results.items()
        if error is not None
    }
    if exceptions:
        raise BeamIOError("Delete operation failed", exceptions)
def last_updated(self, path):
    """Return the last-updated value ``S3IO.last_updated`` reports for ``path``.

    Args:
      path: string path of the file.

    Returns:
      The result of ``S3IO.last_updated`` for ``path``.

    Raises:
      ``BeamIOError``: if the underlying call raises any exception; the
        original exception is attached under ``path``.
    """
    try:
        s3_client = s3io.S3IO(options=self._options)
        return s3_client.last_updated(path)
    except Exception as err:  # pylint: disable=broad-except
        raise BeamIOError("last_updated operation failed", {path: err})
def size(self, path):
    """Return the size ``S3IO.size`` reports for ``path``.

    Args:
      path: string path in question.

    Returns:
      The result of ``S3IO.size`` for ``path``.

    Raises:
      ``BeamIOError``: if the underlying call raises any exception; the
        original exception is attached under ``path``.
    """
    try:
        s3_client = s3io.S3IO(options=self._options)
        return s3_client.size(path)
    except Exception as err:  # pylint: disable=broad-except
        raise BeamIOError("size() operation failed", {path: err})
def copy(self, source_file_names, destination_file_names):
    """Copy each source path to the destination at the same list position.

    Args:
      source_file_names: list of source paths to copy from.
      destination_file_names: list of destination paths, one per source.

    Returns:
      Whatever ``S3IO.copy_paths`` returns for the (source, destination)
      pairs. NOTE(review): that result is returned as-is and is not inspected
      for per-pair failures here.

    Raises:
      ``BeamIOError``: if the two lists differ in length.
    """
    if len(source_file_names) != len(destination_file_names):
        raise BeamIOError(
            'Unable to copy unequal number of sources and destinations')

    # Pair the lists positionally; materialised as a list before the call.
    pairs = list(zip(source_file_names, destination_file_names))
    s3_client = s3io.S3IO(options=self._options)
    return s3_client.copy_paths(pairs)
def checksum(self, path):
    """Return the checksum ``S3IO.checksum`` reports for ``path``.

    Args:
      path: string path of a file.

    Returns:
      The result of ``S3IO.checksum`` for ``path``.

    Raises:
      ``BeamIOError``: if the underlying call raises any exception; the
        original exception is attached under ``path``.
    """
    try:
        s3_client = s3io.S3IO(options=self._options)
        return s3_client.checksum(path)
    except Exception as err:  # pylint: disable=broad-except
        raise BeamIOError("Checksum operation failed", {path: err})
def metadata(self, path):
    """Build a ``FileMetadata`` for ``path`` from ``S3IO._status``.

    Args:
      path: string path of a file.

    Returns:
      ``FileMetadata`` constructed from ``path`` and the ``'size'`` and
      ``'last_updated'`` entries of the status mapping.

    Raises:
      ``BeamIOError``: if the status lookup or the construction of the
        metadata raises any exception; the original exception is attached
        under ``path``.
    """
    try:
        s3_client = s3io.S3IO(options=self._options)
        status = s3_client._status(path)
        size_value = status['size']
        updated_value = status['last_updated']
        return FileMetadata(path, size_value, updated_value)
    except Exception as err:  # pylint: disable=broad-except
        raise BeamIOError("Metadata operation failed", {path: err})
def rename(self, source_file_names, destination_file_names):
    """Rename each source path to the destination at the same list position.

    Args:
      source_file_names: list of paths to move.
      destination_file_names: list of target paths, one per source.

    Raises:
      ``BeamIOError``: if the two lists differ in length, or if
        ``S3IO.rename_files`` reports a non-``None`` error for at least one
        pair. In the latter case the error carries a dict keyed by
        ``(source, destination)``.
    """
    if len(source_file_names) != len(destination_file_names):
        raise BeamIOError(
            'Unable to rename unequal number of sources and destinations')

    pairs = list(zip(source_file_names, destination_file_names))
    outcomes = s3io.S3IO(options=self._options).rename_files(pairs)

    # Each outcome is a (source, destination, error) triple; collect the
    # ones whose error slot is populated.
    failures = {}
    for src, dest, error in outcomes:
        if error is not None:
            failures[(src, dest)] = error

    if failures:
        raise BeamIOError("Rename operation failed", failures)
def _list(self, dir_or_prefix):
    """Yield a ``FileMetadata`` for every entry under ``dir_or_prefix``.

    This is a generator, so the listing call and any resulting error only
    happen once the caller starts iterating.

    Args:
      dir_or_prefix: (string) directory or location prefix, passed unchanged
        to ``S3IO.list_prefix``.

    Returns:
      Generator of ``FileMetadata`` objects, one per (path, size) item of the
      mapping returned by ``S3IO.list_prefix``.

    Raises:
      ``BeamIOError``: if the listing raises any exception; the original
        exception is attached under ``dir_or_prefix``.
    """
    try:
        # Use this filesystem's options, consistent with the other operations
        # in this class, rather than a default-configured client.
        s3_client = s3io.S3IO(options=self._options)
        listing = s3_client.list_prefix(dir_or_prefix)
        for path, size in iteritems(listing):
            yield FileMetadata(path, size)
    except Exception as e:  # pylint: disable=broad-except
        raise BeamIOError("List operation failed", {dir_or_prefix: e})