class TransferManager(object):
    """Submit uploads, downloads, copies and deletes and track their futures.

    Each public transfer method validates ``extra_args`` against the
    matching ``ALLOWED_*_ARGS`` list, wraps the request in a ``CallArgs``
    and hands a submission task to the submission executor. The manager can
    be used as a context manager; leaving the ``with`` block shuts it down.
    """

    ALLOWED_DOWNLOAD_ARGS = ALLOWED_DOWNLOAD_ARGS

    ALLOWED_UPLOAD_ARGS = [
        'ACL',
        'CacheControl',
        'ContentDisposition',
        'ContentEncoding',
        'ContentLanguage',
        'ContentType',
        'Expires',
        'GrantFullControl',
        'GrantRead',
        'GrantReadACP',
        'GrantWriteACP',
        'Metadata',
        'RequestPayer',
        'ServerSideEncryption',
        'StorageClass',
        'SSECustomerAlgorithm',
        'SSECustomerKey',
        'SSECustomerKeyMD5',
        'SSEKMSKeyId',
        'WebsiteRedirectLocation',
        'RetentionExpirationDate',
        'RetentionLegalHoldId',
        'RetentionPeriod',
    ]

    # Copies accept everything an upload accepts plus the copy-source
    # conditions and the metadata directive.
    ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [
        'CopySourceIfMatch',
        'CopySourceIfModifiedSince',
        'CopySourceIfNoneMatch',
        'CopySourceIfUnmodifiedSince',
        'CopySourceSSECustomerAlgorithm',
        'CopySourceSSECustomerKey',
        'CopySourceSSECustomerKeyMD5',
        'MetadataDirective'
    ]

    ALLOWED_DELETE_ARGS = [
        'MFA',
        'VersionId',
        'RequestPayer',
    ]

    def __init__(self, client, config=None, osutil=None, executor_cls=None):
        """A transfer manager interface for Amazon S3

        :param client: Client to be used by the manager
        :param config: TransferConfig to associate specific configurations.
            A default ``TransferConfig()`` is created when omitted.
        :param osutil: OSUtils object to use for os-related behavior when
            using with transfer manager. A default ``OSUtils()`` is created
            when omitted.

        :type executor_cls: ibm_s3transfer.futures.BaseExecutor
        :param executor_cls: The class of executor to use with the
            transfer manager. By default, concurrent.futures.ThreadPoolExecutor
            is used.
        """
        self._client = client
        self._config = config
        if config is None:
            self._config = TransferConfig()
        self._osutil = osutil
        if osutil is None:
            self._osutil = OSUtils()
        self._coordinator_controller = TransferCoordinatorController()
        # A counter to create unique id's for each transfer submitted.
        self._id_counter = 0

        # The executor responsible for making S3 API transfer requests
        self._request_executor = BoundedExecutor(
            max_size=self._config.max_request_queue_size,
            max_num_threads=self._config.max_request_concurrency,
            tag_semaphores={
                IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
                    self._config.max_in_memory_upload_chunks),
                IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
                    self._config.max_in_memory_download_chunks)
            },
            executor_cls=executor_cls
        )

        # The executor responsible for submitting the necessary tasks to
        # perform the desired transfer
        self._submission_executor = BoundedExecutor(
            max_size=self._config.max_submission_queue_size,
            max_num_threads=self._config.max_submission_concurrency,
            executor_cls=executor_cls
        )

        # There is one thread available for writing to disk. It will handle
        # downloads for all files.
        self._io_executor = BoundedExecutor(
            max_size=self._config.max_io_queue_size,
            max_num_threads=1,
            executor_cls=executor_cls
        )

        # The component responsible for limiting bandwidth usage if it
        # is configured.
        self._bandwidth_limiter = None
        if self._config.max_bandwidth is not None:
            logger.debug('Setting max_bandwidth to %s',
                         self._config.max_bandwidth)
            leaky_bucket = LeakyBucket(self._config.max_bandwidth)
            self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)

        self._register_handlers()

    def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None):
        """Uploads a file to S3

        :type fileobj: str or seekable file-like object
        :param fileobj: The name of a file to upload or a seekable file-like
            object to upload. It is recommended to use a filename because
            file-like objects may result in higher memory usage.

        :type bucket: str
        :param bucket: The name of the bucket to upload to

        :type key: str
        :param key: The name of the key to upload to

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation. Keys must be in ``ALLOWED_UPLOAD_ARGS``.

        :type subscribers: list(ibm_s3transfer.subscribers.BaseSubscriber)
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :raises ValueError: If ``extra_args`` contains a key that is not
            allowed for uploads.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the upload
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS)
        call_args = CallArgs(
            fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args,
            subscribers=subscribers
        )
        extra_main_kwargs = {}
        if self._bandwidth_limiter:
            extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
        return self._submit_transfer(
            call_args, UploadSubmissionTask, extra_main_kwargs)

    def download(self, bucket, key, fileobj, extra_args=None,
                 subscribers=None):
        """Downloads a file from S3

        :type bucket: str
        :param bucket: The name of the bucket to download from

        :type key: str
        :param key: The name of the key to download from

        :type fileobj: str or seekable file-like object
        :param fileobj: The name of a file to download or a seekable file-like
            object to download. It is recommended to use a filename because
            file-like objects may result in higher memory usage.

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation. Keys must be in ``ALLOWED_DOWNLOAD_ARGS``.

        :type subscribers: list(ibm_s3transfer.subscribers.BaseSubscriber)
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :raises ValueError: If ``extra_args`` contains a key that is not
            allowed for downloads.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the download
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS)
        call_args = CallArgs(
            bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args,
            subscribers=subscribers
        )
        # Downloads always get the dedicated IO executor.
        extra_main_kwargs = {'io_executor': self._io_executor}
        if self._bandwidth_limiter:
            extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter
        return self._submit_transfer(
            call_args, DownloadSubmissionTask, extra_main_kwargs)

    def copy(self, copy_source, bucket, key, extra_args=None,
             subscribers=None, source_client=None):
        """Copies a file in S3

        :type copy_source: dict
        :param copy_source: The name of the source bucket, key name of the
            source object, and optional version ID of the source object. The
            dictionary format is:
            ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note
            that the ``VersionId`` key is optional and may be omitted.

        :type bucket: str
        :param bucket: The name of the bucket to copy to

        :type key: str
        :param key: The name of the key to copy to

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            client operation. Keys must be in ``ALLOWED_COPY_ARGS``.

        :type subscribers: a list of subscribers
        :param subscribers: The list of subscribers to be invoked in the
            order provided based on the event emit during the process of
            the transfer request.

        :type source_client: ibm_botocore or ibm_boto3 Client
        :param source_client: The client to be used for operation that
            may happen at the source object. For example, this client is
            used for the head_object that determines the size of the copy.
            If no client is provided, the transfer manager's client is used
            as the client for the source object.

        :raises ValueError: If ``extra_args`` contains a key that is not
            allowed for copies.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :returns: Transfer future representing the copy
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        if source_client is None:
            source_client = self._client
        self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS)
        call_args = CallArgs(
            copy_source=copy_source, bucket=bucket, key=key,
            extra_args=extra_args, subscribers=subscribers,
            source_client=source_client
        )
        return self._submit_transfer(call_args, CopySubmissionTask)

    def delete(self, bucket, key, extra_args=None, subscribers=None):
        """Delete an S3 object.

        :type bucket: str
        :param bucket: The name of the bucket.

        :type key: str
        :param key: The name of the S3 object to delete.

        :type extra_args: dict
        :param extra_args: Extra arguments that may be passed to the
            DeleteObject call. Keys must be in ``ALLOWED_DELETE_ARGS``.

        :type subscribers: list
        :param subscribers: A list of subscribers to be invoked during the
            process of the transfer request. Note that the ``on_progress``
            callback is not invoked during object deletion.

        :raises ValueError: If ``extra_args`` contains a key that is not
            allowed for deletes.

        :rtype: ibm_s3transfer.futures.TransferFuture
        :return: Transfer future representing the deletion.
        """
        if extra_args is None:
            extra_args = {}
        if subscribers is None:
            subscribers = []
        self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS)
        call_args = CallArgs(
            bucket=bucket, key=key, extra_args=extra_args,
            subscribers=subscribers
        )
        return self._submit_transfer(call_args, DeleteSubmissionTask)

    def _validate_all_known_args(self, actual, allowed):
        """Raise ``ValueError`` on the first key of ``actual`` not in
        ``allowed``.

        :param actual: The ``extra_args`` mapping supplied by the caller.
        :param allowed: The list of permitted key names.
        """
        for kwarg in actual:
            if kwarg not in allowed:
                raise ValueError(
                    "Invalid extra_args key '%s', "
                    "must be one of: %s" % (
                        kwarg, ', '.join(allowed)))

    def _submit_transfer(self, call_args, submission_task_cls,
                         extra_main_kwargs=None):
        """Create a transfer future and queue its submission task.

        :param call_args: The ``CallArgs`` describing the transfer.
        :param submission_task_cls: The submission task class to
            instantiate and submit.
        :param extra_main_kwargs: Optional extra entries merged into the
            main kwargs passed to the submission task.

        :returns: The transfer future created for this request.
        """
        if not extra_main_kwargs:
            extra_main_kwargs = {}

        # Create a TransferFuture to return back to the user
        transfer_future, components = self._get_future_with_components(
            call_args)

        # Add any provided done callbacks to the created transfer future
        # to be invoked on the transfer future being complete.
        for callback in get_callbacks(transfer_future, 'done'):
            components['coordinator'].add_done_callback(callback)

        # Get the main kwargs needed to instantiate the submission task
        main_kwargs = self._get_submission_task_main_kwargs(
            transfer_future, extra_main_kwargs)

        # Submit a SubmissionTask that will submit all of the necessary
        # tasks needed to complete the S3 transfer.
        self._submission_executor.submit(
            submission_task_cls(
                transfer_coordinator=components['coordinator'],
                main_kwargs=main_kwargs
            )
        )

        # Increment the unique id counter for future transfer requests
        self._id_counter += 1

        return transfer_future

    def _get_future_with_components(self, call_args):
        """Build a transfer future plus the components it was built from.

        :returns: A ``(transfer_future, components)`` tuple where
            ``components`` is a dict with ``'meta'`` and ``'coordinator'``
            entries.
        """
        transfer_id = self._id_counter
        # Creates a new transfer future along with its components
        transfer_coordinator = TransferCoordinator(transfer_id=transfer_id)
        # Track the transfer coordinator for transfers to manage.
        self._coordinator_controller.add_transfer_coordinator(
            transfer_coordinator)
        # Also make sure that the transfer coordinator is removed once
        # the transfer completes so it does not stick around in memory.
        transfer_coordinator.add_done_callback(
            self._coordinator_controller.remove_transfer_coordinator,
            transfer_coordinator)
        components = {
            'meta': TransferMeta(call_args, transfer_id=transfer_id),
            'coordinator': transfer_coordinator
        }
        transfer_future = TransferFuture(**components)
        return transfer_future, components

    def _get_submission_task_main_kwargs(
            self, transfer_future, extra_main_kwargs):
        """Return the main kwargs for a submission task.

        The manager's client, config, osutil and request executor are
        combined with ``transfer_future``; entries of ``extra_main_kwargs``
        are applied last and therefore win on key collisions.
        """
        main_kwargs = {
            'client': self._client,
            'config': self._config,
            'osutil': self._osutil,
            'request_executor': self._request_executor,
            'transfer_future': transfer_future
        }
        main_kwargs.update(extra_main_kwargs)
        return main_kwargs

    def _register_handlers(self):
        """Register the upload signalling handlers on the client's events."""
        # Register handlers to enable/disable callbacks on uploads.
        event_name = 'request-created.s3'
        self._client.meta.events.register_first(
            event_name, signal_not_transferring,
            unique_id='s3upload-not-transferring')
        self._client.meta.events.register_last(
            event_name, signal_transferring,
            unique_id='s3upload-transferring')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, *args):
        cancel = False
        cancel_msg = ''
        cancel_exc_type = FatalError
        # If an exception was raised in the context handler, signal to cancel
        # all of the inprogress futures in the shutdown.
        if exc_type:
            cancel = True
            cancel_msg = six.text_type(exc_value)
            if not cancel_msg:
                cancel_msg = repr(exc_value)
            # If it was a KeyboardInterrupt, the cancellation was initiated
            # by the user.
            if isinstance(exc_value, KeyboardInterrupt):
                cancel_exc_type = CancelledError
        self._shutdown(cancel, cancel_msg, cancel_exc_type)

    def shutdown(self, cancel=False, cancel_msg=''):
        """Shutdown the TransferManager

        It will wait till all transfers complete before it completely shuts
        down.

        :type cancel: boolean
        :param cancel: If True, calls TransferFuture.cancel() for
            all in-progress transfers. This is useful if you want the
            shutdown to happen quicker.

        :type cancel_msg: str
        :param cancel_msg: The message to specify if canceling all in-progress
            transfers.
        """
        # Pass the message as the message: the exception type is left at
        # _shutdown's default (CancelledError). Passing ``cancel`` twice
        # would shift ``cancel_msg`` into the ``exc_type`` slot.
        self._shutdown(cancel, cancel_msg)

    def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError):
        """Optionally cancel transfers, wait for them, then stop executors.

        :param cancel: When true, cancel all tracked transfers first.
        :param cancel_msg: The message passed along with the cancellation.
        :param exc_type: The exception class passed along with the
            cancellation.
        """
        if cancel:
            # Cancel all in-flight transfers if requested, before waiting
            # for them to complete.
            self._coordinator_controller.cancel(cancel_msg, exc_type)
        try:
            # Wait until there are no more in-progress transfers. This is
            # wrapped in a try statement because this can be interrupted
            # with a KeyboardInterrupt that needs to be caught.
            self._coordinator_controller.wait()
        except KeyboardInterrupt:
            # If no errors were raised in the try block, the cancel should
            # have no coordinators it needs to run cancel on. If there was
            # an error raised in the try statement we want to cancel all of
            # the inflight transfers before shutting down to speed that
            # process up.
            self._coordinator_controller.cancel('KeyboardInterrupt()')
            raise
        finally:
            # Shutdown all of the executors.
            self._submission_executor.shutdown()
            self._request_executor.shutdown()
            self._io_executor.shutdown()
def test_replace_underlying_executor(self):
    """Submitting through a BoundedExecutor built with a mocked executor
    class should call ``submit`` on the instance that mock produces.
    """
    executor_cls_mock = mock.Mock(BaseExecutor)
    bounded_executor = BoundedExecutor(10, 1, {}, executor_cls_mock)

    task = self.get_task(ReturnFooTask)
    bounded_executor.submit(task)

    # The mock class's return_value stands in for the executor instance.
    underlying_executor = executor_cls_mock.return_value
    self.assertTrue(underlying_executor.submit.called)