Example No. 1
# Imports assumed by this snippet: boto3, traceback, and s3transfer's
# TransferManager.
import traceback

import boto3
from s3transfer.manager import TransferManager


class S3Bucket:
    def __init__(self, bucket, endpoint, id, key, region):
        self.bucket = bucket
        self.service_endpoint = endpoint
        self.aws_access_key_id = id
        self.aws_secret_access_key = key
        self.region_name = region
        self.client = boto3.client(
            's3',
            endpoint_url=self.service_endpoint,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
            region_name=self.region_name)
        self.transfer = TransferManager(self.client, None, None, None)

    def manager_upload(self, file):
        self.transfer.upload(file, self.bucket, file[1:], None, None)

    def upload(self, file, filekey):
        exist = self.client.list_objects(Bucket=self.bucket,
                                         Prefix=filekey[1:])
        # list_objects always returns a dict, so check for actual matches
        # under 'Contents' instead of relying on the truthiness of the
        # response.
        if exist.get('Contents'):
            return False
        try:
            with open(file, 'rb') as f:
                self.client.upload_fileobj(f, self.bucket, filekey[1:])
        except Exception as ex:
            traceback.print_exc()
            raise
        return True
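A minimal usage sketch for the S3Bucket wrapper above; the endpoint, credentials, bucket name, and file paths are placeholders rather than values from the original project.

# Hypothetical values, for illustration only.
store = S3Bucket('my-bucket', 'https://s3.us-east-1.amazonaws.com',
                 'EXAMPLE_ACCESS_KEY_ID', 'EXAMPLE_SECRET_KEY', 'us-east-1')
# upload() strips the leading '/' from the key and skips the upload when an
# object with that prefix already exists.
if store.upload('/tmp/report.csv', '/reports/report.csv'):
    print('uploaded')
else:
    print('object already present, upload skipped')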
Example No. 2
    def test_cntrl_c_in_context_manager_cancels_incomplete_transfers(self):
        # The purpose of this test is to make sure that if an error is raised
        # in the body of the context manager, incomplete transfers will
        # be cancelled, with the value of the exception wrapped in a CancelledError

        # NOTE: The fact that delete() was chosen to test this is arbitrary,
        # other than it being the easiest to set up for the stubber.
        # The specific operation is not important to the purpose of this test.
        num_transfers = 100
        futures = []

        for _ in range(num_transfers):
            self.stubber.add_response('delete_object', {})

        manager = TransferManager(
            self.client,
            TransferConfig(
                max_request_concurrency=1, max_submission_concurrency=1)
        )
        try:
            with manager:
                for i in range(num_transfers):
                    futures.append(manager.delete('mybucket', 'mykey'))
                raise KeyboardInterrupt()
        except KeyboardInterrupt:
            # At least one of the submitted futures should have been
            # cancelled.
            with self.assertRaisesRegexp(
                    CancelledError, 'KeyboardInterrupt()'):
                for future in futures:
                    future.result()
Example No. 3
 def test_enable_disable_callbacks_only_ever_registered_once(self):
     body = CallbackEnablingBody()
     request = create_request_object({
         'method': 'PUT',
         'url': 'https://s3.amazonaws.com',
         'body': body,
         'headers': {},
         'context': {}
     })
     # Create two TransferManager's using the same client
     TransferManager(self.client)
     TransferManager(self.client)
     self.client.meta.events.emit('request-created.s3',
                                  request=request,
                                  operation_name='PutObject')
     # The client should only have the enable/disable callback
     # handlers registered once despite being used for two different
     # TransferManagers.
     self.assertEqual(
         body.enable_callback_call_count, 1,
         'The enable_callback() should have only ever been registered once')
     self.assertEqual(
         body.disable_callback_call_count, 1,
         'The disable_callback() should have only ever been registered '
         'once')
Example No. 4
    def test_cntrl_c_in_context_manager_cancels_incomplete_transfers(self):
        # The purpose of this test is to make sure that if an error is raised
        # in the body of the context manager, incomplete transfers will
        # be cancelled, with the value of the exception wrapped in a CancelledError

        # NOTE: The fact that delete() was chosen to test this is arbitrary,
        # other than it being the easiest to set up for the stubber.
        # The specific operation is not important to the purpose of this test.
        num_transfers = 100
        futures = []

        for _ in range(num_transfers):
            self.stubber.add_response('delete_object', {})

        manager = TransferManager(
            self.client,
            TransferConfig(max_request_concurrency=1,
                           max_submission_concurrency=1))
        try:
            with manager:
                for i in range(num_transfers):
                    futures.append(manager.delete('mybucket', 'mykey'))
                raise KeyboardInterrupt()
        except KeyboardInterrupt:
            # At least one of the submitted futures should have been
            # cancelled.
            with self.assertRaisesRegexp(CancelledError,
                                         'KeyboardInterrupt()'):
                for future in futures:
                    future.result()
Example No. 5
 def test_can_disable_bucket_validation(self):
     s3_object_lambda_arn = (
         'arn:aws:s3-object-lambda:us-west-2:123456789012:'
         'accesspoint:my-accesspoint')
     config = TransferConfig()
     manager = TransferManager(self.client, config)
     manager.VALIDATE_SUPPORTED_BUCKET_VALUES = False
     manager.delete(s3_object_lambda_arn, 'my-key')
Example No. 6
class TestDeleteObject(BaseGeneralInterfaceTest):

    __test__ = True

    def setUp(self):
        super(TestDeleteObject, self).setUp()
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.manager = TransferManager(self.client)

    @property
    def method(self):
        """The transfer manager method to invoke i.e. upload()"""
        return self.manager.delete

    def create_call_kwargs(self):
        """The kwargs to be passed to the transfer manager method"""
        return {
            'bucket': self.bucket,
            'key': self.key,
        }

    def create_invalid_extra_args(self):
        return {
            'BadKwargs': True,
        }

    def create_stubbed_responses(self):
        """A list of stubbed responses that will cause the request to succeed

        Each element of this list is a dictionary that will be used as
        keyword arguments to botocore.Stubber.add_response(). For example::

            [{'method': 'put_object', 'service_response': {}}]
        """
        return [{
            'method': 'delete_object',
            'service_response': {},
            'expected_params': {'Bucket': self.bucket, 'Key': self.key},
        }]

    def create_expected_progress_callback_info(self):
        return []

    def test_known_allowed_args_in_input_shape(self):
        op_model = self.client.meta.service_model.operation_model(
            'DeleteObject')
        for allowed_arg in self.manager.ALLOWED_DELETE_ARGS:
            self.assertIn(allowed_arg, op_model.input_shape.members)

    def test_raise_exception_on_s3_object_lambda_resource(self):
        s3_object_lambda_arn = (
            'arn:aws:s3-object-lambda:us-west-2:123456789012:'
            'accesspoint:my-accesspoint'
        )
        with self.assertRaisesRegexp(ValueError, 'methods do not support'):
            self.manager.delete(s3_object_lambda_arn, self.key)
Example No. 7
 def __init__(self, bucket, endpoint, id, key, region):
     self.bucket = bucket
     self.service_endpoint = endpoint
     self.aws_access_key_id = id
     self.aws_secret_access_key = key
     self.region_name = region
     self.client = boto3.client(
         's3',
         endpoint_url=self.service_endpoint,
         aws_access_key_id=self.aws_access_key_id,
         aws_secret_access_key=self.aws_secret_access_key,
         region_name=self.region_name)
     self.transfer = TransferManager(self.client, None, None, None)
Example No. 8
    def test_uses_bandwidth_limiter(self):
        self.content = b'a' * 1024 * 1024
        self.stream = six.BytesIO(self.content)
        self.config = TransferConfig(max_request_concurrency=1,
                                     max_bandwidth=len(self.content) / 2)
        self._manager = TransferManager(self.client, self.config)

        self.add_head_object_response()
        self.add_successful_get_object_responses()

        start = time.time()
        future = self.manager.download(self.bucket, self.key, self.filename,
                                       self.extra_args)
        future.result()
        # This is just a smoke test to make sure that the limiter is
        # being used, not a test of its exactness. So we set the maximum
        # bandwidth to len(content)/2 per sec and make sure the transfer is
        # noticeably slower. Ideally it would take more than two seconds,
        # but since bandwidth tracking at the very start of a transfer is
        # not entirely accurate, we give ourselves some flexibility by
        # setting the expected time to half of the theoretical time.
        self.assertGreaterEqual(time.time() - start, 1)

        # Ensure that the contents are correct
        with open(self.filename, 'rb') as f:
            self.assertEqual(self.content, f.read())
Example No. 9
    def __init__(self,
                 s3_client,
                 bucket_name,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.s3 = s3_client

        self.transfer_manager = transfer_manager
        if not transfer_manager:
            self.transfer_manager = TransferManager(self.s3)
Example No. 10
    def __call__(self, client, result_queue):
        """Creates a S3TransferHandler instance

        :type client: botocore.client.Client
        :param client: The client to power the S3TransferHandler

        :type result_queue: queue.Queue
        :param result_queue: The result queue to be used to process results
            for the S3TransferHandler

        :returns: A S3TransferHandler instance
        """
        transfer_config = create_transfer_config_from_runtime_config(
            self._runtime_config)
        transfer_config.max_in_memory_upload_chunks = self.MAX_IN_MEMORY_CHUNKS
        transfer_config.max_in_memory_download_chunks = \
            self.MAX_IN_MEMORY_CHUNKS

        transfer_manager = TransferManager(client, transfer_config)

        LOGGER.debug(
            "Using a multipart threshold of %s and a part size of %s",
            transfer_config.multipart_threshold,
            transfer_config.multipart_chunksize
        )
        result_recorder = ResultRecorder()
        result_processor_handlers = [result_recorder]
        self._add_result_printer(result_recorder, result_processor_handlers)
        result_processor = ResultProcessor(
            result_queue, result_processor_handlers)
        command_result_recorder = CommandResultRecorder(
            result_queue, result_recorder, result_processor)

        return S3TransferHandler(
            transfer_manager, self._cli_params, command_result_recorder)
Example No. 11
    def test_limits_in_memory_chunks_for_fileobj(self):
        # Limit the maximum in-memory chunks to one but make the number of
        # threads more than one. This means that the upload will have to
        # happen sequentially despite having many threads available, because
        # data is sequentially partitioned into chunks in memory and, since
        # there can only ever be one in-memory chunk, each upload part will
        # have to happen one at a time.
        self.config.max_request_concurrency = 10
        self.config.max_in_memory_upload_chunks = 1
        self._manager = TransferManager(self.client, self.config)

        # Add some default stubbed responses.
        # These responses are added in order of part number, so if the
        # multipart upload is not done sequentially (which it should be,
        # because we limit the in-memory upload chunks to one), the stubber
        # will raise exceptions for mismatched PartNumber parameters once
        # the upload() method is called on the transfer manager.
        # If there is a mismatch, the stubber error will propagate on
        # the future.result()
        self.add_create_multipart_response_with_default_expected_params()
        self.add_upload_part_responses_with_default_expected_params()
        self.add_complete_multipart_response_with_default_expected_params()
        with open(self.filename, 'rb') as f:
            future = self.manager.upload(f, self.bucket, self.key,
                                         self.extra_args)
            future.result()

        # Make sure that the stubber had all of its stubbed responses consumed.
        self.assert_expected_client_calls_were_correct()
        # Ensure the contents were uploaded in sequential order by checking
        # that the sent contents were in order.
        self.assert_upload_part_bodies_were_correct()
Example No. 12
 def __init__(self, client=None, config=None, osutil=None, manager=None):
     if not client and not manager:
         raise ValueError(
             'Either a boto3.Client or s3transfer.manager.TransferManager '
             'must be provided')
     if manager and any([client, config, osutil]):
         raise ValueError(
             'Manager cannot be provided with client, config, '
             'nor osutil. These parameters are mutually exclusive.')
     if config is None:
         config = TransferConfig()
     if osutil is None:
         osutil = OSUtils()
     if manager:
         self._manager = manager
     else:
         self._manager = TransferManager(client, config, osutil)
Example No. 13
 def setUp(self):
     super(TestMultipartUpload, self).setUp()
     self.chunksize = 4
     self.config = TransferConfig(max_request_concurrency=1,
                                  multipart_threshold=1,
                                  multipart_chunksize=self.chunksize)
     self._manager = TransferManager(self.client, self.config)
     self.multipart_id = 'my-upload-id'
Example No. 14
    def __init__(self, s3_bucket, s3_key, boto3_s3_client=None):
        import boto3
        from s3transfer.manager import TransferManager, TransferConfig

        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.s3_client = boto3_s3_client
        if self.s3_client is None:
            self.s3_client = boto3.client('s3')

        self._internal_queue = BlockingReaderWriterByteStream()
        self._boto3_multipart_upload_workaround_buffer = b''

        self.temp_s3_key = self.s3_key + '-{:0>10}-tmp'.format(
            random.randrange(0, 1e10))

        # don't start the upload until we've written at least
        # boto3.TransferConfig.multipart_threshold bytes
        self._transfer_manager = TransferManager(self.s3_client,
                                                 TransferConfig())
        self._upload_future = None
Example No. 15
    def test_uses_provided_osutil(self):
        osutil = RecordingOSUtils()
        # Use the recording os utility for the transfer manager
        self._manager = TransferManager(self.client, self.config, osutil)

        self.add_put_object_response_with_default_expected_params()

        future = self.manager.upload(self.filename, self.bucket, self.key)
        future.result()

        # The upload should have used the os utility. We check this by making
        # sure that the recorded opens are as expected.
        self.assertEqual(osutil.open_records, [(self.filename, 'rb')])
Example No. 16
    def call(self, files):
        # There is only ever one file in a stream transfer.
        file = files[0]
        if self._manager is not None:
            manager = self._manager
        else:
            manager = TransferManager(file.client, self.config)

        if file.operation_name == 'upload':
            bucket, key = find_bucket_key(file.dest)
            return self._upload(manager, bucket, key)
        elif file.operation_name == 'download':
            bucket, key = find_bucket_key(file.src)
            return self._download(manager, bucket, key)
Example No. 17
    def test_uses_provided_osutil(self):
        osutil = RecordingOSUtils()
        # Use the recording os utility for the transfer manager
        self._manager = TransferManager(self.client, self.config, osutil)

        self.add_head_object_response()
        self.add_successful_get_object_responses()

        future = self.manager.download(**self.create_call_kwargs())
        future.result()
        # The osutil should have had its open() method invoked when opening
        # a temporary file and its rename_file() method invoked when the
        # temporary file was moved to its final location.
        self.assertEqual(len(osutil.open_records), 1)
        self.assertEqual(len(osutil.rename_records), 1)
Example No. 18
    def __init__(self, s3_client,
                 bucket_name,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.s3 = s3_client

        self.transfer_manager = transfer_manager
        if not transfer_manager:
            self.transfer_manager = TransferManager(self.s3)
Example No. 19
    def setUp(self):
        super(BaseCopyTest, self).setUp()
        self.config = TransferConfig(max_request_concurrency=1)
        self._manager = TransferManager(self.client, self.config)

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.copy_source = {
            'Bucket': 'mysourcebucket',
            'Key': 'mysourcekey'
        }
        self.extra_args = {}
        self.subscribers = []

        self.content = b'my content'
Example No. 20
    def setUp(self):
        super(BaseCopyTest, self).setUp()
        self.config = TransferConfig(max_request_concurrency=1,
                                     multipart_chunksize=MIN_UPLOAD_CHUNKSIZE,
                                     multipart_threshold=MIN_UPLOAD_CHUNKSIZE *
                                     4)
        self._manager = TransferManager(self.client, self.config)

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.copy_source = {'Bucket': 'mysourcebucket', 'Key': 'mysourcekey'}
        self.extra_args = {}
        self.subscribers = []

        self.half_chunksize = int(MIN_UPLOAD_CHUNKSIZE / 2)
        self.content = b'0' * (2 * MIN_UPLOAD_CHUNKSIZE + self.half_chunksize)
Example No. 21
    def test_retry_failure(self):
        self.add_head_object_response()

        max_retries = 3
        self.config.num_download_attempts = max_retries
        self._manager = TransferManager(self.client, self.config)
        # Add responses that fill up the maximum number of retries.
        self.add_n_retryable_get_object_responses(max_retries)

        future = self.manager.download(**self.create_call_kwargs())

        # A retry exceeded error should have happened.
        with self.assertRaises(RetriesExceededError):
            future.result()

        # All of the retries should have been used up.
        self.stubber.assert_no_pending_responses()
Example No. 22
    def setUp(self):
        super(BaseDownloadTest, self).setUp()
        self.config = TransferConfig(max_request_concurrency=1)
        self._manager = TransferManager(self.client, self.config)

        # Create a temporary directory to write to
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.extra_args = {}
        self.subscribers = []

        # Create a stream to read from
        self.content = b'my content'
        self.stream = six.BytesIO(self.content)
Example No. 23
def create_transfer_manager(client, config, osutil=None):
    """Creates a transfer manager based on configuration

    :type client: boto3.client
    :param client: The S3 client to use

    :type config: boto3.s3.transfer.TransferConfig
    :param config: The transfer config to use

    :type osutil: s3transfer.utils.OSUtils
    :param osutil: The os utility to use

    :rtype: s3transfer.manager.TransferManager
    :returns: A transfer manager based on parameters provided
    """
    executor_cls = None
    if not config.use_threads:
        executor_cls = NonThreadedExecutor
    return TransferManager(client, config, osutil, executor_cls)
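A hypothetical call site for create_transfer_manager(); the client and config values below are illustrative and not taken from the original module.

import boto3
from boto3.s3.transfer import TransferConfig

client = boto3.client('s3')
# use_threads=False makes the factory fall back to the NonThreadedExecutor.
config = TransferConfig(use_threads=False)
manager = create_transfer_manager(client, config)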
Example No. 24
    def test_sigv4_progress_callbacks_invoked_once(self):
        # Reset the client and manager to use sigv4
        self.reset_stubber_with_new_client(
            {'config': Config(signature_version='s3v4')})
        self.client.meta.events.register(
            'before-parameter-build.s3.*', self.collect_body)
        self._manager = TransferManager(self.client, self.config)

        # Add the stubbed response.
        self.add_put_object_response_with_default_expected_params()

        subscriber = RecordingSubscriber()
        future = self.manager.upload(
            self.filename, self.bucket, self.key, subscribers=[subscriber])
        future.result()
        self.assert_expected_client_calls_were_correct()

        # The amount of bytes seen should be the same as the file size
        self.assertEqual(subscriber.calculate_bytes_seen(), len(self.content))
Example No. 25
 def __init__(self, client=None, config=None, osutil=None, manager=None):
     if not client and not manager:
         raise ValueError(
             'Either a boto3.Client or s3transfer.manager.TransferManager '
             'must be provided'
         )
     if manager and any([client, config, osutil]):
         raise ValueError(
             'Manager cannot be provided with client, config, '
             'nor osutil. These parameters are mutually exclusive.'
         )
     if config is None:
         config = TransferConfig()
     if osutil is None:
         osutil = OSUtils()
     if manager:
         self._manager = manager
     else:
         self._manager = TransferManager(client, config, osutil)
Example No. 26
    def __call__(
        self, client: S3Client, result_queue: "Queue[Any]"
    ) -> S3TransferHandler:
        """Create a S3TransferHandler instance.

        Args:
            client: The client to power the S3TransferHandler.
            result_queue: The result queue to be used to process results
                for the S3TransferHandler.

        """
        transfer_config = create_transfer_config_from_runtime_config(
            self._runtime_config
        )
        transfer_config.max_in_memory_upload_chunks = self.MAX_IN_MEMORY_CHUNKS
        transfer_config.max_in_memory_download_chunks = self.MAX_IN_MEMORY_CHUNKS

        transfer_manager = TransferManager(client, transfer_config)

        LOGGER.debug(
            "Using a multipart threshold of %s and a part size of %s",
            transfer_config.multipart_threshold,
            transfer_config.multipart_chunksize,
        )
        result_recorder = ResultRecorder()
        result_processor_handlers: List[Any] = [result_recorder]
        self._add_result_printer(result_recorder, result_processor_handlers)
        result_processor = ResultProcessor(
            result_queue=result_queue, result_handlers=result_processor_handlers
        )
        command_result_recorder = CommandResultRecorder(
            result_queue=result_queue,
            result_recorder=result_recorder,
            result_processor=result_processor,
        )

        return S3TransferHandler(
            transfer_manager=transfer_manager,
            config_params=self._config_params,
            result_command_recorder=command_result_recorder,
        )
Example No. 27
    def test_upload_with_bandwidth_limiter(self):
        self.content = b'a' * 1024 * 1024
        with open(self.filename, 'wb') as f:
            f.write(self.content)
        self.config = TransferConfig(max_request_concurrency=1,
                                     max_bandwidth=len(self.content) / 2)
        self._manager = TransferManager(self.client, self.config)

        self.add_put_object_response_with_default_expected_params()
        start = time.time()
        future = self.manager.upload(self.filename, self.bucket, self.key)
        future.result()
        # This is just a smoke test to make sure that the limiter is
        # being used, not a test of its exactness. So we set the maximum
        # bandwidth to len(content)/2 per sec and make sure the transfer is
        # noticeably slower. Ideally it would take more than two seconds,
        # but since bandwidth tracking at the very start of a transfer is
        # not entirely accurate, we give ourselves some flexibility by
        # setting the expected time to half of the theoretical time.
        self.assertGreaterEqual(time.time() - start, 1)

        self.assert_expected_client_calls_were_correct()
        self.assert_put_object_body_was_correct()
Example No. 28
    def setUp(self):
        super(BaseUploadTest, self).setUp()
        self.config = TransferConfig(max_request_concurrency=1)
        self._manager = TransferManager(self.client, self.config)

        # Create a temporary directory with files to read from
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')
        self.content = b'my content'

        with open(self.filename, 'wb') as f:
            f.write(self.content)

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.extra_args = {}
        self.subscribers = []

        # A list to keep track of all of the bodies sent over the wire
        # and their order.
        self.sent_bodies = []
        self.client.meta.events.register(
            'before-parameter-build.s3.*', self.collect_body)
Example No. 29
    def setUp(self):
        super(BaseUploadTest, self).setUp()
        # TODO: We do not want to use the real MIN_UPLOAD_CHUNKSIZE
        # when we're adjusting parts.
        # This is really wasteful and fails CI builds because self.contents
        # would normally use 10MB+ of memory.
        # Until there's an API to configure this, we're patching this with
        # a min size of 1.  We can't patch MIN_UPLOAD_CHUNKSIZE directly
        # because it's already bound to a default value in the
        # chunksize adjuster.  Instead we need to patch out the
        # chunksize adjuster class.
        self.adjuster_patch = mock.patch('s3transfer.upload.ChunksizeAdjuster',
                                         lambda: ChunksizeAdjuster(min_size=1))
        self.adjuster_patch.start()
        self.config = TransferConfig(max_request_concurrency=1)
        self._manager = TransferManager(self.client, self.config)

        # Create a temporary directory with files to read from
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')
        self.content = b'my content'

        with open(self.filename, 'wb') as f:
            f.write(self.content)

        # Initialize some default arguments
        self.bucket = 'mybucket'
        self.key = 'mykey'
        self.extra_args = {}
        self.subscribers = []

        # A list to keep track of all of the bodies sent over the wire
        # and their order.
        self.sent_bodies = []
        self.client.meta.events.register('before-parameter-build.s3.*',
                                         self.collect_body)
Example No. 30
class AtomicRemoteWritableS3File(object):
    """
    An S3 file that writes to a remote temp object on S3; copies to the true key on close.

    This class requires boto3 v1.4.0+ for its non-seekable file object upload ability.

    Useful for performing operations on large S3 objects when you don't have
    sufficient space on local drives.

    Works around AWS S3's multipart transfer size requirements and boto3's
    idiosyncratic implementation that requires an initial buffer size
    larger than the multipart transfer threshold in order to correctly
    select the 'read-until-empty' behavior needed for a streaming upload.
    """
    _boto3_default_multipart_threshold = 8 * 1024 * 1024

    def __init__(self, s3_bucket, s3_key, boto3_s3_client=None):
        import boto3
        from s3transfer.manager import TransferManager, TransferConfig

        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.s3_client = boto3_s3_client
        if self.s3_client is None:
            self.s3_client = boto3.client('s3')

        self._internal_queue = BlockingReaderWriterByteStream()
        self._boto3_multipart_upload_workaround_buffer = b''

        self.temp_s3_key = self.s3_key + '-{:0>10}-tmp'.format(
            random.randrange(0, 1e10))

        # don't start the upload until we've written at least
        # boto3.TransferConfig.multipart_threshold bytes
        self._transfer_manager = TransferManager(self.s3_client,
                                                 TransferConfig())
        self._upload_future = None

    def write(self, some_bytes):
        """
        Writes bytes to S3.

        This method may not be safely called by multiple writers in different threads.
        """
        self._write(some_bytes)

    def _write(self, some_bytes, close_and_flush=False):
        """
        Buffers writes until they're large enough to be safely sent to boto3.
        """
        buffer_write = (len(self._boto3_multipart_upload_workaround_buffer) +
                        len(some_bytes) <
                        self._boto3_default_multipart_threshold)
        self._boto3_multipart_upload_workaround_buffer += some_bytes
        if not buffer_write or close_and_flush:
            self._internal_queue.write(
                self._boto3_multipart_upload_workaround_buffer)
            self._boto3_multipart_upload_workaround_buffer = b''
            if not self._upload_future:
                self._submit_upload()

    def _submit_upload(self):
        self._upload_future = self._transfer_manager.upload(
            fileobj=self._internal_queue,
            bucket=self.s3_bucket,
            key=self.temp_s3_key)

    def close(self):
        """
        Closes the writer, so that it will flush to the reader.

        This method will block until the file has been fully flushed to S3,
        and until it has been properly moved to its final destination.
        """
        self._write(b'', close_and_flush=True)
        self._internal_queue.close()
        self._upload_future.result(
        )  # wait for upload to complete before moving
        self._move_to_final_destination()

    def _move_to_final_destination(self):
        self.s3_client.copy_object(Bucket=self.s3_bucket,
                                   Key=self.s3_key,
                                   CopySource={
                                       'Bucket': self.s3_bucket,
                                       'Key': self.temp_s3_key
                                   })
        self.s3_client.delete_object(Bucket=self.s3_bucket,
                                     Key=self.temp_s3_key)

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type:
            self._internal_queue.error('Pipe not properly closed.')
            if self._upload_future:
                self._upload_future.result()
                self.s3_client.delete_object(Bucket=self.s3_bucket,
                                             Key=self.temp_s3_key)
            return
        else:
            self.close()
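A sketch of how AtomicRemoteWritableS3File might be driven as a context manager; the bucket, key, and chunk source are placeholders.

def stream_chunks():
    # Hypothetical source of byte chunks produced on the fly.
    for _ in range(1000):
        yield b'x' * 65536

with AtomicRemoteWritableS3File('my-bucket', 'exports/large-dump.bin') as f:
    for chunk in stream_chunks():
        f.write(chunk)
# On a clean exit, close() flushes the buffered bytes, waits for the upload
# future, then copies the temporary key onto the final key and deletes it.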
Example No. 31
 def test_use_custom_executor_implementation(self):
     mocked_executor_cls = mock.Mock(BaseExecutor)
     transfer_manager = TransferManager(self.client,
                                        executor_cls=mocked_executor_cls)
     transfer_manager.delete('bucket', 'key')
     self.assertTrue(mocked_executor_cls.return_value.submit.called)
Example No. 32
class S3Uploader(object):
    """
    Class to upload objects to an S3 bucket that uses versioning. If the
    bucket does not already use versioning, this class will turn it on.
    """

    def __init__(self, s3_client,
                 bucket_name,
                 region,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.s3 = s3_client
        self.region = region

        self.transfer_manager = transfer_manager
        if not transfer_manager:
            self.transfer_manager = TransferManager(self.s3)

    def upload(self, file_name, remote_path):
        """
        Uploads the given file to S3
        :param file_name: Path to the file that will be uploaded
        :param remote_path: Path (key) in the bucket to which the file will
            be uploaded
        :return: S3 URL of the uploaded object
        """

        if self.prefix and len(self.prefix) > 0:
            remote_path = "{0}/{1}".format(self.prefix, remote_path)

        # Check if a file with same data exists
        if not self.force_upload and self.file_exists(remote_path):
            LOG.debug("File with same data is already exists at {0}. "
                      "Skipping upload".format(remote_path))
            return self.make_url(remote_path)

        try:

            # Default to regular server-side encryption unless customer has
            # specified their own KMS keys
            additional_args = {
                "ServerSideEncryption": "AES256"
            }

            if self.kms_key_id:
                additional_args["ServerSideEncryption"] = "aws:kms"
                additional_args["SSEKMSKeyId"] = self.kms_key_id

            print_progress_callback = \
                ProgressPercentage(file_name, remote_path)
            future = self.transfer_manager.upload(file_name,
                                                  self.bucket_name,
                                                  remote_path,
                                                  additional_args,
                                                  [print_progress_callback])
            future.result()

            return self.make_url(remote_path)

        except botocore.exceptions.ClientError as ex:
            error_code = ex.response["Error"]["Code"]
            if error_code == "NoSuchBucket":
                raise exceptions.NoSuchBucketError(
                        bucket_name=self.bucket_name)
            raise ex

    def upload_with_dedup(self, file_name, extension=None):
        """
        Makes and returns name of the S3 object based on the file's MD5 sum

        :param file_name: file to upload
        :param extension: String of file extension to append to the object
        :return: S3 URL of the uploaded object
        """

        # This construction of remote_path is critical to preventing duplicate
        # uploads of the same object. The uploader will check if the file
        # already exists in S3 and re-upload only if necessary. So if the
        # template points to the same file in multiple places, it will be
        # uploaded only once.

        filemd5 = self.file_checksum(file_name)
        remote_path = filemd5
        if extension:
            remote_path = remote_path + "." + extension

        return self.upload(file_name, remote_path)

    def file_exists(self, remote_path):
        """
        Check if the file we are trying to upload already exists in S3

        :param remote_path: Path (key) of the object in the bucket
        :return: True if the file exists, False otherwise
        """

        try:
            # Find the object that matches this ETag
            self.s3.head_object(
                Bucket=self.bucket_name, Key=remote_path)
            return True
        except botocore.exceptions.ClientError:
            # Either File does not exist or we are unable to get
            # this information.
            return False

    def make_url(self, obj_path):
        return "s3://{0}/{1}".format(
            self.bucket_name, obj_path)

    def file_checksum(self, file_name):

        with open(file_name, "rb") as file_handle:
            md5 = hashlib.md5()
            # Read file in chunks of 4096 bytes
            block_size = 4096

            # Save current cursor position and reset cursor to start of file
            curpos = file_handle.tell()
            file_handle.seek(0)

            buf = file_handle.read(block_size)
            while len(buf) > 0:
                md5.update(buf)
                buf = file_handle.read(block_size)

            # Restore file cursor's position
            file_handle.seek(curpos)

            return md5.hexdigest()

    def to_path_style_s3_url(self, key, version=None):
        """
            This link describes the format of Path Style URLs
            http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro
        """
        base = "https://s3.amazonaws.com"
        if self.region and self.region != "us-east-1":
            base = "https://s3-{0}.amazonaws.com".format(self.region)

        result = "{0}/{1}/{2}".format(base, self.bucket_name, key)
        if version:
            result = "{0}?versionId={1}".format(result, version)

        return result
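An illustrative use of the S3Uploader above; the client, bucket, region, and file name are placeholders.

import boto3

s3_client = boto3.client('s3')
uploader = S3Uploader(s3_client, 'my-deploy-bucket', 'us-west-2',
                      prefix='artifacts')
# upload_with_dedup() names the object after the file's MD5 sum, so the same
# content is only ever uploaded once.
url = uploader.upload_with_dedup('packaged-template.yaml', extension='template')
print(url)  # e.g. s3://my-deploy-bucket/artifacts/<md5>.template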
Example No. 33
class S3Uploader(object):
    """
    Class to upload objects to an S3 bucket that uses versioning. If the
    bucket does not already use versioning, this class will turn it on.
    """
    def __init__(self,
                 s3_client,
                 bucket_name,
                 region,
                 prefix=None,
                 kms_key_id=None,
                 force_upload=False,
                 transfer_manager=None):
        self.bucket_name = bucket_name
        self.prefix = prefix
        self.kms_key_id = kms_key_id or None
        self.force_upload = force_upload
        self.s3 = s3_client
        self.region = region

        self.transfer_manager = transfer_manager
        if not transfer_manager:
            self.transfer_manager = TransferManager(self.s3)

    def upload(self, file_name, remote_path):
        """
        Uploads the given file to S3
        :param file_name: Path to the file that will be uploaded
        :param remote_path: Path (key) in the bucket to which the file will
            be uploaded
        :return: S3 URL of the uploaded object
        """

        if self.prefix and len(self.prefix) > 0:
            remote_path = "{0}/{1}".format(self.prefix, remote_path)

        # Check if a file with same data exists
        if not self.force_upload and self.file_exists(remote_path):
            LOG.debug("File with same data is already exists at {0}. "
                      "Skipping upload".format(remote_path))
            return self.make_url(remote_path)

        try:

            # Default to regular server-side encryption unless customer has
            # specified their own KMS keys
            additional_args = {"ServerSideEncryption": "AES256"}

            if self.kms_key_id:
                additional_args["ServerSideEncryption"] = "aws:kms"
                additional_args["SSEKMSKeyId"] = self.kms_key_id

            print_progress_callback = \
                ProgressPercentage(file_name, remote_path)
            future = self.transfer_manager.upload(file_name, self.bucket_name,
                                                  remote_path, additional_args,
                                                  [print_progress_callback])
            future.result()

            return self.make_url(remote_path)

        except botocore.exceptions.ClientError as ex:
            error_code = ex.response["Error"]["Code"]
            if error_code == "NoSuchBucket":
                raise exceptions.NoSuchBucketError(
                    bucket_name=self.bucket_name)
            raise ex

    def upload_with_dedup(self, file_name, extension=None):
        """
        Makes and returns name of the S3 object based on the file's MD5 sum

        :param file_name: file to upload
        :param extension: String of file extension to append to the object
        :return: S3 URL of the uploaded object
        """

        # This construction of remote_path is critical to preventing duplicate
        # uploads of the same object. The uploader will check if the file
        # already exists in S3 and re-upload only if necessary. So if the
        # template points to the same file in multiple places, it will be
        # uploaded only once.

        filemd5 = self.file_checksum(file_name)
        remote_path = filemd5
        if extension:
            remote_path = remote_path + "." + extension

        return self.upload(file_name, remote_path)

    def file_exists(self, remote_path):
        """
        Check if the file we are trying to upload already exists in S3

        :param remote_path: Path (key) of the object in the bucket
        :return: True if the file exists, False otherwise
        """

        try:
            # Find the object that matches this ETag
            self.s3.head_object(Bucket=self.bucket_name, Key=remote_path)
            return True
        except botocore.exceptions.ClientError:
            # Either File does not exist or we are unable to get
            # this information.
            return False

    def make_url(self, obj_path):
        return "s3://{0}/{1}".format(self.bucket_name, obj_path)

    def file_checksum(self, file_name):

        with open(file_name, "rb") as file_handle:
            md5 = hashlib.md5()
            # Read file in chunks of 4096 bytes
            block_size = 4096

            # Save current cursor position and reset cursor to start of file
            curpos = file_handle.tell()
            file_handle.seek(0)

            buf = file_handle.read(block_size)
            while len(buf) > 0:
                md5.update(buf)
                buf = file_handle.read(block_size)

            # Restore file cursor's position
            file_handle.seek(curpos)

            return md5.hexdigest()

    def to_path_style_s3_url(self, key, version=None):
        """
            This link describes the format of Path Style URLs
            http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html#access-bucket-intro
        """
        base = "https://s3.amazonaws.com"
        if self.region and self.region != "us-east-1":
            base = "https://s3-{0}.amazonaws.com".format(self.region)

        result = "{0}/{1}/{2}".format(base, self.bucket_name, key)
        if version:
            result = "{0}?versionId={1}".format(result, version)

        return result
Example No. 34
 def setUp(self):
     super(TestRangedDownload, self).setUp()
     self.config = TransferConfig(max_request_concurrency=1,
                                  multipart_threshold=1,
                                  multipart_chunksize=4)
     self._manager = TransferManager(self.client, self.config)
Example No. 35
class S3Transfer(object):
    ALLOWED_DOWNLOAD_ARGS = TransferManager.ALLOWED_DOWNLOAD_ARGS
    ALLOWED_UPLOAD_ARGS = TransferManager.ALLOWED_UPLOAD_ARGS

    def __init__(self, client=None, config=None, osutil=None, manager=None):
        if not client and not manager:
            raise ValueError(
                'Either a boto3.Client or s3transfer.manager.TransferManager '
                'must be provided'
            )
        if manager and any([client, config, osutil]):
            raise ValueError(
                'Manager cannot be provided with client, config, '
                'nor osutil. These parameters are mutually exclusive.'
            )
        if config is None:
            config = TransferConfig()
        if osutil is None:
            osutil = OSUtils()
        if manager:
            self._manager = manager
        else:
            self._manager = TransferManager(client, config, osutil)

    def upload_file(self, filename, bucket, key,
                    callback=None, extra_args=None):
        """Upload a file to an S3 object.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.upload_file() directly.
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        subscribers = self._get_subscribers(callback)
        future = self._manager.upload(
            filename, bucket, key, extra_args, subscribers)
        try:
            future.result()
        # If a client error was raised, add the backwards compatibility layer
        # that raises a S3UploadFailedError. These specific errors were only
        # ever thrown for upload_parts but now can be thrown for any related
        # client error.
        except ClientError as e:
            raise S3UploadFailedError(
                "Failed to upload %s to %s: %s" % (
                    filename, '/'.join([bucket, key]), e))

    def download_file(self, bucket, key, filename, extra_args=None,
                      callback=None):
        """Download an S3 object to a file.

        Variants have also been injected into S3 client, Bucket and Object.
        You don't have to use S3Transfer.download_file() directly.
        """
        if not isinstance(filename, six.string_types):
            raise ValueError('Filename must be a string')

        subscribers = self._get_subscribers(callback)
        future = self._manager.download(
            bucket, key, filename, extra_args, subscribers)
        try:
            future.result()
        # This is for backwards compatibility where when retries are
        # exceeded we need to throw the same error from boto3 instead of
        # s3transfer's built in RetriesExceededError as current users are
        # catching the boto3 one instead of the s3transfer exception to do
        # their own retries.
        except S3TransferRetriesExceededError as e:
            raise RetriesExceededError(e.last_exception)

    def _get_subscribers(self, callback):
        if not callback:
            return None
        return [ProgressCallbackInvoker(callback)]

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self._manager.__exit__(*args)
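A minimal sketch of driving the S3Transfer wrapper above with a progress callback; the bucket, key, and local path are placeholders.

import boto3

def print_progress(bytes_transferred):
    print('transferred another {} bytes'.format(bytes_transferred))

client = boto3.client('s3')
with S3Transfer(client) as transfer:
    transfer.upload_file('/tmp/data.bin', 'my-bucket', 'backups/data.bin',
                         callback=print_progress)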