Example #1
    def test_open_file_chunk_reader(self):
        reader = OSUtils().open_file_chunk_reader(self.filename, 0, 3,
                                                  [self.callback])

        # The returned reader should be a ReadFileChunk.
        self.assertIsInstance(reader, ReadFileChunk)
        # The content of the reader should be correct.
        self.assertEqual(reader.read(), self.content)
        # Callbacks should be disabled despite being passed in.
        self.assertEqual(self.amounts_seen, [])
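For reference, a minimal sketch (not from any of the projects above) of calling open_file_chunk_reader directly; the path is a placeholder, and the callback list is left empty since, as the test shows, passed-in callbacks start out disabled anyway:

from s3transfer.utils import OSUtils

# Read bytes 0-2 of a (hypothetical) file through a ReadFileChunk.
with OSUtils().open_file_chunk_reader('/tmp/myfile', 0, 3, []) as chunk:
    data = chunk.read()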
Example #2
 def _create_s3_transfer(self):
     self.request_serializer = s3transfer.crt.BotocoreCRTRequestSerializer(
         self.session)
     credential_resolver = self.session.get_component('credential_provider')
     self.s3_crt_client = s3transfer.crt.create_s3_crt_client(
         self.session.get_config_variable("region"), credential_resolver)
     self.record_subscriber = RecordingSubscriber()
     self.osutil = OSUtils()
     return s3transfer.crt.CRTTransferManager(self.s3_crt_client,
                                              self.request_serializer)
Example #3
    def test_open_file_chunk_reader_from_fileobj(self):
        with open(self.filename, 'rb') as f:
            reader = OSUtils().open_file_chunk_reader_from_fileobj(
                f, len(self.content), len(self.content), [self.callback])

            # The returned reader should be a ReadFileChunk.
            self.assertIsInstance(reader, ReadFileChunk)
            # The content of the reader should be correct.
            self.assertEqual(reader.read(), self.content)
            reader.close()
            # Callbacks should be disabled despite being passed in.
            self.assertEqual(self.amounts_seen, [])
            self.assertEqual(self.num_close_callback_calls, 0)
Example #4
        def upload_part(i, start, end):
            nonlocal remaining
            part_id = i + 1
        with OSUtils().open_file_chunk_reader(src_path, start, end - start, [ctx.progress]) as fd:
                part = s3_client.upload_part(
                    Body=fd,
                    Bucket=dest_bucket,
                    Key=dest_key,
                    UploadId=upload_id,
                    PartNumber=part_id
                )
            with lock:
                parts[i] = {"PartNumber": part_id, "ETag": part["ETag"]}
                remaining -= 1
                done = remaining == 0

            if done:
                resp = s3_client.complete_multipart_upload(
                    Bucket=dest_bucket,
                    Key=dest_key,
                    UploadId=upload_id,
                    MultipartUpload={"Parts": parts}
                )
                version_id = resp.get('VersionId')  # Absent in unversioned buckets.
                ctx.done(PhysicalKey(dest_bucket, dest_key, version_id))
Example #5
    def __init__(self, client_kwargs=None, config=None):
        """Downloads S3 objects using process pools

        :type client_kwargs: dict
        :param client_kwargs: The keyword arguments to provide when
            instantiating S3 clients. The arguments must match the keyword
            arguments provided to the
            `botocore.session.Session.create_client()` method.

        :type config: ProcessTransferConfig
        :param config: Configuration for the downloader
        """
        if client_kwargs is None:
            client_kwargs = {}
        self._client_factory = ClientFactory(client_kwargs)

        self._transfer_config = config
        if config is None:
            self._transfer_config = ProcessTransferConfig()

        self._download_request_queue = multiprocessing.Queue(1000)
        self._worker_queue = multiprocessing.Queue(1000)
        self._osutil = OSUtils()

        self._started = False
        self._start_lock = threading.Lock()

        # These are initialized in the start() method.
        self._manager = None
        self._transfer_monitor = None
        self._submitter = None
        self._workers = []
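Assuming this constructor belongs to s3transfer's ProcessPoolDownloader, a minimal usage sketch with placeholder bucket, key, and filename; the context manager form shuts the worker processes down on exit:

from s3transfer.processpool import ProcessPoolDownloader

with ProcessPoolDownloader() as downloader:
    # Queues the download; shutdown on exit waits for it to finish.
    downloader.download_file('mybucket', 'mykey', 'myfile')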
Example #6
    def __init__(self, client, config=None, osutil=None, executor_cls=None):
        """A transfer manager interface for Amazon S3

        :param client: Client to be used by the manager
        :param config: TransferConfig to associate specific configurations
        :param osutil: OSUtils object to use for OS-related behavior when
            used with the transfer manager.

        :type executor_cls: s3transfer.futures.BaseExecutor
        :param executor_cls: The class of executor to use with the transfer
            manager. By default, concurrent.futures.ThreadPoolExecutor is used.
        """
        self._client = client
        self._config = config
        if config is None:
            self._config = TransferConfig()
        self._osutil = osutil
        if osutil is None:
            self._osutil = OSUtils()
        self._coordinator_controller = TransferCoordinatorController()
        # A counter to create unique IDs for each transfer submitted.
        self._id_counter = 0

        # The executor responsible for making S3 API transfer requests
        self._request_executor = BoundedExecutor(
            max_size=self._config.max_request_queue_size,
            max_num_threads=self._config.max_request_concurrency,
            tag_semaphores={
                IN_MEMORY_UPLOAD_TAG:
                TaskSemaphore(self._config.max_in_memory_upload_chunks),
                IN_MEMORY_DOWNLOAD_TAG:
                SlidingWindowSemaphore(
                    self._config.max_in_memory_download_chunks)
            },
            executor_cls=executor_cls)

        # The executor responsible for submitting the necessary tasks to
        # perform the desired transfer
        self._submission_executor = BoundedExecutor(
            max_size=self._config.max_submission_queue_size,
            max_num_threads=self._config.max_submission_concurrency,
            executor_cls=executor_cls)

        # There is one thread available for writing to disk. It will handle
        # downloads for all files.
        self._io_executor = BoundedExecutor(
            max_size=self._config.max_io_queue_size,
            max_num_threads=1,
            executor_cls=executor_cls)

        # The component responsible for limiting bandwidth usage if it
        # is configured.
        self._bandwidth_limiter = None
        if self._config.max_bandwidth is not None:
            logger.debug('Setting max_bandwidth to %s',
                         self._config.max_bandwidth)
            leaky_bucket = LeakyBucket(self._config.max_bandwidth)
            self._bandwidth_limiter = BandwidthLimiter(leaky_bucket)

        self._register_handlers()
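A minimal usage sketch for this manager, assuming it is s3transfer's TransferManager with a standard boto3 client; bucket, key, and path are placeholders:

import boto3
from s3transfer.manager import TransferManager

client = boto3.client('s3')
with TransferManager(client) as manager:
    # upload() returns a TransferFuture; result() blocks until done.
    future = manager.upload('/tmp/myfile', 'mybucket', 'mykey')
    future.result()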
Example #7
def _upload_file(ctx, size, src_path, dest_bucket, dest_key):
    s3_client = ctx.s3_client_provider.standard_client

    if size < s3_transfer_config.multipart_threshold:
        with OSUtils().open_file_chunk_reader(src_path, 0, size,
                                              [ctx.progress]) as fd:
            resp = s3_client.put_object(
                Body=fd,
                Bucket=dest_bucket,
                Key=dest_key,
            )

        version_id = resp.get('VersionId')  # Absent in unversioned buckets.
        ctx.done(PhysicalKey(dest_bucket, dest_key, version_id))
    else:
        resp = s3_client.create_multipart_upload(
            Bucket=dest_bucket,
            Key=dest_key,
        )
        upload_id = resp['UploadId']

        adjuster = ChunksizeAdjuster()
        chunksize = adjuster.adjust_chunksize(
            s3_transfer_config.multipart_chunksize, size)

        chunk_offsets = list(range(0, size, chunksize))

        lock = Lock()
        remaining = len(chunk_offsets)
        parts = [None] * remaining

        def upload_part(i, start, end):
            nonlocal remaining
            part_id = i + 1
            with OSUtils().open_file_chunk_reader(src_path, start, end - start,
                                                  [ctx.progress]) as fd:
                part = s3_client.upload_part(Body=fd,
                                             Bucket=dest_bucket,
                                             Key=dest_key,
                                             UploadId=upload_id,
                                             PartNumber=part_id)
            with lock:
                parts[i] = {"PartNumber": part_id, "ETag": part["ETag"]}
                remaining -= 1
                done = remaining == 0

            if done:
                resp = s3_client.complete_multipart_upload(
                    Bucket=dest_bucket,
                    Key=dest_key,
                    UploadId=upload_id,
                    MultipartUpload={"Parts": parts})
                version_id = resp.get('VersionId')  # Absent in unversioned buckets.
                ctx.done(PhysicalKey(dest_bucket, dest_key, version_id))

        for i, start in enumerate(chunk_offsets):
            end = min(start + chunksize, size)
            ctx.run(upload_part, i, start, end)
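To make the part arithmetic above concrete, a worked sketch with hypothetical numbers: a 20 MiB file at an 8 MiB chunk size produces three offsets, and the min() cap truncates the final part to 4 MiB:

size = 20 * 1024 ** 2       # 20 MiB file
chunksize = 8 * 1024 ** 2   # 8 MiB parts
chunk_offsets = list(range(0, size, chunksize))  # offsets at 0, 8 and 16 MiB
for i, start in enumerate(chunk_offsets):
    end = min(start + chunksize, size)
    print(i + 1, start, end - start)  # PartNumber, offset, part size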
Example #8
 def test_remove_file_ignores_errors(self):
     non_existent_file = os.path.join(self.tempdir, 'no-exist')
     # This should not exist to start.
     self.assertFalse(os.path.exists(non_existent_file))
     try:
         OSUtils().remove_file(non_existent_file)
     except OSError as e:
         self.fail('OSError should have been caught: %s' % e)
Example #9
 def setUp(self):
     super(BaseSubmissionTaskTest, self).setUp()
     self.config = TransferConfig()
     self.osutil = OSUtils()
     self.executor = BoundedExecutor(
         1000, 1, {
             IN_MEMORY_UPLOAD_TAG: TaskSemaphore(10),
             IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(10)
         })
Example #10
    def setUp(self):
        super(BaseDownloadOutputManagerTest, self).setUp()
        self.osutil = OSUtils()

        # Create a file to write to
        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')

        self.call_args = CallArgs(fileobj=self.filename)
        self.future = self.get_transfer_future(self.call_args)
        self.io_executor = BoundedExecutor(1000, 1)
Example #11
 def setUp(self):
     super(TestGetObjectTask, self).setUp()
     self.bucket = 'mybucket'
     self.key = 'mykey'
     self.extra_args = {}
     self.callbacks = []
     self.max_attempts = 5
     self.io_executor = BoundedExecutor(1000, 1)
     self.content = b'my content'
     self.stream = six.BytesIO(self.content)
     self.fileobj = WriteCollector()
     self.osutil = OSUtils()
     self.io_chunksize = 64 * (1024**2)
     self.download_output_manager = DownloadSeekableOutputManager(
         self.osutil, self.transfer_coordinator, self.io_executor)
Example #12
 def __init__(self, client=None, config=None, osutil=None, manager=None):
     if not client and not manager:
         raise ValueError(
             'Either a boto3.Client or s3transfer.manager.TransferManager '
             'must be provided')
     if manager and any([client, config, osutil]):
         raise ValueError(
             'Manager cannot be provided with client, config, '
             'nor osutil. These parameters are mutually exclusive.')
     if config is None:
         config = TransferConfig()
     if osutil is None:
         osutil = OSUtils()
     if manager:
         self._manager = manager
     else:
         self._manager = create_transfer_manager(client, config, osutil)
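This constructor matches boto3's S3Transfer wrapper; assuming that class, a minimal sketch with placeholder names:

import boto3
from boto3.s3.transfer import S3Transfer

transfer = S3Transfer(boto3.client('s3'))
# Delegates to the transfer manager created in __init__ above.
transfer.upload_file('/tmp/myfile', 'mybucket', 'mykey')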
Example #13
    def __init__(self, client, config=None, osutil=None):
        """A transfer manager interface for Amazon S3

        :param client: Client to be used by the manager
        :param config: TransferConfig to associate specific configurations
        :param osutil: OSUtils object to use for OS-related behavior when
            used with the transfer manager.
        """
        self._client = client
        self._config = config
        if config is None:
            self._config = TransferConfig()
        self._osutil = osutil
        if osutil is None:
            self._osutil = OSUtils()
        self._coordinator_controller = TransferCoordinatorController()
        # A counter to create unique IDs for each transfer submitted.
        self._id_counter = 0

        # The executor responsible for making S3 API transfer requests
        self._request_executor = BoundedExecutor(
            max_size=self._config.max_request_queue_size,
            max_num_threads=self._config.max_request_concurrency,
            tag_semaphores={
                IN_MEMORY_UPLOAD_TAG: TaskSemaphore(
                    self._config.max_in_memory_upload_chunks),
                IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(
                    self._config.max_in_memory_download_chunks)
            }
        )

        # The executor responsible for submitting the necessary tasks to
        # perform the desired transfer
        self._submission_executor = BoundedExecutor(
            max_size=self._config.max_submission_queue_size,
            max_num_threads=self._config.max_submission_concurrency
        )

        # There is one thread available for writing to disk. It will handle
        # downloads for all files.
        self._io_executor = BoundedExecutor(
            max_size=self._config.max_io_queue_size,
            max_num_threads=1
        )
        self._register_handlers()
Example #14
    def setUp(self):
        super(BaseUploadTest, self).setUp()
        self.bucket = 'mybucket'
        self.key = 'foo'
        self.osutil = OSUtils()

        self.tempdir = tempfile.mkdtemp()
        self.filename = os.path.join(self.tempdir, 'myfile')
        self.content = b'my content'
        self.subscribers = []

        with open(self.filename, 'wb') as f:
            f.write(self.content)

        # A list to keep track of all of the bodies sent over the wire
        # and their order.
        self.sent_bodies = []
        self.client.meta.events.register('before-parameter-build.s3.*',
                                         self.collect_body)
Example #15
 def setUp(self):
     super(TestGetObjectWorker, self).setUp()
     self.files = FileCreator()
     self.queue = queue.Queue()
     self.client_factory = mock.Mock(ClientFactory)
     self.client_factory.create_client.return_value = self.client
     self.transfer_monitor = TransferMonitor()
     self.osutil = OSUtils()
     self.worker = GetObjectWorker(queue=self.queue,
                                   client_factory=self.client_factory,
                                   transfer_monitor=self.transfer_monitor,
                                   osutil=self.osutil)
     self.transfer_id = self.transfer_monitor.notify_new_transfer()
     self.bucket = 'bucket'
     self.key = 'key'
     self.remote_contents = b'my content'
     self.temp_filename = self.files.create_file('tempfile', '')
     self.extra_args = {}
     self.offset = 0
     self.final_filename = self.files.full_path('final_filename')
     self.stream = six.BytesIO(self.remote_contents)
     self.transfer_monitor.notify_expected_jobs_to_complete(
         self.transfer_id, 1000)
Example #16
    def __init__(self, crt_s3_client, crt_request_serializer, osutil=None):
        """A transfer manager interface for Amazon S3 on CRT s3 client.

        :type crt_s3_client: awscrt.s3.S3Client
        :param crt_s3_client: The CRT s3 client, handling all the
            HTTP requests and functions under the hood.

        :type crt_request_serializer: s3transfer.crt.BaseCRTRequestSerializer
        :param crt_request_serializer: Serializer that generates unsigned CRT
            HTTP requests.

        :type osutil: s3transfer.utils.OSUtils
        :param osutil: OSUtils object to use for os-related behavior when
            using with transfer manager.
        """
        self._osutil = osutil
        if osutil is None:
            self._osutil = OSUtils()
        self._crt_s3_client = crt_s3_client
        self._s3_args_creator = S3ClientArgsCreator(
            crt_request_serializer, self._osutil)
        self._future_coordinators = []
        self._semaphore = threading.Semaphore(128)  # not configurable
        # A counter to create unique IDs for each transfer submitted.
        self._id_counter = 0
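A minimal end-to-end sketch that wires this constructor up the same way Example #2 above does; the file, bucket, and key are placeholders:

import botocore.session
import s3transfer.crt

session = botocore.session.Session()
serializer = s3transfer.crt.BotocoreCRTRequestSerializer(session)
resolver = session.get_component('credential_provider')
crt_client = s3transfer.crt.create_s3_crt_client(
    session.get_config_variable('region'), resolver)

with s3transfer.crt.CRTTransferManager(crt_client, serializer) as manager:
    future = manager.upload('/tmp/myfile', 'mybucket', 'mykey')
    future.result()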
Example #17
 def setUp(self):
     super(BaseUploadInputManagerTest, self).setUp()
     self.osutil = OSUtils()
     self.config = TransferConfig()
     self.recording_subscriber = RecordingSubscriber()
     self.subscribers.append(self.recording_subscriber)
Example #18
 def test_get_file_size(self):
     self.assertEqual(OSUtils().get_file_size(self.filename),
                      len(self.content))
Example #19
def upload_file(**kwargs):
    """Upload manager."""
    filename = kwargs.pop('file')
    client = CosmosIdS3Client(**kwargs)
    config = TransferConfig()
    osutil = OSUtils()
    transfer_manager = CosmosIdTransferManager(client,
                                               config=config,
                                               osutil=osutil)

    subscribers = None

    _, file_name = os.path.split(filename)
    try:
        response = requests.get(client.base_url +
                                '/api/metagenid/v1/files/upload_init',
                                json=dict(file_name=file_name),
                                headers=client.header)
        if response.status_code == 403:
            raise AuthenticationFailed('Authentication Failed. Wrong API Key.')
        if response.status_code == requests.codes.ok:
            sources = response.json()
            future = transfer_manager.upload(filename,
                                             sources['upload_source'],
                                             sources['upload_key'], None,
                                             subscribers)
            s3path, _ = os.path.split(sources['upload_key'])
            data = dict(path=s3path,
                        size=str(os.stat(filename)[6]),
                        name=file_name,
                        parent='')
        else:
            logger.error(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))
            raise UploadException(
                "File upload initialisation failed. Response code: {}".format(
                    response.status_code))

        future.result()
        create_response = requests.post(client.base_url +
                                        '/api/metagenid/v1/files',
                                        json=data,
                                        headers=client.header)
        if create_response.status_code == 201:
            return create_response.json()
        else:
            raise UploadException(
                'Failed to upload file: {}'.format(file_name))
    # If a client error was raised, add the backwards-compatibility layer
    # that raises a S3UploadFailedError. These specific errors were only
    # ever thrown for upload_parts but now can be thrown for any related
    # client error.
    except ClientError as e:
        raise S3UploadFailedError(
            "Failed to upload %s to %s: %s" % (filename, '/'.join(
                [sources['upload_source'], sources['upload_key']]), e))
    except AuthenticationFailed as ae:
        logger.error('{}'.format(ae))
        return False
    except UploadException as ue:
        logger.error("File Upload Failed. Error: {}".format(ue))
        return False
Example #20
 def test_allocate_with_os_error(self, mock_fallocate):
     mock_fallocate.side_effect = OSError()
     with self.assertRaises(OSError):
         OSUtils().allocate(self.filename, 1)
     self.assertFalse(os.path.exists(self.filename))
Example #21
def _upload_file(ctx, size, src_path, dest_bucket, dest_key, override_meta):
    if override_meta is None:
        meta = _parse_file_metadata(src_path)
    else:
        meta = override_meta

    s3_client = ctx.s3_client

    if size < s3_transfer_config.multipart_threshold:
        with OSUtils().open_file_chunk_reader(src_path, 0, size, [ctx.progress]) as fd:
            resp = s3_client.put_object(
                Body=fd,
                Bucket=dest_bucket,
                Key=dest_key,
                Metadata={HELIUM_METADATA: json.dumps(meta)},
            )

        version_id = resp.get('VersionId')  # Absent in unversioned buckets.
        ctx.done(make_s3_url(dest_bucket, dest_key, version_id))
    else:
        resp = s3_client.create_multipart_upload(
            Bucket=dest_bucket,
            Key=dest_key,
            Metadata={HELIUM_METADATA: json.dumps(meta)},
        )
        upload_id = resp['UploadId']

        chunk_offsets = list(range(0, size, s3_transfer_config.multipart_chunksize))

        lock = Lock()
        remaining = len(chunk_offsets)
        parts = [None] * remaining

        def upload_part(i, start, end):
            nonlocal remaining
            part_id = i + 1
            with OSUtils().open_file_chunk_reader(src_path, start, end - start, [ctx.progress]) as fd:
                part = s3_client.upload_part(
                    Body=fd,
                    Bucket=dest_bucket,
                    Key=dest_key,
                    UploadId=upload_id,
                    PartNumber=part_id
                )
            with lock:
                parts[i] = {"PartNumber": part_id, "ETag": part["ETag"]}
                remaining -= 1
                done = remaining == 0

            if done:
                resp = s3_client.complete_multipart_upload(
                    Bucket=dest_bucket,
                    Key=dest_key,
                    UploadId=upload_id,
                    MultipartUpload={"Parts": parts}
                )
                version_id = resp.get('VersionId')  # Absent in unversioned buckets.
                ctx.done(make_s3_url(dest_bucket, dest_key, version_id))

        for i, start in enumerate(chunk_offsets):
            end = min(start + s3_transfer_config.multipart_chunksize, size)
            ctx.run(upload_part, i, start, end)
Example #22
 def setUp(self):
     super(BaseIOTaskTest, self).setUp()
     self.files = FileCreator()
     self.osutil = OSUtils()
     self.temp_filename = os.path.join(self.files.rootdir, 'mytempfile')
     self.final_filename = os.path.join(self.files.rootdir, 'myfile')
Example #23
 def test_allocate(self):
     truncate_size = 1
     OSUtils().allocate(self.filename, truncate_size)
     with open(self.filename, 'rb') as f:
         self.assertEqual(len(f.read()), truncate_size)
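A direct usage sketch of allocate with a hypothetical path; together with Example #20 above, these tests show that the file is pre-sized to the requested length and that a partially created file is removed if allocation fails:

from s3transfer.utils import OSUtils

OSUtils().allocate('/tmp/myfile', 1024)  # Pre-size the file to 1 KiB.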
Example #24
 def test_open_file(self):
     fileobj = OSUtils().open(os.path.join(self.tempdir, 'foo'), 'w')
     self.assertTrue(hasattr(fileobj, 'write'))
Example #25
 def test_remove_file_proxies_remove_file(self):
     OSUtils().remove_file(self.filename)
     self.assertFalse(os.path.exists(self.filename))
Example #26
 def test_rename_file(self):
     new_filename = os.path.join(self.tempdir, 'newfoo')
     OSUtils().rename_file(self.filename, new_filename)
     self.assertFalse(os.path.exists(self.filename))
     self.assertTrue(os.path.exists(new_filename))
Example #27
 def test_is_special_file_for_normal_file(self):
     self.assertFalse(OSUtils().is_special_file(self.filename))
Example #28
 def test_get_temp_filename(self):
     filename = 'myfile'
     self.assertIsNotNone(
         re.match(r'%s\.[0-9A-Fa-f]{8}$' % filename,
                  OSUtils().get_temp_filename(filename)))
Example #29
 def test_is_special_file_for_non_existent_file(self):
     non_existent_filename = os.path.join(self.tempdir, 'no-exist')
     self.assertFalse(os.path.exists(non_existent_filename))
     self.assertFalse(OSUtils().is_special_file(non_existent_filename))
Example #30
 def test_is_not_compatible_for_non_special_file(self):
     self.assertFalse(
         self.download_output_manager.is_compatible(self.filename,
                                                    OSUtils()))