Example #1
 def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
     bucket, key = find_bucket_key(fileinfo.dest)
     source_bucket, source_key = find_bucket_key(fileinfo.src)
     copy_source = {'Bucket': source_bucket, 'Key': source_key}
     return self._transfer_manager.copy(
         bucket=bucket, key=key, copy_source=copy_source,
         extra_args=extra_args, subscribers=subscribers,
         source_client=fileinfo.source_client
     )
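All of the examples on this page rely on find_bucket_key to split an S3 path of the form bucket/key into its bucket and key parts. As a rough, minimal sketch of that behaviour (a hypothetical simplification for illustration only; the real awscli helper also recognizes access point ARNs, as the tests near the end of this page show):

 def find_bucket_key_sketch(s3_path):
     """Split 'bucket/some/key' into ('bucket', 'some/key').

     Hypothetical simplified stand-in for awscli's find_bucket_key; it
     does not handle access point ARNs.
     """
     bucket, _, key = s3_path.partition('/')
     return bucket, key

 # Usage, matching the simple test cases near the end of this page:
 # find_bucket_key_sketch('bucket/prefix/key')  -> ('bucket', 'prefix/key')
 # find_bucket_key_sketch('bucket/')            -> ('bucket', '')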
Example #2
 def copy(self):
     """
     Copies an object in s3 to another location in s3.
     """
     source_bucket, source_key = find_bucket_key(self.src)
     copy_source = {'Bucket': source_bucket, 'Key': source_key}
     bucket, key = find_bucket_key(self.dest)
     params = {'Bucket': bucket,
               'CopySource': copy_source, 'Key': key}
     self._inject_content_type(params)
     RequestParamsMapper.map_copy_object_params(params, self.parameters)
     response_data = self.client.copy_object(**params)
Example #3
    def __call__(self):
        LOGGER.debug("Uploading part copy %s for filename: %s",
                     self._part_number, self._filename.src)
        total_file_size = self._filename.size
        start_range = (self._part_number - 1) * self._chunk_size
        if self._is_last_part(self._part_number):
            end_range = total_file_size - 1
        else:
            end_range = start_range + self._chunk_size - 1
        range_param = 'bytes=%s-%s' % (start_range, end_range)
        try:
            LOGGER.debug("Waiting for upload id.")
            upload_id = self._upload_context.wait_for_upload_id()
            bucket, key = find_bucket_key(self._filename.dest)
            src_bucket, src_key = find_bucket_key(self._filename.src)
            params = {'endpoint': self._filename.endpoint,
                      'bucket': bucket, 'key': key,
                      'part_number': self._part_number,
                      'upload_id': upload_id,
                      'copy_source': '%s/%s' % (src_bucket, src_key),
                      'copy_source_range': range_param}
            response_data, http = operate(
                self._filename.service, 'UploadPartCopy', params)
            etag = response_data['CopyPartResult']['ETag'][1:-1]
            self._upload_context.announce_finished_part(
                etag=etag, part_number=self._part_number)

            message = print_operation(self._filename, 0)
            result = {'message': message, 'total_parts': self._total_parts(),
                      'error': False}
            self._result_queue.put(result)
        except UploadCancelledError as e:
            # We don't need to do anything in this case.  The task
            # has been cancelled, and the task that cancelled the
            # task has already queued a message.
            LOGGER.debug("Not uploading part copy, task has been cancelled.")
        except Exception as e:
            LOGGER.debug('Error during upload part copy: %s', e,
                         exc_info=True)
            message = print_operation(self._filename, failed=True,
                                      dryrun=False)
            message += '\n' + str(e)
            result = {'message': message, 'error': True}
            self._result_queue.put(result)
            self._upload_context.cancel_upload()
        else:
            LOGGER.debug("Copy part number %s completed for filename: %s",
                         self._part_number, self._filename.src)
Example #4
    def __call__(self):
        total_file_size = self._filename.size
        start_range = self._part_number * self._chunk_size
        if self._part_number == int(total_file_size / self._chunk_size) - 1:
            end_range = ''
        else:
            end_range = start_range + self._chunk_size - 1
        range_param = 'bytes=%s-%s' % (start_range, end_range)
        LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                     self._filename.dest)
        bucket, key = find_bucket_key(self._filename.src)
        params = {'endpoint': self._filename.endpoint, 'bucket': bucket,
                  'key': key, 'range': range_param}
        try:
            LOGGER.debug("Making GetObject requests with byte range: %s",
                         range_param)
            response_data, http = operate(self._service, 'GetObject',
                                          params)
            LOGGER.debug("Response received from GetObject")
            body = response_data['Body']
            self._write_to_file(body)
            self._context.announce_completed_part(self._part_number)

            message = print_operation(self._filename, 0)
            total_parts = int(self._filename.size / self._chunk_size)
            result = {'message': message, 'error': False,
                      'total_parts': total_parts}
            self._result_queue.put(result)
        except Exception as e:
            LOGGER.debug(
                'Exception caught downloading byte range: %s',
                e, exc_info=True)
            self._context.cancel()
            raise e
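To make the byte-range arithmetic in the example above concrete, here is a small standalone sketch; the 25 MB file size and 10 MB chunk size mentioned below are made-up numbers, not values taken from awscli:

 def byte_ranges(total_file_size, chunk_size):
     """Yield the Range header value for each part, mirroring the logic
     above: the final part uses an open-ended range ('bytes=start-') so
     it also picks up any remainder smaller than one chunk."""
     num_parts = int(total_file_size / chunk_size)
     for part_number in range(num_parts):
         start = part_number * chunk_size
         if part_number == num_parts - 1:
             yield 'bytes=%s-' % start
         else:
             yield 'bytes=%s-%s' % (start, start + chunk_size - 1)

 # For a 25 MB object with 10 MB chunks this yields two ranges:
 # 'bytes=0-10485759' and 'bytes=10485760-' (the open-ended final range
 # covers the remaining 15 MB).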
Example #5
 def __call__(self):
     LOGGER.debug("Completing multipart upload for file: %s", self.filename.src)
     upload_id = self._upload_context.wait_for_upload_id()
     parts = self._upload_context.wait_for_parts_to_finish()
     LOGGER.debug("Received upload id and parts list.")
     bucket, key = find_bucket_key(self.filename.dest)
     params = {
         "bucket": bucket,
         "key": key,
         "endpoint": self.filename.endpoint,
         "upload_id": upload_id,
         "multipart_upload": {"Parts": parts},
     }
     try:
         operate(self.filename.service, "CompleteMultipartUpload", params)
     except Exception as e:
         LOGGER.debug("Error trying to complete multipart upload: %s", e, exc_info=True)
         message = print_operation(self.filename, failed=True, dryrun=self.parameters["dryrun"])
         message += "\n" + str(e)
         result = {"message": message, "error": True}
     else:
         LOGGER.debug("Multipart upload completed for: %s", self.filename.src)
         message = print_operation(self.filename, False, self.parameters["dryrun"])
         result = {"message": message, "error": False}
         self._upload_context.announce_completed()
     self.result_queue.put(result)
Example #6
 def __call__(self):
     LOGGER.debug("Completing multipart upload for file: %s",
                  self.filename.src)
     upload_id = self._upload_context.wait_for_upload_id()
     parts = self._upload_context.wait_for_parts_to_finish()
     LOGGER.debug("Received upload id and parts list.")
     bucket, key = find_bucket_key(self.filename.dest)
     params = {
         'bucket': bucket, 'key': key,
         'endpoint': self.filename.endpoint,
         'upload_id': upload_id,
         'multipart_upload': {'Parts': parts},
     }
     try:
         operate(self.filename.service, 'CompleteMultipartUpload', params)
     except Exception as e:
         LOGGER.debug("Error trying to complete multipart upload: %s",
                      e, exc_info=True)
         message = print_operation(
             self.filename, failed=True,
             dryrun=self.parameters['dryrun'])
         message += '\n' + str(e)
         result = {
             'message': message,
             'error': True
         }
     else:
         LOGGER.debug("Multipart upload completed for: %s",
                     self.filename.src)
         message = print_operation(self.filename, False,
                                   self.parameters['dryrun'])
         result = {'message': message, 'error': False}
         self._upload_context.announce_completed()
     self.result_queue.put(result)
Example #7
 def _run_main(self, parsed_args, parsed_globals):
     super(ListCommand, self)._run_main(parsed_args, parsed_globals)
     self._empty_result = False
     self._at_first_page = True
     self._size_accumulator = 0
     self._total_objects = 0
     self._human_readable = parsed_args.human_readable
     path = parsed_args.paths
     if path.startswith('s3://'):
         path = path[5:]
     bucket, key = find_bucket_key(path)
     if not bucket:
         self._list_all_buckets()
     elif parsed_args.dir_op:
         # Then --recursive was specified.
         self._list_all_objects_recursive(bucket, key,
                                          parsed_args.page_size)
     else:
         self._list_all_objects(bucket, key, parsed_args.page_size)
     if parsed_args.summarize:
         self._print_summary()
     if key:
         # User specified a key to look for. We should return an rc of one
         # if there are no matching keys and/or prefixes or return an rc
         # of zero if there are matching keys or prefixes.
         return self._check_no_objects()
     else:
         # This covers the case when the user is trying to list all of
         # the buckets or is trying to list the objects of a bucket
         # (without specifying a key). For both situations, a rc of 0
         # should be returned because applicable errors are supplied by
         # the server (i.e. bucket not existing). These errors will be
         # thrown before reaching the automatic return of rc of zero.
         return 0
Example #8
 def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
     bucket, key = find_bucket_key(fileinfo.src)
     fileout = self._get_fileout(fileinfo)
     return self._transfer_manager.download(
         fileobj=fileout, bucket=bucket, key=key,
         extra_args=extra_args, subscribers=subscribers
     )
Example #9
 def create_multipart_upload(self):
     bucket, key = find_bucket_key(self.dest)
     params = {'Bucket': bucket, 'Key': key}
     self._handle_object_params(params)
     response_data = self.client.create_multipart_upload(**params)
     upload_id = response_data['UploadId']
     return upload_id
Example #10
 def _list_single_object(self, s3_path):
     # When we know we're dealing with a single object, we can avoid
     # a ListObjects operation (which causes concern for anyone setting
     # IAM policies with the smallest set of permissions needed) and
     # instead use a HeadObject request.
     bucket, key = find_bucket_key(s3_path)
     try:
         params = {'Bucket': bucket, 'Key': key}
         params.update(self.request_parameters.get('HeadObject', {}))
         response = self._client.head_object(**params)
     except ClientError as e:
         # We want to try to give a more helpful error message.
         # This is what the customer is going to see so we want to
         # give as much detail as we have.
         if not e.response['Error']['Code'] == '404':
             raise
         # The key does not exist so we'll raise a more specific
         # error message here.
         response = e.response.copy()
         response['Error']['Message'] = 'Key "%s" does not exist' % key
         raise ClientError(response, 'HeadObject')
     response['Size'] = int(response.pop('ContentLength'))
     last_update = parse(response['LastModified'])
     response['LastModified'] = last_update.astimezone(tzlocal())
     return s3_path, response
Example #11
 def multi_upload(self):
     """
     Performs multipart uploads.  It initiates the multipart upload.
     It creates a queue ``part_queue`` which is directly responsible
     for controlling the progress of the multipart upload.  It then
     creates ``UploadPartTasks`` for threads to run via the
     ``executer``.  This function waits for all of the parts in the
     multipart upload to finish, and then it completes the multipart
     upload.  This method waits on its parts to finish.  So, threads
     are required to process the parts for this function to complete.
     """
     part_queue = NoBlockQueue(self.interrupt)
     complete_upload_queue = Queue.PriorityQueue()
     part_counter = MultiCounter()
     counter_lock = threading.Lock()
     bucket, key = find_bucket_key(self.dest)
     params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
     if self.parameters['acl']:
         params['acl'] = self.parameters['acl'][0]
     if self.parameters['guess_mime_type']:
         self._inject_content_type(params, self.src)
     response_data, http = operate(self.service, 'CreateMultipartUpload',
                                   params)
     upload_id = response_data['UploadId']
     size_uploads = self.chunksize
     num_uploads = int(math.ceil(self.size/float(size_uploads)))
     for i in range(1, (num_uploads + 1)):
         part_info = (self, upload_id, i, size_uploads)
         part_queue.put(part_info)
         task = UploadPartTask(session=self.session, executer=self.executer,
                               part_queue=part_queue,
                               dest_queue=complete_upload_queue,
                               region=self.region,
                               printQueue=self.printQueue,
                               interrupt=self.interrupt,
                               part_counter=part_counter,
                               counter_lock=counter_lock)
         self.executer.submit(task)
     part_queue.join()
     # The following ensures that if the multipart upload is in progress,
     # all part uploads finish before aborting or completing.  This
     # really only applies when an interrupt signal is sent because the
     # ``part_queue.join()`` ensures this if the process is not
     # interrupted.
     while part_counter.count:
         time.sleep(0.1)
     parts_list = []
     while not complete_upload_queue.empty():
         part = complete_upload_queue.get()
         parts_list.append(part[1])
     if len(parts_list) == num_uploads:
         parts = {'Parts': parts_list}
         params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key,
                   'upload_id': upload_id, 'multipart_upload': parts}
         operate(self.service, 'CompleteMultipartUpload', params)
     else:
         abort_params = {'endpoint': self.endpoint, 'bucket': bucket,
                         'key': key, 'upload_id': upload_id}
         operate(self.service, 'AbortMultipartUpload', abort_params)
         raise Exception()
Example #12
 def list_objects(self, s3_path, dir_op):
     """
     This function yields the appropriate object or objects under a
     common prefix depending if the operation is on objects under a
     common prefix.  It yields the file's source path, size, and last
     update.
     """
     # Short circuit path: if we are not recursing into the s3
     # bucket and a specific path was given, we can just yield
     # that path and not have to call any operation in s3.
     bucket, prefix = find_bucket_key(s3_path)
     if not dir_op and prefix:
         yield self._list_single_object(s3_path)
     else:
         lister = BucketLister(self._client)
         for key in lister.list_objects(bucket=bucket, prefix=prefix,
                                        page_size=self.page_size):
             source_path, response_data = key
             if response_data['Size'] == 0 and source_path.endswith('/'):
                 if self.operation_name == 'delete':
                     # This is to filter out manually created folders
                     # in S3.  They have a size zero and would be
                     # undesirably downloaded.  Local directories
                     # are automatically created when they do not
                     # exist locally.  But user should be able to
                     # delete them.
                     yield source_path, response_data
             elif not dir_op and s3_path != source_path:
                 pass
             else:
                 yield source_path, response_data
Example #13
 def remove_bucket(self):
     """
     This operation removes a bucket.
     """
     bucket, key = find_bucket_key(self.src)
     params = {'endpoint': self.endpoint, 'bucket': bucket}
     response_data, http = operate(self.service, 'DeleteBucket', params)
Example #14
 def _list_single_object(self, s3_path):
     # When we know we're dealing with a single object, we can avoid
     # a ListObjects operation (which causes concern for anyone setting
     # IAM policies with the smallest set of permissions needed) and
     # instead use a HeadObject request.
     bucket, key = find_bucket_key(s3_path)
     try:
         response = self._client.head_object(Bucket=bucket, Key=key)
     except ClientError as e:
         # We want to try to give a more helpful error message.
         # This is what the customer is going to see so we want to
         # give as much detail as we have.
         copy_fields = e.__dict__.copy()
         if not e.error_message == 'Not Found':
             raise
         if e.http_status_code == 404:
             # The key does not exist so we'll raise a more specific
             # error message here.
             copy_fields['error_message'] = 'Key "%s" does not exist' % key
         else:
             reason = six.moves.http_client.responses[
                 e.http_status_code]
             copy_fields['error_code'] = reason
             copy_fields['error_message'] = reason
         raise ClientError(**copy_fields)
     response['Size'] = int(response.pop('ContentLength'))
     last_update = parse(response['LastModified'])
     response['LastModified'] = last_update.astimezone(tzlocal())
     return s3_path, response
Example #15
 def list_objects(self, s3_path, dir_op):
     """
     This function yields the appropriate object or objects under a
     common prefix depending if the operation is on objects under a
     common prefix.  It yields the file's source path, size, and last
     update.
     """
     operation = self._service.get_operation('ListObjects')
     bucket, prefix = find_bucket_key(s3_path)
     iterator = operation.paginate(self._endpoint, bucket=bucket,
                                   prefix=prefix)
     for html_response, response_data in iterator:
         contents = response_data['Contents']
         for content in contents:
             src_path = bucket + '/' + content['Key']
             size = content['Size']
             last_update = parse(content['LastModified'])
             last_update = last_update.astimezone(tzlocal())
             if size == 0 and src_path.endswith('/'):
                 if self.operation_name == 'delete':
                     # This is to filter out manually created folders
                     # in S3.  They have a size zero and would be
                     # undesirably downloaded.  Local directories
                     # are automatically created when they do not
                     # exist locally.  But user should be able to
                     # delete them.
                     yield src_path, size, last_update
             elif not dir_op and s3_path != src_path:
                 pass
             else:
                 yield src_path, size, last_update
Example #16
    def check_src_path(self, paths):
        """
        This checks the source paths to determine if they are valid.  For S3,
        the check first lists the objects using the source path.  If there is
        an error, such as the bucket not existing, it will be caught by the
        ``check_error()`` function.  If the operation is on a single object in
        s3, it checks that a list of objects was returned and that the first
        object listed matches the name specified in the command line.  If the
        operation is on objects under a common prefix,
        it will check that there are common prefixes and objects under
        the specified prefix.
        For local files, it first checks that the path exists.  Then it checks
        that the path is a directory if it is a directory operation or that
        the path is a file if the operation is on a single file.
        """
        src_path = paths[0]
        dir_op = self.parameters['dir_op']
        if src_path.startswith('s3://'):
            if self.cmd in ['ls', 'mb', 'rb']:
                return
            session = self.session
            service = session.get_service('s3')
            endpoint = service.get_endpoint(self.parameters['region'])
            src_path = src_path[5:]
            if dir_op:
                if not src_path.endswith('/'):
                    src_path += '/'  # all prefixes must end with a /
            bucket, key = find_bucket_key(src_path)
            operation = service.get_operation('ListObjects')
            html_response, response_data = operation.call(endpoint,
                                                          bucket=bucket,
                                                          prefix=key,
                                                          delimiter='/')
            check_error(response_data)
            contents = response_data['Contents']
            common_prefixes = response_data['CommonPrefixes']
            if not dir_op:
                if contents:
                    if contents[0]['Key'] == key:
                        pass
                    else:
                        raise Exception("Error: S3 Object does not exist")
                else:
                    raise Exception('Error: S3 Object does not exist')
            else:
                if not contents and not common_prefixes:
                    raise Exception('Error: S3 Prefix does not exist')

        else:
            src_path = os.path.abspath(src_path)
            if os.path.exists(src_path):
                if os.path.isdir(src_path) and not dir_op:
                    raise Exception("Error: Requires a local file")
                elif os.path.isfile(src_path) and dir_op:
                    raise Exception("Error: Requires a local directory")
                else:
                    pass
            else:
                raise Exception("Error: Local path does not exist")
Example #17
 def create_multipart_upload(self):
     bucket, key = find_bucket_key(self.dest)
     params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
     self._handle_object_params(params)
     response_data, http = operate(self.service, 'CreateMultipartUpload',
                                   params)
     upload_id = response_data['UploadId']
     return upload_id
Example #18
 def __call__(self):
     LOGGER.debug("Waiting for download to finish.")
     self._context.wait_for_completion()
     bucket, key = find_bucket_key(self._filename.src)
     params = {'endpoint': self._filename.endpoint,
               'bucket': bucket, 'key': key}
     response_data, http = operate(
         self._filename.service, 'DeleteObject', params)
Example #19
 def _do_command(self, parsed_args, parsed_globals):
     bucket, key = find_bucket_key(parsed_args.paths[0][5:])
     self.service = self._session.get_service('s3')
     self.endpoint = self._get_endpoint(self.service, parsed_globals)
     if not bucket:
         self._list_all_buckets()
     else:
         self._list_all_objects(bucket, key)
     return 0
Example #20
 def check_dest_path(self, destination):
     if destination.startswith('s3://') and \
             self.cmd in ['cp', 'sync', 'mv']:
         bucket, key = find_bucket_key(destination[5:])
         # A bucket is not always provided (like 'aws s3 ls')
         # so only verify the bucket exists if we actually have
         # a bucket.
         if bucket:
             self._verify_bucket_exists(bucket)
Example #21
 def prepare_bucket(self, s3_path):
     bucket, key = find_bucket_key(s3_path)
     bucket_exists = self._check_bucket_exists(bucket)
     if not bucket_exists:
         self._create_bucket(bucket)
         sys.stdout.write('Using new S3 bucket: %s\n' % bucket)
     else:
         sys.stdout.write('Using existing S3 bucket: %s\n' % bucket)
     return bucket, key
Example #22
 def download(self):
     """
     Redirects the file to the multipart download function if the file is
     large.  If it is small enough, it gets the file as an object from s3.
     """
     bucket, key = find_bucket_key(self.src)
     params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
     response_data, http = operate(self.service, 'GetObject', params)
     save_file(self.dest, response_data, self.last_update)
Example #23
 def set_size_from_s3(self):
     """
     This runs a ``HeadObject`` on the s3 object and sets the size.
     """
     bucket, key = find_bucket_key(self.src)
     params = {'Bucket': bucket,
               'Key': key}
     response_data = self.client.head_object(**params)
     self.size = int(response_data['ContentLength'])
Example #24
 def create_multipart_upload(self):
     bucket, key = find_bucket_key(self.dest)
     params = {'Bucket': bucket, 'Key': key}
     self._inject_content_type(params)
     RequestParamsMapper.map_create_multipart_upload_params(
         params, self.parameters)
     response_data = self.client.create_multipart_upload(**params)
     upload_id = response_data['UploadId']
     return upload_id
Example #25
 def _handle_upload(self, body):
     bucket, key = find_bucket_key(self.dest)
     params = {
         'Bucket': bucket,
         'Key': key,
         'Body': body,
     }
     self._handle_object_params(params)
     response_data = self.client.put_object(**params)
Example #26
 def _cancel_upload(self, upload_id, filename):
     bucket, key = find_bucket_key(filename.dest)
     params = {
         'Bucket': bucket,
         'Key': key,
         'UploadId': upload_id,
     }
     LOGGER.debug("Aborting multipart upload for: %s", key)
     filename.client.abort_multipart_upload(**params)
Example #27
    def __call__(self):
        LOGGER.debug("Uploading part copy %s for filename: %s", self._part_number, self._filename.src)
        total_file_size = self._filename.size
        start_range = (self._part_number - 1) * self._chunk_size
        if self._is_last_part(self._part_number):
            end_range = total_file_size - 1
        else:
            end_range = start_range + self._chunk_size - 1
        range_param = "bytes=%s-%s" % (start_range, end_range)
        try:
            LOGGER.debug("Waiting for upload id.")
            upload_id = self._upload_context.wait_for_upload_id()
            bucket, key = find_bucket_key(self._filename.dest)
            src_bucket, src_key = find_bucket_key(self._filename.src)
            params = {
                "endpoint": self._filename.endpoint,
                "bucket": bucket,
                "key": key,
                "part_number": self._part_number,
                "upload_id": upload_id,
                "copy_source": "%s/%s" % (src_bucket, src_key),
                "copy_source_range": range_param,
            }
            response_data, http = operate(self._filename.service, "UploadPartCopy", params)
            etag = response_data["CopyPartResult"]["ETag"][1:-1]
            self._upload_context.announce_finished_part(etag=etag, part_number=self._part_number)

            message = print_operation(self._filename, 0)
            result = {"message": message, "total_parts": self._total_parts(), "error": False}
            self._result_queue.put(result)
        except UploadCancelledError as e:
            # We don't need to do anything in this case.  The task
            # has been cancelled, and the task that cancelled the
            # task has already queued a message.
            LOGGER.debug("Not uploading part copy, task has been cancelled.")
        except Exception as e:
            LOGGER.debug("Error during upload part copy: %s", e, exc_info=True)
            message = print_operation(self._filename, failed=True, dryrun=False)
            message += "\n" + str(e)
            result = {"message": message, "error": True}
            self._result_queue.put(result)
            self._upload_context.cancel_upload()
        else:
            LOGGER.debug("Copy part number %s completed for filename: %s", self._part_number, self._filename.src)
Example #28
 def download(self):
     """
     Redirects the file to the multipart download function if the file is
     large.  If it is small enough, it gets the file as an object from s3.
     """
     bucket, key = find_bucket_key(self.src)
     params = {'Bucket': bucket, 'Key': key}
     response_data = self.client.get_object(**params)
     save_file(self.dest, response_data, self.last_update,
               self.is_stream)
Example #29
 def make_bucket(self):
     """
     This operation makes a bucket.
     """
     bucket, key = find_bucket_key(self.src)
     bucket_config = {'LocationConstraint': self.client.meta.region_name}
     params = {'Bucket': bucket}
     if self.client.meta.region_name != 'us-east-1':
         params['CreateBucketConfiguration'] = bucket_config
     self.client.create_bucket(**params)
Example #30
 def _normalize_s3_trailing_slash(self, paths):
     for i, path in enumerate(paths):
         if path.startswith('s3://'):
             bucket, key = find_bucket_key(path[5:])
             if not key and not path.endswith('/'):
                 # If only a bucket was specified, we need
                 # to normalize the path and ensure it ends
                 # with a '/', s3://bucket -> s3://bucket/
                 path += '/'
                 paths[i] = path
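As a quick, hypothetical usage sketch of the trailing-slash normalization above (the bucket names are made up, and partition() stands in for find_bucket_key):

 def normalize_s3_trailing_slash(paths):
     """Minimal stand-in for the method above: append '/' when only a
     bucket was given, e.g. s3://bucket -> s3://bucket/."""
     for i, path in enumerate(paths):
         if path.startswith('s3://'):
             key = path[5:].partition('/')[2]
             if not key and not path.endswith('/'):
                 paths[i] = path + '/'
     return paths

 # normalize_s3_trailing_slash(['s3://bucket', 's3://bucket/key'])
 # -> ['s3://bucket/', 's3://bucket/key']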
Example #31
    def __call__(self):
        LOGGER.debug("Uploading part %s for filename: %s", self._part_number,
                     self._filename.src)
        try:
            LOGGER.debug("Waiting for upload id.")
            upload_id = self._upload_context.wait_for_upload_id()
            bucket, key = find_bucket_key(self._filename.dest)
            total = int(
                math.ceil(self._filename.size / float(self._chunk_size)))
            body = self._read_part()
            params = {
                'endpoint': self._filename.endpoint,
                'bucket': bucket,
                'key': key,
                'part_number': self._part_number,
                'upload_id': upload_id,
                'body': body
            }
            try:
                response_data, http = operate(self._filename.service,
                                              'UploadPart', params)
            finally:
                body.close()
            etag = response_data['ETag'][1:-1]
            self._upload_context.announce_finished_part(
                etag=etag, part_number=self._part_number)

            message = print_operation(self._filename, 0)
            result = {'message': message, 'total_parts': total, 'error': False}
            self._result_queue.put(result)
        except UploadCancelledError as e:
            # We don't need to do anything in this case.  The task
            # has been cancelled, and the task that cancelled the
            # task has already queued a message.
            LOGGER.debug("Not uploading part, task has been cancelled.")
        except Exception as e:
            LOGGER.debug('Error during part upload: %s', e, exc_info=True)
            message = print_operation(self._filename,
                                      failed=True,
                                      dryrun=False)
            message += '\n' + str(e)
            result = {'message': message, 'error': True}
            self._result_queue.put(result)
            self._upload_context.cancel_upload()
        else:
            LOGGER.debug("Part number %s completed for filename: %s",
                         self._part_number, self._filename.src)
Example #32
    def _download_part(self):
        total_file_size = self._filename.size
        start_range = self._part_number * self._chunk_size
        if self._part_number == int(total_file_size / self._chunk_size) - 1:
            end_range = ''
        else:
            end_range = start_range + self._chunk_size - 1
        range_param = 'bytes=%s-%s' % (start_range, end_range)
        LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                     self._filename.dest)
        bucket, key = find_bucket_key(self._filename.src)
        params = {'Bucket': bucket, 'Key': key, 'Range': range_param}
        RequestParamsMapper.map_get_object_params(params, self._params)
        for i in range(self.TOTAL_ATTEMPTS):
            try:
                LOGGER.debug("Making GetObject requests with byte range: %s",
                             range_param)
                response_data = self._client.get_object(**params)
                LOGGER.debug("Response received from GetObject")
                body = response_data['Body']
                self._queue_writes(body)
                self._context.announce_completed_part(self._part_number)

                message = print_operation(self._filename, 0)
                total_parts = int(self._filename.size / self._chunk_size)
                result = {
                    'message': message,
                    'error': False,
                    'total_parts': total_parts
                }
                self._result_queue.put(PrintTask(**result))
                LOGGER.debug("Task complete: %s", self)
                return
            except (socket.timeout, socket.error, ReadTimeoutError) as e:
                LOGGER.debug(
                    "Timeout error caught, retrying request, "
                    "(attempt %s / %s)",
                    i,
                    self.TOTAL_ATTEMPTS,
                    exc_info=True)
                continue
            except IncompleteReadError as e:
                LOGGER.debug("Incomplete read detected: %s, (attempt %s / %s)",
                             e, i, self.TOTAL_ATTEMPTS)
                continue
        raise RetriesExeededError("Maximum number of attempts exceeded: %s" %
                                  self.TOTAL_ATTEMPTS)
Example #33
    def __call__(self):
        try:
            part_info = self.part_queue.get(True, QUEUE_TIMEOUT_GET)
            with self.counter_lock:
                self.part_counter.count += 1
            filename = part_info[0]
            part_number = part_info[1]
            size_uploads = part_info[2]
            last_part_number = int(filename.size / size_uploads) - 1
            beginning_range = part_number*size_uploads
            str_range = "bytes="
            if part_number == last_part_number:
                str_range += str(beginning_range) + "-"
            else:
                end_range = beginning_range + size_uploads - 1
                str_range += str(beginning_range) + "-" + str(end_range)
            bucket, key = find_bucket_key(filename.src)
            try:
                params = {'endpoint': self.endpoint, 'bucket': bucket,
                          'key': key, 'range': str_range}
                response_data, http = operate(self.service, 'GetObject',
                                              params)
                body = response_data['Body'].read()
                with self.write_lock:
                    self.f.seek(part_number*size_uploads)
                    self.f.write(body)

                print_str = print_operation(filename, 0)
                print_result = {'result': print_str}
                part_str = {'total': int(filename.size / size_uploads)}
                print_result['part'] = part_str
                self.printQueue.put(print_result)
                self.dest_queue.put(part_number)
            except requests.ConnectionError as e:
                connect_error = str(e)
                LOGGER.debug("%s part download failure: %s" %
                            (part_info[0].src, connect_error))
                self.part_queue.put(part_info)
                self.executer.submit(self)
            except Exception as e:
                LOGGER.debug('%s' % str(e))
            self.part_queue.task_done()
            with self.counter_lock:
                self.part_counter.count -= 1
        except Queue.Empty:
            pass
Example #34
 def upload(self):
     """
     Redirects the file to the multipart upload function if the file is
     large.  If it is small enough, it puts the file as an object in s3.
     """
     with open(self.src, 'rb') as body:
         bucket, key = find_bucket_key(self.dest)
         params = {
             'endpoint': self.endpoint,
             'bucket': bucket,
             'key': key,
             'body': body,
         }
         self._handle_object_params(params)
         response_data, http = operate(self.service, 'PutObject', params)
         etag = response_data['ETag'][1:-1]
         body.seek(0)
         check_etag(etag, body)
Example #35
 def _handle_rm_force(self, parsed_globals, parameters):
     """
     This function recursively deletes objects in a bucket if the force
     parameter was thrown when using the remove bucket command.
     """
     # XXX: This shouldn't really be here.  This was originally moved from
     # the CommandParameters class to here, but this is still not the ideal
     # place for this code.  This should be moved
     # to either the CommandArchitecture class, or the RbCommand class where
     # the actual operations against S3 are performed.  This may require
     # some refactoring though to move this to either of those classes.
     # For now, moving this out of CommandParameters allows for that class
     # to be kept simple.
     if 'force' in parameters:
         if parameters['force']:
             bucket = find_bucket_key(parameters['src'][5:])[0]
             path = 's3://' + bucket
             del_objects = RmCommand(self._session)
             del_objects([path, '--recursive'], parsed_globals)
Example #36
    def __call__(self):
        total_file_size = self._filename.size
        start_range = self._part_number * self._chunk_size
        if self._part_number == int(total_file_size / self._chunk_size) - 1:
            end_range = ''
        else:
            end_range = start_range + self._chunk_size - 1
        range_param = 'bytes=%s-%s' % (start_range, end_range)
        LOGGER.debug("Downloading bytes range of %s for file %s", range_param,
                     self._filename.dest)
        bucket, key = find_bucket_key(self._filename.src)
        params = {
            'endpoint': self._filename.endpoint,
            'bucket': bucket,
            'key': key,
            'range': range_param
        }
        try:
            LOGGER.debug("Making GetObject requests with byte range: %s",
                         range_param)
            response_data, http = operate(self._service, 'GetObject', params)
            LOGGER.debug("Response received from GetObject")
            body = response_data['Body']
            self._write_to_file(body)
            self._context.announce_completed_part(self._part_number)

            message = print_operation(self._filename, 0)
            total_parts = int(self._filename.size / self._chunk_size)
            result = {
                'message': message,
                'error': False,
                'total_parts': total_parts
            }
            self._result_queue.put(result)
        except Exception as e:
            LOGGER.debug('Exception caught downloading byte range: %s',
                         e,
                         exc_info=True)
            self._context.cancel()
            raise e
Example #37
 def upload(self):
     """
     Redirects the file to the multipart upload function if the file is
     large.  If it is small enough, it puts the file as an object in s3.
     """
     if not self.is_multi:
         body = read_file(self.src)
         bucket, key = find_bucket_key(self.dest)
         if sys.version_info[:2] == (2, 6):
             stream_body = StringIO(body)
         else:
             stream_body = bytearray(body)
         params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
         if body:
             params['body'] = stream_body
         if self.parameters['acl']:
             params['acl'] = self.parameters['acl'][0]
         response_data, http = operate(self.service, 'PutObject', params)
         etag = retrieve_http_etag(http)
         check_etag(etag, body)
     else:
         self.multi_upload()
Example #38
 def list_objects(self, s3_path, dir_op):
     """
     This function yields the appropriate object or objects under a
     common prefix depending if the operation is on objects under a
     common prefix.  It yields the file's source path, size, and last
     update.
     """
     # Short circuit path: if we are not recursing into the s3
     # bucket and a specific path was given, we can just yield
     # that path and not have to call any operation in s3.
     bucket, prefix = find_bucket_key(s3_path)
     if not dir_op and prefix:
         yield self._list_single_object(s3_path)
     else:
         operation = self._service.get_operation('ListObjects')
         iterator = operation.paginate(self._endpoint,
                                       bucket=bucket,
                                       prefix=prefix)
         for html_response, response_data in iterator:
             contents = response_data['Contents']
             for content in contents:
                 src_path = bucket + '/' + content['Key']
                 size = content['Size']
                 last_update = parse(content['LastModified'])
                 last_update = last_update.astimezone(tzlocal())
                 if size == 0 and src_path.endswith('/'):
                     if self.operation_name == 'delete':
                         # This is to filter out manually created folders
                         # in S3.  They have a size zero and would be
                         # undesirably downloaded.  Local directories
                         # are automatically created when they do not
                         # exist locally.  But user should be able to
                         # delete them.
                         yield src_path, size, last_update
                 elif not dir_op and s3_path != src_path:
                     pass
                 else:
                     yield src_path, size, last_update
Example #39
 def _handle_rm_force(self, parsed_globals, parameters):
     """
     This function recursively deletes objects in a bucket if the force
     parameter was thrown when using the remove bucket command. It will
     refuse to delete if a key is specified in the s3path.
     """
     # XXX: This shouldn't really be here.  This was originally moved from
     # the CommandParameters class to here, but this is still not the ideal
     # place for this code.  This should be moved
     # to either the CommandArchitecture class, or the RbCommand class where
     # the actual operations against S3 are performed.  This may require
     # some refactoring though to move this to either of those classes.
     # For now, moving this out of CommandParameters allows for that class
     # to be kept simple.
     if 'force' in parameters:
         if parameters['force']:
             bucket, key = find_bucket_key(parameters['src'][5:])
             if key:
                 raise ValueError('Please specify a valid bucket name only.'
                                  ' E.g. s3://%s' % bucket)
             path = 's3://' + bucket
             del_objects = RmCommand(self._session)
             del_objects([path, '--recursive'], parsed_globals)
 def test_bucket(self):
     bucket, key = find_bucket_key('bucket')
     self.assertEqual(bucket, 'bucket')
     self.assertEqual(key, '')
Example #41
 def multi_upload(self):
     """
     Performs multipart uploads.  It initiates the multipart upload.
     It creates a queue ``part_queue`` which is directly responsible
     for controlling the progress of the multipart upload.  It then
     creates ``UploadPartTasks`` for threads to run via the
     ``executer``.  This function waits for all of the parts in the
     multipart upload to finish, and then it completes the multipart
     upload.  This method waits on its parts to finish.  So, threads
     are required to process the parts for this function to complete.
     """
     part_queue = NoBlockQueue(self.interrupt)
     complete_upload_queue = Queue.PriorityQueue()
     part_counter = MultiCounter()
     counter_lock = threading.Lock()
     bucket, key = find_bucket_key(self.dest)
     params = {'endpoint': self.endpoint, 'bucket': bucket, 'key': key}
     if self.parameters['acl']:
         params['acl'] = self.parameters['acl'][0]
     if self.parameters['guess_mime_type']:
         self._inject_content_type(params, self.src)
     response_data, http = operate(self.service, 'CreateMultipartUpload',
                                   params)
     upload_id = response_data['UploadId']
     size_uploads = self.chunksize
     num_uploads = int(math.ceil(self.size / float(size_uploads)))
     for i in range(1, (num_uploads + 1)):
         part_info = (self, upload_id, i, size_uploads)
         part_queue.put(part_info)
         task = UploadPartTask(session=self.session,
                               executer=self.executer,
                               part_queue=part_queue,
                               dest_queue=complete_upload_queue,
                               region=self.region,
                               printQueue=self.printQueue,
                               interrupt=self.interrupt,
                               part_counter=part_counter,
                               counter_lock=counter_lock)
         self.executer.submit(task)
     part_queue.join()
     # The following ensures that if the multipart upload is in progress,
     # all part uploads finish before aborting or completing.  This
     # really only applies when an interrupt signal is sent because the
     # ``part_queue.join()`` ensures this if the process is not
     # interrupted.
     while part_counter.count:
         time.sleep(0.1)
     parts_list = []
     while not complete_upload_queue.empty():
         part = complete_upload_queue.get()
         parts_list.append(part[1])
     if len(parts_list) == num_uploads:
         parts = {'Parts': parts_list}
         params = {
             'endpoint': self.endpoint,
             'bucket': bucket,
             'key': key,
             'upload_id': upload_id,
             'multipart_upload': parts
         }
         operate(self.service, 'CompleteMultipartUpload', params)
     else:
         abort_params = {
             'endpoint': self.endpoint,
             'bucket': bucket,
             'key': key,
             'upload_id': upload_id
         }
         operate(self.service, 'AbortMultipartUpload', abort_params)
         raise Exception()
Example #42
 def __call__(self):
     LOGGER.debug("Waiting for download to finish.")
     self._context.wait_for_completion()
     bucket, key = find_bucket_key(self._filename.src)
     params = {'Bucket': bucket, 'Key': key}
     self._filename.source_client.delete_object(**params)
Example #43
 def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
     bucket, key = find_bucket_key(fileinfo.src)
     return self._transfer_manager.delete(
         bucket=bucket, key=key, extra_args=extra_args,
         subscribers=subscribers)
Example #44
 def prepare_bucket(self, s3_path):
     bucket, key = find_bucket_key(s3_path)
     bucket_exists = self._check_bucket_exists(bucket)
     if not bucket_exists:
         self._create_bucket(bucket)
     return bucket, key
 def test_accesspoint_arn_with_key_and_prefix(self):
     bucket, key = find_bucket_key(
         'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint/pre/key')
     self.assertEqual(
         bucket, 'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint')
     self.assertEqual(key, 'pre/key')
 def test_accesspoint_arn_with_slash(self):
     bucket, key = find_bucket_key(
         'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint/')
     self.assertEqual(
         bucket, 'arn:aws:s3:us-west-2:123456789012:accesspoint/endpoint')
     self.assertEqual(key, '')
 def test_bucket_with_key_and_prefix(self):
     bucket, key = find_bucket_key('bucket/prefix/key')
     self.assertEqual(bucket, 'bucket')
     self.assertEqual(key, 'prefix/key')
 def test_bucket_with_key(self):
     bucket, key = find_bucket_key('bucket/key')
     self.assertEqual(bucket, 'bucket')
     self.assertEqual(key, 'key')
 def test_unicode(self):
     s3_path = '\u1234' + u'/' + '\u5678'
     bucket, key = find_bucket_key(s3_path)
     self.assertEqual(bucket, '\u1234')
     self.assertEqual(key, '\u5678')
Example #50
 def remove_bucket(self):
     """
     This operation removes a bucket.
     """
     bucket, key = find_bucket_key(self.src)
     self.client.delete_bucket(Bucket=bucket)
 def test_bucket_with_slash(self):
     bucket, key = find_bucket_key('bucket/')
     self.assertEqual(bucket, 'bucket')
     self.assertEqual(key, '')