Пример #1
0
    def GET(self, req):
        """
        Handles GET Bucket acl and GET Object acl.
        """
        resp = req.get_response(self.app)

        acl = resp.object_acl if req.is_object_request else resp.bucket_acl

        resp = HTTPOk()
        resp.body = tostring(acl.elem())

        return resp
Пример #2
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request
        """
        tag_max_keys = req.get_validated_param('max-keys',
                                               self.conf.max_bucket_listing)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        max_keys = min(tag_max_keys, self.conf.max_bucket_listing)

        encoding_type, query, listing_type, fetch_owner = \
            self._parse_request_options(req, max_keys)

        resp = req.get_response(self.app, query=query)

        objects = json.loads(resp.body)

        is_truncated = max_keys > 0 and len(objects) > max_keys
        objects = objects[:max_keys]

        if listing_type == 'object-versions':
            func = self._build_versions_result
        elif listing_type == 'version-2':
            func = self._build_list_bucket_result_type_two
        else:
            func = self._build_list_bucket_result_type_one
        elem = func(req, objects, encoding_type, tag_max_keys, is_truncated)
        self._add_objects_to_result(req, elem, objects, encoding_type,
                                    listing_type, fetch_owner)

        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #3
0
    def GET(self, req):  # pylint: disable=invalid-name
        """
        Handles GET Bucket and Object tagging.
        """
        if req.is_object_request:
            self.set_s3api_command(req, 'get-object-tagging')
        else:
            self.set_s3api_command(req, 'get-bucket-tagging')

        resp = req._get_response(self.app, 'HEAD', req.container_name,
                                 req.object_name)
        headers = dict()
        if req.is_object_request:
            body = resp.sysmeta_headers.get(OBJECT_TAGGING_HEADER)
            # It seems that S3 returns x-amz-version-id,
            # even if it is not documented.
            headers['x-amz-version-id'] = resp.sw_headers[VERSION_ID_HEADER]
        else:
            body = resp.sysmeta_headers.get(BUCKET_TAGGING_HEADER)
        close_if_possible(resp.app_iter)

        if not body:
            if not req.is_object_request:
                raise NoSuchTagSet(headers=headers)
            else:
                elem = Element('Tagging')
                SubElement(elem, 'TagSet')
                body = tostring(elem)

        return HTTPOk(body=body,
                      content_type='application/xml',
                      headers=headers)
Пример #4
0
    def PUT(self, req):  # pylint: disable=invalid-name
        """
        Handles PUT Bucket and Object tagging.
        """
        if req.is_object_request:
            self.set_s3api_command(req, 'put-object-tagging')
        else:
            self.set_s3api_command(req, 'put-bucket-tagging')

        body = req.xml(MAX_TAGGING_BODY_SIZE)
        try:
            # Just validate the body
            fromstring(body, 'Tagging')
        except (DocumentInvalid, XMLSyntaxError) as exc:
            raise MalformedXML(str(exc))

        if req.object_name:
            req.headers[OBJECT_TAGGING_HEADER] = body
        else:
            req.headers[BUCKET_TAGGING_HEADER] = body
        resp = req._get_response(self.app, 'POST', req.container_name,
                                 req.object_name)
        if resp.status_int == 202:
            headers = dict()
            if req.object_name:
                headers['x-amz-version-id'] = \
                    resp.sw_headers[VERSION_ID_HEADER]
            return HTTPOk(headers=headers)
        return resp
Пример #5
0
    def HEAD(self, req):
        """
        Handle HEAD Bucket (Get Metadata) request
        """
        resp = req.get_response(self.app)

        return HTTPOk(headers=resp.headers)
Пример #6
0
    def PUT(self, req):
        """
        Handles PUT Bucket versioning.
        """
        self.set_s3api_command(req, 'put-bucket-versioning')

        if 'object_versioning' not in get_swift_info():
            raise S3NotImplemented()

        xml = req.xml(MAX_PUT_VERSIONING_BODY_SIZE)
        try:
            elem = fromstring(xml, 'VersioningConfiguration')
            status = elem.find('./Status').text
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except Exception as e:
            self.logger.error(e)
            raise

        if status not in ['Enabled', 'Suspended']:
            raise MalformedXML()

        # Set up versioning
        # NB: object_versioning responsible for ensuring its container exists
        req.headers['X-Versions-Enabled'] = str(status == 'Enabled').lower()
        req.get_response(self.app, 'POST')

        return HTTPOk()
Пример #7
0
    def GET(self, req):
        """
        Handles GET Bucket acl and GET Object acl.
        """
        if req.is_object_request:
            self.set_s3api_command(req, 'get-object-acl')
        else:
            self.set_s3api_command(req, 'get-bucket-acl')

        resp = req.get_response(self.app)

        acl = resp.object_acl if req.is_object_request else resp.bucket_acl

        resp = HTTPOk()
        resp.body = tostring(acl.elem())

        return resp
Пример #8
0
 def GET(self, req):  # pylint: disable=invalid-name
     """
     Handles GET Bucket CORS.
     """
     sysmeta = req.get_container_info(self.app).get('sysmeta', {})
     body = sysmeta.get('s3api-cors')
     if not body:
         raise NoSuchCORSConfiguration
     return HTTPOk(body=body, content_type='application/xml')
Пример #9
0
    def HEAD(self, req):
        """
        Handle HEAD Bucket (Get Metadata) request
        """
        self.set_s3api_command(req, 'head-bucket')

        resp = req.get_response(self.app)

        return HTTPOk(headers=resp.headers)
Пример #10
0
    def GET(self, req):
        """
        Handles GET Bucket and Object tagging.
        """
        elem = Element('Tagging')
        SubElement(elem, 'TagSet')
        body = tostring(elem)

        return HTTPOk(body=body, content_type=None)
Пример #11
0
    def POST(self, req):
        """
        Handles Initiate Multipart Upload.
        """
        # Create a unique S3 upload id from UUID to avoid duplicates.
        upload_id = unique_id()

        seg_container = req.container_name + MULTIUPLOAD_SUFFIX
        content_type = req.headers.get('Content-Type')
        if content_type:
            req.headers[sysmeta_header('object', 'has-content-type')] = 'yes'
            req.headers[sysmeta_header('object',
                                       'content-type')] = content_type
        else:
            req.headers[sysmeta_header('object', 'has-content-type')] = 'no'
        req.headers['Content-Type'] = 'application/directory'

        try:
            seg_req = copy.copy(req)
            seg_req.environ = copy.copy(req.environ)
            seg_req.container_name = seg_container
            seg_req.get_container_info(self.app)
        except NoSuchBucket:
            try:
                # multi-upload bucket doesn't exist, create one with
                # same storage policy and acls as the primary bucket
                info = req.get_container_info(self.app)
                policy_name = POLICIES[info['storage_policy']].name
                hdrs = {'X-Storage-Policy': policy_name}
                if info.get('read_acl'):
                    hdrs['X-Container-Read'] = info['read_acl']
                if info.get('write_acl'):
                    hdrs['X-Container-Write'] = info['write_acl']
                seg_req.get_response(self.app,
                                     'PUT',
                                     seg_container,
                                     '',
                                     headers=hdrs)
            except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
                pass

        obj = '%s/%s' % (req.object_name, upload_id)

        req.headers.pop('Etag', None)
        req.headers.pop('Content-Md5', None)

        req.get_response(self.app, 'PUT', seg_container, obj, body='')

        result_elem = Element('InitiateMultipartUploadResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'UploadId').text = upload_id

        body = tostring(result_elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #12
0
    def GET(self, req):
        """
        Handles GET Bucket logging.
        """
        req.get_response(self.app, method='HEAD')

        # logging disabled
        elem = Element('BucketLoggingStatus')
        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #13
0
    def GET(self, req):
        """
        Handles GET Bucket versioning.
        """
        req.get_response(self.app, method='HEAD')

        # Just report there is no versioning configured here.
        elem = Element('VersioningConfiguration')
        body = tostring(elem)

        return HTTPOk(body=body, content_type="text/plain")
Пример #14
0
    def GET(self, req):
        """
        Handles GET Bucket location.
        """
        req.get_response(self.app, method='HEAD')

        elem = Element('LocationConstraint')
        if self.conf.location != 'US':
            elem.text = self.conf.location
        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #15
0
    def GET(self, req):
        """
        Handles GET Bucket versioning.
        """
        sysmeta = req.get_container_info(self.app).get('sysmeta', {})

        elem = Element('VersioningConfiguration')
        if sysmeta.get('versions-enabled'):
            SubElement(elem, 'Status').text = ('Enabled' if config_true_value(
                sysmeta['versions-enabled']) else 'Suspended')
        body = tostring(elem)

        return HTTPOk(body=body, content_type=None)
Пример #16
0
    def PUT(self, req):
        """
        Handles PUT Bucket acl and PUT Object acl.
        """
        if req.is_object_request:
            self.set_s3api_command(req, 'put-object-acl')
        else:
            self.set_s3api_command(req, 'put-bucket-acl')

        # ACLs will be set as sysmeta
        req.get_response(self.app, 'POST')

        return HTTPOk()
Пример #17
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request
        """
        max_keys = req.get_validated_param('max-keys',
                                           self.conf.max_bucket_listing)
        tag_max_keys = max_keys
        # TODO: Separate max_bucket_listing and default_bucket_listing
        max_keys = min(max_keys, self.conf.max_bucket_listing)

        encoding_type, query, listing_type, fetch_owner = \
            self._parse_request_options(req, max_keys)

        if listing_type == 'object-versions':
            self.set_s3api_command(req, 'list-object-versions')
        elif listing_type == 'version-2':
            self.set_s3api_command(req, 'list-objects-v2')
        else:
            self.set_s3api_command(req, 'list-objects')

        query['format'] = 'json'
        resp = req.get_response(self.app, query=query)

        objects = json.loads(resp.body)

        is_truncated = max_keys > 0 and len(objects) > max_keys
        objects = objects[:max_keys]

        if listing_type == 'object-versions':
            elem = self._build_versions_result(req, objects, is_truncated)
        elif listing_type == 'version-2':
            elem = self._build_list_bucket_result_type_two(
                req, objects, is_truncated)
        else:
            elem = self._build_list_bucket_result_type_one(
                req, objects, encoding_type, is_truncated)
        self._finish_result(req, elem, tag_max_keys, encoding_type,
                            is_truncated)
        self._add_objects_to_result(req, elem, objects, encoding_type,
                                    listing_type, fetch_owner)

        body = tostring(elem)
        resp = HTTPOk(body=body, content_type='application/xml')

        origin = req.headers.get('Origin')
        if origin:
            rule = get_cors(self.app, req, "GET", origin)
            if rule:
                cors_fill_headers(req, resp, rule)

        return resp
Пример #18
0
    def GET(self, req):
        """
        Handles GET Bucket location.
        """
        self.set_s3api_command(req, 'get-bucket-location')

        req.get_response(self.app, method='HEAD')

        elem = Element('LocationConstraint')
        if self.conf.location != 'us-east-1':
            elem.text = self.conf.location
        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #19
0
    def PUT(self, req):
        """
        Handles PUT Bucket acl and PUT Object acl.
        """
        if req.is_object_request:
            headers = {}
            src_path = '/%s/%s' % (req.container_name, req.object_name)

            # object-sysmeta' can be updated by 'Copy' method,
            # but can not be by 'POST' method.
            # So headers['X-Copy-From'] for copy request is added here.
            headers['X-Copy-From'] = quote(src_path)
            headers['Content-Length'] = 0
            req.get_response(self.app, 'PUT', headers=headers)
        else:
            req.get_response(self.app, 'POST')

        return HTTPOk()
Пример #20
0
    def OPTIONS(self, req):
        origin = req.headers.get('Origin')
        if not origin:
            raise CORSOriginMissing()

        method = req.headers.get('Access-Control-Request-Method')
        if method not in CORS_ALLOWED_HTTP_METHOD:
            raise CORSInvalidAccessControlRequest(method=method)

        rule = get_cors(self.app, req, method, origin)
        # FIXME(mbo): we should raise also NoSuchCORSConfiguration
        if rule is None:
            raise CORSForbidden(method)

        resp = HTTPOk(body=None)
        del resp.headers['Content-Type']

        return cors_fill_headers(req, resp, rule)
Пример #21
0
    def POST(self, req):
        """
        Handles Initiate Multipart Upload.
        """
        # Create a unique S3 upload id from UUID to avoid duplicates.
        upload_id = unique_id()

        orig_container = req.container_name
        seg_container = orig_container + MULTIUPLOAD_SUFFIX
        content_type = req.headers.get('Content-Type')
        if content_type:
            req.headers[sysmeta_header('object', 'has-content-type')] = 'yes'
            req.headers[sysmeta_header('object',
                                       'content-type')] = content_type
        else:
            req.headers[sysmeta_header('object', 'has-content-type')] = 'no'
        req.headers['Content-Type'] = 'application/directory'

        try:
            req.container_name = seg_container
            req.get_container_info(self.app)
        except NoSuchBucket:
            try:
                req.get_response(self.app, 'PUT', seg_container, '')
            except (BucketAlreadyExists, BucketAlreadyOwnedByYou):
                pass
        finally:
            req.container_name = orig_container

        obj = '%s/%s' % (req.object_name, upload_id)

        req.headers.pop('Etag', None)
        req.headers.pop('Content-Md5', None)

        req.get_response(self.app, 'PUT', seg_container, obj, body='')

        result_elem = Element('InitiateMultipartUploadResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'Key').text = req.object_name
        SubElement(result_elem, 'UploadId').text = upload_id

        body = tostring(result_elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #22
0
    def GET(self, req):
        """
        Handle GET Service request
        """
        self.set_s3api_command(req, 'list-buckets')

        resp = req.get_response(self.app, query={'format': 'json'})

        containers = json.loads(resp.body)

        containers = filter(
            lambda item: validate_bucket_name(
                item['name'], self.conf.dns_compliant_bucket_names),
            containers)

        # we don't keep the creation time of a bucket (s3cmd doesn't
        # work without that) so we use something bogus.
        elem = Element('ListAllMyBucketsResult')

        owner = SubElement(elem, 'Owner')
        SubElement(owner, 'ID').text = req.user_id
        SubElement(owner, 'DisplayName').text = req.user_id

        buckets = SubElement(elem, 'Buckets')
        for c in containers:
            if self.conf.s3_acl and self.conf.check_bucket_owner:
                container = bytes_to_wsgi(c['name'].encode('utf8'))
                try:
                    req.get_response(self.app, 'HEAD', container)
                except AccessDenied:
                    continue
                except NoSuchBucket:
                    continue

            bucket = SubElement(buckets, 'Bucket')
            SubElement(bucket, 'Name').text = c['name']
            SubElement(bucket, 'CreationDate').text = \
                '2009-02-03T16:45:09.000Z'

        body = tostring(elem)

        return HTTPOk(content_type='application/xml', body=body)
Пример #23
0
def get_acl(account_name, headers):
    """
    Attempts to construct an S3 ACL based on what is found in the swift headers
    """

    elem = Element('AccessControlPolicy')
    owner = SubElement(elem, 'Owner')
    SubElement(owner, 'ID').text = account_name
    SubElement(owner, 'DisplayName').text = account_name
    access_control_list = SubElement(elem, 'AccessControlList')

    # grant FULL_CONTROL to myself by default
    grant = SubElement(access_control_list, 'Grant')
    grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI})
    grantee.set('{%s}type' % XMLNS_XSI, 'CanonicalUser')
    SubElement(grantee, 'ID').text = account_name
    SubElement(grantee, 'DisplayName').text = account_name
    SubElement(grant, 'Permission').text = 'FULL_CONTROL'

    referrers, _ = parse_acl(headers.get('x-container-read'))
    if referrer_allowed('unknown', referrers):
        # grant public-read access
        grant = SubElement(access_control_list, 'Grant')
        grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI})
        grantee.set('{%s}type' % XMLNS_XSI, 'Group')
        SubElement(grantee, 'URI').text = \
            'http://acs.amazonaws.com/groups/global/AllUsers'
        SubElement(grant, 'Permission').text = 'READ'

    referrers, _ = parse_acl(headers.get('x-container-write'))
    if referrer_allowed('unknown', referrers):
        # grant public-write access
        grant = SubElement(access_control_list, 'Grant')
        grantee = SubElement(grant, 'Grantee', nsmap={'xsi': XMLNS_XSI})
        grantee.set('{%s}type' % XMLNS_XSI, 'Group')
        SubElement(grantee, 'URI').text = \
            'http://acs.amazonaws.com/groups/global/AllUsers'
        SubElement(grant, 'Permission').text = 'WRITE'

    body = tostring(elem)

    return HTTPOk(body=body, content_type="text/plain")
Пример #24
0
    def POST(self, req):
        """
        Handles Delete Multiple Objects.
        """
        def object_key_iter(elem):
            for obj in elem.iterchildren('Object'):
                key = obj.find('./Key').text
                if not key:
                    raise UserKeyMustBeSpecified()
                version = obj.find('./VersionId')
                if version is not None:
                    version = version.text

                yield key, version

        max_body_size = min(
            # FWIW, AWS limits multideletes to 1000 keys, and swift limits
            # object names to 1024 bytes (by default). Add a factor of two to
            # allow some slop.
            2 * self.conf.max_multi_delete_objects * MAX_OBJECT_NAME_LENGTH,
            # But, don't let operators shoot themselves in the foot
            10 * 1024 * 1024)

        try:
            xml = req.xml(max_body_size)
            if not xml:
                raise MissingRequestBodyError()

            req.check_md5(xml)
            elem = fromstring(xml, 'Delete', self.logger)

            quiet = elem.find('./Quiet')
            if quiet is not None and quiet.text.lower() == 'true':
                self.quiet = True
            else:
                self.quiet = False

            delete_list = list(object_key_iter(elem))
            if len(delete_list) > self.conf.max_multi_delete_objects:
                raise MalformedXML()
        except (XMLSyntaxError, DocumentInvalid):
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        elem = Element('DeleteResult')

        # check bucket existence
        try:
            req.get_response(self.app, 'HEAD')
        except AccessDenied as error:
            body = self._gen_error_body(error, elem, delete_list)
            return HTTPOk(body=body)

        if any(version is not None for _key, version in delete_list):
            # TODO: support deleting specific versions of objects
            raise S3NotImplemented()

        def do_delete(base_req, key, version):
            req = copy.copy(base_req)
            req.environ = copy.copy(base_req.environ)
            req.object_name = key

            try:
                query = req.gen_multipart_manifest_delete_query(self.app)
                resp = req.get_response(self.app,
                                        method='DELETE',
                                        query=query,
                                        headers={'Accept': 'application/json'})
                # Have to read the response to actually do the SLO delete
                if query:
                    try:
                        delete_result = json.loads(resp.body)
                        if delete_result['Errors']:
                            # NB: bulk includes 404s in "Number Not Found",
                            # not "Errors"
                            msg_parts = [delete_result['Response Status']]
                            msg_parts.extend(
                                '%s: %s' % (obj, status)
                                for obj, status in delete_result['Errors'])
                            return key, {
                                'code': 'SLODeleteError',
                                'message': '\n'.join(msg_parts)
                            }
                        # else, all good
                    except (ValueError, TypeError, KeyError):
                        # Logs get all the gory details
                        self.logger.exception(
                            'Could not parse SLO delete response: %r',
                            resp.body)
                        # Client gets something more generic
                        return key, {
                            'code': 'SLODeleteError',
                            'message': 'Unexpected swift response'
                        }
            except NoSuchKey:
                pass
            except ErrorResponse as e:
                return key, {'code': e.__class__.__name__, 'message': e._msg}
            return key, None

        with StreamingPile(self.conf.multi_delete_concurrency) as pile:
            for key, err in pile.asyncstarmap(
                    do_delete,
                ((req, key, version) for key, version in delete_list)):
                if err:
                    error = SubElement(elem, 'Error')
                    SubElement(error, 'Key').text = key
                    SubElement(error, 'Code').text = err['code']
                    SubElement(error, 'Message').text = err['message']
                elif not self.quiet:
                    deleted = SubElement(elem, 'Deleted')
                    SubElement(deleted, 'Key').text = key

        body = tostring(elem)

        return HTTPOk(body=body)
Пример #25
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (container, req.object_name, upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Пример #26
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        # Query for the objects in the segments area to make sure it completed
        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/'
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        resp = req.get_response(self.app, 'GET', container, '', query=query)
        objinfo = json.loads(resp.body)
        objtable = dict((o['name'], {
            'path': '/'.join(['', container, o['name']]),
            'etag': o['hash'],
            'size_bytes': o['bytes']
        }) for o in objinfo)

        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]

                info = objtable.get("%s/%s/%s" %
                                    (req.object_name, upload_id, part_number))
                if info is None or info['etag'] != etag:
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                s3_etag_hasher.update(etag.decode('hex'))
                info['size_bytes'] = int(info['size_bytes'])
                manifest.append(info)
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        # Check the size of each segment except the last and make sure they are
        # all more than the minimum upload chunk size
        for info in manifest[:-1]:
            if info['size_bytes'] < self.conf.min_segment_size:
                raise EntityTooSmall()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if not yielded_anything:
                                    yield ('<?xml version="1.0" '
                                           'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                            body.append(chunk)
                        body = json.loads(''.join(body))
                        if body['Response Status'] != '201 Created':
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    expected_msg = ('too small; each segment must be '
                                    'at least 1 byte')
                    if expected_msg in msg:
                        # FIXME: AWS S3 allows a smaller object than 5 MB if
                        # there is only one part.  Use a COPY request to copy
                        # the part object from the segments container instead.
                        raise EntityTooSmall(msg)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                if parsed_url.port:
                    host_url += ':%s' % parsed_url.port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield '\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield '\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Пример #27
0
    def GET(self, req):
        """
        Handles List Parts.
        """
        def filter_part_num_marker(o):
            try:
                num = int(os.path.basename(o['name']))
                return num > part_num_marker
            except ValueError:
                return False

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        upload_id = req.params['uploadId']
        _get_upload_info(req, self.app, upload_id)

        maxparts = req.get_validated_param('max-parts',
                                           DEFAULT_MAX_PARTS_LISTING,
                                           self.conf.max_parts_listing)
        part_num_marker = req.get_validated_param('part-number-marker', 0)

        query = {
            'format': 'json',
            'prefix': '%s/%s/' % (req.object_name, upload_id),
            'delimiter': '/',
            'marker': '',
        }

        container = req.container_name + MULTIUPLOAD_SUFFIX
        # Because the parts are out of order in Swift, we list up to the
        # maximum number of parts and then apply the marker and limit options.
        objects = []
        while True:
            resp = req.get_response(self.app,
                                    container=container,
                                    obj='',
                                    query=query)
            new_objects = json.loads(resp.body)
            if not new_objects:
                break
            objects.extend(new_objects)
            query['marker'] = new_objects[-1]['name']

        last_part = 0

        # If the caller requested a list starting at a specific part number,
        # construct a sub-set of the object list.
        objList = [obj for obj in objects if filter_part_num_marker(obj)]

        # pylint: disable-msg=E1103
        objList.sort(key=lambda o: int(o['name'].split('/')[-1]))

        if len(objList) > maxparts:
            objList = objList[:maxparts]
            truncated = True
        else:
            truncated = False
        # TODO: We have to retrieve object list again when truncated is True
        # and some objects filtered by invalid name because there could be no
        # enough objects for limit defined by maxparts.

        if objList:
            o = objList[-1]
            last_part = os.path.basename(o['name'])

        result_elem = Element('ListPartsResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        name = req.object_name
        if encoding_type == 'url':
            name = quote(name)
        SubElement(result_elem, 'Key').text = name
        SubElement(result_elem, 'UploadId').text = upload_id

        initiator_elem = SubElement(result_elem, 'Initiator')
        SubElement(initiator_elem, 'ID').text = req.user_id
        SubElement(initiator_elem, 'DisplayName').text = req.user_id
        owner_elem = SubElement(result_elem, 'Owner')
        SubElement(owner_elem, 'ID').text = req.user_id
        SubElement(owner_elem, 'DisplayName').text = req.user_id

        SubElement(result_elem, 'StorageClass').text = 'STANDARD'
        SubElement(result_elem, 'PartNumberMarker').text = str(part_num_marker)
        SubElement(result_elem, 'NextPartNumberMarker').text = str(last_part)
        SubElement(result_elem, 'MaxParts').text = str(maxparts)
        if 'encoding-type' in req.params:
            SubElement(result_elem, 'EncodingType').text = \
                req.params['encoding-type']
        SubElement(result_elem, 'IsTruncated').text = \
            'true' if truncated else 'false'

        for i in objList:
            part_elem = SubElement(result_elem, 'Part')
            SubElement(part_elem, 'PartNumber').text = i['name'].split('/')[-1]
            SubElement(part_elem, 'LastModified').text = \
                i['last_modified'][:-3] + 'Z'
            SubElement(part_elem, 'ETag').text = '"%s"' % i['hash']
            SubElement(part_elem, 'Size').text = str(i['bytes'])

        body = tostring(result_elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #28
0
    def GET(self, req):
        """
        Handles List Multipart Uploads
        """
        def separate_uploads(uploads, prefix, delimiter):
            """
            separate_uploads will separate uploads into non_delimited_uploads
            (a subset of uploads) and common_prefixes according to the
            specified delimiter. non_delimited_uploads is a list of uploads
            which exclude the delimiter. common_prefixes is a set of prefixes
            prior to the specified delimiter. Note that the prefix in the
            common_prefixes includes the delimiter itself.

            i.e. if '/' delimiter specified and then the uploads is consists of
            ['foo', 'foo/bar'], this function will return (['foo'], ['foo/']).

            :param uploads: A list of uploads dictionary
            :param prefix: A string of prefix reserved on the upload path.
                           (i.e. the delimiter must be searched behind the
                            prefix)
            :param delimiter: A string of delimiter to split the path in each
                              upload

            :return (non_delimited_uploads, common_prefixes)
            """
            if six.PY2:
                (prefix, delimiter) = utf8encode(prefix, delimiter)
            non_delimited_uploads = []
            common_prefixes = set()
            for upload in uploads:
                key = upload['key']
                end = key.find(delimiter, len(prefix))
                if end >= 0:
                    common_prefix = key[:end + len(delimiter)]
                    common_prefixes.add(common_prefix)
                else:
                    non_delimited_uploads.append(upload)
            return non_delimited_uploads, sorted(common_prefixes)

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        keymarker = req.params.get('key-marker', '')
        uploadid = req.params.get('upload-id-marker', '')
        maxuploads = req.get_validated_param('max-uploads',
                                             DEFAULT_MAX_UPLOADS,
                                             DEFAULT_MAX_UPLOADS)

        query = {
            'format': 'json',
            'limit': maxuploads + 1,
        }

        if uploadid and keymarker:
            query.update({'marker': '%s/%s' % (keymarker, uploadid)})
        elif keymarker:
            query.update({'marker': '%s/~' % (keymarker)})
        if 'prefix' in req.params:
            query.update({'prefix': req.params['prefix']})

        container = req.container_name + MULTIUPLOAD_SUFFIX
        try:
            resp = req.get_response(self.app, container=container, query=query)
            objects = json.loads(resp.body)
        except NoSuchBucket:
            # Assume NoSuchBucket as no uploads
            objects = []

        def object_to_upload(object_info):
            obj, upid = object_info['name'].rsplit('/', 1)
            obj_dict = {
                'key': obj,
                'upload_id': upid,
                'last_modified': object_info['last_modified']
            }
            return obj_dict

        # uploads is a list consists of dict, {key, upload_id, last_modified}
        # Note that pattern matcher will drop whole segments objects like as
        # object_name/upload_id/1.
        pattern = re.compile('/[0-9]+$')
        uploads = [
            object_to_upload(obj) for obj in objects
            if pattern.search(obj.get('name', '')) is None
        ]

        prefixes = []
        if 'delimiter' in req.params:
            prefix = req.params.get('prefix', '')
            delimiter = req.params['delimiter']
            uploads, prefixes = separate_uploads(uploads, prefix, delimiter)

        if len(uploads) > maxuploads:
            uploads = uploads[:maxuploads]
            truncated = True
        else:
            truncated = False

        nextkeymarker = ''
        nextuploadmarker = ''
        if len(uploads) > 1:
            nextuploadmarker = uploads[-1]['upload_id']
            nextkeymarker = uploads[-1]['key']

        result_elem = Element('ListMultipartUploadsResult')
        SubElement(result_elem, 'Bucket').text = req.container_name
        SubElement(result_elem, 'KeyMarker').text = keymarker
        SubElement(result_elem, 'UploadIdMarker').text = uploadid
        SubElement(result_elem, 'NextKeyMarker').text = nextkeymarker
        SubElement(result_elem, 'NextUploadIdMarker').text = nextuploadmarker
        if 'delimiter' in req.params:
            SubElement(result_elem, 'Delimiter').text = req.params['delimiter']
        if 'prefix' in req.params:
            SubElement(result_elem, 'Prefix').text = req.params['prefix']
        SubElement(result_elem, 'MaxUploads').text = str(maxuploads)
        if encoding_type is not None:
            SubElement(result_elem, 'EncodingType').text = encoding_type
        SubElement(result_elem, 'IsTruncated').text = \
            'true' if truncated else 'false'

        # TODO: don't show uploads which are initiated before this bucket is
        # created.
        for u in uploads:
            upload_elem = SubElement(result_elem, 'Upload')
            name = u['key']
            if encoding_type == 'url':
                name = quote(name)
            SubElement(upload_elem, 'Key').text = name
            SubElement(upload_elem, 'UploadId').text = u['upload_id']
            initiator_elem = SubElement(upload_elem, 'Initiator')
            SubElement(initiator_elem, 'ID').text = req.user_id
            SubElement(initiator_elem, 'DisplayName').text = req.user_id
            owner_elem = SubElement(upload_elem, 'Owner')
            SubElement(owner_elem, 'ID').text = req.user_id
            SubElement(owner_elem, 'DisplayName').text = req.user_id
            SubElement(upload_elem, 'StorageClass').text = 'STANDARD'
            SubElement(upload_elem, 'Initiated').text = \
                u['last_modified'][:-3] + 'Z'

        for p in prefixes:
            elem = SubElement(result_elem, 'CommonPrefixes')
            SubElement(elem, 'Prefix').text = p

        body = tostring(result_elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #29
0
    def GET(self, req):
        """
        Handle GET Bucket (List Objects) request
        """

        max_keys = req.get_validated_param('max-keys',
                                           self.conf.max_bucket_listing)
        # TODO: Separate max_bucket_listing and default_bucket_listing
        tag_max_keys = max_keys
        max_keys = min(max_keys, self.conf.max_bucket_listing)

        encoding_type = req.params.get('encoding-type')
        if encoding_type is not None and encoding_type != 'url':
            err_msg = 'Invalid Encoding Method specified in Request'
            raise InvalidArgument('encoding-type', encoding_type, err_msg)

        query = {
            'format': 'json',
            'limit': max_keys + 1,
        }
        if 'prefix' in req.params:
            query.update({'prefix': req.params['prefix']})
        if 'delimiter' in req.params:
            query.update({'delimiter': req.params['delimiter']})
        fetch_owner = False
        if 'versions' in req.params:
            listing_type = 'object-versions'
            if 'key-marker' in req.params:
                query.update({'marker': req.params['key-marker']})
            elif 'version-id-marker' in req.params:
                err_msg = ('A version-id marker cannot be specified without '
                           'a key marker.')
                raise InvalidArgument('version-id-marker',
                                      req.params['version-id-marker'], err_msg)
        elif int(req.params.get('list-type', '1')) == 2:
            listing_type = 'version-2'
            if 'start-after' in req.params:
                query.update({'marker': req.params['start-after']})
            # continuation-token overrides start-after
            if 'continuation-token' in req.params:
                decoded = b64decode(req.params['continuation-token'])
                query.update({'marker': decoded})
            if 'fetch-owner' in req.params:
                fetch_owner = config_true_value(req.params['fetch-owner'])
        else:
            listing_type = 'version-1'
            if 'marker' in req.params:
                query.update({'marker': req.params['marker']})

        resp = req.get_response(self.app, query=query)

        objects = json.loads(resp.body)

        # in order to judge that truncated is valid, check whether
        # max_keys + 1 th element exists in swift.
        is_truncated = max_keys > 0 and len(objects) > max_keys
        objects = objects[:max_keys]

        if listing_type == 'object-versions':
            elem = Element('ListVersionsResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            SubElement(elem, 'KeyMarker').text = req.params.get('key-marker')
            SubElement(
                elem,
                'VersionIdMarker').text = req.params.get('version-id-marker')
            if is_truncated:
                if 'name' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['name']
                if 'subdir' in objects[-1]:
                    SubElement(elem, 'NextKeyMarker').text = \
                        objects[-1]['subdir']
                SubElement(elem, 'NextVersionIdMarker').text = 'null'
        else:
            elem = Element('ListBucketResult')
            SubElement(elem, 'Name').text = req.container_name
            SubElement(elem, 'Prefix').text = req.params.get('prefix')
            if listing_type == 'version-1':
                SubElement(elem, 'Marker').text = req.params.get('marker')
                if is_truncated and 'delimiter' in req.params:
                    if 'name' in objects[-1]:
                        name = objects[-1]['name']
                    else:
                        name = objects[-1]['subdir']
                    if encoding_type == 'url':
                        name = quote(name)
                    SubElement(elem, 'NextMarker').text = name
            elif listing_type == 'version-2':
                if is_truncated:
                    if 'name' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['name'].encode('utf8'))
                    if 'subdir' in objects[-1]:
                        SubElement(elem, 'NextContinuationToken').text = \
                            b64encode(objects[-1]['subdir'].encode('utf8'))
                if 'continuation-token' in req.params:
                    SubElement(elem, 'ContinuationToken').text = \
                        req.params['continuation-token']
                if 'start-after' in req.params:
                    SubElement(elem, 'StartAfter').text = \
                        req.params['start-after']
                SubElement(elem, 'KeyCount').text = str(len(objects))

        SubElement(elem, 'MaxKeys').text = str(tag_max_keys)

        if 'delimiter' in req.params:
            SubElement(elem, 'Delimiter').text = req.params['delimiter']

        if encoding_type == 'url':
            SubElement(elem, 'EncodingType').text = encoding_type

        SubElement(elem, 'IsTruncated').text = \
            'true' if is_truncated else 'false'

        for o in objects:
            if 'subdir' not in o:
                name = o['name']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))

                if listing_type == 'object-versions':
                    contents = SubElement(elem, 'Version')
                    SubElement(contents, 'Key').text = name
                    SubElement(contents, 'VersionId').text = 'null'
                    SubElement(contents, 'IsLatest').text = 'true'
                else:
                    contents = SubElement(elem, 'Contents')
                    SubElement(contents, 'Key').text = name
                SubElement(contents, 'LastModified').text = \
                    o['last_modified'][:-3] + 'Z'
                if 's3_etag' in o:
                    # New-enough MUs are already in the right format
                    etag = o['s3_etag']
                elif 'slo_etag' in o:
                    # SLOs may be in something *close* to the MU format
                    etag = '"%s-N"' % o['slo_etag'].strip('"')
                else:
                    # Normal objects just use the MD5
                    etag = '"%s"' % o['hash']
                    # This also catches sufficiently-old SLOs, but we have
                    # no way to identify those from container listings
                SubElement(contents, 'ETag').text = etag
                SubElement(contents, 'Size').text = str(o['bytes'])
                if fetch_owner or listing_type != 'version-2':
                    owner = SubElement(contents, 'Owner')
                    SubElement(owner, 'ID').text = req.user_id
                    SubElement(owner, 'DisplayName').text = req.user_id
                SubElement(contents, 'StorageClass').text = 'STANDARD'

        for o in objects:
            if 'subdir' in o:
                common_prefixes = SubElement(elem, 'CommonPrefixes')
                name = o['subdir']
                if encoding_type == 'url':
                    name = quote(name.encode('utf-8'))
                SubElement(common_prefixes, 'Prefix').text = name

        body = tostring(elem)

        return HTTPOk(body=body, content_type='application/xml')
Пример #30
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {
            'Accept': 'application/json',
            sysmeta_header('object', 'upload-id'): upload_id
        }
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5(usedforsecurity=False)
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(
                        xml, usedforsecurity=False).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(xml, 'CompleteMultipartUpload',
                                       self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = normalize_etag(part_elem.find('./ETag').text)
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path':
                    '/%s/%s/%s/%d' %
                    (wsgi_to_str(container), wsgi_to_str(
                        req.object_name), upload_id, part_number),
                    'etag':
                    etag
                })
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        s3_etag_header = sysmeta_header('object', 'etag')
        if resp.sysmeta_headers.get(s3_etag_header) == s3_etag:
            # This header should only already be present if the upload marker
            # has been cleaned up and the current target uses the same
            # upload-id; assuming the segments to use haven't changed, the work
            # is already done
            return HTTPOk(body=_make_complete_body(req, s3_etag, False),
                          content_type='application/xml')
        headers[s3_etag_header] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers[get_container_update_override_key('etag')] = c_etag

        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [(item['name'], too_small_message) for item in manifest[:-1]
                    if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(self.app,
                                                'PUT',
                                                body=json.dumps(manifest),
                                                query={
                                                    'multipart-manifest':
                                                    'put',
                                                    'heartbeat': 'on'
                                                },
                                                headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # The important thing is that we wrote out a tombstone to
                    # make sure the marker got cleaned up. If it's already
                    # gone (e.g., because of concurrent completes or a retried
                    # complete), so much the better.
                    pass

                yield _make_complete_body(req, s3_etag, yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp
Пример #31
0
    def POST(self, req):
        """
        Handles Complete Multipart Upload.
        """
        upload_id = req.params['uploadId']
        resp = _get_upload_info(req, self.app, upload_id)
        headers = {'Accept': 'application/json'}
        for key, val in resp.headers.items():
            _key = key.lower()
            if _key.startswith('x-amz-meta-'):
                headers['x-object-meta-' + _key[11:]] = val

        hct_header = sysmeta_header('object', 'has-content-type')
        if resp.sysmeta_headers.get(hct_header) == 'yes':
            content_type = resp.sysmeta_headers.get(
                sysmeta_header('object', 'content-type'))
        elif hct_header in resp.sysmeta_headers:
            # has-content-type is present but false, so no content type was
            # set on initial upload. In that case, we won't set one on our
            # PUT request. Swift will end up guessing one based on the
            # object name.
            content_type = None
        else:
            content_type = resp.headers.get('Content-Type')

        if content_type:
            headers['Content-Type'] = content_type

        container = req.container_name + MULTIUPLOAD_SUFFIX
        s3_etag_hasher = md5()
        manifest = []
        previous_number = 0
        try:
            xml = req.xml(MAX_COMPLETE_UPLOAD_BODY_SIZE)
            if not xml:
                raise InvalidRequest(msg='You must specify at least one part')
            if 'content-md5' in req.headers:
                # If an MD5 was provided, we need to verify it.
                # Note that S3Request already took care of translating to ETag
                if req.headers['etag'] != md5(xml).hexdigest():
                    raise BadDigest(content_md5=req.headers['content-md5'])
                # We're only interested in the body here, in the
                # multipart-upload controller -- *don't* let it get
                # plumbed down to the object-server
                del req.headers['etag']

            complete_elem = fromstring(
                xml, 'CompleteMultipartUpload', self.logger)
            for part_elem in complete_elem.iterchildren('Part'):
                part_number = int(part_elem.find('./PartNumber').text)

                if part_number <= previous_number:
                    raise InvalidPartOrder(upload_id=upload_id)
                previous_number = part_number

                etag = part_elem.find('./ETag').text
                if len(etag) >= 2 and etag[0] == '"' and etag[-1] == '"':
                    # strip double quotes
                    etag = etag[1:-1]
                if len(etag) != 32 or any(c not in '0123456789abcdef'
                                          for c in etag):
                    raise InvalidPart(upload_id=upload_id,
                                      part_number=part_number)

                manifest.append({
                    'path': '/%s/%s/%s/%d' % (
                        container, req.object_name, upload_id, part_number),
                    'etag': etag})
                s3_etag_hasher.update(binascii.a2b_hex(etag))
        except (XMLSyntaxError, DocumentInvalid):
            # NB: our schema definitions catch uploads with no parts here
            raise MalformedXML()
        except ErrorResponse:
            raise
        except Exception as e:
            self.logger.error(e)
            raise

        s3_etag = '%s-%d' % (s3_etag_hasher.hexdigest(), len(manifest))
        headers[sysmeta_header('object', 'etag')] = s3_etag
        # Leave base header value blank; SLO will populate
        c_etag = '; s3_etag=%s' % s3_etag
        headers['X-Object-Sysmeta-Container-Update-Override-Etag'] = c_etag

        too_small_message = ('s3api requires that each segment be at least '
                             '%d bytes' % self.conf.min_segment_size)

        def size_checker(manifest):
            # Check the size of each segment except the last and make sure
            # they are all more than the minimum upload chunk size.
            # Note that we need to use the *internal* keys, since we're
            # looking at the manifest that's about to be written.
            return [
                (item['name'], too_small_message)
                for item in manifest[:-1]
                if item and item['bytes'] < self.conf.min_segment_size]

        req.environ['swift.callback.slo_manifest_hook'] = size_checker
        start_time = time.time()

        def response_iter():
            # NB: XML requires that the XML declaration, if present, be at the
            # very start of the document. Clients *will* call us out on not
            # being valid XML if we pass through whitespace before it.
            # Track whether we've sent anything yet so we can yield out that
            # declaration *first*
            yielded_anything = False

            try:
                try:
                    # TODO: add support for versioning
                    put_resp = req.get_response(
                        self.app, 'PUT', body=json.dumps(manifest),
                        query={'multipart-manifest': 'put',
                               'heartbeat': 'on'},
                        headers=headers)
                    if put_resp.status_int == 202:
                        body = []
                        put_resp.fix_conditional_response()
                        for chunk in put_resp.response_iter:
                            if not chunk.strip():
                                if time.time() - start_time < 10:
                                    # Include some grace period to keep
                                    # ceph-s3tests happy
                                    continue
                                if not yielded_anything:
                                    yield (b'<?xml version="1.0" '
                                           b'encoding="UTF-8"?>\n')
                                yielded_anything = True
                                yield chunk
                                continue
                            body.append(chunk)
                        body = json.loads(b''.join(body))
                        if body['Response Status'] != '201 Created':
                            for seg, err in body['Errors']:
                                if err == too_small_message:
                                    raise EntityTooSmall()
                                elif err in ('Etag Mismatch', '404 Not Found'):
                                    raise InvalidPart(upload_id=upload_id)
                            raise InvalidRequest(
                                status=body['Response Status'],
                                msg='\n'.join(': '.join(err)
                                              for err in body['Errors']))
                except BadSwiftRequest as e:
                    msg = str(e)
                    if too_small_message in msg:
                        raise EntityTooSmall(msg)
                    elif ', Etag Mismatch' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    elif ', 404 Not Found' in msg:
                        raise InvalidPart(upload_id=upload_id)
                    else:
                        raise

                # clean up the multipart-upload record
                obj = '%s/%s' % (req.object_name, upload_id)
                try:
                    req.get_response(self.app, 'DELETE', container, obj)
                except NoSuchKey:
                    # We know that this existed long enough for us to HEAD
                    pass

                result_elem = Element('CompleteMultipartUploadResult')

                # NOTE: boto with sig v4 appends port to HTTP_HOST value at
                # the request header when the port is non default value and it
                # makes req.host_url like as http://localhost:8080:8080/path
                # that obviously invalid. Probably it should be resolved at
                # swift.common.swob though, tentatively we are parsing and
                # reconstructing the correct host_url info here.
                # in detail, https://github.com/boto/boto/pull/3513
                parsed_url = urlparse(req.host_url)
                host_url = '%s://%s' % (parsed_url.scheme, parsed_url.hostname)
                # Why are we doing our own port parsing? Because py3 decided
                # to start raising ValueErrors on access after parsing such
                # an invalid port
                netloc = parsed_url.netloc.split('@')[-1].split(']')[-1]
                if ':' in netloc:
                    port = netloc.split(':', 2)[1]
                    host_url += ':%s' % port

                SubElement(result_elem, 'Location').text = host_url + req.path
                SubElement(result_elem, 'Bucket').text = req.container_name
                SubElement(result_elem, 'Key').text = req.object_name
                SubElement(result_elem, 'ETag').text = '"%s"' % s3_etag
                resp.headers.pop('ETag', None)
                if yielded_anything:
                    yield b'\n'
                yield tostring(result_elem,
                               xml_declaration=not yielded_anything)
            except ErrorResponse as err_resp:
                if yielded_anything:
                    err_resp.xml_declaration = False
                    yield b'\n'
                else:
                    # Oh good, we can still change HTTP status code, too!
                    resp.status = err_resp.status
                for chunk in err_resp({}, lambda *a: None):
                    yield chunk

        resp = HTTPOk()  # assume we're good for now... but see above!
        resp.app_iter = reiterate(response_iter())
        resp.content_type = "application/xml"

        return resp