Exemplo n.º 1
0
    def _get_source_object(self, ssc_ctx, source_path, req):
        source_req = req.copy_get()

        # make sure the source request uses it's container_info
        source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
        source_req.path_info = quote(source_path)
        source_req.headers['X-Newest'] = 'true'

        # in case we are copying an SLO manifest, set format=raw parameter
        params = source_req.params
        if params.get('multipart-manifest') == 'get':
            params['format'] = 'raw'
            source_req.params = params

        source_resp = ssc_ctx.get_source_resp(source_req)

        if source_resp.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            close_if_possible(source_resp.app_iter)
            return HTTPRequestEntityTooLarge(request=req)

        if source_resp.content_length > MAX_FILE_SIZE:
            close_if_possible(source_resp.app_iter)
            return HTTPRequestEntityTooLarge(request=req)

        return source_resp
Exemplo n.º 2
0
    def _get_source_object(self, ssc_ctx, source_path, req):
        source_req = req.copy_get()

        # make sure the source request uses it's container_info
        source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
        source_req.path_info = quote(source_path)
        source_req.headers['X-Newest'] = 'true'
        if 'swift.post_as_copy' in req.environ:
            # We're COPYing one object over itself because of a POST; rely on
            # the PUT for write authorization, don't require read authorization
            source_req.environ['swift.authorize'] = lambda req: None
            source_req.environ['swift.authorize_override'] = True

        # in case we are copying an SLO manifest, set format=raw parameter
        params = source_req.params
        if params.get('multipart-manifest') == 'get':
            params['format'] = 'raw'
            source_req.params = params

        source_resp = ssc_ctx.get_source_resp(source_req)

        if source_resp.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            return HTTPRequestEntityTooLarge(request=req)

        if source_resp.content_length > MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)

        return source_resp
Exemplo n.º 3
0
    def __call__(self, request):

        if request.method not in ("POST", "PUT"):
            return self.app

        try:
            request.split_path(2, 4, rest_with_last=True)
        except ValueError:
            return self.app

        new_quota = request.headers.get('X-Account-Meta-Quota-Bytes')
        remove_quota = request.headers.get('X-Remove-Account-Meta-Quota-Bytes')
        if remove_quota:
            new_quota = 0  # X-Remove dominates if both are present

        if request.environ.get('reseller_request') is True:
            if new_quota and not new_quota.isdigit():
                return HTTPBadRequest()
            return self.app

        # deny quota set for non-reseller
        if new_quota is not None:
            return HTTPForbidden()

        account_info = get_account_info(request.environ, self.app)
        if not account_info or not account_info['bytes']:
            return self.app
        new_size = int(account_info['bytes']) + (request.content_length or 0)
        quota = int(account_info['meta'].get('quota-bytes', -1))

        if 0 <= quota < new_size:
            return HTTPRequestEntityTooLarge()

        return self.app
Exemplo n.º 4
0
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns HTTPRequestEntityTooLarge: the object is too large
    :returns HTTPLengthRequired: missing content-length header and not
                                 a chunked request
    :returns HTTPBadRequest: missing or bad content-type header, or
                             bad metadata
    """
    if req.content_length and req.content_length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(request=req)
    if 'X-Copy-From' in req.headers and req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req, content_type='text/plain')
    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='No content type')
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req, body='Invalid Content-Type',
                              content_type='text/plain')
    return check_metadata(req, 'object')
Exemplo n.º 5
0
    def get_objs_to_delete(self, req):
        """
        Will populate objs_to_delete with data from request input.
        :params req: a Swob request
        :returns: a list of the contents of req.body when separated by newline.
        :raises: HTTPException on failures
        """
        line = ''
        data_remaining = True
        objs_to_delete = []
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)

        while data_remaining:
            if '\n' in line:
                obj_to_delete, line = line.split('\n', 1)
                objs_to_delete.append(unquote(obj_to_delete))
            else:
                data = req.body_file.read(MAX_PATH_LENGTH)
                if data:
                    line += data
                else:
                    data_remaining = False
                    if line.strip():
                        objs_to_delete.append(unquote(line))
            if len(objs_to_delete) > self.max_deletes_per_request:
                raise HTTPRequestEntityTooLarge(
                    'Maximum Bulk Deletes: %d per request' %
                    self.max_deletes_per_request)
            if len(line) > MAX_PATH_LENGTH * 2:
                raise HTTPBadRequest('Invalid File Name')
        return objs_to_delete
Exemplo n.º 6
0
    def __call__(self, request):

        if request.method not in ("POST", "PUT"):
            return self.app

        try:
            ver, account, container, obj = request.split_path(
                2, 4, rest_with_last=True)
        except ValueError:
            return self.app

        if not container:
            # account request, so we pay attention to the quotas
            new_quota = request.headers.get('X-Account-Meta-Quota-Bytes')
            remove_quota = request.headers.get(
                'X-Remove-Account-Meta-Quota-Bytes')
        else:
            # container or object request; even if the quota headers are set
            # in the request, they're meaningless
            new_quota = remove_quota = None

        if remove_quota:
            new_quota = 0  # X-Remove dominates if both are present

        if request.environ.get('reseller_request') is True:
            if new_quota and not new_quota.isdigit():
                return HTTPBadRequest()
            return self.app

        # deny quota set for non-reseller
        if new_quota is not None:
            return HTTPForbidden()

        if obj and request.method == "POST" or not obj:
            return self.app

        copy_from = request.headers.get('X-Copy-From')
        content_length = (request.content_length or 0)

        if obj and copy_from:
            path = '/' + ver + '/' + account + '/' + copy_from.lstrip('/')
            object_info = get_object_info(request.environ, self.app, path)
            if not object_info or not object_info['length']:
                content_length = 0
            else:
                content_length = int(object_info['length'])

        account_info = get_account_info(request.environ, self.app)
        if not account_info or not account_info['bytes']:
            return self.app

        new_size = int(account_info['bytes']) + content_length
        quota = int(account_info['meta'].get('quota-bytes', -1))

        if 0 <= quota < new_size:
            return HTTPRequestEntityTooLarge()

        return self.app
Exemplo n.º 7
0
 def bad_response(self, req, container_info):
     # 401 if the user couldn't have PUT this object in the first place.
     # This prevents leaking the container's existence to unauthed users.
     if 'swift.authorize' in req.environ:
         req.acl = container_info['write_acl']
         aresp = req.environ['swift.authorize'](req)
         if aresp:
             return aresp
     return HTTPRequestEntityTooLarge(body='Upload exceeds quota.')
Exemplo n.º 8
0
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns: HTTPRequestEntityTooLarge -- the object is too large
    :returns: HTTPLengthRequired -- missing content-length header and not
                                    a chunked request
    :returns: HTTPBadRequest -- missing or bad content-type header, or
                                bad metadata
    :returns: HTTPNotImplemented -- unsupported transfer-encoding header value
    """
    try:
        ml = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req,
                              content_type='text/plain',
                              body=str(e))
    except AttributeError as e:
        return HTTPNotImplemented(request=req,
                                  content_type='text/plain',
                                  body=str(e))
    if ml is not None and ml > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(body='Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')

    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req,
                              content_type='text/plain')

    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req,
                              content_type='text/plain',
                              body='No content type')

    try:
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req,
                              body=e.body,
                              content_type='text/plain')

    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req,
                              body='Invalid Content-Type',
                              content_type='text/plain')
    return check_metadata(req, 'object')
Exemplo n.º 9
0
 def __call__(self, env, start_response):
     req = Request(env)
     new_service = env.get('liteauth.new_service', None)
     if new_service:
         account_id = env.get('REMOTE_USER', '')
         if not account_id:
             return HTTPInternalServerError()
         if not self.activate_service(account_id, new_service, req.environ):
             return HTTPInternalServerError()
     if req.method in ['PUT', 'POST'
                       ] and not 'x-zerovm-execute' in req.headers:
         account_info = get_account_info(req.environ,
                                         self.app,
                                         swift_source='litequota')
         service_plan = assemble_from_partial(self.metadata_key,
                                              account_info['meta'])
         if service_plan:
             try:
                 service_plan = json.loads(service_plan)
                 path_parts = req.split_path(2, 4, rest_with_last=True)
             except ValueError:
                 return self.app(env, start_response)
             if len(path_parts) == 3:
                 quota = service_plan['storage']['containers']
                 new_size = int(account_info['container_count'])
                 if 0 <= quota < new_size:
                     return HTTPRequestEntityTooLarge(
                         body='Over quota: containers')(env, start_response)
             else:
                 new_size = int(account_info['bytes']) + (req.content_length
                                                          or 0)
                 quota = service_plan['storage']['bytes']
                 if 0 <= quota < new_size:
                     return HTTPRequestEntityTooLarge(
                         body='Over quota: bytes')(env, start_response)
                 quota = service_plan['storage']['objects']
                 new_size = int(account_info['total_object_count'])
                 if 0 <= quota < new_size:
                     return HTTPRequestEntityTooLarge(
                         body='Over quota: objects')(env, start_response)
     return self.app(env, start_response)
Exemplo n.º 10
0
    def _get_source_object(self, req, path_info):
        # make a pre_auth request in case the user has write access
        # to container, but not READ. This was allowed in previous version
        # (i.e., before middleware) so keeping the same behavior here
        get_req = make_pre_authed_request(
            req.environ, path=path_info,
            headers={'X-Newest': 'True'}, method='GET', swift_source='VW')
        source_resp = get_req.get_response(self.app)

        if source_resp.content_length is None or \
                source_resp.content_length > MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)

        return source_resp
Exemplo n.º 11
0
 def __call__(self, env, start_response):
     req = Request(env)
     if env['REQUEST_METHOD'] == 'GET' or env['REQUEST_METHOD'] == 'HEAD':
         if self.status == 200:
             if 'HTTP_RANGE' in env:
                 resp = Response(request=req,
                                 body=self.object_body,
                                 conditional_response=True)
                 return resp(env, start_response)
             start_response(
                 Response(request=req).status,
                 self.response_headers.items())
             if env['REQUEST_METHOD'] == 'GET':
                 return self.object_body
         elif self.status == 401:
             start_response(HTTPUnauthorized(request=req).status, [])
         elif self.status == 403:
             start_response(HTTPForbidden(request=req).status, [])
         elif self.status == 404:
             start_response(HTTPNotFound(request=req).status, [])
         else:
             start_response(HTTPBadRequest(request=req).status, [])
     elif env['REQUEST_METHOD'] == 'PUT':
         if self.status == 201:
             start_response(
                 HTTPCreated(request=req).status,
                 [('etag', self.response_headers['etag'])])
         elif self.status == 401:
             start_response(HTTPUnauthorized(request=req).status, [])
         elif self.status == 403:
             start_response(HTTPForbidden(request=req).status, [])
         elif self.status == 404:
             start_response(HTTPNotFound(request=req).status, [])
         elif self.status == 413:
             start_response(
                 HTTPRequestEntityTooLarge(request=req).status, [])
         else:
             start_response(HTTPBadRequest(request=req).status, [])
     elif env['REQUEST_METHOD'] == 'DELETE':
         if self.status == 204:
             start_response(HTTPNoContent(request=req).status, [])
         elif self.status == 401:
             start_response(HTTPUnauthorized(request=req).status, [])
         elif self.status == 403:
             start_response(HTTPForbidden(request=req).status, [])
         elif self.status == 404:
             start_response(HTTPNotFound(request=req).status, [])
         else:
             start_response(HTTPBadRequest(request=req).status, [])
     return []
Exemplo n.º 12
0
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :raises HTTPRequestEntityTooLarge: the object is too large
    :raises HTTPLengthRequered: missing content-length header and not
                                a chunked request
    :raises HTTPBadRequest: missing or bad content-type header, or
                            bad metadata
    """
    if req.content_length and req.content_length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(request=req)
    if 'X-Copy-From' in req.headers and req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req,
                              content_type='text/plain')
    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req,
                              content_type='text/plain')
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req,
                              content_type='text/plain',
                              body='No content type')
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req,
                              body='Invalid Content-Type',
                              content_type='text/plain')
    if 'x-object-manifest' in req.headers:
        value = req.headers['x-object-manifest']
        container = prefix = None
        try:
            container, prefix = value.split('/', 1)
        except ValueError:
            pass
        if not container or not prefix or '?' in value or '&' in value or \
                prefix[0] == '/':
            return HTTPBadRequest(
                request=req,
                body='X-Object-Manifest must in the format container/prefix')
    return check_metadata(req, 'object')
Exemplo n.º 13
0
    def get_objs_to_delete(self, req):
        """
        Will populate objs_to_delete with data from request input.
        :params req: a Swob request
        :returns: a list of the contents of req.body when separated by newline.
        :raises HTTPException: on failures
        """
        line = b''
        data_remaining = True
        objs_to_delete = []
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)

        while data_remaining:
            if b'\n' in line:
                obj_to_delete, line = line.split(b'\n', 1)
                if six.PY2:
                    obj_to_delete = wsgi_unquote(obj_to_delete.strip())
                else:
                    # yeah, all this chaining is pretty terrible...
                    # but it gets even worse trying to use UTF-8 and
                    # errors='surrogateescape' when dealing with terrible
                    # input like b'\xe2%98\x83'
                    obj_to_delete = wsgi_to_str(
                        wsgi_unquote(bytes_to_wsgi(obj_to_delete.strip())))
                objs_to_delete.append({'name': obj_to_delete})
            else:
                data = req.body_file.read(self.max_path_length)
                if data:
                    line += data
                else:
                    data_remaining = False
                    if six.PY2:
                        obj_to_delete = wsgi_unquote(line.strip())
                    else:
                        obj_to_delete = wsgi_to_str(
                            wsgi_unquote(bytes_to_wsgi(line.strip())))
                    if obj_to_delete:
                        objs_to_delete.append({'name': obj_to_delete})
            if len(objs_to_delete) > self.max_deletes_per_request:
                raise HTTPRequestEntityTooLarge(
                    'Maximum Bulk Deletes: %d per request' %
                    self.max_deletes_per_request)
            if len(line) > self.max_path_length * 2:
                raise HTTPBadRequest('Invalid File Name')
        return objs_to_delete
Exemplo n.º 14
0
    def _get_source_object(self, req, path_info):
        # make a pre_auth request in case the user has write access
        # to container, but not READ. This was allowed in previous version
        # (i.e., before middleware) so keeping the same behavior here
        get_req = make_pre_authed_request(req.environ,
                                          path=wsgi_quote(path_info) +
                                          '?symlink=get',
                                          headers={'X-Newest': 'True'},
                                          method='GET',
                                          swift_source='VW')
        source_resp = get_req.get_response(self.app)

        if source_resp.content_length is None or \
                source_resp.content_length > MAX_FILE_SIZE:
            # Consciously *don't* drain the response before closing;
            # any logged 499 is actually rather appropriate here
            close_if_possible(source_resp.app_iter)
            return HTTPRequestEntityTooLarge(request=req)

        return source_resp
Exemplo n.º 15
0
    def PUT(self, req):
        """HTTP PUT request handler."""
        container_info = self.container_info(
            self.account_name, self.container_name)
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp
        if not containers:
            return HTTPNotFound(request=req)
        if 'x-delete-after' in req.headers:
            try:
                x_delete_after = int(req.headers['x-delete-after'])
            except ValueError:
                    return HTTPBadRequest(request=req,
                                          content_type='text/plain',
                                          body='Non-integer X-Delete-After')
            req.headers['x-delete-at'] = '%d' % (time.time() + x_delete_after)
        partition, nodes = self.app.object_ring.get_nodes(
            self.account_name, self.container_name, self.object_name)
        # do a HEAD request for container sync and checking object versions
        if 'x-timestamp' in req.headers or \
                (object_versions and not
                 req.environ.get('swift_versioned_copy')):
            hreq = Request.blank(req.path_info, headers={'X-Newest': 'True'},
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(
                hreq, _('Object'), self.app.object_ring, partition,
                hreq.path_info)
        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req.headers['X-Timestamp'] = \
                    normalize_timestamp(float(req.headers['x-timestamp']))
                if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                    float(hresp.environ['swift_x_timestamp']) >= \
                        float(req.headers['x-timestamp']):
                    return HTTPAccepted(request=req)
            except ValueError:
                return HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                         'was %r' % req.headers['x-timestamp'])
        else:
            req.headers['X-Timestamp'] = normalize_timestamp(time.time())
        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        if not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            content_type_manually_set = False
        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response
        if object_versions and not req.environ.get('swift_versioned_copy'):
            is_manifest = 'x-object-manifest' in req.headers or \
                          'x-object-manifest' in hresp.headers
            if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(time.strptime(
                                            hresp.headers['last-modified'],
                                            '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = normalize_timestamp(ts_source)
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                copy_req = Request.blank(req.path_info, headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            source_header = unquote(source_header)
            acct = req.path_info.split('/', 2)[1]
            if isinstance(acct, unicode):
                acct = acct.encode('utf-8')
            if not source_header.startswith('/'):
                source_header = '/' + source_header
            source_header = '/' + acct + source_header
            try:
                src_container_name, src_obj_name = \
                    source_header.split('/', 3)[2:]
            except ValueError:
                return HTTPPreconditionFailed(
                    request=req,
                    body='X-Copy-From header must be of the form'
                         '<container name>/<object name>')
            source_req = req.copy_get()
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            source_resp = self.GET(source_req)
            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            new_req = Request.blank(req.path_info,
                                    environ=req.environ, headers=req.headers)
            data_source = source_resp.app_iter
            new_req.content_length = source_resp.content_length
            if new_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if new_req.content_length > MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            new_req.etag = source_resp.etag
            # we no longer need the X-Copy-From header
            del new_req.headers['X-Copy-From']
            if not content_type_manually_set:
                new_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if not config_true_value(
                    new_req.headers.get('x-fresh-metadata', 'false')):
                copy_headers_into(source_resp, new_req)
                copy_headers_into(req, new_req)
            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                new_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = new_req

        if 'x-delete-at' in req.headers:
            try:
                x_delete_at = int(req.headers['x-delete-at'])
                if x_delete_at < time.time():
                    return HTTPBadRequest(
                        body='X-Delete-At in past', request=req,
                        content_type='text/plain')
            except ValueError:
                return HTTPBadRequest(request=req, content_type='text/plain',
                                      body='Non-integer X-Delete-At')
            delete_at_container = str(
                x_delete_at /
                self.app.expiring_objects_container_divisor *
                self.app.expiring_objects_container_divisor)
            delete_at_part, delete_at_nodes = \
                self.app.container_ring.get_nodes(
                    self.app.expiring_objects_account, delete_at_container)
        else:
            delete_at_part = delete_at_nodes = None

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes(self.app.object_ring, partition))
        pile = GreenPile(len(nodes))
        chunked = req.headers.get('transfer-encoding')

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'
            pile.spawn(self._connect_put_node, node_iter, partition,
                       req.path_info, nheaders, self.app.logger.thread_locals)

        conns = [conn for conn in pile if conn]
        if len(conns) <= len(nodes) / 2:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'),
                {'conns': len(conns), 'nodes': len(nodes) // 2 + 1})
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                [conn.queue.put('0\r\n\r\n') for conn in conns]
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put(
                                '%x\r\n%s\r\n' % (len(chunk), chunk)
                                if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) <= len(nodes) / 2:
                        self.app.logger.error(_(
                            'Object PUT exceptions during'
                            ' send, %(conns)s/%(nodes)s required connections'),
                            {'conns': len(conns), 'nodes': len(nodes) / 2 + 1})
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout, err:
            self.app.logger.warn(
                _('ERROR Client read timeout (%ss)'), err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
Exemplo n.º 16
0
    def _transfer_data(self, req, data_source, conns, nodes):
        min_conns = quorum_size(len(nodes))

        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if req.is_chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        raise HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put('%x\r\n%s\r\n' %
                                           (len(chunk), chunk) if req.
                                           is_chunked else chunk)
                        else:
                            conn.close()
                            conns.remove(conn)
                    self._check_min_conn(
                        req,
                        conns,
                        min_conns,
                        msg='Object PUT exceptions during'
                        ' send, %(conns)s/%(nodes)s required connections')
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
            self._check_min_conn(req,
                                 conns,
                                 min_conns,
                                 msg='Object PUT exceptions after last send, '
                                 '%(conns)s/%(nodes)s required connections')
        except ChunkReadTimeout as err:
            self.app.logger.warn(_('ERROR Client read timeout (%ss)'),
                                 err.seconds)
            self.app.logger.increment('client_timeouts')
            raise HTTPRequestTimeout(request=req)
        except HTTPException:
            raise
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            raise HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            raise HTTPClientDisconnect(request=req)
Exemplo n.º 17
0
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns HTTPRequestEntityTooLarge: the object is too large
    :returns HTTPLengthRequired: missing content-length header and not
                                 a chunked request
    :returns HTTPBadRequest: missing or bad content-type header, or
                             bad metadata
    :returns HTTPNotImplemented: unsupported transfer-encoding header value
    """
    try:
        # 获取消息的长度,如果headers中没有包含消息长度,返回None,报错
        ml = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req,
                              content_type='text/plain',
                              body=str(e))
    except AttributeError as e:
        return HTTPNotImplemented(request=req,
                                  content_type='text/plain',
                                  body=str(e))

    # 如果请求消息的长度超过5G,报错
    if ml is not None and ml > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(body='Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')

    # 如果请求头中有拷贝源,但没有长度,报错
    if 'X-Copy-From' in req.headers and req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req,
                              content_type='text/plain')

    # 如果对象名称大于1024字节,报错
    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req,
                              content_type='text/plain')

    # 如果请求头中没有内容类型,报错
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req,
                              content_type='text/plain',
                              body='No content type')

    try:
        # 检查请求头中关于对象删除的信息,失败报错
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req,
                              body=e.body,
                              content_type='text/plain')

    # 检查内容类型是否为utf-8编码,失败报错
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req,
                              body='Invalid Content-Type',
                              content_type='text/plain')

    # 检查请求中的用户自定义的元数据,失败报错
    return check_metadata(req, 'object')
Exemplo n.º 18
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            path2indices[seg_dict['path']].append(index)

        def do_head(obj_name):
            obj_path = '/'.join(
                ['', vrs, account,
                 get_valid_utf8_str(obj_name).lstrip('/')])

            sub_req = make_subrequest(
                req.environ,
                path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            return obj_name, sub_req.get_response(self)

        def validate_seg_dict(seg_dict, head_seg_resp):
            if not head_seg_resp.is_success:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
                return 0, None

            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])
            if seg_dict['size_bytes'] is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])
            if seg_dict['etag'] is not None and \
                    seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()

            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': head_seg_resp.content_length,
                'hash': head_seg_resp.etag,
                'content_type': head_seg_resp.content_type,
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            return segment_length, seg_data

        data_for_storage = [None] * len(parsed_data)
        with StreamingPile(self.concurrency) as pile:
            for obj_name, resp in pile.asyncstarmap(
                    do_head, ((path, ) for path in path2indices)):
                for i in path2indices[obj_name]:
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], resp)
                    data_for_storage[i] = seg_data
                    total_size += segment_length

        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)

        slo_etag = md5()
        for seg_data in data_for_storage:
            if seg_data.get('range'):
                slo_etag.update('%s:%s;' %
                                (seg_data['hash'], seg_data['range']))
            else:
                slo_etag.update(seg_data['hash'])

        slo_etag = slo_etag.hexdigest()
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
        })

        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        def start_response_wrapper(status, headers, exc_info=None):
            for i, (header, _value) in enumerate(headers):
                if header.lower() == 'etag':
                    headers[i] = ('Etag', '"%s"' % slo_etag)
                    break
            return start_response(status, headers, exc_info)

        return self.app(env, start_response_wrapper)
Exemplo n.º 19
0
    def PUT(self, req):
        """HTTP PUT request handler."""
        if req.if_none_match is not None and '*' not in req.if_none_match:
            # Sending an etag with if-none-match isn't currently supported
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body='If-None-Match only supports *')
        container_info = self.container_info(
            self.account_name, self.container_name, req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)
        # pass the policy index to storage nodes via req header
        req.headers['X-Backend-Storage-Policy-Index'] = policy_index
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp
        if not containers:
            return HTTPNotFound(request=req)
        try:
            ml = req.message_length()
        except ValueError as e:
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body=str(e))
        except AttributeError as e:
            return HTTPNotImplemented(request=req, content_type='text/plain',
                                      body=str(e))
        if ml is not None and ml > constraints.MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)
        if 'x-delete-after' in req.headers:
            try:
                x_delete_after = int(req.headers['x-delete-after'])
            except ValueError:
                return HTTPBadRequest(request=req,
                                      content_type='text/plain',
                                      body='Non-integer X-Delete-After')
            req.headers['x-delete-at'] = normalize_delete_at_timestamp(
                time.time() + x_delete_after)
        partition, nodes = obj_ring.get_nodes(
            self.account_name, self.container_name, self.object_name)
        # do a HEAD request for container sync and checking object versions
        if 'x-timestamp' in req.headers or \
                (object_versions and not
                 req.environ.get('swift_versioned_copy')):
            # make sure proxy-server uses the right policy index
            _headers = {'X-Backend-Storage-Policy-Index': policy_index,
                        'X-Newest': 'True'}
            hreq = Request.blank(req.path_info, headers=_headers,
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(
                hreq, _('Object'), obj_ring, partition,
                hreq.swift_entity_path)
        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req_timestamp = Timestamp(req.headers['X-Timestamp'])
                if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                        hresp.environ['swift_x_timestamp'] >= req_timestamp:
                    return HTTPAccepted(request=req)
            except ValueError:
                return HTTPBadRequest(
                    request=req, content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                         'was %r' % req.headers['x-timestamp'])
            req.headers['X-Timestamp'] = req_timestamp.internal
        else:
            req.headers['X-Timestamp'] = Timestamp(time.time()).internal
        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        detect_content_type = \
            config_true_value(req.headers.get('x-detect-content-type'))
        if detect_content_type or not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            if detect_content_type:
                req.headers.pop('x-detect-content-type')
            else:
                content_type_manually_set = False

        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response
        if object_versions and not req.environ.get('swift_versioned_copy'):
            if hresp.status_int != HTTP_NOT_FOUND:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(time.strptime(
                                            hresp.headers['last-modified'],
                                            '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = Timestamp(ts_source).internal
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
                copy_environ = {'REQUEST_METHOD': 'COPY',
                                'swift_versioned_copy': True
                                }
                copy_req = Request.blank(req.path_info, headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            if req.environ.get('swift.orig_req_method', req.method) != 'POST':
                req.environ.setdefault('swift.log_info', []).append(
                    'x-copy-from:%s' % source_header)
            src_container_name, src_obj_name = check_copy_from_header(req)
            ver, acct, _rest = req.split_path(2, 3, True)
            if isinstance(acct, unicode):
                acct = acct.encode('utf-8')
            source_header = '/%s/%s/%s/%s' % (ver, acct,
                                              src_container_name, src_obj_name)
            source_req = req.copy_get()
            # make sure the source request uses it's container_info
            source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            sink_req = Request.blank(req.path_info,
                                     environ=req.environ, headers=req.headers)
            source_resp = self.GET(source_req)
            # This gives middlewares a way to change the source; for example,
            # this lets you COPY a SLO manifest and have the new object be the
            # concatenation of the segments (like what a GET request gives
            # the client), not a copy of the manifest file.
            hook = req.environ.get(
                'swift.copy_hook',
                (lambda source_req, source_resp, sink_req: source_resp))
            source_resp = hook(source_req, source_resp, sink_req)

            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            data_source = iter(source_resp.app_iter)
            sink_req.content_length = source_resp.content_length
            if sink_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if sink_req.content_length > constraints.MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            sink_req.etag = source_resp.etag
            # we no longer need the X-Copy-From header
            del sink_req.headers['X-Copy-From']
            if not content_type_manually_set:
                sink_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if not config_true_value(
                    sink_req.headers.get('x-fresh-metadata', 'false')):
                copy_headers_into(source_resp, sink_req)
                copy_headers_into(req, sink_req)
            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                sink_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = sink_req

        if 'x-delete-at' in req.headers:
            try:
                x_delete_at = normalize_delete_at_timestamp(
                    int(req.headers['x-delete-at']))
                if int(x_delete_at) < time.time():
                    return HTTPBadRequest(
                        body='X-Delete-At in past', request=req,
                        content_type='text/plain')
            except ValueError:
                return HTTPBadRequest(request=req, content_type='text/plain',
                                      body='Non-integer X-Delete-At')
            req.environ.setdefault('swift.log_info', []).append(
                'x-delete-at:%s' % x_delete_at)
            delete_at_container = normalize_delete_at_timestamp(
                int(x_delete_at) /
                self.app.expiring_objects_container_divisor *
                self.app.expiring_objects_container_divisor)
            delete_at_part, delete_at_nodes = \
                self.app.container_ring.get_nodes(
                    self.app.expiring_objects_account, delete_at_container)
        else:
            delete_at_container = delete_at_part = delete_at_nodes = None

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes_local_first(obj_ring, partition))
        pile = GreenPile(len(nodes))
        te = req.headers.get('transfer-encoding', '')
        chunked = ('chunked' in te)

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_container, delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'
            pile.spawn(self._connect_put_node, node_iter, partition,
                       req.swift_entity_path, nheaders,
                       self.app.logger.thread_locals)

        conns = [conn for conn in pile if conn]
        min_conns = quorum_size(len(nodes))

        if req.if_none_match is not None and '*' in req.if_none_match:
            statuses = [conn.resp.status for conn in conns if conn.resp]
            if HTTP_PRECONDITION_FAILED in statuses:
                # If we find any copy of the file, it shouldn't be uploaded
                self.app.logger.debug(
                    _('Object PUT returning 412, %(statuses)r'),
                    {'statuses': statuses})
                return HTTPPreconditionFailed(request=req)

        if len(conns) < min_conns:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'),
                {'conns': len(conns), 'nodes': min_conns})
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put(
                                '%x\r\n%s\r\n' % (len(chunk), chunk)
                                if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) < min_conns:
                        self.app.logger.error(_(
                            'Object PUT exceptions during'
                            ' send, %(conns)s/%(nodes)s required connections'),
                            {'conns': len(conns), 'nodes': min_conns})
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout as err:
            self.app.logger.warn(
                _('ERROR Client read timeout (%ss)'), err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            return HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            return HTTPClientDisconnect(request=req)

        statuses, reasons, bodies, etags = self._get_put_responses(req, conns,
                                                                   nodes)

        if len(etags) > 1:
            self.app.logger.error(
                _('Object servers returned %s mismatched etags'), len(etags))
            return HTTPServerError(request=req)
        etag = etags.pop() if len(etags) else None
        resp = self.best_response(req, statuses, reasons, bodies,
                                  _('Object PUT'), etag=etag)
        if source_header:
            resp.headers['X-Copied-From'] = quote(
                source_header.split('/', 3)[3])
            if 'last-modified' in source_resp.headers:
                resp.headers['X-Copied-From-Last-Modified'] = \
                    source_resp.headers['last-modified']
            copy_headers_into(req, resp)
        resp.last_modified = math.ceil(
            float(Timestamp(req.headers['X-Timestamp'])))
        return resp
Exemplo n.º 20
0
    def handle_multipart_put(self, req):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be Copy requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number segments must be <= %d' % self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        for index, seg_dict in enumerate(parsed_data):
            obj_path = '/'.join(
                ['', vrs, account, seg_dict['path'].lstrip('/')])
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                raise HTTPBadRequest('Invalid Manifest File')
            if seg_size < self.min_segment_size and \
                    (index == 0 or index < len(parsed_data) - 1):
                raise HTTPBadRequest(
                    'Each segment, except the last, must be larger than '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            if isinstance(obj_path, unicode):
                obj_path = obj_path.encode('utf-8')
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del(new_env['wsgi.input'])
            del(new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self.app)
            if head_seg_resp.status_int // 100 == 2:
                total_size += seg_size
                if seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_path), 'Size Mismatch'])
                if seg_dict['etag'] != head_seg_resp.etag:
                    problem_segments.append([quote(obj_path), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                data_for_storage.append(
                    {'name': '/' + seg_dict['path'].lstrip('/'),
                     'bytes': seg_size,
                     'hash': seg_dict['etag'],
                     'content_type': head_seg_resp.content_type,
                     'last_modified': last_modified_formatted})

            else:
                problem_segments.append([quote(obj_path),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overriden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = StringIO(json_data)
        return self.app
Exemplo n.º 21
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge(
                "Manifest File > %d bytes" % self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size),
            req.path)
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, six.text_type):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])

            if obj_path != last_obj_path:
                last_obj_path = obj_path
                sub_req = make_subrequest(
                    req.environ, path=obj_path + '?',  # kill the query string
                    method='HEAD',
                    headers={'x-auth-token': req.headers.get('x-auth-token')},
                    agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
                head_seg_resp = sub_req.get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the
                    # range. We know that there is exactly one range
                    # requested since we checked that earlier in
                    # parse_and_validate_input().
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append([quote(obj_name),
                                                 'Unsatisfiable Range'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        del seg_dict['range']
                        segment_length = head_seg_resp.content_length
                    else:
                        rng = ranges[0]
                        seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                        segment_length = rng[1] - rng[0]

                if segment_length < 1:
                    problem_segments.append(
                        [quote(obj_name),
                         'Too small; each segment must be at least 1 byte.'])
                total_size += segment_length
                if seg_dict['size_bytes'] is not None and \
                        seg_dict['size_bytes'] != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                    seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                            'bytes': head_seg_resp.content_length,
                            'hash': head_seg_resp.etag,
                            'content_type': head_seg_resp.content_type,
                            'last_modified': last_modified_formatted}
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append([quote(obj_name),
                                         head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(
                out_content_type, {}, problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Exemplo n.º 22
0
    def handle_extract_iter(self,
                            req,
                            compress_type,
                            out_content_type='text/plain'):
        """
        A generator that can be assigned to a swob Response's app_iter which,
        when iterated over, will extract and PUT the objects pulled from the
        request body. Will occasionally yield whitespace while request is being
        processed. When the request is completed will yield a response body
        that can be parsed to determine success. See above documentation for
        details.

        :params req: a swob Request
        :params compress_type: specifying the compression type of the tar.
            Accepts '', 'gz', or 'bz2'
        """
        resp_dict = {
            'Response Status': HTTPCreated().status,
            'Response Body': '',
            'Number Files Created': 0
        }
        failed_files = []
        last_yield = time()
        if out_content_type and out_content_type.endswith('/xml'):
            to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
        else:
            to_yield = b' '
        separator = b''
        containers_accessed = set()
        req.environ['eventlet.minimum_write_chunk_size'] = 0
        try:
            if not out_content_type:
                raise HTTPNotAcceptable(request=req)

            if req.content_length is None and \
                    req.headers.get('transfer-encoding',
                                    '').lower() != 'chunked':
                raise HTTPLengthRequired(request=req)
            try:
                vrs, account, extract_base = req.split_path(2, 3, True)
            except ValueError:
                raise HTTPNotFound(request=req)
            extract_base = extract_base or ''
            extract_base = extract_base.rstrip('/')
            tar = tarfile.open(mode='r|' + compress_type,
                               fileobj=req.body_file)
            failed_response_type = HTTPBadRequest
            containers_created = 0
            while True:
                if last_yield + self.yield_frequency < time():
                    last_yield = time()
                    yield to_yield
                    to_yield, separator = b' ', b'\r\n\r\n'
                tar_info = tar.next()
                if tar_info is None or \
                        len(failed_files) >= self.max_failed_extractions:
                    break
                if tar_info.isfile():
                    obj_path = tar_info.name
                    if not six.PY2:
                        obj_path = obj_path.encode('utf-8', 'surrogateescape')
                    obj_path = bytes_to_wsgi(obj_path)
                    if obj_path.startswith('./'):
                        obj_path = obj_path[2:]
                    obj_path = obj_path.lstrip('/')
                    if extract_base:
                        obj_path = extract_base + '/' + obj_path
                    if '/' not in obj_path:
                        continue  # ignore base level file

                    destination = '/'.join(['', vrs, account, obj_path])
                    container = obj_path.split('/', 1)[0]
                    if not constraints.check_utf8(wsgi_to_str(destination)):
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPPreconditionFailed().status
                        ])
                        continue
                    if tar_info.size > constraints.MAX_FILE_SIZE:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPRequestEntityTooLarge().status
                        ])
                        continue
                    container_failure = None
                    if container not in containers_accessed:
                        cont_path = '/'.join(['', vrs, account, container])
                        try:
                            if self.create_container(req, cont_path):
                                containers_created += 1
                                if containers_created > self.max_containers:
                                    raise HTTPBadRequest(
                                        'More than %d containers to create '
                                        'from tar.' % self.max_containers)
                        except CreateContainerError as err:
                            # the object PUT to this container still may
                            # succeed if acls are set
                            container_failure = [
                                wsgi_quote(cont_path[:self.max_path_length]),
                                err.status
                            ]
                            if err.status_int == HTTP_UNAUTHORIZED:
                                raise HTTPUnauthorized(request=req)
                        except ValueError:
                            failed_files.append([
                                wsgi_quote(obj_path[:self.max_path_length]),
                                HTTPBadRequest().status
                            ])
                            continue

                    tar_file = tar.extractfile(tar_info)
                    create_headers = {
                        'Content-Length': tar_info.size,
                        'X-Auth-Token': req.headers.get('X-Auth-Token'),
                    }

                    # Copy some whitelisted headers to the subrequest
                    for k, v in req.headers.items():
                        if ((k.lower() in ('x-delete-at', 'x-delete-after'))
                                or is_user_meta('object', k)):
                            create_headers[k] = v

                    create_obj_req = make_subrequest(
                        req.environ,
                        method='PUT',
                        path=wsgi_quote(destination),
                        headers=create_headers,
                        agent='%(orig)s BulkExpand',
                        swift_source='EA')
                    create_obj_req.environ['wsgi.input'] = tar_file

                    for pax_key, pax_value in tar_info.pax_headers.items():
                        header_name = pax_key_to_swift_header(pax_key)
                        if header_name:
                            # Both pax_key and pax_value are unicode
                            # strings; the key is already UTF-8 encoded, but
                            # we still have to encode the value.
                            create_obj_req.headers[header_name] = \
                                pax_value.encode("utf-8")

                    resp = create_obj_req.get_response(self.app)
                    containers_accessed.add(container)
                    if resp.is_success:
                        resp_dict['Number Files Created'] += 1
                    else:
                        if container_failure:
                            failed_files.append(container_failure)
                        if resp.status_int == HTTP_UNAUTHORIZED:
                            failed_files.append([
                                wsgi_quote(obj_path[:self.max_path_length]),
                                HTTPUnauthorized().status
                            ])
                            raise HTTPUnauthorized(request=req)
                        if resp.status_int // 100 == 5:
                            failed_response_type = HTTPBadGateway
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            resp.status
                        ])

            if failed_files:
                resp_dict['Response Status'] = failed_response_type().status
            elif not resp_dict['Number Files Created']:
                resp_dict['Response Status'] = HTTPBadRequest().status
                resp_dict['Response Body'] = 'Invalid Tar File: No Valid Files'

        except HTTPException as err:
            resp_dict['Response Status'] = err.status
            resp_dict['Response Body'] = err.body.decode('utf-8')
        except (tarfile.TarError, zlib.error) as tar_error:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
        except Exception:
            self.logger.exception('Error in extract archive.')
            resp_dict['Response Status'] = HTTPServerError().status

        yield separator + get_response_body(out_content_type, resp_dict,
                                            failed_files, 'extract')
Exemplo n.º 23
0
    def PUT(self, req):
        """HTTP PUT request handler."""
        if req.if_none_match is not None and '*' not in req.if_none_match:
            # Sending an etag with if-none-match isn't currently supported
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='If-None-Match only supports *')
        container_info = self.container_info(self.account_name,
                                             self.container_name, req)
        policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                       container_info['storage_policy'])
        obj_ring = self.app.get_object_ring(policy_index)

        # pass the policy index to storage nodes via req header
        req.headers['X-Backend-Storage-Policy-Index'] = policy_index
        container_partition = container_info['partition']
        containers = container_info['nodes']
        req.acl = container_info['write_acl']
        req.environ['swift_sync_key'] = container_info['sync_key']
        object_versions = container_info['versions']
        if 'swift.authorize' in req.environ:
            aresp = req.environ['swift.authorize'](req)
            if aresp:
                return aresp

        if not containers:
            return HTTPNotFound(request=req)

        # Sometimes the 'content-type' header exists, but is set to None.
        content_type_manually_set = True
        detect_content_type = \
            config_true_value(req.headers.get('x-detect-content-type'))
        if detect_content_type or not req.headers.get('content-type'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            req.headers['Content-Type'] = guessed_type or \
                'application/octet-stream'
            if detect_content_type:
                req.headers.pop('x-detect-content-type')
            else:
                content_type_manually_set = False

        error_response = check_object_creation(req, self.object_name) or \
            check_content_type(req)
        if error_response:
            return error_response

        partition, nodes = obj_ring.get_nodes(self.account_name,
                                              self.container_name,
                                              self.object_name)
        ####################################  CHANGED_CODE  ############################################################
        # Change the nodes list to contain only one dictionary item instead of the original 3 returned by the ring.
        d = dict()
        # d[partition] = nodes[1:]
        # f.write(str(d)+"\n")
        # f.close()
        print("===Original Nodes===")
        print(nodes)
        temp_nodes = []
        flag = 0
        f = open("/home/hduser/swift/swift/proxy/controllers/spindowndevices",
                 "r")
        sdlist = f.read().split("\n")
        print("===Spun down devices===:", sdlist)
        f.close()

        upnodes = [item for item in nodes if item['device'] not in sdlist]
        downnodes = [item for item in nodes if item['device'] in sdlist]
        temp_nodes = upnodes
        if (len(downnodes) > 0):
            d = ast.literal_eval(
                open("/home/hduser/swift/swift/proxy/controllers/nodes.txt",
                     "r").read())
            # d_temp=pickle.load("/home/hduser/swift/proxy/controllers/nodes.p","rb")
            # print("===Current dict===:",d)
            for item in downnodes:
                if (partition in d):
                    d[partition].append(item)
                    # print("===Modified dict===:",d)
                else:
                    d[partition] = [item]
                    # print("===Modified dict===:",d)
        # pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        # print("Before writing:",d)
        fo = open("/home/hduser/swift/swift/proxy/controllers/nodes.txt", "w")
        fo.write(str(d) + "\n")
        fo.close()
        # pickle.dump(d,open("/home/hduser/swift/swift/proxy/controllers/nodes.p","wb"))
        ## Old method, IGNORE
        # for item in nodes:
        #     device = item['device']
        #     if(device not in sdlist):
        #     # if(os.path.ismount("path"))
        #         temp_nodes.append(item)
        #         flag = 1
        #         break
        #     else:
        #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        #         # d = pickle.load(open("/home/hduser/nodes.p","rb"))
        #         import ast
        #         d = ast.literal_eval(open("/home/hduser/nodes.txt","r").read())
        #         print("===Current dict===:",d)
        #         if(partition in d):
        #             print("In IF")
        #             d[partition].append(item)
        #             print("===Modified dict===:",d)
        #         else:
        #             print("In ELSE")
        #             d[partition] = [item]
        #             print("===Modified dict===:",d)
        #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        #         fo = open("/home/hduser/nodes.txt","w")
        #         fo.write(str(d)+"\n")

        # Code to spin up a device if none are running already.
        if (len(upnodes) == 0):
            dev = nodes[0]['device']
            print("===ALL NODES DOWN===")
            print("===Mounting device===", dev)
            os.system("mount /dev/" + str(dev))

        print('===In controller PUT===:')
        print("===Partition===", partition)
        nodes = temp_nodes

        print('===In controller PUT===:')
        print("===Partition===", partition)
        nodes = temp_nodes
        print("===Nodes===:", nodes)

        check_ssd()
        ############################################  CHANGED_CODE  ########################################################

        # do a HEAD request for checking object versions
        if object_versions and not req.environ.get('swift_versioned_copy'):
            # make sure proxy-server uses the right policy index
            _headers = {
                'X-Backend-Storage-Policy-Index': policy_index,
                'X-Newest': 'True'
            }
            hreq = Request.blank(req.path_info,
                                 headers=_headers,
                                 environ={'REQUEST_METHOD': 'HEAD'})
            hresp = self.GETorHEAD_base(hreq, _('Object'), obj_ring, partition,
                                        hreq.swift_entity_path)

        # Used by container sync feature
        if 'x-timestamp' in req.headers:
            try:
                req_timestamp = Timestamp(req.headers['X-Timestamp'])
            except ValueError:
                return HTTPBadRequest(
                    request=req,
                    content_type='text/plain',
                    body='X-Timestamp should be a UNIX timestamp float value; '
                    'was %r' % req.headers['x-timestamp'])
            req.headers['X-Timestamp'] = req_timestamp.internal
        else:
            req.headers['X-Timestamp'] = Timestamp(time.time()).internal

        if object_versions and not req.environ.get('swift_versioned_copy'):
            is_manifest = 'X-Object-Manifest' in req.headers or \
                          'X-Object-Manifest' in hresp.headers
            if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
                # This is a version manifest and needs to be handled
                # differently. First copy the existing data to a new object,
                # then write the data from this request to the version manifest
                # object.
                lcontainer = object_versions.split('/')[0]
                prefix_len = '%03x' % len(self.object_name)
                lprefix = prefix_len + self.object_name + '/'
                ts_source = hresp.environ.get('swift_x_timestamp')
                if ts_source is None:
                    ts_source = time.mktime(
                        time.strptime(hresp.headers['last-modified'],
                                      '%a, %d %b %Y %H:%M:%S GMT'))
                new_ts = Timestamp(ts_source).internal
                vers_obj_name = lprefix + new_ts
                copy_headers = {
                    'Destination': '%s/%s' % (lcontainer, vers_obj_name)
                }
                copy_environ = {
                    'REQUEST_METHOD': 'COPY',
                    'swift_versioned_copy': True
                }
                copy_req = Request.blank(req.path_info,
                                         headers=copy_headers,
                                         environ=copy_environ)
                copy_resp = self.COPY(copy_req)
                if is_client_error(copy_resp.status_int):
                    # missing container or bad permissions
                    return HTTPPreconditionFailed(request=req)
                elif not is_success(copy_resp.status_int):
                    # could not copy the data, bail
                    return HTTPServiceUnavailable(request=req)

        reader = req.environ['wsgi.input'].read
        data_source = iter(lambda: reader(self.app.client_chunk_size), '')
        source_header = req.headers.get('X-Copy-From')
        source_resp = None
        if source_header:
            if req.environ.get('swift.orig_req_method', req.method) != 'POST':
                req.environ.setdefault('swift.log_info', []).append(
                    'x-copy-from:%s' % source_header)
            ver, acct, _rest = req.split_path(2, 3, True)
            src_account_name = req.headers.get('X-Copy-From-Account', None)
            if src_account_name:
                src_account_name = check_account_format(req, src_account_name)
            else:
                src_account_name = acct
            src_container_name, src_obj_name = check_copy_from_header(req)
            source_header = '/%s/%s/%s/%s' % (ver, src_account_name,
                                              src_container_name, src_obj_name)
            source_req = req.copy_get()

            # make sure the source request uses it's container_info
            source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
            source_req.path_info = source_header
            source_req.headers['X-Newest'] = 'true'
            orig_obj_name = self.object_name
            orig_container_name = self.container_name
            orig_account_name = self.account_name
            self.object_name = src_obj_name
            self.container_name = src_container_name
            self.account_name = src_account_name
            sink_req = Request.blank(req.path_info,
                                     environ=req.environ,
                                     headers=req.headers)
            source_resp = self.GET(source_req)

            # This gives middlewares a way to change the source; for example,
            # this lets you COPY a SLO manifest and have the new object be the
            # concatenation of the segments (like what a GET request gives
            # the client), not a copy of the manifest file.
            hook = req.environ.get(
                'swift.copy_hook',
                (lambda source_req, source_resp, sink_req: source_resp))
            source_resp = hook(source_req, source_resp, sink_req)

            if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
                return source_resp
            self.object_name = orig_obj_name
            self.container_name = orig_container_name
            self.account_name = orig_account_name
            data_source = iter(source_resp.app_iter)
            sink_req.content_length = source_resp.content_length
            if sink_req.content_length is None:
                # This indicates a transfer-encoding: chunked source object,
                # which currently only happens because there are more than
                # CONTAINER_LISTING_LIMIT segments in a segmented object. In
                # this case, we're going to refuse to do the server-side copy.
                return HTTPRequestEntityTooLarge(request=req)
            if sink_req.content_length > constraints.MAX_FILE_SIZE:
                return HTTPRequestEntityTooLarge(request=req)
            sink_req.etag = source_resp.etag

            # we no longer need the X-Copy-From header
            del sink_req.headers['X-Copy-From']
            if 'X-Copy-From-Account' in sink_req.headers:
                del sink_req.headers['X-Copy-From-Account']
            if not content_type_manually_set:
                sink_req.headers['Content-Type'] = \
                    source_resp.headers['Content-Type']
            if config_true_value(
                    sink_req.headers.get('x-fresh-metadata', 'false')):
                # post-as-copy: ignore new sysmeta, copy existing sysmeta
                condition = lambda k: is_sys_meta('object', k)
                remove_items(sink_req.headers, condition)
                copy_header_subset(source_resp, sink_req, condition)
            else:
                # copy/update existing sysmeta and user meta
                copy_headers_into(source_resp, sink_req)
                copy_headers_into(req, sink_req)

            # copy over x-static-large-object for POSTs and manifest copies
            if 'X-Static-Large-Object' in source_resp.headers and \
                    req.params.get('multipart-manifest') == 'get':
                sink_req.headers['X-Static-Large-Object'] = \
                    source_resp.headers['X-Static-Large-Object']

            req = sink_req

        req, delete_at_container, delete_at_part, \
            delete_at_nodes = self._config_obj_expiration(req)

        node_iter = GreenthreadSafeIterator(
            self.iter_nodes_local_first(obj_ring, partition))
        pile = GreenPile(len(nodes))
        te = req.headers.get('transfer-encoding', '')
        chunked = ('chunked' in te)

        outgoing_headers = self._backend_requests(
            req, len(nodes), container_partition, containers,
            delete_at_container, delete_at_part, delete_at_nodes)

        for nheaders in outgoing_headers:
            # RFC2616:8.2.3 disallows 100-continue without a body
            if (req.content_length > 0) or chunked:
                nheaders['Expect'] = '100-continue'

#################################  CHANGED_CODE  ###################################################################
# Replaced node_iter by nodes in the following line to make sure that a new list with different order isnt used.
# Change from node_iter to nodes to make sure it writes to the same device.
# Without this, it gets a new list of nodes from the ring in a different order and connects to the first one.

            pile.spawn(self._connect_put_node, nodes, partition,
                       req.swift_entity_path, nheaders,
                       self.app.logger.thread_locals)

#################################  CHANGED_CODE ###################################################################

        conns = [conn for conn in pile if conn]
        min_conns = quorum_size(len(nodes))

        if req.if_none_match is not None and '*' in req.if_none_match:
            statuses = [conn.resp.status for conn in conns if conn.resp]
            if HTTP_PRECONDITION_FAILED in statuses:
                # If we find any copy of the file, it shouldn't be uploaded
                self.app.logger.debug(
                    _('Object PUT returning 412, %(statuses)r'),
                    {'statuses': statuses})
                return HTTPPreconditionFailed(request=req)

        if any(conn for conn in conns
               if conn.resp and conn.resp.status == HTTP_CONFLICT):
            timestamps = [
                HeaderKeyDict(
                    conn.resp.getheaders()).get('X-Backend-Timestamp')
                for conn in conns if conn.resp
            ]
            self.app.logger.debug(
                _('Object PUT returning 202 for 409: '
                  '%(req_timestamp)s <= %(timestamps)r'), {
                      'req_timestamp': req.timestamp.internal,
                      'timestamps': ', '.join(timestamps)
                  })
            return HTTPAccepted(request=req)

        if len(conns) < min_conns:
            self.app.logger.error(
                _('Object PUT returning 503, %(conns)s/%(nodes)s '
                  'required connections'), {
                      'conns': len(conns),
                      'nodes': min_conns
                  })
            return HTTPServiceUnavailable(request=req)
        bytes_transferred = 0
        try:
            with ContextPool(len(nodes)) as pool:
                for conn in conns:
                    conn.failed = False
                    conn.queue = Queue(self.app.put_queue_depth)
                    pool.spawn(self._send_file, conn, req.path)
                while True:
                    with ChunkReadTimeout(self.app.client_timeout):
                        try:
                            chunk = next(data_source)
                        except StopIteration:
                            if chunked:
                                for conn in conns:
                                    conn.queue.put('0\r\n\r\n')
                            break
                    bytes_transferred += len(chunk)
                    if bytes_transferred > constraints.MAX_FILE_SIZE:
                        return HTTPRequestEntityTooLarge(request=req)
                    for conn in list(conns):
                        if not conn.failed:
                            conn.queue.put('%x\r\n%s\r\n' %
                                           (len(chunk),
                                            chunk) if chunked else chunk)
                        else:
                            conns.remove(conn)
                    if len(conns) < min_conns:
                        self.app.logger.error(
                            _('Object PUT exceptions during'
                              ' send, %(conns)s/%(nodes)s required connections'
                              ), {
                                  'conns': len(conns),
                                  'nodes': min_conns
                              })
                        return HTTPServiceUnavailable(request=req)
                for conn in conns:
                    if conn.queue.unfinished_tasks:
                        conn.queue.join()
            conns = [conn for conn in conns if not conn.failed]
        except ChunkReadTimeout as err:
            self.app.logger.warn(_('ERROR Client read timeout (%ss)'),
                                 err.seconds)
            self.app.logger.increment('client_timeouts')
            return HTTPRequestTimeout(request=req)
        except (Exception, Timeout):
            self.app.logger.exception(
                _('ERROR Exception causing client disconnect'))
            return HTTPClientDisconnect(request=req)
        if req.content_length and bytes_transferred < req.content_length:
            req.client_disconnect = True
            self.app.logger.warn(
                _('Client disconnected without sending enough data'))
            self.app.logger.increment('client_disconnects')
            return HTTPClientDisconnect(request=req)

        statuses, reasons, bodies, etags = self._get_put_responses(
            req, conns, nodes)

        if len(etags) > 1:
            self.app.logger.error(
                _('Object servers returned %s mismatched etags'), len(etags))
            return HTTPServerError(request=req)
        etag = etags.pop() if len(etags) else None
        resp = self.best_response(req,
                                  statuses,
                                  reasons,
                                  bodies,
                                  _('Object PUT'),
                                  etag=etag)
        if source_header:
            acct, path = source_header.split('/', 3)[2:4]
            resp.headers['X-Copied-From-Account'] = quote(acct)
            resp.headers['X-Copied-From'] = quote(path)
            if 'last-modified' in source_resp.headers:
                resp.headers['X-Copied-From-Last-Modified'] = \
                    source_resp.headers['last-modified']
            copy_headers_into(req, resp)
        resp.last_modified = math.ceil(
            float(Timestamp(req.headers['X-Timestamp'])))
        return resp
    def __call__(self, request):

        if request.method not in ("POST", "PUT"):
            return self.app

        try:
            ver, account, container, obj = request.split_path(
                2, 4, rest_with_last=True)
        except ValueError:
            return self.app

        if not container:
            # account request, so we pay attention to the quotas
            new_quota = request.headers.get(
                'X-Account-Meta-Quota-Bytes')
            remove_quota = request.headers.get(
                'X-Remove-Account-Meta-Quota-Bytes')
        else:
            # container or object request; even if the quota headers are set
            # in the request, they're meaningless
            new_quota = remove_quota = None

        if remove_quota:
            new_quota = 0    # X-Remove dominates if both are present

        if request.environ.get('reseller_request') is True:
            if new_quota and not new_quota.isdigit():
                return HTTPBadRequest()
            return self.app

        # deny quota set for non-reseller
        if new_quota is not None:
            return HTTPForbidden()

        if request.method == "POST" or not obj:
            return self.app

        content_length = (request.content_length or 0)

        account_info = get_account_info(request.environ, self.app)
        if not account_info or not account_info['bytes']:
            return self.app
        try:
            quota = int(account_info['meta'].get('quota-bytes', -1))
        except ValueError:
            return self.app
        if quota < 0:
            return self.app

        new_size = int(account_info['bytes']) + content_length
        if quota < new_size:
            resp = HTTPRequestEntityTooLarge(body='Upload exceeds quota.')
            if 'swift.authorize' in request.environ:
                orig_authorize = request.environ['swift.authorize']

                def reject_authorize(*args, **kwargs):
                    aresp = orig_authorize(*args, **kwargs)
                    if aresp:
                        return aresp
                    return resp
                request.environ['swift.authorize'] = reject_authorize
            else:
                return resp

        return self.app
Exemplo n.º 25
0
    def handle_extract_iter(self,
                            req,
                            compress_type,
                            out_content_type='text/plain'):
        """
        A generator that can be assigned to a swob Response's app_iter which,
        when iterated over, will extract and PUT the objects pulled from the
        request body. Will occasionally yield whitespace while request is being
        processed. When the request is completed will yield a response body
        that can be parsed to determine success. See above documentation for
        details.

        :params req: a swob Request
        :params compress_type: specifying the compression type of the tar.
            Accepts '', 'gz', or 'bz2'
        """
        resp_dict = {
            'Response Status': HTTPCreated().status,
            'Response Body': '',
            'Number Files Created': 0
        }
        failed_files = []
        last_yield = time()
        separator = ''
        containers_accessed = set()
        try:
            if not out_content_type:
                raise HTTPNotAcceptable(request=req)
            if out_content_type.endswith('/xml'):
                yield '<?xml version="1.0" encoding="UTF-8"?>\n'

            if req.content_length is None and \
                    req.headers.get('transfer-encoding',
                                    '').lower() != 'chunked':
                raise HTTPLengthRequired(request=req)
            try:
                vrs, account, extract_base = req.split_path(2, 3, True)
            except ValueError:
                raise HTTPNotFound(request=req)
            extract_base = extract_base or ''
            extract_base = extract_base.rstrip('/')
            tar = tarfile.open(mode='r|' + compress_type,
                               fileobj=req.body_file)
            failed_response_type = HTTPBadRequest
            req.environ['eventlet.minimum_write_chunk_size'] = 0
            containers_created = 0
            while True:
                if last_yield + self.yield_frequency < time():
                    separator = '\r\n\r\n'
                    last_yield = time()
                    yield ' '
                tar_info = tar.next()
                if tar_info is None or \
                        len(failed_files) >= self.max_failed_extractions:
                    break
                if tar_info.isfile():
                    obj_path = tar_info.name
                    if obj_path.startswith('./'):
                        obj_path = obj_path[2:]
                    obj_path = obj_path.lstrip('/')
                    if extract_base:
                        obj_path = extract_base + '/' + obj_path
                    if '/' not in obj_path:
                        continue  # ignore base level file

                    destination = '/'.join(['', vrs, account, obj_path])
                    container = obj_path.split('/', 1)[0]
                    if not check_utf8(destination):
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPPreconditionFailed().status
                        ])
                        continue
                    if tar_info.size > MAX_FILE_SIZE:
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPRequestEntityTooLarge().status
                        ])
                        continue
                    container_failure = None
                    if container not in containers_accessed:
                        cont_path = '/'.join(['', vrs, account, container])
                        try:
                            if self.create_container(req, cont_path):
                                containers_created += 1
                                if containers_created > self.max_containers:
                                    raise HTTPBadRequest(
                                        'More than %d containers to create '
                                        'from tar.' % self.max_containers)
                        except CreateContainerError, err:
                            # the object PUT to this container still may
                            # succeed if acls are set
                            container_failure = [
                                quote(cont_path[:MAX_PATH_LENGTH]), err.status
                            ]
                            if err.status_int == HTTP_UNAUTHORIZED:
                                raise HTTPUnauthorized(request=req)
                        except ValueError:
                            failed_files.append([
                                quote(obj_path[:MAX_PATH_LENGTH]),
                                HTTPBadRequest().status
                            ])
                            continue
Exemplo n.º 26
0
    def _handle_copy_request(self, req):
        """
        This method handles copying objects based on values set in the headers
        'X-Copy-From' and 'X-Copy-From-Account'

        This method was added as part of the refactoring of the PUT method and
        the functionality is expected to be moved to middleware
        """
        if req.environ.get('swift.orig_req_method', req.method) != 'POST':
            req.environ.setdefault('swift.log_info', []).append(
                'x-copy-from:%s' % req.headers['X-Copy-From'])
        ver, acct, _rest = req.split_path(2, 3, True)
        src_account_name = req.headers.get('X-Copy-From-Account', None)
        if src_account_name:
            src_account_name = check_account_format(req, src_account_name)
        else:
            src_account_name = acct
        src_container_name, src_obj_name = check_copy_from_header(req)
        source_header = '/%s/%s/%s/%s' % (ver, src_account_name,
                                          src_container_name, src_obj_name)
        source_req = req.copy_get()

        # make sure the source request uses it's container_info
        source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
        source_req.path_info = source_header
        source_req.headers['X-Newest'] = 'true'

        orig_obj_name = self.object_name
        orig_container_name = self.container_name
        orig_account_name = self.account_name
        sink_req = Request.blank(req.path_info,
                                 environ=req.environ,
                                 headers=req.headers)

        self.object_name = src_obj_name
        self.container_name = src_container_name
        self.account_name = src_account_name
        source_resp = self.GET(source_req)

        # This gives middlewares a way to change the source; for example,
        # this lets you COPY a SLO manifest and have the new object be the
        # concatenation of the segments (like what a GET request gives
        # the client), not a copy of the manifest file.
        hook = req.environ.get(
            'swift.copy_hook',
            (lambda source_req, source_resp, sink_req: source_resp))
        source_resp = hook(source_req, source_resp, sink_req)

        # reset names
        self.object_name = orig_obj_name
        self.container_name = orig_container_name
        self.account_name = orig_account_name

        if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
            # this is a bit of ugly code, but I'm willing to live with it
            # until copy request handling moves to middleware
            return source_resp, None, None, None
        if source_resp.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            raise HTTPRequestEntityTooLarge(request=req)
        if source_resp.content_length > constraints.MAX_FILE_SIZE:
            raise HTTPRequestEntityTooLarge(request=req)

        data_source = iter(source_resp.app_iter)
        sink_req.content_length = source_resp.content_length
        sink_req.etag = source_resp.etag

        # we no longer need the X-Copy-From header
        del sink_req.headers['X-Copy-From']
        if 'X-Copy-From-Account' in sink_req.headers:
            del sink_req.headers['X-Copy-From-Account']
        if not req.content_type_manually_set:
            sink_req.headers['Content-Type'] = \
                source_resp.headers['Content-Type']
        if config_true_value(sink_req.headers.get('x-fresh-metadata',
                                                  'false')):
            # post-as-copy: ignore new sysmeta, copy existing sysmeta
            condition = lambda k: is_sys_meta('object', k)
            remove_items(sink_req.headers, condition)
            copy_header_subset(source_resp, sink_req, condition)
        else:
            # copy/update existing sysmeta and user meta
            copy_headers_into(source_resp, sink_req)
            copy_headers_into(req, sink_req)

        # copy over x-static-large-object for POSTs and manifest copies
        if 'X-Static-Large-Object' in source_resp.headers and \
                req.params.get('multipart-manifest') == 'get':
            sink_req.headers['X-Static-Large-Object'] = \
                source_resp.headers['X-Static-Large-Object']

        req = sink_req

        def update_response(req, resp):
            acct, path = source_resp.environ['PATH_INFO'].split('/', 3)[2:4]
            resp.headers['X-Copied-From-Account'] = quote(acct)
            resp.headers['X-Copied-From'] = quote(path)
            if 'last-modified' in source_resp.headers:
                resp.headers['X-Copied-From-Last-Modified'] = \
                    source_resp.headers['last-modified']
            copy_headers_into(req, resp)
            return resp

        # this is a bit of ugly code, but I'm willing to live with it
        # until copy request handling moves to middleware
        return None, req, data_source, update_response
Exemplo n.º 27
0
    def handle_extract(self, req, compress_type):
        """
        :params req: a swob Request
        :params compress_type: specifying the compression type of the tar.
                               Accepts '', 'gz, or 'bz2'
        :raises HTTPException: on unhandled errors
        :returns: a swob response to request
        """
        success_count = 0
        failed_files = []
        existing_containers = set()
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            return HTTPNotAcceptable(request=req)
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            return HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            return HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        try:
            tar = tarfile.open(mode='r|' + compress_type,
                               fileobj=req.body_file)
            while True:
                tar_info = tar.next()
                if tar_info is None or \
                        len(failed_files) >= self.max_failed_extractions:
                    break
                if tar_info.isfile():
                    obj_path = tar_info.name
                    if obj_path.startswith('./'):
                        obj_path = obj_path[2:]
                    obj_path = obj_path.lstrip('/')
                    if extract_base:
                        obj_path = extract_base + '/' + obj_path
                    if '/' not in obj_path:
                        continue  # ignore base level file

                    destination = '/'.join(['', vrs, account, obj_path])
                    container = obj_path.split('/', 1)[0]
                    if not check_utf8(destination):
                        failed_files.append([
                            quote(destination[:MAX_PATH_LENGTH]),
                            HTTPPreconditionFailed().status
                        ])
                        continue
                    if tar_info.size > MAX_FILE_SIZE:
                        failed_files.append([
                            quote(destination[:MAX_PATH_LENGTH]),
                            HTTPRequestEntityTooLarge().status
                        ])
                        continue
                    if container not in existing_containers:
                        try:
                            self.create_container(
                                req, '/'.join(['', vrs, account, container]))
                            existing_containers.add(container)
                        except CreateContainerError, err:
                            if err.status_int == HTTP_UNAUTHORIZED:
                                return HTTPUnauthorized(request=req)
                            failed_files.append([
                                quote(destination[:MAX_PATH_LENGTH]),
                                err.status
                            ])
                            continue
                        except ValueError:
                            failed_files.append([
                                quote(destination[:MAX_PATH_LENGTH]),
                                HTTP_BAD_REQUEST
                            ])
                            continue
                        if len(existing_containers) > self.max_containers:
                            return HTTPBadRequest(
                                'More than %d base level containers in tar.' %
                                self.max_containers)
Exemplo n.º 28
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        Heads every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :params req: a swob.Request with an obj in path
        :raises: HttpException on errors
        """
        try:
            vrs, account, container, obj = req.split_path(1, 4, True)
        except ValueError:
            return self.app(req.environ, start_response)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
        problem_segments = []

        if len(parsed_data) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of segments must be <= %d' %
                self.max_manifest_segments)
        total_size = 0
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = []
        slo_etag = md5()
        last_obj_path = None
        for index, seg_dict in enumerate(parsed_data):
            obj_name = seg_dict['path']
            if isinstance(obj_name, unicode):
                obj_name = obj_name.encode('utf-8')
            obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
            if req.path == quote(obj_path):
                raise HTTPConflict('Manifest object name "%s" '
                                   'cannot be included in the manifest' %
                                   obj_name)
            try:
                seg_size = int(seg_dict['size_bytes'])
            except (ValueError, TypeError):
                if seg_dict['size_bytes'] is None:
                    seg_size = None
                else:
                    raise HTTPBadRequest('Invalid Manifest File')
            if seg_size is not None and seg_size < self.min_segment_size and \
                    index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
                    '%d bytes.' % self.min_segment_size)

            new_env = req.environ.copy()
            new_env['PATH_INFO'] = obj_path
            new_env['REQUEST_METHOD'] = 'HEAD'
            new_env['swift.source'] = 'SLO'
            del (new_env['wsgi.input'])
            del (new_env['QUERY_STRING'])
            new_env['CONTENT_LENGTH'] = 0
            new_env['HTTP_USER_AGENT'] = \
                '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
            if obj_path != last_obj_path:
                last_obj_path = obj_path
                head_seg_resp = \
                    Request.blank(obj_path, new_env).get_response(self)

            if head_seg_resp.is_success:
                segment_length = head_seg_resp.content_length
                if seg_dict.get('range'):
                    # Since we now know the length, we can normalize the ranges
                    ranges = seg_dict['range'].ranges_for_length(
                        head_seg_resp.content_length)

                    if not ranges:
                        problem_segments.append(
                            [quote(obj_name), 'Unsatisfiable Range'])
                    elif len(ranges) > 1:
                        problem_segments.append(
                            [quote(obj_name), 'Multiple Ranges'])
                    elif ranges == [(0, head_seg_resp.content_length)]:
                        # Just one range, and it exactly matches the object.
                        # Why'd we do this again?
                        seg_dict['range'] = None
                        segment_length = head_seg_resp.content_length
                    else:
                        range = ranges[0]
                        seg_dict['range'] = '%d-%d' % (range[0], range[1] - 1)
                        segment_length = range[1] - range[0]

                if segment_length < self.min_segment_size and \
                        index < len(parsed_data) - 1:
                    raise HTTPBadRequest(
                        'Each segment, except the last, must be at least '
                        '%d bytes.' % self.min_segment_size)
                total_size += segment_length
                if seg_size is not None and \
                        seg_size != head_seg_resp.content_length:
                    problem_segments.append([quote(obj_name), 'Size Mismatch'])
                if seg_dict['etag'] is None or \
                        seg_dict['etag'] == head_seg_resp.etag:
                    if seg_dict.get('range'):
                        slo_etag.update(
                            '%s:%s;' % (head_seg_resp.etag, seg_dict['range']))
                    else:
                        slo_etag.update(head_seg_resp.etag)
                else:
                    problem_segments.append([quote(obj_name), 'Etag Mismatch'])
                if head_seg_resp.last_modified:
                    last_modified = head_seg_resp.last_modified
                else:
                    # shouldn't happen
                    last_modified = datetime.now()

                last_modified_formatted = \
                    last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
                seg_data = {
                    'name': '/' + seg_dict['path'].lstrip('/'),
                    'bytes': head_seg_resp.content_length,
                    'hash': head_seg_resp.etag,
                    'content_type': head_seg_resp.content_type,
                    'last_modified': last_modified_formatted
                }
                if seg_dict.get('range'):
                    seg_data['range'] = seg_dict['range']

                if config_true_value(
                        head_seg_resp.headers.get('X-Static-Large-Object')):
                    seg_data['sub_slo'] = True
                data_for_storage.append(seg_data)

            else:
                problem_segments.append(
                    [quote(obj_name), head_seg_resp.status])
        if problem_segments:
            resp_body = get_response_body(out_content_type, {},
                                          problem_segments)
            raise HTTPBadRequest(resp_body, content_type=out_content_type)
        env = req.environ

        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
        env['swift.content_type_overridden'] = True
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
        env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        env['CONTENT_LENGTH'] = str(len(json_data))
        env['wsgi.input'] = BytesIO(json_data)

        slo_put_context = SloPutContext(self, slo_etag)
        return slo_put_context.handle_slo_put(req, start_response)
Exemplo n.º 29
0
    def handle_extract_iter(self, req, compress_type,
                            out_content_type='text/plain'):
        """
        A generator that can be assigned to a swob Response's app_iter which,
        when iterated over, will extract and PUT the objects pulled from the
        request body. Will occasionally yield whitespace while request is being
        processed. When the request is completed will yield a response body
        that can be parsed to determine success. See above documentation for
        details.

        :params req: a swob Request
        :params compress_type: specifying the compression type of the tar.
            Accepts '', 'gz', or 'bz2'
        """
        resp_dict = {'Response Status': HTTPCreated().status,
                     'Response Body': '', 'Number Files Created': 0}
        failed_files = []
        last_yield = time()
        separator = ''
        containers_accessed = set()
        try:
            if not out_content_type:
                raise HTTPNotAcceptable(request=req)
            if out_content_type.endswith('/xml'):
                yield '<?xml version="1.0" encoding="UTF-8"?>\n'

            if req.content_length is None and \
                    req.headers.get('transfer-encoding',
                                    '').lower() != 'chunked':
                raise HTTPLengthRequired(request=req)
            try:
                vrs, account, extract_base = req.split_path(2, 3, True)
            except ValueError:
                raise HTTPNotFound(request=req)
            extract_base = extract_base or ''
            extract_base = extract_base.rstrip('/')
            tar = tarfile.open(mode='r|' + compress_type,
                               fileobj=req.body_file)
            failed_response_type = HTTPBadRequest
            req.environ['eventlet.minimum_write_chunk_size'] = 0
            containers_created = 0
            while True:
                if last_yield + self.yield_frequency < time():
                    separator = '\r\n\r\n'
                    last_yield = time()
                    yield ' '
                tar_info = tar.next()
                if tar_info is None or \
                        len(failed_files) >= self.max_failed_extractions:
                    break
                if tar_info.isfile():
                    obj_path = tar_info.name
                    if obj_path.startswith('./'):
                        obj_path = obj_path[2:]
                    obj_path = obj_path.lstrip('/')
                    if extract_base:
                        obj_path = extract_base + '/' + obj_path
                    if '/' not in obj_path:
                        continue  # ignore base level file

                    destination = '/'.join(
                        ['', vrs, account, obj_path])
                    container = obj_path.split('/', 1)[0]
                    if not check_utf8(destination):
                        failed_files.append(
                            [quote(obj_path[:MAX_PATH_LENGTH]),
                             HTTPPreconditionFailed().status])
                        continue
                    if tar_info.size > MAX_FILE_SIZE:
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPRequestEntityTooLarge().status])
                        continue
                    container_failure = None
                    if container not in containers_accessed:
                        cont_path = '/'.join(['', vrs, account, container])
                        try:
                            if self.create_container(req, cont_path):
                                containers_created += 1
                                if containers_created > self.max_containers:
                                    raise HTTPBadRequest(
                                        'More than %d containers to create '
                                        'from tar.' % self.max_containers)
                        except CreateContainerError as err:
                            # the object PUT to this container still may
                            # succeed if acls are set
                            container_failure = [
                                quote(cont_path[:MAX_PATH_LENGTH]),
                                err.status]
                            if err.status_int == HTTP_UNAUTHORIZED:
                                raise HTTPUnauthorized(request=req)
                        except ValueError:
                            failed_files.append([
                                quote(obj_path[:MAX_PATH_LENGTH]),
                                HTTPBadRequest().status])
                            continue

                    tar_file = tar.extractfile(tar_info)
                    new_env = req.environ.copy()
                    new_env['REQUEST_METHOD'] = 'PUT'
                    new_env['wsgi.input'] = tar_file
                    new_env['PATH_INFO'] = destination
                    new_env['CONTENT_LENGTH'] = tar_info.size
                    new_env['swift.source'] = 'EA'
                    new_env['HTTP_USER_AGENT'] = \
                        '%s BulkExpand' % req.environ.get('HTTP_USER_AGENT')
                    create_obj_req = Request.blank(destination, new_env)
                    resp = create_obj_req.get_response(self.app)
                    containers_accessed.add(container)
                    if resp.is_success:
                        resp_dict['Number Files Created'] += 1
                    else:
                        if container_failure:
                            failed_files.append(container_failure)
                        if resp.status_int == HTTP_UNAUTHORIZED:
                            failed_files.append([
                                quote(obj_path[:MAX_PATH_LENGTH]),
                                HTTPUnauthorized().status])
                            raise HTTPUnauthorized(request=req)
                        if resp.status_int // 100 == 5:
                            failed_response_type = HTTPBadGateway
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]), resp.status])

            if failed_files:
                resp_dict['Response Status'] = failed_response_type().status
            elif not resp_dict['Number Files Created']:
                resp_dict['Response Status'] = HTTPBadRequest().status
                resp_dict['Response Body'] = 'Invalid Tar File: No Valid Files'

        except HTTPException as err:
            resp_dict['Response Status'] = err.status
            resp_dict['Response Body'] = err.body
        except (tarfile.TarError, zlib.error) as tar_error:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
        except Exception:
            self.logger.exception('Error in extract archive.')
            resp_dict['Response Status'] = HTTPServerError().status

        yield separator + get_response_body(
            out_content_type, resp_dict, failed_files)
Exemplo n.º 30
0
    def handle_multipart_put(self, req, start_response):
        """
        Will handle the PUT of a SLO manifest.
        List every object in manifest to check if is valid and if so will
        save a manifest generated from the user input. Uses WSGIContext to
        call self and start_response and returns a WSGI iterator.

        :param req: a :class:`~swift.common.swob.Request` with an obj in path
        :param start_response: WSGI start_response callable
        :raises HttpException: on errors
        """
        vrs, account, container, obj = req.split_path(4, rest_with_last=True)
        if req.content_length > self.max_manifest_size:
            raise HTTPRequestEntityTooLarge("Manifest File > %d bytes" %
                                            self.max_manifest_size)
        if req.headers.get('X-Copy-From'):
            raise HTTPMethodNotAllowed(
                'Multipart Manifest PUTs cannot be COPY requests')
        if req.content_length is None and \
                req.headers.get('transfer-encoding', '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        parsed_data = parse_and_validate_input(
            req.body_file.read(self.max_manifest_size), req.path)
        problem_segments = []

        object_segments = [seg for seg in parsed_data if 'path' in seg]
        if len(object_segments) > self.max_manifest_segments:
            raise HTTPRequestEntityTooLarge(
                'Number of object-backed segments must be <= %d' %
                self.max_manifest_segments)
        try:
            out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
        except ValueError:
            out_content_type = 'text/plain'  # Ignore invalid header
        if not out_content_type:
            out_content_type = 'text/plain'
        data_for_storage = [None] * len(parsed_data)
        total_size = 0
        path2indices = defaultdict(list)
        for index, seg_dict in enumerate(parsed_data):
            if 'data' in seg_dict:
                data_for_storage[index] = seg_dict
                total_size += len(base64.b64decode(seg_dict['data']))
            else:
                path2indices[seg_dict['path']].append(index)

        # BEGIN: New OpenIO code
        obj_path = get_valid_utf8_str(object_segments[0]['path']).lstrip('/')
        split_path = obj_path.split('/')
        segments_container = split_path[0]
        seg_prefix = '/'.join(split_path[1:-1])
        segments_container_path = '/'.join(
            ['', vrs, account, segments_container])

        # END: New OpenIO code

        # BEGIN: Adapt for OpenIO code
        def validate_seg_dict(seg_dict, seg_resp, allow_empty_segment):
            obj_name = seg_dict['path']

            segment_length = seg_resp['bytes']
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(seg_resp['bytes'])

                if not ranges:
                    problem_segments.append(
                        [quote(obj_name), 'Unsatisfiable Range'])
                elif ranges == [(0, seg_resp['bytes'])]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = seg_resp['bytes']
                else:
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]

            if segment_length < 1 and not allow_empty_segment:
                problem_segments.append([
                    quote(obj_name),
                    'Too small; each segment must be at least 1 byte.'
                ])

            _size_bytes = seg_dict.get('size_bytes')
            size_mismatch = (_size_bytes is not None
                             and _size_bytes != seg_resp['bytes'])
            if size_mismatch:
                problem_segments.append([quote(obj_name), 'Size Mismatch'])

            _etag = seg_dict.get('etag')
            etag_mismatch = (_etag is not None and _etag != seg_resp['hash'])
            if etag_mismatch:
                problem_segments.append([quote(obj_name), 'Etag Mismatch'])

            last_modified_formatted = seg_resp.get('last_modified')
            if not last_modified_formatted:
                # shouldn't happen
                last_modified_formatted = datetime.now().strftime(
                    '%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {
                'name': '/' + seg_dict['path'].lstrip('/'),
                'bytes': seg_resp['bytes'],
                'hash': seg_resp['hash'],
                'content_type': seg_resp['content_type'],
                'last_modified': last_modified_formatted
            }
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(seg_resp['slo']):
                seg_data['sub_slo'] = True

            return segment_length, seg_data

        # END: Adapt for OpenIO code

        heartbeat = config_true_value(req.params.get('heartbeat'))
        separator = ''
        if heartbeat:
            # Apparently some ways of deploying require that this to happens
            # *before* the return? Not sure why.
            req.environ['eventlet.minimum_write_chunk_size'] = 0
            start_response(
                '202 Accepted',
                [  # NB: not 201 !
                    ('Content-Type', out_content_type),
                ])
            separator = '\r\n\r\n'

        def resp_iter(total_size=total_size):
            # wsgi won't propagate start_response calls until some data has
            # been yielded so make sure first heartbeat is sent immediately
            if heartbeat:
                yield ' '
            last_yield_time = time.time()

            # BEGIN: New OpenIO code
            sub_req = make_subrequest(
                req.environ,
                path='%s?format=json&prefix=%s&limit=%d' %
                (segments_container_path, seg_prefix,
                 self.max_manifest_segments),
                method='GET',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT',
                swift_source='SLO')
            sub_req.environ.setdefault('oio.query', {})
            # All meta2 databases may not be synchronized
            sub_req.environ['oio.query']['force_master'] = True
            sub_req.environ['oio.query']['slo'] = True
            list_seg_resp = sub_req.get_response(self)

            with closing_if_possible(list_seg_resp.app_iter):
                segments_resp = json.loads(list_seg_resp.body)

            seg_resp_dict = dict()
            for seg_resp in segments_resp:
                obj_name = '/'.join(('', segments_container, seg_resp['name']))
                seg_resp_dict[obj_name] = seg_resp

            for obj_name in path2indices:
                now = time.time()
                if heartbeat and (now - last_yield_time >
                                  self.yield_frequency):
                    # Make sure we've called start_response before
                    # sending data
                    yield ' '
                    last_yield_time = now

                for i in path2indices[obj_name]:
                    if not list_seg_resp.is_success:
                        problem_segments.append(
                            [quote(obj_name), list_seg_resp.status])
                        segment_length = 0
                        seg_data = None
                    else:
                        seg_resp = seg_resp_dict.get(obj_name)
                        if seg_resp:
                            segment_length, seg_data = validate_seg_dict(
                                parsed_data[i], seg_resp,
                                (i == len(parsed_data) - 1))
                        else:
                            problem_segments.append([quote(obj_name), 404])
                            segment_length = 0
                            seg_data = None
                    data_for_storage[i] = seg_data
                    total_size += segment_length
            # END: New OpenIO code

            if problem_segments:
                err = HTTPBadRequest(content_type=out_content_type)
                resp_dict = {}
                if heartbeat:
                    resp_dict['Response Status'] = err.status
                    resp_dict['Response Body'] = err.body or '\n'.join(
                        RESPONSE_REASONS.get(err.status_int, ['']))
                else:
                    start_response(err.status,
                                   [(h, v) for h, v in err.headers.items()
                                    if h.lower() != 'content-length'])
                yield separator + get_response_body(
                    out_content_type, resp_dict, problem_segments, 'upload')
                return

            slo_etag = md5()
            for seg_data in data_for_storage:
                if 'data' in seg_data:
                    raw_data = base64.b64decode(seg_data['data'])
                    slo_etag.update(md5(raw_data).hexdigest())
                elif seg_data.get('range'):
                    slo_etag.update('%s:%s;' %
                                    (seg_data['hash'], seg_data['range']))
                else:
                    slo_etag.update(seg_data['hash'])

            slo_etag = slo_etag.hexdigest()
            client_etag = req.headers.get('Etag')
            if client_etag and client_etag.strip('"') != slo_etag:
                err = HTTPUnprocessableEntity(request=req)
                if heartbeat:
                    yield separator + get_response_body(
                        out_content_type, {
                            'Response Status':
                            err.status,
                            'Response Body':
                            err.body or '\n'.join(
                                RESPONSE_REASONS.get(err.status_int, [''])),
                        }, problem_segments, 'upload')
                else:
                    for chunk in err(req.environ, start_response):
                        yield chunk
                return

            json_data = json.dumps(data_for_storage)
            if six.PY3:
                json_data = json_data.encode('utf-8')
            req.body = json_data
            req.headers.update({
                SYSMETA_SLO_ETAG: slo_etag,
                SYSMETA_SLO_SIZE: total_size,
                'X-Static-Large-Object': 'True',
                'Etag': md5(json_data).hexdigest(),
            })

            # Ensure container listings have both etags. However, if any
            # middleware to the left of us touched the base value, trust them.
            override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
            val, sep, params = req.headers.get(override_header,
                                               '').partition(';')
            req.headers[override_header] = '%s; slo_etag=%s' % (
                (val or req.headers['Etag']) + sep + params, slo_etag)

            env = req.environ
            if not env.get('CONTENT_TYPE'):
                guessed_type, _junk = mimetypes.guess_type(req.path_info)
                env['CONTENT_TYPE'] = (guessed_type
                                       or 'application/octet-stream')
            env['swift.content_type_overridden'] = True
            env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

            resp = req.get_response(self.app)
            resp_dict = {'Response Status': resp.status}
            if resp.is_success:
                resp.etag = slo_etag
                resp_dict['Etag'] = resp.headers['Etag']
                resp_dict['Last Modified'] = resp.headers['Last-Modified']

            if heartbeat:
                resp_dict['Response Body'] = resp.body
                yield separator + get_response_body(out_content_type,
                                                    resp_dict, [], 'upload')
            else:
                for chunk in resp(req.environ, start_response):
                    yield chunk

        return resp_iter()