def _get_source_object(self, ssc_ctx, source_path, req):
    """Fetch the server-side-copy source object and validate its size.

    Builds a GET subrequest derived from *req*, sends it through
    *ssc_ctx*, and returns either the source response or an
    HTTPRequestEntityTooLarge error response when the source cannot be
    copied.
    """
    source_req = req.copy_get()
    # Drop any policy index so the source request resolves the policy
    # from its own container_info.
    source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
    source_req.path_info = quote(source_path)
    source_req.headers['X-Newest'] = 'true'
    # When the copy source is an SLO manifest fetched with
    # multipart-manifest=get, request the raw manifest form.
    query = source_req.params
    if query.get('multipart-manifest') == 'get':
        query['format'] = 'raw'
        source_req.params = query
    source_resp = ssc_ctx.get_source_resp(source_req)
    length = source_resp.content_length
    if length is None or length > MAX_FILE_SIZE:
        # A None content length indicates a transfer-encoding: chunked
        # source object, which currently only happens because there are
        # more than CONTAINER_LISTING_LIMIT segments in a segmented
        # object. Refuse the server-side copy in either case.
        close_if_possible(source_resp.app_iter)
        return HTTPRequestEntityTooLarge(request=req)
    return source_resp
def _get_source_object(self, ssc_ctx, source_path, req):
    """Fetch the copy source object, honoring POST-as-COPY semantics.

    Returns the source GET response, or HTTPRequestEntityTooLarge when
    the source is chunked or exceeds MAX_FILE_SIZE.
    """
    source_req = req.copy_get()
    # Drop the policy index so the source request uses its own
    # container_info.
    source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
    source_req.path_info = quote(source_path)
    source_req.headers['X-Newest'] = 'true'
    if 'swift.post_as_copy' in req.environ:
        # We're COPYing one object over itself because of a POST: rely
        # on the PUT for write authorization and don't require read
        # authorization for the source.
        source_req.environ['swift.authorize'] = lambda _junk: None
        source_req.environ['swift.authorize_override'] = True
    # When copying an SLO manifest itself, ask for the raw manifest.
    query = source_req.params
    if query.get('multipart-manifest') == 'get':
        query['format'] = 'raw'
        source_req.params = query
    source_resp = ssc_ctx.get_source_resp(source_req)
    length = source_resp.content_length
    if length is None or length > MAX_FILE_SIZE:
        # None indicates a transfer-encoding: chunked source object,
        # which currently only happens because there are more than
        # CONTAINER_LISTING_LIMIT segments in a segmented object;
        # refuse the server-side copy in either case.
        return HTTPRequestEntityTooLarge(request=req)
    return source_resp
def __call__(self, request):
    """Enforce the account byte quota on incoming POST/PUT requests."""
    if request.method not in ("POST", "PUT"):
        return self.app
    try:
        request.split_path(2, 4, rest_with_last=True)
    except ValueError:
        # not a storage path; nothing to enforce
        return self.app
    requested_quota = request.headers.get('X-Account-Meta-Quota-Bytes')
    if request.headers.get('X-Remove-Account-Meta-Quota-Bytes'):
        # X-Remove dominates if both are present
        requested_quota = 0
    if request.environ.get('reseller_request') is True:
        # resellers may set the quota, but it must be numeric
        if requested_quota and not requested_quota.isdigit():
            return HTTPBadRequest()
        return self.app
    if requested_quota is not None:
        # deny quota set for non-reseller
        return HTTPForbidden()
    account_info = get_account_info(request.environ, self.app)
    if not account_info or not account_info['bytes']:
        return self.app
    projected_size = int(account_info['bytes']) + \
        (request.content_length or 0)
    quota = int(account_info['meta'].get('quota-bytes', -1))
    if 0 <= quota < projected_size:
        return HTTPRequestEntityTooLarge()
    return self.app
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns HTTPRequestEntityTooLarge: the object is too large
    :returns HTTPLengthRequired: missing content-length header and not
                                 a chunked request
    :returns HTTPBadRequest: missing or bad content-type header, or
                             bad metadata
    """
    length = req.content_length
    headers = req.headers
    # reject bodies larger than the configured maximum object size
    if length and length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    # a body length is required unless the transfer is chunked
    if length is None and headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(request=req)
    # COPY-style requests must not carry a body of their own
    if 'X-Copy-From' in headers and length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req, content_type='text/plain')
    name_len = len(object_name)
    if name_len > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (name_len, MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')
    if 'Content-Type' not in headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='No content type')
    if not check_utf8(headers['Content-Type']):
        return HTTPBadRequest(request=req, body='Invalid Content-Type',
                              content_type='text/plain')
    # finally validate user metadata limits
    return check_metadata(req, 'object')
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises: HTTPException on failures
    """
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    buffered = ''
    more_input = True
    objs_to_delete = []
    while more_input:
        if '\n' in buffered:
            # peel one complete name off the front of the buffer
            name, buffered = buffered.split('\n', 1)
            objs_to_delete.append(unquote(name))
        else:
            chunk = req.body_file.read(MAX_PATH_LENGTH)
            if chunk:
                buffered += chunk
            else:
                # end of input; anything left in the buffer is the
                # final (unterminated) name
                more_input = False
                if buffered.strip():
                    objs_to_delete.append(unquote(buffered))
        # enforce the limits on every iteration so oversized input is
        # rejected as soon as it is detected
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(buffered) > MAX_PATH_LENGTH * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def __call__(self, request):
    """Enforce account byte quotas, including server-side copies."""
    if request.method not in ("POST", "PUT"):
        return self.app
    try:
        ver, account, container, obj = request.split_path(
            2, 4, rest_with_last=True)
    except ValueError:
        return self.app
    if container:
        # container or object request; even if the quota headers are set
        # in the request, they're meaningless
        requested_quota = remove_quota = None
    else:
        # account request, so we pay attention to the quotas
        requested_quota = request.headers.get('X-Account-Meta-Quota-Bytes')
        remove_quota = request.headers.get(
            'X-Remove-Account-Meta-Quota-Bytes')
    if remove_quota:
        # X-Remove dominates if both are present
        requested_quota = 0
    if request.environ.get('reseller_request') is True:
        if requested_quota and not requested_quota.isdigit():
            return HTTPBadRequest()
        return self.app
    # deny quota set for non-reseller
    if requested_quota is not None:
        return HTTPForbidden()
    # only object PUTs can grow the account; everything else passes
    if not obj or request.method == "POST":
        return self.app
    copy_from = request.headers.get('X-Copy-From')
    content_length = (request.content_length or 0)
    if obj and copy_from:
        # for a server-side copy, the incoming body is empty; the real
        # growth is the size of the copy source
        path = '/' + ver + '/' + account + '/' + copy_from.lstrip('/')
        object_info = get_object_info(request.environ, self.app, path)
        if not object_info or not object_info['length']:
            content_length = 0
        else:
            content_length = int(object_info['length'])
    account_info = get_account_info(request.environ, self.app)
    if not account_info or not account_info['bytes']:
        return self.app
    projected_size = int(account_info['bytes']) + content_length
    quota = int(account_info['meta'].get('quota-bytes', -1))
    if 0 <= quota < projected_size:
        return HTTPRequestEntityTooLarge()
    return self.app
def bad_response(self, req, container_info):
    """Build the error response for a quota-violating upload.

    Returns the authorization callback's denial (401/403) when the user
    couldn't have PUT this object in the first place — this prevents
    leaking the container's existence to unauthed users — otherwise a
    413 over-quota response.
    """
    if 'swift.authorize' in req.environ:
        req.acl = container_info['write_acl']
        denial = req.environ['swift.authorize'](req)
        if denial:
            return denial
    return HTTPRequestEntityTooLarge(body='Upload exceeds quota.')
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns: HTTPRequestEntityTooLarge -- the object is too large
    :returns: HTTPLengthRequired -- missing content-length header and not
                                    a chunked request
    :returns: HTTPBadRequest -- missing or bad content-type header, or
                                bad metadata
    :returns: HTTPNotImplemented -- unsupported transfer-encoding header value
    """
    # message_length() raises ValueError on inconsistent length headers
    # and AttributeError on an unsupported transfer-encoding
    try:
        body_length = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=str(e))
    except AttributeError as e:
        return HTTPNotImplemented(request=req, content_type='text/plain',
                                  body=str(e))
    if body_length is not None and body_length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    chunked = req.headers.get('transfer-encoding') == 'chunked'
    if req.content_length is None and not chunked:
        return HTTPLengthRequired(body='Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')
    name_len = len(object_name)
    if name_len > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (name_len, MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='No content type')
    # normalize/validate X-Delete-At / X-Delete-After headers
    try:
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req, body=e.body,
                              content_type='text/plain')
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req, body='Invalid Content-Type',
                              content_type='text/plain')
    return check_metadata(req, 'object')
def __call__(self, env, start_response):
    """WSGI entry point: activate pending services and enforce the
    account's service-plan storage quotas (containers, bytes, objects)
    on PUT/POST requests.

    Bug fix: the two service-activation error paths previously returned
    the bare swob response object (``return HTTPInternalServerError()``)
    instead of invoking it as a WSGI application with
    ``(env, start_response)`` the way every other error path in this
    method does; a WSGI callable must return an iterable of byte
    strings, so the bare object was not a valid WSGI response.
    """
    req = Request(env)
    new_service = env.get('liteauth.new_service', None)
    if new_service:
        account_id = env.get('REMOTE_USER', '')
        if not account_id:
            # no authenticated user to attach the service to
            return HTTPInternalServerError()(env, start_response)
        if not self.activate_service(account_id, new_service, req.environ):
            return HTTPInternalServerError()(env, start_response)
    if req.method in ['PUT', 'POST'] \
            and 'x-zerovm-execute' not in req.headers:
        account_info = get_account_info(req.environ, self.app,
                                        swift_source='litequota')
        service_plan = assemble_from_partial(self.metadata_key,
                                             account_info['meta'])
        if service_plan:
            try:
                service_plan = json.loads(service_plan)
                path_parts = req.split_path(2, 4, rest_with_last=True)
            except ValueError:
                # unparsable plan or non-storage path: don't enforce
                return self.app(env, start_response)
            if len(path_parts) == 3:
                # container request: enforce the container-count quota
                quota = service_plan['storage']['containers']
                new_size = int(account_info['container_count'])
                if 0 <= quota < new_size:
                    return HTTPRequestEntityTooLarge(
                        body='Over quota: containers')(env, start_response)
            else:
                # object request: enforce byte and object-count quotas
                new_size = int(account_info['bytes']) + \
                    (req.content_length or 0)
                quota = service_plan['storage']['bytes']
                if 0 <= quota < new_size:
                    return HTTPRequestEntityTooLarge(
                        body='Over quota: bytes')(env, start_response)
                quota = service_plan['storage']['objects']
                new_size = int(account_info['total_object_count'])
                if 0 <= quota < new_size:
                    return HTTPRequestEntityTooLarge(
                        body='Over quota: objects')(env, start_response)
    return self.app(env, start_response)
def _get_source_object(self, req, path_info):
    """Fetch the current object contents via a pre-authed GET.

    The request is pre-authed because the user may have write access to
    the container but not READ; that was allowed before this middleware
    existed, so the same behavior is kept here.
    """
    subreq = make_pre_authed_request(
        req.environ, path=path_info, headers={'X-Newest': 'True'},
        method='GET', swift_source='VW')
    resp = subreq.get_response(self.app)
    length = resp.content_length
    # a chunked (length-less) or over-size source can't be copied
    if length is None or length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(request=req)
    return resp
def __call__(self, env, start_response):
    # Test double: plays back a canned status / headers / body for
    # GET, HEAD, PUT and DELETE requests, mimicking a Swift backend.
    req = Request(env)
    if env['REQUEST_METHOD'] == 'GET' or env['REQUEST_METHOD'] == 'HEAD':
        if self.status == 200:
            if 'HTTP_RANGE' in env:
                # range requests go through a real conditional Response
                # so Range handling is exercised
                resp = Response(request=req, body=self.object_body,
                                conditional_response=True)
                return resp(env, start_response)
            start_response(Response(request=req).status,
                           self.response_headers.items())
            if env['REQUEST_METHOD'] == 'GET':
                return self.object_body
            # HEAD falls through to the empty-body return at the end
        elif self.status == 401:
            start_response(HTTPUnauthorized(request=req).status, [])
        elif self.status == 403:
            start_response(HTTPForbidden(request=req).status, [])
        elif self.status == 404:
            start_response(HTTPNotFound(request=req).status, [])
        else:
            # any other canned status maps to a generic 400
            start_response(HTTPBadRequest(request=req).status, [])
    elif env['REQUEST_METHOD'] == 'PUT':
        if self.status == 201:
            # successful PUTs echo back the canned etag header
            start_response(
                HTTPCreated(request=req).status,
                [('etag', self.response_headers['etag'])])
        elif self.status == 401:
            start_response(HTTPUnauthorized(request=req).status, [])
        elif self.status == 403:
            start_response(HTTPForbidden(request=req).status, [])
        elif self.status == 404:
            start_response(HTTPNotFound(request=req).status, [])
        elif self.status == 413:
            start_response(
                HTTPRequestEntityTooLarge(request=req).status, [])
        else:
            start_response(HTTPBadRequest(request=req).status, [])
    elif env['REQUEST_METHOD'] == 'DELETE':
        if self.status == 204:
            start_response(HTTPNoContent(request=req).status, [])
        elif self.status == 401:
            start_response(HTTPUnauthorized(request=req).status, [])
        elif self.status == 403:
            start_response(HTTPForbidden(request=req).status, [])
        elif self.status == 404:
            start_response(HTTPNotFound(request=req).status, [])
        else:
            start_response(HTTPBadRequest(request=req).status, [])
    # every non-200-GET path returns an empty body
    return []
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns: HTTPRequestEntityTooLarge -- the object is too large
    :returns: HTTPLengthRequired -- missing content-length header and not
                                    a chunked request
    :returns: HTTPBadRequest -- missing or bad content-type header, bad
                                X-Object-Manifest value, or bad metadata

    Bug fix: the X-Object-Manifest error message read 'must in the
    format'; corrected to 'must be in the format'.
    """
    if req.content_length and req.content_length > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    # a body length is required unless the transfer is chunked
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(request=req)
    # COPY-style requests must not carry a body of their own
    if 'X-Copy-From' in req.headers and req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req, content_type='text/plain')
    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='No content type')
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req, body='Invalid Content-Type',
                              content_type='text/plain')
    if 'x-object-manifest' in req.headers:
        # DLO manifests must name a '<container>/<prefix>' with no query
        # characters and no leading slash on the prefix
        value = req.headers['x-object-manifest']
        container = prefix = None
        try:
            container, prefix = value.split('/', 1)
        except ValueError:
            pass
        if not container or not prefix or '?' in value or '&' in value or \
                prefix[0] == '/':
            return HTTPBadRequest(
                request=req,
                body='X-Object-Manifest must be in the format '
                     'container/prefix')
    return check_metadata(req, 'object')
def get_objs_to_delete(self, req):
    """
    Will populate objs_to_delete with data from request input.

    :params req: a Swob request
    :returns: a list of the contents of req.body when separated by newline.
    :raises HTTPException: on failures
    """
    # the body is read as bytes; names are decoded per-entry below
    line = b''
    data_remaining = True
    objs_to_delete = []
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    while data_remaining:
        if b'\n' in line:
            # peel one complete (newline-terminated) name off the buffer
            obj_to_delete, line = line.split(b'\n', 1)
            if six.PY2:
                obj_to_delete = wsgi_unquote(obj_to_delete.strip())
            else:
                # yeah, all this chaining is pretty terrible...
                # but it gets even worse trying to use UTF-8 and
                # errors='surrogateescape' when dealing with terrible
                # input like b'\xe2%98\x83'
                obj_to_delete = wsgi_to_str(
                    wsgi_unquote(bytes_to_wsgi(obj_to_delete.strip())))
            objs_to_delete.append({'name': obj_to_delete})
        else:
            data = req.body_file.read(self.max_path_length)
            if data:
                line += data
            else:
                # end of input; whatever remains is the final
                # (unterminated) name
                data_remaining = False
                if six.PY2:
                    obj_to_delete = wsgi_unquote(line.strip())
                else:
                    obj_to_delete = wsgi_to_str(
                        wsgi_unquote(bytes_to_wsgi(line.strip())))
                if obj_to_delete:
                    objs_to_delete.append({'name': obj_to_delete})
        # limits are enforced every iteration so oversized input is
        # rejected as soon as it is detected
        if len(objs_to_delete) > self.max_deletes_per_request:
            raise HTTPRequestEntityTooLarge(
                'Maximum Bulk Deletes: %d per request' %
                self.max_deletes_per_request)
        if len(line) > self.max_path_length * 2:
            raise HTTPBadRequest('Invalid File Name')
    return objs_to_delete
def _get_source_object(self, req, path_info):
    """Fetch the current object (symlink-aware) via a pre-authed GET.

    Pre-authed because the user may have write access to the container
    but not READ; that was allowed before this middleware existed, so
    the same behavior is kept here.
    """
    subreq = make_pre_authed_request(
        req.environ,
        path=wsgi_quote(path_info) + '?symlink=get',
        headers={'X-Newest': 'True'}, method='GET', swift_source='VW')
    resp = subreq.get_response(self.app)
    length = resp.content_length
    if length is None or length > MAX_FILE_SIZE:
        # Consciously *don't* drain the response before closing;
        # any logged 499 is actually rather appropriate here
        close_if_possible(resp.app_iter)
        return HTTPRequestEntityTooLarge(request=req)
    return resp
def PUT(self, req):
    """HTTP PUT request handler.

    Handles plain uploads, server-side copies (X-Copy-From), object
    versioning, and object expiration (X-Delete-At / X-Delete-After),
    then streams the body to a quorum of object servers.
    """
    container_info = self.container_info(
        self.account_name, self.container_name)
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    if not containers:
        # container doesn't exist (or has no nodes): can't accept a PUT
        return HTTPNotFound(request=req)
    if 'x-delete-after' in req.headers:
        # translate the relative X-Delete-After into an absolute
        # X-Delete-At timestamp
        try:
            x_delete_after = int(req.headers['x-delete-after'])
        except ValueError:
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='Non-integer X-Delete-After')
        req.headers['x-delete-at'] = '%d' % (time.time() + x_delete_after)
    partition, nodes = self.app.object_ring.get_nodes(
        self.account_name, self.container_name, self.object_name)
    # do a HEAD request for container sync and checking object versions
    if 'x-timestamp' in req.headers or \
            (object_versions and not req.environ.get('swift_versioned_copy')):
        hreq = Request.blank(req.path_info, headers={'X-Newest': 'True'},
                             environ={'REQUEST_METHOD': 'HEAD'})
        hresp = self.GETorHEAD_base(
            hreq, _('Object'), self.app.object_ring, partition,
            hreq.path_info)
    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req.headers['X-Timestamp'] = \
                normalize_timestamp(float(req.headers['x-timestamp']))
            # if the stored object is already at least as new, accept
            # the request without rewriting anything
            if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                    float(hresp.environ['swift_x_timestamp']) >= \
                    float(req.headers['x-timestamp']):
                return HTTPAccepted(request=req)
        except ValueError:
            return HTTPBadRequest(
                request=req, content_type='text/plain',
                body='X-Timestamp should be a UNIX timestamp float value; '
                     'was %r' % req.headers['x-timestamp'])
    else:
        req.headers['X-Timestamp'] = normalize_timestamp(time.time())
    # Sometimes the 'content-type' header exists, but is set to None.
    content_type_manually_set = True
    if not req.headers.get('content-type'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        req.headers['Content-Type'] = guessed_type or \
            'application/octet-stream'
        content_type_manually_set = False
    error_response = check_object_creation(req, self.object_name) or \
        check_content_type(req)
    if error_response:
        return error_response
    if object_versions and not req.environ.get('swift_versioned_copy'):
        is_manifest = 'x-object-manifest' in req.headers or \
            'x-object-manifest' in hresp.headers
        if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
            # This is a version manifest and needs to be handled
            # differently. First copy the existing data to a new object,
            # then write the data from this request to the version
            # manifest object.
            lcontainer = object_versions.split('/')[0]
            # version names are '<hex len of name><name>/<timestamp>'
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(time.strptime(
                    hresp.headers['last-modified'],
                    '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = normalize_timestamp(ts_source)
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True}
            copy_req = Request.blank(req.path_info, headers=copy_headers,
                                     environ=copy_environ)
            copy_resp = self.COPY(copy_req)
            if is_client_error(copy_resp.status_int):
                # missing container or bad permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)
    # default data source: the client's request body, read in chunks
    reader = req.environ['wsgi.input'].read
    data_source = iter(lambda: reader(self.app.client_chunk_size), '')
    source_header = req.headers.get('X-Copy-From')
    source_resp = None
    if source_header:
        # server-side copy: replace the data source with a GET of the
        # copy source and rebuild the request around it
        source_header = unquote(source_header)
        acct = req.path_info.split('/', 2)[1]
        if isinstance(acct, unicode):
            acct = acct.encode('utf-8')
        if not source_header.startswith('/'):
            source_header = '/' + source_header
        source_header = '/' + acct + source_header
        try:
            src_container_name, src_obj_name = \
                source_header.split('/', 3)[2:]
        except ValueError:
            return HTTPPreconditionFailed(
                request=req,
                body='X-Copy-From header must be of the form'
                     '<container name>/<object name>')
        source_req = req.copy_get()
        source_req.path_info = source_header
        source_req.headers['X-Newest'] = 'true'
        # temporarily swap this controller's target to the copy source
        orig_obj_name = self.object_name
        orig_container_name = self.container_name
        self.object_name = src_obj_name
        self.container_name = src_container_name
        source_resp = self.GET(source_req)
        if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
            return source_resp
        self.object_name = orig_obj_name
        self.container_name = orig_container_name
        new_req = Request.blank(req.path_info,
                                environ=req.environ, headers=req.headers)
        data_source = source_resp.app_iter
        new_req.content_length = source_resp.content_length
        if new_req.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            return HTTPRequestEntityTooLarge(request=req)
        if new_req.content_length > MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)
        new_req.etag = source_resp.etag
        # we no longer need the X-Copy-From header
        del new_req.headers['X-Copy-From']
        if not content_type_manually_set:
            new_req.headers['Content-Type'] = \
                source_resp.headers['Content-Type']
        if not config_true_value(
                new_req.headers.get('x-fresh-metadata', 'false')):
            # carry the source's metadata over, then let the incoming
            # request's own headers win
            copy_headers_into(source_resp, new_req)
            copy_headers_into(req, new_req)
        # copy over x-static-large-object for POSTs and manifest copies
        if 'X-Static-Large-Object' in source_resp.headers and \
                req.params.get('multipart-manifest') == 'get':
            new_req.headers['X-Static-Large-Object'] = \
                source_resp.headers['X-Static-Large-Object']
        req = new_req
    if 'x-delete-at' in req.headers:
        try:
            x_delete_at = int(req.headers['x-delete-at'])
            if x_delete_at < time.time():
                return HTTPBadRequest(
                    body='X-Delete-At in past', request=req,
                    content_type='text/plain')
        except ValueError:
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body='Non-integer X-Delete-At')
        # round down to the expirer's container bucket
        delete_at_container = str(
            x_delete_at /
            self.app.expiring_objects_container_divisor *
            self.app.expiring_objects_container_divisor)
        delete_at_part, delete_at_nodes = \
            self.app.container_ring.get_nodes(
                self.app.expiring_objects_account, delete_at_container)
    else:
        delete_at_part = delete_at_nodes = None
    node_iter = GreenthreadSafeIterator(
        self.iter_nodes(self.app.object_ring, partition))
    pile = GreenPile(len(nodes))
    chunked = req.headers.get('transfer-encoding')
    outgoing_headers = self._backend_requests(
        req, len(nodes), container_partition, containers,
        delete_at_part, delete_at_nodes)
    for nheaders in outgoing_headers:
        # RFC2616:8.2.3 disallows 100-continue without a body
        if (req.content_length > 0) or chunked:
            nheaders['Expect'] = '100-continue'
        pile.spawn(self._connect_put_node, node_iter, partition,
                   req.path_info, nheaders,
                   self.app.logger.thread_locals)
    conns = [conn for conn in pile if conn]
    if len(conns) <= len(nodes) / 2:
        # couldn't reach a majority of object servers: fail fast
        self.app.logger.error(
            _('Object PUT returning 503, %(conns)s/%(nodes)s '
              'required connections'),
            {'conns': len(conns), 'nodes': len(nodes) // 2 + 1})
        return HTTPServiceUnavailable(request=req)
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            # one sender greenthread per backend connection, fed via
            # per-connection queues
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        if chunked:
                            # terminate the chunked transfer
                            [conn.queue.put('0\r\n\r\n') for conn in conns]
                        break
                bytes_transferred += len(chunk)
                if bytes_transferred > MAX_FILE_SIZE:
                    return HTTPRequestEntityTooLarge(request=req)
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put(
                            '%x\r\n%s\r\n' % (len(chunk), chunk)
                            if chunked else chunk)
                    else:
                        conns.remove(conn)
                if len(conns) <= len(nodes) / 2:
                    self.app.logger.error(_(
                        'Object PUT exceptions during'
                        ' send, %(conns)s/%(nodes)s required connections'),
                        {'conns': len(conns), 'nodes': len(nodes) / 2 + 1})
                    return HTTPServiceUnavailable(request=req)
            # wait for every queue to drain before leaving the pool
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()
        conns = [conn for conn in conns if not conn.failed]
    except ChunkReadTimeout, err:
        # client stopped sending mid-body
        self.app.logger.warn(
            _('ERROR Client read timeout (%ss)'), err.seconds)
        self.app.logger.increment('client_timeouts')
        return HTTPRequestTimeout(request=req)
def _transfer_data(self, req, data_source, conns, nodes):
    """Stream the request body from *data_source* to the backend
    connections in *conns*, enforcing size limits and a write quorum.

    Raises swob HTTP exceptions on timeout, disconnect, over-size
    bodies, or loss of the minimum number of connections.
    """
    min_conns = quorum_size(len(nodes))
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            # one sender greenthread per backend connection, fed via
            # per-connection queues
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        if req.is_chunked:
                            # terminate the chunked transfer
                            for conn in conns:
                                conn.queue.put('0\r\n\r\n')
                        break
                bytes_transferred += len(chunk)
                if bytes_transferred > constraints.MAX_FILE_SIZE:
                    raise HTTPRequestEntityTooLarge(request=req)
                # fan the chunk out; drop connections that have failed
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put(
                            '%x\r\n%s\r\n' % (len(chunk), chunk)
                            if req.is_chunked else chunk)
                    else:
                        conn.close()
                        conns.remove(conn)
                self._check_min_conn(
                    req, conns, min_conns,
                    msg='Object PUT exceptions during'
                        ' send, %(conns)s/%(nodes)s required connections')
            # wait for every queue to drain before leaving the pool
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()
        conns = [conn for conn in conns if not conn.failed]
        self._check_min_conn(
            req, conns, min_conns,
            msg='Object PUT exceptions after last send, '
                '%(conns)s/%(nodes)s required connections')
    except ChunkReadTimeout as err:
        # client stopped sending mid-body
        self.app.logger.warn(_('ERROR Client read timeout (%ss)'),
                             err.seconds)
        self.app.logger.increment('client_timeouts')
        raise HTTPRequestTimeout(request=req)
    except HTTPException:
        # already a proper HTTP error; let it propagate unchanged
        raise
    except (Exception, Timeout):
        self.app.logger.exception(
            _('ERROR Exception causing client disconnect'))
        raise HTTPClientDisconnect(request=req)
    if req.content_length and bytes_transferred < req.content_length:
        # short body: the client went away before sending everything
        req.client_disconnect = True
        self.app.logger.warn(
            _('Client disconnected without sending enough data'))
        self.app.logger.increment('client_disconnects')
        raise HTTPClientDisconnect(request=req)
def check_object_creation(req, object_name):
    """
    Check to ensure that everything is alright about an object to be created.

    :param req: HTTP request object
    :param object_name: name of object to be created
    :returns HTTPRequestEntityTooLarge: the object is too large
    :returns HTTPLengthRequired: missing content-length header and not
                                 a chunked request
    :returns HTTPBadRequest: missing or bad content-type header, or
                             bad metadata
    :returns HTTPNotImplemented: unsupported transfer-encoding header value
    """
    try:
        # Get the message length; message_length() returns None when the
        # headers don't specify one, raises ValueError on inconsistent
        # length headers and AttributeError on an unsupported
        # transfer-encoding.
        ml = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=str(e))
    except AttributeError as e:
        return HTTPNotImplemented(request=req, content_type='text/plain',
                                  body=str(e))
    # Reject request bodies larger than MAX_FILE_SIZE.
    if ml is not None and ml > MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(body='Your request is too large.',
                                         request=req,
                                         content_type='text/plain')
    if req.content_length is None and \
            req.headers.get('transfer-encoding') != 'chunked':
        return HTTPLengthRequired(body='Missing Content-Length header.',
                                  request=req,
                                  content_type='text/plain')
    # A copy request (X-Copy-From) must carry a zero-byte body.
    if 'X-Copy-From' in req.headers and req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte body',
                              request=req, content_type='text/plain')
    # Reject object names longer than MAX_OBJECT_NAME_LENGTH.
    if len(object_name) > MAX_OBJECT_NAME_LENGTH:
        return HTTPBadRequest(body='Object name length of %d longer than %d' %
                              (len(object_name), MAX_OBJECT_NAME_LENGTH),
                              request=req, content_type='text/plain')
    # A content type header is required.
    if 'Content-Type' not in req.headers:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='No content type')
    try:
        # Validate the object-expiration headers (X-Delete-At /
        # X-Delete-After); failures become a 400.
        req = check_delete_headers(req)
    except HTTPException as e:
        return HTTPBadRequest(request=req, body=e.body,
                              content_type='text/plain')
    # The content type must be valid UTF-8.
    if not check_utf8(req.headers['Content-Type']):
        return HTTPBadRequest(request=req, body='Invalid Content-Type',
                              content_type='text/plain')
    # Finally validate the user-defined metadata on the request.
    return check_metadata(req, 'object')
def handle_multipart_put(self, req, start_response):
    """
    Will handle the PUT of a SLO manifest.
    Heads every object in manifest to check if is valid and if so will
    save a manifest generated from the user input. Uses WSGIContext to
    call self and start_response and returns a WSGI iterator.

    :params req: a swob.Request with an obj in path
    :raises HTTPException: on errors
    """
    try:
        vrs, account, container, obj = req.split_path(1, 4, True)
    except ValueError:
        # not an object path; pass the request straight through
        return self.app(req.environ, start_response)
    if req.content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be COPY requests')
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    # parse and syntactically validate the user-supplied manifest;
    # the read is capped at max_manifest_size
    parsed_data = parse_and_validate_input(
        req.body_file.read(self.max_manifest_size),
        req.path)
    problem_segments = []
    if len(parsed_data) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number of segments must be <= %d' %
            self.max_manifest_segments)
    total_size = 0
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        out_content_type = 'text/plain'
    data_for_storage = []
    # the same segment path may appear multiple times; HEAD each
    # distinct path once and fan the response out to every index
    path2indices = defaultdict(list)
    for index, seg_dict in enumerate(parsed_data):
        path2indices[seg_dict['path']].append(index)

    def do_head(obj_name):
        # HEAD one segment object on behalf of the client
        obj_path = '/'.join(['', vrs, account,
                             get_valid_utf8_str(obj_name).lstrip('/')])
        sub_req = make_subrequest(
            req.environ, path=obj_path + '?',  # kill the query string
            method='HEAD',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
        return obj_name, sub_req.get_response(self)

    def validate_seg_dict(seg_dict, head_seg_resp):
        # compare one manifest entry against the HEAD response for its
        # segment; records problems into problem_segments and returns
        # (segment_length, seg_data-or-None)
        if not head_seg_resp.is_success:
            problem_segments.append([quote(obj_name),
                                     head_seg_resp.status])
            return 0, None
        segment_length = head_seg_resp.content_length
        if seg_dict.get('range'):
            # Since we now know the length, we can normalize the
            # range. We know that there is exactly one range
            # requested since we checked that earlier in
            # parse_and_validate_input().
            ranges = seg_dict['range'].ranges_for_length(
                head_seg_resp.content_length)
            if not ranges:
                problem_segments.append([quote(obj_name),
                                         'Unsatisfiable Range'])
            elif ranges == [(0, head_seg_resp.content_length)]:
                # Just one range, and it exactly matches the object.
                # Why'd we do this again?
                del seg_dict['range']
                segment_length = head_seg_resp.content_length
            else:
                rng = ranges[0]
                seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                segment_length = rng[1] - rng[0]
        if segment_length < 1:
            problem_segments.append(
                [quote(obj_name),
                 'Too small; each segment must be at least 1 byte.'])
        # when the manifest pins a size or etag, it must match the
        # segment exactly
        if seg_dict['size_bytes'] is not None and \
                seg_dict['size_bytes'] != head_seg_resp.content_length:
            problem_segments.append([quote(obj_name), 'Size Mismatch'])
        if seg_dict['etag'] is not None and \
                seg_dict['etag'] != head_seg_resp.etag:
            problem_segments.append([quote(obj_name), 'Etag Mismatch'])
        if head_seg_resp.last_modified:
            last_modified = head_seg_resp.last_modified
        else:
            # shouldn't happen
            last_modified = datetime.now()
        last_modified_formatted = \
            last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
        seg_data = {
            'name': '/' + seg_dict['path'].lstrip('/'),
            'bytes': head_seg_resp.content_length,
            'hash': head_seg_resp.etag,
            'content_type': head_seg_resp.content_type,
            'last_modified': last_modified_formatted
        }
        if seg_dict.get('range'):
            seg_data['range'] = seg_dict['range']
        if config_true_value(
                head_seg_resp.headers.get('X-Static-Large-Object')):
            seg_data['sub_slo'] = True
        return segment_length, seg_data

    # HEAD the distinct segment paths concurrently, then validate each
    # manifest entry against its segment's response
    data_for_storage = [None] * len(parsed_data)
    with StreamingPile(self.concurrency) as pile:
        for obj_name, resp in pile.asyncstarmap(
                do_head, ((path, ) for path in path2indices)):
            for i in path2indices[obj_name]:
                segment_length, seg_data = validate_seg_dict(
                    parsed_data[i], resp)
                data_for_storage[i] = seg_data
                total_size += segment_length
    if problem_segments:
        resp_body = get_response_body(out_content_type, {},
                                      problem_segments)
        raise HTTPBadRequest(resp_body, content_type=out_content_type)
    # the SLO etag is the hash of the segment hashes (and ranges)
    slo_etag = md5()
    for seg_data in data_for_storage:
        if seg_data.get('range'):
            slo_etag.update('%s:%s;' % (seg_data['hash'],
                                        seg_data['range']))
        else:
            slo_etag.update(seg_data['hash'])
    slo_etag = slo_etag.hexdigest()
    req.headers.update({
        SYSMETA_SLO_ETAG: slo_etag,
        SYSMETA_SLO_SIZE: total_size,
        'X-Static-Large-Object': 'True',
    })
    # replace the request body with the validated, normalized manifest
    json_data = json.dumps(data_for_storage)
    if six.PY3:
        json_data = json_data.encode('utf-8')
    req.body = json_data
    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
    env['swift.content_type_overridden'] = True
    # advertise the aggregate size via the content-type side-channel
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

    def start_response_wrapper(status, headers, exc_info=None):
        # replace the stored-manifest etag with the computed SLO etag
        for i, (header, _value) in enumerate(headers):
            if header.lower() == 'etag':
                headers[i] = ('Etag', '"%s"' % slo_etag)
                break
        return start_response(status, headers, exc_info)

    return self.app(env, start_response_wrapper)
def PUT(self, req):
    """HTTP PUT request handler.

    Stores the request body (or, for X-Copy-From requests, the body of
    another object) on a quorum of object servers.  Also handles
    object versioning (copying the current version aside before
    overwriting), container-sync timestamps, X-Delete-After/At
    expiration scheduling, and streaming the body to all backend
    connections concurrently via greenthreads.

    :param req: a swob.Request with a PUT on an object path
    :returns: a swob response (2xx on success, 4xx/5xx on error)
    """
    if req.if_none_match is not None and '*' not in req.if_none_match:
        # Sending an etag with if-none-match isn't currently supported
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='If-None-Match only supports *')
    container_info = self.container_info(
        self.account_name, self.container_name, req)
    # client-supplied policy index (internal requests) wins over the
    # container's configured storage policy
    policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                   container_info['storage_policy'])
    obj_ring = self.app.get_object_ring(policy_index)

    # pass the policy index to storage nodes via req header
    req.headers['X-Backend-Storage-Policy-Index'] = policy_index
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    if not containers:
        # no container nodes known -> the container does not exist
        return HTTPNotFound(request=req)

    # validate the announced body length before touching the backend
    try:
        ml = req.message_length()
    except ValueError as e:
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body=str(e))
    except AttributeError as e:
        # e.g. an unsupported transfer-encoding
        return HTTPNotImplemented(request=req, content_type='text/plain',
                                  body=str(e))
    if ml is not None and ml > constraints.MAX_FILE_SIZE:
        return HTTPRequestEntityTooLarge(request=req)

    # translate relative expiry (X-Delete-After) into absolute
    # X-Delete-At before the common handling below
    if 'x-delete-after' in req.headers:
        try:
            x_delete_after = int(req.headers['x-delete-after'])
        except ValueError:
            return HTTPBadRequest(request=req,
                                  content_type='text/plain',
                                  body='Non-integer X-Delete-After')
        req.headers['x-delete-at'] = normalize_delete_at_timestamp(
            time.time() + x_delete_after)

    partition, nodes = obj_ring.get_nodes(
        self.account_name, self.container_name, self.object_name)

    # do a HEAD request for container sync and checking object versions
    if 'x-timestamp' in req.headers or \
            (object_versions and not
             req.environ.get('swift_versioned_copy')):
        # make sure proxy-server uses the right policy index
        _headers = {'X-Backend-Storage-Policy-Index': policy_index,
                    'X-Newest': 'True'}
        hreq = Request.blank(req.path_info, headers=_headers,
                             environ={'REQUEST_METHOD': 'HEAD'})
        hresp = self.GETorHEAD_base(
            hreq, _('Object'), obj_ring, partition,
            hreq.swift_entity_path)

    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req_timestamp = Timestamp(req.headers['X-Timestamp'])
            # if the stored copy is already at least this new, accept the
            # request without rewriting the object
            if hresp.environ and 'swift_x_timestamp' in hresp.environ and \
                    hresp.environ['swift_x_timestamp'] >= req_timestamp:
                return HTTPAccepted(request=req)
        except ValueError:
            return HTTPBadRequest(
                request=req, content_type='text/plain',
                body='X-Timestamp should be a UNIX timestamp float value; '
                     'was %r' % req.headers['x-timestamp'])
        req.headers['X-Timestamp'] = req_timestamp.internal
    else:
        req.headers['X-Timestamp'] = Timestamp(time.time()).internal

    # Sometimes the 'content-type' header exists, but is set to None.
    content_type_manually_set = True
    detect_content_type = \
        config_true_value(req.headers.get('x-detect-content-type'))
    if detect_content_type or not req.headers.get('content-type'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        req.headers['Content-Type'] = guessed_type or \
            'application/octet-stream'
        if detect_content_type:
            req.headers.pop('x-detect-content-type')
        else:
            content_type_manually_set = False

    error_response = check_object_creation(req, self.object_name) or \
        check_content_type(req)
    if error_response:
        return error_response

    if object_versions and not req.environ.get('swift_versioned_copy'):
        # NOTE: hresp was set above because object_versions is truthy
        if hresp.status_int != HTTP_NOT_FOUND:
            # This is a version manifest and needs to be handled
            # differently. First copy the existing data to a new object,
            # then write the data from this request to the version
            # manifest object.
            lcontainer = object_versions.split('/')[0]
            # versions are named <len(name) in hex, 3 digits><name>/<ts>
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(time.strptime(
                    hresp.headers['last-modified'],
                    '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = Timestamp(ts_source).internal
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True
                            }
            copy_req = Request.blank(req.path_info, headers=copy_headers,
                                     environ=copy_environ)
            copy_resp = self.COPY(copy_req)
            if is_client_error(copy_resp.status_int):
                # missing container or bad permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)

    # default data source: the client's request body, chunked reads
    reader = req.environ['wsgi.input'].read
    data_source = iter(lambda: reader(self.app.client_chunk_size), '')
    source_header = req.headers.get('X-Copy-From')
    source_resp = None
    if source_header:
        # server-side copy: replace the data source with a GET of the
        # source object and build a fresh sink request
        if req.environ.get('swift.orig_req_method', req.method) != 'POST':
            req.environ.setdefault('swift.log_info', []).append(
                'x-copy-from:%s' % source_header)
        src_container_name, src_obj_name = check_copy_from_header(req)
        ver, acct, _rest = req.split_path(2, 3, True)
        if isinstance(acct, unicode):
            acct = acct.encode('utf-8')
        source_header = '/%s/%s/%s/%s' % (ver, acct,
                                          src_container_name, src_obj_name)
        source_req = req.copy_get()

        # make sure the source request uses it's container_info
        source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
        source_req.path_info = source_header
        source_req.headers['X-Newest'] = 'true'
        # temporarily retarget this controller at the source object so
        # self.GET fetches the right thing; restored below
        orig_obj_name = self.object_name
        orig_container_name = self.container_name
        self.object_name = src_obj_name
        self.container_name = src_container_name
        sink_req = Request.blank(req.path_info,
                                 environ=req.environ, headers=req.headers)
        source_resp = self.GET(source_req)

        # This gives middlewares a way to change the source; for example,
        # this lets you COPY a SLO manifest and have the new object be the
        # concatenation of the segments (like what a GET request gives
        # the client), not a copy of the manifest file.
        hook = req.environ.get(
            'swift.copy_hook',
            (lambda source_req, source_resp, sink_req: source_resp))
        source_resp = hook(source_req, source_resp, sink_req)

        if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
            return source_resp
        self.object_name = orig_obj_name
        self.container_name = orig_container_name
        data_source = iter(source_resp.app_iter)
        sink_req.content_length = source_resp.content_length
        if sink_req.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            return HTTPRequestEntityTooLarge(request=req)
        if sink_req.content_length > constraints.MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)
        sink_req.etag = source_resp.etag

        # we no longer need the X-Copy-From header
        del sink_req.headers['X-Copy-From']
        if not content_type_manually_set:
            sink_req.headers['Content-Type'] = \
                source_resp.headers['Content-Type']
        if not config_true_value(
                sink_req.headers.get('x-fresh-metadata', 'false')):
            copy_headers_into(source_resp, sink_req)
            copy_headers_into(req, sink_req)

        # copy over x-static-large-object for POSTs and manifest copies
        if 'X-Static-Large-Object' in source_resp.headers and \
                req.params.get('multipart-manifest') == 'get':
            sink_req.headers['X-Static-Large-Object'] = \
                source_resp.headers['X-Static-Large-Object']

        req = sink_req

    if 'x-delete-at' in req.headers:
        try:
            x_delete_at = normalize_delete_at_timestamp(
                int(req.headers['x-delete-at']))
            if int(x_delete_at) < time.time():
                return HTTPBadRequest(
                    body='X-Delete-At in past', request=req,
                    content_type='text/plain')
        except ValueError:
            return HTTPBadRequest(request=req, content_type='text/plain',
                                  body='Non-integer X-Delete-At')
        req.environ.setdefault('swift.log_info', []).append(
            'x-delete-at:%s' % x_delete_at)
        # expiring objects are tracked in per-interval hidden containers
        delete_at_container = normalize_delete_at_timestamp(
            int(x_delete_at) /
            self.app.expiring_objects_container_divisor *
            self.app.expiring_objects_container_divisor)
        delete_at_part, delete_at_nodes = \
            self.app.container_ring.get_nodes(
                self.app.expiring_objects_account, delete_at_container)
    else:
        delete_at_container = delete_at_part = delete_at_nodes = None

    node_iter = GreenthreadSafeIterator(
        self.iter_nodes_local_first(obj_ring, partition))
    pile = GreenPile(len(nodes))
    te = req.headers.get('transfer-encoding', '')
    chunked = ('chunked' in te)

    outgoing_headers = self._backend_requests(
        req, len(nodes), container_partition, containers,
        delete_at_container, delete_at_part, delete_at_nodes)

    for nheaders in outgoing_headers:
        # RFC2616:8.2.3 disallows 100-continue without a body
        if (req.content_length > 0) or chunked:
            nheaders['Expect'] = '100-continue'
        pile.spawn(self._connect_put_node, node_iter, partition,
                   req.swift_entity_path, nheaders,
                   self.app.logger.thread_locals)

    conns = [conn for conn in pile if conn]
    min_conns = quorum_size(len(nodes))

    if req.if_none_match is not None and '*' in req.if_none_match:
        statuses = [conn.resp.status for conn in conns if conn.resp]
        if HTTP_PRECONDITION_FAILED in statuses:
            # If we find any copy of the file, it shouldn't be uploaded
            self.app.logger.debug(
                _('Object PUT returning 412, %(statuses)r'),
                {'statuses': statuses})
            return HTTPPreconditionFailed(request=req)

    if len(conns) < min_conns:
        self.app.logger.error(
            _('Object PUT returning 503, %(conns)s/%(nodes)s '
              'required connections'),
            {'conns': len(conns), 'nodes': min_conns})
        return HTTPServiceUnavailable(request=req)
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            # one sender greenthread per backend connection; chunks are
            # fanned out through per-connection queues
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        if chunked:
                            # terminate the chunked transfer-encoding
                            for conn in conns:
                                conn.queue.put('0\r\n\r\n')
                        break
                bytes_transferred += len(chunk)
                if bytes_transferred > constraints.MAX_FILE_SIZE:
                    return HTTPRequestEntityTooLarge(request=req)
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put(
                            '%x\r\n%s\r\n' % (len(chunk), chunk)
                            if chunked else chunk)
                    else:
                        # drop connections that errored while sending
                        conns.remove(conn)
                if len(conns) < min_conns:
                    self.app.logger.error(_(
                        'Object PUT exceptions during'
                        ' send, %(conns)s/%(nodes)s required connections'),
                        {'conns': len(conns), 'nodes': min_conns})
                    return HTTPServiceUnavailable(request=req)
            # drain queues so all bytes are on the wire before we ask
            # the object servers for their responses
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()
        conns = [conn for conn in conns if not conn.failed]
    except ChunkReadTimeout as err:
        self.app.logger.warn(
            _('ERROR Client read timeout (%ss)'), err.seconds)
        self.app.logger.increment('client_timeouts')
        return HTTPRequestTimeout(request=req)
    except (Exception, Timeout):
        self.app.logger.exception(
            _('ERROR Exception causing client disconnect'))
        return HTTPClientDisconnect(request=req)
    if req.content_length and bytes_transferred < req.content_length:
        # client sent fewer bytes than it promised
        req.client_disconnect = True
        self.app.logger.warn(
            _('Client disconnected without sending enough data'))
        self.app.logger.increment('client_disconnects')
        return HTTPClientDisconnect(request=req)

    statuses, reasons, bodies, etags = self._get_put_responses(req, conns,
                                                               nodes)

    if len(etags) > 1:
        # backends disagree about what they stored -> data corruption
        self.app.logger.error(
            _('Object servers returned %s mismatched etags'), len(etags))
        return HTTPServerError(request=req)
    etag = etags.pop() if len(etags) else None
    resp = self.best_response(req, statuses, reasons, bodies,
                              _('Object PUT'), etag=etag)
    if source_header:
        # tell the client where the copy came from
        resp.headers['X-Copied-From'] = quote(
            source_header.split('/', 3)[3])
        if 'last-modified' in source_resp.headers:
            resp.headers['X-Copied-From-Last-Modified'] = \
                source_resp.headers['last-modified']
        copy_headers_into(req, resp)
    resp.last_modified = math.ceil(
        float(Timestamp(req.headers['X-Timestamp'])))
    return resp
def handle_multipart_put(self, req):
    """
    Will handle the PUT of a SLO manifest.
    Heads every object in manifest to check if is valid and if so will
    save a manifest generated from the user input.

    :params req: a swob.Request with an obj in path
    :returns: the WSGI app (the request is mutated in place so the
              downstream app stores the generated manifest)
    :raises: HttpException on errors
    """
    try:
        vrs, account, container, obj = req.split_path(1, 4, True)
    except ValueError:
        # not an object path; let the rest of the pipeline handle it
        return self.app
    if req.content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be Copy requests')
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
    problem_segments = []
    if len(parsed_data) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number segments must be <= %d' % self.max_manifest_segments)
    total_size = 0
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        out_content_type = 'text/plain'
    data_for_storage = []
    for index, seg_dict in enumerate(parsed_data):
        obj_path = '/'.join(
            ['', vrs, account, seg_dict['path'].lstrip('/')])
        try:
            seg_size = int(seg_dict['size_bytes'])
        except (ValueError, TypeError):
            raise HTTPBadRequest('Invalid Manifest File')
        # every segment but the last must meet the minimum size
        if seg_size < self.min_segment_size and \
                (index == 0 or index < len(parsed_data) - 1):
            raise HTTPBadRequest(
                'Each segment, except the last, must be larger than '
                '%d bytes.' % self.min_segment_size)

        # HEAD the segment to validate the client-supplied size/etag
        new_env = req.environ.copy()
        if isinstance(obj_path, unicode):
            obj_path = obj_path.encode('utf-8')
        new_env['PATH_INFO'] = obj_path
        new_env['REQUEST_METHOD'] = 'HEAD'
        new_env['swift.source'] = 'SLO'
        del(new_env['wsgi.input'])
        # QUERY_STRING may legitimately be absent from a WSGI environ
        # (PEP 3333), so don't assume it's there
        new_env.pop('QUERY_STRING', None)
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
        head_seg_resp = \
            Request.blank(obj_path, new_env).get_response(self.app)
        if head_seg_resp.status_int // 100 == 2:
            total_size += seg_size
            if seg_size != head_seg_resp.content_length:
                problem_segments.append([quote(obj_path),
                                         'Size Mismatch'])
            if seg_dict['etag'] != head_seg_resp.etag:
                problem_segments.append([quote(obj_path),
                                         'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()
            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            data_for_storage.append(
                {'name': '/' + seg_dict['path'].lstrip('/'),
                 'bytes': seg_size,
                 'hash': seg_dict['etag'],
                 'content_type': head_seg_resp.content_type,
                 'last_modified': last_modified_formatted})
        else:
            problem_segments.append([quote(obj_path),
                                     head_seg_resp.status])
    if problem_segments:
        resp_body = get_response_body(
            out_content_type, {}, problem_segments)
        raise HTTPBadRequest(resp_body, content_type=out_content_type)

    # rewrite the request in place: body becomes the validated manifest
    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
    # correctly spelled key, consistent with the other SLO handlers in
    # this file; the misspelled legacy key is kept so any consumer still
    # reading the old name keeps working
    env['swift.content_type_overridden'] = True
    env['swift.content_type_overriden'] = True
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
    env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
    json_data = json.dumps(data_for_storage)
    env['CONTENT_LENGTH'] = str(len(json_data))
    env['wsgi.input'] = StringIO(json_data)
    return self.app
def handle_multipart_put(self, req, start_response):
    """
    Will handle the PUT of a SLO manifest.
    Heads every object in manifest to check if is valid and if so will
    save a manifest generated from the user input. Uses WSGIContext to
    call self and start_response and returns a WSGI iterator.

    :params req: a swob.Request with an obj in path
    :raises: HttpException on errors
    """
    try:
        vrs, account, container, obj = req.split_path(1, 4, True)
    except ValueError:
        # not an object path; pass straight through to the app
        return self.app(req.environ, start_response)
    if req.content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be COPY requests')
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    parsed_data = parse_and_validate_input(
        req.body_file.read(self.max_manifest_size),
        req.path)
    problem_segments = []
    if len(parsed_data) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number of segments must be <= %d' %
            self.max_manifest_segments)
    total_size = 0
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        out_content_type = 'text/plain'
    data_for_storage = []
    # the SLO etag is the md5 of the concatenated segment etags (plus
    # range, when present)
    slo_etag = md5()
    # consecutive entries for the same object reuse one HEAD response
    last_obj_path = None
    for index, seg_dict in enumerate(parsed_data):
        obj_name = seg_dict['path']
        if isinstance(obj_name, six.text_type):
            obj_name = obj_name.encode('utf-8')
        obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
        if obj_path != last_obj_path:
            last_obj_path = obj_path
            sub_req = make_subrequest(
                req.environ, path=obj_path + '?',  # kill the query string
                method='HEAD',
                headers={'x-auth-token': req.headers.get('x-auth-token')},
                agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
            head_seg_resp = sub_req.get_response(self)
        if head_seg_resp.is_success:
            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # range. We know that there is exactly one range
                # requested since we checked that earlier in
                # parse_and_validate_input().
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)
                if not ranges:
                    problem_segments.append([quote(obj_name),
                                             'Unsatisfiable Range'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    del seg_dict['range']
                    segment_length = head_seg_resp.content_length
                else:
                    # ranges_for_length gives half-open [start, end);
                    # stored ranges are inclusive start-end
                    rng = ranges[0]
                    seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                    segment_length = rng[1] - rng[0]
            if segment_length < 1:
                problem_segments.append(
                    [quote(obj_name),
                     'Too small; each segment must be at least 1 byte.'])
            total_size += segment_length
            if seg_dict['size_bytes'] is not None and \
                    seg_dict['size_bytes'] != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name),
                                         'Size Mismatch'])
            # size_bytes/etag are optional in the manifest; only verify
            # the etag when the client supplied one
            if seg_dict['etag'] is None or \
                    seg_dict['etag'] == head_seg_resp.etag:
                if seg_dict.get('range'):
                    slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                seg_dict['range']))
                else:
                    slo_etag.update(head_seg_resp.etag)
            else:
                problem_segments.append([quote(obj_name),
                                         'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()
            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                        'bytes': head_seg_resp.content_length,
                        'hash': head_seg_resp.etag,
                        'content_type': head_seg_resp.content_type,
                        'last_modified': last_modified_formatted}
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                # segment is itself an SLO manifest
                seg_data['sub_slo'] = True
            data_for_storage.append(seg_data)
        else:
            problem_segments.append([quote(obj_name),
                                     head_seg_resp.status])
    if problem_segments:
        resp_body = get_response_body(
            out_content_type, {}, problem_segments)
        raise HTTPBadRequest(resp_body, content_type=out_content_type)

    # rewrite the request in place: body becomes the validated manifest
    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
    env['swift.content_type_overridden'] = True
    # record the total object size in the content-type for GET/HEAD
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
    env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
    json_data = json.dumps(data_for_storage)
    if six.PY3:
        json_data = json_data.encode('utf-8')
    env['CONTENT_LENGTH'] = str(len(json_data))
    env['wsgi.input'] = BytesIO(json_data)

    slo_put_context = SloPutContext(self, slo_etag)
    return slo_put_context.handle_slo_put(req, start_response)
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is
    being processed. When the request is completed will yield a response
    body that can be parsed to determine success. See above documentation
    for details.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz', or 'bz2'
    :params out_content_type: content type of the trailing response body
                              ('text/plain' by default)
    """
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '',
                 'Number Files Created': 0}
    failed_files = []
    last_yield = time()
    if out_content_type and out_content_type.endswith('/xml'):
        # emit the XML declaration first so the eventual body is valid
        to_yield = b'<?xml version="1.0" encoding="UTF-8"?>\n'
    else:
        to_yield = b' '
    separator = b''
    containers_accessed = set()
    # force eventlet to flush our keepalive whitespace immediately
    req.environ['eventlet.minimum_write_chunk_size'] = 0
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)
        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # streaming mode ('r|...') so the tar is never fully buffered
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        failed_response_type = HTTPBadRequest
        containers_created = 0
        while True:
            if last_yield + self.yield_frequency < time():
                # keep the client connection alive during long extracts
                last_yield = time()
                yield to_yield
                to_yield, separator = b' ', b'\r\n\r\n'
            tar_info = tar.next()
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                if not six.PY2:
                    # tarfile gives a surrogateescaped str on py3;
                    # round-trip it into a WSGI-native string
                    obj_path = obj_path.encode('utf-8', 'surrogateescape')
                    obj_path = bytes_to_wsgi(obj_path)
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file

                destination = '/'.join(['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not constraints.check_utf8(wsgi_to_str(destination)):
                    failed_files.append(
                        [wsgi_quote(obj_path[:self.max_path_length]),
                         HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > constraints.MAX_FILE_SIZE:
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                container_failure = None
                if container not in containers_accessed:
                    # auto-create the target container on first touch
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError as err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            wsgi_quote(cont_path[:self.max_path_length]),
                            err.status]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPBadRequest().status])
                        continue

                tar_file = tar.extractfile(tar_info)
                create_headers = {
                    'Content-Length': tar_info.size,
                    'X-Auth-Token': req.headers.get('X-Auth-Token'),
                }

                # Copy some whitelisted headers to the subrequest
                for k, v in req.headers.items():
                    if ((k.lower() in ('x-delete-at', 'x-delete-after'))
                            or is_user_meta('object', k)):
                        create_headers[k] = v

                create_obj_req = make_subrequest(
                    req.environ, method='PUT',
                    path=wsgi_quote(destination),
                    headers=create_headers,
                    agent='%(orig)s BulkExpand', swift_source='EA')
                create_obj_req.environ['wsgi.input'] = tar_file

                for pax_key, pax_value in tar_info.pax_headers.items():
                    header_name = pax_key_to_swift_header(pax_key)
                    if header_name:
                        # Both pax_key and pax_value are unicode
                        # strings; the key is already UTF-8 encoded, but
                        # we still have to encode the value.
                        create_obj_req.headers[header_name] = \
                            pax_value.encode("utf-8")

                resp = create_obj_req.get_response(self.app)
                containers_accessed.add(container)
                if resp.is_success:
                    resp_dict['Number Files Created'] += 1
                else:
                    if container_failure:
                        # surface the earlier container error alongside
                        # the object failure it likely caused
                        failed_files.append(container_failure)
                    if resp.status_int == HTTP_UNAUTHORIZED:
                        failed_files.append([
                            wsgi_quote(obj_path[:self.max_path_length]),
                            HTTPUnauthorized().status])
                        raise HTTPUnauthorized(request=req)
                    if resp.status_int // 100 == 5:
                        # any server error makes the whole response 502
                        failed_response_type = HTTPBadGateway
                    failed_files.append([
                        wsgi_quote(obj_path[:self.max_path_length]),
                        resp.status])

        if failed_files:
            resp_dict['Response Status'] = failed_response_type().status
        elif not resp_dict['Number Files Created']:
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = \
                'Invalid Tar File: No Valid Files'

    except HTTPException as err:
        # fold HTTP errors into the trailing body; status was already
        # sent as 2xx with the keepalive whitespace
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body.decode('utf-8')
    except (tarfile.TarError, zlib.error) as tar_error:
        resp_dict['Response Status'] = HTTPBadRequest().status
        resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
    except Exception:
        self.logger.exception('Error in extract archive.')
        resp_dict['Response Status'] = HTTPServerError().status

    yield separator + get_response_body(
        out_content_type, resp_dict, failed_files, 'extract')
def PUT(self, req):
    """HTTP PUT request handler.

    Resolves the container info and storage policy, optionally handles
    versioning and X-Copy-From (server-side copy), then streams the body
    to a quorum of object servers over 100-continue connections.

    NOTE(review): this method contains experimental "CHANGED_CODE"
    sections that filter out spun-down devices (read from a hard-coded
    file path) and persist deferred writes to a text file.  These
    sections use print() debugging, unmanaged open() calls and
    os.system(); see inline notes.

    :param req: swob.Request for an object PUT
    :returns: a swob response (success, error, or copy result)
    """
    if req.if_none_match is not None and '*' not in req.if_none_match:
        # Sending an etag with if-none-match isn't currently supported
        return HTTPBadRequest(request=req, content_type='text/plain',
                              body='If-None-Match only supports *')
    container_info = self.container_info(self.account_name,
                                         self.container_name, req)
    policy_index = req.headers.get('X-Backend-Storage-Policy-Index',
                                   container_info['storage_policy'])
    obj_ring = self.app.get_object_ring(policy_index)
    # pass the policy index to storage nodes via req header
    req.headers['X-Backend-Storage-Policy-Index'] = policy_index
    container_partition = container_info['partition']
    containers = container_info['nodes']
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']
    object_versions = container_info['versions']
    if 'swift.authorize' in req.environ:
        # delegated authorization callback; a truthy response denies
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp
    if not containers:
        return HTTPNotFound(request=req)
    # Sometimes the 'content-type' header exists, but is set to None.
    content_type_manually_set = True
    detect_content_type = \
        config_true_value(req.headers.get('x-detect-content-type'))
    if detect_content_type or not req.headers.get('content-type'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        req.headers['Content-Type'] = guessed_type or \
            'application/octet-stream'
        if detect_content_type:
            req.headers.pop('x-detect-content-type')
        else:
            content_type_manually_set = False
    error_response = check_object_creation(req, self.object_name) or \
        check_content_type(req)
    if error_response:
        return error_response
    partition, nodes = obj_ring.get_nodes(self.account_name,
                                          self.container_name,
                                          self.object_name)
    #################################### CHANGED_CODE ############################################################
    # Change the nodes list to contain only one dictionary item instead
    # of the original 3 returned by the ring.
    d = dict()
    # d[partition] = nodes[1:]
    # f.write(str(d)+"\n")
    # f.close()
    print("===Original Nodes===")
    print(nodes)
    temp_nodes = []
    flag = 0
    # NOTE(review): hard-coded path; file handle is closed manually and
    # would leak if read() raised — consider a with-block.
    f = open("/home/hduser/swift/swift/proxy/controllers/spindowndevices",
             "r")
    sdlist = f.read().split("\n")
    print("===Spun down devices===:", sdlist)
    f.close()
    # Partition the ring's nodes into devices that are up vs. spun down.
    upnodes = [item for item in nodes if item['device'] not in sdlist]
    downnodes = [item for item in nodes if item['device'] in sdlist]
    temp_nodes = upnodes
    if (len(downnodes) > 0):
        # NOTE(review): non-atomic read-modify-write of a shared text
        # file via ast.literal_eval/str(); concurrent proxy workers can
        # race here — TODO confirm single-worker deployment.
        d = ast.literal_eval(
            open("/home/hduser/swift/swift/proxy/controllers/nodes.txt",
                 "r").read())
        # d_temp=pickle.load("/home/hduser/swift/proxy/controllers/nodes.p","rb")
        # print("===Current dict===:",d)
        for item in downnodes:
            if (partition in d):
                d[partition].append(item)
                # print("===Modified dict===:",d)
            else:
                d[partition] = [item]
                # print("===Modified dict===:",d)
        # pickle.dump(d,open("/home/hduser/nodes.p","wb"))
        # print("Before writing:",d)
        fo = open("/home/hduser/swift/swift/proxy/controllers/nodes.txt",
                  "w")
        fo.write(str(d) + "\n")
        fo.close()
    # pickle.dump(d,open("/home/hduser/swift/swift/proxy/controllers/nodes.p","wb"))
    ## Old method, IGNORE
    # for item in nodes:
    #     device = item['device']
    #     if(device not in sdlist):
    #         # if(os.path.ismount("path"))
    #         temp_nodes.append(item)
    #         flag = 1
    #         break
    #     else:
    #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
    #         # d = pickle.load(open("/home/hduser/nodes.p","rb"))
    #         import ast
    #         d = ast.literal_eval(open("/home/hduser/nodes.txt","r").read())
    #         print("===Current dict===:",d)
    #         if(partition in d):
    #             print("In IF")
    #             d[partition].append(item)
    #             print("===Modified dict===:",d)
    #         else:
    #             print("In ELSE")
    #             d[partition] = [item]
    #             print("===Modified dict===:",d)
    #         pickle.dump(d,open("/home/hduser/nodes.p","wb"))
    #         fo = open("/home/hduser/nodes.txt","w")
    #         fo.write(str(d)+"\n")
    # Code to spin up a device if none are running already.
    if (len(upnodes) == 0):
        dev = nodes[0]['device']
        print("===ALL NODES DOWN===")
        print("===Mounting device===", dev)
        # NOTE(review): blocking shell-out that needs root; the newly
        # mounted node is NOT added back to temp_nodes, so `nodes` below
        # may still end up empty — TODO confirm intended.
        os.system("mount /dev/" + str(dev))
    # NOTE(review): the three lines below are duplicated (copy/paste);
    # harmless but redundant.
    print('===In controller PUT===:')
    print("===Partition===", partition)
    nodes = temp_nodes
    print('===In controller PUT===:')
    print("===Partition===", partition)
    nodes = temp_nodes
    print("===Nodes===:", nodes)
    check_ssd()
    ############################################ CHANGED_CODE ########################################################
    # do a HEAD request for checking object versions
    if object_versions and not req.environ.get('swift_versioned_copy'):
        # make sure proxy-server uses the right policy index
        _headers = {'X-Backend-Storage-Policy-Index': policy_index,
                    'X-Newest': 'True'}
        hreq = Request.blank(req.path_info, headers=_headers,
                             environ={'REQUEST_METHOD': 'HEAD'})
        hresp = self.GETorHEAD_base(
            hreq, _('Object'), obj_ring, partition,
            hreq.swift_entity_path)
    # Used by container sync feature
    if 'x-timestamp' in req.headers:
        try:
            req_timestamp = Timestamp(req.headers['X-Timestamp'])
        except ValueError:
            return HTTPBadRequest(
                request=req, content_type='text/plain',
                body='X-Timestamp should be a UNIX timestamp float value; '
                'was %r' % req.headers['x-timestamp'])
        req.headers['X-Timestamp'] = req_timestamp.internal
    else:
        req.headers['X-Timestamp'] = Timestamp(time.time()).internal
    if object_versions and not req.environ.get('swift_versioned_copy'):
        # hresp was obtained above under the same condition
        is_manifest = 'X-Object-Manifest' in req.headers or \
            'X-Object-Manifest' in hresp.headers
        if hresp.status_int != HTTP_NOT_FOUND and not is_manifest:
            # This is a version manifest and needs to be handled
            # differently. First copy the existing data to a new object,
            # then write the data from this request to the version
            # manifest object.
            lcontainer = object_versions.split('/')[0]
            prefix_len = '%03x' % len(self.object_name)
            lprefix = prefix_len + self.object_name + '/'
            ts_source = hresp.environ.get('swift_x_timestamp')
            if ts_source is None:
                ts_source = time.mktime(time.strptime(
                    hresp.headers['last-modified'],
                    '%a, %d %b %Y %H:%M:%S GMT'))
            new_ts = Timestamp(ts_source).internal
            vers_obj_name = lprefix + new_ts
            copy_headers = {
                'Destination': '%s/%s' % (lcontainer, vers_obj_name)}
            copy_environ = {'REQUEST_METHOD': 'COPY',
                            'swift_versioned_copy': True}
            copy_req = Request.blank(req.path_info, headers=copy_headers,
                                     environ=copy_environ)
            copy_resp = self.COPY(copy_req)
            if is_client_error(copy_resp.status_int):
                # missing container or bad permissions
                return HTTPPreconditionFailed(request=req)
            elif not is_success(copy_resp.status_int):
                # could not copy the data, bail
                return HTTPServiceUnavailable(request=req)
    # Default data source: stream the client's request body in chunks.
    reader = req.environ['wsgi.input'].read
    data_source = iter(lambda: reader(self.app.client_chunk_size), '')
    source_header = req.headers.get('X-Copy-From')
    source_resp = None
    if source_header:
        # Server-side copy: GET the source object and use its body as
        # the data source for this PUT.
        if req.environ.get('swift.orig_req_method', req.method) != 'POST':
            req.environ.setdefault('swift.log_info', []).append(
                'x-copy-from:%s' % source_header)
        ver, acct, _rest = req.split_path(2, 3, True)
        src_account_name = req.headers.get('X-Copy-From-Account', None)
        if src_account_name:
            src_account_name = check_account_format(req, src_account_name)
        else:
            src_account_name = acct
        src_container_name, src_obj_name = check_copy_from_header(req)
        source_header = '/%s/%s/%s/%s' % (
            ver, src_account_name, src_container_name, src_obj_name)
        source_req = req.copy_get()
        # make sure the source request uses it's container_info
        source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
        source_req.path_info = source_header
        source_req.headers['X-Newest'] = 'true'
        # Temporarily retarget self.* so self.GET hits the source; the
        # originals are restored after the copy hook runs.
        orig_obj_name = self.object_name
        orig_container_name = self.container_name
        orig_account_name = self.account_name
        self.object_name = src_obj_name
        self.container_name = src_container_name
        self.account_name = src_account_name
        sink_req = Request.blank(req.path_info, environ=req.environ,
                                 headers=req.headers)
        source_resp = self.GET(source_req)
        # This gives middlewares a way to change the source; for example,
        # this lets you COPY a SLO manifest and have the new object be the
        # concatenation of the segments (like what a GET request gives
        # the client), not a copy of the manifest file.
        hook = req.environ.get(
            'swift.copy_hook',
            (lambda source_req, source_resp, sink_req: source_resp))
        source_resp = hook(source_req, source_resp, sink_req)
        if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
            return source_resp
        self.object_name = orig_obj_name
        self.container_name = orig_container_name
        self.account_name = orig_account_name
        data_source = iter(source_resp.app_iter)
        sink_req.content_length = source_resp.content_length
        if sink_req.content_length is None:
            # This indicates a transfer-encoding: chunked source object,
            # which currently only happens because there are more than
            # CONTAINER_LISTING_LIMIT segments in a segmented object. In
            # this case, we're going to refuse to do the server-side copy.
            return HTTPRequestEntityTooLarge(request=req)
        if sink_req.content_length > constraints.MAX_FILE_SIZE:
            return HTTPRequestEntityTooLarge(request=req)
        sink_req.etag = source_resp.etag
        # we no longer need the X-Copy-From header
        del sink_req.headers['X-Copy-From']
        if 'X-Copy-From-Account' in sink_req.headers:
            del sink_req.headers['X-Copy-From-Account']
        if not content_type_manually_set:
            sink_req.headers['Content-Type'] = \
                source_resp.headers['Content-Type']
        if config_true_value(
                sink_req.headers.get('x-fresh-metadata', 'false')):
            # post-as-copy: ignore new sysmeta, copy existing sysmeta
            condition = lambda k: is_sys_meta('object', k)
            remove_items(sink_req.headers, condition)
            copy_header_subset(source_resp, sink_req, condition)
        else:
            # copy/update existing sysmeta and user meta
            copy_headers_into(source_resp, sink_req)
            copy_headers_into(req, sink_req)
        # copy over x-static-large-object for POSTs and manifest copies
        if 'X-Static-Large-Object' in source_resp.headers and \
                req.params.get('multipart-manifest') == 'get':
            sink_req.headers['X-Static-Large-Object'] = \
                source_resp.headers['X-Static-Large-Object']
        req = sink_req
    req, delete_at_container, delete_at_part, \
        delete_at_nodes = self._config_obj_expiration(req)
    node_iter = GreenthreadSafeIterator(
        self.iter_nodes_local_first(obj_ring, partition))
    pile = GreenPile(len(nodes))
    te = req.headers.get('transfer-encoding', '')
    chunked = ('chunked' in te)
    outgoing_headers = self._backend_requests(
        req, len(nodes), container_partition, containers,
        delete_at_container, delete_at_part, delete_at_nodes)
    for nheaders in outgoing_headers:
        # RFC2616:8.2.3 disallows 100-continue without a body
        if (req.content_length > 0) or chunked:
            nheaders['Expect'] = '100-continue'
        ################################# CHANGED_CODE ###################################################################
        # Replaced node_iter by nodes in the following line to make sure
        # that a new list with different order isnt used.
        # Change from node_iter to nodes to make sure it writes to the
        # same device.  Without this, it gets a new list of nodes from
        # the ring in a different order and connects to the first one.
        pile.spawn(self._connect_put_node, nodes, partition,
                   req.swift_entity_path, nheaders,
                   self.app.logger.thread_locals)
        ################################# CHANGED_CODE ###################################################################
    conns = [conn for conn in pile if conn]
    min_conns = quorum_size(len(nodes))
    if req.if_none_match is not None and '*' in req.if_none_match:
        statuses = [conn.resp.status for conn in conns if conn.resp]
        if HTTP_PRECONDITION_FAILED in statuses:
            # If we find any copy of the file, it shouldn't be uploaded
            self.app.logger.debug(
                _('Object PUT returning 412, %(statuses)r'),
                {'statuses': statuses})
            return HTTPPreconditionFailed(request=req)
    if any(conn for conn in conns if conn.resp and
           conn.resp.status == HTTP_CONFLICT):
        # A backend already has a newer timestamp: acknowledge with 202.
        timestamps = [HeaderKeyDict(conn.resp.getheaders()).get(
            'X-Backend-Timestamp') for conn in conns if conn.resp]
        self.app.logger.debug(
            _('Object PUT returning 202 for 409: '
              '%(req_timestamp)s <= %(timestamps)r'),
            {'req_timestamp': req.timestamp.internal,
             'timestamps': ', '.join(timestamps)})
        return HTTPAccepted(request=req)
    if len(conns) < min_conns:
        self.app.logger.error(
            _('Object PUT returning 503, %(conns)s/%(nodes)s '
              'required connections'),
            {'conns': len(conns), 'nodes': min_conns})
        return HTTPServiceUnavailable(request=req)
    bytes_transferred = 0
    try:
        with ContextPool(len(nodes)) as pool:
            # One sender greenthread per backend connection; chunks are
            # fanned out through per-connection queues.
            for conn in conns:
                conn.failed = False
                conn.queue = Queue(self.app.put_queue_depth)
                pool.spawn(self._send_file, conn, req.path)
            while True:
                with ChunkReadTimeout(self.app.client_timeout):
                    try:
                        chunk = next(data_source)
                    except StopIteration:
                        if chunked:
                            # terminate the chunked transfer-encoding
                            for conn in conns:
                                conn.queue.put('0\r\n\r\n')
                        break
                bytes_transferred += len(chunk)
                if bytes_transferred > constraints.MAX_FILE_SIZE:
                    return HTTPRequestEntityTooLarge(request=req)
                for conn in list(conns):
                    if not conn.failed:
                        conn.queue.put(
                            '%x\r\n%s\r\n' % (len(chunk), chunk)
                            if chunked else chunk)
                    else:
                        conns.remove(conn)
                if len(conns) < min_conns:
                    self.app.logger.error(_(
                        'Object PUT exceptions during'
                        ' send, %(conns)s/%(nodes)s required connections'),
                        {'conns': len(conns), 'nodes': min_conns})
                    return HTTPServiceUnavailable(request=req)
            for conn in conns:
                if conn.queue.unfinished_tasks:
                    conn.queue.join()
        conns = [conn for conn in conns if not conn.failed]
    except ChunkReadTimeout as err:
        self.app.logger.warn(
            _('ERROR Client read timeout (%ss)'), err.seconds)
        self.app.logger.increment('client_timeouts')
        return HTTPRequestTimeout(request=req)
    except (Exception, Timeout):
        self.app.logger.exception(
            _('ERROR Exception causing client disconnect'))
        return HTTPClientDisconnect(request=req)
    if req.content_length and bytes_transferred < req.content_length:
        req.client_disconnect = True
        self.app.logger.warn(
            _('Client disconnected without sending enough data'))
        self.app.logger.increment('client_disconnects')
        return HTTPClientDisconnect(request=req)
    statuses, reasons, bodies, etags = self._get_put_responses(
        req, conns, nodes)
    if len(etags) > 1:
        self.app.logger.error(
            _('Object servers returned %s mismatched etags'), len(etags))
        return HTTPServerError(request=req)
    etag = etags.pop() if len(etags) else None
    resp = self.best_response(req, statuses, reasons, bodies,
                              _('Object PUT'), etag=etag)
    if source_header:
        # reflect where the data was copied from in the response
        acct, path = source_header.split('/', 3)[2:4]
        resp.headers['X-Copied-From-Account'] = quote(acct)
        resp.headers['X-Copied-From'] = quote(path)
        if 'last-modified' in source_resp.headers:
            resp.headers['X-Copied-From-Last-Modified'] = \
                source_resp.headers['last-modified']
        copy_headers_into(req, resp)
    resp.last_modified = math.ceil(
        float(Timestamp(req.headers['X-Timestamp'])))
    return resp
def __call__(self, request):
    """Enforce the account byte quota for incoming PUT requests.

    Non-PUT/POST traffic is passed through untouched.  Resellers may
    set or remove the quota metadata on an account; any other client
    attempting to do so gets 403.  An object PUT that would push the
    account past its configured quota is answered with 413, deferring
    to any installed 'swift.authorize' callback first.
    """
    if request.method not in ("POST", "PUT"):
        return self.app

    try:
        _ver, _acct, container, obj = request.split_path(
            2, 4, rest_with_last=True)
    except ValueError:
        return self.app

    # Quota headers are only meaningful on an account-level request;
    # on container/object requests they are ignored outright.
    if not container:
        new_quota = request.headers.get(
            'X-Account-Meta-Quota-Bytes')
        remove_quota = request.headers.get(
            'X-Remove-Account-Meta-Quota-Bytes')
    else:
        new_quota = remove_quota = None

    if remove_quota:
        new_quota = 0  # X-Remove dominates if both are present

    if request.environ.get('reseller_request') is True:
        # Resellers may manage the quota, but the value must be numeric.
        if new_quota and not new_quota.isdigit():
            return HTTPBadRequest()
        return self.app

    if new_quota is not None:
        # deny quota set for non-reseller
        return HTTPForbidden()

    if request.method == "POST" or not obj:
        return self.app

    # From here on this is an object PUT: check the projected size.
    account_info = get_account_info(request.environ, self.app)
    if not account_info or not account_info['bytes']:
        return self.app
    try:
        quota = int(account_info['meta'].get('quota-bytes', -1))
    except ValueError:
        return self.app
    if quota < 0:
        return self.app

    projected_size = int(account_info['bytes']) + \
        (request.content_length or 0)
    if quota < projected_size:
        too_large = HTTPRequestEntityTooLarge(
            body='Upload exceeds quota.')
        if 'swift.authorize' not in request.environ:
            return too_large
        orig_authorize = request.environ['swift.authorize']

        def reject_authorize(*args, **kwargs):
            # Honour any authorization denial first, then the 413.
            denial = orig_authorize(*args, **kwargs)
            return denial if denial else too_large

        request.environ['swift.authorize'] = reject_authorize
    return self.app
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is being
    processed. When the request is completed will yield a response body that
    can be parsed to determine success. See above documentation for details.

    NOTE(review): this variant uses Python 2 ``except X, err`` syntax and
    its body appears to end right after the container-creation handling —
    presumably the object-upload portion lives elsewhere or was truncated;
    verify against the canonical bulk middleware.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz', or 'bz2'
    """
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '',
                 'Number Files Created': 0}
    failed_files = []
    last_yield = time()
    separator = ''
    containers_accessed = set()
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)
        if out_content_type.endswith('/xml'):
            yield '<?xml version="1.0" encoding="UTF-8"?>\n'
        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # stream-mode tar: 'r|' reads sequentially without seeking
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        failed_response_type = HTTPBadRequest
        req.environ['eventlet.minimum_write_chunk_size'] = 0
        containers_created = 0
        while True:
            # emit keep-alive whitespace so the client connection
            # doesn't time out during long extractions
            if last_yield + self.yield_frequency < time():
                separator = '\r\n\r\n'
                last_yield = time()
                yield ' '
            tar_info = tar.next()
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                # normalize './name' and leading slashes
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file
                destination = '/'.join(['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not check_utf8(destination):
                    failed_files.append([
                        quote(obj_path[:MAX_PATH_LENGTH]),
                        HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > MAX_FILE_SIZE:
                    failed_files.append([
                        quote(obj_path[:MAX_PATH_LENGTH]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                container_failure = None
                if container not in containers_accessed:
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError, err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            quote(cont_path[:MAX_PATH_LENGTH]),
                            err.status]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPBadRequest().status])
                        continue
def _handle_copy_request(self, req):
    """
    This method handles copying objects based on values set in the headers
    'X-Copy-From' and 'X-Copy-From-Account'

    This method was added as part of the refactoring of the PUT method and
    the functionality is expected to be moved to middleware

    :param req: swob.Request with an 'X-Copy-From' header
    :returns: a 4-tuple; on error it is ``(error_resp, None, None, None)``,
              on success ``(None, sink_req, data_source, update_response)``
              where ``update_response`` decorates the final response with
              X-Copied-From-* headers.
    :raises HTTPRequestEntityTooLarge: when the source has no content
             length (chunked) or exceeds MAX_FILE_SIZE
    """
    if req.environ.get('swift.orig_req_method', req.method) != 'POST':
        req.environ.setdefault('swift.log_info', []).append(
            'x-copy-from:%s' % req.headers['X-Copy-From'])
    ver, acct, _rest = req.split_path(2, 3, True)
    src_account_name = req.headers.get('X-Copy-From-Account', None)
    if src_account_name:
        src_account_name = check_account_format(req, src_account_name)
    else:
        src_account_name = acct
    src_container_name, src_obj_name = check_copy_from_header(req)
    source_header = '/%s/%s/%s/%s' % (
        ver, src_account_name, src_container_name, src_obj_name)
    source_req = req.copy_get()
    # make sure the source request uses it's container_info
    source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
    source_req.path_info = source_header
    source_req.headers['X-Newest'] = 'true'
    # Temporarily retarget self.* so self.GET fetches the source object;
    # the originals are restored below, after the copy hook runs.
    orig_obj_name = self.object_name
    orig_container_name = self.container_name
    orig_account_name = self.account_name
    sink_req = Request.blank(req.path_info, environ=req.environ,
                             headers=req.headers)
    self.object_name = src_obj_name
    self.container_name = src_container_name
    self.account_name = src_account_name
    source_resp = self.GET(source_req)
    # This gives middlewares a way to change the source; for example,
    # this lets you COPY a SLO manifest and have the new object be the
    # concatenation of the segments (like what a GET request gives
    # the client), not a copy of the manifest file.
    hook = req.environ.get(
        'swift.copy_hook',
        (lambda source_req, source_resp, sink_req: source_resp))
    source_resp = hook(source_req, source_resp, sink_req)
    # reset names
    self.object_name = orig_obj_name
    self.container_name = orig_container_name
    self.account_name = orig_account_name
    if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
        # this is a bit of ugly code, but I'm willing to live with it
        # until copy request handling moves to middleware
        return source_resp, None, None, None
    if source_resp.content_length is None:
        # This indicates a transfer-encoding: chunked source object,
        # which currently only happens because there are more than
        # CONTAINER_LISTING_LIMIT segments in a segmented object. In
        # this case, we're going to refuse to do the server-side copy.
        raise HTTPRequestEntityTooLarge(request=req)
    if source_resp.content_length > constraints.MAX_FILE_SIZE:
        raise HTTPRequestEntityTooLarge(request=req)
    data_source = iter(source_resp.app_iter)
    sink_req.content_length = source_resp.content_length
    sink_req.etag = source_resp.etag
    # we no longer need the X-Copy-From header
    del sink_req.headers['X-Copy-From']
    if 'X-Copy-From-Account' in sink_req.headers:
        del sink_req.headers['X-Copy-From-Account']
    if not req.content_type_manually_set:
        sink_req.headers['Content-Type'] = \
            source_resp.headers['Content-Type']
    if config_true_value(sink_req.headers.get('x-fresh-metadata',
                                              'false')):
        # post-as-copy: ignore new sysmeta, copy existing sysmeta
        condition = lambda k: is_sys_meta('object', k)
        remove_items(sink_req.headers, condition)
        copy_header_subset(source_resp, sink_req, condition)
    else:
        # copy/update existing sysmeta and user meta
        copy_headers_into(source_resp, sink_req)
        copy_headers_into(req, sink_req)
    # copy over x-static-large-object for POSTs and manifest copies
    if 'X-Static-Large-Object' in source_resp.headers and \
            req.params.get('multipart-manifest') == 'get':
        sink_req.headers['X-Static-Large-Object'] = \
            source_resp.headers['X-Static-Large-Object']
    req = sink_req

    def update_response(req, resp):
        # annotate the final response with the copy source details
        acct, path = source_resp.environ['PATH_INFO'].split('/', 3)[2:4]
        resp.headers['X-Copied-From-Account'] = quote(acct)
        resp.headers['X-Copied-From'] = quote(path)
        if 'last-modified' in source_resp.headers:
            resp.headers['X-Copied-From-Last-Modified'] = \
                source_resp.headers['last-modified']
        copy_headers_into(req, resp)
        return resp

    # this is a bit of ugly code, but I'm willing to live with it
    # until copy request handling moves to middleware
    return None, req, data_source, update_response
def handle_extract(self, req, compress_type):
    """
    Extract files from the tar in the request body and PUT them, creating
    base-level containers on demand.

    NOTE(review): uses Python 2 ``except X, err`` syntax; the visible body
    ends right after the container-limit check — presumably the actual
    object upload follows elsewhere or was truncated; verify against the
    canonical bulk middleware.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz, or 'bz2'
    :raises HTTPException: on unhandled errors
    :returns: a swob response to request
    """
    success_count = 0
    failed_files = []
    existing_containers = set()
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        return HTTPNotAcceptable(request=req)
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        return HTTPLengthRequired(request=req)
    try:
        vrs, account, extract_base = req.split_path(2, 3, True)
    except ValueError:
        return HTTPNotFound(request=req)
    extract_base = extract_base or ''
    extract_base = extract_base.rstrip('/')
    try:
        # stream-mode tar: 'r|' reads sequentially without seeking
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        while True:
            tar_info = tar.next()
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                # normalize './name' and leading slashes
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                if '/' not in obj_path:
                    continue  # ignore base level file
                destination = '/'.join(['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not check_utf8(destination):
                    failed_files.append([
                        quote(destination[:MAX_PATH_LENGTH]),
                        HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > MAX_FILE_SIZE:
                    failed_files.append([
                        quote(destination[:MAX_PATH_LENGTH]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                if container not in existing_containers:
                    try:
                        self.create_container(
                            req, '/'.join(['', vrs, account, container]))
                        existing_containers.add(container)
                    except CreateContainerError, err:
                        if err.status_int == HTTP_UNAUTHORIZED:
                            return HTTPUnauthorized(request=req)
                        failed_files.append([
                            quote(destination[:MAX_PATH_LENGTH]),
                            err.status])
                        continue
                    except ValueError:
                        failed_files.append([
                            quote(destination[:MAX_PATH_LENGTH]),
                            HTTP_BAD_REQUEST])
                        continue
                    if len(existing_containers) > self.max_containers:
                        return HTTPBadRequest(
                            'More than %d base level containers in tar.' %
                            self.max_containers)
def handle_multipart_put(self, req, start_response):
    """
    Will handle the PUT of a SLO manifest.
    Heads every object in manifest to check if is valid and if so will
    save a manifest generated from the user input. Uses WSGIContext to
    call self and start_response and returns a WSGI iterator.

    NOTE(review): references the Python 2 ``unicode`` builtin while also
    branching on ``six.PY3`` below — presumably py2-only in practice;
    confirm the supported interpreter versions.

    :params req: a swob.Request with an obj in path
    :raises: HttpException on errors
    """
    try:
        vrs, account, container, obj = req.split_path(1, 4, True)
    except ValueError:
        # not an object path; pass the request straight through
        return self.app(req.environ, start_response)
    if req.content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be COPY requests')
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    parsed_data = parse_input(req.body_file.read(self.max_manifest_size))
    problem_segments = []
    if len(parsed_data) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number of segments must be <= %d' %
            self.max_manifest_segments)
    total_size = 0
    out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    if not out_content_type:
        out_content_type = 'text/plain'
    data_for_storage = []
    slo_etag = md5()
    last_obj_path = None
    for index, seg_dict in enumerate(parsed_data):
        obj_name = seg_dict['path']
        if isinstance(obj_name, unicode):
            obj_name = obj_name.encode('utf-8')
        obj_path = '/'.join(['', vrs, account, obj_name.lstrip('/')])
        if req.path == quote(obj_path):
            # a manifest must not list itself as a segment
            raise HTTPConflict(
                'Manifest object name "%s" '
                'cannot be included in the manifest' % obj_name)
        try:
            seg_size = int(seg_dict['size_bytes'])
        except (ValueError, TypeError):
            if seg_dict['size_bytes'] is None:
                # size omitted by the client; validated via HEAD below
                seg_size = None
            else:
                raise HTTPBadRequest('Invalid Manifest File')
        if seg_size is not None and seg_size < self.min_segment_size and \
                index < len(parsed_data) - 1:
            raise HTTPBadRequest(
                'Each segment, except the last, must be at least '
                '%d bytes.' % self.min_segment_size)
        # HEAD each segment through our own pipeline to validate it
        new_env = req.environ.copy()
        new_env['PATH_INFO'] = obj_path
        new_env['REQUEST_METHOD'] = 'HEAD'
        new_env['swift.source'] = 'SLO'
        del(new_env['wsgi.input'])
        del(new_env['QUERY_STRING'])
        new_env['CONTENT_LENGTH'] = 0
        new_env['HTTP_USER_AGENT'] = \
            '%s MultipartPUT' % req.environ.get('HTTP_USER_AGENT')
        if obj_path != last_obj_path:
            # skip a repeat HEAD when consecutive segments share a path
            last_obj_path = obj_path
            head_seg_resp = \
                Request.blank(obj_path, new_env).get_response(self)
        if head_seg_resp.is_success:
            segment_length = head_seg_resp.content_length
            if seg_dict.get('range'):
                # Since we now know the length, we can normalize the
                # ranges
                ranges = seg_dict['range'].ranges_for_length(
                    head_seg_resp.content_length)
                if not ranges:
                    problem_segments.append([quote(obj_name),
                                             'Unsatisfiable Range'])
                elif len(ranges) > 1:
                    problem_segments.append([quote(obj_name),
                                             'Multiple Ranges'])
                elif ranges == [(0, head_seg_resp.content_length)]:
                    # Just one range, and it exactly matches the object.
                    # Why'd we do this again?
                    seg_dict['range'] = None
                    segment_length = head_seg_resp.content_length
                else:
                    range = ranges[0]
                    seg_dict['range'] = '%d-%d' % (range[0],
                                                   range[1] - 1)
                    segment_length = range[1] - range[0]
            if segment_length < self.min_segment_size and \
                    index < len(parsed_data) - 1:
                raise HTTPBadRequest(
                    'Each segment, except the last, must be at least '
                    '%d bytes.' % self.min_segment_size)
            total_size += segment_length
            if seg_size is not None and \
                    seg_size != head_seg_resp.content_length:
                problem_segments.append([quote(obj_name),
                                         'Size Mismatch'])
            if seg_dict['etag'] is None or \
                    seg_dict['etag'] == head_seg_resp.etag:
                # fold the segment etag (and range, if any) into the
                # manifest's aggregate etag
                if seg_dict.get('range'):
                    slo_etag.update('%s:%s;' % (head_seg_resp.etag,
                                                seg_dict['range']))
                else:
                    slo_etag.update(head_seg_resp.etag)
            else:
                problem_segments.append([quote(obj_name),
                                         'Etag Mismatch'])
            if head_seg_resp.last_modified:
                last_modified = head_seg_resp.last_modified
            else:
                # shouldn't happen
                last_modified = datetime.now()
            last_modified_formatted = \
                last_modified.strftime('%Y-%m-%dT%H:%M:%S.%f')
            seg_data = {'name': '/' + seg_dict['path'].lstrip('/'),
                        'bytes': head_seg_resp.content_length,
                        'hash': head_seg_resp.etag,
                        'content_type': head_seg_resp.content_type,
                        'last_modified': last_modified_formatted}
            if seg_dict.get('range'):
                seg_data['range'] = seg_dict['range']
            if config_true_value(
                    head_seg_resp.headers.get('X-Static-Large-Object')):
                seg_data['sub_slo'] = True
            data_for_storage.append(seg_data)
        else:
            problem_segments.append([quote(obj_name),
                                     head_seg_resp.status])
    if problem_segments:
        resp_body = get_response_body(
            out_content_type, {}, problem_segments)
        raise HTTPBadRequest(resp_body, content_type=out_content_type)
    # rewrite the request body as the generated manifest JSON
    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = guessed_type or 'application/octet-stream'
    env['swift.content_type_overridden'] = True
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size
    env['HTTP_X_STATIC_LARGE_OBJECT'] = 'True'
    json_data = json.dumps(data_for_storage)
    if six.PY3:
        json_data = json_data.encode('utf-8')
    env['CONTENT_LENGTH'] = str(len(json_data))
    env['wsgi.input'] = BytesIO(json_data)
    slo_put_context = SloPutContext(self, slo_etag)
    return slo_put_context.handle_slo_put(req, start_response)
def handle_extract_iter(self, req, compress_type,
                        out_content_type='text/plain'):
    """
    A generator that can be assigned to a swob Response's app_iter which,
    when iterated over, will extract and PUT the objects pulled from the
    request body. Will occasionally yield whitespace while request is being
    processed. When the request is completed will yield a response body that
    can be parsed to determine success. See above documentation for details.

    :params req: a swob Request
    :params compress_type: specifying the compression type of the tar.
                           Accepts '', 'gz', or 'bz2'
    """
    # Accumulated outcome that is serialized into the final (trailing)
    # response body; HTTP status is decided at the very end.
    resp_dict = {'Response Status': HTTPCreated().status,
                 'Response Body': '', 'Number Files Created': 0}
    # List of [quoted-path, status] pairs for objects that failed to PUT.
    failed_files = []
    # Timestamp of the last whitespace heartbeat sent to the client.
    last_yield = time()
    # Becomes '\r\n\r\n' once any heartbeat whitespace has been yielded,
    # so the real body is visually separated from the padding.
    separator = ''
    # Containers we already touched; avoids re-creating per object.
    containers_accessed = set()
    try:
        if not out_content_type:
            raise HTTPNotAcceptable(request=req)
        if out_content_type.endswith('/xml'):
            # XML declaration must precede everything else in the stream.
            yield '<?xml version="1.0" encoding="UTF-8"?>\n'

        # A body is mandatory: either an explicit length or chunked TE.
        if req.content_length is None and \
                req.headers.get('transfer-encoding',
                                '').lower() != 'chunked':
            raise HTTPLengthRequired(request=req)
        try:
            vrs, account, extract_base = req.split_path(2, 3, True)
        except ValueError:
            raise HTTPNotFound(request=req)
        # Optional path suffix under which all extracted objects land.
        extract_base = extract_base or ''
        extract_base = extract_base.rstrip('/')
        # 'r|' opens the tar in non-seekable stream mode, reading straight
        # from the request body without buffering the whole archive.
        tar = tarfile.open(mode='r|' + compress_type,
                           fileobj=req.body_file)
        # Upgraded to HTTPBadGateway if any backend PUT returns a 5xx.
        failed_response_type = HTTPBadRequest
        # Let eventlet flush single-space heartbeats immediately.
        req.environ['eventlet.minimum_write_chunk_size'] = 0
        containers_created = 0
        while True:
            # Emit keep-alive whitespace if we've been quiet too long.
            if last_yield + self.yield_frequency < time():
                separator = '\r\n\r\n'
                last_yield = time()
                yield ' '
            tar_info = tar.next()
            # Stop at end of archive, or bail out early once too many
            # individual files have failed.
            if tar_info is None or \
                    len(failed_files) >= self.max_failed_extractions:
                break
            if tar_info.isfile():
                obj_path = tar_info.name
                # Normalize tar member names: strip leading './' and '/'.
                if obj_path.startswith('./'):
                    obj_path = obj_path[2:]
                obj_path = obj_path.lstrip('/')
                if extract_base:
                    obj_path = extract_base + '/' + obj_path
                # Need at least container/object in the path.
                if '/' not in obj_path:
                    continue  # ignore base level file

                destination = '/'.join(
                    ['', vrs, account, obj_path])
                container = obj_path.split('/', 1)[0]
                if not check_utf8(destination):
                    failed_files.append(
                        [quote(obj_path[:MAX_PATH_LENGTH]),
                         HTTPPreconditionFailed().status])
                    continue
                if tar_info.size > MAX_FILE_SIZE:
                    failed_files.append([
                        quote(obj_path[:MAX_PATH_LENGTH]),
                        HTTPRequestEntityTooLarge().status])
                    continue
                # Remembered (not yet recorded) container-creation error;
                # only reported if the object PUT also fails below.
                container_failure = None
                if container not in containers_accessed:
                    cont_path = '/'.join(['', vrs, account, container])
                    try:
                        if self.create_container(req, cont_path):
                            containers_created += 1
                            if containers_created > self.max_containers:
                                raise HTTPBadRequest(
                                    'More than %d containers to create '
                                    'from tar.' % self.max_containers)
                    except CreateContainerError as err:
                        # the object PUT to this container still may
                        # succeed if acls are set
                        container_failure = [
                            quote(cont_path[:MAX_PATH_LENGTH]),
                            err.status]
                        if err.status_int == HTTP_UNAUTHORIZED:
                            raise HTTPUnauthorized(request=req)
                    except ValueError:
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPBadRequest().status])
                        continue

                # Stream the member straight into a subrequest PUT.
                tar_file = tar.extractfile(tar_info)
                new_env = req.environ.copy()
                new_env['REQUEST_METHOD'] = 'PUT'
                new_env['wsgi.input'] = tar_file
                new_env['PATH_INFO'] = destination
                new_env['CONTENT_LENGTH'] = tar_info.size
                new_env['swift.source'] = 'EA'
                new_env['HTTP_USER_AGENT'] = \
                    '%s BulkExpand' % req.environ.get('HTTP_USER_AGENT')
                create_obj_req = Request.blank(destination, new_env)
                resp = create_obj_req.get_response(self.app)
                containers_accessed.add(container)
                if resp.is_success:
                    resp_dict['Number Files Created'] += 1
                else:
                    # Surface the earlier container failure only now that
                    # the object PUT has also failed.
                    if container_failure:
                        failed_files.append(container_failure)
                    if resp.status_int == HTTP_UNAUTHORIZED:
                        failed_files.append([
                            quote(obj_path[:MAX_PATH_LENGTH]),
                            HTTPUnauthorized().status])
                        # 401 aborts the whole extraction.
                        raise HTTPUnauthorized(request=req)
                    if resp.status_int // 100 == 5:
                        # Backend server error: overall status becomes 502.
                        failed_response_type = HTTPBadGateway
                    failed_files.append([
                        quote(obj_path[:MAX_PATH_LENGTH]),
                        resp.status])

        if failed_files:
            resp_dict['Response Status'] = failed_response_type().status
        elif not resp_dict['Number Files Created']:
            # Archive parsed but contained nothing extractable.
            resp_dict['Response Status'] = HTTPBadRequest().status
            resp_dict['Response Body'] = 'Invalid Tar File: No Valid Files'

    # Errors are reported in the trailing body, since headers (and
    # possibly heartbeat whitespace) may already have been sent.
    except HTTPException as err:
        resp_dict['Response Status'] = err.status
        resp_dict['Response Body'] = err.body
    except (tarfile.TarError, zlib.error) as tar_error:
        resp_dict['Response Status'] = HTTPBadRequest().status
        resp_dict['Response Body'] = 'Invalid Tar File: %s' % tar_error
    except Exception:
        self.logger.exception('Error in extract archive.')
        resp_dict['Response Status'] = HTTPServerError().status

    # Final parseable body, separated from any heartbeat padding.
    yield separator + get_response_body(
        out_content_type, resp_dict, failed_files)
def handle_multipart_put(self, req, start_response):
    """
    Will handle the PUT of a SLO manifest.
    List every object in manifest to check if is valid and if so will
    save a manifest generated from the user input. Uses WSGIContext to
    call self and start_response and returns a WSGI iterator.

    :param req: a :class:`~swift.common.swob.Request` with an obj in path
    :param start_response: WSGI start_response callable
    :raises HttpException: on errors
    """
    vrs, account, container, obj = req.split_path(4, rest_with_last=True)
    if req.content_length > self.max_manifest_size:
        raise HTTPRequestEntityTooLarge(
            "Manifest File > %d bytes" % self.max_manifest_size)
    if req.headers.get('X-Copy-From'):
        raise HTTPMethodNotAllowed(
            'Multipart Manifest PUTs cannot be COPY requests')
    if req.content_length is None and \
            req.headers.get('transfer-encoding', '').lower() != 'chunked':
        raise HTTPLengthRequired(request=req)
    parsed_data = parse_and_validate_input(
        req.body_file.read(self.max_manifest_size),
        req.path)
    # [quoted-name, reason] pairs; mutated by validate_seg_dict and
    # resp_iter below (closure over this list).
    problem_segments = []

    # Segments that reference real objects (as opposed to inline 'data'
    # segments) are the only ones counted against the segment limit.
    object_segments = [seg for seg in parsed_data if 'path' in seg]
    if len(object_segments) > self.max_manifest_segments:
        raise HTTPRequestEntityTooLarge(
            'Number of object-backed segments must be <= %d' %
            self.max_manifest_segments)

    try:
        out_content_type = req.accept.best_match(ACCEPTABLE_FORMATS)
    except ValueError:
        out_content_type = 'text/plain'  # Ignore invalid header
    if not out_content_type:
        out_content_type = 'text/plain'
    # Slot per input segment; filled in manifest order even though
    # object segments are validated grouped-by-path.
    data_for_storage = [None] * len(parsed_data)
    total_size = 0
    # Map segment path -> indices in parsed_data (the same path may be
    # referenced by several segments).
    path2indices = defaultdict(list)
    for index, seg_dict in enumerate(parsed_data):
        if 'data' in seg_dict:
            # Inline data segment: stored verbatim; size is the decoded
            # base64 payload length.
            data_for_storage[index] = seg_dict
            total_size += len(base64.b64decode(seg_dict['data']))
        else:
            path2indices[seg_dict['path']].append(index)

    # BEGIN: New OpenIO code
    # All object segments are assumed to live in one container; derive
    # container and common prefix from the first segment's path so a
    # single listing subrequest can cover every segment.
    obj_path = get_valid_utf8_str(object_segments[0]['path']).lstrip('/')
    split_path = obj_path.split('/')
    segments_container = split_path[0]
    seg_prefix = '/'.join(split_path[1:-1])
    segments_container_path = '/'.join(
        ['', vrs, account, segments_container])
    # END: New OpenIO code

    # BEGIN: Adapt for OpenIO code
    def validate_seg_dict(seg_dict, seg_resp, allow_empty_segment):
        """Check one manifest entry against its container-listing record.

        Appends any problem to the enclosing ``problem_segments`` and
        returns ``(segment_length, seg_data)`` where ``seg_data`` is the
        normalized entry to store in the manifest.
        """
        obj_name = seg_dict['path']

        segment_length = seg_resp['bytes']
        if seg_dict.get('range'):
            # Since we now know the length, we can normalize the
            # range. We know that there is exactly one range
            # requested since we checked that earlier in
            # parse_and_validate_input().
            ranges = seg_dict['range'].ranges_for_length(
                seg_resp['bytes'])

            if not ranges:
                problem_segments.append([quote(obj_name),
                                         'Unsatisfiable Range'])
            elif ranges == [(0, seg_resp['bytes'])]:
                # Just one range, and it exactly matches the object.
                # Why'd we do this again?
                del seg_dict['range']
                segment_length = seg_resp['bytes']
            else:
                rng = ranges[0]
                seg_dict['range'] = '%d-%d' % (rng[0], rng[1] - 1)
                segment_length = rng[1] - rng[0]

        # Only the last segment of the manifest may be empty.
        if segment_length < 1 and not allow_empty_segment:
            problem_segments.append(
                [quote(obj_name),
                 'Too small; each segment must be at least 1 byte.'])

        # Compare user-supplied size/etag (if any) with the listing.
        _size_bytes = seg_dict.get('size_bytes')
        size_mismatch = (
            _size_bytes is not None and
            _size_bytes != seg_resp['bytes'])
        if size_mismatch:
            problem_segments.append([quote(obj_name), 'Size Mismatch'])

        _etag = seg_dict.get('etag')
        etag_mismatch = (
            _etag is not None and
            _etag != seg_resp['hash'])
        if etag_mismatch:
            problem_segments.append([quote(obj_name), 'Etag Mismatch'])

        last_modified_formatted = seg_resp.get('last_modified')
        if not last_modified_formatted:
            # shouldn't happen
            last_modified_formatted = datetime.now().strftime(
                '%Y-%m-%dT%H:%M:%S.%f')

        seg_data = {
            'name': '/' + seg_dict['path'].lstrip('/'),
            'bytes': seg_resp['bytes'],
            'hash': seg_resp['hash'],
            'content_type': seg_resp['content_type'],
            'last_modified': last_modified_formatted
        }
        if seg_dict.get('range'):
            seg_data['range'] = seg_dict['range']
        if config_true_value(seg_resp['slo']):
            # Segment is itself an SLO manifest.
            seg_data['sub_slo'] = True
        return segment_length, seg_data
    # END: Adapt for OpenIO code

    heartbeat = config_true_value(req.params.get('heartbeat'))
    separator = ''
    if heartbeat:
        # Apparently some ways of deploying require that this to happens
        # *before* the return? Not sure why.
        req.environ['eventlet.minimum_write_chunk_size'] = 0
        # Headers go out now; the real outcome is reported in the body.
        start_response('202 Accepted', [  # NB: not 201 !
            ('Content-Type', out_content_type),
        ])
        separator = '\r\n\r\n'

    # total_size is captured via a default argument so the generator
    # gets its own rebindable local instead of closing over the outer
    # variable (which would need nonlocal on py3).
    def resp_iter(total_size=total_size):
        # wsgi won't propagate start_response calls until some data has
        # been yielded so make sure first heartbeat is sent immediately
        if heartbeat:
            yield ' '
        last_yield_time = time.time()

        # BEGIN: New OpenIO code
        # One container listing replaces a HEAD per segment: fetch all
        # candidate segments in a single JSON listing subrequest.
        sub_req = make_subrequest(
            req.environ,
            path='%s?format=json&prefix=%s&limit=%d' %
                 (segments_container_path, seg_prefix,
                  self.max_manifest_segments),
            method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
        sub_req.environ.setdefault('oio.query', {})
        # All meta2 databases may not be synchronized
        sub_req.environ['oio.query']['force_master'] = True
        sub_req.environ['oio.query']['slo'] = True
        list_seg_resp = sub_req.get_response(self)

        with closing_if_possible(list_seg_resp.app_iter):
            segments_resp = json.loads(list_seg_resp.body)

        # Index listing records by '/container/object' path so manifest
        # entries can be matched directly.
        seg_resp_dict = dict()
        for seg_resp in segments_resp:
            obj_name = '/'.join(('', segments_container,
                                 seg_resp['name']))
            seg_resp_dict[obj_name] = seg_resp

        for obj_name in path2indices:
            now = time.time()
            if heartbeat and (now - last_yield_time >
                              self.yield_frequency):
                # Make sure we've called start_response before
                # sending data
                yield ' '
                last_yield_time = now

            for i in path2indices[obj_name]:
                if not list_seg_resp.is_success:
                    # Listing itself failed: every segment inherits
                    # that status.
                    problem_segments.append(
                        [quote(obj_name), list_seg_resp.status])
                    segment_length = 0
                    seg_data = None
                else:
                    seg_resp = seg_resp_dict.get(obj_name)
                    if seg_resp:
                        segment_length, seg_data = validate_seg_dict(
                            parsed_data[i], seg_resp,
                            (i == len(parsed_data) - 1))
                    else:
                        # Segment absent from the listing.
                        problem_segments.append([quote(obj_name), 404])
                        segment_length = 0
                        seg_data = None
                data_for_storage[i] = seg_data
                total_size += segment_length
        # END: New OpenIO code

        if problem_segments:
            err = HTTPBadRequest(content_type=out_content_type)
            resp_dict = {}
            if heartbeat:
                # 202 already sent; report the failure in the body.
                resp_dict['Response Status'] = err.status
                resp_dict['Response Body'] = err.body or '\n'.join(
                    RESPONSE_REASONS.get(err.status_int, ['']))
            else:
                # Drop content-length: we stream our own body.
                start_response(err.status,
                               [(h, v) for h, v in err.headers.items()
                                if h.lower() != 'content-length'])
            yield separator + get_response_body(
                out_content_type, resp_dict, problem_segments, 'upload')
            return

        # SLO etag is the md5 over per-segment hashes (with range
        # annotations) and inline-data md5s, in manifest order.
        slo_etag = md5()
        for seg_data in data_for_storage:
            if 'data' in seg_data:
                raw_data = base64.b64decode(seg_data['data'])
                slo_etag.update(md5(raw_data).hexdigest())
            elif seg_data.get('range'):
                slo_etag.update('%s:%s;' % (seg_data['hash'],
                                            seg_data['range']))
            else:
                slo_etag.update(seg_data['hash'])

        slo_etag = slo_etag.hexdigest()
        client_etag = req.headers.get('Etag')
        if client_etag and client_etag.strip('"') != slo_etag:
            err = HTTPUnprocessableEntity(request=req)
            if heartbeat:
                yield separator + get_response_body(out_content_type, {
                    'Response Status': err.status,
                    'Response Body': err.body or '\n'.join(
                        RESPONSE_REASONS.get(err.status_int, [''])),
                }, problem_segments, 'upload')
            else:
                for chunk in err(req.environ, start_response):
                    yield chunk
            return

        # Replace the request body with the normalized manifest; the
        # stored object's Etag is the md5 of the manifest JSON itself.
        json_data = json.dumps(data_for_storage)
        if six.PY3:
            json_data = json_data.encode('utf-8')
        req.body = json_data
        req.headers.update({
            SYSMETA_SLO_ETAG: slo_etag,
            SYSMETA_SLO_SIZE: total_size,
            'X-Static-Large-Object': 'True',
            'Etag': md5(json_data).hexdigest(),
        })

        # Ensure container listings have both etags. However, if any
        # middleware to the left of us touched the base value, trust them.
        override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
        val, sep, params = req.headers.get(
            override_header, '').partition(';')
        req.headers[override_header] = '%s; slo_etag=%s' % (
            (val or req.headers['Etag']) + sep + params, slo_etag)

        env = req.environ
        if not env.get('CONTENT_TYPE'):
            guessed_type, _junk = mimetypes.guess_type(req.path_info)
            env['CONTENT_TYPE'] = (guessed_type or
                                   'application/octet-stream')
        env['swift.content_type_overridden'] = True
        # Total logical size is carried as a content-type parameter.
        env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

        resp = req.get_response(self.app)
        resp_dict = {'Response Status': resp.status}
        if resp.is_success:
            # Present the computed SLO etag to the client instead of the
            # manifest-body md5.
            resp.etag = slo_etag
            resp_dict['Etag'] = resp.headers['Etag']
            resp_dict['Last Modified'] = resp.headers['Last-Modified']

        if heartbeat:
            resp_dict['Response Body'] = resp.body
            yield separator + get_response_body(out_content_type,
                                                resp_dict, [], 'upload')
        else:
            for chunk in resp(req.environ, start_response):
                yield chunk

    return resp_iter()