def close(self):
    """
    Invoked once the client has disconnected.

    Makes sure the connection backing the in-flight backend response,
    if there is one, gets closed.
    """
    active_resp = self.current_resp
    if not active_resp:
        # nothing in flight, nothing to release
        return
    close_if_possible(active_resp.app_iter)
def _get_source_object(self, ssc_ctx, source_path, req):
    """
    GET the source object of a server-side copy and vet its size.

    :param ssc_ctx: context object used to issue the source request
    :param source_path: path of the source object
    :param req: the original copy request
    :returns: the source response, or an HTTPRequestEntityTooLarge
              response when the source has no content length or is
              larger than MAX_FILE_SIZE
    """
    source_req = req.copy_get()

    # make sure the source request uses its own container_info
    source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
    source_req.path_info = source_path
    source_req.headers['X-Newest'] = 'true'

    # in case we are copying an SLO manifest, set format=raw parameter
    query = source_req.params
    if query.get('multipart-manifest') == 'get':
        query['format'] = 'raw'
        source_req.params = query

    source_resp = ssc_ctx.get_source_resp(source_req)
    length = source_resp.content_length

    # A missing content length indicates a transfer-encoding: chunked
    # source object, which currently only happens because there are more
    # than CONTAINER_LISTING_LIMIT segments in a segmented object. In that
    # case, as for an oversized source, refuse to do the server-side copy.
    if length is None or length > MAX_FILE_SIZE:
        close_if_possible(source_resp.app_iter)
        return HTTPRequestEntityTooLarge(request=req)

    return source_resp
def object_request(self, req, api_version, account, container, obj,
                   allow_versioned_writes):
    """
    Forward an object request, attaching version information if needed.

    Requests aimed at a versions container are redirected to the
    unversioned container with the version passed in ``oio.query``.
    DELETE requests in 'stack' versioning mode target the latest version
    instead of creating a delete marker.

    :param req: the incoming request
    :param api_version: api version
    :param account: account name
    :param container: (quoted) container name
    :param obj: (quoted) object name
    :param allow_versioned_writes: not used by this method
    :returns: the response from the wrapped application
    """
    container_name = unquote(container)
    object_name = unquote(obj)
    orig_container = get_unversioned_container(container_name)

    if orig_container != container_name:
        orig_object, version = \
            swift3_split_object_name_version(object_name)
        req.environ['oio.query'] = {'version': version}
        path_parts = (api_version, account,
                      quote(orig_container), quote(orig_object))
        req.environ['PATH_INFO'] = '/%s/%s/%s/%s' % path_parts
    elif (req.method == 'DELETE' and
            req.headers.get('X-Backend-Versioning-Mode-Override',
                            'history') == 'stack'):
        # Do not create a delete marker, delete the latest version
        obj_inf = get_object_info(req.environ, self.app,
                                  swift_source='VW')
        latest_version = obj_inf.get('sysmeta', {}).get('version-id')
        req.environ['oio.query'] = {'version': latest_version}

    resp = req.get_response(self.app)
    if req.method == 'HEAD':
        close_if_possible(resp.app_iter)
    return resp
def _get_source_object(self, ssc_ctx, source_path, req):
    """
    GET the source object of a server-side copy and vet its size.

    :param ssc_ctx: context object used to issue the source request
    :param source_path: unquoted path of the source object
    :param req: the original copy request
    :returns: the source response, or an HTTPRequestEntityTooLarge
              response when the source has no content length or is
              larger than MAX_FILE_SIZE
    """
    source_req = req.copy_get()

    # make sure the source request uses its own container_info
    source_req.headers.pop('X-Backend-Storage-Policy-Index', None)
    source_req.path_info = quote(source_path)
    source_req.headers['X-Newest'] = 'true'

    if 'swift.post_as_copy' in req.environ:
        # We're COPYing one object over itself because of a POST; rely on
        # the PUT for write authorization, don't require read authorization
        source_req.environ['swift.authorize'] = lambda req: None
        source_req.environ['swift.authorize_override'] = True

    # in case we are copying an SLO manifest, set format=raw parameter
    query = source_req.params
    if query.get('multipart-manifest') == 'get':
        query['format'] = 'raw'
        source_req.params = query

    source_resp = ssc_ctx.get_source_resp(source_req)
    length = source_resp.content_length

    # A missing content length indicates a transfer-encoding: chunked
    # source object, which currently only happens because there are more
    # than CONTAINER_LISTING_LIMIT segments in a segmented object. In that
    # case, as for an oversized source, refuse to do the server-side copy.
    if length is None or length > MAX_FILE_SIZE:
        close_if_possible(source_resp.app_iter)
        return HTTPRequestEntityTooLarge(request=req)

    return source_resp
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    GET a sub-manifest and hand back its JSON-decoded contents.

    :param req: the request being served; supplies environ and auth token
    :param version: api version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :raises ListingIterError: if the GET is unsuccessful or the body is
                              not valid JSON
    """
    manifest_path = "/".join(["", version, acc, con, obj])
    auth_token = req.headers.get("x-auth-token")
    sub_req = make_subrequest(
        req.environ,
        path=manifest_path,
        method="GET",
        headers={"x-auth-token": auth_token},
        agent="%(orig)s SLO MultipartGET",
        swift_source="SLO",
    )
    sub_resp = sub_req.get_response(self.slo.app)

    status = sub_resp.status_int
    if not is_success(status):
        close_if_possible(sub_resp.app_iter)
        raise ListingIterError(
            "ERROR: while fetching %s, GET of submanifest %s "
            "failed with status %d" % (req.path, sub_req.path, status)
        )

    try:
        # the iterator is closed whether or not decoding succeeds
        with closing_if_possible(sub_resp.app_iter):
            raw_manifest = "".join(sub_resp.app_iter)
            return json.loads(raw_manifest)
    except ValueError as err:
        raise ListingIterError(
            "ERROR: while fetching %s, JSON-decoding of submanifest %s "
            "failed with %s" % (req.path, sub_req.path, err)
        )
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    GET a sub-manifest and hand back its JSON-decoded contents.

    :param req: the request being served; supplies environ and auth token
    :param version: api version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :raises ListingIterError: if the GET is unsuccessful or the body is
                              not valid JSON
    """
    manifest_path = '/'.join(['', version, acc, con, obj])
    auth_token = req.headers.get('x-auth-token')
    sub_req = make_subrequest(
        req.environ, path=manifest_path, method='GET',
        headers={'x-auth-token': auth_token},
        agent='%(orig)s SLO MultipartGET',
        swift_source='SLO')
    sub_resp = sub_req.get_response(self.slo.app)

    status = sub_resp.status_int
    if not is_success(status):
        close_if_possible(sub_resp.app_iter)
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path, status))

    try:
        # the iterator is closed whether or not decoding succeeds
        with closing_if_possible(sub_resp.app_iter):
            raw_manifest = ''.join(sub_resp.app_iter)
            return json.loads(raw_manifest)
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))
def enforce_byte_count(inner_iter, nbytes):
    """
    Generator that insists inner_iter produces exactly <nbytes> bytes.

    Chunks are passed through until the budget is used up. A surplus is
    truncated at the budget and then reported; a shortfall is reported
    once inner_iter is exhausted. inner_iter is closed in every case.

    :param inner_iter: iterable of bytestrings
    :param nbytes: number of bytes expected
    :raises BadResponseLength: if inner_iter yields too many or too few
                               bytes
    """
    remaining = nbytes
    try:
        for piece in inner_iter:
            piece_len = len(piece)
            if piece_len > remaining:
                # hand over what still fits, then complain about the rest
                yield piece[:remaining]
                raise BadResponseLength(
                    "Too many bytes; truncating after %d bytes "
                    "with at least %d surplus bytes remaining" % (
                        nbytes, piece_len - remaining))
            yield piece
            remaining -= piece_len

        if remaining:
            raise BadResponseLength('Expected another %d bytes' % (
                remaining,))
    finally:
        close_if_possible(inner_iter)
def _listing_pages_iter(self, account_name, lcontainer, lprefix,
                        env, marker='', end_marker='', reverse=True):
    '''Yield successive listing "pages" of objects sharing a prefix.

    ``marker``, ``end_marker`` and ``reverse`` play the same role as in
    container listings. Callers are either:

    - ``_listing_iter``, which leaves all optional arguments alone, or
    - ``_in_proxy_reverse_listing``, which passes ``reverse=False`` and
      both markers (possibly blank).

    :raises ListingIterNotFound: if the listing request returns 404
    :raises HTTPPreconditionFailed: on any other client error
    :raises ListingIterError: on any other unsuccessful status
    '''
    listing_path = '/v1/%s/%s' % (account_name, lcontainer)
    while True:
        lreq = make_pre_authed_request(
            env, method='GET', swift_source='VW', path=listing_path)

        query = 'format=json&prefix=%s&marker=%s' % (
            quote(lprefix), quote(marker))
        if end_marker:
            query += '&end_marker=%s' % (quote(end_marker))
        if reverse:
            query += '&reverse=on'
        lreq.environ['QUERY_STRING'] = query

        lresp = lreq.get_response(self.app)
        status = lresp.status_int
        if not is_success(status):
            close_if_possible(lresp.app_iter)
            if status == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            if is_client_error(status):
                raise HTTPPreconditionFailed()
            raise ListingIterError()

        body = lresp.body
        if not body:
            return
        sublisting = json.loads(body)
        if not sublisting:
            return

        # When using the ``reverse`` param, check that the listing is
        # actually reversed
        first_item = sublisting[0]['name'].encode('utf-8')
        last_item = sublisting[-1]['name'].encode('utf-8')
        page_is_after_marker = marker and first_item > marker
        not_reversed = first_item < last_item or page_is_after_marker
        if reverse and not_reversed:
            # Apparently there's at least one pre-2.6.0 container server
            yield self._in_proxy_reverse_listing(
                account_name, lcontainer, lprefix,
                env, marker, sublisting)
            return

        # the next page picks up where this one ended
        marker = last_item
        yield sublisting
def _listing_pages_iter(self, account_name, lcontainer, lprefix,
                        req, marker='', end_marker='', reverse=True):
    '''Yield successive listing "pages" of objects sharing a prefix.

    ``marker``, ``end_marker`` and ``reverse`` play the same role as in
    container listings. Callers are either:

    - ``_listing_iter``, which leaves all optional arguments alone, or
    - ``_in_proxy_reverse_listing``, which passes ``reverse=False`` and
      both markers (possibly blank).

    :raises ListingIterNotFound: if the listing request returns 404
    :raises HTTPPreconditionFailed: on any other client error
    :raises ListingIterError: on any other unsuccessful status
    '''
    listing_path = wsgi_quote('/v1/%s/%s' % (account_name, lcontainer))
    while True:
        lreq = make_pre_authed_request(
            req.environ, method='GET', swift_source='VW',
            path=listing_path)

        query = 'prefix=%s&marker=%s' % (
            wsgi_quote(lprefix), wsgi_quote(marker))
        if end_marker:
            query += '&end_marker=%s' % (wsgi_quote(end_marker))
        if reverse:
            query += '&reverse=on'
        lreq.environ['QUERY_STRING'] = query

        lresp = lreq.get_response(self.app)
        status = lresp.status_int
        if not is_success(status):
            close_if_possible(lresp.app_iter)
            if status == HTTP_NOT_FOUND:
                raise ListingIterNotFound()
            if is_client_error(status):
                raise HTTPPreconditionFailed(request=req)
            raise ListingIterError()

        body = lresp.body
        if not body:
            return
        sublisting = json.loads(body)
        if not sublisting:
            return

        # When using the ``reverse`` param, check that the listing is
        # actually reversed
        first_item = bytes_to_wsgi(sublisting[0]['name'].encode('utf-8'))
        last_item = bytes_to_wsgi(sublisting[-1]['name'].encode('utf-8'))
        page_is_after_marker = marker and first_item > marker
        not_reversed = first_item < last_item or page_is_after_marker
        if reverse and not_reversed:
            # Apparently there's at least one pre-2.6.0 container server
            yield self._in_proxy_reverse_listing(
                account_name, lcontainer, lprefix,
                req, marker, sublisting)
            return

        # the next page picks up where this one ended
        marker = last_item
        yield sublisting
def handle_slo_get_or_head(self, req, start_response):
    """
    Serve a GET or HEAD that may target a static large object manifest.

    Does the normal WSGI thing with the request and start_response and
    returns an iterator suitable for sending up the WSGI chain.

    :param req: swob.Request object; is a GET or HEAD request aimed at
                what may be a static large object manifest (or may not).
    :param start_response: WSGI start_response callable
    """
    resp_iter = self._app_call(req.environ)

    # make sure this response is for a static large object manifest
    is_slo_manifest = any(
        name.lower() == 'x-static-large-object' and config_true_value(val)
        for name, val in self._response_headers)
    if not is_slo_manifest:
        # Not a static large object manifest. Just pass it through.
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    # Handle pass-through request for the manifest itself
    if req.params.get('multipart-manifest') == 'get':
        json_ctype = ('Content-Type', 'application/json; charset=utf-8')
        self._response_headers = [
            json_ctype if name.lower() == 'content-type' else (name, val)
            for name, val in self._response_headers]
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    if self._need_to_refetch_manifest(req):
        # flag the close below as not being a client disconnect
        req.environ['swift.non_client_disconnect'] = True
        close_if_possible(resp_iter)
        del req.environ['swift.non_client_disconnect']

        auth_token = req.headers.get('x-auth-token')
        get_req = make_subrequest(
            req.environ, method='GET',
            headers={'x-auth-token': auth_token},
            agent='%(orig)s SLO MultipartGET', swift_source='SLO')
        resp_iter = self._app_call(get_req.environ)

    # Any Content-Range from a manifest is almost certainly wrong for the
    # full large object.
    resp_headers = [(name, val) for name, val in self._response_headers
                    if not name.lower() == 'content-range']

    response = self.get_or_head_response(req, resp_headers, resp_iter)
    return response(req.environ, start_response)
def handle_slo_get_or_head(self, req, start_response):
    """
    Serve a GET or HEAD that may target a static large object manifest.

    Does the normal WSGI thing with the request and start_response and
    returns an iterator suitable for sending up the WSGI chain.

    :param req: swob.Request object; is a GET or HEAD request aimed at
                what may be a static large object manifest (or may not).
    :param start_response: WSGI start_response callable
    """
    resp_iter = self._app_call(req.environ)

    # make sure this response is for a static large object manifest
    is_slo_manifest = any(
        name.lower() == 'x-static-large-object' and config_true_value(val)
        for name, val in self._response_headers)
    if not is_slo_manifest:
        # Not a static large object manifest. Just pass it through.
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    # Handle pass-through request for the manifest itself
    if req.params.get('multipart-manifest') == 'get':
        json_ctype = ('Content-Type', 'application/json; charset=utf-8')
        self._response_headers = [
            json_ctype if name.lower() == 'content-type' else (name, val)
            for name, val in self._response_headers]
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    if self._need_to_refetch_manifest(req):
        # flag the close below as not being a client disconnect
        req.environ['swift.non_client_disconnect'] = True
        close_if_possible(resp_iter)
        del req.environ['swift.non_client_disconnect']

        auth_token = req.headers.get('x-auth-token')
        get_req = make_subrequest(
            req.environ, method='GET',
            headers={'x-auth-token': auth_token},
            agent='%(orig)s SLO MultipartGET', swift_source='SLO')
        resp_iter = self._app_call(get_req.environ)

    # Any Content-Range from a manifest is almost certainly wrong for the
    # full large object.
    resp_headers = [(name, val) for name, val in self._response_headers
                    if not name.lower() == 'content-range']

    response = self.get_or_head_response(req, resp_headers, resp_iter)
    return response(req.environ, start_response)
def handle_slo_get_or_head(self, req, start_response):
    """
    Serve a GET or HEAD that may target a static large object manifest.

    Does the normal WSGI thing with the request and start_response and
    returns an iterator suitable for sending up the WSGI chain.

    :param req: swob.Request object; is a GET or HEAD request aimed at
                what may be a static large object manifest (or may not).
    :param start_response: WSGI start_response callable
    """
    resp_iter = self._app_call(req.environ)

    # make sure this response is for a static large object manifest
    is_slo_manifest = any(
        name.lower() == 'x-static-large-object' and config_true_value(val)
        for name, val in self._response_headers)
    if not is_slo_manifest:
        # Not a static large object manifest. Just pass it through.
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    # Handle pass-through request for the manifest itself
    if req.params.get('multipart-manifest') == 'get':
        json_ctype = ('Content-Type', 'application/json; charset=utf-8')
        self._response_headers = [
            json_ctype if name.lower() == 'content-type' else (name, val)
            for name, val in self._response_headers]
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return resp_iter

    # Just because a response shows that an object is a SLO manifest does
    # not mean that response's body contains the entire SLO manifest. If
    # it doesn't, we need to make a second request to actually get the
    # whole thing.
    if req.method == 'HEAD' or req.range:
        # flag the close below as not being a client disconnect
        req.environ['swift.non_client_disconnect'] = True
        close_if_possible(resp_iter)
        del req.environ['swift.non_client_disconnect']

        get_req = req.copy_get()
        get_req.range = None
        get_req.environ['swift.source'] = 'SLO'
        get_req.user_agent = "%s SLO MultipartGET" % get_req.user_agent
        resp_iter = self._app_call(get_req.environ)

    response = self.get_or_head_response(
        req, self._response_headers, resp_iter)
    return response(req.environ, start_response)
def _check_response_error(self, req, resp):
    """
    Turn an unsuccessful response into a raised error response.

    Successful responses are left untouched. Otherwise the response
    iterator is closed before raising.

    :param req: the original request, attached to the raised error
    :param resp: the response to inspect
    :raises HTTPPreconditionFailed: on a client error status
    :raises HTTPServiceUnavailable: on any other unsuccessful status
    """
    status = resp.status_int
    if not is_success(status):
        close_if_possible(resp.app_iter)
        if is_client_error(status):
            # missing container or bad permissions
            error_cls = HTTPPreconditionFailed
        else:
            # could not version the data, bail
            error_cls = HTTPServiceUnavailable
        raise error_cls(request=req)
def _put_versioned_obj(self, req, put_path_info, source_resp):
    """
    PUT the body of ``source_resp`` to ``put_path_info``.

    A new pre-authed PUT request is built from the original request's
    environ, copying all headers from the source object apart from
    x-timestamp, and the source body is streamed in as its input.

    The source iterator is always closed, including when building or
    sending the PUT raises, so the source connection is not leaked.

    :param req: original request; supplies the WSGI environ
    :param put_path_info: path to PUT the copy to
    :param source_resp: response whose headers and body are copied
    :returns: the response to the PUT
    """
    try:
        put_req = make_pre_authed_request(
            req.environ, path=put_path_info, method='PUT',
            swift_source='VW')
        copy_header_subset(source_resp, put_req,
                           lambda k: k.lower() != 'x-timestamp')
        put_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
        return put_req.get_response(self.app)
    finally:
        # release the source whether or not the PUT went through
        close_if_possible(source_resp.app_iter)
def _put_versioned_obj(self, req, put_path_info, source_resp):
    """
    PUT the body of ``source_resp`` to ``put_path_info``.

    A new pre-authed PUT request is built from the original request's
    environ, copying all headers from the source object apart from
    x-timestamp, and the source body is streamed in as its input.

    The source iterator is always closed, including when building or
    sending the PUT raises, so the source connection is not leaked.

    :param req: original request; supplies the WSGI environ
    :param put_path_info: unquoted path to PUT the copy to
    :param source_resp: response whose headers and body are copied
    :returns: the response to the PUT
    """
    try:
        put_req = make_pre_authed_request(
            req.environ, path=quote(put_path_info), method='PUT',
            swift_source='VW')
        copy_header_subset(source_resp, put_req,
                           lambda k: k.lower() != 'x-timestamp')
        put_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
        return put_req.get_response(self.app)
    finally:
        # release the source whether or not the PUT went through
        close_if_possible(source_resp.app_iter)
def _internal_iter(self):
    """
    Top level of the iterator stack.

    Passes bytes through from ``_time_limited_iter``. Listing and segment
    errors are logged, and re-raised only if the first segment has not
    been validated yet. The current backend response, if any, is closed
    when iteration ends for whatever reason.
    """
    try:
        for data in self._time_limited_iter():
            yield data
    except (ListingIterError, SegmentError) as err:
        self.logger.error(err)
        if not self.validated_first_segment:
            raise
    finally:
        pending_resp = self.current_resp
        if pending_resp:
            close_if_possible(pending_resp.app_iter)
def handle_object(self, req, start_response, sync_profile, obj,
                  per_account):
    """
    Serve an object request, shunting local 404s to the remote provider.

    Any non-404 local response is returned as is. On a 404 the object is
    requested from the provider built from ``sync_profile``; for plain
    (non-range) GETs with ``restore_object`` enabled, a complete remote
    response is wrapped so the object is also written back locally.

    :param req: the incoming request
    :param start_response: WSGI start_response callable
    :param sync_profile: sync mapping used to create the provider
    :param obj: object name
    :param per_account: passed through to ``create_provider``
    :returns: the response iterator
    """
    status, headers, app_iter = req.call_application(self.app)
    if not status.startswith('404 '):
        # Only shunt 404s
        start_response(status, headers)
        return app_iter

    self.logger.debug('404 for %s; shunting to %r' %
                      (req.path, sync_profile))

    # Save off any existing trans-id headers so we can add them back later
    preserved_names = ('x-trans-id', 'x-openstack-request-id')
    trans_id_headers = [(h, v) for h, v in headers
                        if h.lower() in preserved_names]

    utils.close_if_possible(app_iter)

    provider = create_provider(sync_profile, max_conns=1,
                               per_account=per_account)

    wants_restore = (req.method == 'GET' and
                     sync_profile.get('restore_object', False) and
                     'range' not in req.headers)
    if not wants_restore:
        status, headers, app_iter = provider.shunt_object(req, obj)
    else:
        # We incur an extra request hit by checking for a possible SLO.
        obj = obj.decode('utf-8')
        manifest = provider.get_manifest(obj)
        self.logger.debug("Manifest: %s" % manifest)
        status, headers, app_iter = provider.shunt_object(req, obj)
        put_headers = convert_to_local_headers(headers)

        if response_is_complete(int(status.split()[0]), headers):
            local_path = req.environ['PATH_INFO']
            if check_slo(put_headers) and manifest:
                app_iter = SwiftSloPutWrapper(
                    app_iter, put_headers, local_path, self.app,
                    manifest, self.logger)
            else:
                app_iter = SwiftPutWrapper(
                    app_iter, put_headers, local_path, self.app,
                    self.logger)

    headers = [(k.encode('utf-8'), unicode(v).encode('utf-8'))
               for k, v in headers]
    self.logger.debug('Remote resp: %s' % status)

    headers = filter_hop_by_hop_headers(headers)
    headers.extend(trans_id_headers)

    start_response(status, headers)
    return app_iter
def _get_source_object(self, req, path_info):
    """
    GET the current object at ``path_info`` with a pre-authed request.

    :param req: original request; supplies the WSGI environ
    :param path_info: path of the object to fetch
    :returns: the GET response, or an HTTPRequestEntityTooLarge response
              when the object has no content length or is larger than
              MAX_FILE_SIZE
    """
    # make a pre_auth request in case the user has write access
    # to container, but not READ. This was allowed in previous version
    # (i.e., before middleware) so keeping the same behavior here
    get_req = make_pre_authed_request(
        req.environ, path=path_info, method='GET',
        headers={'X-Newest': 'True'}, swift_source='VW')
    source_resp = get_req.get_response(self.app)

    length = source_resp.content_length
    size_ok = length is not None and not length > MAX_FILE_SIZE
    if size_ok:
        return source_resp

    close_if_possible(source_resp.app_iter)
    return HTTPRequestEntityTooLarge(request=req)
def _get_source_object(self, req, path_info):
    """
    GET the current object at ``path_info`` with a pre-authed request.

    The request carries ``symlink=get`` in its query string.

    :param req: original request; supplies the WSGI environ
    :param path_info: unquoted path of the object to fetch
    :returns: the GET response, or an HTTPRequestEntityTooLarge response
              when the object has no content length or is larger than
              MAX_FILE_SIZE
    """
    # make a pre_auth request in case the user has write access
    # to container, but not READ. This was allowed in previous version
    # (i.e., before middleware) so keeping the same behavior here
    source_path = wsgi_quote(path_info) + '?symlink=get'
    get_req = make_pre_authed_request(
        req.environ, path=source_path, method='GET',
        headers={'X-Newest': 'True'}, swift_source='VW')
    source_resp = get_req.get_response(self.app)

    length = source_resp.content_length
    size_ok = length is not None and not length > MAX_FILE_SIZE
    if size_ok:
        return source_resp

    close_if_possible(source_resp.app_iter)
    return HTTPRequestEntityTooLarge(request=req)
def handle_obj_versions_put(self, req, versions_cont, api_version,
                            account_name, object_name):
    """
    Copy current version of object to versions_container before
    proceeding with original request.

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param object_name: name of object of original request
    :returns: the wrapped application, for the caller to continue with
    :raises HTTPPreconditionFailed: via ``_check_response_error`` on a
                                    client error from the GET or the PUT
    :raises HTTPServiceUnavailable: via ``_check_response_error`` on any
                                    other failure of the GET or the PUT
    """
    if 'X-Object-Manifest' in req.headers:
        # do not version DLO manifest, proceed with original request
        return self.app

    get_resp = self._get_source_object(req, req.path_info)

    if 'X-Object-Manifest' in get_resp.headers:
        # do not version DLO manifest, proceed with original request
        close_if_possible(get_resp.app_iter)
        return self.app
    if get_resp.status_int == HTTP_NOT_FOUND:
        # nothing to version, proceed with original request
        close_if_possible(get_resp.app_iter)
        return self.app

    # check for any other errors
    self._check_response_error(req, get_resp)

    # if there's an existing object, then copy it to
    # X-Versions-Location
    prefix_len = '%03x' % len(object_name)
    lprefix = prefix_len + object_name + '/'

    # Only fall back to parsing Last-Modified when X-Timestamp is absent.
    # Passing the parsed value as the default of headers.get() would
    # evaluate it on every call and fail on a missing or malformed
    # Last-Modified even though X-Timestamp is available.
    ts_source = get_resp.headers.get('x-timestamp')
    if ts_source is None:
        ts_source = calendar.timegm(time.strptime(
            get_resp.headers['last-modified'],
            '%a, %d %b %Y %H:%M:%S GMT'))

    vers_obj_name = lprefix + Timestamp(ts_source).internal
    put_path_info = "/%s/%s/%s/%s" % (
        api_version, account_name, versions_cont, vers_obj_name)

    put_resp = self._put_versioned_obj(req, put_path_info, get_resp)
    self._check_response_error(req, put_resp)
    return self.app
def _ensure_segments_container(self):
    """
    PUT the container that holds the current manifest segment.

    On an unsuccessful response ``self.failed`` is set and, when a logger
    is configured, a warning is emitted. The response iterator is closed
    either way.
    """
    # The manifest path is /<container>/<object>
    segment_path = self.manifest[self.segment_index]['name']
    container_path = segment_path.split('/', 2)[1]

    req = Request.blank(
        self._create_request_path(container_path),
        environ={'REQUEST_METHOD': 'PUT'})
    resp = req.get_response(self.app)

    if not resp.is_success:
        self.failed = True
        if self.logger:
            self.logger.warning(
                'Failed to create the segment container %s: %s' %
                (container_path, resp.status))

    close_if_possible(resp.app_iter)
def _put_versioned_obj(self, req, put_path_info, source_resp):
    """
    PUT the body of ``source_resp`` to ``put_path_info``.

    A new pre-authed PUT request is built from the original request's
    environ, copying all headers from the source object apart from
    x-timestamp, and the source body is streamed in as its input. When
    the source carries X-Object-Sysmeta-Slo-Size, that size is appended
    to the Content-Type as a ``swift_bytes`` parameter.

    The source iterator is always closed, including when building or
    sending the PUT raises, so the source connection is not leaked.

    :param req: original request; supplies the WSGI environ
    :param put_path_info: unquoted path to PUT the copy to
    :param source_resp: response whose headers and body are copied
    :returns: the response to the PUT
    """
    try:
        put_req = make_pre_authed_request(
            req.environ, path=wsgi_quote(put_path_info), method='PUT',
            swift_source='VW')
        copy_header_subset(source_resp, put_req,
                           lambda k: k.lower() != 'x-timestamp')

        slo_size = put_req.headers.get('X-Object-Sysmeta-Slo-Size')
        if slo_size:
            put_req.headers['Content-Type'] += '; swift_bytes=' + slo_size
            put_req.environ['swift.content_type_overridden'] = True

        put_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
        return put_req.get_response(self.app)
    finally:
        # release the source whether or not the PUT went through
        close_if_possible(source_resp.app_iter)
def handle_object_put(self, req, start_response, sync_profile,
                      per_account):
    """
    Serve an object PUT, creating the local container on a 404.

    The PUT is first sent to the wrapped application. Any non-404
    response is returned as is. On a 404 the container is PUT locally
    (for the 'swift' protocol, with headers fetched from the remote
    container) and the object PUT is then replayed.

    Fixes relative to the previous revision:
    - a non-404 PUT is no longer sent through the application a second
      time (which also dropped the first response without closing it);
    - the 404 response iterator is closed before any retry.

    :param req: the incoming PUT request
    :param start_response: WSGI start_response callable
    :param sync_profile: sync mapping used to create the provider
    :param per_account: passed through to ``create_provider``
    :returns: the response iterator
    """
    def _replay_put():
        # Send the original PUT through the application again.
        status, headers, app_iter = req.call_application(self.app)
        start_response(status, headers)
        return app_iter

    status, headers, app_iter = req.call_application(self.app)
    if not status.startswith('404 '):
        # The PUT has been processed; hand back that very response.
        start_response(status, headers)
        return app_iter

    # The 404 response is not used on any of the paths below.
    utils.close_if_possible(app_iter)

    provider = create_provider(sync_profile, max_conns=1,
                               per_account=per_account)
    headers = {}
    if sync_profile.get('protocol') == 'swift':
        try:
            headers = get_container_headers(provider)
        except RemoteHTTPError as e:
            self.logger.warning(
                'Failed to query the remote container (%d): %s' % (
                    e.resp.status, e.resp.body))
            return _replay_put()

    vers, acct, cont, _ = req.split_path(4, 4, True)
    container_path = '/%s' % '/'.join(
        [vers, utils.quote(acct), utils.quote(cont)])
    put_container_req = make_subrequest(
        req.environ, method='PUT', path=container_path, headers=headers,
        swift_source='CloudSync Shunt')
    put_container_req.environ['swift_owner'] = True
    status, headers, body = put_container_req.call_application(self.app)
    utils.close_if_possible(body)

    if int(status.split()[0]) // 100 != 2:
        # best effort: still replay the PUT below
        self.logger.warning('Failed to create container: %s' % status)

    return _replay_put()
def GETorHEAD(self, req):
    """
    Handle a GET or HEAD request on one part of a multipart object.

    The raw SLO manifest is fetched to locate the part, the request is
    then redirected to that part, and the response is given the
    content-type and etag of the whole object plus the part count.

    :param req: the incoming request
    :returns: the response for the requested part
    :raises InvalidRange: if the part does not exist
    """
    part_number = self.parse_part_number(req)

    # Get the list of parts. Must be raw to get all response headers.
    manifest_query = {'multipart-manifest': 'get', 'format': 'raw'}
    slo_req = req.to_swift_req('GET', req.container_name,
                               req.object_name, query=manifest_query)
    slo_resp = slo_req.get_response(self.app)

    # Check if the object is really a SLO. If not, and user asked
    # for the first part, do a regular request.
    if 'X-Static-Large-Object' not in slo_resp.headers:
        close_if_possible(slo_resp.app_iter)
        if part_number != 1:
            raise InvalidRange()
        return req.get_response(self.app)

    # Locate the part
    slo = json.loads(slo_resp.body)
    try:
        part = slo[part_number - 1]
    except IndexError:
        raise InvalidRange()

    # Redirect the request on the part
    _, req.container_name, req.object_name = part['path'].split('/', 2)
    # XXX enforce container_name and object_name to be <str>
    # or it will raise issues in swift3/requests when merging both
    req.container_name = req.container_name.encode('utf-8')
    req.object_name = req.object_name.encode('utf8')
    resp = req.get_response(self.app)

    # Get the content-type and etag of the object, not the part
    ctype, etag = extract_s3_etag(slo_resp.headers['Content-Type'])
    resp.headers['Content-Type'] = ctype
    if etag:
        resp.headers['ETag'] = '"%s"' % etag
    resp.headers['X-Amz-Mp-Parts-Count'] = len(slo)
    return resp
def _upload_manifest(self):
    """
    PUT the SLO manifest for ``self.path``.

    The stored manifest entries are translated to the field names used
    in a client-submitted manifest, the headers are adjusted for a
    manifest PUT, and the outcome is logged when a logger is configured.
    """
    SLO_FIELD_MAP = {
        'bytes': 'size_bytes',
        'hash': 'etag',
        'name': 'path',
        'range': 'range'
    }

    # We have to transform the SLO fields, as Swift internally uses a
    # different representation from what the client submits. Unfortunately,
    # when we extract the manifest with the InternalClient, we don't have
    # SLO in the pipeline and retrieve the internal representation.
    put_manifest = []
    for entry in self.manifest:
        translated = dict(
            (SLO_FIELD_MAP[field], value)
            for field, value in entry.items()
            if field in SLO_FIELD_MAP)
        put_manifest.append(translated)
    content = json.dumps(put_manifest)

    env = {
        'REQUEST_METHOD': 'PUT',
        'wsgi.input': StringIO.StringIO(content),
        'CONTENT_LENGTH': len(content),
        'QUERY_STRING': 'multipart-manifest=put',
    }

    # The SLO header must not be set on manifest PUT and we should remove
    # the content length of the whole SLO, as we will overwrite it with the
    # length of the manifest itself.
    if SLO_HEADER in self.headers:
        del self.headers[SLO_HEADER]
    del self.headers['Content-Length']

    # the ETag sent is the md5 over the segment hashes, in manifest order
    digest = hashlib.md5()
    for entry in self.manifest:
        digest.update(entry['hash'])
    self.headers['ETag'] = digest.hexdigest()

    req = Request.blank(self.path, environ=env, headers=self.headers)
    resp = req.get_response(self.app)

    if self.logger:
        if resp.status_int == 202:
            self.logger.warning(
                'SLO %s possibly already overwritten' % self.path)
        elif not resp.is_success:
            self.logger.warning('Failed to create the manifest %s: %s' % (
                self.path, resp.status))

    close_if_possible(resp.app_iter)
def handle_obj_versions_delete_push(self, req, versions_cont, api_version,
                                    account_name, container_name,
                                    object_name):
    """
    Handle DELETE requests when in history mode.

    Copy current version of object to versions_container and write a
    delete marker before proceeding with original request.

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: not used by this method.
    :param object_name: name of object of original request
    :returns: the wrapped application, for the caller to continue with
    """
    self._copy_current(req, versions_cont, api_version, account_name,
                       object_name)

    marker_name = self._build_versions_object_name(
        object_name, time.time())
    marker_path = "/%s/%s/%s/%s" % (
        api_version, account_name, versions_cont, marker_name)

    # Definitive source of truth is Content-Type, and since we add
    # a swift_* param, we know users haven't set it themselves.
    # This is still open to users POSTing to update the content-type
    # but they're just shooting themselves in the foot then.
    marker_headers = {
        'content-type': DELETE_MARKER_CONTENT_TYPE,
        'content-length': '0',
        'x-auth-token': req.headers.get('x-auth-token'),
    }

    marker_req = make_pre_authed_request(
        req.environ, path=wsgi_quote(marker_path),
        headers=marker_headers, method='PUT', swift_source='VW')
    marker_req.environ['swift.content_type_overridden'] = True

    marker_resp = marker_req.get_response(self.app)
    self._check_response_error(req, marker_resp)
    close_if_possible(marker_resp.app_iter)

    # successfully copied and created delete marker; safe to delete
    return self.app
def __call__(self, env, start_response):
    """
    Fire a logged subrequest, then pass the request to the wrapped app.

    GET requests use SUB_GET_PATH for the subrequest; everything else
    uses SUB_PUT_POST_PATH. The subrequest method comes from
    ``self.conf['subrequest_type']``.
    """
    if env['REQUEST_METHOD'] == 'GET':
        path = SUB_GET_PATH
    else:
        path = SUB_PUT_POST_PATH
    sub_method = self.conf['subrequest_type']

    # Make a subrequest that will be logged
    hdrs = {'content-type': 'text/plain'}
    sub_req = make_subrequest(env, path=path, method=sub_method,
                              headers=hdrs, agent='FakeApp',
                              swift_source='FA')
    self.register(self.conf['subrequest_type'], path, HTTPOk,
                  headers=hdrs)

    sub_resp = sub_req.get_response(self.app)
    close_if_possible(sub_resp.app_iter)

    return self.app(env, start_response)
def handle_request(self, req, start_response):
    """
    Serve a GET or HEAD that may target a dynamic large object manifest.

    If the response carries X-Object-Manifest, it is replaced by the
    large object response built from that header's value. Otherwise the
    original response is simply passed through.

    :param req: the incoming request
    :param start_response: WSGI start_response callable
    """
    resp_iter = self._app_call(req.environ)

    # make sure this response is for a dynamic large object manifest
    for name, manifest_value in self._response_headers:
        if name.lower() != 'x-object-manifest':
            continue
        close_if_possible(resp_iter)
        response = self.get_or_head_response(req, manifest_value)
        return response(req.environ, start_response)

    # Not a dynamic large object manifest; just pass it through.
    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return resp_iter
def _create_dir_marker(self, env, account, container, obj):
    """
    PUT an empty object that marks a subdirectory.

    This is required to quickly recurse on subdirectories, since with
    this middleware they are stored on separate containers. The PUT is
    sent with ``If-None-Match: *``; a failure is only logged.
    """
    path_parts = ('', 'v1', account, container, obj)
    path = quote_plus(self.DELIMITER.join(path_parts))

    req = make_subrequest(env, method='PUT', path=path, body='',
                          swift_source=self.SWIFT_SOURCE)
    req.headers['If-None-Match'] = '*'
    req.headers['Content-Length'] = '0'

    resp = req.get_response(self.app)
    if not resp.is_success:
        LOG.warn('%s: Failed to create directory placeholder in %s: %s',
                 self.SWIFT_SOURCE, container, resp.status)
    close_if_possible(resp.app_iter)
def _copy_current(self, req, versions_cont, api_version, account_name,
                  object_name):
    """
    Copy the current version of the object into the versions container.

    Nothing is copied when the current object is a DLO manifest or does
    not exist.

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param object_name: name of object of original request
    :raises: whatever ``swift.authorize`` returns when it denies the
             request; HTTPPreconditionFailed or HTTPServiceUnavailable
             via ``_check_response_error`` when the GET or PUT fails
    """
    # validate the write access to the versioned container before
    # making any backend requests
    if 'swift.authorize' in req.environ:
        container_info = get_container_info(
            req.environ, self.app)
        req.acl = container_info.get('write_acl')
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            raise aresp

    get_resp = self._get_source_object(req, req.path_info)

    if 'X-Object-Manifest' in get_resp.headers:
        # do not version DLO manifest, proceed with original request
        close_if_possible(get_resp.app_iter)
        return
    if get_resp.status_int == HTTP_NOT_FOUND:
        # nothing to version, proceed with original request
        close_if_possible(get_resp.app_iter)
        return

    # check for any other errors
    self._check_response_error(req, get_resp)

    # if there's an existing object, then copy it to
    # X-Versions-Location

    # Only fall back to parsing Last-Modified when X-Timestamp is absent.
    # Passing the parsed value as the default of headers.get() would
    # evaluate it on every call and fail on a missing or malformed
    # Last-Modified even though X-Timestamp is available.
    ts_source = get_resp.headers.get('x-timestamp')
    if ts_source is None:
        ts_source = calendar.timegm(time.strptime(
            get_resp.headers['last-modified'],
            '%a, %d %b %Y %H:%M:%S GMT'))

    vers_obj_name = self._build_versions_object_name(
        object_name, ts_source)
    put_path_info = "/%s/%s/%s/%s" % (
        api_version, account_name, versions_cont, vers_obj_name)

    put_resp = self._put_versioned_obj(req, put_path_info, get_resp)
    self._check_response_error(req, put_resp)
def _restore_data(self, req, versions_cont, api_version, account_name,
                  container_name, object_name, prev_obj_name):
    """
    Copy a previous version from the versions container back in place.

    :param req: original request.
    :param versions_cont: container where previous versions are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: container of the object being restored.
    :param object_name: name of the object being restored.
    :param prev_obj_name: name of the version object to restore from.
    :returns: the path of the restored version, or False if that version
              was not found
    """
    account_root = (api_version, account_name)

    get_path = "/%s/%s/%s/%s" % (
        account_root + (versions_cont, prev_obj_name))
    get_resp = self._get_source_object(req, get_path)

    # if the version isn't there, keep trying with previous version
    if get_resp.status_int == HTTP_NOT_FOUND:
        close_if_possible(get_resp.app_iter)
        return False

    self._check_response_error(req, get_resp)

    put_path_info = "/%s/%s/%s/%s" % (
        account_root + (container_name, object_name))
    put_resp = self._put_versioned_obj(req, put_path_info, get_resp)

    self._check_response_error(req, put_resp)
    close_if_possible(put_resp.app_iter)
    return get_path
def _create_dir_marker(self, env, account, container, obj):
    """
    Create an empty object to mark a subdirectory. This is required to
    quickly recurse on subdirectories, since with this middleware they
    are stored on separate containers.

    The container name is split on ``self.ENCODED_DELIMITER``. After each
    placeholder PUT, the last component is popped and becomes the next
    placeholder name (with ``self.DELIMITER`` appended), and the remaining
    components form the next container. The walk stops when:

    * a PUT answers 412 (placeholder already present),
    * ``self.recursive_placeholders`` is false (only one placeholder is
      created), or
    * no components are left.

    Other non-successful responses are logged and do not stop the walk.

    :param env: WSGI environment the subrequests are derived from.
    :param account: account name, used as a path component.
    :param container: container in which the first placeholder is created.
    :param obj: name of the first placeholder object.
    """
    items = container.split(self.ENCODED_DELIMITER)

    while items:
        path = quote_plus(
            self.DELIMITER.join(('', 'v1', account, container, obj)))
        req = make_subrequest(env, method='PUT', path=path, body='',
                              swift_source=self.SWIFT_SOURCE)
        req.headers['If-None-Match'] = '*'
        req.headers['Content-Length'] = '0'
        LOG.debug("%s: Create placeholder %s in %s",
                  self.SWIFT_SOURCE, obj, container)
        resp = req.get_response(self.app)

        if resp.status_int == HTTP_PRECONDITION_FAILED:
            LOG.debug('%s: directory placeholder already present '
                      'in %s', self.SWIFT_SOURCE, container)
            close_if_possible(resp.app_iter)
            break

        if not resp.is_success:
            # warning() rather than the deprecated warn() alias.
            LOG.warning(
                '%s: Failed to create directory placeholder '
                'in %s: %s',
                self.SWIFT_SOURCE, container, resp.status)
        close_if_possible(resp.app_iter)

        if not self.recursive_placeholders:
            break

        if items:
            # Move one level up: the last component becomes the
            # placeholder name inside the parent container.
            obj = items.pop() + self.DELIMITER
            container = self.ENCODED_DELIMITER.join(items)
def handle_obj_versions_delete_push(self, req, versions_cont, api_version,
                                    account_name, container_name,
                                    object_name):
    """
    Handle DELETE requests when in history mode.

    Archives the current version of the object into the versions
    container, writes a delete marker next to it, and then hands back the
    wrapped application so the original request can proceed.

    :param req: original request.
    :param versions_cont: container where previous versions of the
                          object are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: container name (not used by this method).
    :param object_name: name of object of original request.
    :returns: ``self.app``
    """
    self._copy_current(req, versions_cont, api_version, account_name,
                       object_name)

    marker_name = self._build_versions_object_name(object_name, time.time())
    marker_path = "/%s/%s/%s/%s" % (
        api_version, account_name, versions_cont, marker_name)

    # Definitive source of truth is Content-Type, and since we add
    # a swift_* param, we know users haven't set it themselves.
    # This is still open to users POSTing to update the content-type
    # but they're just shooting themselves in the foot then.
    marker_headers = {}
    marker_headers['content-type'] = DELETE_MARKER_CONTENT_TYPE
    marker_headers['content-length'] = '0'
    marker_headers['x-auth-token'] = req.headers.get('x-auth-token')

    marker_req = make_pre_authed_request(
        req.environ, path=marker_path,
        headers=marker_headers, method='PUT', swift_source='VW')
    marker_req.environ['swift.content_type_overridden'] = True

    marker_resp = marker_req.get_response(self.app)
    self._check_response_error(req, marker_resp)
    close_if_possible(marker_resp.app_iter)

    # successfully copied and created delete marker; safe to delete
    return self.app
def _create_req_container(self, req, headers, migration=False,
                          storage_policy=None):
    """
    Issue a container PUT subrequest for the container named in ``req``.

    :param req: request whose path supplies version, account and container.
    :param headers: headers for the PUT; copied, never modified in place.
    :param migration: when true, set the container migrator sysmeta header
                      to ``MigrationContainerStates.MIGRATING``.
    :param storage_policy: optional value for ``X-Storage-Policy``.
    :returns: ``(status, headers)`` of the container PUT response.
    """
    version, account, container, _obj = req.split_path(3, 4, True)
    path_parts = [version, utils.quote(account), utils.quote(container)]
    container_path = '/%s' % '/'.join(path_parts)

    put_headers = dict(headers)
    if migration:
        migrator_key = get_sys_migrator_header('container')
        put_headers[migrator_key] = MigrationContainerStates.MIGRATING
    if storage_policy:
        put_headers['X-Storage-Policy'] = storage_policy

    subreq = make_subrequest(
        req.environ, method='PUT', path=container_path,
        headers=put_headers, swift_source='1space-shunt')
    subreq.environ['swift_owner'] = True

    status, resp_headers, body = subreq.call_application(self.app)
    # Only status and headers are of interest; release the body.
    utils.close_if_possible(body)
    return status, resp_headers
def handle_PUT(self, req, start_response):
    """
    Serve a copy request: GET the source object, then PUT its content to
    the path of ``req``.

    A request with a non-zero content length is answered with
    HTTPBadRequest. A source GET whose status is
    ``HTTP_MULTIPLE_CHOICES`` or above is returned to the client as is.

    :param req: the copy request; the source is derived from its
                copy-from headers.
    :param start_response: WSGI start_response callable.
    :returns: the result of ``ssc_ctx.send_put_req`` on success, otherwise
              the result of calling the error response.
    """
    if req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte '
                              'body', request=req,
                              content_type='text/plain')(req.environ,
                                                         start_response)

    # Form the path of source object to be fetched
    ver, acct, _rest = req.split_path(2, 3, True)
    src_account_name = req.headers.get('X-Copy-From-Account')
    if src_account_name:
        src_account_name = check_account_format(req, src_account_name)
    else:
        # No explicit source account: copy within the request's account.
        src_account_name = acct
    src_container_name, src_obj_name = _check_copy_from_header(req)
    source_path = '/%s/%s/%s/%s' % (ver, src_account_name,
                                    src_container_name, src_obj_name)

    if req.environ.get('swift.orig_req_method', req.method) != 'POST':
        self.logger.info("Copying object from %s to %s" %
                         (source_path, req.path))

    # GET the source object, bail out on error
    ssc_ctx = ServerSideCopyWebContext(self.app, self.logger)
    source_resp = self._get_source_object(ssc_ctx, source_path, req)
    if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
        return source_resp(source_resp.environ, start_response)

    # Create a new Request object based on the original request instance.
    # This will preserve original request environ including headers.
    sink_req = Request.blank(req.path_info, environ=req.environ)

    def is_object_sysmeta(k):
        return is_sys_meta('object', k)

    if config_true_value(req.headers.get('x-fresh-metadata', 'false')):
        # x-fresh-metadata only applies to copy, not post-as-copy: ignore
        # existing user metadata, update existing sysmeta with new
        copy_header_subset(source_resp, sink_req, is_object_sysmeta)
        copy_header_subset(req, sink_req, is_object_sysmeta)
    else:
        # First copy existing sysmeta, user meta and other headers from the
        # source to the sink, apart from headers that are conditionally
        # copied below and timestamps.
        exclude_headers = ('x-static-large-object', 'x-object-manifest',
                           'etag', 'content-type', 'x-timestamp',
                           'x-backend-timestamp')
        copy_header_subset(source_resp, sink_req,
                           lambda k: k.lower() not in exclude_headers)
        # now update with original req headers
        sink_req.headers.update(req.headers)

    params = sink_req.params
    if params.get('multipart-manifest') == 'get':
        # The manifest itself is being copied rather than the large
        # object's content.
        if 'X-Static-Large-Object' in source_resp.headers:
            params['multipart-manifest'] = 'put'
        if 'X-Object-Manifest' in source_resp.headers:
            del params['multipart-manifest']
            sink_req.headers['X-Object-Manifest'] = \
                source_resp.headers['X-Object-Manifest']
        sink_req.params = params

    # Set swift.source, data source, content length and etag
    # for the PUT request
    sink_req.environ['swift.source'] = 'SSC'
    sink_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
    sink_req.content_length = source_resp.content_length
    if (source_resp.status_int == HTTP_OK and
            'X-Static-Large-Object' not in source_resp.headers and
            ('X-Object-Manifest' not in source_resp.headers or
             req.params.get('multipart-manifest') == 'get')):
        # copy source etag so that copied content is verified, unless:
        #  - not a 200 OK response: source etag may not match the actual
        #    content, for example with a 206 Partial Content response to a
        #    ranged request
        #  - SLO manifest: etag cannot be specified in manifest PUT; SLO
        #    generates its own etag value which may differ from source
        #  - SLO: etag in SLO response is not hash of actual content
        #  - DLO: etag in DLO response is not hash of actual content
        sink_req.headers['Etag'] = source_resp.etag
    else:
        # since we're not copying the source etag, make sure that any
        # container update override values are not copied.
        remove_items(
            sink_req.headers,
            lambda k: k.startswith(
                'X-Object-Sysmeta-Container-Update-Override-'))

    # We no longer need these headers
    sink_req.headers.pop('X-Copy-From', None)
    sink_req.headers.pop('X-Copy-From-Account', None)

    # If the copy request does not explicitly override content-type,
    # use the one present in the source object.
    if not req.headers.get('content-type'):
        sink_req.headers['Content-Type'] = \
            source_resp.headers['Content-Type']

    # Create response headers for PUT response
    resp_headers = self._create_response_headers(source_path,
                                                 source_resp, sink_req)

    put_resp = ssc_ctx.send_put_req(sink_req, resp_headers, start_response)
    # The source body was consumed through wsgi.input; release it now.
    close_if_possible(source_resp.app_iter)
    return put_resp
def _perform_subrequest(self, orig_env, attributes, fp, keys):
    """
    Performs the subrequest and returns the response.

    :param orig_env: The WSGI environment dict; will only be used
                     to form a new env for the subrequest.
    :param attributes: dict of the attributes of the form so far.
    :param fp: The file-like object containing the request body.
    :param keys: The account keys to validate the signature with.
    :returns: (status_line, headers_list)
    :raises FormUnauthorized: if no keys are given, the form has expired,
                              or no key produces the form's signature.
    :raises FormInvalid: if max_file_size, x_delete_at, x_delete_after or
                         expires is not an integer.
    """
    if not keys:
        raise FormUnauthorized('invalid signature')
    try:
        max_file_size = int(attributes.get('max_file_size') or 0)
    except ValueError:
        raise FormInvalid('max_file_size not an integer')
    subenv = make_pre_authed_env(orig_env, 'PUT', agent=None,
                                 swift_source='FP')
    if 'QUERY_STRING' in subenv:
        del subenv['QUERY_STRING']
    subenv['HTTP_TRANSFER_ENCODING'] = 'chunked'
    # The upload is read through a wrapper constructed with max_file_size.
    subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size)
    # Append a '/' separator unless the path already ends with one or
    # already contains four or more slashes.
    if not subenv['PATH_INFO'].endswith('/') and \
            subenv['PATH_INFO'].count('/') < 4:
        subenv['PATH_INFO'] += '/'
    subenv['PATH_INFO'] += str_to_wsgi(
        attributes['filename'] or 'filename')
    if 'x_delete_at' in attributes:
        try:
            subenv['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at'])
        except ValueError:
            raise FormInvalid('x_delete_at not an integer: '
                              'Unix timestamp required.')
    if 'x_delete_after' in attributes:
        try:
            subenv['HTTP_X_DELETE_AFTER'] = int(
                attributes['x_delete_after'])
        except ValueError:
            raise FormInvalid('x_delete_after not an integer: '
                              'Number of seconds required.')
    if 'content-type' in attributes:
        subenv['CONTENT_TYPE'] = \
            attributes['content-type'] or 'application/octet-stream'
    if 'content-encoding' in attributes:
        subenv['HTTP_CONTENT_ENCODING'] = attributes['content-encoding']
    try:
        if int(attributes.get('expires') or 0) < time():
            raise FormUnauthorized('form expired')
    except ValueError:
        raise FormInvalid('expired not an integer')
    # The signed message: original path, redirect, max_file_size,
    # max_file_count and expires, newline separated.
    hmac_body = '%s\n%s\n%s\n%s\n%s' % (
        wsgi_to_str(orig_env['PATH_INFO']),
        attributes.get('redirect') or '',
        attributes.get('max_file_size') or '0',
        attributes.get('max_file_count') or '0',
        attributes.get('expires') or '0')
    if six.PY3:
        hmac_body = hmac_body.encode('utf-8')

    has_valid_sig = False
    # Every key is checked; the loop does not stop at the first match.
    for key in keys:
        # Encode key like in swift.common.utils.get_hmac.
        if not isinstance(key, six.binary_type):
            key = key.encode('utf8')
        sig = hmac.new(key, hmac_body, sha1).hexdigest()
        if streq_const_time(sig, (attributes.get('signature') or
                                  'invalid')):
            has_valid_sig = True
    if not has_valid_sig:
        raise FormUnauthorized('invalid signature')

    # One-element lists let the nested callback hand results back.
    substatus = [None]
    subheaders = [None]

    wsgi_input = subenv['wsgi.input']

    def _start_response(status, headers, exc_info=None):
        if wsgi_input.file_size_exceeded:
            raise EOFError("max_file_size exceeded")

        substatus[0] = status
        subheaders[0] = headers

    # reiterate to ensure the response started,
    # but drop any data on the floor
    close_if_possible(reiterate(self.app(subenv, _start_response)))
    return substatus[0], subheaders[0]
def close(self):
    """
    Called when the client disconnects. Makes sure that the connection to
    the backend server gets closed.
    """
    backend_iter = self.app_iter
    close_if_possible(backend_iter)
def _requests_to_bytes_iter(self):
    """
    Take the requests out of self._coalesce_requests, actually make the
    requests, and generate the bytes from the responses.

    Yields 2-tuples (segment-name, byte-chunk). The segment name is used
    for logging.

    :raises SegmentError: if a segment GET is not successful, if the
        response etag or length differs from the expected values, or if
        the MD5 of the bytes read differs from the response etag.
    """
    for data_or_req, seg_etag, seg_size in self._coalesce_requests():
        if isinstance(data_or_req, bytes):  # ugly, awful overloading
            yield ('data segment', data_or_req)
            continue

        seg_req = data_or_req
        seg_resp = seg_req.get_response(self.app)

        if not is_success(seg_resp.status_int):
            close_if_possible(seg_resp.app_iter)
            raise SegmentError(
                'While processing manifest %s, '
                'got %d while retrieving %s' %
                (self.name, seg_resp.status_int, seg_req.path))

        # The content-length check is for security reasons. Seems
        # possible that an attacker could upload a >1mb object and
        # then replace it with a much smaller object with same
        # etag. Then create a big nested SLO that calls that
        # object many times which would hammer our obj servers. If
        # this is a range request, don't check content-length
        # because it won't match.
        if ((seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and (seg_resp.content_length != seg_size) and
                 not seg_req.range)):
            close_if_possible(seg_resp.app_iter)
            mismatch = {
                'path': seg_req.path,
                'r_etag': seg_resp.etag,
                'r_size': seg_resp.content_length,
                's_etag': seg_etag,
                's_size': seg_size,
            }
            raise SegmentError(
                'Object segment no longer valid: '
                '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                '%(r_size)s != %(s_size)s.' % mismatch)

        self.current_resp = seg_resp

        # Only calculate the MD5 if we can use it to validate
        can_validate = seg_resp.etag and not seg_req.headers.get('Range')
        seg_hash = hashlib.md5() if can_validate else None

        document_iters = maybe_multipart_byteranges_to_document_iters(
            seg_resp.app_iter,
            seg_resp.headers['Content-Type'])

        for chunk in itertools.chain.from_iterable(document_iters):
            if seg_hash:
                seg_hash.update(chunk)
            yield (seg_req.path, chunk)
        close_if_possible(seg_resp.app_iter)

        if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
            checksum_info = {
                'seg': seg_req.path,
                'etag': seg_resp.etag,
                'name': self.name,
                'actual': seg_hash.hexdigest(),
            }
            raise SegmentError(
                "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                " %(etag)s, but object MD5 was actually %(actual)s" %
                checksum_info)
def handle_obj_versions_delete_pop(self, req, versions_cont, api_version,
                                   account_name, container_name,
                                   object_name):
    """
    Handle DELETE requests when in stack mode.

    Delete current version of object and pop previous version in its place.

    :param req: original request.
    :param versions_cont: container where previous versions of the object
                          are stored.
    :param api_version: api version.
    :param account_name: account name.
    :param container_name: container name.
    :param object_name: object name.
    :returns: the response of the DELETE that is finally issued (which may
              have been redirected to an archived version or to a delete
              marker), or the authorization failure response.
    """
    listing_prefix = self._build_versions_object_prefix(object_name)
    item_iter = self._listing_iter(account_name, versions_cont,
                                   listing_prefix, req)

    auth_token_header = {'X-Auth-Token': req.headers.get('X-Auth-Token')}
    authed = False
    for previous_version in item_iter:
        if not authed:
            # validate the write access to the versioned container before
            # making any backend requests
            if 'swift.authorize' in req.environ:
                container_info = get_container_info(
                    req.environ, self.app)
                req.acl = container_info.get('write_acl')
                aresp = req.environ['swift.authorize'](req)
                if aresp:
                    return aresp
                # NOTE(review): nesting of this assignment is inferred
                # from a whitespace-collapsed source - confirm.
                authed = True

        if previous_version['content_type'] == DELETE_MARKER_CONTENT_TYPE:
            # check whether we have data in the versioned container
            obj_head_headers = {'X-Newest': 'True'}
            obj_head_headers.update(auth_token_header)
            head_req = make_pre_authed_request(
                req.environ, path=req.path_info, method='HEAD',
                headers=obj_head_headers, swift_source='VW')
            hresp = head_req.get_response(self.app)
            close_if_possible(hresp.app_iter)

            if hresp.status_int != HTTP_NOT_FOUND:
                self._check_response_error(req, hresp)
                # if there's an existing object, then just let the delete
                # through (i.e., restore to the delete-marker state):
                break

            # no data currently in the container (delete marker is current)
            # This inner loop keeps consuming the same listing iterator as
            # the outer loop.
            for version_to_restore in item_iter:
                if version_to_restore['content_type'] == \
                        DELETE_MARKER_CONTENT_TYPE:
                    # Nothing to restore
                    break
                prev_obj_name = version_to_restore['name'].encode('utf-8')
                restored_path = self._restore_data(
                    req, versions_cont, api_version, account_name,
                    container_name, object_name, prev_obj_name)
                if not restored_path:
                    # That version was gone; try the next one.
                    continue

                old_del_req = make_pre_authed_request(
                    req.environ, path=restored_path, method='DELETE',
                    headers=auth_token_header, swift_source='VW')
                del_resp = old_del_req.get_response(self.app)
                close_if_possible(del_resp.app_iter)
                if del_resp.status_int != HTTP_NOT_FOUND:
                    self._check_response_error(req, del_resp)
                    # else, well, it existed long enough to do the
                    # copy; we won't worry too much
                break
            marker_path = "/%s/%s/%s/%s" % (
                api_version, account_name, versions_cont,
                previous_version['name'].encode('utf-8'))
            # done restoring, redirect the delete to the marker
            req = make_pre_authed_request(
                req.environ, path=marker_path, method='DELETE',
                headers=auth_token_header, swift_source='VW')
        else:
            # there are older versions so copy the previous version to the
            # current object and delete the previous version
            prev_obj_name = previous_version['name'].encode('utf-8')
            restored_path = self._restore_data(
                req, versions_cont, api_version, account_name,
                container_name, object_name, prev_obj_name)
            if not restored_path:
                continue
            # redirect the original DELETE to the source of the reinstated
            # version object - we already auth'd original req so make a
            # pre-authed request
            req = make_pre_authed_request(
                req.environ, path=restored_path, method='DELETE',
                headers=auth_token_header, swift_source='VW')

        # remove 'X-If-Delete-At', since it is not for the older copy
        if 'X-If-Delete-At' in req.headers:
            del req.headers['X-If-Delete-At']
        break

    # handle DELETE request here in case it was modified
    return req.get_response(self.app)
def _segment_listing_iterator(self, req, version, account, container,
                              prefix, segments, first_byte=None,
                              last_byte=None):
    """
    Yield one dict per segment that overlaps the requested byte range,
    with keys 'path', 'first_byte' and 'last_byte'; fetch further listing
    pages as needed.

    :param req: request passed on to ``_get_container_listing``.
    :param version: api version, used to build segment paths.
    :param account: account holding the segments.
    :param container: container holding the segments.
    :param prefix: prefix passed on to ``_get_container_listing``.
    :param segments: the first page of the segment listing.
    :param first_byte: first requested byte; None means 0.
    :param last_byte: last requested byte; None means no upper bound.
    :raises ListingIterError: if fetching a further listing page fails.
    """
    # It's sort of hokey that this thing takes in the first page of
    # segments as an argument, but we need to compute the etag and content
    # length from the first page, and it's better to have a hokey
    # interface than to make redundant requests.
    first_byte = 0 if first_byte is None else first_byte
    last_byte = float("inf") if last_byte is None else last_byte

    marker = ''
    while True:
        for entry in segments:
            length = int(entry['bytes'])

            if first_byte >= length:
                # don't need any bytes from this segment
                first_byte = max(first_byte - length, -1)
                last_byte = max(last_byte - length, -1)
                continue
            if last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            name = entry['name']
            if isinstance(name, six.text_type):
                name = name.encode("utf-8")

            # We deliberately omit the etag and size here;
            # SegmentedIterable will check size and etag if
            # specified, but we don't want it to. DLOs only care
            # that the objects' names match the specified prefix.
            # SegmentedIterable will instead check that the data read
            # from each segment matches the response headers.
            seg_path = "/".join(["", version, account, container, name])
            range_start = first_byte if first_byte > 0 else None
            range_end = last_byte if last_byte < length - 1 else None
            yield {
                'path': seg_path,
                'first_byte': range_start,
                'last_byte': range_end}

            first_byte = max(first_byte - length, -1)
            last_byte = max(last_byte - length, -1)

        # a short page means that we're done with the listing; a negative
        # last_byte means the requested range has been fully served
        if (len(segments) < constraints.CONTAINER_LISTING_LIMIT or
                last_byte < 0):
            break

        marker = segments[-1]['name']
        error_response, segments = self._get_container_listing(
            req, version, account, container, prefix, marker)
        if error_response:
            # we've already started sending the response body to the
            # client, so all we can do is raise an exception to make the
            # WSGI server close the connection early
            close_if_possible(error_response.app_iter)
            raise ListingIterError(
                "Got status %d listing container /%s/%s" %
                (error_response.status_int, account, container))
def handle_PUT(self, req, start_response):
    """
    Serve a copy request: GET the source object, then PUT its content to
    the path of ``req``.

    A request with a non-zero content length is answered with
    HTTPBadRequest. A source GET whose status is
    ``HTTP_MULTIPLE_CHOICES`` or above is returned to the client as is.

    :param req: the copy request; the source is derived from its
                copy-from headers.
    :param start_response: WSGI start_response callable.
    :returns: the result of ``ssc_ctx.send_put_req`` on success, otherwise
              the result of calling the error response.
    """
    if req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte '
                              'body', request=req,
                              content_type='text/plain')(req.environ,
                                                         start_response)

    # Form the path of source object to be fetched
    ver, acct, _rest = req.split_path(2, 3, True)
    src_account_name = req.headers.get('X-Copy-From-Account')
    if src_account_name:
        src_account_name = check_account_format(req, src_account_name)
    else:
        src_account_name = acct
    src_container_name, src_obj_name = _check_copy_from_header(req)
    source_path = '/%s/%s/%s/%s' % (ver, src_account_name,
                                    src_container_name, src_obj_name)

    if req.environ.get('swift.orig_req_method', req.method) != 'POST':
        self.logger.info("Copying object from %s to %s" %
                         (source_path, req.path))

    # GET the source object, bail out on error
    ssc_ctx = ServerSideCopyWebContext(self.app, self.logger)
    source_resp = self._get_source_object(ssc_ctx, source_path, req)
    if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
        # Do not close source_resp.app_iter here: this response is handed
        # back to the WSGI server, which still has to iterate its body and
        # is responsible for closing it afterwards.
        return source_resp(source_resp.environ, start_response)

    # Create a new Request object based on the original req instance.
    # This will preserve env and headers.
    sink_req = Request.blank(req.path_info,
                             environ=req.environ, headers=req.headers)

    params = sink_req.params
    if params.get('multipart-manifest') == 'get':
        if 'X-Static-Large-Object' in source_resp.headers:
            params['multipart-manifest'] = 'put'
        if 'X-Object-Manifest' in source_resp.headers:
            del params['multipart-manifest']
            sink_req.headers['X-Object-Manifest'] = \
                source_resp.headers['X-Object-Manifest']
        sink_req.params = params

    # Set data source, content length and etag for the PUT request
    sink_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
    sink_req.content_length = source_resp.content_length
    sink_req.etag = source_resp.etag

    # We no longer need these headers
    sink_req.headers.pop('X-Copy-From', None)
    sink_req.headers.pop('X-Copy-From-Account', None)

    # If the copy request does not explicitly override content-type,
    # use the one present in the source object.
    if not req.headers.get('content-type'):
        sink_req.headers['Content-Type'] = \
            source_resp.headers['Content-Type']

    fresh_meta_flag = config_true_value(
        sink_req.headers.get('x-fresh-metadata', 'false'))

    def is_object_sysmeta(k):
        return is_sys_meta('object', k)

    if fresh_meta_flag or 'swift.post_as_copy' in sink_req.environ:
        # Post-as-copy: ignore new sysmeta, copy existing sysmeta
        remove_items(sink_req.headers, is_object_sysmeta)
        copy_header_subset(source_resp, sink_req, is_object_sysmeta)
    else:
        # Copy/update existing sysmeta and user meta
        _copy_headers_into(source_resp, sink_req)
        # Copy/update new metadata provided in request if any
        _copy_headers_into(req, sink_req)

    # Create response headers for PUT response
    resp_headers = self._create_response_headers(source_path,
                                                 source_resp, sink_req)

    put_resp = ssc_ctx.send_put_req(sink_req, resp_headers, start_response)
    # The source body was consumed through wsgi.input; release it now.
    close_if_possible(source_resp.app_iter)
    return put_resp
def handle_PUT(self, req, start_response):
    """
    Serve a copy (or post-as-copy) request: GET the source object, then
    PUT its content to the path of ``req``.

    A request with a non-zero content length is answered with
    HTTPBadRequest. A source GET whose status is
    ``HTTP_MULTIPLE_CHOICES`` or above is returned to the client as is.

    :param req: the copy request; the source is derived from its
                copy-from headers.
    :param start_response: WSGI start_response callable.
    :returns: the result of ``ssc_ctx.send_put_req`` on success, otherwise
              the result of calling the error response.
    """
    if req.content_length:
        return HTTPBadRequest(body='Copy requests require a zero byte '
                              'body', request=req,
                              content_type='text/plain')(req.environ,
                                                         start_response)

    # Form the path of source object to be fetched
    ver, acct, _rest = req.split_path(2, 3, True)
    src_account_name = req.headers.get('X-Copy-From-Account')
    if src_account_name:
        src_account_name = check_account_format(req, src_account_name)
    else:
        # No explicit source account: copy within the request's account.
        src_account_name = acct
    src_container_name, src_obj_name = _check_copy_from_header(req)
    source_path = '/%s/%s/%s/%s' % (ver, src_account_name,
                                    src_container_name, src_obj_name)

    if req.environ.get('swift.orig_req_method', req.method) != 'POST':
        self.logger.info("Copying object from %s to %s" %
                         (source_path, req.path))

    # GET the source object, bail out on error
    ssc_ctx = ServerSideCopyWebContext(self.app, self.logger)
    source_resp = self._get_source_object(ssc_ctx, source_path, req)
    if source_resp.status_int >= HTTP_MULTIPLE_CHOICES:
        return source_resp(source_resp.environ, start_response)

    # Create a new Request object based on the original request instance.
    # This will preserve original request environ including headers.
    sink_req = Request.blank(req.path_info, environ=req.environ)

    def is_object_sysmeta(k):
        return is_sys_meta('object', k)

    if 'swift.post_as_copy' in sink_req.environ:
        # Post-as-copy: ignore new sysmeta, copy existing sysmeta
        remove_items(sink_req.headers, is_object_sysmeta)
        copy_header_subset(source_resp, sink_req, is_object_sysmeta)
    elif config_true_value(req.headers.get('x-fresh-metadata', 'false')):
        # x-fresh-metadata only applies to copy, not post-as-copy: ignore
        # existing user metadata, update existing sysmeta with new
        copy_header_subset(source_resp, sink_req, is_object_sysmeta)
        copy_header_subset(req, sink_req, is_object_sysmeta)
    else:
        # First copy existing sysmeta, user meta and other headers from the
        # source to the sink, apart from headers that are conditionally
        # copied below and timestamps.
        exclude_headers = ('x-static-large-object', 'x-object-manifest',
                           'etag', 'content-type', 'x-timestamp',
                           'x-backend-timestamp')
        copy_header_subset(source_resp, sink_req,
                           lambda k: k.lower() not in exclude_headers)
        # now update with original req headers
        sink_req.headers.update(req.headers)

    params = sink_req.params
    if params.get('multipart-manifest') == 'get':
        # The manifest itself is being copied rather than the large
        # object's content.
        if 'X-Static-Large-Object' in source_resp.headers:
            params['multipart-manifest'] = 'put'
        if 'X-Object-Manifest' in source_resp.headers:
            del params['multipart-manifest']
            if 'swift.post_as_copy' not in sink_req.environ:
                sink_req.headers['X-Object-Manifest'] = \
                    source_resp.headers['X-Object-Manifest']
        sink_req.params = params

    # Set swift.source, data source, content length and etag
    # for the PUT request
    sink_req.environ['swift.source'] = 'SSC'
    sink_req.environ['wsgi.input'] = FileLikeIter(source_resp.app_iter)
    sink_req.content_length = source_resp.content_length
    if (source_resp.status_int == HTTP_OK and
            'X-Static-Large-Object' not in source_resp.headers and
            ('X-Object-Manifest' not in source_resp.headers or
             req.params.get('multipart-manifest') == 'get')):
        # copy source etag so that copied content is verified, unless:
        #  - not a 200 OK response: source etag may not match the actual
        #    content, for example with a 206 Partial Content response to a
        #    ranged request
        #  - SLO manifest: etag cannot be specified in manifest PUT; SLO
        #    generates its own etag value which may differ from source
        #  - SLO: etag in SLO response is not hash of actual content
        #  - DLO: etag in DLO response is not hash of actual content
        sink_req.headers['Etag'] = source_resp.etag
    else:
        # since we're not copying the source etag, make sure that any
        # container update override values are not copied.
        remove_items(sink_req.headers, lambda k: k.startswith(
            'X-Object-Sysmeta-Container-Update-Override-'))

    # We no longer need these headers
    sink_req.headers.pop('X-Copy-From', None)
    sink_req.headers.pop('X-Copy-From-Account', None)

    # If the copy request does not explicitly override content-type,
    # use the one present in the source object.
    if not req.headers.get('content-type'):
        sink_req.headers['Content-Type'] = \
            source_resp.headers['Content-Type']

    # Create response headers for PUT response
    resp_headers = self._create_response_headers(source_path,
                                                 source_resp, sink_req)

    put_resp = ssc_ctx.send_put_req(sink_req, resp_headers, start_response)
    # The source body was consumed through wsgi.input; release it now.
    close_if_possible(source_resp.app_iter)
    return put_resp
def _requests_to_bytes_iter(self):
    """
    Drain self._coalesce_requests, issue each segment request and stream
    the bytes of the responses.

    Yields 2-tuples (segment-name, byte-chunk). The segment name is used
    for logging.

    :raises SegmentError: on an unsuccessful segment GET, on an etag or
        length mismatch against the expected values, or when the MD5 of
        the bytes read does not match the response etag.
    """
    for item, expected_etag, expected_size in self._coalesce_requests():
        if isinstance(item, bytes):  # ugly, awful overloading
            yield ('data segment', item)
            continue

        request = item
        response = request.get_response(self.app)

        if not is_success(response.status_int):
            close_if_possible(response.app_iter)
            failure = (self.name, response.status_int, request.path)
            raise SegmentError(
                'While processing manifest %s, '
                'got %d while retrieving %s' % failure)
        elif ((expected_etag and (response.etag != expected_etag)) or
                (expected_size and
                 (response.content_length != expected_size) and
                 not request.range)):
            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            close_if_possible(response.app_iter)
            raise SegmentError(
                'Object segment no longer valid: '
                '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                '%(r_size)s != %(s_size)s.' %
                {'path': request.path, 'r_etag': response.etag,
                 'r_size': response.content_length,
                 's_etag': expected_etag,
                 's_size': expected_size})

        self.current_resp = response

        digest = None
        if response.etag and not request.headers.get('Range'):
            # Only calculate the MD5 if we can use it to validate
            digest = hashlib.md5()

        parts = maybe_multipart_byteranges_to_document_iters(
            response.app_iter,
            response.headers['Content-Type'])

        for piece in itertools.chain.from_iterable(parts):
            if digest:
                digest.update(piece)
            yield (request.path, piece)
        close_if_possible(response.app_iter)

        if digest and digest.hexdigest() != response.etag:
            raise SegmentError(
                "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                " %(etag)s, but object MD5 was actually %(actual)s" %
                {'seg': request.path, 'etag': response.etag,
                 'name': self.name, 'actual': digest.hexdigest()})
def _internal_iter(self):
    """
    Generator that fetches every segment named by ``self.listing_iter``
    and yields the body chunks, limited to ``self.response_body_length``
    bytes when that is not None.

    :raises SegmentError: when ``self.max_get_time`` is exceeded, a
        segment GET is not successful, a segment's etag or length differs
        from the listing, more or fewer bytes than expected are
        available, or the MD5 of a fully-read segment differs from its
        etag. The error is logged before being re-raised.
    :raises ListingIterError: propagated from ``self.listing_iter`` after
        being logged.
    """
    start_time = time.time()
    # None means the total length is unknown: no byte accounting is done.
    bytes_left = self.response_body_length

    try:
        for seg_path, seg_etag, seg_size, first_byte, last_byte \
                in self.listing_iter:
            if time.time() - start_time > self.max_get_time:
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'max LO GET time of %ds exceeded' %
                    (self.name, self.max_get_time))
            # Make sure that the segment is a plain old object, not some
            # flavor of large object, so that we can check its MD5.
            path = seg_path + '?multipart-manifest=get'
            seg_req = make_subrequest(
                self.req.environ, path=path, method='GET',
                headers={'x-auth-token': self.req.headers.get(
                    'x-auth-token')},
                agent=('%(orig)s ' + self.ua_suffix),
                swift_source=self.swift_source)
            if first_byte is not None or last_byte is not None:
                seg_req.headers['Range'] = "bytes=%s-%s" % (
                    # The 0 is to avoid having a range like "bytes=-10",
                    # which actually means the *last* 10 bytes.
                    '0' if first_byte is None else first_byte,
                    '' if last_byte is None else last_byte)

            seg_resp = seg_req.get_response(self.app)
            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'ERROR: While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_path))

            elif ((seg_etag and (seg_resp.etag != seg_etag)) or
                    (seg_size and (seg_resp.content_length != seg_size) and
                     not seg_req.range)):
                # The content-length check is for security reasons. Seems
                # possible that an attacker could upload a >1mb object and
                # then replace it with a much smaller object with same
                # etag. Then create a big nested SLO that calls that
                # object many times which would hammer our obj servers. If
                # this is a range request, don't check content-length
                # because it won't match.
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag,
                     's_size': seg_size})
            else:
                # Record the response currently being streamed.
                self.current_resp = seg_resp

            seg_hash = hashlib.md5()
            for chunk in seg_resp.app_iter:
                seg_hash.update(chunk)

                if bytes_left is None:
                    yield chunk
                elif bytes_left >= len(chunk):
                    yield chunk
                    bytes_left -= len(chunk)
                else:
                    # The segment holds more data than is still expected:
                    # emit only the remainder, then abort.
                    yield chunk[:bytes_left]
                    bytes_left -= len(chunk)
                    close_if_possible(seg_resp.app_iter)
                    raise SegmentError(
                        'Too many bytes for %(name)s; truncating in '
                        '%(seg)s with %(left)d bytes left' %
                        {'name': self.name, 'seg': seg_req.path,
                         'left': bytes_left})
            close_if_possible(seg_resp.app_iter)

            # The MD5 is only compared when the whole segment was
            # requested (no byte range).
            if seg_resp.etag and seg_hash.hexdigest() != seg_resp.etag \
                    and first_byte is None and last_byte is None:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name, 'actual': seg_hash.hexdigest()})

        if bytes_left:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except (ListingIterError, SegmentError):
        self.logger.exception(_('ERROR: An error occurred '
                                'while retrieving segments'))
        raise
def _perform_subrequest(self, orig_env, attributes, fp, keys):
    """
    Build and run the pre-authed PUT subrequest for one uploaded file.

    The form attributes are validated, copied into a new WSGI environment
    derived from ``orig_env``, and the form signature is checked against
    every key in ``keys`` before the subrequest is sent to ``self.app``.

    :param orig_env: The WSGI environment dict; will only be used to form
                     a new env for the subrequest.
    :param attributes: dict of the attributes of the form so far.
    :param fp: The file-like object containing the request body.
    :param keys: The account keys to validate the signature with.
    :returns: (status_line, headers_list)
    :raises FormUnauthorized: if there are no keys, the form has expired,
                              or no key yields the supplied signature
    :raises FormInvalid: if max_file_size, x_delete_at, x_delete_after or
                         expires is not an integer
    """
    if not keys:
        raise FormUnauthorized('invalid signature')

    try:
        max_file_size = int(attributes.get('max_file_size') or 0)
    except ValueError:
        raise FormInvalid('max_file_size not an integer')

    subenv = make_pre_authed_env(orig_env, 'PUT', agent=None,
                                 swift_source='FP')
    subenv.pop('QUERY_STRING', None)
    subenv['HTTP_TRANSFER_ENCODING'] = 'chunked'
    subenv['wsgi.input'] = _CappedFileLikeObject(fp, max_file_size)

    # Append the file name to the path, adding a separator first when the
    # path is short and does not already end with one.
    path_info = subenv['PATH_INFO']
    if not path_info.endswith('/') and path_info.count('/') < 4:
        path_info += '/'
    subenv['PATH_INFO'] = path_info + (attributes['filename'] or 'filename')

    if 'x_delete_at' in attributes:
        try:
            subenv['HTTP_X_DELETE_AT'] = int(attributes['x_delete_at'])
        except ValueError:
            raise FormInvalid('x_delete_at not an integer: '
                              'Unix timestamp required.')
    if 'x_delete_after' in attributes:
        try:
            subenv['HTTP_X_DELETE_AFTER'] = int(
                attributes['x_delete_after'])
        except ValueError:
            raise FormInvalid('x_delete_after not an integer: '
                              'Number of seconds required.')
    if 'content-type' in attributes:
        subenv['CONTENT_TYPE'] = (
            attributes['content-type'] or 'application/octet-stream')
    if 'content-encoding' in attributes:
        subenv['HTTP_CONTENT_ENCODING'] = attributes['content-encoding']

    try:
        if int(attributes.get('expires') or 0) < time():
            raise FormUnauthorized('form expired')
    except ValueError:
        raise FormInvalid('expired not an integer')

    # The signed payload is built from the original path plus the form's
    # redirect, size, count and expiry attributes.
    signed_fields = (
        orig_env['PATH_INFO'],
        attributes.get('redirect') or '',
        attributes.get('max_file_size') or '0',
        attributes.get('max_file_count') or '0',
        attributes.get('expires') or '0')
    hmac_body = '%s\n%s\n%s\n%s\n%s' % signed_fields
    if six.PY3:
        hmac_body = hmac_body.encode('utf-8')

    # Every key is evaluated (no early exit), as in a plain loop that only
    # records whether any comparison succeeded.
    supplied_sig = attributes.get('signature') or 'invalid'
    key_matches = [
        streq_const_time(hmac.new(key, hmac_body, sha1).hexdigest(),
                         supplied_sig)
        for key in keys]
    if not any(key_matches):
        raise FormUnauthorized('invalid signature')

    captured_status = [None]
    captured_headers = [None]
    capped_input = subenv['wsgi.input']

    def _start_response(status, headers, exc_info=None):
        if capped_input.file_size_exceeded:
            raise EOFError("max_file_size exceeded")

        captured_status[0] = status
        captured_headers[0] = headers

    # reiterate to ensure the response started,
    # but drop any data on the floor
    close_if_possible(reiterate(self.app(subenv, _start_response)))
    return captured_status[0], captured_headers[0]
def _internal_iter(self):
    """
    Yield the body of a large object from its coalesced segment requests.

    Each request from ``self._coalesce_requests()`` is sent to ``self.app``.
    The response is checked against the expected etag and size, then its
    (possibly multipart-byteranges) body is yielded chunk by chunk. When the
    response has an etag and the request carries no Range header, the MD5 of
    the received bytes is compared with that etag.

    ListingIterError and SegmentError are logged; they are re-raised only
    while ``self.validated_first_segment`` is false. The current segment
    response is always closed on exit.
    """
    bytes_left = self.response_body_length

    try:
        for seg_req, seg_etag, seg_size in self._coalesce_requests():
            seg_resp = seg_req.get_response(self.app)

            if not is_success(seg_resp.status_int):
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'While processing manifest %s, '
                    'got %d while retrieving %s' %
                    (self.name, seg_resp.status_int, seg_req.path))

            # The content-length check is for security reasons. Seems
            # possible that an attacker could upload a >1mb object and
            # then replace it with a much smaller object with same
            # etag. Then create a big nested SLO that calls that
            # object many times which would hammer our obj servers. If
            # this is a range request, don't check content-length
            # because it won't match.
            segment_changed = (
                (seg_etag and (seg_resp.etag != seg_etag)) or
                (seg_size and (seg_resp.content_length != seg_size) and
                 not seg_req.range))
            if segment_changed:
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Object segment no longer valid: '
                    '%(path)s etag: %(r_etag)s != %(s_etag)s or '
                    '%(r_size)s != %(s_size)s.' %
                    {'path': seg_req.path, 'r_etag': seg_resp.etag,
                     'r_size': seg_resp.content_length,
                     's_etag': seg_etag,
                     's_size': seg_size})

            self.current_resp = seg_resp

            # Only calculate the MD5 if we can use it to validate
            seg_hash = None
            if seg_resp.etag and not seg_req.headers.get('Range'):
                seg_hash = hashlib.md5()

            document_iters = maybe_multipart_byteranges_to_document_iters(
                seg_resp.app_iter,
                seg_resp.headers['Content-Type'])

            for chunk in itertools.chain.from_iterable(document_iters):
                if seg_hash:
                    seg_hash.update(chunk)

                if bytes_left is None:
                    yield chunk
                    continue

                if bytes_left >= len(chunk):
                    yield chunk
                    bytes_left -= len(chunk)
                    continue

                # More data than expected: send what is owed, then abort.
                yield chunk[:bytes_left]
                bytes_left -= len(chunk)
                close_if_possible(seg_resp.app_iter)
                raise SegmentError(
                    'Too many bytes for %(name)s; truncating in '
                    '%(seg)s with %(left)d bytes left' %
                    {'name': self.name, 'seg': seg_req.path,
                     'left': bytes_left})
            close_if_possible(seg_resp.app_iter)

            if seg_hash and seg_hash.hexdigest() != seg_resp.etag:
                raise SegmentError(
                    "Bad MD5 checksum in %(name)s for %(seg)s: headers had"
                    " %(etag)s, but object MD5 was actually %(actual)s" %
                    {'seg': seg_req.path, 'etag': seg_resp.etag,
                     'name': self.name, 'actual': seg_hash.hexdigest()})

        if bytes_left:
            raise SegmentError(
                'Not enough bytes for %s; closing connection' % self.name)
    except (ListingIterError, SegmentError) as err:
        self.logger.error(err)
        if not self.validated_first_segment:
            raise
    finally:
        if self.current_resp:
            close_if_possible(self.current_resp.app_iter)
def handle_slo_get_or_head(self, req, start_response):
    """
    Serve a GET or HEAD that may be aimed at a static large object manifest.

    The request is passed down the pipeline first. The response is then
    returned untouched (not an SLO), returned as the manifest itself
    (``multipart-manifest=get``), answered directly from the stored SLO
    etag and size (HEAD or a 304/412 conditional response), or turned into
    the full large-object response.

    :param req: swob.Request object; is a GET or HEAD request aimed at
                what may be a static large object manifest (or may not).
    :param start_response: WSGI start_response callable
    :returns: an iterator suitable for sending up the WSGI chain
    """
    if req.params.get('multipart-manifest') != 'get':
        # If this object is an SLO manifest, we may have saved off the
        # large object etag during the original PUT. Send an
        # X-Backend-Etag-Is-At header so that, if the SLO etag *was*
        # saved, we can trust the object-server to respond appropriately
        # to If-Match/If-None-Match requests.
        update_etag_is_at_header(req, SYSMETA_SLO_ETAG)
    resp_iter = self._app_call(req.environ)

    def respond_with(body_iter):
        # Start the response with the status/headers currently stored on
        # self and hand back the given body iterator.
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return body_iter

    # make sure this response is for a static large object manifest
    slo_marker = slo_etag = slo_size = None
    for name, value in self._response_headers:
        lowered = name.lower()
        if lowered == SYSMETA_SLO_ETAG:
            slo_etag = value
        elif lowered == SYSMETA_SLO_SIZE:
            slo_size = value
        elif (lowered == 'x-static-large-object' and
                config_true_value(value)):
            slo_marker = value

        if slo_marker and slo_etag and slo_size:
            break

    if not slo_marker:
        # Not a static large object manifest. Just pass it through.
        return respond_with(resp_iter)

    # Handle pass-through request for the manifest itself
    if req.params.get('multipart-manifest') == 'get':
        if req.params.get('format') == 'raw':
            resp_iter = self.convert_segment_listing(
                self._response_headers, resp_iter)
        else:
            self._response_headers = [
                ('Content-Type', 'application/json; charset=utf-8')
                if name.lower() == 'content-type' else (name, value)
                for name, value in self._response_headers]
        return respond_with(resp_iter)

    is_conditional = self._response_status.startswith(
        ('304', '412')) and (req.if_match or req.if_none_match)

    if slo_etag and slo_size and (
            req.method == 'HEAD' or is_conditional):
        # Since we have length and etag, we can respond immediately
        for idx, (name, _unused) in enumerate(self._response_headers):
            lowered = name.lower()
            if lowered == 'etag':
                self._response_headers[idx] = (name, '"%s"' % slo_etag)
            elif lowered == 'content-length' and not is_conditional:
                self._response_headers[idx] = (name, slo_size)
        return respond_with(resp_iter)

    if self._need_to_refetch_manifest(req):
        # Flag the close of the first response as not being a client
        # disconnect, then ask again with a plain GET subrequest.
        req.environ['swift.non_client_disconnect'] = True
        close_if_possible(resp_iter)
        del req.environ['swift.non_client_disconnect']

        get_req = make_subrequest(
            req.environ, method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartGET', swift_source='SLO')
        resp_iter = self._app_call(get_req.environ)

    # Any Content-Range from a manifest is almost certainly wrong for the
    # full large object.
    resp_headers = [(name, value)
                    for name, value in self._response_headers
                    if name.lower() != 'content-range']

    response = self.get_or_head_response(req, resp_headers, resp_iter)
    return response(req.environ, start_response)
def close(self):
    """
    Close ``self.source`` via ``close_if_possible``.

    :returns: whatever ``close_if_possible`` returns for ``self.source``
    """
    source = self.source
    return close_if_possible(source)
def handle_slo_get_or_head(self, req, start_response):
    """
    Serve a GET or HEAD that may be aimed at a static large object manifest.

    The request is passed down the pipeline first. The response is then
    returned untouched (not an SLO), returned as the manifest itself
    (``multipart-manifest=get``), answered directly from the stored SLO
    etag and size (HEAD or a 304/412 conditional response), or turned into
    the full large-object response.

    :param req: swob.Request object; is a GET or HEAD request aimed at
                what may be a static large object manifest (or may not).
    :param start_response: WSGI start_response callable
    :returns: an iterator suitable for sending up the WSGI chain
    """
    if req.params.get('multipart-manifest') != 'get':
        # If this object is an SLO manifest, we may have saved off the
        # large object etag during the original PUT. Send an
        # X-Backend-Etag-Is-At header so that, if the SLO etag *was*
        # saved, we can trust the object-server to respond appropriately
        # to If-Match/If-None-Match requests.
        update_etag_is_at_header(req, SYSMETA_SLO_ETAG)
    resp_iter = self._app_call(req.environ)

    def respond_with(body_iter):
        # Start the response with the status/headers currently stored on
        # self and hand back the given body iterator.
        start_response(self._response_status,
                       self._response_headers,
                       self._response_exc_info)
        return body_iter

    # make sure this response is for a static large object manifest
    slo_marker = slo_etag = slo_size = None
    for name, value in self._response_headers:
        lowered = name.lower()
        if lowered == SYSMETA_SLO_ETAG:
            slo_etag = value
        elif lowered == SYSMETA_SLO_SIZE:
            slo_size = value
        elif (lowered == 'x-static-large-object' and
                config_true_value(value)):
            slo_marker = value

        if slo_marker and slo_etag and slo_size:
            break

    if not slo_marker:
        # Not a static large object manifest. Just pass it through.
        return respond_with(resp_iter)

    # Handle pass-through request for the manifest itself
    if req.params.get('multipart-manifest') == 'get':
        if req.params.get('format') == 'raw':
            resp_iter = self.convert_segment_listing(
                self._response_headers, resp_iter)
        else:
            self._response_headers = [
                ('Content-Type', 'application/json; charset=utf-8')
                if name.lower() == 'content-type' else (name, value)
                for name, value in self._response_headers]
        return respond_with(resp_iter)

    is_conditional = self._response_status.startswith(
        ('304', '412')) and (req.if_match or req.if_none_match)

    if slo_etag and slo_size and (
            req.method == 'HEAD' or is_conditional):
        # Since we have length and etag, we can respond immediately
        for idx, (name, _unused) in enumerate(self._response_headers):
            lowered = name.lower()
            if lowered == 'etag':
                self._response_headers[idx] = (name, '"%s"' % slo_etag)
            elif lowered == 'content-length' and not is_conditional:
                self._response_headers[idx] = (name, slo_size)
        return respond_with(resp_iter)

    if self._need_to_refetch_manifest(req):
        # Flag the close of the first response as not being a client
        # disconnect, then ask again with a plain GET subrequest.
        req.environ['swift.non_client_disconnect'] = True
        close_if_possible(resp_iter)
        del req.environ['swift.non_client_disconnect']

        get_req = make_subrequest(
            req.environ, method='GET',
            headers={'x-auth-token': req.headers.get('x-auth-token')},
            agent='%(orig)s SLO MultipartGET', swift_source='SLO')
        resp_iter = self._app_call(get_req.environ)

    # Any Content-Range from a manifest is almost certainly wrong for the
    # full large object.
    resp_headers = [(name, value)
                    for name, value in self._response_headers
                    if name.lower() != 'content-range']

    response = self.get_or_head_response(req, resp_headers, resp_iter)
    return response(req.environ, start_response)
def _segment_listing_iterator(self, req, version, account, container,
                              prefix, segments, first_byte=None,
                              last_byte=None):
    """
    Yield one tuple per segment needed to serve the requested byte range.

    Walks the container listing page by page, starting with the page passed
    in as ``segments`` and fetching further pages with
    ``self._get_container_listing``. Segments that lie wholly outside
    ``first_byte``..``last_byte`` are skipped.

    :param req: the request, passed through to the container listing call
    :param version: API version path component
    :param account: account path component
    :param container: container path component
    :param prefix: segment name prefix, passed to the listing call
    :param segments: first page of the listing; a list of dicts with at
                     least ``'name'`` and ``'bytes'`` keys
    :param first_byte: offset of the first wanted byte in the whole object;
                       None means 0
    :param last_byte: offset of the last wanted byte in the whole object;
                      None means through the end
    :returns: a generator of
              ``(obj path, etag, size, first byte, last byte)`` tuples, in
              which etag and size are always None and the byte offsets are
              relative to the segment (None meaning "no limit on this side")
    :raises ListingIterError: if fetching a later listing page returns an
                              error response
    """
    # It's sort of hokey that this thing takes in the first page of
    # segments as an argument, but we need to compute the etag and content
    # length from the first page, and it's better to have a hokey
    # interface than to make redundant requests.
    if first_byte is None:
        first_byte = 0
    if last_byte is None:
        last_byte = float("inf")

    marker = ''

    while True:
        for segment in segments:
            seg_length = int(segment['bytes'])

            if first_byte >= seg_length:
                # don't need any bytes from this segment
                first_byte = max(first_byte - seg_length, -1)
                last_byte = max(last_byte - seg_length, -1)
                continue
            elif last_byte < 0:
                # no bytes are needed from this or any future segment
                break

            seg_name = segment['name']
            # Only Python 2 needs the name as a UTF-8 byte string. On
            # Python 3 encoding would produce bytes, which cannot be
            # joined with the str path components below.
            if six.PY2 and isinstance(seg_name, six.text_type):
                seg_name = seg_name.encode("utf-8")

            # (obj path, etag, size, first byte, last byte)
            yield ("/" + "/".join((version, account, container,
                                   seg_name)),
                   # We deliberately omit the etag and size here;
                   # SegmentedIterable will check size and etag if
                   # specified, but we don't want it to. DLOs only care
                   # that the objects' names match the specified prefix.
                   None, None,
                   (None if first_byte <= 0 else first_byte),
                   (None if last_byte >= seg_length - 1 else last_byte))

            # Shift the range so it is relative to the next segment.
            first_byte = max(first_byte - seg_length, -1)
            last_byte = max(last_byte - seg_length, -1)

        if len(segments) < constraints.CONTAINER_LISTING_LIMIT:
            # a short page means that we're done with the listing
            break
        elif last_byte < 0:
            break

        marker = segments[-1]['name']
        error_response, segments = self._get_container_listing(
            req, version, account, container, prefix, marker)
        if error_response:
            # we've already started sending the response body to the
            # client, so all we can do is raise an exception to make the
            # WSGI server close the connection early
            close_if_possible(error_response.app_iter)
            raise ListingIterError(
                "Got status %d listing container /%s/%s" %
                (error_response.status_int, account, container))