def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch a sub-manifest object and return its body as a single string.

    A GET subrequest for ``/<version>/<acc>/<con>/<obj>`` is built from the
    environ of ``req`` (forwarding only its ``x-auth-token`` header) and
    sent to ``self.app``.

    :param req: the request on whose behalf the sub-manifest is fetched
    :param version: API version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :returns: the concatenated chunks of the sub-manifest response body
    :raises ListingIterError: if the subrequest is not successful, or if
                              reading the body raises ``ValueError``
    """
    sub_req = make_subrequest(
        req.environ,
        path='/'.join(['', version, acc, con, obj]),
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s SLO MultipartGET', swift_source='SLO')
    sub_resp = sub_req.get_response(self.app)

    if not sub_resp.is_success:
        # closing_if_possible() is used as a context manager below; the
        # previous bare call never entered it, so the failed response's
        # iterator was left open.  Enter it explicitly to release it.
        with closing_if_possible(sub_resp.app_iter):
            pass
        raise ListingIterError(
            'while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return ''.join(sub_resp.app_iter)
    except ValueError as err:
        raise ListingIterError(
            'while fetching %s, String-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))
def __call__(self, env, start_response):
    """
    WSGI entry point that reports an object DELETE answered with 404 as a
    204 when the container itself is reachable.

    The wrapped app is always called first.  Only for a four-part
    (object) path, a DELETE method and a 404 status is the container info
    consulted; if its status is a success, the response status becomes
    ``204 No Content``, any Content-Length header is set to ``'0'`` and
    the original body is drained and replaced by an empty one.

    :param env: WSGI environment
    :param start_response: WSGI start_response callable
    :returns: the response body iterable
    """
    ctx = WSGIContext(self.app)
    app_iter = ctx._app_call(env)

    try:
        split_path(env['PATH_INFO'], 4, 4, True)
        is_object_request = True
    except ValueError:
        # not an object request; don't care
        is_object_request = False

    if (is_object_request and env['REQUEST_METHOD'] == 'DELETE'
            and ctx._response_status[:3] == '404'):
        # Should be a cache hit
        container_info = get_container_info(
            env, self.app, swift_source='S3')
        if is_success(container_info.get('status')):
            # Convert to a successful response
            ctx._response_status = '204 No Content'
            rewritten_headers = []
            for name, value in ctx._response_headers:
                if name.lower() == 'content-length':
                    value = '0'
                rewritten_headers.append((name, value))
            ctx._response_headers = rewritten_headers

            with closing_if_possible(app_iter):
                for _chunk in app_iter:
                    pass  # should be short; just drop it on the floor
            app_iter = ['']

    start_response(ctx._response_status, ctx._response_headers)
    return app_iter
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Build the GET or HEAD response for a manifest from its JSON body.

    The manifest body is read from ``resp_iter`` and parsed; a body that
    is not valid JSON is treated as an empty segment list.  An MD5 is
    accumulated over the segments' hashes (``"<hash>:<range>;"`` for
    ranged segments) and the segment lengths are summed; both replace the
    Etag and Content-Length headers of the original response.

    :param req: the client request
    :param resp_headers: list of (name, value) header pairs
    :param resp_iter: iterable yielding the manifest body
    :returns: result of ``_manifest_head_response`` for HEAD requests,
              otherwise of ``_manifest_get_response``
    """
    with closing_if_possible(resp_iter):
        manifest_json = ''.join(resp_iter)

    try:
        segments = json.loads(manifest_json)
    except ValueError:
        segments = []

    slo_etag = md5()
    total_bytes = 0
    for segment in segments:
        seg_range = segment.get('range')
        if seg_range:
            slo_etag.update('%s:%s;' % (segment['hash'], seg_range))
        else:
            slo_etag.update(segment['hash'])

        if config_true_value(segment.get('sub_slo')):
            override_bytes_from_content_type(
                segment, logger=self.slo.logger)
        total_bytes += self._segment_length(segment)

    replaced = ('etag', 'content-length')
    response_headers = []
    for name, value in resp_headers:
        if name.lower() not in replaced:
            response_headers.append((name, value))
    response_headers.append(('Content-Length', str(total_bytes)))
    response_headers.append(('Etag', '"%s"' % slo_etag.hexdigest()))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    return self._manifest_get_response(
        req, total_bytes, response_headers, segments)
def call_dlo(self, req, app=None, expect_exception=False):
    """
    Run ``req`` through a WSGI app and collect the complete response.

    :param req: request to send; gets a default User-Agent if it has none
    :param app: WSGI app to call, defaults to ``self.dlo``
    :param expect_exception: when true, an exception raised while reading
                             the body is captured instead of propagated
    :returns: ``(status, headers, body)``, with the caught exception (or
              None) appended as a fourth item when ``expect_exception``
              is true
    """
    target_app = self.dlo if app is None else app
    req.headers.setdefault("User-Agent", "Soap Opera")

    captured = {'status': None, 'headers': None}

    def start_response(s, h, ei=None):
        captured['status'] = s
        captured['headers'] = h

    body_iter = target_app(req.environ, start_response)
    body = ''
    caught_exc = None
    try:
        # appease the close-checker
        with closing_if_possible(body_iter):
            for chunk in body_iter:
                body += chunk
    except Exception as exc:
        if not expect_exception:
            raise
        caught_exc = exc

    result = (captured['status'], captured['headers'], body)
    if expect_exception:
        result += (caught_exc,)
    return result
def call_app(self, req, app=None):
    """
    Run ``req`` through a WSGI app and collect the complete response.

    ``self.authorized`` is reset to an empty list, and unless the request
    already carries a ``swift.authorize`` callback, one is installed that
    records every request it is asked to authorize in that list.

    :param req: request to send; gets a default User-Agent if it has none
    :param app: WSGI app to call, defaults to ``self.app``
    :returns: ``(status, headers, body)``
    """
    target_app = self.app if app is None else app
    self.authorized = []

    def authorize(req):
        self.authorized.append(req)

    req.environ.setdefault('swift.authorize', authorize)
    req.headers.setdefault("User-Agent", "Melted Cheddar")

    captured = {'status': None, 'headers': None}

    def start_response(s, h, ei=None):
        captured['status'] = s
        captured['headers'] = h

    body_iter = target_app(req.environ, start_response)
    with utils.closing_if_possible(body_iter):
        body = b''.join(body_iter)
    return captured['status'], captured['headers'], body
def _get_container_listing(self, req, version, account, container,
                           prefix, marker=''):
    """
    Issue a container GET subrequest and return its parsed JSON listing.

    :param req: the request whose environ and auth token are reused
    :param version: API version path component (native string)
    :param account: account name (native string)
    :param container: container name (native string)
    :param prefix: listing prefix (native string)
    :param marker: listing marker (native string); omitted from the
                   query when empty
    :returns: ``(None, listing)`` on success, ``(response, None)`` when
              the subrequest fails; for a HEAD ``req`` the failed
              response body is blanked first
    """
    container_path = '/'.join([
        '', str_to_wsgi(version),
        str_to_wsgi(account), str_to_wsgi(container)])
    con_req = make_subrequest(
        req.environ,
        path=wsgi_quote(container_path),
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s DLO MultipartGET', swift_source='DLO')

    query = 'prefix=%s' % quote(prefix)
    if marker:
        query += '&marker=%s' % quote(marker)
    con_req.query_string = query

    con_resp = con_req.get_response(self.dlo.app)
    if is_success(con_resp.status_int):
        with closing_if_possible(con_resp.app_iter):
            return None, json.loads(b''.join(con_resp.app_iter))

    if req.method == 'HEAD':
        con_resp.body = b''
    return con_resp, None
def multipart_response_iter(self, resp, boundary, body_key, crypto_meta):
    """
    Decrypt the body of a multipart MIME document response.

    For each part, the boundary line and the part headers are re-emitted
    unchanged, then the part body is read in ``DECRYPT_CHUNK_SIZE`` pieces
    and decrypted with a context positioned at the part's first byte.

    :param resp: application response
    :param boundary: multipart boundary string
    :param body_key: decryption key for the response body
    :param crypto_meta: crypto_meta for the response body
    :return: generator for decrypted response body
    """
    with closing_if_possible(resp):
        doc_parts = multipart_byteranges_to_document_iters(
            FileLikeIter(resp), boundary)
        for start, _end, _size, part_headers, part_body in doc_parts:
            yield "--" + boundary + "\r\n"
            for header_pair in part_headers:
                yield "%s: %s\r\n" % header_pair
            yield "\r\n"

            decrypter = self.crypto.create_decryption_ctxt(
                body_key, crypto_meta['iv'], start)
            while True:
                chunk = part_body.read(DECRYPT_CHUNK_SIZE)
                if chunk == '':
                    break
                yield decrypter.update(chunk)
            yield "\r\n"

        yield "--" + boundary + "--"
def delete_object(self, account, container, obj,
                  acceptable_statuses=(2, HTTP_NOT_FOUND), headers=None):
    """
    Delete an object.

    :param account: the object's account
    :param container: the object's container
    :param obj: the object
    :param acceptable_statuses: statuses considered valid responses,
                                defaults to (2, HTTP_NOT_FOUND)
    :param headers: extra headers to send with the request
    :raises UnexpectedResponse: when the request fails to get a response
                                with an acceptable status
    :raises Exception: when the code fails in an unexpected way
    """
    request_headers = headers or {}
    object_path = self.make_path(account, container, obj)
    resp = self.make_request(
        'DELETE', object_path, request_headers, acceptable_statuses)

    # Read the response body to the end so the proxy-server does not see
    # an unexpected disconnect.
    with closing_if_possible(resp.app_iter):
        for _discarded in resp.app_iter:
            pass
def PUT(self, req):
    """
    HTTP PUT request handler.

    After authorization, and only when ``delete_slo_parts`` is enabled and
    the container has no ``versions-location``, the existing object is
    inspected; if it is an SLO its manifest is remembered so that its
    parts can be deleted once the new object is stored successfully.
    A failure during that inspection is logged and otherwise ignored.

    :param req: the PUT request
    :returns: the authorization, constraint-check, link or store response
    """
    container_info = self.container_info(
        self.account_name, self.container_name, req)
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']

    # is request authorized
    if 'swift.authorize' in req.environ:
        denial = req.environ['swift.authorize'](req)
        if denial:
            return denial

    previous_manifest = None
    # If versioning is disabled, we must check if the object exists.
    # If it's a SLO, we will have to delete the parts if the current
    # operation is a success.
    if (self.app.delete_slo_parts and
            not container_info['sysmeta'].get('versions-location', None)):
        try:
            dest_info = get_object_info(req.environ, self.app)
            if 'slo-size' in dest_info['sysmeta']:
                manifest_env = req.environ.copy()
                manifest_env['QUERY_STRING'] = 'multipart-manifest=get'
                manifest_req = make_subrequest(manifest_env, 'GET')
                manifest_resp = manifest_req.get_response(self.app)
                previous_manifest = json.loads(manifest_resp.body)
        except Exception as exc:
            message = ('Failed to check existence of %s. If '
                       'overwriting a SLO, old parts may '
                       'remain. Error was: %s') % (req.path, exc)
            self.app.logger.warn(message)

    self._update_content_type(req)
    self._update_x_timestamp(req)

    # check constraints on object name and request headers
    error_response = check_object_creation(req, self.object_name)
    if not error_response:
        error_response = check_content_type(req)
    if error_response:
        return error_response

    if req.headers.get('Oio-Copy-From'):
        return self._link_object(req)

    body_reader = req.environ['wsgi.input']
    if req.content_length:
        body_reader = ExpectedSizeReader(body_reader, req.content_length)

    headers = self._prepare_headers(req)
    with closing_if_possible(body_reader):
        resp = self._store_object(req, body_reader, headers)

    if previous_manifest and resp.is_success:
        self.app.logger.debug(
            'Previous object %s was a SLO, deleting parts', req.path)
        self._delete_slo_parts(req, previous_manifest)
    return resp
def multipart_response_iter(self, resp, boundary, body_key, crypto_meta):
    """
    Decrypt the body of a multipart MIME document response.

    For each part, the boundary line and the part headers (converted with
    ``wsgi_to_bytes``) are re-emitted, then the part body is read in
    ``DECRYPT_CHUNK_SIZE`` pieces and decrypted with a context positioned
    at the part's first byte.

    :param resp: application response
    :param boundary: multipart boundary (bytes)
    :param body_key: decryption key for the response body
    :param crypto_meta: crypto_meta for the response body
    :return: generator for decrypted response body
    """
    with closing_if_possible(resp):
        doc_parts = multipart_byteranges_to_document_iters(
            FileLikeIter(resp), boundary)
        for start, _end, _size, part_headers, part_body in doc_parts:
            yield b"--" + boundary + b"\r\n"
            for name, value in part_headers:
                header_line = b"%s: %s\r\n" % (wsgi_to_bytes(name),
                                               wsgi_to_bytes(value))
                yield header_line
            yield b"\r\n"

            decrypter = self.crypto.create_decryption_ctxt(
                body_key, crypto_meta['iv'], start)
            while True:
                chunk = part_body.read(DECRYPT_CHUNK_SIZE)
                if chunk == b'':
                    break
                yield decrypter.update(chunk)
            yield b"\r\n"

        yield b"--" + boundary + b"--"
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Fetch the submanifest, parse it, and return it.

    The subrequest is a GET copy of ``req`` with its range removed, its
    path pointed at ``/<version>/<acc>/<con>/<obj>`` and ``swift.source``
    set to ``SLO``; it is sent to ``self.slo.app``.

    :param req: the request on whose behalf the submanifest is fetched
    :param version: API version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :returns: the JSON-decoded submanifest
    :raises ListingIterError: if the subrequest is not successful or the
                              body cannot be JSON-decoded
    """
    sub_req = req.copy_get()
    sub_req.range = None
    sub_req.environ['PATH_INFO'] = '/'.join(['', version, acc, con, obj])
    sub_req.environ['swift.source'] = 'SLO'
    sub_req.user_agent = "%s SLO MultipartGET" % sub_req.user_agent
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        # Release the failed response's iterator before bailing out; this
        # path previously raised without ever closing it.
        with closing_if_possible(sub_resp.app_iter):
            pass
        raise ListingIterError(
            'ERROR: while fetching %s, GET of submanifest %s '
            'failed with status %d' % (req.path, sub_req.path,
                                       sub_resp.status_int))

    try:
        with closing_if_possible(sub_resp.app_iter):
            return json.loads(''.join(sub_resp.app_iter))
    except ValueError as err:
        raise ListingIterError(
            'ERROR: while fetching %s, JSON-decoding of submanifest %s '
            'failed with %s' % (req.path, sub_req.path, err))
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    Retrieve a submanifest with a GET subrequest and return it decoded.

    :param req: the request whose environ and auth token are reused
    :param version: API version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :returns: the JSON-decoded submanifest
    :raises ListingIterError: if the subrequest is not successful (the
                              response is closed first) or the body is
                              not valid JSON
    """
    submanifest_path = '/'.join(['', version, acc, con, obj])
    sub_req = make_subrequest(
        req.environ,
        path=submanifest_path,
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s SLO MultipartGET',
        swift_source='SLO')
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        close_if_possible(sub_resp.app_iter)
        failure = ('ERROR: while fetching %s, GET of submanifest %s '
                   'failed with status %d' % (req.path, sub_req.path,
                                              sub_resp.status_int))
        raise ListingIterError(failure)

    try:
        with closing_if_possible(sub_resp.app_iter):
            raw_manifest = ''.join(sub_resp.app_iter)
            return json.loads(raw_manifest)
    except ValueError as err:
        failure = ('ERROR: while fetching %s, JSON-decoding of '
                   'submanifest %s failed with %s' % (req.path,
                                                      sub_req.path, err))
        raise ListingIterError(failure)
def PUT(self, req):
    """
    HTTP PUT request handler.

    Authorizes the request, validates the object name and headers, then
    either links the object (when an ``Oio-Copy-From`` header is present)
    or stores the request body.

    :param req: the PUT request
    :returns: the authorization, constraint-check, link or store response
    """
    container_info = self.container_info(
        self.account_name, self.container_name, req)
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']

    # is request authorized
    if 'swift.authorize' in req.environ:
        denial = req.environ['swift.authorize'](req)
        if denial:
            return denial

    self._update_content_type(req)

    # check constraints on object name and request headers
    error_response = check_object_creation(req, self.object_name)
    if not error_response:
        error_response = check_content_type(req)
    if error_response:
        return error_response

    if req.headers.get('Oio-Copy-From'):
        return self._link_object(req)

    self._update_x_timestamp(req)

    body_reader = req.environ['wsgi.input']
    if req.content_length:
        body_reader = ExpectedSizeReader(body_reader, req.content_length)

    headers = self._prepare_headers(req)
    with closing_if_possible(body_reader):
        resp = self._store_object(req, body_reader, headers)
    return resp
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Produce the manifest's GET or HEAD response from its JSON body.

    The body read from ``resp_iter`` is JSON-decoded (an undecodable body
    yields no segments).  The response Etag is the MD5 over the segment
    hashes, with ``"<hash>:<range>;"`` used for ranged segments, and the
    Content-Length is the sum of the segment lengths.

    :param req: the client request
    :param resp_headers: list of (name, value) header pairs
    :param resp_iter: iterable yielding the manifest body
    :returns: result of ``_manifest_head_response`` for HEAD requests,
              otherwise of ``_manifest_get_response``
    """
    with closing_if_possible(resp_iter):
        manifest_json = "".join(resp_iter)

    try:
        segments = json.loads(manifest_json)
    except ValueError:
        segments = []

    digest = md5()
    total_bytes = 0
    for segment in segments:
        seg_range = segment.get("range")
        if seg_range:
            etag_piece = "%s:%s;" % (segment["hash"], seg_range)
        else:
            etag_piece = segment["hash"]
        digest.update(etag_piece)

        if config_true_value(segment.get("sub_slo")):
            override_bytes_from_content_type(
                segment, logger=self.slo.logger)
        total_bytes += self._segment_length(segment)

    response_headers = []
    for name, value in resp_headers:
        if name.lower() in ("etag", "content-length"):
            continue
        response_headers.append((name, value))
    response_headers.append(("Content-Length", str(total_bytes)))
    response_headers.append(("Etag", '"%s"' % digest.hexdigest()))

    if req.method == "HEAD":
        return self._manifest_head_response(req, response_headers)
    return self._manifest_get_response(
        req, total_bytes, response_headers, segments)
def _fetch_sub_slo_segments(self, req, version, acc, con, obj):
    """
    GET a submanifest through a subrequest and return the decoded JSON.

    :param req: the request whose environ and auth token are reused
    :param version: API version path component
    :param acc: account path component
    :param con: container path component
    :param obj: object path component
    :returns: the JSON-decoded submanifest
    :raises ListingIterError: if the subrequest is not successful (the
                              response is closed first) or the body is
                              not valid JSON
    """
    path_parts = ["", version, acc, con, obj]
    auth_headers = {"x-auth-token": req.headers.get("x-auth-token")}
    sub_req = make_subrequest(
        req.environ,
        path="/".join(path_parts),
        method="GET",
        headers=auth_headers,
        agent="%(orig)s SLO MultipartGET",
        swift_source="SLO",
    )
    sub_resp = sub_req.get_response(self.slo.app)

    if not is_success(sub_resp.status_int):
        close_if_possible(sub_resp.app_iter)
        details = (req.path, sub_req.path, sub_resp.status_int)
        raise ListingIterError(
            "ERROR: while fetching %s, GET of submanifest %s "
            "failed with status %d" % details
        )

    try:
        with closing_if_possible(sub_resp.app_iter):
            raw_manifest = "".join(sub_resp.app_iter)
            return json.loads(raw_manifest)
    except ValueError as err:
        details = (req.path, sub_req.path, err)
        raise ListingIterError(
            "ERROR: while fetching %s, JSON-decoding of submanifest %s "
            "failed with %s" % details
        )
def get_or_head_response(self, req, resp_headers, resp_iter):
    """
    Turn a manifest body into the GET or HEAD response for the large
    object it describes.

    The body is consumed from ``resp_iter`` and JSON-decoded; if decoding
    fails the segment list is empty.  Etag and Content-Length headers from
    ``resp_headers`` are dropped and recomputed from the segments.

    :param req: the client request
    :param resp_headers: list of (name, value) header pairs
    :param resp_iter: iterable yielding the manifest body
    :returns: result of ``_manifest_head_response`` for HEAD requests,
              otherwise of ``_manifest_get_response``
    """
    with closing_if_possible(resp_iter):
        manifest_json = ''.join(resp_iter)

    try:
        segments = json.loads(manifest_json)
    except ValueError:
        segments = []

    hasher = md5()
    object_size = 0
    for entry in segments:
        entry_range = entry.get('range')
        if entry_range:
            hasher.update('%s:%s;' % (entry['hash'], entry_range))
        else:
            hasher.update(entry['hash'])

        is_sub_slo = config_true_value(entry.get('sub_slo'))
        if is_sub_slo:
            override_bytes_from_content_type(entry, logger=self.slo.logger)
        object_size += self._segment_length(entry)

    recomputed = ('etag', 'content-length')
    response_headers = [
        pair for pair in
        ((name, value) for name, value in resp_headers)
        if pair[0].lower() not in recomputed]
    response_headers.append(('Content-Length', str(object_size)))
    response_headers.append(('Etag', '"%s"' % hasher.hexdigest()))

    if req.method == 'HEAD':
        return self._manifest_head_response(req, response_headers)
    return self._manifest_get_response(
        req, object_size, response_headers, segments)
def _get_manifest_read(self, resp_iter):
    """
    Read a manifest body and return its JSON-decoded content.

    :param resp_iter: iterable yielding the manifest body; closed after
                      reading when it supports closing
    :returns: the decoded manifest, or an empty list when the body is not
              valid JSON
    """
    with closing_if_possible(resp_iter):
        raw_manifest = ''.join(resp_iter)

    try:
        return json.loads(raw_manifest)
    except ValueError:
        return []
def _list_objects(self, env, account, ct_parts, header_cb,
                  prefix='', recursive=True, limit=10000):
    """
    Yield the entries of a container listing, optionally descending into
    subdirectory markers.

    If `recursive` is set (the default), a listing subrequest is made for
    each subdirectory marker encountered and the objects found there are
    yielded too.  If `recursive` is False, objects and directory markers
    are yielded without recursing.  Yielded names are prefixed with the
    joined ``ct_parts[1:]``.

    :param env: WSGI environment copied into the subrequest
    :param account: account name
    :param ct_parts: tuple of container path parts
    :param header_cb: optional callable given the response headers
    :param prefix: listing prefix
    :param recursive: whether to descend into subdirectories
    :param limit: maximum number of entries requested
    """
    container_name = self.ENCODED_DELIMITER.join(ct_parts)
    sub_path = quote_plus(
        self.DELIMITER.join(('', 'v1', account, container_name)))
    LOG.debug("%s: listing objects from '%s'",
              self.SWIFT_SOURCE, sub_path)

    sub_req = make_subrequest(env.copy(), method='GET', path=sub_path,
                              body='', swift_source=self.SWIFT_SOURCE)
    params = sub_req.params
    params['delimiter'] = self.DELIMITER
    params['limit'] = str(limit)  # FIXME: why is it str?
    params['prefix'] = prefix
    params['format'] = 'json'
    sub_req.params = params
    resp = sub_req.get_response(self.app)

    if len(ct_parts) > 1:
        obj_prefix = self.DELIMITER.join(ct_parts[1:] + ('', ))
    else:
        obj_prefix = ''

    if not (resp.is_success and resp.content_length != 0):
        LOG.warn("Failed to recursively list '%s': %s",
                 obj_prefix, resp.status)
        return

    with closing_if_possible(resp.app_iter):
        items = json.loads(resp.body)
    if header_cb:
        header_cb(resp.headers)

    # Collected before anything is yielded, from the unmodified entries.
    subdirs = [entry['subdir'][:-1] for entry in items
               if 'subdir' in entry]

    for entry in items:
        if 'name' in entry:
            entry['name'] = obj_prefix + entry['name']
            yield entry
        elif not recursive and 'subdir' in entry:
            entry['subdir'] = obj_prefix + entry['subdir']
            yield entry

    if not recursive:
        return
    for subdir in subdirs:
        nested = self._list_objects(
            env, account, ct_parts + (subdir, ), header_cb)
        for entry in nested:
            yield entry
def process_json_resp(self, req, resp_iter):
    """
    Decrypt the entries of a JSON listing body.

    Each entry of the parsed listing is passed through
    ``decrypt_obj_dict``; the Content-Length is updated to the length of
    the re-serialized listing.

    :param req: the request the listing belongs to
    :param resp_iter: iterable yielding the JSON listing body
    :returns: a one-element list holding the new body
    """
    with closing_if_possible(resp_iter):
        raw_listing = ''.join(resp_iter)

    decrypted_entries = []
    for entry in json.loads(raw_listing):
        decrypted_entries.append(self.decrypt_obj_dict(req, entry))

    new_body = json.dumps(decrypted_entries)
    self.update_content_length(len(new_body))
    return [new_body]
def process_json_resp(self, req, resp_iter):
    """
    Decrypt the entries of a JSON listing body.

    Each entry of the parsed listing is passed through
    ``decrypt_obj_dict``; the result is re-serialized, encoded as ASCII
    bytes, and the Content-Length is updated to its length.

    :param req: the request the listing belongs to
    :param resp_iter: iterable yielding the JSON listing body as bytes
    :returns: a one-element list holding the new body
    """
    with closing_if_possible(resp_iter):
        raw_listing = b''.join(resp_iter)

    decrypted_entries = []
    for entry in json.loads(raw_listing):
        decrypted_entries.append(self.decrypt_obj_dict(req, entry))

    new_body = json.dumps(decrypted_entries).encode('ascii')
    self.update_content_length(len(new_body))
    return [new_body]
def _process_json_resp(self, resp_iter, req):
    """
    Rewrite the symlink entries of a JSON listing body.

    The body is decoded as ASCII and parsed; each entry is passed through
    ``_extract_symlink_path_json`` together with the API version and
    account taken from the request path.  The Content-Length is updated
    to the length of the new body.

    :param resp_iter: iterable yielding the JSON listing body as bytes
    :param req: the request the listing belongs to
    :return: modified json body, as a one-element list
    """
    with closing_if_possible(resp_iter):
        raw_listing = b''.join(resp_iter)
    listing = json.loads(raw_listing.decode('ascii'))

    swift_version, account, _junk = split_path(req.path, 2, 3, True)
    rewritten = []
    for entry in listing:
        rewritten.append(
            self._extract_symlink_path_json(entry, swift_version, account))

    new_body = json.dumps(rewritten).encode('ascii')
    self.update_content_length(len(new_body))
    return [new_body]
def _process_json_resp(self, resp_iter, req):
    """
    Rewrite the symlink entries of a JSON listing body.

    Each entry of the parsed listing is passed through
    ``_extract_symlink_path_json`` together with the API version and
    account taken from the request path.  The Content-Length is updated
    to the length of the new body.

    :param resp_iter: iterable yielding the JSON listing body as bytes
    :param req: the request the listing belongs to
    :return: modified json body, as a one-element list
    """
    with closing_if_possible(resp_iter):
        raw_listing = b''.join(resp_iter)
    listing = json.loads(raw_listing)

    swift_version, account, _junk = split_path(req.path, 2, 3, True)
    rewritten = []
    for entry in listing:
        rewritten.append(
            self._extract_symlink_path_json(entry, swift_version, account))

    new_body = json.dumps(rewritten).encode('ascii')
    self.update_content_length(len(new_body))
    return [new_body]
def response_iter(self, resp, body_key, crypto_meta, offset):
    """
    Decrypt a response body chunk by chunk.

    :param resp: application response
    :param body_key: decryption key for the response body
    :param crypto_meta: crypto_meta for the response body; its ``iv`` is
                        used to create the decryption context
    :param offset: offset into object content at which response body
                   starts
    :return: generator for decrypted response body
    """
    initial_vector = crypto_meta['iv']
    decrypter = self.crypto.create_decryption_ctxt(
        body_key, initial_vector, offset)
    with closing_if_possible(resp):
        for encrypted_chunk in resp:
            plaintext = decrypter.update(encrypted_chunk)
            yield plaintext
def _get_container_listing(self, req, version, account, container,
                           prefix, marker=''):
    """
    Issue a container GET subrequest and return its parsed JSON listing.

    :param req: the request whose environ and auth token are reused
    :param version: API version path component
    :param account: account name
    :param container: container name
    :param prefix: listing prefix
    :param marker: listing marker; omitted from the query when empty
    :returns: ``(None, listing)`` on success, ``(response, None)`` when
              the subrequest fails
    """
    container_path = '/'.join(['', version, account, container])
    con_req = make_subrequest(
        req.environ,
        path=container_path,
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s DLO MultipartGET', swift_source='DLO')

    query = 'prefix=%s' % quote(prefix)
    if marker:
        query += '&marker=%s' % quote(marker)
    con_req.query_string = query

    con_resp = con_req.get_response(self.dlo.app)
    if is_success(con_resp.status_int):
        with closing_if_possible(con_resp.app_iter):
            return None, json.loads(''.join(con_resp.app_iter))
    return con_resp, None
def _get_container_listing(self, req, version, account, container,
                           prefix, marker=''):
    """
    Issue a JSON-format container GET subrequest and return its parsed
    listing.

    :param req: the request whose environ and auth token are reused
    :param version: API version path component
    :param account: account name
    :param container: container name
    :param prefix: listing prefix
    :param marker: listing marker; omitted from the query when empty
    :returns: ``(None, listing)`` on success, ``(response, None)`` when
              the subrequest fails
    """
    container_path = '/'.join(['', version, account, container])
    con_req = make_subrequest(
        req.environ,
        path=container_path,
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s DLO MultipartGET', swift_source='DLO')

    query = 'format=json&prefix=%s' % quote(prefix)
    if marker:
        query += '&marker=%s' % quote(marker)
    con_req.query_string = query

    con_resp = con_req.get_response(self.dlo.app)
    if is_success(con_resp.status_int):
        with closing_if_possible(con_resp.app_iter):
            return None, json.loads(''.join(con_resp.app_iter))
    return con_resp, None
def _list_objects(self, env, account, ct_parts, header_cb,
                  prefix='', limit=DEFAULT_LIMIT,
                  marker=None, force_master=False):
    """
    Yield the named entries of one container listing.

    Any ``delimiter`` query parameter is removed from the subrequest, and
    ``marker`` is either set or removed.  Yielded names are prefixed with
    the joined ``ct_parts[1:]`` decoded as UTF-8.

    :param env: WSGI environment copied into the subrequest
    :param account: account name
    :param ct_parts: list of container path parts
    :param header_cb: optional callable given the response headers
    :param prefix: listing prefix
    :param limit: maximum number of entries requested
    :param marker: listing marker, if any
    :param force_master: when true, sets ``force_master`` in the
                         subrequest's ``oio.query`` environ entry
    """
    container_name = self.ENCODED_DELIMITER.join(ct_parts)
    sub_path = quote(
        self.DELIMITER.join(('', 'v1', account, container_name)))
    LOG.debug("%s: listing objects from '%s' "
              "(limit=%d, prefix=%s, marker=%s)",
              self.SWIFT_SOURCE, sub_path, limit, prefix, marker)

    sub_req = make_subrequest(env.copy(), method='GET', path=sub_path,
                              body='', swift_source=self.SWIFT_SOURCE)
    params = sub_req.params
    params.pop('delimiter', None)  # allow list-multipart-uploads
    params['limit'] = str(limit)  # FIXME: why is it str?
    params['prefix'] = prefix
    params['format'] = 'json'
    if not marker:
        params.pop('marker', None)
    else:
        params['marker'] = marker
    if force_master:
        sub_req.environ.setdefault('oio.query', {})['force_master'] = True
    sub_req.params = params
    resp = sub_req.get_response(self.app)

    if len(ct_parts) > 1:
        obj_prefix = self.DELIMITER.join(ct_parts[1:] + ['', ])
    else:
        obj_prefix = ''

    if not (resp.is_success and resp.content_length != 0):
        LOG.warn("%s: Failed to list %s", self.SWIFT_SOURCE, sub_path)
        return

    with closing_if_possible(resp.app_iter):
        items = json.loads(resp.body)
    if header_cb:
        header_cb(resp.headers)

    for entry in items:
        if 'name' not in entry:
            continue
        entry['name'] = obj_prefix.decode('utf-8') + entry['name']
        yield entry
def process_xml_resp(self, key, resp_iter):
    """
    Decrypt the ``hash`` elements of an XML listing body.

    The text of every ``hash`` element is decrypted with
    ``decrypt_value_with_meta``.  The serialized tree's single-quoted XML
    declaration is replaced with a double-quoted one, and the
    Content-Length is updated to the new body length.

    :param key: key passed to ``decrypt_value_with_meta``
    :param resp_iter: iterable yielding the XML listing body
    :returns: a one-element list holding the new body
    """
    with closing_if_possible(resp_iter):
        raw_xml = ''.join(resp_iter)

    tree = ElementTree.fromstring(raw_xml)
    for hash_elem in tree.iter('hash'):
        encrypted = hash_elem.text.encode('utf8')
        decrypted = self.decrypt_value_with_meta(encrypted, key)
        hash_elem.text = decrypted.decode('utf8')

    serialized = ElementTree.tostring(tree, encoding='UTF-8')
    new_body = serialized.replace(
        "<?xml version='1.0' encoding='UTF-8'?>",
        '<?xml version="1.0" encoding="UTF-8"?>', 1)
    self.update_content_length(len(new_body))
    return [new_body]
def _validate_etag_and_update_sysmeta(self, req, symlink_target_path,
                                      etag):
    """
    HEAD the symlink target to check ``etag`` against it, then copy
    target details into the headers of ``req``.

    On success, ``req`` gains the target bytes and target etag symlink
    sysmeta headers, possibly a container-update override etag header,
    and the target's Content-Type if ``req`` has none.

    :param req: the request being updated
    :param symlink_target_path: path of the symlink target
    :param etag: etag the target is expected to have
    :raises HTTPConflict: if the HEAD results in a 404
    :raises: the ``status_map`` exception matching any other
             unsuccessful status
    """
    # next we'll make sure the E-Tag matches a real object
    head_req = make_subrequest(
        req.environ, path=wsgi_quote(symlink_target_path),
        method='HEAD', swift_source='SYM')
    self._last_target_path = symlink_target_path
    target_resp = self._recursive_get_head(
        head_req, target_etag=etag, follow_softlinks=False)

    if self._get_status_int() == HTTP_NOT_FOUND:
        conflict_headers = {
            'Content-Type': 'text/plain',
            'Content-Location': self._last_target_path,
        }
        raise HTTPConflict(
            body='X-Symlink-Target does not exist',
            request=req,
            headers=conflict_headers)

    if not is_success(self._get_status_int()):
        with closing_if_possible(target_resp):
            for _chunk in target_resp:
                pass
        raise status_map[self._get_status_int()](request=req)

    target_headers = HeaderKeyDict(self._response_headers)

    # carry forward any etag update params (e.g. "slo_etag"), we'll append
    # symlink_target_* params to this header after this method returns
    override_header = get_container_update_override_key('etag')
    if (override_header in target_headers and
            override_header not in req.headers):
        _value, sep, params = \
            target_headers[override_header].partition(';')
        req.headers[override_header] = MD5_OF_EMPTY_STRING + sep + params

    # It's troublesome that there's so much leakage with SLO
    if ('X-Object-Sysmeta-Slo-Etag' in target_headers and
            override_header not in req.headers):
        slo_etag = target_headers['X-Object-Sysmeta-Slo-Etag']
        req.headers[override_header] = '%s; slo_etag=%s' % (
            MD5_OF_EMPTY_STRING, slo_etag)

    slo_size = target_headers.get('x-object-sysmeta-slo-size')
    req.headers[TGT_BYTES_SYSMETA_SYMLINK_HDR] = (
        slo_size or target_headers['Content-Length'])
    req.headers[TGT_ETAG_SYSMETA_SYMLINK_HDR] = etag

    if not req.headers.get('Content-Type'):
        req.headers['Content-Type'] = target_headers['Content-Type']
def _list_objects(self, env, account, ct_parts, header_cb,
                  prefix='', limit=DEFAULT_LIMIT, marker=None):
    """
    Yield the named entries of one delimited container listing.

    The subrequest's ``marker`` query parameter is either set or removed.
    Yielded names are prefixed with the joined ``ct_parts[1:]``.

    :param env: WSGI environment copied into the subrequest
    :param account: account name
    :param ct_parts: list of container path parts
    :param header_cb: optional callable given the response headers
    :param prefix: listing prefix
    :param limit: maximum number of entries requested
    :param marker: listing marker, if any
    """
    container_name = self.ENCODED_DELIMITER.join(ct_parts)
    sub_path = quote_plus(
        self.DELIMITER.join(('', 'v1', account, container_name)))
    LOG.debug("%s: listing objects from '%s' "
              "(limit=%d, prefix=%s, marker=%s)",
              self.SWIFT_SOURCE, sub_path, limit, prefix, marker)

    sub_req = make_subrequest(env.copy(), method='GET', path=sub_path,
                              body='', swift_source=self.SWIFT_SOURCE)
    params = sub_req.params
    params['delimiter'] = self.DELIMITER
    params['limit'] = str(limit)  # FIXME: why is it str?
    params['prefix'] = prefix
    params['format'] = 'json'
    if not marker:
        params.pop('marker', None)
    else:
        params['marker'] = marker
    sub_req.params = params
    resp = sub_req.get_response(self.app)

    if len(ct_parts) > 1:
        obj_prefix = self.DELIMITER.join(ct_parts[1:] + ['', ])
    else:
        obj_prefix = ''

    if not (resp.is_success and resp.content_length != 0):
        LOG.warn("%s: Failed to list %s", self.SWIFT_SOURCE, sub_path)
        return

    with closing_if_possible(resp.app_iter):
        items = json.loads(resp.body)
    if header_cb:
        header_cb(resp.headers)

    for entry in items:
        if 'name' not in entry:
            continue
        entry['name'] = obj_prefix + entry['name']
        yield entry
def call_dlo(self, req, app=None):
    """
    Run ``req`` through a WSGI app and collect the complete response.

    :param req: request to send; gets a default User-Agent if it has none
    :param app: WSGI app to call, defaults to ``self.dlo``
    :returns: ``(status, headers, body)`` with the body as bytes
    """
    target_app = self.dlo if app is None else app
    req.headers.setdefault("User-Agent", "Soap Opera")

    captured = {'status': None, 'headers': None}

    def start_response(s, h, ei=None):
        captured['status'] = s
        captured['headers'] = h

    body_iter = target_app(req.environ, start_response)
    body = b''
    # appease the close-checker
    with closing_if_possible(body_iter):
        for chunk in body_iter:
            body += chunk
    return captured['status'], captured['headers'], body
def _recursive_listing(self, env, account, ct_parts, header_cb):
    """
    Yield every object found under a container, descending into each
    subdirectory marker with a further listing subrequest.

    Yielded names are prefixed with the joined ``ct_parts[1:]``.

    :param env: WSGI environment copied into the subrequest
    :param account: account name
    :param ct_parts: tuple of container path parts
    :param header_cb: callable given the response headers of each
                      successful listing
    """
    container_name = self.ENCODED_DELIMITER.join(ct_parts)
    sub_path = quote_plus(
        self.DELIMITER.join(('', 'v1', account, container_name)))
    LOG.debug("%s: Recursively listing '%s'",
              self.SWIFT_SOURCE, sub_path)

    sub_req = make_subrequest(env.copy(), method='GET', path=sub_path,
                              body='', swift_source=self.SWIFT_SOURCE)
    params = sub_req.params
    params['delimiter'] = self.DELIMITER
    params['limit'] = '10000'
    params['prefix'] = ''
    sub_req.params = params
    resp = sub_req.get_response(self.app)

    if len(ct_parts) > 1:
        obj_prefix = self.DELIMITER.join(ct_parts[1:] + ('', ))
    else:
        obj_prefix = ''

    if not (resp.is_success and resp.content_length != 0):
        LOG.warn("Failed to recursively list '%s': %s",
                 obj_prefix, resp.status)
        return

    with closing_if_possible(resp.app_iter):
        items = json.loads(resp.body)
    header_cb(resp.headers)

    # Collected before anything is yielded.
    subdirs = [entry['subdir'][:-1] for entry in items
               if 'subdir' in entry]

    for entry in items:
        if 'name' not in entry:
            continue
        entry['name'] = obj_prefix + entry['name']
        yield entry

    for subdir in subdirs:
        nested = self._recursive_listing(
            env, account, ct_parts + (subdir, ), header_cb)
        for entry in nested:
            yield entry
def __call__(self, env, start_response):
    """
    WSGI entry point that moves an ``s3_etag`` parameter out of each
    listing entry's ``hash`` into its own ``s3_etag`` key.

    Only GET requests on a three-part path with a valid API version are
    considered, and only JSON responses with a known Content-Length no
    larger than ``MAX_CONTAINER_LISTING_CONTENT_LENGTH`` are rewritten.
    If the listing cannot be processed, the original body is returned.

    :param env: WSGI environment
    :param start_response: WSGI start_response callable
    :returns: the response body iterable
    """
    # a lot of this is cribbed from listing_formats / swob.Request
    if env['REQUEST_METHOD'] != 'GET':
        # Nothing to translate
        return self.app(env, start_response)

    try:
        v, a, c = split_path(env.get('SCRIPT_NAME', '') +
                             env['PATH_INFO'], 3, 3)
        if not valid_api_version(v):
            raise ValueError
    except ValueError:
        # not a container request; pass through
        return self.app(env, start_response)

    ctx = WSGIContext(self.app)
    resp_iter = ctx._app_call(env)

    def respond(app_iter):
        # Start the response with whatever the context currently holds.
        start_response(ctx._response_status, ctx._response_headers,
                       ctx._response_exc_info)
        return app_iter

    content_type = content_length = cl_index = None
    for position, (name, value) in enumerate(ctx._response_headers):
        name = name.lower()
        if name == 'content-type':
            content_type = value.split(';', 1)[0].strip()
            if content_length:
                break
        elif name == 'content-length':
            cl_index = position
            try:
                content_length = int(value)
            except ValueError:
                pass  # ignore -- we'll bail later
            if content_type:
                break

    translatable = (
        content_type == 'application/json' and
        content_length is not None and
        content_length <= MAX_CONTAINER_LISTING_CONTENT_LENGTH)
    if not translatable:
        return respond(resp_iter)

    # We've done our sanity checks, slurp the response into memory
    with closing_if_possible(resp_iter):
        body = b''.join(resp_iter)

    try:
        listing = json.loads(body)
        for item in listing:
            if 'subdir' in item:
                continue
            value, params = parse_header(item['hash'])
            if 's3_etag' in params:
                item['s3_etag'] = '"%s"' % params.pop('s3_etag')
                remaining = ''.join(
                    '; %s=%s' % kv for kv in params.items())
                item['hash'] = value + remaining
    except (TypeError, KeyError, ValueError):
        # If anything goes wrong above, drop back to original response
        return respond([body])

    body = json.dumps(listing).encode('ascii')
    length_header_name = ctx._response_headers[cl_index][0]
    ctx._response_headers[cl_index] = (length_header_name, str(len(body)))
    return respond([body])
def resp_iter(total_size=total_size):
    """
    Generate the response body for a manifest upload.

    Validates the manifest's segments against a single listing of the
    segments container, then either yields an error report or stores the
    manifest through ``req`` and yields the outcome.  When ``heartbeat``
    is set, single spaces are yielded while working and the final status
    is reported inside the body.

    :param total_size: running total of segment bytes; bound from the
                       enclosing scope as a default argument so it can be
                       rebound locally with ``+=``
    """
    # wsgi won't propagate start_response calls until some data has
    # been yielded so make sure first heartbeat is sent immediately
    if heartbeat:
        yield ' '
    last_yield_time = time.time()

    # BEGIN: New OpenIO code
    # One listing subrequest covers all segments sharing seg_prefix.
    sub_req = make_subrequest(
        req.environ,
        path='%s?format=json&prefix=%s&limit=%d' %
        (segments_container_path, seg_prefix, self.max_manifest_segments),
        method='GET',
        headers={'x-auth-token': req.headers.get('x-auth-token')},
        agent='%(orig)s SLO MultipartPUT', swift_source='SLO')
    sub_req.environ.setdefault('oio.query', {})
    # All meta2 databases may not be synchronized
    sub_req.environ['oio.query']['force_master'] = True
    sub_req.environ['oio.query']['slo'] = True
    list_seg_resp = sub_req.get_response(self)

    # NOTE(review): the body is JSON-decoded before is_success is checked
    # below; presumably a failed listing with a non-JSON body would raise
    # here instead of being reported as problem segments -- confirm.
    with closing_if_possible(list_seg_resp.app_iter):
        segments_resp = json.loads(list_seg_resp.body)

    # Index the listing entries by '/<segments_container>/<name>'.
    seg_resp_dict = dict()
    for seg_resp in segments_resp:
        obj_name = '/'.join(('', segments_container, seg_resp['name']))
        seg_resp_dict[obj_name] = seg_resp

    for obj_name in path2indices:
        now = time.time()
        if heartbeat and (now - last_yield_time > self.yield_frequency):
            # Make sure we've called start_response before
            # sending data
            yield ' '
            last_yield_time = now

        for i in path2indices[obj_name]:
            if not list_seg_resp.is_success:
                # The listing itself failed: every segment is reported
                # with the listing's status.
                problem_segments.append(
                    [quote(obj_name), list_seg_resp.status])
                segment_length = 0
                seg_data = None
            else:
                seg_resp = seg_resp_dict.get(obj_name)
                if seg_resp:
                    # Last argument tells whether this is the final
                    # entry of parsed_data.
                    segment_length, seg_data = validate_seg_dict(
                        parsed_data[i], seg_resp,
                        (i == len(parsed_data) - 1))
                else:
                    # Segment absent from the listing.
                    problem_segments.append([quote(obj_name), 404])
                    segment_length = 0
                    seg_data = None
            data_for_storage[i] = seg_data
            total_size += segment_length
    # END: New OpenIO code

    if problem_segments:
        err = HTTPBadRequest(content_type=out_content_type)
        resp_dict = {}
        if heartbeat:
            # With heartbeat the error status travels inside the body.
            resp_dict['Response Status'] = err.status
            resp_dict['Response Body'] = err.body or '\n'.join(
                RESPONSE_REASONS.get(err.status_int, ['']))
        else:
            start_response(err.status,
                           [(h, v) for h, v in err.headers.items()
                            if h.lower() != 'content-length'])
        yield separator + get_response_body(
            out_content_type, resp_dict, problem_segments, 'upload')
        return

    # Compute the manifest etag over the stored segment descriptions.
    slo_etag = md5()
    for seg_data in data_for_storage:
        if 'data' in seg_data:
            raw_data = base64.b64decode(seg_data['data'])
            slo_etag.update(md5(raw_data).hexdigest())
        elif seg_data.get('range'):
            slo_etag.update('%s:%s;' % (seg_data['hash'],
                                        seg_data['range']))
        else:
            slo_etag.update(seg_data['hash'])

    slo_etag = slo_etag.hexdigest()
    client_etag = req.headers.get('Etag')
    # A client-supplied Etag must match the computed one.
    if client_etag and client_etag.strip('"') != slo_etag:
        err = HTTPUnprocessableEntity(request=req)
        if heartbeat:
            yield separator + get_response_body(
                out_content_type, {
                    'Response Status': err.status,
                    'Response Body': err.body or '\n'.join(
                        RESPONSE_REASONS.get(err.status_int, [''])),
                }, problem_segments, 'upload')
        else:
            for chunk in err(req.environ, start_response):
                yield chunk
        return

    # Replace the request body with the serialized segment data.
    json_data = json.dumps(data_for_storage)
    if six.PY3:
        json_data = json_data.encode('utf-8')
    req.body = json_data
    req.headers.update({
        SYSMETA_SLO_ETAG: slo_etag,
        SYSMETA_SLO_SIZE: total_size,
        'X-Static-Large-Object': 'True',
        'Etag': md5(json_data).hexdigest(),
    })

    # Ensure container listings have both etags. However, if any
    # middleware to the left of us touched the base value, trust them.
    override_header = 'X-Object-Sysmeta-Container-Update-Override-Etag'
    val, sep, params = req.headers.get(
        override_header, '').partition(';')
    req.headers[override_header] = '%s; slo_etag=%s' % (
        (val or req.headers['Etag']) + sep + params, slo_etag)

    env = req.environ
    if not env.get('CONTENT_TYPE'):
        guessed_type, _junk = mimetypes.guess_type(req.path_info)
        env['CONTENT_TYPE'] = (guessed_type or
                               'application/octet-stream')
    env['swift.content_type_overridden'] = True
    # The total segment size is appended to the content type.
    env['CONTENT_TYPE'] += ";swift_bytes=%d" % total_size

    resp = req.get_response(self.app)
    resp_dict = {'Response Status': resp.status}
    if resp.is_success:
        resp.etag = slo_etag
        resp_dict['Etag'] = resp.headers['Etag']
        resp_dict['Last Modified'] = resp.headers['Last-Modified']

    if heartbeat:
        resp_dict['Response Body'] = resp.body
        yield separator + get_response_body(
            out_content_type, resp_dict, [], 'upload')
    else:
        for chunk in resp(req.environ, start_response):
            yield chunk
def __call__(self, env, start_response):
    """Move ``s3_etag`` hash parameters of a JSON container listing.

    Only ``GET`` requests whose path splits into exactly three segments
    with a valid API version are inspected; everything else is handed to
    the wrapped application untouched. When the response is a JSON listing
    no larger than ``MAX_CONTAINER_LISTING_CONTENT_LENGTH``, each entry
    whose ``hash`` carries an ``s3_etag`` parameter gets that parameter
    moved into a quoted ``s3_etag`` key, and the ``Content-Length`` header
    is recomputed.

    :param env: WSGI environment of the request.
    :param start_response: WSGI ``start_response`` callable.
    :returns: the response body iterable (the original one when nothing
              was translated).
    """
    # a lot of this is cribbed from listing_formats / swob.Request
    if env['REQUEST_METHOD'] != 'GET':
        # Nothing to translate
        return self.app(env, start_response)

    try:
        version, _account, _container = split_path(
            env.get('SCRIPT_NAME', '') + env['PATH_INFO'], 3, 3)
        if not valid_api_version(version):
            raise ValueError
    except ValueError:
        # not a container request; pass through
        return self.app(env, start_response)

    ctx = WSGIContext(self.app)
    resp_iter = ctx._app_call(env)

    content_type = content_length = cl_index = None
    for index, (header, value) in enumerate(ctx._response_headers):
        header = header.lower()
        if header == 'content-type':
            content_type = value.split(';', 1)[0].strip()
            if content_length:
                break
        elif header == 'content-length':
            cl_index = index
            try:
                content_length = int(value)
            except ValueError:
                pass  # ignore -- we'll bail later
            if content_type:
                break

    if content_type != 'application/json' or content_length is None or \
            content_length > MAX_CONTAINER_LISTING_CONTENT_LENGTH:
        start_response(ctx._response_status, ctx._response_headers,
                       ctx._response_exc_info)
        return resp_iter

    # We've done our sanity checks, slurp the response into memory
    with closing_if_possible(resp_iter):
        body = b''.join(resp_iter)

    try:
        listing = json.loads(body)
        for item in listing:
            if 'subdir' in item:
                continue
            value, params = parse_header(item['hash'])
            if 's3_etag' in params:
                item['s3_etag'] = '"%s"' % params.pop('s3_etag')
                item['hash'] = value + ''.join(
                    '; %s=%s' % kv for kv in params.items())
    except (TypeError, KeyError, ValueError):
        # If anything goes wrong above, drop back to original response
        start_response(ctx._response_status, ctx._response_headers,
                       ctx._response_exc_info)
        return [body]

    body = json.dumps(listing)
    if not isinstance(body, bytes):
        # The fallback path above returns bytes; do the same here.
        # json.dumps() escapes non-ASCII characters by default, so the
        # ASCII encoding cannot fail and keeps the length unchanged.
        body = body.encode('ascii')
    ctx._response_headers[cl_index] = (
        ctx._response_headers[cl_index][0],
        str(len(body)),
    )
    start_response(ctx._response_status, ctx._response_headers,
                   ctx._response_exc_info)
    return [body]
def handle_container_listing(self, env, start_response):
    """List a container, rewriting the listing of a versioning container.

    When ``get_unversioned_container()`` maps the requested container to a
    different name, and that container's ``versions-location`` sysmeta does
    not point to another container, the listing is requested in JSON with
    the ``versions`` flag set in ``oio.query``. The latest version of each
    object is then removed (unless ``is_deleted()`` is true for it), the
    remaining names are rewritten with ``swift3_versioned_object_name()``,
    ``subdir`` entries are appended unchanged and ``Content-Length`` is
    recomputed. Otherwise the parent's listing is returned as is.

    :param env: WSGI environment of the listing request.
    :param start_response: WSGI ``start_response`` callable.
    :returns: the parent's response iterable, or the rewritten listing as
              a JSON string.
    """
    # This code may be clearer by using Request(env).get_response()
    # instead of self._app_call(env)
    api_vers, account, container_name = split_path(
        env['PATH_INFO'], 3, 3, True)
    sub_env = env.copy()
    orig_container = get_unversioned_container(container_name)
    if orig_container != container_name:
        # Check that container_name is actually the versioning
        # container for orig_container
        sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                              orig_container)
        info = get_container_info(sub_env, self.app,
                                  swift_source='VW')
        vers_loc = info.get('sysmeta', {}).get('versions-location')
        # Sometimes we receive versioned listing requests whereas
        # versioning is not enabled (vers_loc is None or empty).
        if vers_loc and vers_loc != container_name:
            # The container specified in the request ends with the
            # versioning suffix, but user has asked the versions to
            # be saved elsewhere, thus we will consider this as a
            # regular listing request.
            orig_container = container_name

    if orig_container != container_name:
        qs = parse_qs(sub_env.get('QUERY_STRING', ''))
        if 'marker' in qs:
            marker, _ = swift3_split_object_name_version(qs['marker'][0])
            qs['marker'] = [marker]
        if 'prefix' in qs:
            prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
            qs['prefix'] = prefix
        # The post-processing below parses the body with json.loads()
        qs['format'] = 'json'
        sub_env['QUERY_STRING'] = urlencode(qs, True)
        sub_env['oio.query'] = {'versions': True}

    resp = super(OioVersionedWritesContext, self).handle_container_request(
        sub_env, lambda x, y, z: None)

    if orig_container != container_name and \
            self._response_status == '200 OK':
        with closing_if_possible(resp):
            versioned_objects = json.loads("".join(resp))

        # Discard the latest version of each object, because it is
        # not supposed to appear in the versioning container.
        # Also keep object prefixes as some of them may be shadowed
        # from the "main" container.
        latest = dict()
        subdirs = []
        for obj in versioned_objects:
            if 'subdir' in obj:
                subdirs.append(obj)
                continue
            name = obj['name']
            ver = int(obj.get('version', '0'))
            # Explicit membership test: ordering an int against None
            # only works on Python 2 and raises TypeError on Python 3.
            if name not in latest or ver > latest[name]:
                latest[name] = ver
        versioned_objects = [
            obj for obj in versioned_objects
            if 'subdir' not in obj and
            (int(obj.get('version', '0')) != latest[obj['name']] or
             is_deleted(obj))
        ]
        for obj in versioned_objects:
            obj['name'] = swift3_versioned_object_name(
                obj['name'], obj.get('version', ''))
        versioned_objects += subdirs
        resp = json.dumps(versioned_objects)
        # Header names are case-insensitive: drop any spelling of
        # Content-Length before appending the recomputed one.
        self._response_headers = [
            x for x in self._response_headers
            if x[0].lower() != 'content-length']
        self._response_headers.append(('Content-Length', str(len(resp))))

    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return resp
def make_request(
        self, method, path, headers, acceptable_statuses,
        body_file=None, params=None):
    """Send a request to Swift, retrying until an acceptable status.

    :param method: HTTP method of the request.
    :param path: Path of the request.
    :param headers: Headers to send; copied, and ``user-agent`` is
                    overwritten with ``self.user_agent``.
    :param acceptable_statuses: Statuses considered a success, given
                                either as full codes or as their hundreds
                                digit.
    :param body_file: Optional body file; rewound before every try when
                      it has a ``seek`` method.
    :param params: Optional dict of query string parameters.

    :returns: Response object on success.

    :raises UnexpectedResponse: when the last response obtained does not
                                have an acceptable status.
    :raises Exception: the exception caught on the last try, re-raised
                       when no response was obtained.
    """
    request_headers = dict(headers)
    request_headers['user-agent'] = self.user_agent

    for try_index in range(self.request_tries):
        resp = exc_type = exc_value = exc_traceback = None
        req = Request.blank(
            path,
            environ={'REQUEST_METHOD': method},
            headers=request_headers)
        if body_file is not None:
            if hasattr(body_file, 'seek'):
                body_file.seek(0)
            req.body_file = body_file
        if params:
            req.params = params

        try:
            resp = req.get_response(self.app)
        except (Exception, Timeout):
            exc_type, exc_value, exc_traceback = exc_info()
        else:
            status = resp.status_int
            if status in acceptable_statuses or \
                    status // 100 in acceptable_statuses:
                return resp
            if not is_server_error(status):
                # No sense retrying when we expect the same result
                break

        # sleep only between tries, not after the last one
        if try_index >= self.request_tries - 1:
            continue
        if resp:
            # always close any resp.app_iter before we discard it
            with closing_if_possible(resp.app_iter):
                # for non 2XX requests it's safe and useful to drain
                # the response body so we log the correct status code
                if resp.status_int // 100 != 2:
                    for _chunk in resp.app_iter:
                        pass
        sleep(2 ** (try_index + 1))

    if resp:
        msg = 'Unexpected response: %s' % resp.status
        if resp.status_int // 100 != 2:
            # provide additional context (and drain the response body) for
            # non 2XX responses
            body = resp.body
            if body:
                msg += ' (%s)' % body
        raise UnexpectedResponse(msg, resp)
    if exc_type:
        # To make pep8 tool happy, in place of raise t, v, tb:
        six.reraise(exc_type, exc_value, exc_traceback)
def handle_container_listing(self, env, start_response):
    """List a container, rewriting the listing of a versioning container.

    When ``get_unversioned_container()`` maps the requested container to a
    different name and that container's ``versions-location`` sysmeta is
    the requested container, the listing is requested with the
    ``versions`` flag set in ``oio_query``. The latest version of each
    object is then removed (unless ``is_deleted()`` is true for it), the
    remaining names are rewritten with ``swift3_versioned_object_name()``
    and ``Content-Length`` is recomputed. Otherwise the parent's listing
    is returned as is.

    :param env: WSGI environment of the listing request.
    :param start_response: WSGI ``start_response`` callable.
    :returns: the parent's response iterable, or the rewritten listing as
              a JSON string.
    """
    # This code may be clearer by using Request(env).get_response()
    # instead of self._app_call(env)
    api_vers, account, container_name = split_path(
        env['PATH_INFO'], 3, 3, True)
    sub_env = env.copy()
    orig_container = get_unversioned_container(container_name)
    if orig_container != container_name:
        # Check that container_name is actually the versioning
        # container for orig_container
        sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                              orig_container)
        info = get_container_info(sub_env, self.app,
                                  swift_source='VW')
        if info.get('sysmeta', {}).get('versions-location') != \
                container_name:
            # We were wrong, do a standard listing
            orig_container = container_name

    if orig_container != container_name:
        qs = parse_qs(sub_env.get('QUERY_STRING', ''))
        if 'marker' in qs:
            marker, _ = swift3_split_object_name_version(qs['marker'][0])
            qs['marker'] = [marker]
        if 'prefix' in qs:
            prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
            qs['prefix'] = prefix
        sub_env['QUERY_STRING'] = urlencode(qs, True)
        sub_env['oio_query'] = {'versions': True}

    resp = super(OioVersionedWritesContext, self).handle_container_request(
        sub_env, lambda x, y, z: None)

    if orig_container != container_name and \
            self._response_status == '200 OK':
        with closing_if_possible(resp):
            versioned_objects = json.loads("".join(resp))

        # Discard the latest version of each object, because it is
        # not supposed to appear in the versioning container.
        # NOTE(review): entries without a 'name' key (e.g. 'subdir'
        # placeholders) would raise KeyError below -- confirm they cannot
        # appear in this listing.
        latest = dict()
        for obj in versioned_objects:
            name = obj['name']
            ver = int(obj.get('version', '0'))
            # Record the first version seen whatever its value: with a
            # default of 0, a version <= 0 was never stored and the
            # lookup in the filter below raised KeyError.
            if name not in latest or ver > latest[name]:
                latest[name] = ver
        versioned_objects = [
            obj for obj in versioned_objects
            if int(obj.get('version', '0')) != latest[obj['name']] or
            is_deleted(obj)
        ]
        for obj in versioned_objects:
            obj['name'] = swift3_versioned_object_name(
                obj['name'], obj.get('version', ''))
        resp = json.dumps(versioned_objects)
        # Header names are case-insensitive: drop any spelling of
        # Content-Length before appending the recomputed one.
        self._response_headers = [
            x for x in self._response_headers
            if x[0].lower() != 'content-length']
        self._response_headers.append(('Content-Length', str(len(resp))))

    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return resp
def PUT(self, req):
    """HTTP PUT request handler.

    Authorizes the request, enforces versioning, validates it, then stores
    the object (or links it when an ``Oio-Copy-From`` header is present).
    When ``delete_slo_parts`` is enabled on the app and versioning is not
    enabled on the container, the manifest of a previously stored SLO is
    fetched first so that its parts can be deleted once the new object has
    been stored with a different ETag.

    :param req: the request being handled.
    :returns: the authorization or validation error response if any,
              otherwise the response of the link or store operation.
    """
    container_info = self.container_info(
        self.account_name, self.container_name, req)
    req.acl = container_info['write_acl']
    req.environ['swift_sync_key'] = container_info['sync_key']

    # is request authorized
    if 'swift.authorize' in req.environ:
        aresp = req.environ['swift.authorize'](req)
        if aresp:
            return aresp

    self.enforce_versioning(req)

    old_slo_manifest = None
    old_slo_manifest_etag = None
    # If versioning is disabled, we must check if the object exists.
    # If it's a NEW SLO (we must check it is not the same manifest),
    # we will have to delete the parts if the current
    # operation is a success.
    if (self.app.delete_slo_parts and
            not config_true_value(container_info.get(
                'sysmeta', {}).get('versions-enabled', False))):
        try:
            dest_info = get_object_info(req.environ, self.app)
            if 'slo-size' in dest_info['sysmeta']:
                manifest_env = req.environ.copy()
                manifest_env['QUERY_STRING'] = 'multipart-manifest=get'
                manifest_req = make_subrequest(manifest_env, 'GET')
                manifest_resp = manifest_req.get_response(self.app)
                old_slo_manifest = json.loads(manifest_resp.body)
                old_slo_manifest_etag = dest_info.get('etag')
        except Exception as exc:
            # Best effort: a failed lookup must not prevent the upload.
            self.app.logger.warning(
                'Failed to check existence of %s. If '
                'overwriting a SLO, old parts may '
                'remain. Error was: %s', req.path, exc)

    self._update_content_type(req)

    req.ensure_x_timestamp()

    # check constraints on object name and request headers
    error_response = check_object_creation(req, self.object_name) or \
        check_content_type(req)
    if error_response:
        return error_response

    if req.headers.get('Oio-Copy-From'):
        return self._link_object(req)

    data_source = req.environ['wsgi.input']
    if req.content_length:
        data_source = ExpectedSizeReader(data_source, req.content_length)

    headers = self._prepare_headers(req)
    with closing_if_possible(data_source):
        resp = self._store_object(req, data_source, headers)
    if (resp.is_success and
            old_slo_manifest and resp.etag != old_slo_manifest_etag):
        self.app.logger.debug(
            'Previous object %s was a different SLO, deleting parts',
            req.path)
        self._delete_slo_parts(req, old_slo_manifest)
    return resp
def handle_container_listing(self, env, start_response):
    """List a container, rewriting the listing of a versioning container.

    When ``get_unversioned_container()`` maps the requested container to a
    different name, and that container's ``versions-location`` sysmeta does
    not point to another container, the listing is requested in JSON with
    the ``versions`` flag set in ``oio.query``. The latest version of each
    object is then removed (unless ``is_deleted()`` is true for it),
    ``subdir`` entries are dropped, the remaining names are rewritten with
    ``swift3_versioned_object_name()`` and ``Content-Length`` is
    recomputed. Otherwise the parent's listing is returned as is.

    :param env: WSGI environment of the listing request.
    :param start_response: WSGI ``start_response`` callable.
    :returns: the parent's response iterable, or the rewritten listing as
              a JSON string.
    """
    # This code may be clearer by using Request(env).get_response()
    # instead of self._app_call(env)
    api_vers, account, container_name = split_path(
        env['PATH_INFO'], 3, 3, True)
    sub_env = env.copy()
    orig_container = get_unversioned_container(container_name)
    if orig_container != container_name:
        # Check that container_name is actually the versioning
        # container for orig_container
        sub_env['PATH_INFO'] = '/%s/%s/%s' % (api_vers, account,
                                              orig_container)
        info = get_container_info(sub_env, self.app,
                                  swift_source='VW')
        vers_loc = info.get('sysmeta', {}).get('versions-location')
        # Sometimes we receive versioned listing requests whereas
        # versioning is not enabled (vers_loc is None or empty).
        if vers_loc and vers_loc != container_name:
            # The container specified in the request ends with the
            # versioning suffix, but user has asked the versions to
            # be saved elsewhere, thus we will consider this as a
            # regular listing request.
            orig_container = container_name

    if orig_container != container_name:
        qs = parse_qs(sub_env.get('QUERY_STRING', ''))
        if 'marker' in qs:
            marker, _ = swift3_split_object_name_version(qs['marker'][0])
            qs['marker'] = [marker]
        if 'prefix' in qs:
            prefix, _ = swift3_split_object_name_version(qs['prefix'][0])
            qs['prefix'] = prefix
        # The post-processing below parses the body with json.loads()
        qs['format'] = 'json'
        sub_env['QUERY_STRING'] = urlencode(qs, True)
        sub_env['oio.query'] = {'versions': True}

    resp = super(OioVersionedWritesContext, self).handle_container_request(
        sub_env, lambda x, y, z: None)

    if orig_container != container_name and \
            self._response_status == '200 OK':
        with closing_if_possible(resp):
            versioned_objects = json.loads("".join(resp))

        # Discard the latest version of each object, because it is
        # not supposed to appear in the versioning container.
        # Also discard object prefixes, which are computed
        # from the "main" container.
        latest = dict()
        for obj in versioned_objects:
            if 'subdir' in obj:
                continue
            name = obj['name']
            ver = int(obj.get('version', '0'))
            # Record the first version seen whatever its value: with a
            # default of 0, a version <= 0 was never stored and the
            # lookup in the filter below raised KeyError.
            if name not in latest or ver > latest[name]:
                latest[name] = ver
        versioned_objects = [
            obj for obj in versioned_objects
            if 'subdir' not in obj and
            (int(obj.get('version', '0')) != latest[obj['name']] or
             is_deleted(obj))
        ]
        for obj in versioned_objects:
            obj['name'] = swift3_versioned_object_name(
                obj['name'], obj.get('version', ''))
        resp = json.dumps(versioned_objects)
        # Header names are case-insensitive: drop any spelling of
        # Content-Length before appending the recomputed one.
        self._response_headers = [
            x for x in self._response_headers
            if x[0].lower() != 'content-length']
        self._response_headers.append(('Content-Length', str(len(resp))))

    start_response(self._response_status,
                   self._response_headers,
                   self._response_exc_info)
    return resp
def make_request(self, method, path, headers, acceptable_statuses,
                 body_file=None, params=None):
    """Issue a request against Swift, retrying on errors.

    :param method: HTTP method of the request.
    :param path: Path of the request.
    :param headers: Headers to send with the request; a copy is made and
                    its ``user-agent`` entry is set to ``self.user_agent``.
    :param acceptable_statuses: Collection of acceptable statuses; a
                                response matches on its status code or on
                                the hundreds digit of that code.
    :param body_file: Body file to pass along with the request, defaults
                      to None. Rewound before each try if seekable.
    :param params: A dict of params to set in the request query string,
                   defaults to None.

    :returns: Response object on success.

    :raises UnexpectedResponse: when the last response obtained does not
                                have an acceptable status.
    :raises Exception: the exception caught on the last try, re-raised
                       when no response was obtained.
    """
    hdrs = dict(headers)
    hdrs['user-agent'] = self.user_agent

    for tries_done in range(self.request_tries):
        resp = exc_type = exc_value = exc_traceback = None

        req = Request.blank(path, headers=hdrs,
                            environ={'REQUEST_METHOD': method})
        if body_file is not None:
            if hasattr(body_file, 'seek'):
                body_file.seek(0)
            req.body_file = body_file
        if params:
            req.params = params

        try:
            resp = req.get_response(self.app)
        except (Exception, Timeout):
            exc_type, exc_value, exc_traceback = exc_info()
        else:
            code = resp.status_int
            accepted = (code in acceptable_statuses or
                        code // 100 in acceptable_statuses)
            if accepted:
                return resp
            if not is_server_error(code):
                # No sense retrying when we expect the same result
                break

        more_tries_left = tries_done < self.request_tries - 1
        if more_tries_left:
            if resp:
                # always close any resp.app_iter before we discard it
                with closing_if_possible(resp.app_iter):
                    # for non 2XX requests it's safe and useful to drain
                    # the response body so we log the correct status code
                    if resp.status_int // 100 != 2:
                        for _unused in resp.app_iter:
                            pass
            # sleep only between tries, not after each one
            sleep(2 ** (tries_done + 1))

    if resp:
        details = ''
        if resp.status_int // 100 != 2 and resp.body:
            # provide additional context (and drain the response body) for
            # non 2XX responses
            details = ' (%s)' % resp.body
        raise UnexpectedResponse(
            'Unexpected response: %s' % resp.status + details, resp)
    if exc_type:
        # To make pep8 tool happy, in place of raise t, v, tb:
        six.reraise(exc_type, exc_value, exc_traceback)
def _list_objects(self, env, account, ct_parts, header_cb,
                  prefix='', limit=DEFAULT_LIMIT,
                  marker=None, force_master=False, versions=False):
    """Yield the entries of a JSON container listing.

    The container path is built from ``account`` and ``ct_parts`` joined
    with ``self.ENCODED_DELIMITER``. When ``ct_parts`` has more than one
    element, the elements after the first are joined with
    ``self.DELIMITER`` and prepended to the ``name`` of every entry.

    :param env: WSGI environment used as the base of the subrequest.
    :param account: account name used in the listing path.
    :param ct_parts: list of container name parts.
    :param header_cb: optional callable invoked with the response headers
                      of a successful listing.
    :param prefix: value of the ``prefix`` query parameter.
    :param limit: value of the ``limit`` query parameter.
    :param marker: value of the ``marker`` query parameter; any marker
                   inherited from ``env`` is removed when this is falsy.
    :param force_master: set ``force_master`` in the subrequest's
                         ``oio.query``.
    :param versions: set ``versions`` in the subrequest's ``oio.query``.
    :returns: a generator of listing entries (dicts); it yields nothing
              when the listing fails or is empty.
    """
    sub_path = quote(self.DELIMITER.join(
        ('', 'v1', account, self.ENCODED_DELIMITER.join(ct_parts))))
    LOG.debug("%s: listing objects from '%s' "
              "(limit=%d, prefix=%s, marker=%s)",
              self.SWIFT_SOURCE, sub_path, limit, prefix, marker)
    sub_req = make_subrequest(env.copy(), method='GET', path=sub_path,
                              body='', swift_source=self.SWIFT_SOURCE)
    params = sub_req.params
    params.pop('delimiter', None)  # allow list-multipart-uploads
    params['limit'] = str(limit)  # FIXME: why is it str?
    params['prefix'] = prefix
    params['format'] = 'json'
    if marker:
        params['marker'] = marker
    else:
        params.pop('marker', None)
    if force_master:
        # this is used to check if container is empty after a delete
        # but we want to ensure listing is done on master
        sub_req.environ.setdefault('oio.query', {})
        sub_req.environ['oio.query']['force_master'] = True
    if versions:
        # this is used to check if container is really empty after a delete
        # or when a versioned listing is done
        sub_req.environ.setdefault('oio.query', {})
        sub_req.environ['oio.query']['versions'] = True
    sub_req.params = params
    resp = sub_req.get_response(self.app)

    obj_prefix = ''
    if len(ct_parts) > 1:
        obj_prefix = self.DELIMITER.join(ct_parts[1:] + ['', ])
    # Decode once, and only when the prefix really is a byte string:
    # text strings have no decode() method on Python 3.
    if isinstance(obj_prefix, bytes):
        obj_prefix = obj_prefix.decode('utf-8')

    # Close the response iterator on the failure path as well.
    with closing_if_possible(resp.app_iter):
        if not resp.is_success or resp.content_length == 0:
            if resp.status_int != 404:
                LOG.warning("%s: Failed to list %s: %s",
                            self.SWIFT_SOURCE, sub_path, resp.status)
            return
        items = json.loads(resp.body)
    if header_cb:
        header_cb(resp.headers)

    for obj in items:
        if 'name' in obj:
            obj['name'] = obj_prefix + obj['name']
        yield obj