def _get_new_endpoint(original_endpoint, new_endpoint, use_new_scheme=True): new_endpoint_components = urlsplit(new_endpoint) original_endpoint_components = urlsplit(original_endpoint) scheme = original_endpoint_components.scheme if use_new_scheme: scheme = new_endpoint_components.scheme final_endpoint_components = (scheme, new_endpoint_components.netloc, original_endpoint_components.path, original_endpoint_components.query, '') final_endpoint = urlunsplit(final_endpoint_components) logger.debug('Updating URI from %s to %s' % (original_endpoint, final_endpoint)) return final_endpoint
def _inject_signature(self, request, signature):
    """Rewrite ``request.url`` so it carries the signature as query params.

    The query string gains ``AWSAccessKeyId`` and ``Signature`` plus the
    relevant request headers: ``Date`` (emitted as ``Expires``), any
    ``x-amz-*`` header, ``Content-MD5`` and ``Content-Type``. Any query
    string already on the URL is kept in front of the injected one.

    :param request: Request whose ``headers`` are read and whose ``url``
        is replaced.
    :param signature: The computed signature value.
    """
    query_dict = {
        'AWSAccessKeyId': self.credentials.access_key,
        'Signature': signature,
    }

    for header_key in request.headers:
        lk = header_key.lower()
        # For query string requests, Expires is used instead of the
        # Date header. Header names are case-insensitive, so compare the
        # lowercased name just like the other headers below.
        if lk == 'date':
            query_dict['Expires'] = request.headers[header_key]
        # We only want to include relevant headers in the query string.
        # These can be anything that starts with x-amz, is Content-MD5,
        # or is Content-Type.
        elif lk.startswith('x-amz-') or lk in ('content-md5', 'content-type'):
            # Read the value with the key we are iterating so this does
            # not depend on the mapping being case-insensitive.
            query_dict[lk] = request.headers[header_key]

    # Combine all of the identified headers into an encoded query string.
    new_query_string = percent_encode_sequence(query_dict)

    # Create a new url with the presigned url.
    p = urlsplit(request.url)
    if p[3]:
        # If there was a pre-existing query string, we should
        # add that back before injecting the new query string.
        new_query_string = '%s&%s' % (p[3], new_query_string)
    new_url_parts = (p[0], p[1], p[2], new_query_string, p[4])
    request.url = urlunsplit(new_url_parts)
def _is_s3_accelerate(self, endpoint_url, s3_config): # Accelerate has been explicitly configured. if s3_config is not None and s3_config.get('use_accelerate_endpoint'): return True # Accelerate mode is turned on automatically if an endpoint url is # provided that matches the accelerate scheme. if endpoint_url is None: return False # Accelerate is only valid for Amazon endpoints. netloc = urlsplit(endpoint_url).netloc if not netloc.endswith('amazonaws.com'): return False # The first part of the url should always be s3-accelerate. parts = netloc.split('.') if parts[0] != 's3-accelerate': return False # Url parts between 's3-accelerate' and 'amazonaws.com' which # represent different url features. feature_parts = parts[1:-2] # There should be no duplicate url parts. if len(feature_parts) != len(set(feature_parts)): return False # Remaining parts must all be in the whitelist. return all(p in S3_ACCELERATE_WHITELIST for p in feature_parts)
def calc_signature(self, request, params):
    """Compute the signature-version-2 query string and signature.

    The string to sign is the request method, netloc and path (``/``
    when the URL has no path), each followed by a newline, then the
    sorted, percent-encoded parameters joined with ``&``. It is signed
    with HMAC-SHA256 using ``self.credentials.secret_key``.

    :param request: Object providing ``url`` and ``method``.
    :param params: Mapping of query parameters; a ``Signature`` key is
        ignored.
    :return: Tuple ``(query_string, base64_signature)``.
    """
    logger.debug("Calculating signature using v2 auth.")
    split = urlsplit(request.url)
    path = split.path or '/'
    string_to_sign = '%s\n%s\n%s\n' % (request.method, split.netloc, path)
    lhmac = hmac.new(
        self.credentials.secret_key.encode('utf-8'), digestmod=sha256)
    pairs = []
    for key in sorted(params):
        # Any previous signature should not be a part of this
        # one, so we skip that particular key. This prevents
        # issues during retries.
        if key == 'Signature':
            continue
        # This code is Python 3 only (it uses f-strings), so the text
        # type is simply ``str``.
        value = str(params[key])
        quoted_key = quote(key.encode('utf-8'), safe='')
        quoted_value = quote(value.encode('utf-8'), safe='-_~')
        pairs.append(f'{quoted_key}={quoted_value}')
    qs = '&'.join(pairs)
    string_to_sign += qs
    logger.debug('String to sign: %s', string_to_sign)
    lhmac.update(string_to_sign.encode('utf-8'))
    b64 = base64.b64encode(lhmac.digest()).decode('utf-8')
    return (qs, b64)
def _urljoin(endpoint_url, url_path, host_prefix): p = urlsplit(endpoint_url) # <part> - <index> # scheme - p[0] # netloc - p[1] # path - p[2] # query - p[3] # fragment - p[4] if not url_path or url_path == '/': # If there's no path component, ensure the URL ends with # a '/' for backwards compatibility. if not p[2]: new_path = '/' else: new_path = p[2] elif p[2].endswith('/') and url_path.startswith('/'): new_path = p[2][:-1] + url_path else: new_path = p[2] + url_path new_netloc = p[1] if host_prefix is not None: new_netloc = host_prefix + new_netloc reconstructed = urlunsplit((p[0], new_netloc, new_path, p[3], p[4])) return reconstructed
def create_request_from_raw_request(raw_request):
    """Build an ``AWSRequest`` from a raw HTTP request.

    Copies the method, headers and body of the parsed raw request, stamps
    ``request.context['timestamp']`` from the module-level ``DATE`` and
    derives the URL from the ``host`` header and the request path. When
    the URL has a query string, ``request.url`` is set to the path only
    and the parsed query goes into ``request.params``.

    :param raw_request: Raw request data handed to ``RawHTTPRequest``.
    :return: The populated ``AWSRequest``.
    :raises Exception: If the raw request reports an error code.
    """
    request = AWSRequest()
    raw = RawHTTPRequest(raw_request)
    if raw.error_code is not None:
        raise Exception(raw.error_message)

    request.method = raw.command
    request.context['timestamp'] = DATE.strftime('%Y%m%dT%H%M%SZ')
    for header_name, header_value in raw.headers.items():
        request.headers[header_name] = header_value
    request.data = raw.rfile.read()

    # The request line is decoded as 'iso-8859-1' by the underlying
    # handler, so a text path is re-encoded and decoded as utf-8.
    if isinstance(raw.path, six.text_type):
        raw.path = raw.path.encode('iso-8859-1').decode('utf-8')

    url = 'https://%s%s' % (raw.headers.get('host', ''), raw.path)
    if '?' not in url:
        request.url = url
        return request

    split_url = urlsplit(url)
    params = dict(parse_qsl(split_url.query))
    request.url = split_url.path
    request.params = params
    return request
def get_parsed_query_string(self, request):
    """Return the query string of ``request.url`` as a flat dict.

    ``parse_qs`` yields a list of values per key; keys are not expected
    to repeat here, so only the first value of each list is kept.

    :param request: Object providing a ``url`` attribute.
    :return: Dict mapping each query key to its first value.
    """
    parsed = parse_qs(urlsplit(request.url).query)
    return {name: values[0] for name, values in parsed.items()}
def _modify_request_before_signing(self, request):
    """Move the signing parameters (and any body) into the URL query.

    The rewritten query string is the operation parameters followed by
    the auth parameters; ``X-Amz-Signature`` is not among them. A request
    body, if present, is converted to parameters and cleared.

    :param request: Request whose ``headers``, ``data`` and ``url`` may
        be modified in place.
    """
    # This content type is set automatically and makes no sense on a
    # presigned url, so drop it when it still has the auto-set value.
    auto_content_type = 'application/x-www-form-urlencoded; charset=utf-8'
    if request.headers.get('content-type') == auto_content_type:
        del request.headers['content-type']

    signed_headers = self.signed_headers(self.headers_to_sign(request))
    auth_params = {
        'X-Amz-Algorithm': 'AWS4-HMAC-SHA256',
        'X-Amz-Credential': self.scope(request),
        'X-Amz-Date': request.context['timestamp'],
        'X-Amz-Expires': self._expires,
        'X-Amz-SignedHeaders': signed_headers,
    }
    if self.credentials.token is not None:
        auth_params['X-Amz-Security-Token'] = self.credentials.token

    # Flatten the existing query string: parse_qs gives a list per key,
    # and repeated keys are not expected, so keep the first element.
    url_parts = urlsplit(request.url)
    parsed_query = parse_qs(url_parts.query, keep_blank_values=True)
    query_dict = {name: values[0] for name, values in parsed_query.items()}

    if request.data:
        # Body params also have to travel in the query string.
        query_dict.update(self._get_body_as_dict(request))
        request.data = ''

    # Operation params must come first and auth params last; the two
    # groups are encoded separately so they are never interleaved.
    operation_params = ''
    if query_dict:
        operation_params = percent_encode_sequence(query_dict) + '&'
    new_query_string = (
        operation_params + percent_encode_sequence(auth_params))

    # The split result is immutable, so build a copy with the query
    # component swapped out.
    request.url = urlunsplit(url_parts._replace(query=new_query_string))
def _canonical_host(self, url): url_parts = urlsplit(url) default_ports = {'http': 80, 'https': 443} if any(url_parts.scheme == scheme and url_parts.port == port for scheme, port in default_ports.items()): # No need to include the port if it's the default port. return url_parts.hostname # Strip out auth if it's present in the netloc. return url_parts.netloc.rsplit('@', 1)[-1]
def canonical_query_string(self, request):
    """Return the canonical query string for ``request``.

    The query can live in ``request.params`` or in the URL itself. When
    ``params`` is non-empty it wins; otherwise the URL is split and its
    query component is used.

    :param request: Object providing ``params`` and ``url``.
    :return: Whatever the selected ``_canonical_query_string_*`` helper
        returns.
    """
    if not request.params:
        return self._canonical_query_string_url(urlsplit(request.url))
    return self._canonical_query_string_params(request.params)
def _prepend_to_host(self, url, prefix): url_components = urlsplit(url) parts = url_components.netloc.split('.') parts = [prefix] + parts new_netloc = '.'.join(parts) new_components = (url_components.scheme, new_netloc, url_components.path, url_components.query, '') new_url = urlunsplit(new_components) return new_url
def test_bucket_operations(self):
    """Bucket sub-resources given as a query string stay in the resource.

    For each standard bucket operation, the canonical resource computed
    from ``/quotes?<operation>`` must equal that same string.
    """
    operations = (
        'acl', 'cors', 'lifecycle', 'policy', 'notification',
        'logging', 'tagging', 'requestPayment', 'versioning', 'website',
    )
    for operation in operations:
        url = '/quotes?%s' % operation
        canonical = self.hmacv1.canonical_resource(urlsplit(url))
        self.assertEqual(canonical, '/quotes?%s' % operation)
def add_auth(self, request):
    """Sign ``request`` and inject the signature into it.

    :param request: Object providing ``url``, ``method``, ``headers``
        and ``auth_path``.
    :raises NoCredentialsError: If ``self.credentials`` is ``None``.
    """
    if self.credentials is None:
        raise NoCredentialsError
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug('HTTP request method: %s', request.method)
    self._inject_signature(
        request,
        self.get_signature(
            request.method,
            url_parts,
            request.headers,
            auth_path=request.auth_path,
        ),
    )
def test_duplicate_headers(self):
    """A header that appears twice still yields the expected signature."""
    header_pairs = [
        ('Date', 'Thu, 17 Nov 2005 18:49:58 GMT'),
        ('Content-Md5', 'c8fdb181845a4ca6b8fec737b3581d76'),
        ('Content-Type', 'text/html'),
        # The same header is listed twice on purpose.
        ('X-Amz-Meta-Author', '*****@*****.**'),
        ('X-Amz-Meta-Author', '*****@*****.**'),
        ('X-Amz-Magic', 'abracadabra'),
    ]
    signature = self.hmacv1.get_signature(
        'PUT',
        urlsplit('/quotes/nelson'),
        HTTPHeaders.from_pairs(header_pairs),
    )
    self.assertEqual(signature, 'kIdMxyiYB+F+83zYGR6sSb3ICcE=')
def headers_to_sign(self, request):
    """
    Select the headers from the request that need to be included
    in the StringToSign.

    Header names are lowercased, names found in
    ``SIGNED_HEADERS_BLACKLIST`` are skipped, and a ``host`` entry is
    filled in from the request URL's netloc when none was supplied.
    """
    netloc = urlsplit(request.url).netloc
    header_map = HTTPHeaders()
    for name, value in request.headers.items():
        lowered = name.lower()
        if lowered in SIGNED_HEADERS_BLACKLIST:
            continue
        header_map[lowered] = value
    if 'host' not in header_map:
        header_map['host'] = netloc
    return header_map
def canonical_request(self, request):
    """Assemble the canonical request string for ``request``.

    The newline-joined parts are: upper-cased method, normalized URL
    path, canonical query string, canonical headers (followed by an
    extra newline), signed header names and the body checksum. The
    checksum is the ``X-Amz-Content-SHA256`` header when present,
    otherwise ``self.payload(request)``.

    :param request: Object providing ``method``, ``url`` and ``headers``.
    :return: The canonical request string.
    """
    method = request.method.upper()
    path = self._normalize_url_path(urlsplit(request.url).path)
    query = self.canonical_query_string(request)

    headers_to_sign = self.headers_to_sign(request)
    header_block = self.canonical_headers(headers_to_sign) + '\n'
    signed_names = self.signed_headers(headers_to_sign)

    if 'X-Amz-Content-SHA256' not in request.headers:
        body_checksum = self.payload(request)
    else:
        body_checksum = request.headers['X-Amz-Content-SHA256']

    return '\n'.join(
        [method, path, query, header_block, signed_names, body_checksum])
def _host_from_url(url): # Given URL, derive value for host header. Ensure that value: # 1) is lowercase # 2) excludes port, if it was the default port # 3) excludes userinfo url_parts = urlsplit(url) host = url_parts.hostname # urlsplit's hostname is always lowercase # IBM Unsupported # if is_valid_ipv6_endpoint_url(url): # host = '[%s]' % (host) default_ports = {'http': 80, 'https': 443} if url_parts.port is not None: if url_parts.port != default_ports.get(url_parts.scheme): host = '%s:%d' % (host, url_parts.port) return host
def switch_host_s3_accelerate(request, operation_name, **kwargs):
    """Switches the current s3 endpoint with an S3 Accelerate endpoint"""
    # When registered, this host switch runs before the url is changed
    # to virtual style, so the bucket name needs no translation here and
    # the Accelerate endpoint can be hard coded.
    host_labels = urlsplit(request.url).netloc.split('.')
    feature_labels = [
        label for label in host_labels if label in S3_ACCELERATE_WHITELIST
    ]

    # Whitelisted labels from the current host are carried over between
    # 's3-accelerate' and 'amazonaws.com'.
    endpoint = 'https://s3-accelerate.'
    if feature_labels:
        endpoint += '.'.join(feature_labels) + '.'
    endpoint += 'amazonaws.com'

    # These bucket-level operations are left on their current host.
    if operation_name in ['ListBuckets', 'CreateBucket', 'DeleteBucket']:
        return
    _switch_hosts(request, endpoint, use_new_scheme=False)
def test_put(self):
    """A PUT yields the expected canonical string and signature."""
    # NOTE(review): the author header value below and the author value
    # in the expected canonical string do not match each other; both
    # look redacted and should be confirmed against the real fixture.
    http_headers = HTTPHeaders.from_dict({
        'Date': 'Thu, 17 Nov 2005 18:49:58 GMT',
        'Content-Md5': 'c8fdb181845a4ca6b8fec737b3581d76',
        'Content-Type': 'text/html',
        'X-Amz-Meta-Author': '*****@*****.**',
        'X-Amz-Magic': 'abracadabra',
    })
    url_parts = urlsplit('/quotes/nelson')

    expected_canonical = (
        "PUT\nc8fdb181845a4ca6b8fec737b3581d76\ntext/html\n"
        "Thu, 17 Nov 2005 18:49:58 GMT\nx-amz-magic:abracadabra\n"
        "x-amz-meta-author:[email protected]\n/quotes/nelson")
    expected_signature = 'jZNOcbfWmD/A/f3hSvVzXZjM2HU='

    canonical = self.hmacv1.canonical_string('PUT', url_parts, http_headers)
    self.assertEqual(canonical, expected_canonical)

    signature = self.hmacv1.get_signature('PUT', url_parts, http_headers)
    self.assertEqual(signature, expected_signature)
def _verify_presigned_url_addressing(region, bucket, key, s3_config,
                                     is_secure=True,
                                     customer_provided_endpoint=None,
                                     expected_url=None,
                                     signature_version=None):
    """Assert that a presigned ``get_object`` URL has the expected address.

    Only the scheme, host and path of the generated URL are compared
    with ``expected_url``; the signing query parameters are ignored.
    """
    client = _create_s3_client(
        region=region,
        is_secure=is_secure,
        endpoint_url=customer_provided_endpoint,
        s3_config=s3_config,
        signature_version=signature_version,
    )
    request_params = {'Bucket': bucket, 'Key': key}
    url = client.generate_presigned_url('get_object', request_params)
    # The presigning params are covered elsewhere; only the
    # hostname/path matters for this check.
    scheme, netloc, path = urlsplit(url)[:3]
    actual = '%s://%s%s' % (scheme, netloc, path)
    assert_equal(actual, expected_url)
def is_valid_endpoint_url(endpoint_url):
    """Verify the endpoint_url is valid.

    The hostname must be at most 255 characters long and consist of
    dot-separated labels of 1 to 63 letters, digits or hyphens that
    neither start nor end with a hyphen. One trailing dot is allowed.

    :type endpoint_url: string
    :param endpoint_url: An endpoint_url.  Must have at least a scheme
        and a hostname.

    :return: A truthy value (the regex match object) if the endpoint url
        is valid, otherwise a falsy value (``False`` or ``None``).
    """
    parts = urlsplit(endpoint_url)
    hostname = parts.hostname
    if hostname is None:
        return False
    if len(hostname) > 255:
        return False
    if hostname.endswith("."):
        hostname = hostname[:-1]
    # A raw string keeps "\d" and "\." as regex escapes instead of
    # invalid string escapes, which newer Python versions warn about.
    allowed = re.compile(
        r"^((?!-)[A-Z\d-]{1,63}(?<!-)\.)*((?!-)[A-Z\d-]{1,63}(?<!-))$",
        re.IGNORECASE)
    return allowed.match(hostname)
def switch_to_virtual_host_style(request, signature_version,
                                 default_endpoint_url=None, **kwargs):
    """
    Handler that forces virtual host style s3 addressing regardless of
    the signature version (which is only taken into consideration for
    the default case). If the bucket is not DNS compatible an
    InvalidDNSName is thrown.

    :param request: A AWSRequest object that is about to be sent.
    :param signature_version: The signature version to sign with
    :param default_endpoint_url: The endpoint to use when switching to a
        virtual style. If None is supplied, the virtual host will be
        constructed from the url of the request.
    """
    if request.auth_path is not None:
        # The auth_path is already set (possibly a retried request), so
        # this customization does not need to run again.
        return
    if _is_get_bucket_location_request(request):
        # GetBucketLocation keeps its current addressing so that sigv4
        # issues are avoided.
        logger.debug("Request is GetBucketLocation operation, not checking "
                     "for DNS compatibility.")
        return

    parts = urlsplit(request.url)
    request.auth_path = parts.path
    path_parts = parts.path.split('/')

    # Work out which endpoint the bucket name gets prepended to.
    if default_endpoint_url is None:
        default_endpoint_url = parts.netloc

    if len(path_parts) <= 1:
        return

    bucket_name = path_parts[1]
    if not bucket_name:
        # With an empty bucket name there is nothing to check for dns
        # compatibility.
        return

    logger.debug('Checking for DNS compatible bucket for: %s', request.url)
    if not check_dns_name(bucket_name):
        raise InvalidDNSNameError(bucket_name=bucket_name)

    # For an operation on the bucket itself, the auth_path has to end
    # with a '/' character.
    if len(path_parts) == 2 and request.auth_path[-1] != '/':
        request.auth_path += '/'

    path_parts.remove(bucket_name)
    # The path must be at least '/', e.g. for CreateBucket with DNS
    # style addressing; an empty path would be incorrect.
    path = '/'.join(path_parts) or '/'
    host = bucket_name + '.' + default_endpoint_url
    new_uri = urlunsplit((parts.scheme, host, path, parts.query, ''))
    request.url = new_uri
    logger.debug('URI updated to: %s', new_uri)
def _assert_endpoints_equal(actual, expected, endpoint):
    """Assert that the prepared request's host matches ``expected['host']``.

    Does nothing when ``expected`` has no ``'host'`` entry. Otherwise
    ``actual`` is passed through ``prepare_request_dict`` with
    ``endpoint`` and the netloc of ``actual['url']`` is compared.
    """
    if 'host' in expected:
        prepare_request_dict(actual, endpoint)
        host = urlsplit(actual['url']).netloc
        assert_equal(host, expected['host'], 'Host')
def test_query_string(self):
    """A URL with a query sub-resource yields the expected signature."""
    http_headers = HTTPHeaders.from_pairs(
        [('Date', 'Thu, 17 Nov 2005 18:49:58 GMT')])
    url_parts = urlsplit('/quotes/nelson?uploads')
    signature = self.hmacv1.get_signature('PUT', url_parts, http_headers)
    self.assertEqual(signature, 'P7pBz3Z4p3GxysRSJ/gR8nk7D4o=')