async def _validate_checksum(self, response): """Check the computed checksum, if any, against the response headers. Args: response (object): The HTTP response object. Raises: ~google.resumable_media.common.DataCorruption: If the checksum computed locally and the checksum reported by the remote host do not match. """ if self._checksum_type is None: return metadata_key = sync_helpers._get_metadata_key(self._checksum_type) metadata = await response.json() remote_checksum = metadata.get(metadata_key) if remote_checksum is None: raise common.InvalidResponse( response, _UPLOAD_METADATA_NO_APPROPRIATE_CHECKSUM_MESSAGE.format( metadata_key), self._get_headers(response), ) local_checksum = sync_helpers.prepare_checksum_digest( self._checksum_object.digest()) if local_checksum != remote_checksum: raise common.DataCorruption( response, _UPLOAD_CHECKSUM_MISMATCH_MESSAGE.format( self._checksum_type.upper(), local_checksum, remote_checksum), )
def require_status_code(response, status_codes, get_status_code, callback=do_nothing):
    """Ensure the status code of a response is one of the accepted values.

    Args:
        response (object): The HTTP response object.
        status_codes (tuple): The acceptable status codes.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        int: The status code.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status code
            is not one of the values in ``status_codes``.
    """
    code = get_status_code(response)
    if code in status_codes:
        return code

    # Let the caller react (e.g. invalidate its state) before the error
    # propagates.
    callback()
    raise common.InvalidResponse(
        response,
        u"Request failed with status code",
        code,
        u"Expected one of",
        *status_codes
    )
def header_required(response, name, get_headers, callback=do_nothing):
    """Return the value of a header that must be present on a response.

    Args:
        response (object): An HTTP response object, expected to have a
            ``headers`` attribute that is a ``Mapping[str, str]``.
        name (str): The name of a required header.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get
            headers from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        str: The desired header.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the header
            is missing.
    """
    available = get_headers(response)
    if name in available:
        return available[name]

    # Run the caller's hook first, then report the missing header.
    callback()
    raise common.InvalidResponse(
        response, u"Response headers must contain header", name
    )
def get_range_info(response, get_headers, callback=_helpers.do_nothing):
    """Extract start, end and total bytes from the content range header.

    Args:
        response (object): An HTTP response object.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get
            headers from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        Tuple[int, int, int]: The start byte, end byte and total bytes.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the
            ``Content-Range`` header is not of the form
            ``bytes {start}-{end}/{total}``.
    """
    header_value = _helpers.header_required(
        response,
        _helpers.CONTENT_RANGE_HEADER,
        get_headers,
        callback=callback,
    )
    parsed = _CONTENT_RANGE_RE.match(header_value)
    if parsed is None:
        callback()
        raise common.InvalidResponse(
            response,
            u'Unexpected content-range header',
            header_value,
            u'Expected to be of the form "bytes {start}-{end}/{total}"',
        )

    group_names = (u'start_byte', u'end_byte', u'total_bytes')
    return tuple(int(parsed.group(group)) for group in group_names)
async def _process_response(self, response, bytes_sent):
    """Update the upload state from the response to a chunk request.

    A 200 response marks the upload as finished: ``bytes_sent`` is added
    to the uploaded byte count and the checksum is validated. A 308
    response means more data is expected; the uploaded byte count is taken
    from the ``range`` header of the response.

    This is based on the `sans-I/O`_ philosophy.

    Args:
        response (object): The HTTP response object.
        bytes_sent (int): The number of bytes sent in the request that
            ``response`` was returned for.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status
            code is 308 and the ``range`` header is not of the form
            ``bytes 0-{end}``.
        ~google.resumable_media.common.InvalidResponse: If the status
            code is not 200 or 308.

    .. _sans-I/O: https://sans-io.readthedocs.io/
    """
    accepted = (http.client.OK, http.client.PERMANENT_REDIRECT)
    status_code = _helpers.require_status_code(
        response,
        accepted,
        self._get_status_code,
        callback=self._make_invalid,
    )

    if status_code != http.client.OK:
        # 308: the server tells us how much it has received so far.
        range_header = _helpers.header_required(
            response,
            _helpers.RANGE_HEADER,
            self._get_headers,
            callback=self._make_invalid,
        )
        parsed = _BYTES_RANGE_RE.match(range_header)
        if parsed is None:
            self._make_invalid()
            raise common.InvalidResponse(
                response,
                'Unexpected "range" header',
                range_header,
                'Expected to be of the form "bytes=0-{end}"',
            )
        self._bytes_uploaded = int(parsed.group("end_byte")) + 1
        return

    # NOTE: Only the "local" ``bytes_sent`` value is used to advance
    #       ``bytes_uploaded``; it is not cross-checked against other
    #       possible sources such as:
    #
    #       * a ``size`` key in the JSON response body
    #       * the ``total_bytes`` attribute (if set)
    #       * ``stream.tell()`` (relying on fact that ``initiate()``
    #         requires stream to be at the beginning)
    self._bytes_uploaded = self._bytes_uploaded + bytes_sent
    # Tombstone the current upload so it cannot be used again.
    self._finished = True
    # Validate the checksum. This can raise an exception on failure.
    await self._validate_checksum(response)
def _parse_checksum_header(header_value, response, checksum_label): """Parses the checksum header from an ``X-Goog-Hash`` value. .. _header reference: https://cloud.google.com/storage/docs/\ xml-api/reference-headers#xgooghash Expects ``header_value`` (if not :data:`None`) to be in one of the three following formats: * ``crc32c=n03x6A==`` * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` See the `header reference`_ for more information. Args: header_value (Optional[str]): The ``X-Goog-Hash`` header from a download response. response (~requests.Response): The HTTP response object. checksum_label (str): The label of the header value to read, as in the examples above. Typically "md5" or "crc32c" Returns: Optional[str]: The expected checksum of the response, if it can be detected from the ``X-Goog-Hash`` header; otherwise, None. Raises: ~google.resumable_media.common.InvalidResponse: If there are multiple checksums of the requested type in ``header_value``. """ if header_value is None: return None matches = [] for checksum in header_value.split(u","): name, value = checksum.split(u"=", 1) # Official docs say "," is the separator, but real-world responses have encountered ", " if name.lstrip() == checksum_label: matches.append(value) if len(matches) == 0: return None elif len(matches) == 1: return matches[0] else: raise common.InvalidResponse( response, u"X-Goog-Hash header had multiple ``{}`` values.".format( checksum_label), header_value, matches, )
def _parse_md5_header(header_value, response): """Parses the MD5 header from an ``X-Goog-Hash`` value. .. _header reference: https://cloud.google.com/storage/docs/\ xml-api/reference-headers#xgooghash Expects ``header_value`` (if not :data:`None`) to be in one of the three following formats: * ``crc32c=n03x6A==`` * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==`` See the `header reference`_ for more information. Args: header_value (Optional[str]): The ``X-Goog-Hash`` header from a download response. response (~requests.Response): The HTTP response object. Returns: Optional[str]: The expected MD5 hash of the response, if it can be detected from the ``X-Goog-Hash`` header. Raises: ~google.resumable_media.common.InvalidResponse: If there are multiple ``md5`` checksums in ``header_value``. """ if header_value is None: return None matches = [] for checksum in header_value.split(u","): name, value = checksum.split(u"=", 1) if name == u"md5": matches.append(value) if len(matches) == 0: return None elif len(matches) == 1: return matches[0] else: raise common.InvalidResponse( response, u"X-Goog-Hash header had multiple ``md5`` values.", header_value, matches, )
def _process_recover_response(self, response):
    """Restore the upload state from the response to a recovery request.

    The response must have status 308. The uploaded byte count is taken
    from its ``range`` header, or reset to zero when that header is
    absent. The stream is then repositioned to that offset and the upload
    is marked as valid again.

    This is based on the `sans-I/O`_ philosophy.

    Args:
        response (object): The HTTP response object.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the status
            code is not 308.
        ~google.resumable_media.common.InvalidResponse: If the status
            code is 308 and the ``range`` header is not of the form
            ``bytes 0-{end}``.

    .. _sans-I/O: https://sans-io.readthedocs.io/
    """
    _helpers.require_status_code(
        response,
        (http.client.PERMANENT_REDIRECT,),
        self._get_status_code,
    )
    headers = self._get_headers(response)

    if _helpers.RANGE_HEADER not in headers:
        # In this case, the upload has not "begun".
        resume_offset = 0
    else:
        range_header = headers[_helpers.RANGE_HEADER]
        parsed = _BYTES_RANGE_RE.match(range_header)
        if parsed is None:
            raise common.InvalidResponse(
                response,
                'Unexpected "range" header',
                range_header,
                'Expected to be of the form "bytes=0-{end}"',
            )
        resume_offset = int(parsed.group("end_byte")) + 1

    self._bytes_uploaded = resume_offset
    # Move the stream to the first byte that still has to be sent.
    self._stream.seek(self._bytes_uploaded)
    self._invalid = False
def test_constructor(self):
    """``InvalidResponse`` stores the response and keeps the other args."""
    extra_args = (1, u'a', [b'm'], True)
    response = mock.sentinel.response

    error = common.InvalidResponse(response, *extra_args)

    assert error.response is response
    assert error.args == extra_args
def _process_response(self, response):
    """Update the download state after a chunk response was received.

    This is based on the `sans-I/O`_ philosophy, with one exception: the
    chunk is written to ``stream``, which is I/O but (almost) certainly
    not network I/O.

    The number of bytes consumed is taken from the ``content-length``
    header, or from the ``content-range`` header (``{end} - {start} + 1``)
    when a ``transfer-encoding`` header is present. ``bytes_downloaded``
    is advanced by that amount, the download is marked finished once the
    end byte reaches ``end`` or ``total_bytes - 1``, and ``total_bytes``
    is recorded only if it was not already known (an existing value is
    not compared against the headers).

    The ``content-range`` header is expected to be of the form
    ``bytes {start}-{end}/{total}``.

    Args:
        response (object): The HTTP response object (need headers).

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the number
            of bytes in the body doesn't match the content length header.

    .. _sans-I/O: https://sans-io.readthedocs.io/
    """
    # Verify the response before updating the current instance.
    zero_range = _check_for_zero_content_range(
        response, self._get_status_code, self._get_headers
    )
    if zero_range:
        self._finished = True
        return

    _helpers.require_status_code(
        response,
        _ACCEPTABLE_STATUS_CODES,
        self._get_status_code,
        callback=self._make_invalid,
    )
    headers = self._get_headers(response)
    body = self._get_body(response)
    first_byte, last_byte, reported_total = get_range_info(
        response, self._get_headers, callback=self._make_invalid
    )

    if headers.get(u"transfer-encoding") is not None:
        # 'content-length' header not allowed with chunked encoding.
        chunk_size = last_byte - first_byte + 1
    else:
        declared_length = _helpers.header_required(
            response,
            u"content-length",
            self._get_headers,
            callback=self._make_invalid,
        )
        chunk_size = int(declared_length)
        received = len(body)
        if received != chunk_size:
            self._make_invalid()
            raise common.InvalidResponse(
                response,
                u"Response is different size than content-length",
                u"Expected",
                chunk_size,
                u"Received",
                received,
            )

    # First update ``bytes_downloaded``.
    self._bytes_downloaded += chunk_size

    # If the end byte is past ``end`` or ``total_bytes - 1`` we are done.
    reached_requested_end = self.end is not None and last_byte >= self.end
    if reached_requested_end or last_byte >= reported_total - 1:
        self._finished = True

    # NOTE: We only use ``total_bytes`` if not already known.
    if self.total_bytes is None:
        self._total_bytes = reported_total

    # Write the response body to the stream.
    self._stream.write(body)