def done(self) -> None:
    if not self.response.complete:
        if self.response.http_error:
            problem = self.response.http_error.desc
        else:
            problem = ""
        self.add_base_note("", RANGE_SUBREQ_PROBLEM, problem=problem)
        return
    if self.response.status_code == "206":
        c_e = "content-encoding"
        # note a negotiation mismatch when exactly one of the two responses
        # was gzip-encoded; parenthesized to avoid chained-comparison parsing
        if ("gzip" in self.base.response.parsed_headers.get(c_e, [])) == (
            "gzip" not in self.response.parsed_headers.get(c_e, [])
        ):
            self.add_base_note(
                "header-accept-ranges header-content-encoding", RANGE_NEG_MISMATCH
            )
            return
        self.check_missing_hdrs(
            ["date", "cache-control", "content-location", "etag", "expires", "vary"],
            MISSING_HDRS_206,
        )
        if self.response.parsed_headers.get(
            "etag", None
        ) == self.base.response.parsed_headers.get("etag", None):
            if self.response.payload == self.range_target:
                self.base.partial_support = True
                self.add_base_note("header-accept-ranges", RANGE_CORRECT)
            else:
                # the body samples are just bags of bits
                self.base.partial_support = False
                self.add_base_note(
                    "header-accept-ranges",
                    RANGE_INCORRECT,
                    range="bytes=%s-%s" % (self.range_start, self.range_end),
                    range_expected=display_bytes(self.range_target),
                    range_expected_bytes=f_num(len(self.range_target)),
                    range_received=display_bytes(self.response.payload),
                    range_received_bytes=f_num(self.response.payload_len),
                )
        else:
            self.add_base_note("header-accept-ranges", RANGE_CHANGED)
    # TODO: address 416 directly
    elif self.response.status_code == self.base.response.status_code:
        self.base.partial_support = False
        self.add_base_note("header-accept-ranges", RANGE_FULL)
    else:
        self.add_base_note(
            "header-accept-ranges",
            RANGE_STATUS,
            range_status=self.response.status_code,
            enc_range_status=self.response.status_code or "(unknown)",
        )
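# Note on the comparison fix in done() above: without the added parentheses,
# Python chains the operators, so `"gzip" in xs == "gzip" not in ys` parses as
# `("gzip" in xs) and (xs == "gzip") and ("gzip" not in ys)`. Because xs is a
# list, `xs == "gzip"` is always False and the RANGE_NEG_MISMATCH note could
# never fire. A standalone sketch (the helper name is illustrative only):
def _gzip_mismatch(base_codings, subreq_codings):
    # True when exactly one of the two responses was gzip-encoded
    return ("gzip" in base_codings) == ("gzip" not in subreq_codings)

assert _gzip_mismatch(["gzip"], []) is True         # base gzipped, subrequest not
assert _gzip_mismatch(["gzip"], ["gzip"]) is False  # both gzipped: no mismatch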
def format_options(self, resource: HttpResource) -> str:
    "Return things that the user can do with the URI as HTML links"
    options = []
    media_type = resource.response.parsed_headers.get("content-type", [""])[0]
    options.append((
        "response headers: %s bytes" % f_num(resource.response.header_length),
        "how large the response headers are, including the status line",
    ))
    options.append((
        "body: %s bytes" % f_num(resource.response.payload_len),
        "how large the response body is",
    ))
    transfer_overhead = (resource.response.transfer_length
                         - resource.response.payload_len)
    if transfer_overhead > 0:
        options.append((
            "transfer overhead: %s bytes" % f_num(transfer_overhead),
            "how much using chunked encoding adds to the response size",
        ))
    options.append(None)
    options.append((
        """\
<script type="text/javascript">
  document.write("<a href='#' id='body_view' accesskey='b'>view body</a>")
</script>""",
        "View this response body (with any gzip compression removed)",
    ))
    if isinstance(resource, HttpResource):
        options.append((
            """\
<a href="?%s" accesskey="h">view har</a>""" % self.req_qs(res_format="har"),
            "View a HAR (HTTP ARchive, a JSON format) file for this test",
        ))
    if not self.kw.get("is_saved", False):
        if self.kw.get("allow_save", False):
            options.append((
                "<a href=\"#\" id='save' accesskey='s'>save</a>",
                "Save these results for future reference",
            ))
        if media_type in self.validators:
            options.append((
                "<a href=\"%s\" accesskey='v'>validate body</a>"
                % self.validators[media_type] % e_query_arg(resource.request.uri),
                "",
            ))
        if hasattr(resource, "link_count") and resource.link_count > 0:
            options.append((
                "<a href=\"?descend=True&%s\" accesskey='a'>check embedded</a>"
                % self.req_qs(use_stored=False),
                "run REDbot on images, frames and embedded links",
            ))
    return nl.join([
        # each option renders as a titled span; a None entry is a line break
        "<span class='option' title='%s'>%s</span>" % (o[1], o[0]) if o else "<br>"
        for o in options
    ])
def body_done(self, complete: bool, trailers: RawHeaderListType = None) -> None:
    """
    Signal that the body is done. Complete should be True if we know
    it's complete (e.g., final chunk, Content-Length).
    """
    self.complete = complete
    self.complete_time = thor.time()
    self.trailers = trailers or []
    self.payload_md5 = self._md5_processor.digest()
    self.decoded_md5 = self._md5_post_processor.digest()

    if self.is_request or (
        not self.is_head_response and self.status_code not in ["304"]
    ):
        # check payload basics
        if "content-length" in self.parsed_headers:
            if self.payload_len == self.parsed_headers["content-length"]:
                self.add_note("header-content-length", CL_CORRECT)
            else:
                self.add_note(
                    "header-content-length",
                    CL_INCORRECT,
                    body_length=f_num(self.payload_len),
                )
        if "content-md5" in self.parsed_headers:
            # encodebytes appends a trailing newline; strip it before comparing
            c_md5_calc = base64.encodebytes(self.payload_md5)[:-1]
            if self.parsed_headers["content-md5"] == c_md5_calc:
                self.add_note("header-content-md5", CMD5_CORRECT)
            else:
                self.add_note(
                    "header-content-md5", CMD5_INCORRECT, calc_md5=c_md5_calc
                )
    self.emit("content_available")
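# Standalone sketch of the Content-MD5 comparison in body_done() above:
# base64.encodebytes() appends a trailing newline, which the [:-1] slice
# strips before comparing against the header value.
import base64
import hashlib

_body = b"hello world"
_calc = base64.encodebytes(hashlib.md5(_body).digest())[:-1]
# equivalent to the newline-free encoding:
assert _calc == base64.b64encode(hashlib.md5(_body).digest())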
def process(self, headers: RawHeaderListType) -> Tuple[StrHeaderListType, HeaderDictType]:
    """
    Given a list of (bytes name, bytes value) headers and:
     - calculate the total header block size
     - call msg.add_note as appropriate
    Returns:
     - a list of unicode header tuples
     - a dict of parsed header values
    """
    unicode_headers = []  # unicode version of the header tuples
    parsed_headers = {}   # dictionary of parsed header values
    offset = 0            # what number header we're on

    # estimate the start-line's size
    header_block_size = len(self.message.version)
    if self.message.is_request:
        header_block_size += len(self.message.method) + len(self.message.uri) + 2
    else:
        header_block_size += len(self.message.status_phrase) + 5

    for name, value in headers:
        offset += 1
        add_note = partial(self.message.add_note, "offset-%s" % offset)

        # track header size
        header_size = len(name) + len(value)
        header_block_size += header_size

        # decode the header to make it unicode clean
        try:
            str_name = name.decode("ascii", "strict")
        except UnicodeError:
            str_name = name.decode("ascii", "ignore")
            add_note(HEADER_NAME_ENCODING, field_name=str_name)
        try:
            str_value = value.decode("ascii", "strict")
        except UnicodeError:
            str_value = value.decode("iso-8859-1", "replace")
            add_note(HEADER_VALUE_ENCODING, field_name=str_name)
        unicode_headers.append((str_name, str_value))

        header_handler = self.get_header_handler(str_name)
        field_add_note = partial(add_note,  # type: ignore
                                 field_name=header_handler.canonical_name)
        header_handler.handle_input(str_value, field_add_note)

        if header_size > MAX_HDR_SIZE:
            add_note(HEADER_TOO_LARGE,
                     field_name=header_handler.canonical_name,
                     header_size=f_num(header_size))

    # check each of the complete header values and get the parsed value
    for header_name, header_handler in list(self._header_handlers.items()):
        header_add_note = partial(self.message.add_note,
                                  "header-%s" % header_handler.canonical_name.lower(),
                                  field_name=header_handler.canonical_name)
        header_handler.finish(self.message, header_add_note)  # type: ignore
        parsed_headers[header_handler.norm_name] = header_handler.value

    return unicode_headers, parsed_headers
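# Standalone sketch of the two-step header decode in process() above: names
# must be pure ASCII (non-ASCII bytes are dropped), while values fall back to
# ISO-8859-1, which maps every byte, so nothing is lost there.
_name, _value = b"X-Caf\xe9", b"r\xe9sum\xe9"
try:
    _str_name = _name.decode("ascii", "strict")
except UnicodeError:
    _str_name = _name.decode("ascii", "ignore")
try:
    _str_value = _value.decode("ascii", "strict")
except UnicodeError:
    _str_value = _value.decode("iso-8859-1", "replace")
assert (_str_name, _str_value) == ("X-Caf", "résumé")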
def set_iri(self, iri: str) -> None:
    """
    Given a unicode string (possibly an IRI), convert to a URI and make sure
    it's sensible.
    """
    self.iri = iri
    try:
        self.uri = self.iri_to_uri(iri)
    except (ValueError, UnicodeError) as why:
        raise thor.http.error.UrlError(why.args[0])
    if not re.match(r"^\s*%s\s*$" % rfc3986.URI, self.uri, re.VERBOSE):
        self.add_note('uri', URI_BAD_SYNTAX)
    if '#' in self.uri:
        # chop off the fragment
        self.uri = self.uri[:self.uri.index('#')]
    if len(self.uri) > MAX_URI:
        self.add_note('uri', URI_TOO_LONG, uri_len=f_num(len(self.uri)))
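# Illustration of the fragment handling in set_iri() above: the fragment is
# never sent to the server, so it's chopped off before the MAX_URI length
# check is applied to the remainder.
_uri = "http://example.org/page#section-2"
if "#" in _uri:
    _uri = _uri[:_uri.index("#")]
assert _uri == "http://example.org/page"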
def _process_content_codings(self, chunk: bytes) -> bytes:
    """
    Decode a chunk according to the message's content-encoding header.

    Currently supports gzip.
    """
    content_codings = self.parsed_headers.get("content-encoding", [])
    content_codings.reverse()
    for coding in content_codings:
        if coding in ["gzip", "x-gzip"] and self._decode_ok:
            if not self._in_gzip_body:
                self._gzip_header_buffer += chunk
                try:
                    chunk = self._read_gzip_header(self._gzip_header_buffer)
                    self._in_gzip_body = True
                except IndexError:
                    return b""  # not a full header yet
                except IOError as gzip_error:
                    self.add_note("header-content-encoding", BAD_GZIP,
                                  gzip_error=str(gzip_error))
                    self._decode_ok = False
                    return b""
            try:
                chunk = self._gzip_processor.decompress(chunk)
            except zlib.error as zlib_error:
                self.add_note("header-content-encoding", BAD_ZLIB,
                              zlib_error=str(zlib_error),
                              ok_zlib_len=f_num(self.payload_len),
                              chunk_sample=display_bytes(chunk))
                self._decode_ok = False
                return b""
        else:
            # we can't handle other codecs, so punt on body processing.
            self._decode_ok = False
            return b""
    self._md5_post_processor.update(chunk)
    self.decoded_len += len(chunk)
    return chunk
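# Sketch of the decoding strategy _process_content_codings() relies on,
# assuming _gzip_processor is a raw-DEFLATE decompressor (the gzip header
# having already been consumed by _read_gzip_header): zlib inflates the
# remaining stream incrementally, in whatever chunks arrive off the wire.
import zlib

_comp = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)  # raw deflate
_deflated = _comp.compress(b"x" * 100) + _comp.flush()

_decomp = zlib.decompressobj(-zlib.MAX_WBITS)
_out = _decomp.decompress(_deflated[:10])  # feed arbitrary-sized chunks
_out += _decomp.decompress(_deflated[10:])
assert _out == b"x" * 100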
def done(self) -> None:
    negotiated = self.response
    bare = self.base.response

    if not negotiated.complete:
        if negotiated.http_error:
            problem = negotiated.http_error.desc
        else:
            problem = ""
        self.add_base_note('', CONNEG_SUBREQ_PROBLEM, problem=problem)
        return

    # see if it was compressed when not negotiated
    no_conneg_vary_headers = bare.parsed_headers.get('vary', [])
    if 'gzip' in bare.parsed_headers.get('content-encoding', []) \
      or 'x-gzip' in bare.parsed_headers.get('content-encoding', []):
        self.add_base_note('header-vary header-content-encoding',
                           CONNEG_GZIP_WITHOUT_ASKING)

    if 'gzip' not in negotiated.parsed_headers.get('content-encoding', []) \
      and 'x-gzip' not in negotiated.parsed_headers.get('content-encoding', []):
        self.base.gzip_support = False
    else:  # Apparently, content negotiation is happening.
        # check status
        if bare.status_code != negotiated.status_code:
            self.add_base_note('status', VARY_STATUS_MISMATCH,
                               neg_status=negotiated.status_code,
                               noneg_status=bare.status_code)
            return  # Can't be sure what's going on...

        # check headers that should be invariant
        for hdr in ['content-type']:
            if bare.parsed_headers.get(hdr) != negotiated.parsed_headers.get(hdr, None):
                self.add_base_note('header-%s' % hdr, VARY_HEADER_MISMATCH,
                                   header=hdr)

        # check Vary headers
        vary_headers = negotiated.parsed_headers.get('vary', [])
        if "accept-encoding" not in vary_headers and "*" not in vary_headers:
            self.add_base_note('header-vary', CONNEG_NO_VARY)
        if no_conneg_vary_headers != vary_headers:
            self.add_base_note('header-vary', VARY_INCONSISTENT,
                               conneg_vary=", ".join(vary_headers),
                               no_conneg_vary=", ".join(no_conneg_vary_headers))

        # check body: the un-negotiated payload should match the negotiated
        # payload once its content-coding has been removed
        if bare.payload_md5 != negotiated.decoded_md5:
            self.add_base_note('body', VARY_BODY_MISMATCH)

        # check ETag (unequal defaults, so two missing ETags don't compare equal)
        if bare.parsed_headers.get('etag', 1) == negotiated.parsed_headers.get('etag', 2):
            if not self.base.response.parsed_headers['etag'][0]:  # strong
                self.add_base_note('header-etag', VARY_ETAG_DOESNT_CHANGE)

        # check compression efficiency
        if negotiated.payload_len > 0 and bare.payload_len > 0:
            savings = int(100 * ((float(bare.payload_len) - negotiated.payload_len)
                                 / bare.payload_len))
        elif negotiated.payload_len > 0 and bare.payload_len == 0:  # weird.
            return
        else:
            savings = 0
        self.base.gzip_support = True
        self.base.gzip_savings = savings

        if savings >= 0:
            self.add_base_note('header-content-encoding', CONNEG_GZIP_GOOD,
                               savings=savings,
                               orig_size=f_num(bare.payload_len),
                               gzip_size=f_num(negotiated.payload_len))
        else:
            self.add_base_note('header-content-encoding', CONNEG_GZIP_BAD,
                               savings=abs(savings),
                               orig_size=f_num(bare.payload_len),
                               gzip_size=f_num(negotiated.payload_len))
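# Worked example of the savings calculation in done() above: a 10,000-byte
# body that gzips to 3,000 bytes saves 70%, triggering CONNEG_GZIP_GOOD; a
# negative result means compression made the response bigger (CONNEG_GZIP_BAD).
assert int(100 * ((float(10000) - 3000) / 10000)) == 70
assert int(100 * ((float(1000) - 1200) / 1000)) == -20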
def format_size(value: int) -> str:
    if value is None:
        return '<td>-</td>'
    return '<td>%s</td>' % f_num(value, by1024=True)
def checkCaching(response: HttpResponse, request: HttpRequest = None) -> None:
    "Examine HTTP caching characteristics."

    # get header values
    lm_hdr = response.parsed_headers.get("last-modified", None)
    date_hdr = response.parsed_headers.get("date", None)
    expires_hdr = response.parsed_headers.get("expires", None)
    etag_hdr = response.parsed_headers.get("etag", None)
    age_hdr = response.parsed_headers.get("age", None)
    cc_set = response.parsed_headers.get("cache-control", [])
    cc_list = [k for (k, v) in cc_set]
    cc_dict = dict(cc_set)
    cc_keys = list(cc_dict.keys())

    # Last-Modified
    if lm_hdr:
        serv_date = date_hdr or response.start_time
        if lm_hdr > serv_date:
            response.add_note("header-last-modified", LM_FUTURE)
        else:
            response.add_note(
                "header-last-modified",
                LM_PRESENT,
                last_modified_string=relative_time(lm_hdr, serv_date),
            )

    # known Cache-Control directives that don't allow duplicates
    known_cc = [
        "max-age", "no-store", "s-maxage", "public", "private",
        "pre-check", "post-check", "stale-while-revalidate", "stale-if-error",
    ]

    # check for mis-capitalised directives /
    # assure there aren't any dup directives with different values
    for cc in cc_keys:
        if cc.lower() in known_cc and cc != cc.lower():
            response.add_note("header-cache-control", CC_MISCAP,
                              cc_lower=cc.lower(), cc=cc)
        if cc in known_cc and cc_list.count(cc) > 1:
            response.add_note("header-cache-control", CC_DUP, cc=cc)

    # Who can store this?
    if request and request.method not in cacheable_methods:
        response.store_shared = response.store_private = False
        request.add_note("method", METHOD_UNCACHEABLE, method=request.method)
        return  # bail; nothing else to see here
    if "no-store" in cc_keys:
        response.store_shared = response.store_private = False
        response.add_note("header-cache-control", NO_STORE)
        return  # bail; nothing else to see here
    if "private" in cc_keys:
        response.store_shared = False
        response.store_private = True
        response.add_note("header-cache-control", PRIVATE_CC)
    elif (request
          and "authorization" in [k.lower() for k, v in request.headers]
          and "public" not in cc_keys):
        response.store_shared = False
        response.store_private = True
        response.add_note("header-cache-control", PRIVATE_AUTH)
    else:
        response.store_shared = response.store_private = True
        response.add_note("header-cache-control", STOREABLE)

    # no-cache?
    if "no-cache" in cc_keys:
        if lm_hdr is None and etag_hdr is None:
            response.add_note("header-cache-control", NO_CACHE_NO_VALIDATOR)
        else:
            response.add_note("header-cache-control", NO_CACHE)
        return

    # pre-check / post-check
    if "pre-check" in cc_keys or "post-check" in cc_keys:
        if "pre-check" not in cc_keys or "post-check" not in cc_keys:
            response.add_note("header-cache-control", CHECK_SINGLE)
        else:
            pre_check = post_check = None
            try:
                pre_check = int(cc_dict["pre-check"])
                post_check = int(cc_dict["post-check"])
            except ValueError:
                response.add_note("header-cache-control", CHECK_NOT_INTEGER)
            if pre_check is not None and post_check is not None:
                if pre_check == 0 and post_check == 0:
                    response.add_note("header-cache-control", CHECK_ALL_ZERO)
                elif post_check > pre_check:
                    response.add_note("header-cache-control", CHECK_POST_BIGGER)
                    post_check = pre_check
                elif post_check == 0:
                    response.add_note("header-cache-control", CHECK_POST_ZERO)
                else:
                    response.add_note(
                        "header-cache-control",
                        CHECK_POST_PRE,
                        pre_check=pre_check,
                        post_check=post_check,
                    )

    # vary?
    vary = response.parsed_headers.get("vary", set())
    if "*" in vary:
        response.add_note("header-vary", VARY_ASTERISK)
        return  # bail; nothing else to see here
    if len(vary) > 3:
        response.add_note("header-vary", VARY_COMPLEX, vary_count=f_num(len(vary)))
    else:
        if "user-agent" in vary:
            response.add_note("header-vary", VARY_USER_AGENT)
        if "host" in vary:
            response.add_note("header-vary", VARY_HOST)

    # calculate age
    response.age = age_hdr or 0
    age_str = relative_time(response.age, 0, 0)
    if date_hdr and date_hdr > 0:
        apparent_age = max(0, int(response.start_time - date_hdr))
    else:
        apparent_age = 0
    current_age = max(apparent_age, response.age)
    current_age_str = relative_time(current_age, 0, 0)
    if response.age >= 1:
        response.add_note("header-age header-date", CURRENT_AGE, age=age_str)

    # Check for clock skew and dateless origin server.
    if not date_hdr:
        response.add_note("", DATE_CLOCKLESS)
        if expires_hdr or lm_hdr:
            response.add_note("header-expires header-last-modified",
                              DATE_CLOCKLESS_BAD_HDR)
    else:
        skew = date_hdr - response.start_time + response.age
        if response.age > max_clock_skew and (current_age - skew) < max_clock_skew:
            response.add_note("header-date header-age", AGE_PENALTY)
        elif abs(skew) > max_clock_skew:
            response.add_note(
                "header-date",
                DATE_INCORRECT,
                clock_skew_string=relative_time(skew, 0, 2),
            )
        else:
            response.add_note("header-date", DATE_CORRECT)

    # calculate freshness
    freshness_lifetime = 0
    has_explicit_freshness = False
    has_cc_freshness = False
    freshness_hdrs = ["header-date"]
    if "s-maxage" in cc_keys:
        freshness_lifetime = cc_dict["s-maxage"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "max-age" in cc_keys:
        freshness_lifetime = cc_dict["max-age"]
        freshness_hdrs.append("header-cache-control")
        has_explicit_freshness = True
        has_cc_freshness = True
    elif "expires" in response.parsed_headers:
        # An invalid Expires header means it's automatically stale
        has_explicit_freshness = True
        freshness_hdrs.append("header-expires")
        freshness_lifetime = (expires_hdr or 0) - (date_hdr or int(response.start_time))

    freshness_left = freshness_lifetime - current_age
    freshness_left_str = relative_time(abs(int(freshness_left)), 0, 0)
    freshness_lifetime_str = relative_time(int(freshness_lifetime), 0, 0)
    response.freshness_lifetime = freshness_lifetime
    fresh = freshness_left > 0

    if has_explicit_freshness:
        if fresh:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_FRESH,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        elif has_cc_freshness and response.age > freshness_lifetime:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_STALE_CACHE,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
        else:
            response.add_note(
                " ".join(freshness_hdrs),
                FRESHNESS_STALE_ALREADY,
                freshness_lifetime=freshness_lifetime_str,
                freshness_left=freshness_left_str,
                current_age=current_age_str,
            )
    # can heuristic freshness be used?
    elif response.status_code in heuristic_cacheable_status:
        response.add_note("header-last-modified", FRESHNESS_HEURISTIC)
    else:
        response.add_note("", FRESHNESS_NONE)

    # can stale responses be served?
    if "must-revalidate" in cc_keys:
        if fresh:
            response.add_note("header-cache-control", FRESH_MUST_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_MUST_REVALIDATE)
    elif "proxy-revalidate" in cc_keys or "s-maxage" in cc_keys:
        if fresh:
            response.add_note("header-cache-control", FRESH_PROXY_REVALIDATE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_PROXY_REVALIDATE)
    else:
        if fresh:
            response.add_note("header-cache-control", FRESH_SERVABLE)
        elif has_explicit_freshness:
            response.add_note("header-cache-control", STALE_SERVABLE)

    # public?
    if "public" in cc_keys:
        # TODO: check for authentication in request
        response.add_note("header-cache-control", PUBLIC)
def process_headers(msg):
    """
    Parse and check the message for obvious syntactic errors, as well as
    semantic errors that are self-contained (i.e., they can be determined
    without examining other headers, etc.).

    Using msg.headers, it populates:
      - .headers with a Unicode version of the input
      - .parsed_headers with a dictionary of parsed header values
    """
    hdr_dict = {}
    header_block_size = len(msg.version)
    if msg.is_request:
        header_block_size += len(msg.method) + len(msg.uri) + 2
    else:
        header_block_size += len(msg.status_phrase) + 5
    clean_hdrs = []   # unicode version of the header tuples
    parsed_hdrs = {}  # dictionary of parsed header values
    offset = 0
    for name, value in msg.headers:
        offset += 1
        subject = "offset-%s" % offset
        hdr_size = len(name) + len(value)
        if hdr_size > MAX_HDR_SIZE:
            msg.add_note(subject, rs.HEADER_TOO_LARGE,
                         header_name=name, header_size=f_num(hdr_size))
        header_block_size += hdr_size

        # decode the header to make it unicode clean
        try:
            name = name.decode('ascii', 'strict')
        except UnicodeError:
            name = name.decode('ascii', 'ignore')
            msg.add_note(subject, rs.HEADER_NAME_ENCODING, header_name=name)
        try:
            value = value.decode('ascii', 'strict')
        except UnicodeError:
            value = value.decode('iso-8859-1', 'replace')
            msg.add_note(subject, rs.HEADER_VALUE_ENCODING, header_name=name)
        clean_hdrs.append((name, value))
        msg.set_context(field_name=name)

        # check field name syntax
        if not re.match(r"^\s*%s\s*$" % syntax.TOKEN, name, re.VERBOSE):
            msg.add_note(subject, rs.FIELD_NAME_BAD_SYNTAX)
            continue

        norm_name = name.lower()
        value = value.strip()
        hdr_parse = load_header_func(norm_name, 'parse')
        if hdr_parse:
            if hasattr(hdr_parse, 'pre_parse'):
                values = hdr_parse.pre_parse(value)
            else:
                values = [value]
            for value in values:
                if norm_name not in hdr_dict:
                    hdr_dict[norm_name] = (name, [])
                parsed_value = hdr_parse(subject, value, msg)
                if parsed_value is not None:
                    hdr_dict[norm_name][1].append(parsed_value)

    # replace the original header tuples with ones that are clean unicode
    msg.headers = clean_hdrs

    # join parsed header values
    for norm_name, (orig_name, values) in hdr_dict.items():
        msg.set_context(field_name=orig_name)
        hdr_join = load_header_func(norm_name, 'join')
        if hdr_join:
            subject = "header-%s" % norm_name
            joined_value = hdr_join(subject, values, msg)
            if joined_value is None:
                continue
            parsed_hdrs[norm_name] = joined_value
    msg.parsed_headers = parsed_hdrs

    # check the total header block size
    if header_block_size > MAX_TTL_HDR:
        msg.add_note('header', rs.HEADER_BLOCK_TOO_LARGE,
                     header_block_size=f_num(header_block_size))
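# Worked example of the header-block accounting in process_headers() above,
# assuming the "+ 5" covers the three-digit status code plus its two framing
# spaces in the status line: for "HTTP/1.1 200 OK" with two headers,
_version, _status_phrase = "HTTP/1.1", "OK"
_headers = [("Content-Type", "text/html"), ("Content-Length", "1234")]
_block = len(_version) + len(_status_phrase) + 5
_block += sum(len(n) + len(v) for n, v in _headers)
assert _block == 8 + 2 + 5 + (12 + 9) + (14 + 4)  # == 54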