def _parse_rfc_forwarded(self):
    """Collect the "for" addresses from the RFC 7239 "Forwarded" header.

    The header value is read from ``self.env["HTTP_FORWARDED"]`` (the
    key is assumed to be present), split into comma-separated elements,
    and each element into semicolon-separated ``name=value`` parameters.

    Returns:
        list: hosts taken from the "for" parameters, in header order.
    """
    hosts = []
    header_value = self.env["HTTP_FORWARDED"]

    for element in header_value.split(","):
        for pair in element.split(";"):
            # PERF: partition() is faster than split() and always
            # yields three items, even when there is no "=".
            name, _, raw_value = pair.strip().partition("=")

            # Skip parameters with no "=" or an empty value, as well as
            # every parameter other than "for" (matched
            # case-insensitively).
            if not raw_value or name.lower() != "for":
                continue

            # Only the host part of the parsed result is kept.
            host, _ = parse_host(unquote_string(raw_value))
            hosts.append(host)

    return hosts
def _parse_rfc_forwarded(self):
    """Extract forwarded-for hosts from the RFC 7239 "Forwarded" header.

    Reads ``self.env['HTTP_FORWARDED']`` (assumed to be present) and
    walks every ``name=value`` parameter of every forwarded element.

    Returns:
        list: addresses derived from "for" parameters.
    """
    # Lazily yield (name, separator, value) for every parameter of every
    # comma-separated element. partition() is used because it is faster
    # than split() and always produces exactly three items.
    pairs = (
        chunk.strip().partition('=')
        for element in self.env['HTTP_FORWARDED'].split(',')
        for chunk in element.split(';')
    )

    found = []
    for name, _, value in pairs:
        if not value:
            # No '=' separator, or a separator with nothing after it.
            continue

        if name.lower() == 'for':
            # Parameter names are compared case-insensitively; only the
            # host component of the parsed value is recorded.
            host, _ = parse_host(unquote_string(value))
            found.append(host)

    return found
def _parse_rfc_forwarded(self):
    """Parse the RFC 7239 "Forwarded" header.

    The raw header is read from ``self.env['HTTP_FORWARDED']`` (the key
    is assumed to be present). Each comma-separated element is split
    into semicolon-separated ``name=value`` parameters, and the host of
    every "for" parameter is collected.

    Returns:
        list: addresses derived from "for" parameters.
    """
    addr = []

    for forwarded in self.env['HTTP_FORWARDED'].split(','):
        for param in forwarded.split(';'):
            # partition() always yields three items, so a parameter
            # without '=' simply produces an empty value.
            key, _, val = param.strip().partition('=')

            if not val:
                # Either the '=' separator is missing or nothing follows
                # it (e.g. "for="). RFC 7239 requires a non-empty value,
                # so skip the parameter instead of recording an empty
                # address.
                continue

            if key.lower() != 'for':
                # We only want "for" params
                continue

            host, _ = parse_host(unquote_string(val))
            addr.append(host)

    return addr
def test_unquote_string(self):
    """Check uri.unquote_string() on unquoted, half-quoted and quoted input."""
    # (input, expected) pairs. Only a string wrapped in double quotes on
    # BOTH ends is expected to lose its quotes.
    cases = (
        ('v', 'v'),
        ('not-quoted', 'not-quoted'),
        ('partial-quoted"', 'partial-quoted"'),
        ('"partial-quoted', '"partial-quoted'),
        ('"partial-quoted"', 'partial-quoted'),
    )

    for raw, expected in cases:
        assert uri.unquote_string(raw) == expected
def _parse_forwarded_header(forwarded):
    """Parse the value of a Forwarded header.

    Best-effort parser for the header format described by RFC 7239:

    - Pairs are recognized with ``_FORWARDED_PAIR_RE``; per the RFC a
      value is either a 'token' or a 'quoted-string'.
    - Quoted values are passed through ``unquote_string``.
    - 'by' and 'for' contents are NOT validated (RFC 7239 section 6).
    - 'host' contents are NOT validated (Host ABNF).
    - 'proto' contents are NOT validated as URI scheme names.

    Arguments:
        forwarded (str): Value of a Forwarded header

    Returns:
        list: Sequence of Forwarded instances, one per forwarded-element
        that yielded at least one valid pair, in header order.
    """
    elements = []
    current = None
    expect_separator = False
    cursor = 0
    length = len(forwarded)

    # str.find() returns -1 when no further comma exists; the lower
    # bound in the loop condition turns that into loop termination.
    while 0 <= cursor < length:
        pair = _FORWARDED_PAIR_RE.match(forwarded, cursor)

        if pair is None:
            char = forwarded[cursor]

            if char == ',':
                # Start of the next forwarded-element.
                expect_separator = False
                cursor += 1

                # A malformed header can get here without any parsed
                # pair, in which case there is nothing to flush.
                if current:
                    elements.append(current)
                    current = None
            elif char == ';':
                # Start of the next forwarded-pair.
                expect_separator = False
                cursor += 1
            elif char in ' \t':
                # Whitespace between pairs is tolerated even though
                # RFC 7239 does not allow it (Postel's law).
                cursor += 1
            else:
                # Bad syntax: resynchronize at the next comma.
                cursor = forwarded.find(',', cursor)

            continue

        if expect_separator:
            # Two pairs without a ';' or ',' in between is bad syntax:
            # resynchronize at the next comma.
            cursor = forwarded.find(',', cursor)
            continue

        # The match is anchored at ``cursor``, so its end offset is the
        # position right after the matched pair.
        cursor = pair.end()
        expect_separator = True

        name, value = pair.groups()

        # Parameter names are case-insensitive per RFC 7239.
        name = name.lower()

        if value[0] == '"':
            value = unquote_string(value)

        # First pair of this forwarded-element: create its container.
        if not current:
            current = Forwarded()

        if name == 'by':
            current.dest = value
        elif name == 'for':
            current.src = value
        elif name == 'host':
            current.host = value
        elif name == 'proto':
            # Lowercase the scheme so it is consistent with WSGI, which
            # requires 'wsgi.url_scheme' to be exactly 'http' or
            # 'https' (case-sensitive).
            current.scheme = value.lower()

    # Flush the trailing forwarded-element, if any.
    if current:
        elements.append(current)

    return elements
def _parse_forwarded_header(forwarded):
    """Parse the value of a Forwarded header.

    Makes an effort to follow RFC 7239:

    - Each pair must match ``_FORWARDED_PAIR_RE``; per the RFC its value
      is either a 'token' or a 'quoted-string'.
    - Values starting with a double quote go through ``unquote_string``.
    - It does NOT validate 'by' and 'for' contents (section 6).
    - It does NOT validate 'host' contents (Host ABNF).
    - It does NOT validate 'proto' contents for valid URI scheme names.

    Arguments:
        forwarded (str): Value of a Forwarded header

    Returns:
        list: Sequence of Forwarded instances, representing each
        forwarded-element in the header, in the same order as they
        appeared in the header.
    """
    # Parameter name -> Forwarded attribute that stores the value as-is.
    # 'proto' is handled separately because its value is lowercased.
    verbatim_attrs = {'by': 'dest', 'for': 'src', 'host': 'host'}

    results = []
    element = None
    separator_required = False
    index = 0
    total = len(forwarded)

    # A failed find() yields -1, which the lower bound below treats as
    # "nothing left to parse".
    while 0 <= index < total:
        found = _FORWARDED_PAIR_RE.match(forwarded, index)

        if found is not None:
            if separator_required:
                # A pair directly after another pair is bad syntax;
                # skip ahead to the next comma.
                index = forwarded.find(',', index)
                continue

            index += len(found.group(0))
            separator_required = True

            key, val = found.groups()

            # RFC 7239: parameter names are case-insensitive.
            key = key.lower()

            if val[0] == '"':
                val = unquote_string(val)

            # Lazily create the element on its first valid pair.
            if not element:
                element = Forwarded()

            if key in verbatim_attrs:
                setattr(element, verbatim_attrs[key], val)
            elif key == 'proto':
                # Normalized to lowercase for consistency with WSGI's
                # case-sensitive 'wsgi.url_scheme' ('http' / 'https').
                element.scheme = val.lower()

            continue

        symbol = forwarded[index]

        if symbol in ',;':
            # ';' introduces the next pair, ',' the next element; both
            # satisfy the pending separator requirement.
            separator_required = False
            index += 1

            # Only a comma closes the current element, and only when a
            # pair was actually parsed (malformed input may have none).
            if symbol == ',' and element:
                results.append(element)
                element = None
        elif symbol in ' \t':
            # Whitespace between pairs is accepted although RFC 7239
            # forbids it; this keeps the parser simple and lenient.
            index += 1
        else:
            # Unrecognized character; skip ahead to the next comma.
            index = forwarded.find(',', index)

    # Add the last forwarded-element, if any.
    if element:
        results.append(element)

    return results