def _process_request(data, creator, path):
    """Build a `Request` from one HAR request entry.

    :param data:
        The HAR request object. The keys ``method``, ``url`` and
        ``bodySize`` are read unconditionally; ``postData`` is optional.
    :param creator:
        Identifies the program that exported the HAR file.
        Here it is only tested for membership in `FIDDLER`.
    :param path:
        Where the HAR data came from; used only in the request's remark.
    :return:
        The new `Request`, whose `is_to_proxy` is always forced to `None`.
    """
    version, header_entries = _process_message(data, creator)
    method = data['method']

    parsed = urlparse(data['url'])
    scheme = parsed.scheme

    if method == m.CONNECT:
        target = parsed.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # With HAR, we can't tell if the request was to a proxy or to a server.
        # So we force most requests into the "origin form" of the target,
        target = parsed.path
        if parsed.params:
            # `urlparse` moves the ``;parameters`` of the last path segment
            # out of ``path`` and into ``params``. They are part of the path
            # that was requested, so put them back.
            # NOTE(review): assumes `urlparse` is the standard library
            # function -- confirm against this file's imports.
            target += u';' + parsed.params
        if parsed.query:
            target += u'?' + parsed.query
    else:
        # However, if the request has no ``Host`` header,
        # the user won't be able to see the target host
        # unless we set the full URL ("absolute form") as the target.
        # To prevent this from having an effect on the proxy logic,
        # we explicitly set `Request.is_to_proxy` to `None` later.
        target = data['url']

    if data['bodySize'] == 0:
        # No body, or a body of length 0 (which we do not distinguish).
        body = b''
    elif data['bodySize'] > 0:
        # A message body was present, but we cannot recover it,
        # because message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown. Maybe there was a body, maybe there wasn't.
        body = None

    text = None
    post = data.get('postData')
    if post and post.get('text'):
        text = post['text']
        if creator in FIDDLER and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT requests.
            text = None
            body = b''

    req = Request(scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    req.is_to_proxy = None      # See above.
    return req
def _parse_request_heading(stream, scheme=None):
    """Parse a request line plus header fields from `stream`.

    :param stream: the stream to consume from.
    :param scheme: passed through unchanged as the scheme of the `Request`.
    :return:
        A `Request` with ``body=None``, or `Unavailable` if a `ParseError`
        occurred. In the latter case the stream is marked as not sane
        and complaint 1006 is filed on it.
    """
    start = stream.point
    try:
        with stream:
            verb = Method(stream.consume_regex(rfc7230.method))
            stream.consume_regex(SP)
            request_target = stream.consume_regex(b'[^\\s]+',
                                                  u'request target')
            stream.consume_regex(SP)
            http_version = HTTPVersion(
                stream.consume_regex(rfc7230.HTTP_version))
            _parse_line_ending(stream)
            fields = parse_header_fields(stream)
    except ParseError as exc:
        # The heading is unusable; flag the stream and report why.
        stream.sane = False
        stream.complain(1006, error=exc)
        return Unavailable

    origin = u'from %s, offset %d' % (stream.name, start)
    request = Request(scheme, verb, request_target, http_version, fields,
                      body=None, remark=origin)
    # Hand over whatever the stream complained about while parsing.
    stream.dump_complaints(request.complain, place=u'request heading')
    return request
def test_effective_uri_1():
    """Origin-form target plus a ``Host`` with a port gives a full URI."""
    headers = [(h.host, b'www.example.org:8080')]
    request = Request(u'http', m.GET, u'/pub/WWW/TheProject.html', http11,
                      headers, b'')
    expected = u'http://www.example.org:8080/pub/WWW/TheProject.html'
    assert request.effective_uri == expected
def test_construct_exchange():
    """Plain strings and numbers given to the constructors get wrapped.

    Also pins down the ``repr`` of `Request`, `Response` and `Exchange`.
    """
    request = Request(u'http', u'GET', u'/', u'HTTP/1.1',
                      [(u'Host', b'example.com')], None)
    assert repr(request) == '<Request GET>'

    interim = Response(u'HTTP/1.1', 123, u'Please wait', [], None)
    assert repr(interim) == '<Response 123>'

    final = Response(u'HTTP/1.1', 200, u'OK',
                     [(u'Content-Length', b'14')],
                     b'Hello world!\r\n', None)

    exchange = Exchange(request, [interim, final])
    expected_repr = \
        'Exchange(<Request GET>, [<Response 123>, <Response 200>])'
    assert repr(exchange) == expected_repr

    # Request side.
    assert isinstance(exchange.request.method, Method)
    assert isinstance(exchange.request.version, HTTPVersion)
    assert isinstance(exchange.request.header_entries[0].name, FieldName)

    # Response side.
    assert isinstance(exchange.responses[0].version, HTTPVersion)
    assert isinstance(exchange.responses[0].status, StatusCode)
    assert isinstance(exchange.responses[1].header_entries[0].name, FieldName)
def _parse_request_heading(stream, scheme=None):
    """Read a request line and header fields from `stream`.

    The request line must consist of exactly three pieces separated by
    single spaces, the last of which matches `HTTP_VERSION`; otherwise
    the error produced by ``stream.error()`` is raised.

    :param stream: the stream to read from.
    :param scheme: passed through unchanged as the scheme of the `Request`.
    :return: a `Request` with ``body=None``.
    """
    start = stream.tell()
    with stream.parsing(request_line):
        pieces = stream.readline().split(u' ')
        if not (len(pieces) == 3 and HTTP_VERSION.match(pieces[2])):
            raise stream.error(start)
    method = Method(pieces[0])
    target = pieces[1]
    version_ = HTTPVersion(pieces[2])

    entries = parse_header_fields(stream)
    with stream.parsing(HTTP_message):
        # Consume the line ending that follows the header fields.
        stream.readlineend()

    origin = u'from %s, offset %d' % (stream.name, start)
    request = Request(scheme, method, target, version_, entries,
                      body=None, remark=origin)
    stream.dump_complaints(request.complain, place=u'request heading')
    return request
def _process_request(data, creator, path):
    """Build a `Request` from one HAR request entry.

    Several exporter-specific quirks are compensated for along the way;
    see the comments on the individual steps.

    :param data:
        The HAR request object. The keys ``method``, ``url`` and
        ``bodySize`` are read unconditionally; ``postData`` is optional.
    :param creator:
        Describes the program that exported the HAR file, through its
        ``is_chrome``, ``is_firefox``, ``is_insomnia`` and ``is_fiddler``
        attributes.
    :param path:
        Where the HAR data came from; used only in the request's remark.
    :return:
        The new `Request`, whose `is_to_proxy` is always forced to `None`.
    """
    (version, header_entries, pseudo_headers) = \
        _process_message(data, creator)
    if creator.is_chrome and version == http11 and u':host' in pseudo_headers:
        # SPDY exported from Chrome.
        version = None

    # Firefox exports "Connection: keep-alive" on HTTP/2 requests
    # (which triggers notice 1244)
    # even though it does not actually send it
    # (this can be verified with SSLKEYLOGFILE + Wireshark).
    if creator.is_firefox and version == http2:
        header_entries = [
            (name, value)
            for (name, value) in header_entries
            if (name, value) != (h.connection, u'keep-alive')
        ]

    method = data['method']
    header_names = {name for (name, _) in header_entries}

    parsed = urlparse(data['url'])
    scheme = parsed.scheme

    if creator.is_insomnia:
        # https://github.com/getinsomnia/insomnia/issues/840
        if h.host not in header_names:
            header_entries.insert(0, (h.host, parsed.netloc))
        if h.user_agent not in header_names:
            # Synthesize a ``User-Agent`` from the Insomnia version
            # that `creator` reconstructs.
            ua_string = u'insomnia/%s' % creator.reconstruct_insomnia_version()
            header_entries.append((h.user_agent, ua_string))
        if h.accept not in header_names:
            header_entries.append((h.accept, u'*/*'))
        # Entries may have been added just above; refresh the lookup set.
        header_names = {name for (name, _) in header_entries}

    if method == m.CONNECT:
        target = parsed.netloc
    elif h.host in header_names:
        # With HAR, we can't tell if the request was to a proxy or to a server.
        # So we force most requests into the "origin form" of the target,
        target = parsed.path
        if parsed.params:
            # `urlparse` moves the ``;parameters`` of the last path segment
            # out of ``path`` and into ``params``. They are part of the path
            # that was requested, so put them back.
            # NOTE(review): assumes `urlparse` is the standard library
            # function -- confirm against this file's imports.
            target += u';' + parsed.params
        if parsed.query:
            target += u'?' + parsed.query
    else:
        # However, if the request has no ``Host`` header,
        # the user won't be able to see the target host
        # unless we set the full URL ("absolute form") as the target.
        # To prevent this from having an effect on the proxy logic,
        # we explicitly set `Request.is_to_proxy` to `None` later.
        target = data['url']

    if data['bodySize'] == 0:
        # No body, or a body of length 0 (which we do not distinguish).
        body = b''
    elif data['bodySize'] > 0:
        # A message body was present, but we cannot recover it,
        # because message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable()
    else:
        # Unknown. Maybe there was a body, maybe there wasn't.
        body = None

    text = None
    post = data.get('postData')
    if post and post.get('text'):
        text = post['text']

        if creator.is_firefox and \
                post['mimeType'] == media.application_x_www_form_urlencoded \
                and u'\r\n' in text:
            # Yes, Firefox actually outputs this stuff. Go figure.
            # Everything before the last CRLF is parsed as header fields;
            # only what follows it is kept as the body text.
            (wtf, actual_text) = text.rsplit(u'\r\n', 1)
            try:
                buf = io.BufferedReader(io.BytesIO(wtf.encode('iso-8859-1')))
                more_entries = framing1.parse_header_fields(Stream(buf))
            except (UnicodeError, ParseError):      # pragma: no cover
                # Best effort: if that prefix does not parse as header
                # fields, leave the text and the headers as they were.
                pass
            else:
                header_entries.extend(more_entries)
                text = actual_text

        if creator.is_fiddler and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT requests.
            text = None
            body = b''

    req = Request(scheme, method, target, version, header_entries, body,
                  remark=u'from %s' % path)
    if text is not None:
        req.unicode_body = text
    req.is_to_proxy = None      # See above.
    return req
def test_effective_uri_4():
    """An absolute-form target is the effective URI, whatever its scheme."""
    absolute_target = u'myproto://www.example.org/index.html'
    headers = [(h.host, b'www.example.org')]
    request = Request(u'http', m.GET, absolute_target, http11, headers, b'')
    assert request.effective_uri == u'myproto://www.example.org/index.html'
def test_effective_uri_3():
    """An ``OPTIONS *`` request resolves to scheme plus host, with no path."""
    headers = [(h.host, b'www.example.org')]
    request = Request(u'https', m.OPTIONS, u'*', http11, headers, b'')
    expected = u'https://www.example.org'
    assert request.effective_uri == expected
def test_effective_uri_2():
    """With no header fields at all, there is no effective URI."""
    no_headers = []
    request = Request(u'http', m.GET, u'/pub/WWW/TheProject.html', http10,
                      no_headers, b'')
    assert request.effective_uri is None
def _process_request(data, creator):
    """Build a `Request` from one HAR request entry.

    Several exporter-specific quirks are compensated for along the way;
    see the comments on the individual steps.

    :param data:
        The HAR request object. The keys ``method``, ``url`` and
        ``bodySize`` are read unconditionally; ``postData`` is optional.
    :param creator:
        Identifies the program that exported the HAR file. It is tested
        for membership in `CHROME`, `FIREFOX` and `FIDDLER`.
    :return:
        The new `Request`, whose `is_to_proxy` is always forced to `None`.
    """
    (version, header_entries, pseudo_headers) = \
        _process_message(data, creator)
    if creator in CHROME and version == http11 and u':host' in pseudo_headers:
        # SPDY exported from Chrome.
        version = None

    # Firefox exports "Connection: keep-alive" on HTTP/2 requests
    # (which triggers notice 1244)
    # even though it does not actually send it
    # (this can be verified with SSLKEYLOGFILE + Wireshark).
    if creator in FIREFOX and version == http2:
        header_entries = [
            (name, value)
            for (name, value) in header_entries
            if (name, value) != (h.connection, u'keep-alive')
        ]

    method = data['method']

    parsed = urlparse(data['url'])
    scheme = parsed.scheme

    if method == m.CONNECT:
        target = parsed.netloc
    elif any(name == h.host for (name, _) in header_entries):
        # With HAR, we can't tell if the request was to a proxy or to a server.
        # So we force most requests into the "origin form" of the target,
        target = parsed.path
        if parsed.params:
            # `urlparse` moves the ``;parameters`` of the last path segment
            # out of ``path`` and into ``params``. They are part of the path
            # that was requested, so put them back.
            # NOTE(review): assumes `urlparse` is the standard library
            # function -- confirm against this file's imports.
            target += u';' + parsed.params
        if parsed.query:
            target += u'?' + parsed.query
    else:
        # However, if the request has no ``Host`` header,
        # the user won't be able to see the target host
        # unless we set the full URL ("absolute form") as the target.
        # To prevent this from having an effect on the proxy logic,
        # we explicitly set `Request.is_to_proxy` to `None` later.
        target = data['url']

    if data['bodySize'] == 0:
        # No body, or a body of length 0 (which we do not distinguish).
        body = b''
    elif data['bodySize'] > 0:
        # A message body was present, but we cannot recover it,
        # because message body is the body *with* ``Content-Encoding``,
        # and HAR does not include that.
        body = Unavailable
    else:
        # Unknown. Maybe there was a body, maybe there wasn't.
        body = None

    text = None
    post = data.get('postData')
    if post and post.get('text'):
        text = post['text']

        if creator in FIREFOX and \
                post['mimeType'] == media.application_x_www_form_urlencoded \
                and u'\r\n' in text:
            # Yes, Firefox actually outputs this stuff. Go figure.
            # Everything before the last CRLF is parsed as header fields;
            # only what follows it is kept as the body text.
            (wtf, actual_text) = text.rsplit(u'\r\n', 1)
            try:
                # The CRLF removed by the split is appended again
                # before the prefix goes to the header parser.
                stream = Stream((wtf + u'\r\n').encode('iso-8859-1'))
                more_entries = framing1.parse_header_fields(stream)
            except (UnicodeError, ParseError):
                # Best effort: if that prefix does not parse as header
                # fields, leave the text and the headers as they were.
                pass
            else:
                header_entries.extend(more_entries)
                text = actual_text

        if creator in FIDDLER and method == m.CONNECT and u'Fiddler' in text:
            # Fiddler's HAR export adds a body with debug information
            # to CONNECT requests.
            text = None
            body = b''

    req = Request(scheme, method, target, version, header_entries, body)
    if text is not None:
        req.unicode_body = text
    req.is_to_proxy = None      # See above.
    return req