def parse_streams(inbound, outbound, scheme=None):
    """Parse one or two HTTP/1.x streams into a series of exchanges.

    Parsing an outbound stream on its own (without the matching inbound
    stream) is unreliable, because response framing depends on the request.

    :param inbound:
        The inbound (request) stream
        as a :class:`~httpolice.stream.Stream`, or `None`.
    :param outbound:
        The outbound (response) stream
        as a :class:`~httpolice.stream.Stream`, or `None`.
    :param scheme:
        The scheme of the request URI, as a Unicode string,
        or `None` if unknown.
    :return:
        An iterable of :class:`Exchange` objects.
        Some of the exchanges may be "empty" (aka "complaint boxes"):
        containing neither request nor responses,
        but only a notice that indicates some general problem
        with the streams.
    """
    # Phase 1: read requests one by one, pairing each with its responses.
    while inbound and inbound.good:
        request, request_box = _parse_request(inbound, scheme)
        responses, response_box = [], None
        if request:
            if outbound and outbound.good:
                responses, response_box = _parse_responses(outbound, request)
                if responses:
                    final_status = responses[-1].status
                    upgraded = final_status == st.switching_protocols
                    tunneled = (request.method == m.CONNECT and
                                final_status.successful)
                    if upgraded or tunneled:
                        # The inbound stream is marked as not sane, so no
                        # further requests are parsed from it.
                        inbound.sane = False
            yield Exchange(request, responses)
        # Complaint boxes follow the exchange: request's first, then response's.
        for box in (request_box, response_box):
            if box:
                yield box

    # Phase 2: report whatever is left over on the inbound stream.
    if inbound and not inbound.eof:
        # Unparseable data is still sitting on the inbound stream.
        yield complaint_box(1007, stream=inbound, offset=inbound.tell())

    # Phase 3: responses that have no request to go with.
    if outbound and outbound.good:
        if inbound:
            # There were requests, but not enough of them.
            # The remaining responses are still parsed, just without context.
            yield complaint_box(1008, stream=outbound)
        while outbound.good:
            responses, response_box = _parse_responses(outbound, None)
            if responses:
                yield Exchange(None, responses)
            if response_box:
                yield response_box

    # Phase 4: report whatever is left over on the outbound stream.
    if outbound and not outbound.eof:
        # Unparseable data is still sitting on the outbound stream.
        yield complaint_box(1010, stream=outbound, offset=outbound.tell())
def _path_pairs_input(path_pairs, sniff_direction=False,
                      complain_on_one_sided=False):
    """Parse pairs of stream files, one pair per TCP connection.

    :param path_pairs:
        An iterable of ``(path1, path2, time_hint)`` tuples.
        Either path may be falsy when that side of the connection is missing;
        non-empty paths are passed through ``decode_path`` first.
        The time hint possibly indicates when the connection started.
    :param sniff_direction:
        If true, ``_sniff_direction`` is asked which of the two paths
        is the inbound stream and which is the outbound one.
    :param complain_on_one_sided:
        If true, a complaint box (notice 1278) is produced
        for every pair in which one of the paths is `None`.
    :return:
        The result of ``_rearrange_by_time`` applied to the list of
        ``(iterator over the connection's exchanges, time_hint)`` pairs.
    """
    per_connection = []

    for (path1, path2, time_hint) in path_pairs:
        if path1:
            path1 = decode_path(path1)
        if path2:
            path2 = decode_path(path2)

        # Complaint boxes collected for this connection before any parsing.
        boxes = []

        # A pair may be one-sided: only inbound, or only outbound.
        # For ``req-stream`` and ``resp-stream`` inputs that is normal;
        # for other inputs it deserves a complaint.
        # Either way, the one stream that exists is still processed.
        if complain_on_one_sided and (path1 is None or path2 is None):
            boxes.append(complaint_box(1278, path=path1 or path2))

        if sniff_direction:
            # With ``tcpflow`` and ``tcpick`` inputs, it is not yet known
            # which file is client->server and which is server->client.
            direction = _sniff_direction(path1, path2)
            if direction is None:
                # Sniffing failed, so this is some non-HTTP/1.x connection
                # that the capture tool picked up by accident.
                # No attempt is made to parse it.
                boxes.append(complaint_box(1279,
                                           path1=path1 or u'(none)',
                                           path2=path2 or u'(none)'))
                inbound_path = outbound_path = None
            else:
                inbound_path, outbound_path = direction
        else:
            inbound_path, outbound_path = path1, path2

        if inbound_path or outbound_path:
            # Parse the streams as HTTP/1.x; the results come
            # after the complaint boxes gathered above.
            exchanges = itertools.chain(
                boxes, _parse_paths(inbound_path, outbound_path))
        else:
            exchanges = boxes

        per_connection.append((iter(exchanges), time_hint))

    return _rearrange_by_time(per_connection)
def _parse_request(stream, scheme=None):
    """Parse one request (heading, then body) from `stream`.

    :param stream: The stream to read the request from.
    :param scheme: Passed on to ``_parse_request_heading`` as is.
    :return:
        A pair ``(request, box)``. If the heading fails to parse
        (:exc:`ParseError`), the request is `None` and the box is
        a complaint box (notice 1006) carrying the error.
        Otherwise, the box is `None`.
    """
    try:
        request = _parse_request_heading(stream, scheme)
    except ParseError as exc:
        return None, complaint_box(1006, error=exc)

    # The heading is fine. Errors raised while handling the body
    # are deliberately not caught here.
    _parse_request_body(request, stream)
    return request, None
def _parse_request(stream, scheme=None):
    """Parse one request (heading, then body) from `stream`.

    :param stream: The stream to read the request from.
    :param scheme: Passed on to ``_parse_request_heading`` as is.
    :return:
        A pair ``(request, box)``. If the heading fails to parse
        (:exc:`ParseError`), the request is `None` and the box is
        a complaint box (notice 1006) carrying the error.
        Otherwise, the box is `None`.
    """
    try:
        request = _parse_request_heading(stream, scheme)
    except ParseError as exc:
        return None, complaint_box(1006, error=exc)

    # The heading is fine. Errors raised while handling the body
    # are deliberately not caught here.
    _parse_request_body(request, stream)
    return request, None
def _path_pairs_input(path_pairs, sniff_direction=False,
                      complain_on_one_sided=False):
    """Parse pairs of stream files, one pair per TCP connection.

    :param path_pairs:
        An iterable of ``(path1, path2, time_hint)`` tuples.
        Either path may be `None` when that side of the connection
        is missing. The time hint possibly indicates
        when the connection started.
    :param sniff_direction:
        If true, ``_sniff_direction`` is asked which of the two paths
        is the inbound stream and which is the outbound one.
    :param complain_on_one_sided:
        If true, a complaint box (notice 1278) is produced
        for every pair in which one of the paths is `None`.
    :return:
        The result of ``_rearrange_by_time`` applied to the list of
        ``(iterator over the connection's exchanges, time_hint)`` pairs.
    """
    per_connection = []

    for (path1, path2, time_hint) in path_pairs:
        # Complaint boxes collected for this connection before any parsing.
        boxes = []

        # A pair may be one-sided: only inbound, or only outbound.
        # For ``req-stream`` and ``resp-stream`` inputs that is normal;
        # for other inputs it deserves a complaint.
        # Either way, the one stream that exists is still processed.
        if complain_on_one_sided and (path1 is None or path2 is None):
            boxes.append(complaint_box(1278, path=path1 or path2))

        if sniff_direction:
            # With ``tcpflow`` and ``tcpick`` inputs, it is not yet known
            # which file is client->server and which is server->client.
            direction = _sniff_direction(path1, path2)
            if direction is None:
                # Sniffing failed, so this is some non-HTTP/1.x connection
                # that the capture tool picked up by accident.
                # No attempt is made to parse it.
                boxes.append(complaint_box(1279,
                                           path1=path1 or u'(none)',
                                           path2=path2 or u'(none)'))
                inbound_path = outbound_path = None
            else:
                inbound_path, outbound_path = direction
        else:
            inbound_path, outbound_path = path1, path2

        if inbound_path or outbound_path:
            # Parse the streams as HTTP/1.x; the results come
            # after the complaint boxes gathered above.
            exchanges = itertools.chain(
                boxes, _parse_paths(inbound_path, outbound_path))
        else:
            exchanges = boxes

        per_connection.append((iter(exchanges), time_hint))

    return _rearrange_by_time(per_connection)
def _parse_responses(stream, req):
    """Parse all responses on `stream` that correspond to one request.

    :param stream: The stream to read responses from.
    :param req:
        The request that the responses answer; handed to
        ``_parse_response_heading`` unchanged (callers may pass `None`).
    :return:
        A pair ``(responses, box)``. ``responses`` is the list of responses
        parsed so far (possibly empty). ``box`` is a complaint box
        (notice 1009) if a response heading failed to parse
        (:exc:`ParseError`), and `None` otherwise.
    """
    collected = []
    while stream.good:
        # One request may be answered by several responses
        # (RFC 7230 section 3.3), so keep reading until the final one.
        try:
            response = _parse_response_heading(req, stream)
        except ParseError as exc:
            return collected, complaint_box(1009, error=exc)

        collected.append(response)
        _parse_response_body(response, stream)

        status = response.status
        more_expected = (status.informational and
                         not (status == st.switching_protocols))
        if not more_expected:
            # Non-informational, or a protocol switch:
            # this was the final response for this request.
            break
    return collected, None
def _parse_responses(stream, req):
    """Parse all responses on `stream` that correspond to one request.

    :param stream: The stream to read responses from.
    :param req:
        The request that the responses answer; handed to
        ``_parse_response_heading`` unchanged (callers may pass `None`).
    :return:
        A pair ``(responses, box)``. ``responses`` is the list of responses
        parsed so far (possibly empty). ``box`` is a complaint box
        (notice 1009) if a response heading failed to parse
        (:exc:`ParseError`), and `None` otherwise.
    """
    collected = []
    while stream.good:
        # One request may be answered by several responses
        # (RFC 7230 section 3.3), so keep reading until the final one.
        try:
            response = _parse_response_heading(req, stream)
        except ParseError as exc:
            return collected, complaint_box(1009, error=exc)

        collected.append(response)
        _parse_response_body(response, stream)

        status = response.status
        more_expected = (status.informational and
                         not (status == st.switching_protocols))
        if not more_expected:
            # Non-informational, or a protocol switch:
            # this was the final response for this request.
            break
    return collected, None
def parse_streams(inbound, outbound, scheme=None):
    """Parse one or two HTTP/1.x streams into a series of exchanges.

    Parsing an outbound stream on its own (without the matching inbound
    stream) is unreliable, because response framing depends on the request.

    :param inbound:
        The inbound (request) stream
        as a :class:`~httpolice.parse.Stream`, or `None`.
    :param outbound:
        The outbound (response) stream
        as a :class:`~httpolice.parse.Stream`, or `None`.
    :param scheme:
        The scheme of the request URI, as a Unicode string,
        or `None` if unknown.
    :return:
        An iterable of :class:`Exchange` objects.
        Some of the exchanges may be "empty" (aka "complaint boxes"):
        containing neither request nor responses,
        but only a notice that indicates some general problem
        with the streams.
    """
    # Phase 1: read requests one by one, pairing each with its responses.
    while inbound and inbound.sane:
        request, request_box = _parse_request(inbound, scheme)
        responses, response_box = [], None
        if request:
            if outbound and outbound.sane:
                responses, response_box = _parse_responses(outbound, request)
                if responses:
                    final_status = responses[-1].status
                    upgraded = final_status == st.switching_protocols
                    tunneled = (request.method == m.CONNECT and
                                final_status.successful)
                    if upgraded or tunneled:
                        # Marking the inbound stream as not sane
                        # ends this loop: no more requests are parsed.
                        inbound.sane = False
            yield Exchange(request, responses)
        # Complaint boxes follow the exchange: request's first, then response's.
        for box in (request_box, response_box):
            if box:
                yield box

    # Phase 2: report whatever is left over on the inbound stream.
    if inbound and not inbound.eof:
        # Unparseable data is still sitting on the inbound stream.
        leftover = inbound.consume_rest()
        yield complaint_box(1007, stream=inbound, nbytes=len(leftover))

    # Phase 3: responses that have no request to go with.
    if outbound and outbound.sane:
        if inbound:
            # There were requests, but not enough of them.
            # The remaining responses are still parsed, just without context.
            yield complaint_box(1008, stream=outbound)
        while outbound.sane:
            responses, response_box = _parse_responses(outbound, None)
            if responses:
                yield Exchange(None, responses)
            if response_box:
                yield response_box

    # Phase 4: report whatever is left over on the outbound stream.
    if outbound and not outbound.eof:
        # Unparseable data is still sitting on the outbound stream.
        leftover = outbound.consume_rest()
        yield complaint_box(1010, stream=outbound, nbytes=len(leftover))