示例#1
0
def parse_streams(inbound, outbound, scheme=None):
    """Turn one or two HTTP/1.x streams into a series of exchanges.

    Requests are read from `inbound` one by one; for every request that
    parses, the matching responses are read from `outbound` (if it is
    available). Responses that remain once the requests are exhausted
    are parsed on their own.

    Beware that parsing an outbound stream without an inbound stream
    is unreliable, because response framing depends on the request.

    :param inbound:
        The inbound (request) stream as a :class:`~httpolice.stream.Stream`,
        or `None`.
    :param outbound:
        The outbound (response) stream as a :class:`~httpolice.stream.Stream`,
        or `None`.
    :param scheme:
        The scheme of the request URI, as a Unicode string,
        or `None` if unknown.
    :return:
        An iterable of :class:`Exchange` objects.
        Some of the exchanges may be "empty" (aka "complaint boxes"):
        containing neither request nor responses,
        but only a notice that indicates some general problem with the streams.
    """
    # Phase 1: walk the requests, pairing each with its responses.
    while inbound and inbound.good:
        request, request_box = _parse_request(inbound, scheme)
        responses, response_box = [], None
        if request:
            if outbound and outbound.good:
                responses, response_box = _parse_responses(outbound, request)
                if responses:
                    final_status = responses[-1].status
                    # A protocol switch or a successful CONNECT marks
                    # the inbound stream as no longer sane (presumably
                    # the rest of it is not HTTP/1.x requests anymore).
                    if final_status == st.switching_protocols:
                        inbound.sane = False
                    if request.method == m.CONNECT and final_status.successful:
                        inbound.sane = False
            yield Exchange(request, responses)
        # Complaints go after the exchange: request's first, then responses'.
        for box in (request_box, response_box):
            if box:
                yield box

    # Phase 2: report unparseable leftovers on the inbound stream.
    if inbound and not inbound.eof:
        leftover_at = inbound.tell()
        yield complaint_box(1007, stream=inbound, offset=leftover_at)

    # Phase 3: responses that have no request to go with.
    if outbound and outbound.good:
        if inbound:
            # There were requests, but fewer than responses.
            # The remaining responses are still parsed, on their own.
            yield complaint_box(1008, stream=outbound)
        while outbound.good:
            responses, response_box = _parse_responses(outbound, None)
            if responses:
                yield Exchange(None, responses)
            if response_box:
                yield response_box

    # Phase 4: report unparseable leftovers on the outbound stream.
    if outbound and not outbound.eof:
        leftover_at = outbound.tell()
        yield complaint_box(1010, stream=outbound, offset=leftover_at)
示例#2
0
def _path_pairs_input(path_pairs, sniff_direction=False,
                      complain_on_one_sided=False):
    """Build exchanges from pairs of stream files, one pair per connection.

    :param path_pairs:
        An iterable of ``(path1, path2, time_hint)`` triples.
        Either path may be falsy when that side of the connection is absent.
    :param sniff_direction:
        If true, `_sniff_direction` decides which path is the inbound
        stream and which is the outbound one; otherwise `path1` is taken
        as inbound and `path2` as outbound.
    :param complain_on_one_sided:
        If true, a complaint box (1278) is produced for every pair
        where one of the paths is `None`.
    :return:
        The result of `_rearrange_by_time` applied to the list of
        ``(iterator, time_hint)`` pairs, one per connection.
    """
    connections = []

    # Every triple describes one TCP connection, optionally with
    # a hint about the time when that connection started.
    for (path1, path2, time_hint) in path_pairs:
        if path1:
            path1 = decode_path(path1)
        if path2:
            path2 = decode_path(path2)
        boxes = []              # Complaints about this connection.

        # A pair may be one-sided (only inbound or only outbound).
        # For the ``req-stream`` and ``resp-stream`` input formats
        # that is expected; for others we complain, but we still go on
        # and process the one stream we have.
        if complain_on_one_sided and (path1 is None or path2 is None):
            boxes.append(complaint_box(1278, path=path1 or path2))

        if sniff_direction:
            # With the ``tcpflow`` and ``tcpick`` input formats, it is not
            # yet known which side is inbound (client->server).
            direction = _sniff_direction(path1, path2)
            if direction is None:
                # Sniffing failed, so this is a non-HTTP/1.x connection
                # that tcpflow or something captured by accident.
                # No attempt is made to parse it.
                boxes.append(complaint_box(1279,
                                           path1=path1 or u'(none)',
                                           path2=path2 or u'(none)'))
                inbound_path = outbound_path = None
            else:
                inbound_path, outbound_path = direction
        else:
            inbound_path, outbound_path = path1, path2

        if inbound_path or outbound_path:
            # Parse the streams as HTTP/1.x; the resulting exchanges
            # follow whatever complaint boxes were produced above.
            parsed = _parse_paths(inbound_path, outbound_path)
            exchanges = itertools.chain(boxes, parsed)
        else:
            exchanges = boxes

        connections.append((iter(exchanges), time_hint))

    return _rearrange_by_time(connections)
示例#3
0
def _parse_request(stream, scheme=None):
    """Parse one request (heading, then body) from `stream`.

    :return:
        A pair ``(request, box)``. When the heading parses, `request` is
        the parsed request and `box` is `None`. When the heading raises
        `ParseError`, `request` is `None` and `box` is a complaint box
        (notice 1006) carrying the error.
    """
    try:
        request = _parse_request_heading(stream, scheme)
    except ParseError as exc:
        # Without a heading there is nothing to read a body for.
        return (None, complaint_box(1006, error=exc))
    _parse_request_body(request, stream)
    return (request, None)
示例#4
0
def _parse_request(stream, scheme=None):
    """Read a single request off `stream`: first the heading, then the body.

    :return:
        ``(request, None)`` if the heading was parsed, or
        ``(None, box)`` if parsing the heading raised `ParseError`,
        where `box` is a complaint box (notice 1006) holding that error.
    """
    try:
        parsed = _parse_request_heading(stream, scheme)
    except ParseError as failure:
        box = complaint_box(1006, error=failure)
        return (None, box)
    # The body is only attempted once the heading is known.
    _parse_request_body(parsed, stream)
    return (parsed, None)
示例#5
0
def _path_pairs_input(path_pairs, sniff_direction=False,
                      complain_on_one_sided=False):
    """Build exchanges from pairs of stream files, one pair per connection.

    :param path_pairs:
        An iterable of ``(path1, path2, time_hint)`` triples.
        Either path may be `None` when that side of the connection is absent.
    :param sniff_direction:
        If true, `_sniff_direction` decides which path is the inbound
        stream and which is the outbound one; otherwise `path1` is taken
        as inbound and `path2` as outbound.
    :param complain_on_one_sided:
        If true, a complaint box (1278) is produced for every pair
        where one of the paths is `None`.
    :return:
        The result of `_rearrange_by_time` applied to the list of
        ``(iterator, time_hint)`` pairs, one per connection.
    """
    connections = []

    # Every triple describes one TCP connection, optionally with
    # a hint about the time when that connection started.
    for (path1, path2, time_hint) in path_pairs:
        boxes = []              # Complaints about this connection.

        # A pair may be one-sided (only inbound or only outbound).
        # For the ``req-stream`` and ``resp-stream`` input formats
        # that is expected; for others we complain, but we still go on
        # and process the one stream we have.
        if complain_on_one_sided and (path1 is None or path2 is None):
            boxes.append(complaint_box(1278, path=path1 or path2))

        if sniff_direction:
            # With the ``tcpflow`` and ``tcpick`` input formats, it is not
            # yet known which side is inbound (client->server).
            direction = _sniff_direction(path1, path2)
            if direction is None:
                # Sniffing failed, so this is a non-HTTP/1.x connection
                # that tcpflow or something captured by accident.
                # No attempt is made to parse it.
                boxes.append(complaint_box(1279,
                                           path1=path1 or u'(none)',
                                           path2=path2 or u'(none)'))
                inbound_path = outbound_path = None
            else:
                inbound_path, outbound_path = direction
        else:
            inbound_path, outbound_path = path1, path2

        if inbound_path or outbound_path:
            # Parse the streams as HTTP/1.x; the resulting exchanges
            # follow whatever complaint boxes were produced above.
            parsed = _parse_paths(inbound_path, outbound_path)
            exchanges = itertools.chain(boxes, parsed)
        else:
            exchanges = boxes

        connections.append((iter(exchanges), time_hint))

    return _rearrange_by_time(connections)
示例#6
0
def _parse_responses(stream, req):
    resps = []
    while stream.good:
        # Parse all responses corresponding to one request.
        # RFC 7230 section 3.3.
        try:
            resp = _parse_response_heading(req, stream)
        except ParseError as e:
            return (resps, complaint_box(1009, error=e))
        else:
            resps.append(resp)
            _parse_response_body(resp, stream)
            if (not resp.status.informational) or \
                    (resp.status == st.switching_protocols):
                # This is the final response for this request.
                break
    return (resps, None)
示例#7
0
def _parse_responses(stream, req):
    resps = []
    while stream.good:
        # Parse all responses corresponding to one request.
        # RFC 7230 section 3.3.
        try:
            resp = _parse_response_heading(req, stream)
        except ParseError as e:
            return (resps, complaint_box(1009, error=e))
        else:
            resps.append(resp)
            _parse_response_body(resp, stream)
            if (not resp.status.informational) or \
                    (resp.status == st.switching_protocols):
                # This is the final response for this request.
                break
    return (resps, None)
示例#8
0
def parse_streams(inbound, outbound, scheme=None):
    """Turn one or two HTTP/1.x streams into a series of exchanges.

    Requests are read from `inbound` one by one; for every request that
    parses, the matching responses are read from `outbound` (if it is
    available). Responses that remain once the requests are exhausted
    are parsed on their own.

    Beware that parsing an outbound stream without an inbound stream
    is unreliable, because response framing depends on the request.

    :param inbound:
        The inbound (request) stream as a :class:`~httpolice.parse.Stream`,
        or `None`.
    :param outbound:
        The outbound (response) stream as a :class:`~httpolice.parse.Stream`,
        or `None`.
    :param scheme:
        The scheme of the request URI, as a Unicode string,
        or `None` if unknown.
    :return:
        An iterable of :class:`Exchange` objects.
        Some of the exchanges may be "empty" (aka "complaint boxes"):
        containing neither request nor responses,
        but only a notice that indicates some general problem with the streams.
    """
    # Phase 1: walk the requests, pairing each with its responses.
    while inbound and inbound.sane:
        request, request_box = _parse_request(inbound, scheme)
        responses, response_box = [], None
        if request:
            if outbound and outbound.sane:
                responses, response_box = _parse_responses(outbound, request)
                if responses:
                    final_status = responses[-1].status
                    # A protocol switch or a successful CONNECT ends
                    # request parsing: clearing `sane` stops this loop.
                    if final_status == st.switching_protocols:
                        inbound.sane = False
                    if request.method == m.CONNECT and final_status.successful:
                        inbound.sane = False
            yield Exchange(request, responses)
        # Complaints go after the exchange: request's first, then responses'.
        for box in (request_box, response_box):
            if box:
                yield box

    # Phase 2: report unparseable leftovers on the inbound stream.
    if inbound and not inbound.eof:
        leftover = inbound.consume_rest()
        yield complaint_box(1007, stream=inbound, nbytes=len(leftover))

    # Phase 3: responses that have no request to go with.
    if outbound and outbound.sane:
        if inbound:
            # There were requests, but fewer than responses.
            # The remaining responses are still parsed, on their own.
            yield complaint_box(1008, stream=outbound)
        while outbound.sane:
            responses, response_box = _parse_responses(outbound, None)
            if responses:
                yield Exchange(None, responses)
            if response_box:
                yield response_box

    # Phase 4: report unparseable leftovers on the outbound stream.
    if outbound and not outbound.eof:
        leftover = outbound.consume_rest()
        yield complaint_box(1010, stream=outbound, nbytes=len(leftover))