def test_parser_edge_cases():
    """Cover parser branches that the ordinary tests leave untouched.

    The parser implementation is general enough that some of its code paths
    are never reached by the regular tests, so every scenario below uses a
    small, deliberately contrived grammar instead.
    """
    # Scenario 1: two alternatives starting with overlapping literals
    # ('1' vs. '11'), each followed by a repeated rule.
    tchars = many(rfc7230.tchar) > named(u'p')
    one_then_tchars = '1' * tchars > named(u'p1')
    eleven_then_line = '11' * tchars * skip('\n') > named(u'p2')
    expected = (u'1', [u'1', u'a', u'b', u'c'])
    assert parse(one_then_tchars | eleven_then_line, b'11abc') == expected
    expected = (u'11', [u'a', u'b', u'c'])
    assert parse(one_then_tchars | eleven_then_line, b'11abc\n') == expected

    # Scenario 2: a rule that refers to itself, terminated by an empty match.
    chain = recursive() > named(u'p')
    chain.rec = (rfc7230.tchar * chain | subst(None) << empty)
    expected = (u'a', (u'b', (u'c', None)))
    assert parse(chain, b'abc') == expected

    # Scenario 3: an optional literal used both directly and inside string().
    ab = literal('ab') > named(u'p')
    maybe_ab = subst(u'') << empty | ab > named(u'p0')
    xab_then = 'xab' * maybe_ab > named(u'p1')
    x_then_bang = 'x' * string(maybe_ab) * '!' > named(u'p2')
    expected = (u'xab', u'ab')
    assert parse(xab_then | x_then_bang, b'xabab') == expected
    expected = (u'x', u'abab', u'!')
    assert parse(xab_then | x_then_bang, b'xabab!') == expected

    # Scenario 4: an alternative whose first branch is the empty rule.
    optional_a = empty | literal('a') > named(u'p')
    before_x = optional_a * 'x' > named(u'x')
    expected = u'x'
    assert parse(before_x, b'x') == expected
# (Tail of a parenthesised import list that opens above this fragment.)
    can_complain,
    fill_names,
    maybe,
    octet,
    octet_range,
    pivot,
    string,
    subst,
)
from httpolice.structure import EntityTag
from httpolice.syntax.common import DQUOTE
from httpolice.syntax.rfc7230 import comma_list1, obs_text
from httpolice.syntax.rfc7231 import HTTP_date

# NOTE(review): `> auto` / `> pivot` come from httpolice.parse and presumably
# tag each rule's role; their exact meaning is not visible here -- confirm.

# Weakness marker: the two octets 0x57 0x2F ("W/"), replaced by True.
weak = subst(True) << octet(0x57) * octet(0x2F) > auto

# Characters allowed inside an entity tag: 0x21, 0x23-0x7E (i.e. skipping
# 0x22), or obs_text.
etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text > auto


@can_complain
def _no_backslashes(complain, s):
    """Report notice 1119 if `s` contains a backslash; return `s` unchanged."""
    if u'\\' in s:
        complain(1119)
    return s


# A quoted run of etagc characters, passed through the backslash check.
opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE > auto

# Optional weakness marker (defaulting to False) followed by the opaque tag,
# fed to EntityTag.
entity_tag = EntityTag << maybe(weak, False) * opaque_tag > pivot

# Header rules built on the definitions above.
ETag = entity_tag > pivot
Last_Modified = HTTP_date > pivot
If_Match = '*' | comma_list1(entity_tag) > pivot
# -*- coding: utf-8; -*- from httpolice.citation import RFC from httpolice.parse import (auto, can_complain, fill_names, maybe, octet, octet_range, pivot, string, subst) from httpolice.structure import EntityTag from httpolice.syntax.common import DQUOTE from httpolice.syntax.rfc7230 import comma_list1, obs_text from httpolice.syntax.rfc7231 import HTTP_date weak = subst(True) << octet(0x57) * octet(0x2F) > auto etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text > auto @can_complain def _no_backslashes(complain, s): if u'\\' in s: complain(1119) return s opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE > auto entity_tag = EntityTag << maybe(weak, False) * opaque_tag > pivot ETag = entity_tag > pivot Last_Modified = HTTP_date > pivot If_Match = '*' | comma_list1(entity_tag) > pivot If_None_Match = '*' | comma_list1(entity_tag) > pivot If_Modified_Since = HTTP_date > pivot If_Unmodified_Since = HTTP_date > pivot
# -*- coding: utf-8; -*- from httpolice.parse import (fill_names, literal, maybe, pivot, skip, string1, subst) from httpolice.syntax.common import DIGIT from httpolice.syntax.rfc7230 import RWS, comma_list notice_id = int << string1(DIGIT) > pivot resp = subst(True) << literal('resp') > pivot HTTPolice_Silence = comma_list(notice_id * maybe(skip(RWS) * resp)) > pivot fill_names(globals(), citation=None)
(lambda x: (None, x)) << skip('-') * suffix_length > pivot  # statement begins above this fragment

# A comma_list1 whose items are either of the two range-spec forms.
byte_range_set = comma_list1(byte_range_spec | suffix_byte_range_spec) > auto

# "<bytes unit>=<byte range set>", fed to RangeSpecifier ('=' is skipped).
byte_ranges_specifier = RangeSpecifier << (
    bytes_unit * skip('=') * byte_range_set) > pivot

# Same shape for any other range unit; its set is one or more VCHARs.
other_range_set = string1(VCHAR) > auto
other_ranges_specifier = RangeSpecifier << (
    other_range_unit * skip('=') * other_range_set) > pivot

Range = byte_ranges_specifier | other_ranges_specifier > pivot

# "first-last" with the '-' skipped.
byte_range = first_byte_pos * skip('-') * last_byte_pos > auto

# One or more digits, converted with int().
complete_length = int << string1(DIGIT) > auto

# "<byte range>/<complete length or '*'>"; a literal '*' is replaced by None.
byte_range_resp = (byte_range * skip('/') *
                   (complete_length | subst(None) << literal('*'))) > pivot

# "*/<complete length>"; the literal '*/' is replaced by None.
unsatisfied_range = ((subst(None) << literal('*/')) *
                     complete_length) > pivot


@can_complain
def _well_formed2(complain, r):
    """Report notice 1148 for an inconsistent range; return `r` unchanged.

    `r.range` is unpacked as ``(bounds, complete)``.  When `bounds` is not
    None it is unpacked as ``(first, last)``, and the notice is reported if
    `last` is below `first`, or if `complete` is known and not greater than
    `last`.
    """
    bounds, complete = r.range
    if bounds is not None:
        first, last = bounds
        if (last < first) or ((complete is not None) and (complete <= last)):
            complain(1148)
    return r


byte_content_range = _well_formed2 << (  # statement continues below this fragment
subtype = token > pivot

# "type/subtype" wrapped in MediaType and passed through _check_media_type,
# followed by any number of ";"-separated parameters (surrounding OWS is
# skipped) collected into a MultiDict; the pair is fed to Parametrized.
media_type = Parametrized << (
    (_check_media_type << (MediaType << type_ + '/' + subtype)) *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter()))) > pivot

content_coding = ContentCoding << token > pivot

product_version = token > pivot

# A product name token, optionally followed by "/" and a version.
product = Versioned << (
    (ProductName << token) * maybe(skip('/') * product_version)) > pivot

# A product followed by any number of whitespace-separated products or
# comments.  NOTE(review): the `%` combinator is defined in httpolice.parse;
# its exact result shape is not visible here -- confirm.
User_Agent = product % many(
    skip(RWS) * (product | comment(include_parens=False))) > pivot
Server = product % many(skip(RWS) *
                        (product | comment(include_parens=False))) > pivot

# Three-octet day abbreviations mapped to 0..6:
# "Mon"=0, "Tue"=1, "Wed"=2, "Thu"=3, "Fri"=4, "Sat"=5, "Sun"=6.
day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E) |
            subst(1) << octet(0x54) * octet(0x75) * octet(0x65) |
            subst(2) << octet(0x57) * octet(0x65) * octet(0x64) |
            subst(3) << octet(0x54) * octet(0x68) * octet(0x75) |
            subst(4) << octet(0x46) * octet(0x72) * octet(0x69) |
            subst(5) << octet(0x53) * octet(0x61) * octet(0x74) |
            subst(6) << octet(0x53) * octet(0x75) * octet(0x6E)) > pivot


@can_complain
def _to_date(complain, d, m, y):
    """Build ``date(y, m, d)``; on ValueError report 1222 and return Unavailable.

    Note the argument order: day, month, year.
    """
    try:
        return date(y, m, d)
    except ValueError:
        # The notice carries the rejected date formatted as Y-MM-DD.
        complain(1222, date=u'%d-%02d-%02d' % (y, m, d))
        return Unavailable
# -*- coding: utf-8; -*- from httpolice.citation import RFC from httpolice.parse import (auto, can_complain, fill_names, literal, maybe, pivot, skip, string, string1, subst) from httpolice.structure import ContentRange, RangeSpecifier, RangeUnit from httpolice.syntax.common import CHAR, DIGIT, SP, VCHAR from httpolice.syntax.rfc7230 import comma_list1, token__excluding from httpolice.syntax.rfc7231 import HTTP_date from httpolice.syntax.rfc7232 import entity_tag bytes_unit = RangeUnit << literal('bytes') > auto other_range_unit = RangeUnit << token__excluding(['bytes']) > auto range_unit = bytes_unit | other_range_unit > pivot acceptable_ranges = (subst([]) << literal('none') | comma_list1(range_unit)) > pivot Accept_Ranges = acceptable_ranges > pivot @can_complain def _well_formed1(complain, first, last): if (last is not None) and (first > last): complain(1133) return (first, last) first_byte_pos = int << string1(DIGIT) > auto last_byte_pos = int << string1(DIGIT) > auto byte_range_spec = _well_formed1 << (first_byte_pos * skip('-') * maybe(last_byte_pos)) > pivot
def comma_list1(element):
    """Return the rule named ``1#rule`` (RFC 7230, section 7) for `element`.

    The rule is assembled from three consecutive parts, whose combined
    result is handed to `_collect_elements`:

    * any number of leading commas (each with trailing OWS, replaced by
      None),
    * one mandatory `element`, wrapped in a single-item list,
    * any number of further commas, each optionally followed by OWS and
      another `element`.
    """
    leading_commas = many(subst(None) << ',' * OWS)
    first_item = (lambda item: [item]) << group(element)
    more_items = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    rule = _collect_elements << (leading_commas + first_item + more_items)
    return rule > named(u'1#rule', RFC(7230, section=u'7'))
# As updated by RFC 6874 ZoneID = string1(unreserved | pct_encoded) > pivot IPv6addrz = IPv6address + '%25' + ZoneID > pivot IP_literal = '[' + (IPv6address | IPv6addrz | IPvFuture) + ']' > pivot reg_name = string(unreserved | sub_delims | pct_encoded) > pivot host = IP_literal | IPv4address | reg_name > pivot port = string(DIGIT) > pivot authority = maybe_str(userinfo + '@') + host + maybe_str(':' + port) > pivot path_abempty = string('/' + segment) > auto path_absolute = '/' + maybe_str(segment_nz + string('/' + segment)) > auto path_noscheme = segment_nz_nc + string('/' + segment) > auto path_rootless = segment_nz + string('/' + segment) > auto path_empty = subst(u'') << empty > auto hier_part = ('//' + authority + path_abempty | path_absolute | path_rootless | path_empty) > pivot query = string(pchar | '/' | '?') > pivot fragment = string(pchar | '/' | '?') > pivot absolute_URI = scheme + ':' + hier_part + maybe_str('?' + query) > pivot relative_part = ('//' + authority + path_abempty | path_absolute | path_noscheme | path_empty) > pivot URI = (scheme + ':' + hier_part + maybe_str('?' + query) + maybe_str('#' + fragment)) > pivot
from httpolice.citation import RFC
from httpolice.parse import (auto, can_complain, fill_names, literal, maybe,
                             pivot, skip, string, string1, subst)
from httpolice.structure import ContentRange, RangeSpecifier, RangeUnit
from httpolice.syntax.common import CHAR, DIGIT, SP, VCHAR
from httpolice.syntax.rfc7230 import comma_list1, token__excluding
from httpolice.syntax.rfc7231 import HTTP_date
from httpolice.syntax.rfc7232 import entity_tag

# NOTE(review): `> auto` / `> pivot` come from httpolice.parse and presumably
# tag each rule's role; their exact meaning is not visible here -- confirm.

# The literal 'bytes' unit versus any other token (with 'bytes' excluded),
# both wrapped in RangeUnit.
bytes_unit = RangeUnit << literal('bytes') > auto
other_range_unit = RangeUnit << token__excluding(['bytes']) > auto
range_unit = bytes_unit | other_range_unit > pivot

# Either the literal 'none' (replaced by an empty list) or a comma_list1 of
# range units.
acceptable_ranges = (
    subst([]) << literal('none') | comma_list1(range_unit)) > pivot
Accept_Ranges = acceptable_ranges > pivot


@can_complain
def _well_formed1(complain, first, last):
    """Report notice 1133 if `last` is given and below `first`.

    Returns the pair ``(first, last)`` in either case.
    """
    if (last is not None) and (first > last):
        complain(1133)
    return (first, last)


# Both positions are one or more digits, converted with int().
first_byte_pos = int << string1(DIGIT) > auto
last_byte_pos = int << string1(DIGIT) > auto

# "first-" or "first-last" ('-' skipped, last optional), checked by
# _well_formed1.
byte_range_spec = _well_formed1 << (first_byte_pos * skip('-') *
                                    maybe(last_byte_pos)) > pivot

# One or more digits, converted with int().
suffix_length = int << string1(DIGIT) > auto
def comma_list(element):
    """Return the rule named ``#rule`` (RFC 7230, section 7) for `element`.

    The whole list is optional.  When present it starts with either a lone
    comma (replaced by ``[None, None]``) or one `element` wrapped in a
    single-item list, followed by any number of further commas, each
    optionally followed by OWS and another `element`.  The combined result
    is handed to `_collect_elements`.
    """
    lone_comma = subst([None, None]) << literal(',')
    first_item = (lambda item: [item]) << group(element)
    more_items = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    whole_list = maybe((lone_comma | first_item) + more_items)
    return _collect_elements << whole_list > named(
        u'#rule', RFC(7230, section=(7, )))
# -*- coding: utf-8; -*- from httpolice.parse import ( fill_names, literal, maybe, pivot, skip, string1, subst, ) from httpolice.syntax.common import DIGIT from httpolice.syntax.rfc7230 import RWS, comma_list notice_id = int << string1(DIGIT) > pivot resp = subst(True) << literal('resp') > pivot HTTPolice_Silence = comma_list(notice_id * maybe(skip(RWS) * resp)) > pivot fill_names(globals(), citation=None)
def comma_list(element):
    """Return the rule named ``#rule`` (RFC 7230, section 7) for `element`.

    The list as a whole may be absent.  Otherwise its head is either a bare
    comma (replaced by ``[None, None]``) or a single `element` wrapped in a
    one-item list; its tail is any number of commas, each optionally
    followed by OWS and another `element`.  `_collect_elements` receives
    the combined result.
    """
    bare_comma = subst([None, None]) << literal(',')
    head = (lambda first: [first]) << group(element)
    tail = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    optional_list = maybe((bare_comma | head) + tail)
    citation = RFC(7230, section=(7,))
    return _collect_elements << optional_list > named(u'#rule', citation)
correct_encoded_id += pct_encode(c, safe='').upper() if encoded_id != correct_encoded_id: complain(1256, actual=encoded_id, correct=correct_encoded_id) return decoded_id protocol_id = _check_protocol_id << token > pivot @can_complain def _check_alt_authority(complain, value): return parse(value, maybe_str(uri_host) + ':' + port, complain, 1257, authority=value) alt_authority = _check_alt_authority << quoted_string > pivot alternative = protocol_id * skip('=') * alt_authority > pivot parameter = ((AltSvcParam << token) * skip('=') * (token | quoted_string)) > pivot alt_value = Parametrized << ( alternative * (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot Alt_Svc = clear | comma_list1(alt_value) > pivot ma = delta_seconds > pivot persist = subst(True) << literal('1') > pivot Alt_Used = uri_host + maybe_str(':' + port) > pivot fill_names(globals(), RFC(7838))
def comma_list1(element):
    """Return the rule named ``1#rule`` (RFC 7230, section 7) for `element`.

    Three consecutive parts are concatenated and handed to
    `_collect_elements`:

    * any number of leading commas (each with trailing OWS, replaced by
      None),
    * one mandatory `element`, wrapped in a single-item list,
    * any number of further commas, each optionally followed by OWS and
      another `element`.
    """
    leading_commas = many(subst(None) << ',' * OWS)
    head = (lambda first: [first]) << group(element)
    tail = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    rule = _collect_elements << (leading_commas + head + tail)
    return rule > named(u'1#rule', RFC(7230, section=(7,)))
# A token passed through _check_protocol_id (defined outside this fragment).
protocol_id = _check_protocol_id << token > pivot


@can_complain
def _check_alt_authority(complain, value):
    """Parse `value` as an optional uri_host, then ':' and a port.

    Delegates to `parse`, passing along `complain`, the notice number 1257
    and `authority=value`, and returns whatever `parse` returns.
    NOTE(review): presumably 1257 is reported when parsing fails -- confirm
    against `parse`.
    """
    return parse(value, maybe_str(uri_host) + ':' + port, complain, 1257,
                 authority=value)


# A quoted string whose content is checked by _check_alt_authority.
alt_authority = _check_alt_authority << quoted_string > pivot

# "protocol-id=alt-authority" with the '=' skipped.
alternative = protocol_id * skip('=') * alt_authority > pivot

# "name=value": the name is wrapped in AltSvcParam, the value is a token or
# a quoted string.
parameter = ((AltSvcParam << token) * skip('=') *
             (token | quoted_string)) > pivot

# An alternative followed by any number of ";"-separated parameters
# (surrounding OWS is skipped) collected into a MultiDict.
alt_value = Parametrized << (
    alternative *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot

# Either `clear` (defined outside this fragment) or a comma_list1 of values.
Alt_Svc = clear | comma_list1(alt_value) > pivot

ma = delta_seconds > pivot

# The literal '1', replaced by True.
persist = subst(True) << literal('1') > pivot

# A uri_host with an optional ":port".
Alt_Used = uri_host + maybe_str(':' + port) > pivot

# Processes this module's globals with RFC 7838 as the citation.
# NOTE(review): presumably names each rule after its global variable -- confirm.
fill_names(globals(), RFC(7838))
subtype = token > pivot

# "type/subtype" wrapped in MediaType, followed by any number of
# ";"-separated parameters (surrounding OWS is skipped) collected into a
# MultiDict; the pair is fed to Parametrized.
media_type = Parametrized << (
    (MediaType << type_ + '/' + subtype) *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter()))) > pivot

content_coding = ContentCoding << token > pivot

product_version = token > pivot

# A product name token, optionally followed by "/" and a version.
product = Versioned << ((ProductName << token) *
                        maybe(skip('/') * product_version)) > pivot

# A product followed by any number of whitespace-separated products or
# comments.  NOTE(review): the `%` combinator is defined in httpolice.parse;
# its exact result shape is not visible here -- confirm.
User_Agent = product % many(skip(RWS) *
                            (product | comment(include_parens=False))) > pivot
Server = product % many(skip(RWS) *
                        (product | comment(include_parens=False))) > pivot

# Three-octet day abbreviations mapped to 0..6:
# "Mon"=0, "Tue"=1, "Wed"=2, "Thu"=3, "Fri"=4, "Sat"=5, "Sun"=6.
day_name = (subst(0) << octet(0x4D) * octet(0x6F) * octet(0x6E) |
            subst(1) << octet(0x54) * octet(0x75) * octet(0x65) |
            subst(2) << octet(0x57) * octet(0x65) * octet(0x64) |
            subst(3) << octet(0x54) * octet(0x68) * octet(0x75) |
            subst(4) << octet(0x46) * octet(0x72) * octet(0x69) |
            subst(5) << octet(0x53) * octet(0x61) * octet(0x74) |
            subst(6) << octet(0x53) * octet(0x75) * octet(0x6E)) > pivot


@can_complain
def _to_date(complain, d, m, y):
    """Build ``date(y, m, d)``; on ValueError report 1222 and return Unavailable.

    Note the argument order: day, month, year.
    """
    try:
        return date(y, m, d)
    except ValueError:
        # The notice carries the rejected date formatted as Y-MM-DD.
        complain(1222, date=u'%d-%02d-%02d' % (y, m, d))
        return Unavailable