Exemplo n.º 1
0
def comment(include_parens=False):
    """Build the (recursive) parser for the ``comment`` rule.

    A comment is ``(`` ... ``)`` whose content is any mix of ``ctext``,
    quoted pairs and nested comments.  Unless `include_parens` is true,
    the first and last characters of the matched string (the enclosing
    parentheses) are sliced off the result.
    """
    nested = recursive() > named(u'comment', RFC(7230))
    piece = ctext | quoted_pair(sensible_for=u'()\\') | nested
    nested.rec = '(' + string(piece) + ')'
    if include_parens:
        rule = nested
    else:
        rule = (lambda s: s[1:-1]) << nested
    return rule > named(u'comment', RFC(7230))
Exemplo n.º 2
0
def preference_parameter(head=False):
    """Build the parser for ``preference-parameter``.

    The parameter name is wrapped in `Preference` when `head` is true and
    in `CaseInsensitive` otherwise.  Either a full ``parameter`` or a bare
    token name is accepted; the result goes through
    `_normalize_empty_value`.
    """
    # The head (first) ``preference-parameter`` of a ``preference``
    # contains the actual preference name, which we want to annotate.
    if head:
        name_cls = Preference
    else:
        name_cls = CaseInsensitive
    with_value = parameter(name_cls=name_cls)
    name_only = name_cls << token
    rule = _normalize_empty_value << (with_value | name_only)
    return rule > named(
        u'preference-parameter', RFC(7240, errata=4439), is_pivot=True)
Exemplo n.º 3
0
    def process(self, raw):
        """Convert a mapping of raw string fields into typed values.

        Falsy values are dropped.  The ``key`` field is passed through
        ``self.cls``; all-digit strings become ``int``; ``true``/``false``
        (any case) become ``bool``; a field whose name matches a class
        attribute of ``self`` is looked up in that class by item access;
        anything else is kept unchanged.

        Afterwards ``rfc`` (with optional ``rfc_section`` and
        ``rfc_appendix``) is folded into a ``citation`` built with `RFC`,
        and ``cite_url`` (with optional ``cite_title``) into a ``citation``
        built with `Citation`.  The latter wins if both are present.
        """
        result = {}

        for field, value in raw.items():
            if not value:
                continue
            if field == 'key':
                converted = self.cls(value)
            elif value.isdigit():
                converted = int(value)
            elif value.lower() in [u'true', u'false']:
                converted = (value.lower() == u'true')
            elif isinstance(getattr(self, field, None), type):     # Enum
                converted = getattr(self, field)[value]
            else:
                converted = value
            result[field] = converted

        if 'rfc' in result:
            number = result.pop('rfc')
            section = result.pop('rfc_section', None)
            appendix = result.pop('rfc_appendix', None)
            result['citation'] = RFC(number, section, appendix)
        if 'cite_url' in result:
            title = result.pop('cite_title', None)
            url = result.pop('cite_url')
            result['citation'] = Citation(title, url)

        return result
Exemplo n.º 4
0
def transfer_coding(no_trailers=False, no_q=False):
    """Build the parser for ``transfer-coding``.

    The result is a ``transfer-extension`` that excludes the built-in
    coding names (plus ``trailers`` when `no_trailers` is true), or any
    one of the built-in names as a literal, passed through
    `_empty_params`.  `no_q` is forwarded to `transfer_extension`.
    """
    if no_trailers:
        excluded = _built_in_codings + ['trailers']
    else:
        excluded = _built_in_codings
    rule = transfer_extension(excluded, no_q)
    for coding_name in _built_in_codings:
        builtin = TransferCoding << literal(coding_name)
        rule = rule | (_empty_params << builtin)
    return rule > named(u'transfer-coding', RFC(7230), is_pivot=True)
Exemplo n.º 5
0
 def extract_citations(self, record):
     """Yield a citation for every ``iana:xref`` child of `record`.

     An xref of type ``rfc`` yields an `RFC`.  If its text names a
     section or appendix, that location is parsed with `RFC.parse_sect`
     and passed as the ``section`` or ``appendix`` keyword.  An xref of
     type ``uri`` yields a `Citation` whose title is the
     whitespace-normalized text (``None`` when there is no text).
     Other xref types are ignored.
     """
     sect_pattern = r'RFC(\d+), (Section|Appendix) ([A-Z0-9]+(\.[0-9]+)*)'
     for xref in record.findall('iana:xref', self.xmlns):
         kind = xref.get('type')
         if kind == 'rfc':
             found = re.search(sect_pattern, xref.text or '')
             if found is None:
                 # No section/appendix in the text: take the number from
                 # the ``data`` attribute, skipping its first 3 characters.
                 yield RFC(int(xref.get('data')[3:]))
                 continue
             number, keyword, raw_sect = found.group(1, 2, 3)
             located = {keyword.lower(): RFC.parse_sect(raw_sect)}
             yield RFC(int(number), **located)
         elif kind == 'uri':
             if xref.text:
                 title = normalize_whitespace(xref.text)
             else:
                 title = None
             yield Citation(title, xref.get('data'))
Exemplo n.º 6
0
def quoted_pair(sensible_for):
    """Build the parser for ``quoted-pair``.

    `sensible_for` is the collection of characters for which escaping is
    considered sensible; an escaped character outside of it is reported
    as notice 1017.  The parse result is the escaped character itself.
    """
    # In RFC 7230, ``<quoted-pair>`` is a single rule,
    # but we parametrize it to report no. 1017 depending on the context.
    @can_complain
    def check_sensible(complain, c):
        if c in sensible_for:
            return c
        complain(1017, char=c)
        return c

    escaped = skip('\\') * (HTAB | SP | VCHAR | obs_text)
    rule = check_sensible << escaped
    return rule > named(u'quoted-pair', RFC(7230))
Exemplo n.º 7
0
 def extract_citations(self, record):
     """Yield a citation for every ``iana:xref`` child of `record`.

     An xref of type ``uri`` yields a `Citation` built from its text and
     its ``data`` attribute.  An xref of type ``rfc`` yields an `RFC`,
     with a ``section`` or ``appendix`` keyword when the text names one.
     Other xref types are ignored.
     """
     for xref in record.findall('iana:xref', self.xmlns):
         xref_type = xref.get('type')
         if xref_type == 'uri':
             yield Citation(xref.text, xref.get('data'))
             continue
         if xref_type != 'rfc':
             continue
         located = re.search(
             r'RFC(\d+), (Section|Appendix) ([A-Z0-9]+(\.[0-9]+)*)',
             xref.text or '')
         if located:
             where = {located.group(2).lower():
                      RFC.parse_sect(located.group(3))}
             yield RFC(int(located.group(1)), **where)
         else:
             # Fall back to the ``data`` attribute, skipping its first
             # three characters to get the number.
             yield RFC(int(xref.get('data')[3:]))
Exemplo n.º 8
0
def link_extension(exclude_builtin):
    """Build the parser for ``link-extension``.

    When `exclude_builtin` is true, the names in `_builtin_params` are
    excluded: those without a trailing ``*`` from the regular form, and
    those with one (stripped of the ``*``) from the extended form.
    """
    if exclude_builtin:
        plain = []
        starred = []
        for name in _builtin_params:
            if name.endswith('*'):
                starred.append(name.rstrip('*'))
            else:
                plain.append(name)
    else:       # pragma: no cover
        plain = starred = None
    # Regular form: a name, optionally followed by ``=`` and a value.
    regular = (
        (CaseInsensitive << parmname__excluding(plain)) *
        maybe(skip(OWS * '=' * OWS) * (ptoken | quoted_string)))
    # Extended form: a starred name that must be followed by ``=`` and
    # an ``ext-value``.
    extended = (
        (CaseInsensitive << ext_name_star__excluding(starred)) *
        skip(OWS * '=' * OWS) * ext_value)
    return (regular | extended) > named(
        u'link-extension', RFC(5988), is_pivot=True)
Exemplo n.º 9
0
def media_range(no_q=False):
    """Build the parser for ``media-range``.

    Accepts ``*/*``, ``type/*`` or a full media type (wrapped in
    `MediaType` and passed through `_check_media_type`), followed by any
    number of ``;``-separated parameters collected into a `MultiDict`.
    When `no_q` is true, a parameter named ``q`` is excluded.
    """
    excluded = ['q'] if no_q else []
    full_type = _check_media_type << (MediaType << type_ + '/' + subtype)
    range_ = literal('*/*') | type_ + '/' + '*' | full_type
    params = MultiDict << many(
        skip(OWS * ';' * OWS) * parameter(exclude=excluded))
    rule = Parametrized << (range_ * params)
    return rule > named(u'media-range', RFC(7231), is_pivot=True)
Exemplo n.º 10
0
from httpolice.syntax.rfc2616 import value
from httpolice.syntax.rfc5987 import ext_value
from httpolice.syntax.rfc7230 import OWS, token, token__excluding

# Grammar rules for the ``Content-Disposition`` header field.
# NOTE(review): ``pivot`` and ``auto`` are markers from ``httpolice.parse``;
# presumably `fill_names` (at the end) names each rule after its
# module-level variable -- confirm there before renaming anything.

# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

# We have no need to special-case "inline" and "attachment", simplify.
disposition_type = CaseInsensitive << token > pivot

# Either ``filename=value`` or ``filename*=ext-value``,
# with optional whitespace around the ``=``.
filename_parm = (
    (CaseInsensitive << literal('filename')) * skip(OWS * '=' * OWS) * value |
    (CaseInsensitive << literal('filename*')) * skip(OWS * '=' * OWS) *
    ext_value) > pivot

# ``token`` is a superset of ``ext-token``,
# and special-casing ``ext-token`` requires
# something more complex than our `string_excluding`.
# Until then, we can simplify a bit.
disp_ext_parm = (
    (CaseInsensitive << token__excluding(['filename', 'filename*'])) *
    skip(OWS * '=' * OWS) * value) > pivot

disposition_parm = filename_parm | disp_ext_parm > auto

# The whole header value: a disposition type followed by any number of
# ``;``-separated parameters collected into a `MultiDict`.
content_disposition = Parametrized << (
    disposition_type *
    (MultiDict << many(skip(OWS * ';' * OWS) * disposition_parm))) > pivot

fill_names(globals(), RFC(6266))
Exemplo n.º 11
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.parse import auto, fill_names, many, maybe, pivot, skip, string1
from httpolice.structure import HSTSDirective, Parametrized
from httpolice.syntax.common import DIGIT
from httpolice.syntax.rfc7230 import OWS, quoted_string, token


# This has been slightly adapted to the rules of RFC 7230.
# The ``OWS`` are derived from the "implied ``*LWS``" requirement.

# A directive is a name (wrapped in `HSTSDirective`), optionally followed
# by ``=`` and a value that is either a token or a quoted string.
directive_name = HSTSDirective << token                                 > auto
directive_value = token | quoted_string                                 > auto
directive = Parametrized << (
    directive_name * maybe(skip(OWS * '=' * OWS) * directive_value))    > pivot

def _collect_elements(xs):
    """Return a list of the items of `xs` that are not ``None``."""
    kept = []
    for item in xs:
        if item is None:
            continue
        kept.append(item)
    return kept

# The header value: directives separated by ``;`` (with optional
# whitespace).  Every position is wrapped in ``maybe``, and
# `_collect_elements` then drops the ``None`` entries from the result.
Strict_Transport_Security = _collect_elements << (
    maybe(directive) % many(skip(OWS * ';' * OWS) * maybe(directive)))  > pivot

# A run of digits converted with ``int``
# (``string1`` presumably requires at least one character -- confirm).
max_age_value = int << string1(DIGIT)                                   > pivot

fill_names(globals(), RFC(6797))
Exemplo n.º 12
0
from httpolice.known.base import KnownDict
from httpolice.structure import Preference
from httpolice.syntax import rfc7240


# Possible values of the ``argument`` key in the entries of ``known`` below;
# they are compared against by `argument_required` and `no_argument`.
NO = 0
OPTIONAL = 1
REQUIRED = 2


def argument_required(name):
    """Tell whether the ``argument`` info recorded for `name` is ``REQUIRED``."""
    info = known.get_info(name)
    return info.get('argument') == REQUIRED

def no_argument(name):
    """Tell whether the ``argument`` info recorded for `name` is ``NO``."""
    argument = known.get_info(name).get('argument')
    return argument == NO

def parser_for(name):
    """Return the ``parser`` info recorded for `name` (``None`` if absent)."""
    info = known.get_info(name)
    return info.get('parser')


# Table of known preferences.  Each entry holds the `Preference` itself
# (``_``), its citations, and the extra info read by the helper functions
# above: ``argument`` and, where present, ``parser``.
known = KnownDict(Preference, [
 {'_': Preference(u'handling'), '_citations': [RFC(7240, section=(4, 4))],
  'argument': REQUIRED, 'parser': rfc7240.handling},
 {'_': Preference(u'respond-async'), '_citations': [RFC(7240, section=(4, 1))],
  'argument': NO},
 {'_': Preference(u'return'), '_citations': [RFC(7240, section=(4, 2))],
  'argument': REQUIRED, 'parser': rfc7240.return_},
 {'_': Preference(u'wait'), '_citations': [RFC(7240, section=(4, 3))],
  'argument': REQUIRED, 'parser': rfc7240.wait}
], extra_info=['argument', 'parser'])
Exemplo n.º 13
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC, Citation
from httpolice.known.base import KnownDict
from httpolice.structure import ContentCoding


# Table of known content codings.  Each entry holds the `ContentCoding`
# itself (``_``) and the citation(s) for where it is defined:
# an `RFC` with a section tuple, or a `Citation` with a title and a URL.
known = KnownDict(ContentCoding, [
 {'_': ContentCoding(u'br'), '_citations': [RFC(7932)]},
 {'_': ContentCoding(u'compress'),
  '_citations': [RFC(7230, section=(4, 2, 1))]},
 {'_': ContentCoding(u'deflate'),
  '_citations': [RFC(7230, section=(4, 2, 2))]},
 {'_': ContentCoding(u'exi'),
  '_citations': [Citation(u'W3C Recommendation: '
                          u'Efficient XML Interchange (EXI) Format',
                          u'http://www.w3.org/TR/exi/')]},
 {'_': ContentCoding(u'gzip'),
  '_citations': [RFC(7230, section=(4, 2, 3))]},
 {'_': ContentCoding(u'identity'),
  '_citations': [RFC(7231, section=(5, 3, 4))]},
 {'_': ContentCoding(u'pack200-gzip'),
  '_citations': [Citation(u'JSR 200: Network Transfer Format for Java',
                          u'http://www.jcp.org/en/jsr/detail?id=200')]},
 {'_': ContentCoding(u'x-compress'),
  '_citations': [RFC(7230, section=(4, 2, 1))]},
 {'_': ContentCoding(u'x-gzip'), '_citations': [RFC(7230, section=(4, 2, 3))]}
])
Exemplo n.º 14
0
#
#   ``_title``
#     The default reason phrase, usually filled by ``tools/iana.py``.
#
#   ``cacheable``
#     If the status code is defined as cacheable by default,
#     set this to ``BY_DEFAULT``.
#     If it is defined as never cacheable, set to ``NOT_AT_ALL``.
#     Otherwise, set to ``NOT_BY_DEFAULT``.

known = KnownDict(
    StatusCode,
    [
        {
            '_': StatusCode(100),
            '_citations': [RFC(7231, section=(6, 2, 1))],
            '_title': u'Continue',
            'cacheable': NOT_BY_DEFAULT
        },
        {
            '_': StatusCode(101),
            '_citations': [RFC(7231, section=(6, 2, 2))],
            '_title': u'Switching Protocols',
            'cacheable': NOT_BY_DEFAULT
        },
        {
            '_': StatusCode(102),
            '_citations': [RFC(2518)],
            '_title': u'Processing'
        },
        {
Exemplo n.º 15
0
from httpolice.parse import (auto, can_complain, fill_names, maybe, octet,
                             octet_range, pivot, string, subst)
from httpolice.structure import EntityTag
from httpolice.syntax.common import DQUOTE
from httpolice.syntax.rfc7230 import comma_list1, obs_text
from httpolice.syntax.rfc7231 import HTTP_date

# The two octets ``W/`` (0x57, 0x2F); the parse result is replaced by ``True``.
weak = subst(True) << octet(0x57) * octet(0x2F) > auto
# 0x21, or 0x23 through 0x7E (so 0x22, the double quote, is left out),
# or ``obs-text``.
etagc = octet(0x21) | octet_range(0x23, 0x7E) | obs_text > auto


@can_complain
def _no_backslashes(complain, s):
    """Report notice 1119 if `s` contains a backslash; return `s` unchanged."""
    has_backslash = u'\\' in s
    if has_backslash:
        complain(1119)
    return s


# A double-quoted string of ``etagc``, checked by `_no_backslashes`.
opaque_tag = _no_backslashes << DQUOTE + string(etagc) + DQUOTE > auto
# Optional weakness marker (``False`` is passed to ``maybe`` as its second
# argument, presumably the value used when the marker is absent)
# followed by the opaque tag, wrapped in `EntityTag`.
entity_tag = EntityTag << maybe(weak, False) * opaque_tag > pivot

# Header fields.
ETag = entity_tag > pivot
Last_Modified = HTTP_date > pivot

# Either the literal ``*`` or a non-empty comma-separated list of entity tags.
If_Match = '*' | comma_list1(entity_tag) > pivot
If_None_Match = '*' | comma_list1(entity_tag) > pivot
If_Modified_Since = HTTP_date > pivot
If_Unmodified_Since = HTTP_date > pivot

fill_names(globals(), RFC(7232))
Exemplo n.º 16
0
def transfer_extension(exclude=None, no_q=False):
    """Build the parser for ``transfer-extension``.

    The coding name is a token that is not one of `exclude` (nothing is
    excluded when `exclude` is empty or ``None``), wrapped in
    `TransferCoding`.  It is followed by any number of ``;``-separated
    transfer parameters collected into a `MultiDict`.  `no_q` is
    forwarded to `transfer_parameter`.
    """
    coding = TransferCoding << token__excluding(exclude or [])
    params = MultiDict << many(
        skip(OWS * ';' * OWS) * transfer_parameter(no_q))
    rule = Parametrized << (coding * params)
    return rule > named(u'transfer-extension', RFC(7230), is_pivot=True)
Exemplo n.º 17
0
def transfer_parameter(no_q=False):
    """Build the parser for ``transfer-parameter``.

    The parameter is ``name = value`` where the value is a token or a
    quoted string.  When `no_q` is true, the name ``q`` is excluded.
    """
    if no_q:
        name = token__excluding(['q'])
    else:
        name = token
    rule = name * skip(BWS * '=' * BWS) * (token | quoted_string)
    return rule > named(u'transfer-parameter', RFC(7230), is_pivot=True)
Exemplo n.º 18
0
# Values for the ``argument`` key of the entries in ``known`` below.
OPTIONAL = 1
REQUIRED = 2


def argument_required(name):
    """Tell whether the ``argument`` info recorded for `name` is ``REQUIRED``."""
    argument = known.get_info(name).get('argument')
    return argument == REQUIRED


def no_argument(name):
    """Tell whether the ``argument`` info recorded for `name` is ``NO``."""
    info = known.get_info(name)
    return info.get('argument') == NO


def parser_for(name):
    """Return the ``parser`` info recorded for `name` (``None`` if absent)."""
    details = known.get_info(name)
    parser = details.get('parser')
    return parser


# Table of known HSTS directives.  Each entry holds the `HSTSDirective`
# itself (``_``), its citations, whether it takes an argument
# (``argument``) and, where present, the parser for that argument.
known = KnownDict(HSTSDirective, [
    {
        '_': HSTSDirective(u'includeSubDomains'),
        '_citations': [RFC(6797, section=(6, 1, 2))],
        'argument': NO
    },
    {
        '_': HSTSDirective(u'max-age'),
        '_citations': [RFC(6797, section=(6, 1, 1))],
        'argument': REQUIRED,
        'parser': rfc6797.max_age_value
    },
],
                  extra_info=['argument', 'parser'])
Exemplo n.º 19
0
from httpolice.citation import RFC
from httpolice.parse import fill_names, pivot
from httpolice.syntax.rfc7235 import token68


# The ``HTTP2-Settings`` header value is parsed with the ``token68`` rule
# imported from the RFC 7235 grammar.
HTTP2_Settings = token68                                                > pivot


fill_names(globals(), RFC(7540))
Exemplo n.º 20
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.known.base import KnownDict
from httpolice.structure import AltSvcParam
from httpolice.syntax import rfc7838


def parser_for(name):
    """Return the ``parser`` info recorded for `name` (``None`` if absent)."""
    info = known.get_info(name)
    return info.get('parser')


# Table of known ``Alt-Svc`` parameters.  Each entry holds the
# `AltSvcParam` itself (``_``), its citations, and the parser for its value.
known = KnownDict(AltSvcParam, [{
    '_': AltSvcParam(u'ma'),
    '_citations': [RFC(7838, section=(3, 1))],
    'parser': rfc7838.ma
}, {
    '_': AltSvcParam(u'persist'),
    '_citations': [RFC(7838, section=(3, 1))],
    'parser': rfc7838.persist
}],
                  extra_info=['parser'])
Exemplo n.º 21
0
def parameter(exclude=None):
    """Build the parser for ``parameter``.

    The parameter is ``name=value``: the name is a token that is not one
    of `exclude` (nothing is excluded when `exclude` is empty or
    ``None``), wrapped in `CaseInsensitive`; the value is a token or a
    quoted string.
    """
    name = CaseInsensitive << token__excluding(exclude or [])
    rule = name * skip('=') * (token | quoted_string)
    return rule > named(u'parameter', RFC(7231), is_pivot=True)
Exemplo n.º 22
0
parmname = string(attr_char) > pivot

# We don't need to special-case "UTF-8", simplify.
mime_charsetc = (ALPHA | DIGIT | '!' | '#' | '$' | '%' | '&' | '+' | '-' | '^'
                 | '_' | '`' | '{' | '}' | '~') > auto
mime_charset = string1(mime_charsetc) > auto
charset = CaseInsensitive << mime_charset > pivot

# ``%`` followed by two hex digits.
pct_encoded = '%' + HEXDIG + HEXDIG > auto
# The matched string is passed through `force_bytes` and then `pct_decode`
# (presumably yielding the percent-decoded bytes -- confirm in the helpers).
value_chars = pct_decode << (
    force_bytes << string(pct_encoded | attr_char)) > auto


@can_complain
def _check_ext_value(complain, val):
    """Check the charset of an extended value; return `val` unchanged.

    A charset other than UTF-8 is reported as notice 1253.  For UTF-8,
    the value bytes are test-decoded, and a failure is reported as
    notice 1254 together with the error.
    """
    is_utf8 = (val.charset == u'UTF-8')
    if not is_utf8:
        complain(1253, charset=val.charset)
        return val
    try:
        val.value_bytes.decode(val.charset)
    except UnicodeError as e:
        complain(1254, charset=val.charset, error=e)
    return val


# ``charset'language'value-chars`` with the language part optional;
# the pieces are wrapped in `ExtValue` and then checked by `_check_ext_value`.
ext_value = _check_ext_value << (
    ExtValue <<
    (charset * skip("'") * maybe(language) * skip("'") * value_chars)) > pivot

fill_names(globals(), RFC(8187))
Exemplo n.º 23
0
#     should trigger notice 1034.
#
#   ``bad_for_trailer``
#     You can set this to ``True`` if
#     the presence of this header in a trailer
#     should trigger notice 1026.
#
#   ``iana_status``
#     Filled by ``tools/iana.py``. You should not need to change it.

known = KnownDict(
    FieldName,
    [
        {
            '_': FieldName(u'A-IM'),
            '_citations': [RFC(4229)]
        },
        {
            '_': FieldName(u'Accept'),
            '_citations': [RFC(7231, section=(5, 3, 2))],
            'for_request': True,
            'for_response': False,
            'iana_status': u'standard',
            'parser': rfc7231.Accept,
            'precondition': False,
            'proactive_conneg': True,
            'rule': MULTI
        },
        {
            '_': FieldName(u'Accept-Additions'),
            '_citations': [RFC(4229)]
Exemplo n.º 24
0
from httpolice.citation import RFC
from httpolice.parse import fill_names, pivot
from httpolice.syntax.rfc7230 import comma_list1
from httpolice.syntax.rfc7231 import media_type

# The ``Accept-Patch`` header value: a comma-separated list of media types
# built with ``comma_list1``.
Accept_Patch = comma_list1(media_type) > pivot

fill_names(globals(), RFC(5789))
Exemplo n.º 25
0
def comma_list1(element):
    """Build the parser for a ``1#element`` comma-separated list.

    Leading commas and empty positions between commas are tolerated.
    They produce ``None`` entries, which `_collect_elements` is applied
    to before the result is returned.
    """
    leading_commas = many(subst(None) << ',' * OWS)
    first = (lambda x: [x]) << group(element)
    rest = many(skip(OWS * ',') * maybe(skip(OWS) * element))
    rule = _collect_elements << (leading_commas + first + rest)
    return rule > named(u'1#rule', RFC(7230, section=u'7'))
Exemplo n.º 26
0
# -*- coding: utf-8; -*-

from httpolice.citation import RFC
from httpolice.known.base import KnownDict
from httpolice.structure import UpgradeToken

# Table of known upgrade tokens.  Each entry holds the `UpgradeToken`
# itself (``_``), its citations, and a human-readable title (``_title``).
known = KnownDict(
    UpgradeToken, [{
        '_': UpgradeToken(u'HTTP'),
        '_citations': [RFC(7230, section=(2, 6))],
        '_title': u'Hypertext Transfer Protocol'
    }, {
        '_': UpgradeToken(u'TLS'),
        '_citations': [RFC(2817)],
        '_title': u'Transport Layer Security'
    }, {
        '_': UpgradeToken(u'WebSocket'),
        '_citations': [RFC(6455)],
        '_title': u'The Web Socket Protocol'
    }, {
        '_': UpgradeToken(u'h2c'),
        '_citations': [RFC(7540, section=(3, 2))],
        '_title': u'Hypertext Transfer Protocol version 2 (HTTP/2)'
    }])
Exemplo n.º 27
0
# A token passed through `_check_protocol_id`
# (defined earlier in this module, outside this excerpt).
protocol_id = _check_protocol_id << token > pivot


@can_complain
def _check_alt_authority(complain, value):
    """Parse `value` as an optional host, a ``:`` and a port.

    `complain` and notice number 1257 are handed to `parse`, with
    `value` as the ``authority`` keyword.  Whatever `parse` returns is
    returned.
    """
    authority_rule = maybe_str(uri_host) + ':' + port
    return parse(value, authority_rule, complain, 1257, authority=value)


# The authority is written as a quoted string, whose content is then
# checked by `_check_alt_authority`.
alt_authority = _check_alt_authority << quoted_string > pivot

# ``protocol-id=alt-authority``
alternative = protocol_id * skip('=') * alt_authority > pivot
# ``name=value`` where the name is wrapped in `AltSvcParam`
# and the value is a token or a quoted string.
parameter = ((AltSvcParam << token) * skip('=') *
             (token | quoted_string)) > pivot
# An alternative followed by any number of ``;``-separated parameters
# collected into a `MultiDict`.
alt_value = Parametrized << (
    alternative *
    (MultiDict << many(skip(OWS * ';' * OWS) * parameter))) > pivot

# Either ``clear`` (defined earlier in this module, outside this excerpt)
# or a comma-separated list of alternative values.
Alt_Svc = clear | comma_list1(alt_value) > pivot

# Parsers for the values of the ``ma`` and ``persist`` parameters;
# ``persist`` only accepts the literal ``1``, which is replaced by ``True``.
ma = delta_seconds > pivot
persist = subst(True) << literal('1') > pivot

# A host optionally followed by ``:`` and a port.
Alt_Used = uri_host + maybe_str(':' + port) > pivot

fill_names(globals(), RFC(7838))
Exemplo n.º 28
0

def parser_for(name):
    """Return the ``parser`` info recorded for `name` (``None`` if absent)."""
    parser = known.get_info(name).get('parser')
    return parser


# A few of these directives (such as ``no-cache``) actually have
# different citations for requests and for responses;
# but it's hard for us to know whether a given instance of ``CacheDirective``
# refers to a request directive or a response directive,
# so we have to use one, more general citation.

known = KnownDict(CacheDirective, [
    {
        '_': CacheDirective(u'max-age'),
        '_citations': [RFC(7234, section=(5, 2))],
        '_no_sync': ['_citations'],
        'argument': REQUIRED,
        'argument_form': TOKEN_PREFERRED,
        'for_request': True,
        'for_response': True,
        'parser': rfc7234.delta_seconds
    },
    {
        '_': CacheDirective(u'max-stale'),
        '_citations': [RFC(7234, section=(5, 2, 1, 2))],
        'argument': OPTIONAL,
        'argument_form': TOKEN_PREFERRED,
        'for_request': True,
        'for_response': False,
        'parser': rfc7234.delta_seconds
Exemplo n.º 29
0
import re

from httpolice.citation import RFC
from httpolice.codings import decode_deflate, decode_gzip
from httpolice.exchange import Exchange, complaint_box
from httpolice.known import m, st, tc
from httpolice.parse import ParseError, Symbol
from httpolice.request import Request
from httpolice.response import Response
from httpolice.structure import (FieldName, HeaderEntry, HTTPVersion, Method,
                                 StatusCode, Unavailable, okay)

# Create empty symbols just for referring to them in parse errors.
# Each one pairs a rule name with the RFC 7230 section that is cited for it.

HTTP_message = Symbol(u'HTTP-message', RFC(7230, section=u'3'))
request_line = Symbol(u'request-line', RFC(7230, section=u'3.1.1'))
status_line = Symbol(u'status-line', RFC(7230, section=u'3.1.2'))
header_field = Symbol(u'header-field', RFC(7230, section=u'3.2'))
chunked_body = Symbol(u'chunked-body', RFC(7230, section=u'4.1'))
chunk = Symbol(u'chunk', RFC(7230, section=u'4.1'))
chunk_size = Symbol(u'chunk-size', RFC(7230, section=u'4.1'))

# Patterns for a version string (``HTTP/`` digit ``.`` digit)
# and for a status code of exactly three digits.
HTTP_VERSION = re.compile(u'^HTTP/[0-9]\\.[0-9]$')
STATUS_CODE = re.compile(u'^[0-9]{3}$')

# 1 GiB.  NOTE(review): how this limit is applied is not visible
# in this excerpt.
MAX_BODY_SIZE = 1024 * 1024 * 1024


def parse_streams(inbound, outbound, scheme=None):
    """Parse one or two HTTP/1.x streams.
Exemplo n.º 30
0
#     (see RFC 5789 errata).
#
#   ``is_json``
#     Set this to ``True`` if the media type uses JSON syntax
#     but **does not end** with ``+json``.
#
#   ``is_xml``
#     Set this to ``True`` if the media type uses XML syntax
#     but **does not end** with ``+xml``.
#
#   ``deprecated``
#     Filled by ``tools/iana.py``. You should not need to change it.

known = KnownDict(MediaType, [
 {'_': MediaType(u'application/1d-interleaved-parityfec'),
  '_citations': [RFC(6015)]},
 {'_': MediaType(u'application/alto-costmap+json'), '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-costmapfilter+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-directory+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-endpointprop+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-endpointpropparams+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-endpointcost+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-endpointcostparams+json'),
  '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-error+json'), '_citations': [RFC(7285)]},
 {'_': MediaType(u'application/alto-networkmapfilter+json'),
Exemplo n.º 31
0
def comma_list(element):
    """Build the parser for a ``#element`` comma-separated list.

    Every position, including the first, may be empty.  Empty positions
    produce ``None`` entries, which `_collect_elements` is applied to
    before the result is returned.
    """
    # RFC Errata ID: 5257
    first = maybe(group(element) * skip(OWS))
    rest = many(
        skip(literal(',') * OWS) * maybe(group(element) * skip(OWS)))
    rule = _collect_elements << (first % rest)
    return rule > named(u'#rule', RFC(7230, section=u'7'))