示例#1
0
文件: base.py 项目: CERT-Polska/n6
    def get_output_message_id(self, source, created_timestamp,
                              output_data_body, **processed_data):
        """
        Compute the id of the output message (aka `rid`).

        Args:
            `source`:
                The source specification string (based on pattern:
                '<source label>.<source channel>').
            `created_timestamp`:
                Message creation timestamp as an int number.
            `output_data_body`:
                The output AMQP message body (bytes) as returned by the
                get_output_data_body() method.

        Kwargs:
            <some keyword arguments>:
                Processed data (as returned by the process_input_data()
                method); this implementation does not use them.

        Returns:
            The output message id (a string): the hexadecimal MD5 digest
            of the three components above joined with NUL bytes.

        Typically, this method is used indirectly -- being called in
        get_output_prop_kwargs() (which is called in get_output_components()).

        The default implementation of this method should be sufficient in
        most cases.
        """
        source_bytes = as_bytes(source)
        timestamp_bytes = as_bytes('{0:d}'.format(created_timestamp))
        # NUL-separated: <source> \0 <timestamp> \0 <body>
        hashed_bytes = b'\0'.join(
            [source_bytes, timestamp_bytes, output_data_body])
        # MD5 serves here only as an identifier, not as a security measure.
        digest = hashlib.md5(hashed_bytes, usedforsecurity=False)
        return digest.hexdigest()
示例#2
0
def generate_secret_key_qr_code_url(secret_key, login, issuer_name):
    # type: (String, String, String) -> String
    """
    Ask the TOTP handler made for `secret_key` for its provisioning URI.

    `login` and `issuer_name` are first coerced with `as_unicode()`
    (and, under Python 2 only, converted back with `as_bytes()`), then
    passed to the handler's `provisioning_uri()` as `name` and
    `issuer_name`, respectively.

    Returns whatever `provisioning_uri()` returns (presumably a URI
    meant to be rendered as a QR code -- confirm against the handler's
    documentation).
    """
    account_name = as_unicode(login)
    issuer = as_unicode(issuer_name)
    if sys.version_info[0] < 3:  #3--
        account_name = as_bytes(account_name)  #3--
        issuer = as_bytes(issuer)  #3--
    totp_handler = make_totp_handler(secret_key)
    return totp_handler.provisioning_uri(name=account_name,
                                         issuer_name=issuer)
示例#3
0
文件: base.py 项目: CERT-Polska/n6
 def _deterministic_conv_to_bytes(self, value):
     """
     Convert `value` to `bytes` in a deterministic (reproducible) way.

     Conversion rules, as implemented below:

     * a dict -- becomes `{<key>: <val>, ...}`, with the items sorted
       by their already converted (bytes) keys; each key must be a
       `str` or `bytes`; each value must be an `int` or an instance
       of one of the "convertible to bytes" types; keys and non-int
       values are rendered with _py2_bytestring_like_repr() (i.e.,
       quoted), ints -- as plain decimal digits;
     * an int -- becomes its decimal representation;
     * anything else of a "convertible to bytes" type -- is passed
       to as_bytes().

     Returns:
         A `bytes` object.

     Raises:
         TypeError -- for a dict key, dict value or `value` itself
         being of an unsupported type.

     NOTE(review): this helper appears to be used when computing ids
     of output messages, so any change of its output would presumably
     change those ids -- confirm before modifying it.
     """
     CONVERTIBLE_TO_BYTES_TYPES = str, bytes, bytearray, memoryview, SupportsBytes
     if isinstance(value, dict):
         converted_key_to_val = {}
         for k, v in value.items():
             if not isinstance(k, (str, bytes)):
                 raise TypeError(
                     'dict {!a} contains a non-str-or-bytes key ({!a})'.
                     format(value, k))
             # keys are always rendered in the quoted (repr-like) form
             k = self._py2_bytestring_like_repr(k).encode('ascii')
             if isinstance(v, int):
                 # (note: `bool` is a subclass of `int`, so True/False
                 # are rendered here as 1/0)
                 v = b'%d' % v
             else:
                 if not isinstance(v, CONVERTIBLE_TO_BYTES_TYPES):
                     raise TypeError('dict {!a} contains a value ({!a}) '
                                     'whose type ({!a}) is illegal'.format(
                                         value, v, type(v)))
                 v = self._py2_bytestring_like_repr(v).encode('ascii')
             assert isinstance(k, bytes)
             assert isinstance(v, bytes)
             converted_key_to_val[k] = v
         # sorting makes the result independent of the dict's item order
         value = b'{%b}' % b', '.join(
             b'%b: %b' % (k, v)
             for k, v in sorted(converted_key_to_val.items()))
     elif isinstance(value, int):
         value = b'%d' % value
     else:
         if not isinstance(value, CONVERTIBLE_TO_BYTES_TYPES):
             raise TypeError('encountered a value ({!a}) '
                             'whose type ({!a}) is illegal)'.format(
                                 value, type(value)))
         # contrary to dict items, a top-level value is *not* quoted
         value = as_bytes(value)
     assert isinstance(value, bytes)
     return value
示例#4
0
文件: base.py 项目: CERT-Polska/n6
    def get_output_message_id(self, parsed):
        """
        Make the id of the output message (aka `id`).

        The id is the hexadecimal MD5 digest of newline-separated
        `<key>,<value>` components, one per item provided by
        iter_output_id_base_items(), taken in sorted order.  Each value
        is converted with _deterministic_conv_to_bytes(); for a list or
        tuple, each element is converted separately, and the results
        are sorted and joined with commas.

        Args:
            `parsed` (dict):
                As yielded by parse().

        Returns:
            A string being the output message id.

        Raises:
            TypeError -- if any key provided by iter_output_id_base_items()
            is not a `str` (or if a value cannot be converted, see:
            _deterministic_conv_to_bytes()).

        Typically, this method is used indirectly -- being called in
        get_output_bodies().
        """
        # Be careful when modifying this method or any method that this
        # method does call: after any code changes it should generate
        # the same ids for already stored data!  (That's why this code
        # may already seem weird a bit...)
        assert isinstance(parsed, RecordDict)
        components = []
        for k, v in sorted(self.iter_output_id_base_items(parsed)):
            if not isinstance(k, str):
                raise TypeError('encountered a non-str key ({!a})'.format(k))
            k = as_bytes(k)
            if isinstance(v, (list, tuple)):
                # multi-value item: the order of elements must not matter,
                # so the converted elements are sorted before joining
                v = b','.join(sorted(map(self._deterministic_conv_to_bytes,
                                         v)))
            else:
                v = self._deterministic_conv_to_bytes(v)
            assert isinstance(k, bytes)
            assert isinstance(v, bytes)
            components.append(b'%b,%b' % (k, v))
        hashed_bytes = b'\n'.join(components)
        # MD5 is used here to produce an identifier, not for security
        return hashlib.md5(hashed_bytes, usedforsecurity=False).hexdigest()
示例#5
0
 def render_content(self, data, **kwargs):
     """
     Render one data item using the `RULE_TEMPLATE` format string.

     Returns:
         The rendered rule as bytes, or `b''` if the item is rejected
         by filter_renderer_specific() or by filter_common().

     Raises:
         NotImplementedError -- if `RULE_TEMPLATE` is None.
     """
     if self.RULE_TEMPLATE is None:
         raise NotImplementedError
     # the common filter is consulted only if the specific one passes
     is_filtered_out = (self.filter_renderer_specific(data)
                        or self.filter_common(data))
     if is_filtered_out:
         return b''
     template_fields = self.parse_data(data, **kwargs)
     rendered = self.RULE_TEMPLATE.format(**template_fields)
     return as_bytes(rendered)
示例#6
0
def _get_path(match, scheme, epslash):
    """
    Get the *path* component from the given regex match.

    If the match's `path` group is empty/missing, an empty value made
    with the converter obtained from `_proper_conv(match)` is used
    (presumably of the same type, str or bytes, as the matched
    string -- confirm against `_proper_conv()`).  If, additionally,
    `epslash` is true and `scheme` is one of: http, https, ftp --
    the path is replaced with a single slash.
    """
    conv = _proper_conv(match)
    path = match.group('path') or conv('')
    if not epslash:
        return path
    if as_bytes(scheme) not in (b'http', b'https', b'ftp'):
        return path
    if path:
        return path
    # empty path + one of the schemes for which `/` is equivalent
    return conv('/')
示例#7
0
 def before_content(self, **kwargs):
     """
     Produce the CSV header row (built from `EVENT_FIELDS`), as bytes.

     All fields are quoted; keyword arguments are accepted but not
     used here.
     """
     buffer = StringIO(newline='')
     csv.DictWriter(
         buffer,
         fieldnames=self.EVENT_FIELDS,
         extrasaction='ignore',
         delimiter=',',
         quotechar='"',
         quoting=csv.QUOTE_ALL,
     ).writeheader()
     header_text = buffer.getvalue()
     buffer.close()
     return as_bytes(header_text)
示例#8
0
def iter_unzip_from_bytes(
        zipped,
        #*,                                                    #3: uncomment this line
        password=None,
        filenames=None,
        yielding_with_dirs=False):
    """
    Extract files from a ZIP archive.

    Args:
        `zipped` (typically a `bytes`/`bytearray`; *cannot* be a `str`):
            The ZIP archive as a *bytes-like* object.

    Kwargs:
        `password` (optional; if given, typically a `str`/`bytes`):
            The password to extract encrypted files. If given (and not
            `None`), it will be, firstly, coerced to `bytes` using the
            `as_bytes()` helper from `n6lib.common_helpers` (by
            performing an `as_bytes(password, 'strict')` call).
        `filenames` (optional; if given, typically a list of `str`/`bytes`):
            A container (e.g., a sequence or a set) of the filenames
            (without dir parts) we are interested in. If given (and
            not `None`) then only the specified files will be extracted,
            ignoring non-existent ones. Each filename will be, firstly,
            coerced to `str` using the `as_unicode()` helper from
            `n6lib.common_helpers`.
            (Maybe TODO: add support for Py3's *path*/*path-like*
            objects...)
        `yielding_with_dirs` (default: False):
            If False -- dir names will be stripped off from yielded file names.
            If True -- file names will be yielded as found in the archive
            (including dir parts).

    Yields:
        Pairs: `(<file name (a str obj)>, <file content (a bytes obj)>).`

    Raises:
        zipfile.BadZipfile, EOFError:
            as zipfile.ZipFile can raise it for invalid input.
        RuntimeError (or subclasses, in particular NotImplementedError):
            as zipfile.ZipFile can raise it for unsupported input
            features, as well as for unspecified or incorrect password.

    The underlying `zipfile.ZipFile` object is always closed: when the
    iteration is exhausted, when an error occurs, or when the generator
    is closed/garbage-collected before being exhausted.
    """
    if password is not None:
        password = as_bytes(password, 'strict')
    if filenames is not None:
        if sys.version_info[0] < 3:  #3--
            filenames = frozenset(filenames)  #3--
        else:  #3--
            filenames = frozenset(map(as_unicode, filenames))
    # Using `with` guarantees that the archive object is closed, also
    # when the consumer abandons the generator in the middle.
    with zipfile.ZipFile(io.BytesIO(zipped)) as zfile:
        for fullname in zfile.namelist():
            #assert isinstance(fullname, str)                                        #3: uncomment this line
            basename = (os.path.basename(fullname) if fullname else fullname)
            #assert isinstance(basename, str)                                        #3: uncomment this line
            if filenames is None or basename in filenames:
                content = zfile.read(fullname, pwd=password)
                yield (fullname if yielding_with_dirs else basename), content
示例#9
0
 def _get_sha256(self):
     """
     Get a value for the `sha256` attribute.

     Returns:
         * a random element of `self._params['sha256']` -- if
           `_attr_in_params('sha256')` is true;
         * otherwise, the hex SHA-256 digest of a random 64-character
           alphanumeric string -- if `_include_in_event('sha256')` is
           true;
         * otherwise, None.
     """
     attr_name = 'sha256'
     if self._attr_in_params(attr_name):
         return random.choice(self._params[attr_name])
     if not self._include_in_event(attr_name):
         return None
     alphabet = string.ascii_letters + string.digits
     random_chars = [random.choice(alphabet) for _ in range(64)]
     hashed = hashlib.sha256(as_bytes(''.join(random_chars)))
     return hashed.hexdigest()
示例#10
0
 def _prepare_url_data_items(self, item_prototype, custom_items):
     """
     Fill `item_prototype` and `custom_items` with the URL-related data
     taken from this object's `_url_data_ready` item (if present).

     If `self.get('_url_data_ready')` is None, nothing is done.
     Otherwise (both given dicts are modified in place):

     * `item_prototype['url']` is set to the result of
       `make_provisional_url_search_key()` called with the
       URL-safe-Base64-decoded `url_orig` value;
     * `custom_items['url_data']` is set to the whole url data
       object (`custom_items` is asserted not to contain this key
       beforehand).
     """
     url_data = self.get('_url_data_ready')
     if url_data is not None:
         assert 'url_data' not in custom_items
         # (Py2-only alias, marked for removal by the `#3--` marker;
         # note that, as long as it is here, it shadows the builtin)
         str = basestring  #3--
         assert isinstance(url_data.get('url_orig'), str)
         url_orig = base64.urlsafe_b64decode(as_bytes(
             url_data['url_orig']))  #3: `as_bytes(`-- `)--
         # the `url` key intentionally gets the provisional search
         # key, not the URL itself
         item_prototype['url'] = make_provisional_url_search_key(
             url_orig)  # [sic]
         custom_items['url_data'] = url_data
示例#11
0
 def render_content(self, data, **kwargs):
     """
     Render one data item as a single CSV row, returned as bytes.

     The item is first prepared with _dict_to_csv_ready(); only the
     fields listed in `EVENT_FIELDS` are written (any other keys are
     ignored), and all fields are quoted.  Keyword arguments are
     accepted but not used here.
     """
     row = self._dict_to_csv_ready(data)
     buffer = StringIO(newline='')
     csv.DictWriter(
         buffer,
         fieldnames=self.EVENT_FIELDS,
         extrasaction='ignore',
         delimiter=',',
         quotechar='"',
         quoting=csv.QUOTE_ALL,
     ).writerow(row)
     row_text = buffer.getvalue()
     buffer.close()
     return as_bytes(row_text)
示例#12
0
 def _urlsafe_b64decode(self, value):
     """
     Strictly decode a URL-safe-Base64-encoded string.

     Trailing CR/LF characters are stripped first.  Then the value is
     rejected if it contains any character outside
     `_URLSAFE_B64_VALID_CHARACTERS`, or if the actual decoding fails.

     Args:
         `value`:
             The encoded value (assumed to be a text string, as it is
             stripped with a `str` argument -- TODO confirm).

     Returns:
         The decoded data, as returned by `base64.urlsafe_b64decode()`.

     Raises:
         FieldValueError -- if `value` is not a valid URL-safe-Base64
         string.
     """
     value = value.rstrip(
         '\r\n')  # some encoders like to append a newline...
     try:
         # `base64.urlsafe_b64decode()` just ignores illegal
         # characters *but* we want to be *more strict*
         if not self._URLSAFE_B64_VALID_CHARACTERS.issuperset(value):
             raise ValueError
         # `base64.urlsafe_b64decode()` (contrary to `base64.standard_b64decode()`)    #3--
         # does *not* accept unicode strings (even not pure-ASCII ones) :-/            #3--
         value = as_bytes(value)  #3--
         value = base64.urlsafe_b64decode(value)
     # NOTE: in Python 3, incorrect padding makes the decoder raise
     # `binascii.Error`, which is a subclass of `ValueError`.
     except (
             ValueError, TypeError
     ):  # (TypeError is raised on incorrect Base64 padding)  # <- TODO: check if it is still true in Py3
         # (here `value` is the stripped input, possibly already
         # coerced with `as_bytes()` if the decoding itself failed)
         raise FieldValueError(public_message=(
             '"{}" is not a valid URL-safe-Base64-encoded string '
             '[see: RFC 4648, section 5]'.format(ascii_str(value))))
     return value
示例#13
0
def normalize_url(url, transcode1st=False, epslash=False, rmzone=False):
    r"""
    Apply to the given string (or binary data blob) as much of the basic
    URL/IRI normalization as possible, provided that no semantic changes
    are made (i.e., the intent is that the resultant URL/IRI is
    semantically equivalent to the given one).

    Args (required):
        `url` (str or bytes/bytearray):
            The URL (or URI, or IRI) to be normalized.

    Kwargs (optional):
        `transcode1st` (bool; default: False):
            Whether, before the actual URL normalization (see the
            description in the steps 1-18 below...), the given `url`
            should be:
            * if given as a bytes/bytearray instance: decoded using
              the 'utf-8' codec with our custom error handler:
              'utf8_surrogatepass_and_surrogateescape';
            * otherwise (assuming a str instance): "transcoded" using
              `try_to_normalize_surrogate_pairs_to_proper_codepoints()`
              (to ensure that representation of non-BMP characters is
              consistent...).
        `epslash` (bool; default: False):
            Whether the *path* component of the given URL should be
            replaced with `/` if the `url`'s *scheme* is `http`, `https`
            or `ftp` *and* the *path* is empty (note that, generally,
            this normalization step does not change the URL semantics,
            with the exception of an URL being the request target of an
            `OPTIONS` HTTP request; see RFC 7230, section 2.7.3).
        `rmzone` (bool; default: False):
            Whether the IPv6 zone identifier being a part of an IPv6
            address in the `url`'s *host* component should be removed
            (note that, generally, IPv6 zone identifier has no meaning
            outside the local system it is related to; see RFC 6874,
            section 1).

    Returns:
        A `str` object (if a `str` was given, or if `transcode1st` was
        true) or a `bytes` object (if a `bytes` or `bytearray` object
        was given *and* `transcode1st` was false) representing the URL
        after a *best effort* but *keeping semantic equivalence*
        normalization (see below: the description of the algorithm).

    Raises:
        `TypeError` if `url` is not a str or bytes/bytearray instance.

    The algorithm of normalization consists of the following steps [the
    `+` operator in this description means *string concatenation*]:

    0. Optional `url` transcoding (see the above description of the
       `transcode1st` argument).

    1. Try to split the `url` into two parts: the `scheme` component
       (matching the `scheme` group of the regular expression
       `URL_SCHEME_AND_REST_REGEX`) and `rest` (the rest of the URL).

    2. If no `scheme` could be singled out in step 1 then stop here --
       returning the whole `url`; otherwise proceed to step 3.

    3. Convert `scheme` to *lowercase*.

    4. Try to split `rest` into the following parts:

       * `before host` (i.e., the "://" separator, optionally followed
         by any number of non-"/?#@" characters which, if present, are
         obligatorily followed by exactly one "@"),
       * `host` (see below: steps 6 to 13...),
       * optional `port` (i.e., ":<decimal number>" or just ":"),
       * optional `path` (i.e., "/" + optionally any number of
         non-"?#" characters),
       * optional `after path` (that is: "?" or "#", optionally
         followed by any number of any characters).

    5. If `rest` could not be split in step 4 then stop here --
       returning `scheme` + `rest`; otherwise proceed to step 6.

    6. If `host` consists of "[" + `ipv6` + optional `ipv6 zone` + "]"
       -- where `ipv6` (consisting of hexadecimal digits and ":"
       characters, with optional suffix in the IPv4 four-octets format)
       is a supposed IPv6 address (see RFC 3986) and `ipv6 zone`
       (consisting, if present, of one "%" character followed by some
       non-"/?#[]" characters) is a supposed IPv6 zone identifier (see
       RFC 6874) -- then proceed to step 7, otherwise skip to step 12.

    7. Convert `ipv6` to the normalized IPv6 format which:

       * uses only *lowercase* hexadecimal digits, and `:` characters
         as separators (in particular, the last 32 bits of the address
         are *not* represented using the IPv4 four-octets format),

       * is *condensed*, i.e., non-zero hexadecimal segments are
         formatted without leading zeros, and the `::` marker (if
         applicable) is used to replace the leftmost of the longest
         sequences of '0' segments (see RFC 5952, Section 4.2).

    8. If normalization in step 7 was impossible because of syntactic
       incorrectness (i.e., `ipv6` could not be parsed as a valid IPv6
       address) then leave `ipv6` intact.

    9. If `ipv6 zone` is *not* present, or the `rmzone` argument is
       true, then set `ipv6 zone` to an empty string and skip to step
       11; otherwise proceed to step 10.

    10. If `ipv6 zone` consists only of ASCII characters then convert
        it to *lowercase*; otherwise leave it intact.

    11. Set `host` to "[" + `ipv6` + `ipv6 zone` + "]"; then skip to
        step 14.

    12. Split `host` (consisting of some non-":/?#" characters,
        presumably representing some hostname or IPv4/IPv[Future]
        address; see RFC 3986...) into *labels*, using dot characters
        defined by the `DOMAIN_LABEL_SEPARATOR_..._REGEX` constants
        as the delimiter (in such a way that *labels* do not include
        delimiter dots); for each such a `label` do the following:
        if `label` consists only of ASCII characters then convert
        it to *lowercase*, otherwise leave it intact.

    13. Set `host` to the result of concatenation of the *labels* from
        step 12 (each of them converted to *lowercase* if ASCII-only)
        interleaved with ".".

    14. If `port` is *not* present, or `port` is ":", or ":" followed
        by the known *default port number* for the particular `scheme`
        (according to the mapping `URL_SCHEME_TO_DEFAULT_PORT`; e.g.,
        80 for the "http" value of `scheme`), then set `port` to an
        empty string; otherwise leave `port` intact.

    15. If `path` is present then leave it intact and skip to step 17;
        otherwise proceed to step 16.

    16. If the `epslash` argument is true and `scheme` is one of:
        "http", "https", "ftp" -- then set `path` to "/"; otherwise
        set `path` to an empty string.

    17. If `after path` is *not* present then set it to an empty
        string.

    18. Stop here -- returning `scheme` + `before host` + `host` +
        `port` + `path` + `after path`.


    NOTE(review): the examples below appear to use the Python 2
    literal notation (`'...'` for binary data, `u'...'` for text) --
    confirm whether they should be updated to the Python 3 notation.


    Ad 0:

    >>> normalize_url('\xf4\x8f\xbf\xbf')
    '\xf4\x8f\xbf\xbf'
    >>> normalize_url('\xf4\x8f\xbf\xbf', transcode1st=True)
    u'\U0010ffff'
    >>> normalize_url(u'\udbff\udfff')  # look at this!
    u'\udbff\udfff'
    >>> normalize_url(u'\udbff\udfff', transcode1st=True)
    u'\U0010ffff'
    >>> normalize_url(u'\U0010ffff')
    u'\U0010ffff'
    >>> normalize_url(u'\U0010ffff', transcode1st=True)
    u'\U0010ffff'


    Ad 0-2:

    >>> normalize_url('Blabla-bla!@#$ %^&\xc4\x85\xcc')
    'Blabla-bla!@#$ %^&\xc4\x85\xcc'
    >>> normalize_url('Blabla-bla!@#$ %^&\xc4\x85\xcc', transcode1st=True)
    u'Blabla-bla!@#$ %^&\u0105\udccc'
    >>> normalize_url(u'Blabla-bla!@#$ %^&\u0105\udccc')
    u'Blabla-bla!@#$ %^&\u0105\udccc'


    Ad 0-1 + 3 + 5:

    >>> normalize_url('SOME-scheme:Blabla-bla!@#$ %^&\xc4\x85\xcc')
    'some-scheme:Blabla-bla!@#$ %^&\xc4\x85\xcc'
    >>> normalize_url('SOME-scheme:Blabla-bla!@#$ %^&\xc4\x85\xcc', transcode1st=True)
    u'some-scheme:Blabla-bla!@#$ %^&\u0105\udccc'
    >>> normalize_url(u'somE-sCHEmE:Blabla-bla!@#$ %^&\u0105\udccc')
    u'some-scheme:Blabla-bla!@#$ %^&\u0105\udccc'


    Ad 0-1 + 3-4 + 6-11 + 14-18:

    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]')
    'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334FAB]')
    'http://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334FAB]'
    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]')
    'http://[2001:db8:85a3::8a2e:370:7334%25en1]'
    >>> normalize_url('HtTP://[2001:0DB8:85A3::8A2E:0370:7334]/fooBAR',
    ...               epslash=True)
    'http://[2001:db8:85a3::8a2e:370:7334]/fooBAR'
    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52]:80')
    'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334%25en1]:80',
    ...               epslash=True)
    'http://[2001:db8:85a3::8a2e:370:7334%25en1]/'
    >>> normalize_url('HtTP://[2001:DB8:85A3::8A2E:3.112.115.52]',
    ...               rmzone=True)
    'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTP://[2001:0db8:85a3:0000:0000:8a2e:0370:7334%25EN1]',
    ...               rmzone=True)
    'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]',
    ...               rmzone=True, epslash=True)
    'http://[2001:db8:85a3::8a2e:370:7334]/'
    >>> normalize_url('HtTP://[2001:0DB8:85A3::8A2E:0370:7334%25en1]:80',
    ...               rmzone=True)
    'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTP://[2001:DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]:80',
    ...               rmzone=True, epslash=True)
    'http://[2001:db8:85a3::8a2e:370:7334]/'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52]')
    u'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url(u'HtTP://[2001:0db8:85a3::8a2e:370:7334%25EN1]')
    u'http://[2001:db8:85a3::8a2e:370:7334%25en1]'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334FAB%25eN1]',
    ...               epslash=True)
    u'http://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334FAB%25en1]/'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8a2e:3.112.115.52]',
    ...               epslash=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]/'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334]:80')
    u'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3::8A2E:3.112.115.52%25en1]:80',
    ...               epslash=True)
    u'http://[2001:db8:85a3::8a2e:370:7334%25en1]/'
    >>> normalize_url(u'HtTP://[2001:db8:85a3:0000:0000:8A2E:0370:7334]',
    ...               rmzone=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]/fooBAR',
    ...               rmzone=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]/fooBAR'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3::8A2E:0370:7334%25en1]',
    ...               rmzone=True, epslash=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]/'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]:80',
    ...               rmzone=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url(u'HtTP://[2001:0DB8:85A3:0000:0000:8A2E:0370:7334%25en1]:80',
    ...               rmzone=True, epslash=True)
    u'http://[2001:db8:85a3::8a2e:370:7334]/'
    >>> normalize_url('HtTPS://[2001:DB8:85A3:0000:0000:8A2E:3.112.115.52%25En1]:80')
    'https://[2001:db8:85a3::8a2e:370:7334%25en1]:80'
    >>> normalize_url('HtTPS://[2001:DB8:85A3:0000:0000:8A2E:3.112.115.52%25en1]:80',
    ...               rmzone=True)
    'https://[2001:db8:85a3::8a2e:370:7334]:80'
    >>> normalize_url('HtTPS://[2001:0db8:85a3::8a2E:3.112.115.52%25en1]:443',
    ...               rmzone=True)
    'https://[2001:db8:85a3::8a2e:370:7334]'
    >>> normalize_url('HtTPS://[2001:DB8:85A3:0000:0000:8A2E:0370:7334%25eN\xc4\x851]:80',
    ...               epslash=True)
    'https://[2001:db8:85a3::8a2e:370:7334%25eN\xc4\x851]:80/'
    >>> normalize_url(u'HtTPS://[2001:0db8:85a3::8a2E:3.112.115.52%25En1]:443')
    u'https://[2001:db8:85a3::8a2e:370:7334%25en1]'
    >>> normalize_url(u'HtTPS://[2001:0DB8:85A3:0000:0000:8A2E:3.112.115.52%25eN\xc4\x851]:443',
    ...               epslash=True)
    u'https://[2001:db8:85a3::8a2e:370:7334%25eN\xc4\x851]/'
    >>> normalize_url(u'HtTPS://[2001:0DB8:85A3::8A2E:0370:7334%25eN1]:80',
    ...               rmzone=True, epslash=True)
    u'https://[2001:db8:85a3::8a2e:370:7334]:80/'
    >>> normalize_url(u'HtTPS://[2001:0DB8:85A3::8A2E:370:7334%25eN1]:443',
    ...               rmzone=True, epslash=True)
    u'https://[2001:db8:85a3::8a2e:370:7334]/'


    Ad 0-1 + 3-4 + 12-18:

    >>> normalize_url('HTTP://WWW.XyZ-\xc4\x85\xcc.eXamplE.com', epslash=True)
    'http://www.XyZ-\xc4\x85\xcc.example.com/'
    >>> normalize_url('HTTP://WWW.XyZ-\xc4\x85\xcc.eXamplE.com', transcode1st=True)
    u'http://www.XyZ-\u0105\udccc.example.com'
    >>> normalize_url('HTTP://WWW.XyZ-\xc4\x85.eXamplE.com:80/fooBAR')
    'http://www.XyZ-\xc4\x85.example.com/fooBAR'
    >>> normalize_url('HtTP://WWW.XyZ-\xc4\x85.eXamplE.com:80', epslash=True)
    'http://www.XyZ-\xc4\x85.example.com/'
    >>> normalize_url('HtTP://WWW.XyZ-\xc4\x85.eXamplE.com:80/fooBAR', epslash=True)
    'http://www.XyZ-\xc4\x85.example.com/fooBAR'
    >>> normalize_url('HTTP://WWW.XyZ-\xc4\x85\xcc.eXamplE.com', transcode1st=True)
    u'http://www.XyZ-\u0105\udccc.example.com'
    >>> normalize_url(u'HTtp://WWW.XyZ-\u0105\udccc.eXamplE.com:80')
    u'http://www.XyZ-\u0105\udccc.example.com'
    >>> normalize_url(u'HTtp://WWW.XyZ-\u0105.eXamplE.com:80/')
    u'http://www.XyZ-\u0105.example.com/'
    >>> normalize_url(u'hTTP://WWW.XyZ-\u0105.eXamplE.com:80', epslash=True)
    u'http://www.XyZ-\u0105.example.com/'
    >>> normalize_url('HTTPS://WWW.XyZ-\xc4\x85.eXamplE.com:80')
    'https://www.XyZ-\xc4\x85.example.com:80'
    >>> normalize_url('HTTPS://WWW.XyZ-\xc4\x85.eXamplE.com:80/fooBAR')
    'https://www.XyZ-\xc4\x85.example.com:80/fooBAR'
    >>> normalize_url('HTTPs://WWW.XyZ-\xc4\x85.eXamplE.com:443', epslash=True)
    'https://www.XyZ-\xc4\x85.example.com/'
    >>> normalize_url('HTTPs://WWW.XyZ-\xc4\x85.eXamplE.com:443', epslash=True, transcode1st=True)
    u'https://www.XyZ-\u0105.example.com/'
    >>> normalize_url(u'httpS://WWW.XyZ-\u0105.eXamplE.com:80', epslash=True)
    u'https://www.XyZ-\u0105.example.com:80/'
    >>> normalize_url(u'httpS://WWW.XyZ-\u0105.eXamplE.com:80/fooBAR', epslash=True)
    u'https://www.XyZ-\u0105.example.com:80/fooBAR'
    >>> normalize_url(u'hTtpS://WWW.XyZ-\u0105.eXamplE.com:443')
    u'https://www.XyZ-\u0105.example.com'
    >>> normalize_url(u'httpS://WWW.XyZ-\u0105.eXamplE.com:80/fooBAR', epslash=True,
    ...               transcode1st=True)
    u'https://www.XyZ-\u0105.example.com:80/fooBAR'
    """
    # From now on only two types need to be dealt with: `bytes` and `str`.
    if isinstance(url, bytearray):
        url = as_bytes(url)
    if transcode1st:
        url = _transcode(url)

    scheme = _get_scheme(url)
    if scheme is None:
        # Nothing that looks like a URL scheme -> return the input
        # without any normalization.
        return url

    after_scheme = url[len(scheme):]
    if isinstance(url, bytes):
        components_regex = _AFTER_SCHEME_COMPONENTS_OF_URL_WITH_AUTHORITY_BYTES_REGEX
    else:
        components_regex = _AFTER_SCHEME_COMPONENTS_OF_URL_WITH_AUTHORITY_REGEX
    found = components_regex.search(after_scheme)
    if found is None:
        # Most likely a URL without the *authority* component, so
        # *scheme* is the only component that gets normalized.
        return scheme + after_scheme

    # All components are obtained first (in the URL order), and only
    # then glued together, left to right.
    remaining_components = (
        _get_before_host(found),
        _get_host(found, rmzone),
        _get_port(found, scheme),
        _get_path(found, scheme, epslash),
        _get_after_path(found),
    )
    normalized = scheme
    for component in remaining_components:
        normalized = normalized + component
    return normalized
示例#14
0
文件: base.py 项目: CERT-Polska/n6
 def _py2_bytestring_like_repr(self, obj):
     """
     Get the ASCII-only repr of `as_bytes(obj)`, without the leading
     `b` prefix (i.e., starting with the opening quote character).
     """
     prefixed_repr = ascii(as_bytes(obj))
     # the repr is expected to look like: b'...' or b"..."
     assert prefixed_repr[:2] in ("b'", 'b"')
     return prefixed_repr[1:]
示例#15
0
 def _get_value_for_md5_attr(self):
     """
     Return the hex MD5 digest of a random 32-character string made of
     ASCII letters and digits.
     """
     alphabet = string.ascii_letters + string.digits
     random_chars = [random.choice(alphabet) for _ in range(32)]
     hashed = hashlib.md5(as_bytes(''.join(random_chars)))
     return hashed.hexdigest()
示例#16
0
def _get_hex_hash_of_names(names):
    """
    Return the hex SHA-256 digest of the given names joined with `-`
    (the joined string being coerced to bytes with `as_bytes()`).
    """
    joined_names = '-'.join(names)
    hasher = hashlib.sha256(as_bytes(joined_names))
    return hasher.hexdigest()
示例#17
0
 def before_content(self, **kwargs):
     """
     Produce the leading `# <category>` comment line, as bytes.

     Returns `b''` if the request parameters do not include
     `category`.  Keyword arguments are accepted but not used here.
     """
     if 'category' not in self.request.params:
         return b''
     category = str(self.request.params.get('category'))
     return b'# ' + as_bytes(category) + b'\n'