def b85decode(encoded,
              prefix=None,
              suffix=None,
              _base85_bytes=ASCII85_BYTES,
              _base85_ords=ASCII85_ORDS,
              _uncompact_zero=True,
              _compact_char=ZERO_GROUP_CHAR):
    """
    Decode ASCII85-encoded bytes back into raw bytes.

    Whitespace is removed first, then the optional prefix and suffix are
    stripped, and finally (when enabled) every compact character is expanded
    to the zero-group chunk before the chunk decoder is invoked.

    :param encoded:
        Encoded ASCII bytes.
    :param prefix:
        Prefix expected at the start of the encoded text. None by default.
    :param suffix:
        Suffix expected at the end of the encoded text. None by default.
    :param _base85_bytes:
        (Internal) Character set to use.
    :param _base85_ords:
        (Internal) Look-up used to map a base85 character to its ordinal.
    :param _uncompact_zero:
        (Internal) When ``True`` (default) the compact character is checked
        and expanded into the zero-group chunk.
    :param _compact_char:
        (Internal) Character that stands for a compacted zero group.
    :returns:
        ASCII85-decoded raw bytes.
    :raises TypeError:
        If the prefix, suffix, compact character or encoded value is not
        bytes.
    """
    if not prefix:
        prefix = EMPTY_BYTE
    if not suffix:
        suffix = EMPTY_BYTE

    # Validate argument types up front.
    if not is_bytes(prefix) or not is_bytes(suffix):
        raise TypeError(
            "Prefix/suffix must be bytes: got prefix %r, %r" %
            (type(prefix).__name__, type(suffix).__name__))
    if not is_bytes(_compact_char):
        raise TypeError("compat character must be raw byte: got %r" %
                        type(_compact_char).__name__)
    if not is_bytes(encoded):
        raise TypeError("Encoded sequence must be bytes: got %r" %
                        type(encoded).__name__)

    # ASCII-85 ignores whitespace, so drop all of it.
    payload = EMPTY_BYTE.join(encoded.split())

    # Remove the framing markers when they are present.
    if prefix and payload.startswith(prefix):
        payload = payload[len(prefix):]
    if suffix and payload.endswith(suffix):
        payload = payload[:len(payload) - len(suffix)]

    # Expand compacted zero groups back to their full chunk form.
    if _uncompact_zero:
        _check_compact_char_occurrence(payload, _compact_char)
        payload = payload.replace(_compact_char, EXCLAMATION_CHUNK)

    return _b85decode_chunks(payload, _base85_bytes, _base85_ords)
def b85encode(raw_bytes,
              prefix=None,
              suffix=None,
              _base85_bytes=ASCII85_BYTES,
              _padding=False,
              _compact_zero=True,
              _compact_char=ZERO_GROUP_CHAR):
    """ASCII-85 encodes a sequence of raw bytes.

    The character set in use is::

        ASCII 33 ("!") to ASCII 117 ("u")

    If the number of raw bytes is not divisible by 4, the byte sequence
    is padded with up to 3 null bytes before encoding. After encoding,
    as many bytes as were added as padding are removed from the end of the
    encoded sequence if ``_padding`` is ``False`` (default).

    Encodes a zero-group (four zero bytes) as "z" instead of "!!!!!".
    Only whole, group-aligned "!!!!!" chunks are compacted; a run of "!"
    characters that merely straddles two neighbouring groups is left alone.

    The resulting encoded ASCII string is *not URL-safe* nor is it safe to
    include within SGML/XML/HTML documents. You will need to escape special
    characters if you decide to include such an encoded string within these
    documents.

    :param raw_bytes:
        Raw bytes.
    :param prefix:
        The prefix used by the encoded text. None by default.
    :param suffix:
        The suffix used by the encoded text. None by default.
    :param _base85_bytes:
        (Internal) Character set to use.
    :param _padding:
        (Internal) Passed through to the chunk encoder; ``False`` (default)
        means padding is not kept in the output.
    :param _compact_zero:
        (Internal) Encodes a zero-group (four zero bytes) as "z" instead of
        "!!!!!" if this is ``True`` (default).
    :param _compact_char:
        (Internal) Character used to represent compact groups ("z" default)
    :returns:
        ASCII-85 encoded bytes.
    :raises TypeError:
        If the prefix, suffix, compact character or data is not bytes.
    """
    prefix = prefix or EMPTY_BYTE
    suffix = suffix or EMPTY_BYTE

    if not (builtins.is_bytes(prefix) and builtins.is_bytes(suffix)):
        raise TypeError("Prefix/suffix must be bytes: got prefix %r, %r" %
                        (type(prefix).__name__, type(suffix).__name__))
    if not builtins.is_bytes(_compact_char):
        raise TypeError("compat character must be raw byte: got %r" %
                        type(_compact_char).__name__)
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("data must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    # Encode into ASCII85 characters.
    encoded = _b85encode_chunks(raw_bytes, _base85_bytes, _padding)

    if _compact_zero:
        # A plain ``replace`` would also collapse a "!!!!!" run that spans
        # the boundary between two groups (e.g. "..!!!" followed by "!!.."),
        # producing a compact character in the middle of a group. Walk the
        # output one 5-character group at a time instead.
        # NOTE(review): assumes _b85encode_chunks emits consecutive
        # 5-character groups (a final group may be shorter) -- confirm.
        encoded = EMPTY_BYTE.join(
            _compact_char if group == EXCLAMATION_CHUNK else group
            for group in (encoded[start:start + 5]
                          for start in range(0, len(encoded), 5)))

    return prefix + encoded + suffix
def b85decode(encoded,
              prefix=None,
              suffix=None,
              _base85_bytes=ASCII85_BYTES,
              _base85_ords=ASCII85_ORDS,
              _uncompact_zero=True,
              _compact_char=ZERO_GROUP_CHAR):
    """Decodes an ASCII85-encoded byte string into raw bytes.

    Processing order: whitespace removal, prefix/suffix stripping, optional
    expansion of the compact character, then chunk decoding.

    :param encoded:
        Encoded ASCII bytes.
    :param prefix:
        The prefix used by the encoded text. None by default.
    :param suffix:
        The suffix used by the encoded text. None by default.
    :param _base85_bytes:
        (Internal) Character set to use.
    :param _base85_ords:
        (Internal) Look-up used to map a base85 character to its ordinal.
    :param _uncompact_zero:
        (Internal) Treats the compact character as a full zero-group chunk
        ("!!!!!") if ``True`` (default).
    :param _compact_char:
        (Internal) Character used to represent compact groups ("z" default)
    :returns:
        ASCII85-decoded raw bytes.
    :raises TypeError:
        If the prefix, suffix, compact character or encoded value is not
        bytes.
    """
    if not prefix:
        prefix = EMPTY_BYTE
    if not suffix:
        suffix = EMPTY_BYTE

    if not builtins.is_bytes(prefix) or not builtins.is_bytes(suffix):
        raise TypeError("Prefix/suffix must be bytes: got prefix %r, %r" %
                        (type(prefix).__name__, type(suffix).__name__))
    if not builtins.is_bytes(_compact_char):
        raise TypeError("compat character must be raw byte: got %r" %
                        type(_compact_char).__name__)
    if not builtins.is_bytes(encoded):
        raise TypeError("Encoded sequence must be bytes: got %r" %
                        type(encoded).__name__)

    # Whitespace carries no meaning in ASCII-85.
    body = EMPTY_BYTE.join(encoded.split())

    # Peel off the framing markers if the text carries them.
    if prefix and body.startswith(prefix):
        body = body[len(prefix):]
    if suffix and body.endswith(suffix):
        body = body[:len(body) - len(suffix)]

    if not _uncompact_zero:
        return _b85decode_chunks(body, _base85_bytes, _base85_ords)

    # Validate and then expand every compact character into "!!!!!".
    _check_compact_char_occurrence(body, _compact_char)
    expanded = body.replace(_compact_char, EXCLAMATION_CHUNK)
    return _b85decode_chunks(expanded, _base85_bytes, _base85_ords)
def test_does_not_encode_bytes_or_None_to_utf8(self):
    """``text.utf8_encode`` must hand back None and byte strings untouched."""
    self.assertEqual(text.utf8_encode(None), None)

    byte_samples = (
        constants.UTF8_BYTES,
        constants.LATIN1_BYTES,
        constants.UTF8_BYTES2,
    )
    for sample in byte_samples:
        # Same value comes back, and it is still a byte string.
        self.assertEqual(text.utf8_encode(sample), sample)
        self.assertTrue(builtins.is_bytes(text.utf8_encode(sample)))
def test_does_not_encode_bytes_or_None_to_utf8(self):
    """``utf8_encode`` must hand back None and byte strings untouched."""
    self.assertEqual(utf8_encode(None), None)

    for sample in (utf8_bytes, latin1_bytes, utf8_bytes2):
        # Same value comes back, and it is still a byte string.
        self.assertEqual(utf8_encode(sample), sample)
        self.assertTrue(is_bytes(utf8_encode(sample)))
def test_rejects_non_bytes(self):
    """``is_bytes`` must report False for every non-bytes value.

    The original list checked ``[]`` twice; the duplicate is replaced by
    ``{}`` so that a dict is covered as well.
    """
    non_bytes_values = (
        unicode_string,
        unicode_string2,
        False,
        5,
        None,
        [],
        (),
        {},
        object,
    )
    for value in non_bytes_values:
        self.assertFalse(is_bytes(value))
def test_rejects_non_bytes(self):
    """``builtins.is_bytes`` must report False for every non-bytes value.

    The original list checked ``[]`` twice; the duplicate is replaced by
    ``{}`` so that a dict is covered as well.
    """
    non_bytes_values = (
        constants.UNICODE_STRING,
        constants.UNICODE_STRING2,
        False,
        5,
        None,
        [],
        (),
        {},
        object,
    )
    for value in non_bytes_values:
        self.assertFalse(builtins.is_bytes(value))
def test_does_not_encode_else_to_utf8(self):
    """``text.utf8_encode_if_unicode`` leaves non-Unicode values unchanged."""
    # Byte strings come back equal and remain byte strings.
    for sample in (constants.UTF8_BYTES, constants.UTF8_BYTES2):
        self.assertEqual(text.utf8_encode_if_unicode(sample), sample)
        self.assertTrue(
            builtins.is_bytes(text.utf8_encode_if_unicode(sample)))

    # Everything else is returned as-is.
    self.assertEqual(text.utf8_encode_if_unicode(None), None)
    for other in (False, 5, [], (), {}, object):
        self.assertEqual(text.utf8_encode_if_unicode(other), other)
def test_does_not_encode_else_to_utf8(self):
    """``utf8_encode_if_unicode`` leaves non-Unicode values unchanged."""
    # Byte strings come back equal and remain byte strings.
    for sample in (utf8_bytes, utf8_bytes2):
        self.assertEqual(utf8_encode_if_unicode(sample), sample)
        self.assertTrue(is_bytes(utf8_encode_if_unicode(sample)))

    # Everything else is returned as-is.
    self.assertEqual(utf8_encode_if_unicode(None), None)
    for other in (False, 5, [], (), {}, object):
        self.assertEqual(utf8_encode_if_unicode(other), other)
def b62encode_naive(raw_bytes,
                    base_bytes=base62.ASCII62_BYTES,
                    _padding=True,
                    _zero_byte=ZERO_BYTE):
    """
    Base62 encodes a sequence of raw bytes (naive reference version).

    Leading zero bytes are preserved by default.

    :param raw_bytes:
        Raw bytes to encode.
    :param base_bytes:
        The character set to use; indexed by digit value.
    :param _padding:
        (Internal) ``True`` (default) to prefix the output with one
        ``base_bytes[0]`` per leading zero byte of the input.
    :param _zero_byte:
        (Internal) The byte whose leading occurrences are counted.
    :returns:
        Base-62 encoded bytes.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("data must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    remaining = integer.bytes_to_uint(raw_bytes)
    encoded = EMPTY_BYTE
    # Peel off base-62 digits, least significant first, prepending each.
    # NOTE: the original author recorded that a divmod-based loop is about
    # 2x faster; this form is kept as the naive variant.
    while remaining > 0:
        digit = remaining % 62
        remaining = remaining // 62
        encoded = base_bytes[digit] + encoded

    if not _padding:
        return encoded

    zero_leading = functional.leading(lambda w: w == _zero_byte[0], raw_bytes)
    return (base_bytes[0] * zero_leading) + encoded
def base_encode(raw_bytes, base, base_bytes, base_zero, padding=True):
    """
    Encodes raw bytes in the given base.

    :param raw_bytes:
        Raw bytes to encode.
    :param base:
        Unsigned integer base.
    :param base_bytes:
        The ASCII bytes used in the encoded string ("character set" or
        "alphabet"); indexed by digit value.
    :param base_zero:
        Fill byte used to left-pad the result, one per leading zero byte
        reported by ``bytes_leading``.
    :param padding:
        ``True`` (default) to apply that left padding.
    :returns:
        Encoded bytes.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not is_bytes(raw_bytes):
        raise TypeError("data must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    quotient = bytes_to_uint(raw_bytes)
    encoded = EMPTY_BYTE
    # Repeated division yields digits from least to most significant.
    while quotient > 0:
        quotient, digit = divmod(quotient, base)
        encoded = base_bytes[digit] + encoded

    if not padding:
        return encoded

    pad_count = bytes_leading(raw_bytes)
    return encoded.rjust(len(encoded) + pad_count, base_zero)
def base_encode(raw_bytes, base, base_bytes, base_zero, padding=True):
    """
    Encodes raw bytes in the given base.

    :param raw_bytes:
        Raw bytes to encode.
    :param base:
        Unsigned integer base.
    :param base_bytes:
        The ASCII bytes used in the encoded string ("character set" or
        "alphabet"); indexed by digit value.
    :param base_zero:
        Fill byte used to left-pad the result, one per leading zero byte
        reported by ``builtins.bytes_leading``.
    :param padding:
        ``True`` (default) to apply that left padding.
    :returns:
        Encoded bytes.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("data must be raw bytes: got %r" %
                        type(raw_bytes).__name__)

    quotient = integer.bytes_to_uint(raw_bytes)
    encoded = EMPTY_BYTE
    # Repeated division yields digits from least to most significant.
    while quotient > 0:
        quotient, digit = divmod(quotient, base)
        encoded = base_bytes[digit] + encoded

    if not padding:
        return encoded

    pad_count = builtins.bytes_leading(raw_bytes)
    return encoded.rjust(len(encoded) + pad_count, base_zero)
def rfc1924_b85encode(raw_bytes, _padding=False):
    """
    Base85 encodes using the RFC1924 character set.

    The character set is::

        0-9, A-Z, a-z, and then !#$%&()*+-;<=>?@^_`{|}~

    The double quote, single quote, comma, period, slash, colon, square
    brackets and backslash are specifically not included.

    This is the encoding method used by Mercurial (and git?) to generate
    binary diffs, for example. They chose the IPv6 character set and encode
    using the ASCII85 encoding method while not compacting zero-byte
    sequences.

    :see: http://tools.ietf.org/html/rfc1924
    :param raw_bytes:
        Raw bytes.
    :param _padding:
        (Internal) Whether padding should be included in the encoded output.
        (Default ``False``, which is usually what you want.)
    :returns:
        RFC1924 base85 encoded bytes.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if is_bytes(raw_bytes):
        return _b85encode_chunks(raw_bytes, RFC1924_BYTES, _padding)
    raise TypeError("data must be raw bytes: got %r" %
                    type(raw_bytes).__name__)
def data_uri_parse(data_uri):
    """
    Parses a data URI into raw bytes and metadata.

    The URI is split at its first comma: everything before it is metadata
    (scheme, media type, optional ``;base64`` flag) and everything after it
    is the payload, which may itself contain commas.

    :param data_uri:
        The data URI as ASCII-encoded bytes. If a mime-type definition is
        missing in the metadata, "text/plain;charset=US-ASCII" will be used
        as default mime-type. When only parameters are given (for example
        ";charset=utf-8"), "text/plain" is assumed as the type.
    :returns:
        A 2-tuple::
            (bytes, mime_type)

        See :func:`mom.http.mimeparse.mimeparse.parse_mime_type` for what
        ``mime_type`` looks like.
    :raises TypeError:
        If ``data_uri`` is not bytes.
    """
    if not builtins.is_bytes(data_uri):
        raise TypeError("data URIs must be ASCII-encoded bytes: got %r" %
                        type(data_uri).__name__)

    # The first comma ends the metadata; later commas belong to the payload.
    metadata, encoded = data_uri.split(b(","), 1)
    _, metadata = metadata.split(b("data:"), 1)

    parts = metadata.rsplit(b(";"), 1)
    if parts[-1] == b("base64"):
        decode = codec.base64_decode
        # Drop the flag; what precedes it (if anything) is the media type.
        media_type = parts[0] if len(parts) == 2 else None
    else:
        decode = unquote
        # No base64 flag: the trailing segment is a genuine media-type
        # parameter (e.g. charset), so keep the metadata intact.
        media_type = metadata

    if not media_type:
        media_type = b("text/plain;charset=US-ASCII")
    elif media_type.startswith(b(";")):
        # Shorthand form: the type is omitted but parameters are supplied.
        media_type = b("text/plain") + media_type

    mime_type = mimeparse.parse_mime_type(media_type)
    raw_bytes = decode(encoded)
    return raw_bytes, mime_type
def bytes_to_unicode_recursive(obj, encoding="utf-8"):
    """
    Walks a simple data structure, converting byte strings to unicode.

    Supports lists, tuples, and dictionaries. The requested ``encoding`` is
    applied at every nesting level.

    :param obj:
        The Python data structure to walk recursively looking for
        byte strings.
    :param encoding:
        The encoding to use when decoding the byte string into Unicode.
        Default UTF-8.
    :returns:
        obj with all the byte strings converted to Unicode strings.
    """
    if isinstance(obj, dict):
        # Both keys and values are converted.
        return dict((bytes_to_unicode_recursive(k, encoding),
                     bytes_to_unicode_recursive(v, encoding))
                    for (k, v) in obj.items())
    elif isinstance(obj, list):
        return [bytes_to_unicode_recursive(i, encoding) for i in obj]
    elif isinstance(obj, tuple):
        return tuple(bytes_to_unicode_recursive(i, encoding) for i in obj)
    elif builtins.is_bytes(obj):
        return bytes_to_unicode(obj, encoding=encoding)
    else:
        return obj
def ipv6_b85decode_naive(encoded, _base85_ords=base85.RFC1924_ORDS):
    """
    Decodes an RFC1924 Base-85 encoded string to its 128-bit unsigned integral
    representation. Used to base85-decode IPv6 addresses or 128-bit chunks.

    Whitespace is ignored. Raises an ``OverflowError`` if stray characters
    are found.

    :param encoded:
        RFC1924 Base85-encoded bytes.
    :param _base85_ords:
        (Internal) Look up table mapping each encoded character to its
        ordinal value.
    :returns:
        A 128-bit unsigned integer.
    :raises TypeError:
        If ``encoded`` is not bytes.
    :raises ValueError:
        If, after whitespace removal, the input is not 20 bytes long.
    :raises OverflowError:
        If a character is missing from the look up table or the decoded
        value exceeds ``UINT128_MAX``.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("Encoded sequence must be bytes: got %r" %
                        type(encoded).__name__)
    # Ignore whitespace.
    encoded = EMPTY_BYTE.join(encoded.split())
    if len(encoded) != 20:
        raise ValueError("Not 20 encoded bytes: %r" % encoded)

    uint128 = 0
    try:
        # Horner's scheme: shift the accumulator by one base-85 digit and
        # add the next ordinal.
        for char in encoded:
            uint128 = uint128 * 85 + _base85_ords[char]
    except KeyError:
        # Same wording as the overflow case below (closing backtick added).
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)
    if uint128 > UINT128_MAX:
        raise OverflowError("Cannot decode `%r` -- may contain stray "
                            "ASCII bytes" % encoded)
    return uint128
def bytes_to_unicode_recursive(obj, encoding="utf-8"):
    """
    Walks a simple data structure, converting byte strings to unicode.

    Supports lists, tuples, and dictionaries. The requested ``encoding`` is
    applied at every nesting level.

    :param obj:
        The Python data structure to walk recursively looking for
        byte strings.
    :param encoding:
        The encoding to use when decoding the byte string into Unicode.
        Default UTF-8.
    :returns:
        obj with all the byte strings converted to Unicode strings.
    """
    if isinstance(obj, dict):
        # Both keys and values are converted.
        return dict((bytes_to_unicode_recursive(k, encoding),
                     bytes_to_unicode_recursive(v, encoding))
                    for (k, v) in obj.items())
    elif isinstance(obj, list):
        return [bytes_to_unicode_recursive(i, encoding) for i in obj]
    elif isinstance(obj, tuple):
        return tuple(bytes_to_unicode_recursive(i, encoding) for i in obj)
    elif builtins.is_bytes(obj):
        return bytes_to_unicode(obj, encoding=encoding)
    else:
        return obj
def test_single_value_lists_are_not_flattened(self):
    """Every ``parse_qs`` entry must map a bytes key to a list value."""
    parsed = parse_qs("a=1&a=2&a=3&b=c")
    for name, values in parsed.items():
        key_is_bytes = is_bytes(name)
        self.assertTrue(key_is_bytes, "Dictionary key is not bytes.")
        value_is_list = isinstance(values, list)
        self.assertTrue(value_is_list, "Dictionary value is not a list.")
def rfc1924_b85encode(raw_bytes, _padding=False):
    """Base85 encodes using the RFC1924 character set.

    The character set is::

        0-9, A-Z, a-z, and then !#$%&()*+-;<=>?@^_`{|}~

    The double quote, single quote, comma, period, slash, colon, square
    brackets and backslash are specifically not included.

    This is the encoding method used by Mercurial (and git?) to generate
    binary diffs, for example. They chose the IPv6 character set and encode
    using the ASCII85 encoding method while not compacting zero-byte
    sequences.

    :see: http://tools.ietf.org/html/rfc1924
    :param raw_bytes:
        Raw bytes.
    :param _padding:
        (Internal) Whether padding should be included in the encoded output.
        (Default ``False``, which is usually what you want.)
    :returns:
        RFC1924 base85 encoded bytes.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if builtins.is_bytes(raw_bytes):
        return _b85encode_chunks(raw_bytes, RFC1924_BYTES, _padding)
    raise TypeError("data must be raw bytes: got %r" %
                    type(raw_bytes).__name__)
def ipv6_b85decode(encoded, _base85_ords=RFC1924_ORDS):
    """
    Decodes an RFC1924 Base-85 encoded string to its 128-bit unsigned integral
    representation. Used to base85-decode IPv6 addresses or 128-bit chunks.

    The 20 input characters are folded into one integer with Horner's
    scheme, written out by hand in four groups of five characters rather
    than as a loop.

    :param encoded:
        RFC1924 Base85-encoded bytes; must be exactly 20 long.
    :param _base85_ords:
        (Internal) Look up table mapping each encoded character to its
        ordinal value.
    :returns:
        The decoded unsigned integer.
    :raises TypeError:
        If ``encoded`` is not bytes.
    :raises ValueError:
        If ``encoded`` is not exactly 20 characters long.
    """
    if not is_bytes(encoded):
        raise TypeError("Encoded sequence must be bytes: got %r" %
                        type(encoded).__name__)
    if len(encoded) != 20:
        raise ValueError(
            "Encoded IPv6 value must be exactly 20 characters long: got %r" %
            encoded)

    # Short local aliases keep the unrolled expressions readable.
    ords = _base85_ords
    e = encoded

    # Characters 0..4.
    acc = ((((ords[e[0]] * 85 +
              ords[e[1]]) * 85 +
             ords[e[2]]) * 85 +
            ords[e[3]]) * 85 +
           ords[e[4]])
    # Characters 5..9.
    acc = (((((acc * 85 + ords[e[5]]) * 85 +
              ords[e[6]]) * 85 +
             ords[e[7]]) * 85 +
            ords[e[8]]) * 85 +
           ords[e[9]])
    # Characters 10..14.
    acc = (((((acc * 85 + ords[e[10]]) * 85 +
              ords[e[11]]) * 85 +
             ords[e[12]]) * 85 +
            ords[e[13]]) * 85 +
           ords[e[14]])
    # Characters 15..19.
    acc = (((((acc * 85 + ords[e[15]]) * 85 +
              ords[e[16]]) * 85 +
             ords[e[17]]) * 85 +
            ords[e[18]]) * 85 +
           ords[e[19]])
    return acc
def base_decode(encoded, base, base_ords, base_zero, powers):
    """
    Decode from the given base to base 256.

    :param encoded:
        Encoded bytes; whitespace is ignored.
    :param base:
        Forwarded to ``base_to_uint``.
    :param base_ords:
        Forwarded to ``base_to_uint``.
    :param base_zero:
        Forwarded to ``uint_to_base256``.
    :param powers:
        Forwarded to ``base_to_uint``.
    :returns:
        Whatever ``uint_to_base256`` produces for the decoded number.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not is_bytes(encoded):
        raise TypeError("encoded data must be bytes: got %r" %
                        type(encoded).__name__)

    # Drop every whitespace run before interpreting the digits.
    compact = EMPTY_BYTE.join(encoded.split())

    # First to a big integer, then from that integer to base 256.
    as_integer = base_to_uint(compact, base, base_ords, powers)
    return uint_to_base256(as_integer, compact, base_zero)
def base_decode(encoded, base, base_ords, base_zero, powers):
    """
    Decode from the given base to base 256.

    :param encoded:
        Encoded bytes; whitespace is ignored.
    :param base:
        Forwarded to ``base_to_uint``.
    :param base_ords:
        Forwarded to ``base_to_uint``.
    :param base_zero:
        Forwarded to ``uint_to_base256``.
    :param powers:
        Forwarded to ``base_to_uint``.
    :returns:
        Whatever ``uint_to_base256`` produces for the decoded number.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("encoded data must be bytes: got %r" %
                        type(encoded).__name__)

    # Drop every whitespace run before interpreting the digits.
    compact = EMPTY_BYTE.join(encoded.split())

    # First to a big integer, then from that integer to base 256.
    as_integer = base_to_uint(compact, base, base_ords, powers)
    return uint_to_base256(as_integer, compact, base_zero)
def json_decode(encoded):
    """
    Decodes a JSON string into its equivalent Python value.

    :param encoded:
        JSON string; byte strings are rejected.
    :returns:
        Decoded Python value.
    :raises TypeError:
        If ``encoded`` is bytes.
    """
    refuse = builtins.is_bytes(encoded)
    if refuse:
        raise TypeError("Cannot work with bytes.")
    decoded = _json_compat.json_loads(encoded)
    return decoded
def hex_decode(encoded):
    """
    Decodes hexadecimal-encoded bytes into raw bytes.

    :param encoded:
        Hex representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if builtins.is_bytes(encoded):
        return binascii.a2b_hex(encoded)
    raise TypeError("argument must be bytes: got %r" %
                    type(encoded).__name__)
def base64_decode(encoded):
    """
    Decodes base64-encoded bytes into raw bytes. Not URL-safe.

    :param encoded:
        Base-64 encoded representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if is_bytes(encoded):
        return binascii.a2b_base64(encoded)
    raise TypeError("argument must be bytes: got %r" %
                    type(encoded).__name__)
def hex_decode(encoded):
    """
    Decodes hexadecimal-encoded bytes into raw bytes.

    :param encoded:
        Hex representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if is_bytes(encoded):
        return binascii.a2b_hex(encoded)
    raise TypeError("argument must be bytes: got %r" %
                    type(encoded).__name__)
def base64_decode(encoded):
    """
    Decodes base64-encoded bytes into raw bytes. Not URL-safe.

    :param encoded:
        Base-64 encoded representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if builtins.is_bytes(encoded):
        return binascii.a2b_base64(encoded)
    raise TypeError("argument must be bytes: got %r" %
                    type(encoded).__name__)
def to_unicode_if_bytes(obj, encoding="utf-8"):
    """
    Decodes encoded bytes into a Unicode string.

    :param obj:
        The value that will be converted to a Unicode string.
    :param encoding:
        The encoding used to decode bytes. Defaults to UTF-8.
    :returns:
        Unicode string if the argument is a byte string. Otherwise the
        value is returned unchanged.
    """
    if builtins.is_bytes(obj):
        return bytes_to_unicode(obj, encoding)
    # Anything that is not a byte string passes straight through.
    return obj
def bytes_to_uint(raw_bytes):
    """
    Converts a series of bytes into an unsigned integer.

    :param raw_bytes:
        Raw bytes (base-256 representation). An empty byte string
        converts to 0.
    :returns:
        Unsigned integer.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)
    if not raw_bytes:
        # int() rejects an empty hex string, so handle "no bytes" here:
        # the value of an empty base-256 number is zero.
        return 0
    # binascii.b2a_hex is written in C as is int.
    return int(binascii.b2a_hex(raw_bytes), 16)
def bin_decode(encoded):
    """
    Decodes binary-encoded bytes into raw bytes.

    The input is consumed four characters at a time; each group is mapped
    to a hex digit through ``_BIN_TO_HEX_LOOKUP`` and the resulting hex
    string is then decoded.

    :param encoded:
        Binary representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not is_bytes(encoded):
        raise TypeError("argument must be bytes: got %r" %
                        type(encoded).__name__)
    hex_digits = (_BIN_TO_HEX_LOOKUP[nibble] for nibble in chunks(encoded, 4))
    hex_encoded = EMPTY_BYTE.join(hex_digits)
    return binascii.a2b_hex(hex_encoded)
def b58decode_naive(encoded,
                    _charset=base58.ASCII58_BYTES,
                    _lookup=base58.ASCII58_ORDS):
    """
    Simple implementation for benchmarking.

    Base-58 decodes a sequence of bytes into raw bytes. Whitespace is ignored.

    :param encoded:
        Base-58 encoded bytes.
    :param _charset:
        (Internal) The character set to use; its first entry represents
        a zero byte.
    :param _lookup:
        (Internal) Look-up table mapping each encoded character to its
        ordinal value in the character set.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("encoded data must be bytes: got %r" %
                        type(encoded).__name__)

    # Ignore whitespace.
    encoded = re.sub(WHITESPACE_PATTERN, EMPTY_BYTE, encoded)

    # Convert to big integer: the last character is the 58**0 digit.
    number = sum(_lookup[char] * (58**position)
                 for position, char in enumerate(reversed(encoded)))

    # Zero must become "no bytes" here, not a single zero byte; otherwise
    # the padding added below would be off by one.
    raw_bytes = integer.uint_to_bytes(number) if number else EMPTY_BYTE

    # Each leading "zero" character stands for one leading zero byte.
    # Indexing with [0] keeps the comparison valid on both Python 2 (where
    # indexing bytes yields a one-byte string) and Python 3 (an integer).
    zero_char = _charset[0]
    zero_leading = functional.leading(lambda w: w == zero_char[0], encoded)
    if zero_leading:
        raw_bytes = ZERO_BYTE * zero_leading + raw_bytes
    return raw_bytes
def bytes_to_uint(raw_bytes):
    """
    Converts a series of bytes into an unsigned integer.

    :param raw_bytes:
        Raw bytes (base-256 representation). An empty byte string
        converts to 0.
    :returns:
        Unsigned integer.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)
    if not raw_bytes:
        # int() rejects an empty hex string, so handle "no bytes" here:
        # the value of an empty base-256 number is zero.
        return 0
    # binascii.b2a_hex is written in C as is int.
    return int(binascii.b2a_hex(raw_bytes), 16)
def b58decode_naive(encoded,
                    _charset=base58.ASCII58_BYTES,
                    _lookup=base58.ASCII58_ORDS):
    """
    Simple implementation for benchmarking.

    Base-58 decodes a sequence of bytes into raw bytes. Whitespace is ignored.

    :param encoded:
        Base-58 encoded bytes.
    :param _charset:
        (Internal) The character set to use; its first entry represents
        a zero byte.
    :param _lookup:
        (Internal) Look-up table mapping each encoded character to its
        ordinal value in the character set.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("encoded data must be bytes: got %r" %
                        type(encoded).__name__)

    # Ignore whitespace.
    stripped = re.sub(WHITESPACE_PATTERN, EMPTY_BYTE, encoded)

    # Convert to big integer: the last character is the 58 ** 0 digit.
    digits = enumerate(reversed(stripped))
    number = sum(_lookup[char] * (58 ** exponent) for exponent, char in digits)

    if number:
        raw_bytes = integer.uint_to_bytes(number)
    else:
        # Zero must become "no bytes", not a single zero byte; otherwise
        # the padding added below would be off by one.
        raw_bytes = EMPTY_BYTE

    # Each leading "zero" character stands for one leading zero byte.
    # Indexing with [0] keeps the comparison valid on both Python 2 (where
    # indexing bytes yields a one-byte string) and Python 3 (an integer).
    zero_char = _charset[0]
    zero_leading = functional.leading(lambda w: w == zero_char[0], stripped)
    if zero_leading:
        raw_bytes = ZERO_BYTE * zero_leading + raw_bytes
    return raw_bytes
def sha1_digest(*inputs):
    """
    Calculates a SHA-1 digest of a variable number of inputs.

    The inputs are fed to one hash object in the order given.

    :param inputs:
        A variable number of byte strings for which the digest will be
        calculated.
    :returns:
        A byte string containing the SHA-1 message digest.
    :raises TypeError:
        If any input is not bytes.
    """
    hasher = hashlib.sha1()
    for chunk in inputs:
        if builtins.is_bytes(chunk):
            hasher.update(chunk)
        else:
            raise TypeError("input type must be bytes: got %r" %
                            type(chunk).__name__)
    return hasher.digest()
def hmac_sha1_digest(key, data):
    """
    Calculates a HMAC SHA-1 digest.

    :param key:
        The key for the digest; passed to ``hmac.new`` unchecked.
    :param data:
        The raw bytes data for which the digest will be calculated.
    :returns:
        HMAC SHA-1 Digest.
    :raises TypeError:
        If ``data`` is not bytes.
    """
    if builtins.is_bytes(data):
        mac = hmac.new(key, data, hashlib.sha1)
        return mac.digest()
    raise TypeError("data type must be bytes: got %r" % type(data).__name__)
def hex_encode(raw_bytes):
    """
    Encodes raw bytes into hexadecimal representation.

    Encode your Unicode strings to a byte encoding before hex-encoding them.

    :param raw_bytes:
        Bytes.
    :returns:
        Hex-encoded representation.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if is_bytes(raw_bytes):
        return binascii.b2a_hex(raw_bytes)
    raise TypeError("argument must be raw bytes: got %r" %
                    type(raw_bytes).__name__)
def bin_decode(encoded):
    """
    Decodes binary-encoded bytes into raw bytes.

    The input is consumed four characters at a time; each group is mapped
    to a hex digit through ``_BIN_TO_HEX_LOOKUP`` and the resulting hex
    string is then decoded.

    :param encoded:
        Binary representation as bytes.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("argument must be bytes: got %r" %
                        type(encoded).__name__)
    nibbles = functional.chunks(encoded, 4)
    hex_encoded = EMPTY_BYTE.join(_BIN_TO_HEX_LOOKUP[nibble]
                                  for nibble in nibbles)
    return binascii.a2b_hex(hex_encoded)
def hex_encode(raw_bytes):
    """
    Encodes raw bytes into hexadecimal representation.

    Encode your Unicode strings to a byte encoding before hex-encoding them.

    :param raw_bytes:
        Bytes.
    :returns:
        Hex-encoded representation.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if builtins.is_bytes(raw_bytes):
        return binascii.b2a_hex(raw_bytes)
    raise TypeError("argument must be raw bytes: got %r" %
                    type(raw_bytes).__name__)
def bin_encode(raw_bytes):
    """
    Encodes raw bytes into binary representation.

    The bytes are hex-encoded first; each hex character is then mapped to
    its binary form through ``_HEX_TO_BIN_LOOKUP``.

    Encode your Unicode strings to a byte encoding before binary-encoding
    them.

    :param raw_bytes:
        Raw bytes.
    :returns:
        Binary representation.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)
    hex_encoded = binascii.b2a_hex(raw_bytes)
    bit_groups = (_HEX_TO_BIN_LOOKUP[hex_char] for hex_char in hex_encoded)
    return EMPTY_BYTE.join(bit_groups)
def bytes_to_unicode(raw_bytes, encoding="utf-8"):
    """
    Converts bytes to a Unicode string decoding it according to the encoding
    specified.

    :param raw_bytes:
        If already a Unicode string or None, it is returned unchanged.
        Otherwise it must be a byte string.
    :param encoding:
        The encoding used to decode bytes. Defaults to UTF-8
    :returns:
        The decoded Unicode string, or the argument itself when it is None
        or already Unicode.
    :raises TypeError:
        If the argument is neither None, Unicode nor bytes.
    """
    # Pass-through cases first.
    if raw_bytes is None:
        return raw_bytes
    if builtins.is_unicode(raw_bytes):
        return raw_bytes

    if builtins.is_bytes(raw_bytes):
        return raw_bytes.decode(encoding)
    raise TypeError("unsupported argument type: %r" %
                    type(raw_bytes).__name__)
def bin_encode(raw_bytes):
    """
    Encodes raw bytes into binary representation.

    The bytes are hex-encoded first; each hex character is then mapped to
    its binary form through ``_HEX_TO_BIN_LOOKUP``.

    Encode your Unicode strings to a byte encoding before binary-encoding
    them.

    :param raw_bytes:
        Raw bytes.
    :returns:
        Binary representation.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not is_bytes(raw_bytes):
        raise TypeError("argument must be raw bytes: got %r" %
                        type(raw_bytes).__name__)
    hex_encoded = binascii.b2a_hex(raw_bytes)
    bit_groups = (_HEX_TO_BIN_LOOKUP[hex_char] for hex_char in hex_encoded)
    return EMPTY_BYTE.join(bit_groups)
def base64_encode(raw_bytes):
    """
    Encodes raw bytes into base64 representation without appending a trailing
    newline character. Not URL-safe.

    Encode your Unicode strings to a byte encoding before base64-encoding
    them.

    :param raw_bytes:
        Bytes to encode.
    :returns:
        Base64 encoded bytes without newline characters.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if builtins.is_bytes(raw_bytes):
        with_newline = binascii.b2a_base64(raw_bytes)
        # Drop the final character of the binascii output.
        return with_newline[:-1]
    raise TypeError("argument must be bytes: got %r" %
                    type(raw_bytes).__name__)
def base64_encode(raw_bytes):
    """
    Encodes raw bytes into base64 representation without appending a trailing
    newline character. Not URL-safe.

    Encode your Unicode strings to a byte encoding before base64-encoding
    them.

    :param raw_bytes:
        Bytes to encode.
    :returns:
        Base64 encoded bytes without newline characters.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if is_bytes(raw_bytes):
        with_newline = binascii.b2a_base64(raw_bytes)
        # Drop the final character of the binascii output.
        return with_newline[:-1]
    raise TypeError("argument must be bytes: got %r" %
                    type(raw_bytes).__name__)
def utf8_encode(unicode_text):
    """
    UTF-8 encodes a Unicode string into bytes; bytes and None are left alone.

    Work with Unicode strings in your code and encode your Unicode strings
    into UTF-8 before they leave your system.

    :param unicode_text:
        If already a byte string or None, it is returned unchanged.
        Otherwise it must be a Unicode string and is encoded as UTF-8 bytes.
    :returns:
        UTF-8 encoded bytes.
    :raises TypeError:
        If the argument is neither None, bytes nor Unicode.
    """
    # Pass-through cases first.
    if unicode_text is None:
        return unicode_text
    if builtins.is_bytes(unicode_text):
        return unicode_text

    if builtins.is_unicode(unicode_text):
        return unicode_text.encode("utf-8")
    raise TypeError("unsupported argument type: %r" %
                    type(unicode_text).__name__)
def rfc1924_b85decode(encoded):
    """Base85 decodes using the RFC1924 character set.

    This is the encoding method used by Mercurial (and git) to generate
    binary diffs, for example. They chose the IPv6 character set and encode
    using the ASCII85 encoding method while not compacting zero-byte
    sequences.

    :see: http://tools.ietf.org/html/rfc1924
    :param encoded:
        RFC1924 Base85 encoded bytes; whitespace is ignored.
    :returns:
        Decoded bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("Encoded sequence must be bytes: got %r" %
                        type(encoded).__name__)
    # Strip out every whitespace run before decoding.
    without_whitespace = EMPTY_BYTE.join(encoded.split())
    return _b85decode_chunks(without_whitespace, RFC1924_BYTES, RFC1924_ORDS)
def json_encode(obj):
    """
    Encodes a Python value into its equivalent JSON string.

    JSON permits but does not require forward slashes to be escaped.
    This is useful when json data is emitted inside a script element in
    HTML, as it prevents a closing script tag in the data from prematurely
    terminating the javascript. Some json libraries do this escaping by
    default, although python's standard library does not, so we do it here.

    :see: http://stackoverflow.com/questions/1580647/json-why-are-forward-slashes-escaped
    :param obj:
        Python value; byte strings are rejected.
    :returns:
        JSON string.
    :raises TypeError:
        If ``obj`` is bytes.
    """
    if builtins.is_bytes(obj):
        raise TypeError("Cannot work with bytes.")
    serialized = json_dumps(text.utf8_decode_recursive(obj))
    # Escape the slash of every "</" sequence.
    return serialized.replace("</", "<\\/")
def base64_urlsafe_encode(raw_bytes):
    """
    Encodes raw bytes into URL-safe base64 bytes.

    Trailing padding is stripped and the two non-URL-safe characters are
    swapped for their URL-safe counterparts.

    Encode your Unicode strings to a byte encoding before base64-encoding
    them.

    :param raw_bytes:
        Bytes to encode.
    :returns:
        Base64 encoded bytes without newline characters.
    :raises TypeError:
        If ``raw_bytes`` is not bytes.
    """
    if not builtins.is_bytes(raw_bytes):
        raise TypeError("argument must be bytes: got %r" %
                        type(raw_bytes).__name__)

    # Per the original note (attributed to Guido), calling binascii directly
    # is 3-4x faster than the base64.py urlsafe wrapper. binascii is
    # implemented in C, so module overhead is avoided as well.
    standard = binascii.b2a_base64(raw_bytes)[:-1]
    unpadded = standard.rstrip(EQUAL_BYTE)
    hyphenated = unpadded.replace(PLUS_BYTE, HYPHEN_BYTE)
    return hyphenated.replace(FORWARD_SLASH_BYTE, UNDERSCORE_BYTE)
def base64_urlsafe_decode(encoded):
    """
    Decodes URL-safe base64-encoded bytes into raw bytes.

    Missing padding is restored and the URL-safe characters are mapped back
    to the standard alphabet before decoding.

    :param encoded:
        Base-64 encoded representation.
    :returns:
        Raw bytes.
    :raises TypeError:
        If ``encoded`` is not bytes.
    """
    if not builtins.is_bytes(encoded):
        raise TypeError("argument must be bytes: got %r" %
                        type(encoded).__name__)

    # Number of pad characters needed to reach a multiple of four.
    missing = -len(encoded) % 4
    if missing:
        encoded = encoded + EQUAL_BYTE * missing

    # Per the original note (attributed to Guido), calling binascii directly
    # is 3-4x faster than urlsafe_b64decode(). binascii is implemented in C,
    # so module overhead is avoided as well.
    with_plus = encoded.replace(HYPHEN_BYTE, PLUS_BYTE)
    standard = with_plus.replace(UNDERSCORE_BYTE, FORWARD_SLASH_BYTE)
    return binascii.a2b_base64(standard)