def test_does_not_encode_unicode_and_None_to_unicode(self):
    # Values that are already Unicode must come back from utf8_decode
    # equal to the input and still be classified as Unicode.
    for already_unicode in (unicode_string, unicode_string2):
        self.assertEqual(utf8_decode(already_unicode), already_unicode)
        self.assertTrue(is_unicode(utf8_decode(already_unicode)))

    # None is expected to pass through as None.
    self.assertEqual(utf8_decode(None), None)
def test_does_not_encode_unicode_and_None_to_unicode(self):
    # Values that are already Unicode must come back from text.utf8_decode
    # equal to the input and still be classified as Unicode.
    for already_unicode in (constants.UNICODE_STRING,
                            constants.UNICODE_STRING2):
        self.assertEqual(text.utf8_decode(already_unicode), already_unicode)
        self.assertTrue(
            builtins.is_unicode(text.utf8_decode(already_unicode)))

    # None is expected to pass through as None.
    self.assertEqual(text.utf8_decode(None), None)
def test_rejects_non_unicode(self):
    # is_unicode must report False for byte strings and for every
    # non-string value listed here.
    not_unicode_values = (
        random_bytes,
        utf8_bytes,
        utf8_bytes2,
        False,
        5,
        None,
        [],
        (),
        {},
        object,
    )
    for candidate in not_unicode_values:
        self.assertFalse(is_unicode(candidate))
def test_rejects_non_unicode(self):
    # builtins.is_unicode must report False for byte strings and for every
    # non-string value listed here.
    # NOTE(review): RANDOM_BYTES is referenced without the ``constants.``
    # prefix used by the other fixtures -- confirm it is defined at module
    # level.
    not_unicode_values = (
        RANDOM_BYTES,
        constants.UTF8_BYTES,
        constants.UTF8_BYTES2,
        False,
        5,
        None,
        [],
        (),
        {},
        object,
    )
    for candidate in not_unicode_values:
        self.assertFalse(builtins.is_unicode(candidate))
def test_does_not_encode_else_to_unicode(self):
    # Unicode input must come back equal to itself and still be Unicode.
    for already_unicode in (constants.UNICODE_STRING,
                            constants.UNICODE_STRING2):
        self.assertEqual(
            text.utf8_decode_if_bytes(already_unicode), already_unicode)
        self.assertTrue(
            builtins.is_unicode(text.utf8_decode_if_bytes(already_unicode)))

    # Anything that is not a byte string must be handed back equal to
    # what was passed in.
    for untouched in (None, False, 5, [], (), {}, object):
        self.assertEqual(text.utf8_decode_if_bytes(untouched), untouched)
def test_does_not_encode_else_to_unicode(self):
    # Unicode input must come back equal to itself and still be Unicode.
    for already_unicode in (unicode_string, unicode_string2):
        self.assertEqual(
            utf8_decode_if_bytes(already_unicode), already_unicode)
        self.assertTrue(is_unicode(utf8_decode_if_bytes(already_unicode)))

    # Anything that is not a byte string must be handed back equal to
    # what was passed in.
    for untouched in (None, False, 5, [], (), {}, object):
        self.assertEqual(utf8_decode_if_bytes(untouched), untouched)
def utf8_encode_if_unicode(obj):
    """
    Encodes a value as UTF-8 only when it is a Unicode string.

    :param obj:
        Any value. Only values for which ``builtins.is_unicode`` is true
        are encoded.
    :returns:
        The result of ``utf8_encode(obj)`` when ``obj`` is a Unicode
        string; otherwise ``obj`` itself, unchanged.
    """
    if builtins.is_unicode(obj):
        return utf8_encode(obj)
    return obj
def bytes_to_unicode(raw_bytes, encoding="utf-8"):
    """
    Decodes a byte string into a Unicode string using the given encoding.

    :param raw_bytes:
        The byte string to decode. ``None`` and values that are already
        Unicode strings are returned unchanged.
    :param encoding:
        Name of the encoding used to decode the bytes. Defaults to UTF-8.
    :returns:
        The decoded Unicode string, or ``raw_bytes`` itself when it is
        ``None`` or already Unicode.
    :raises TypeError:
        If ``raw_bytes`` is neither ``None``, a Unicode string, nor a byte
        string.
    """
    # Pass-through cases first: nothing to decode.
    if raw_bytes is None:
        return raw_bytes
    if builtins.is_unicode(raw_bytes):
        return raw_bytes

    if builtins.is_bytes(raw_bytes):
        return raw_bytes.decode(encoding)

    raise TypeError(
        "unsupported argument type: %r" % type(raw_bytes).__name__)
def utf8_encode(unicode_text):
    """
    Encodes a Unicode string into UTF-8 bytes.

    Byte strings and ``None`` are handed back untouched, so the function
    is safe to call on values that may already have been encoded.

    :param unicode_text:
        The Unicode string to encode. ``None`` and byte strings are
        returned unchanged.
    :returns:
        UTF-8 encoded bytes, or ``unicode_text`` itself when it is ``None``
        or already a byte string.
    :raises TypeError:
        If ``unicode_text`` is neither ``None``, a byte string, nor a
        Unicode string.
    """
    # Pass-through cases first: nothing to encode.
    if unicode_text is None:
        return unicode_text
    if builtins.is_bytes(unicode_text):
        return unicode_text

    if builtins.is_unicode(unicode_text):
        return unicode_text.encode("utf-8")

    raise TypeError(
        "unsupported argument type: %r" % type(unicode_text).__name__)
def utf8_encode_recursive(obj):
    """
    Recursively converts the Unicode strings inside a simple data structure
    into UTF-8 encoded byte strings.

    Dictionaries (both keys and values), lists, and tuples are walked; any
    other value that is not a Unicode string is returned as is. Containers
    are rebuilt as plain ``dict``, ``list``, and ``tuple`` objects rather
    than modified in place.

    :param obj:
        The Python data structure to walk.
    :returns:
        A copy of ``obj`` in which every Unicode string found has been
        replaced by its UTF-8 encoded bytes.
    """
    if isinstance(obj, dict):
        encoded_mapping = {}
        for key, value in obj.items():
            # Encode the key before the value, matching pair order.
            encoded_key = utf8_encode_recursive(key)
            encoded_mapping[encoded_key] = utf8_encode_recursive(value)
        return encoded_mapping

    if isinstance(obj, list):
        return [utf8_encode_recursive(item) for item in obj]

    if isinstance(obj, tuple):
        return tuple(map(utf8_encode_recursive, obj))

    # Leaf value: only Unicode strings are converted.
    if builtins.is_unicode(obj):
        return utf8_encode(obj)
    return obj
def test_encodes_bytes_to_unicode(self):
    # Each UTF-8 byte fixture must decode to its Unicode counterpart, and
    # the decoded value must be classified as Unicode.
    fixtures = (
        (constants.UTF8_BYTES, constants.UNICODE_STRING),
        (constants.UTF8_BYTES2, constants.UNICODE_STRING2),
    )
    for encoded, expected in fixtures:
        self.assertEqual(text.utf8_decode_if_bytes(encoded), expected)
        self.assertTrue(
            builtins.is_unicode(text.utf8_decode_if_bytes(encoded)))
def test_converts_utf8_decode(self):
    # Each UTF-8 byte fixture must decode to its Unicode counterpart, and
    # the decoded value must be classified as Unicode.
    fixtures = (
        (constants.UTF8_BYTES, constants.UNICODE_STRING),
        (constants.UTF8_BYTES2, constants.UNICODE_STRING2),
    )
    for encoded, expected in fixtures:
        self.assertEqual(text.utf8_decode(encoded), expected)
        self.assertTrue(builtins.is_unicode(text.utf8_decode(encoded)))
def test_accepts_unicode(self):
    # Both Unicode fixtures must be recognised by is_unicode.
    for sample in (unicode_string, unicode_string2):
        self.assertTrue(is_unicode(sample))
def test_accepts_unicode(self):
    # Both Unicode fixtures must be recognised by builtins.is_unicode.
    for sample in (constants.UNICODE_STRING, constants.UNICODE_STRING2):
        self.assertTrue(builtins.is_unicode(sample))
def test_encodes_bytes_to_unicode(self):
    # Each UTF-8 byte fixture must decode to its Unicode counterpart, and
    # the decoded value must be classified as Unicode.
    fixtures = (
        (utf8_bytes, unicode_string),
        (utf8_bytes2, unicode_string2),
    )
    for encoded, expected in fixtures:
        self.assertEqual(utf8_decode_if_bytes(encoded), expected)
        self.assertTrue(is_unicode(utf8_decode_if_bytes(encoded)))
def test_converts_utf8_decode(self):
    # Each UTF-8 byte fixture must decode to its Unicode counterpart, and
    # the decoded value must be classified as Unicode.
    fixtures = (
        (utf8_bytes, unicode_string),
        (utf8_bytes2, unicode_string2),
    )
    for encoded, expected in fixtures:
        self.assertEqual(utf8_decode(encoded), expected)
        self.assertTrue(is_unicode(utf8_decode(encoded)))