Пример #1
0
  def test_does_not_encode_unicode_and_None_to_unicode(self):
    """Unicode input must come back equal and still Unicode; None stays None."""
    for sample in (unicode_string, unicode_string2):
      self.assertEqual(utf8_decode(sample), sample)
      self.assertTrue(is_unicode(utf8_decode(sample)))

    self.assertEqual(utf8_decode(None), None)
Пример #2
0
  def test_does_not_encode_unicode_and_None_to_unicode(self):
    """Unicode input must come back equal and still Unicode; None stays None."""
    decode = text.utf8_decode
    for sample in (constants.UNICODE_STRING, constants.UNICODE_STRING2):
      self.assertEqual(decode(sample), sample)
      self.assertTrue(builtins.is_unicode(decode(sample)))

    self.assertEqual(decode(None), None)
Пример #3
0
 def test_rejects_non_unicode(self):
   """is_unicode must be falsy for byte strings and unrelated objects."""
   non_unicode_values = (
     random_bytes,
     utf8_bytes,
     utf8_bytes2,
     False,
     5,
     None,
     [],
     (),
     {},
     object,
   )
   for candidate in non_unicode_values:
     self.assertFalse(is_unicode(candidate))
Пример #4
0
 def test_rejects_non_unicode(self):
   """builtins.is_unicode must be falsy for bytes and unrelated objects."""
   non_unicode_values = (
     RANDOM_BYTES,
     constants.UTF8_BYTES,
     constants.UTF8_BYTES2,
     False,
     5,
     None,
     [],
     (),
     {},
     object,
   )
   for candidate in non_unicode_values:
     self.assertFalse(builtins.is_unicode(candidate))
Пример #5
0
 def test_rejects_non_unicode(self):
     """builtins.is_unicode must be falsy for bytes and unrelated objects."""
     byte_samples = (
         RANDOM_BYTES,
         constants.UTF8_BYTES,
         constants.UTF8_BYTES2,
     )
     other_samples = (False, 5, None, [], (), {}, object)
     for candidate in byte_samples + other_samples:
         self.assertFalse(builtins.is_unicode(candidate))
Пример #6
0
  def test_does_not_encode_else_to_unicode(self):
    """Anything that is not a byte string must pass through equal to itself."""
    decode = text.utf8_decode_if_bytes

    # Unicode strings stay Unicode.
    for sample in (constants.UNICODE_STRING, constants.UNICODE_STRING2):
      self.assertEqual(decode(sample), sample)
      self.assertTrue(builtins.is_unicode(decode(sample)))

    # Values that are neither bytes nor Unicode are handed back as they came.
    for passthrough in (None, False, 5, [], (), {}, object):
      self.assertEqual(decode(passthrough), passthrough)
Пример #7
0
  def test_does_not_encode_else_to_unicode(self):
    """Anything that is not a byte string must pass through equal to itself."""
    # Unicode strings stay Unicode.
    for sample in (unicode_string, unicode_string2):
      self.assertEqual(utf8_decode_if_bytes(sample), sample)
      self.assertTrue(is_unicode(utf8_decode_if_bytes(sample)))

    # Values that are neither bytes nor Unicode are handed back as they came.
    for passthrough in (None, False, 5, [], (), {}, object):
      self.assertEqual(utf8_decode_if_bytes(passthrough), passthrough)
Пример #8
0
def utf8_encode_if_unicode(obj):
  """
  Encodes a value as UTF-8 only when it is a Unicode string.

  :param obj:
      Any value. Only Unicode strings (as reported by
      ``builtins.is_unicode``) are encoded.
  :returns:
      The result of ``utf8_encode(obj)`` for a Unicode string; every other
      value is returned unchanged.
  """
  # Guard clause: non-Unicode values take the pass-through path.
  if not builtins.is_unicode(obj):
    return obj
  return utf8_encode(obj)
Пример #9
0
def utf8_encode_if_unicode(obj):
    """
    Encodes a value as UTF-8 only when it is a Unicode string.

    :param obj:
        Any value. Only Unicode strings (as reported by
        ``builtins.is_unicode``) are encoded.
    :returns:
        The result of ``utf8_encode(obj)`` for a Unicode string; every other
        value is returned unchanged.
    """
    if builtins.is_unicode(obj):
        return utf8_encode(obj)
    # Everything else passes through as-is.
    return obj
Пример #10
0
def bytes_to_unicode(raw_bytes, encoding="utf-8"):
  """
  Decodes a byte string into a Unicode string using the given encoding.

  :param raw_bytes:
      A byte string to decode. ``None`` and Unicode strings are returned
      unchanged.
  :param encoding:
      The encoding used to decode bytes. Defaults to UTF-8.
  :returns:
      The decoded Unicode string, or ``raw_bytes`` itself when it is
      ``None`` or already Unicode.
  :raises TypeError:
      If ``raw_bytes`` is not ``None``, a Unicode string, or a byte string.
  """
  # Nothing to decode for None or for text that is already Unicode.
  if raw_bytes is None:
    return raw_bytes
  if builtins.is_unicode(raw_bytes):
    return raw_bytes

  if builtins.is_bytes(raw_bytes):
    return raw_bytes.decode(encoding)

  type_name = type(raw_bytes).__name__
  raise TypeError("unsupported argument type: %r" % type_name)
Пример #11
0
def bytes_to_unicode(raw_bytes, encoding="utf-8"):
    """
    Decodes a byte string into a Unicode string using the given encoding.

    :param raw_bytes:
        A byte string to decode. ``None`` and Unicode strings are returned
        unchanged.
    :param encoding:
        The encoding used to decode bytes. Defaults to UTF-8.
    :returns:
        The decoded Unicode string, or ``raw_bytes`` itself when it is
        ``None`` or already Unicode.
    :raises TypeError:
        If ``raw_bytes`` is not ``None``, a Unicode string, or a byte string.
    """
    needs_no_decoding = raw_bytes is None or builtins.is_unicode(raw_bytes)
    if needs_no_decoding:
        return raw_bytes

    if builtins.is_bytes(raw_bytes):
        return raw_bytes.decode(encoding)

    type_name = type(raw_bytes).__name__
    raise TypeError("unsupported argument type: %r" % type_name)
Пример #12
0
def utf8_encode(unicode_text):
  """
  Encodes a Unicode string as UTF-8 bytes.

  Work with Unicode strings inside your code and encode them to UTF-8 just
  before they leave your system.

  :param unicode_text:
      A Unicode string to encode. ``None`` and byte strings are returned
      unchanged.
  :returns:
      UTF-8 encoded bytes, or the argument itself when it is ``None`` or
      already a byte string.
  :raises TypeError:
      If the argument is not ``None``, a byte string, or a Unicode string.
  """
  # None and ready-made byte strings need no encoding.
  if unicode_text is None:
    return unicode_text
  if builtins.is_bytes(unicode_text):
    return unicode_text

  if builtins.is_unicode(unicode_text):
    return unicode_text.encode("utf-8")

  type_name = type(unicode_text).__name__
  raise TypeError("unsupported argument type: %r" % type_name)
Пример #13
0
def utf8_encode(unicode_text):
    """
    Encodes a Unicode string as UTF-8 bytes.

    Work with Unicode strings inside your code and encode them to UTF-8 just
    before they leave your system.

    :param unicode_text:
        A Unicode string to encode. ``None`` and byte strings are returned
        unchanged.
    :returns:
        UTF-8 encoded bytes, or the argument itself when it is ``None`` or
        already a byte string.
    :raises TypeError:
        If the argument is not ``None``, a byte string, or a Unicode string.
    """
    already_encoded = unicode_text is None or builtins.is_bytes(unicode_text)
    if already_encoded:
        return unicode_text

    if builtins.is_unicode(unicode_text):
        return unicode_text.encode("utf-8")

    type_name = type(unicode_text).__name__
    raise TypeError("unsupported argument type: %r" % type_name)
Пример #14
0
def utf8_encode_recursive(obj):
  """
  Recursively replaces the Unicode strings inside a simple data structure
  with their UTF-8 encoded byte strings.

  Dictionaries (both keys and values), lists, and tuples are rebuilt as new
  ``dict``, ``list``, and ``tuple`` objects with every element processed the
  same way. Any other non-Unicode value is returned unchanged.

  :param obj:
      The Python data structure to walk recursively looking for
      Unicode strings.
  :returns:
      obj with all the Unicode strings converted to byte strings.
  """
  if isinstance(obj, dict):
    encoded = {}
    for key, value in obj.items():
      # Encode the key before the value, one pair at a time.
      encoded_key = utf8_encode_recursive(key)
      encoded[encoded_key] = utf8_encode_recursive(value)
    return encoded

  if isinstance(obj, list):
    return [utf8_encode_recursive(item) for item in obj]

  if isinstance(obj, tuple):
    return tuple(utf8_encode_recursive(item) for item in obj)

  if builtins.is_unicode(obj):
    return utf8_encode(obj)

  return obj
Пример #15
0
def utf8_encode_recursive(obj):
    """
    Recursively replaces the Unicode strings inside a simple data structure
    with their UTF-8 encoded byte strings.

    Dictionaries (both keys and values), lists, and tuples are rebuilt as new
    ``dict``, ``list``, and ``tuple`` objects with every element processed
    the same way. Any other non-Unicode value is returned unchanged.

    :param obj:
        The Python data structure to walk recursively looking for
        Unicode strings.
    :returns:
        obj with all the Unicode strings converted to byte strings.
    """
    if isinstance(obj, dict):
        encoded_pairs = []
        for key, value in obj.items():
            # Encode the key before the value, one pair at a time.
            encoded_key = utf8_encode_recursive(key)
            encoded_value = utf8_encode_recursive(value)
            encoded_pairs.append((encoded_key, encoded_value))
        return dict(encoded_pairs)

    if isinstance(obj, list):
        return [utf8_encode_recursive(item) for item in obj]

    if isinstance(obj, tuple):
        return tuple([utf8_encode_recursive(item) for item in obj])

    if builtins.is_unicode(obj):
        return utf8_encode(obj)

    return obj
Пример #16
0
  def test_encodes_bytes_to_unicode(self):
    """UTF-8 byte strings must decode to the matching Unicode constants."""
    decode = text.utf8_decode_if_bytes
    expectations = (
      (constants.UTF8_BYTES, constants.UNICODE_STRING),
      (constants.UTF8_BYTES2, constants.UNICODE_STRING2),
    )
    for encoded, expected in expectations:
      self.assertEqual(decode(encoded), expected)
      self.assertTrue(builtins.is_unicode(decode(encoded)))
Пример #17
0
  def test_converts_utf8_decode(self):
    """utf8_decode must turn UTF-8 bytes into the matching Unicode strings."""
    decode = text.utf8_decode
    expectations = (
      (constants.UTF8_BYTES, constants.UNICODE_STRING),
      (constants.UTF8_BYTES2, constants.UNICODE_STRING2),
    )
    for encoded, expected in expectations:
      self.assertEqual(decode(encoded), expected)
      self.assertTrue(builtins.is_unicode(decode(encoded)))
Пример #18
0
 def test_accepts_unicode(self):
   """is_unicode must be truthy for both Unicode sample strings."""
   for sample in (unicode_string, unicode_string2):
     self.assertTrue(is_unicode(sample))
Пример #19
0
 def test_accepts_unicode(self):
   """builtins.is_unicode must be truthy for both Unicode constants."""
   for sample in (constants.UNICODE_STRING, constants.UNICODE_STRING2):
     self.assertTrue(builtins.is_unicode(sample))
Пример #20
0
  def test_encodes_bytes_to_unicode(self):
    """UTF-8 byte strings must decode to the matching Unicode strings."""
    expectations = (
      (utf8_bytes, unicode_string),
      (utf8_bytes2, unicode_string2),
    )
    for encoded, expected in expectations:
      self.assertEqual(utf8_decode_if_bytes(encoded), expected)
      self.assertTrue(is_unicode(utf8_decode_if_bytes(encoded)))
Пример #21
0
  def test_converts_utf8_decode(self):
    """utf8_decode must turn UTF-8 bytes into the matching Unicode strings."""
    expectations = (
      (utf8_bytes, unicode_string),
      (utf8_bytes2, unicode_string2),
    )
    for encoded, expected in expectations:
      self.assertEqual(utf8_decode(encoded), expected)
      self.assertTrue(is_unicode(utf8_decode(encoded)))
Пример #22
0
 def test_accepts_unicode(self):
     """builtins.is_unicode must be truthy for both Unicode constants."""
     unicode_samples = (
         constants.UNICODE_STRING,
         constants.UNICODE_STRING2,
     )
     for sample in unicode_samples:
         self.assertTrue(builtins.is_unicode(sample))