def test_bom(self):
    """Check read_bom on BOM-prefixed, BOM-less and empty byte strings."""
    # CJK "water" character as text
    water_unicode = u'\u6C34'
    # BOM + water character encoded. These must be byte strings: a BOM is a
    # byte sequence, and on Python 3 only bytes objects provide .decode().
    utf16be = b'\xfe\xff\x6c\x34'
    utf16le = b'\xff\xfe\x34\x6c'
    utf32be = b'\x00\x00\xfe\xff\x00\x00\x6c\x34'
    utf32le = b'\xff\xfe\x00\x00\x34\x6c\x00\x00'
    for string in (utf16be, utf16le, utf32be, utf32le):
        bom_encoding, bom = read_bom(string)
        # Drop the reported BOM, then decode the rest with the reported
        # encoding; the result must be the original character.
        decoded = string[len(bom):].decode(bom_encoding)
        self.assertEqual(water_unicode, decoded)
    # Body without BOM
    enc, bom = read_bom(b"foo")
    self.assertEqual(enc, None)
    self.assertEqual(bom, None)
    # Empty body
    enc, bom = read_bom(b"")
    self.assertEqual(enc, None)
    self.assertEqual(bom, None)
def test_bom(self):
    """Check read_bom on BOM-prefixed, BOM-less and empty byte strings."""
    # CJK "water" character as text
    water_unicode = u'\u6C34'
    # BOM + water character encoded
    utf16be = b'\xfe\xff\x6c\x34'
    utf16le = b'\xff\xfe\x34\x6c'
    utf32be = b'\x00\x00\xfe\xff\x00\x00\x6c\x34'
    utf32le = b'\xff\xfe\x00\x00\x34\x6c\x00\x00'
    for string in (utf16be, utf16le, utf32be, utf32le):
        bom_encoding, bom = read_bom(string)
        # Drop the reported BOM, then decode the rest with the reported
        # encoding; the result must be the original character.
        decoded = string[len(bom):].decode(bom_encoding)
        self.assertEqual(water_unicode, decoded)
    # Body without BOM. Passed as bytes, like the BOM-prefixed inputs above,
    # so every call exercises read_bom with the same input type.
    enc, bom = read_bom(b"foo")
    self.assertEqual(enc, None)
    self.assertEqual(bom, None)
    # Empty body (also bytes)
    enc, bom = read_bom(b"")
    self.assertEqual(enc, None)
    self.assertEqual(bom, None)
def test_bom(self):
    # Exercises read_bom with BOM-prefixed input (an encoding and a BOM are
    # expected back) and with input that carries no BOM (None, None expected).
    expected_text = "\u6C34"  # CJK "water" character
    # Each sample is a BOM followed by the water character in that encoding.
    bom_prefixed_samples = (
        b"\xfe\xff\x6c\x34",  # UTF-16 big-endian
        b"\xff\xfe\x34\x6c",  # UTF-16 little-endian
        b"\x00\x00\xfe\xff\x00\x00\x6c\x34",  # UTF-32 big-endian
        b"\xff\xfe\x00\x00\x34\x6c\x00\x00",  # UTF-32 little-endian
    )
    for raw in bom_prefixed_samples:
        bom_encoding, bom = read_bom(raw)
        # Both values must be present before they can be used below.
        assert bom_encoding is not None
        assert bom is not None
        payload = raw[len(bom):]
        self.assertEqual(expected_text, payload.decode(bom_encoding))
    # A body without a BOM, then an empty body: neither should report one.
    for body in (b"foo", b""):
        enc, bom = read_bom(body)
        self.assertEqual(enc, None)
        self.assertEqual(bom, None)