def test_all_bmp_characters():
    """Round-trip code points 0xA0 up to (not including) 0xFFFD.

    Each character is encoded as UTF-8 and the resulting bytes are decoded
    as Latin-1, producing a garbled string.  ``fix_bad_encoding`` must turn
    that garbled string back into the original character.

    Code points whose general category is 'Co' (private use) or 'Cn'
    (not assigned) are left out of the check.
    """
    skipped_categories = ('Co', 'Cn')
    for codepoint in xrange(0xa0, 0xfffd):
        original = unichr(codepoint)
        if unicodedata.category(original) in skipped_categories:
            continue
        # Simulate UTF-8 bytes being misread as Latin-1.
        utf8_bytes = original.encode('utf-8')
        mojibake = utf8_bytes.decode('latin-1')
        assert fix_bad_encoding(mojibake) == original
def test_all_bmp_characters():
    """Round-trip code points 0xA0 up to (not including) 0xFFFD.

    Each character is encoded as UTF-8 and the resulting bytes are decoded
    as Latin-1, producing a garbled string.  ``fix_bad_encoding`` must turn
    that garbled string back into the original character.

    Two groups are not asserted on:

    * characters whose general category is 'Co' (private use);
    * code points contained in ``WINDOWS_1252_GREMLINS``.
      NOTE(review): that collection is defined outside this block --
      presumably code points that cannot be round-tripped reliably;
      confirm against its definition.
    """
    for codepoint in xrange(0xa0, 0xfffd):
        original = unichr(codepoint)
        if unicodedata.category(original) == 'Co':
            continue
        # Simulate UTF-8 bytes being misread as Latin-1.
        utf8_bytes = original.encode('utf-8')
        mojibake = utf8_bytes.decode('latin-1')
        if codepoint in WINDOWS_1252_GREMLINS:
            continue
        assert fix_bad_encoding(mojibake) == original
def check_phrase(text):
    """Assert that ``fix_bad_encoding`` returns ``text`` unchanged.

    On failure the input ``text`` itself is used as the assertion message,
    so the offending phrase shows up in the test report.
    """
    fixed = fix_bad_encoding(text)
    assert fixed == text, text