def test_lookup_error():
    '''
    Check codecs.lookup_error: an unregistered name raises LookupError, and
    a name registered through codecs.register_error returns that handler.
    '''
    # sanity: nothing has been registered under this name yet
    AssertError(LookupError, codecs.lookup_error, "blah garbage xyz")

    def garbage_error1(someError):
        pass

    def garbage_error2(someError):
        pass

    # Register each handler, then immediately look it up again.
    registrations = (
        ("blah garbage xyz", garbage_error1),
        ("some other", garbage_error2),
    )
    for handler_name, handler in registrations:
        codecs.register_error(handler_name, handler)
        AreEqual(codecs.lookup_error(handler_name), handler)
def unicode_call_errorhandler(errors, encoding, reason, input, startinpos,
                              endinpos, decode=True):
    """Call the codec error handler named *errors* and validate its reply.

    A UnicodeDecodeError (when *decode* is true) or UnicodeEncodeError is
    built from *encoding*, *input*, *startinpos*, *endinpos* and *reason*
    and passed to the handler returned by ``_codecs.lookup_error(errors)``.

    Returns:
        A ``(replacement, newpos)`` pair. A negative position returned by
        the handler is interpreted relative to the end of *input*.

    Raises:
        LookupError: propagated from ``_codecs.lookup_error`` when *errors*
            is not a registered handler name.
        IndexError: if the resolved position lies outside
            ``0 .. len(input)``.
        TypeError: if the handler does not return a 2-tuple of
            ``(unicode, int)``.
        Whatever the handler itself raises also propagates unchanged.
    """
    import _codecs

    errorHandler = _codecs.lookup_error(errors)
    if decode:
        exceptionObject = UnicodeDecodeError(encoding, input, startinpos,
                                             endinpos, reason)
    else:
        exceptionObject = UnicodeEncodeError(encoding, input, startinpos,
                                             endinpos, reason)
    res = errorHandler(exceptionObject)

    # The length must be checked before indexing: otherwise an empty or
    # 1-element tuple escapes as IndexError from res[0]/res[1], and a longer
    # tuple is silently accepted instead of being rejected as malformed.
    if (isinstance(res, tuple) and len(res) == 2
            and isinstance(res[0], unicode) and isinstance(res[1], int)):
        newpos = res[1]
        if newpos < 0:
            # Negative positions count back from the end of the input.
            newpos = len(input) + newpos
        if newpos < 0 or newpos > len(input):
            raise IndexError(
                "position %d from error handler out of bounds" % newpos)
        return res[0], newpos

    raise TypeError(
        "encoding error handler must return (unicode, int) tuple, not %s"
        % repr(res))
def test_surrogatepass_handler(self):
    """The "surrogatepass" handler round-trips lone surrogates via UTF-8.

    Incomplete or malformed surrogate byte sequences must still raise
    UnicodeDecodeError.
    """
    import _codecs

    assert _codecs.lookup_error("surrogatepass")

    # (text containing a lone surrogate, its UTF-8 form under surrogatepass)
    round_trips = (
        ("abc\ud800def", b"abc\xed\xa0\x80def"),
        ('surrogate:\udcff', b'surrogate:\xed\xb3\xbf'),
    )
    for text, encoded in round_trips:
        assert text.encode("utf-8", "surrogatepass") == encoded
        assert encoded.decode("utf-8", "surrogatepass") == text

    # Byte strings whose surrogate sequence is cut short or interrupted.
    for broken in (b"abc\xed\xa0", b"abc\xed\xa0z"):
        raises(UnicodeDecodeError, broken.decode, "utf-8", "surrogatepass")
def test_surrogatepass_handler(self):
    """Exercise the "surrogatepass" error handler with the UTF-8 codec.

    Lone surrogates must encode to their 3-byte form and decode back, while
    incomplete or malformed sequences are rejected with UnicodeDecodeError.
    """
    import _codecs

    codec, policy = "utf-8", "surrogatepass"
    assert _codecs.lookup_error(policy)

    # U+D800 embedded in ordinary text.
    high_text, high_bytes = "abc\ud800def", b"abc\xed\xa0\x80def"
    assert high_text.encode(codec, policy) == high_bytes
    assert high_bytes.decode(codec, policy) == high_text

    # U+DCFF at the end of the text.
    low_text, low_bytes = 'surrogate:\udcff', b'surrogate:\xed\xb3\xbf'
    assert low_text.encode(codec, policy) == low_bytes
    assert low_bytes.decode(codec, policy) == low_text

    # A sequence missing its last byte, and one interrupted by an ASCII byte.
    raises(UnicodeDecodeError, b"abc\xed\xa0".decode, codec, policy)
    raises(UnicodeDecodeError, b"abc\xed\xa0z".decode, codec, policy)
def test_surrogatepass_handler(self):
    """Check "surrogatepass" with the UTF-8, UTF-16-BE and UTF-32-BE codecs.

    UTF-8 must round-trip lone surrogates and reject incomplete or malformed
    sequences; the UTF-16/UTF-32 big-endian encoders must emit the surrogate
    code unit as-is.
    """
    import _codecs

    assert _codecs.lookup_error("surrogatepass")

    # (text containing a lone surrogate, its UTF-8 form under surrogatepass)
    utf8_cases = [
        ("abc\ud800def", b"abc\xed\xa0\x80def"),
        ('surrogate:\udcff', b'surrogate:\xed\xb3\xbf'),
    ]
    for text, encoded in utf8_cases:
        assert text.encode("utf-8", "surrogatepass") == encoded
        assert encoded.decode("utf-8", "surrogatepass") == text

    # Byte strings whose surrogate sequence is cut short or interrupted.
    for broken in (b"abc\xed\xa0", b"abc\xed\xa0z"):
        raises(UnicodeDecodeError, broken.decode, "utf-8", "surrogatepass")

    # Wider encodings: encode-only checks.
    utf16_bytes = u'\ud8ae'.encode('utf_16_be', 'surrogatepass')
    assert utf16_bytes == b'\xd8\xae'
    utf32_bytes = u'\U0000d8ae'.encode('utf-32-be', 'surrogatepass')
    assert utf32_bytes == b'\x00\x00\xd8\xae'
def unicode_call_errorhandler(errors, encoding, reason, input, startinpos,
                              endinpos, decode=True):
    """Run the codec error handler registered as *errors* and check its reply.

    The handler receives a UnicodeDecodeError (if *decode* is true) or a
    UnicodeEncodeError built from the remaining arguments.

    Returns:
        ``(replacement, position)``; a negative position from the handler is
        taken relative to the end of *input*.

    Raises:
        TypeError: the handler did not return a 2-tuple ``(unicode, int)``.
        IndexError: the resolved position is outside ``0 .. len(input)``.
        Errors from ``_codecs.lookup_error`` or from the handler itself
        propagate unchanged.
    """
    import _codecs

    handler = _codecs.lookup_error(errors)
    error_type = UnicodeDecodeError if decode else UnicodeEncodeError
    reply = handler(error_type(encoding, input, startinpos, endinpos, reason))

    # Guard clause: reject anything that is not exactly (unicode, int).
    well_formed = (
        isinstance(reply, tuple)
        and len(reply) == 2
        and isinstance(reply[0], unicode)
        and isinstance(reply[1], int)
    )
    if not well_formed:
        raise TypeError("encoding error handler must return (unicode, int) tuple, not %s" % repr(reply))

    replacement, position = reply
    size = len(input)
    if position < 0:
        # Negative positions count back from the end of the input.
        position = size + position
    if not 0 <= position <= size:
        raise IndexError(
            "position %d from error handler out of bounds" % position)
    return replacement, position
multiple character to \\u001a. """ m = {} for k, v in decoding_map.items(): if not v in m: m[v] = k else: m[v] = None return m ### error handlers try: strict_errors = lookup_error("strict") ignore_errors = lookup_error("ignore") replace_errors = lookup_error("replace") xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") backslashreplace_errors = lookup_error("backslashreplace") namereplace_errors = lookup_error("namereplace") except LookupError: # In --disable-unicode builds, these error handler are missing strict_errors = None ignore_errors = None replace_errors = None xmlcharrefreplace_errors = None backslashreplace_errors = None namereplace_errors = None # Tell modulefinder that using codecs probably needs the encodings