def test_callbacks(self):
    def handler1(exc):
        r = range(exc.start, exc.end)
        if isinstance(exc, UnicodeEncodeError):
            l = ["<%d>" % ord(exc.object[pos]) for pos in r]
        elif isinstance(exc, UnicodeDecodeError):
            l = ["<%d>" % exc.object[pos] for pos in r]
        else:
            raise TypeError("don't know how to handle %r" % exc)
        return ("[%s]" % "".join(l), exc.end)
    codecs.register_error("test.handler1", handler1)

    def handler2(exc):
        if not isinstance(exc, UnicodeDecodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)]
        return ("[%s]" % "".join(l), exc.end + 1)  # skip one character
    codecs.register_error("test.handler2", handler2)

    s = b"\x00\x81\x7f\x80\xff"

    self.assertEqual(
        s.decode("ascii", "test.handler1"),
        "\x00[<129>]\x7f[<128>][<255>]"
    )
    self.assertEqual(
        s.decode("ascii", "test.handler2"),
        "\x00[<129>][<128>]"
    )
    self.assertEqual(
        b"\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
        "\u3042[<92><117><51><120>]xx"
    )
    self.assertEqual(
        b"\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
        "\u3042[<92><117><51><120><120>]"
    )
    self.assertEqual(
        codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0],
        "z[<98>][<99>]"
    )
    self.assertEqual(
        "g\xfc\xdfrk".encode("ascii", "test.handler1"),
        b"g[<252><223>]rk"
    )
    self.assertEqual(
        "g\xfc\xdf".encode("ascii", "test.handler1"),
        b"g[<252><223>]"
    )
def test_badhandlerresults(self):
    if test_support.due_to_ironpython_bug("http://tkbgitvstfat01:8080/WorkItemTracking/WorkItem.aspx?artifactMoniker=304331"):
        return
    results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",),
                ("foo", 1, 3), ("foo", None), ("foo",) )
    encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

    for res in results:
        codecs.register_error("test.badhandler", lambda x: res)
        for enc in encs:
            self.assertRaises(
                TypeError,
                u"\u3042".encode,
                enc,
                "test.badhandler"
            )
        for (enc, bytes) in (
            ("ascii", "\xff"),
            ("utf-8", "\xff"),
            ("utf-7", "+x-"),
            ("unicode-internal", "\x00"),
        ):
            self.assertRaises(
                TypeError,
                bytes.decode,
                enc,
                "test.badhandler"
            )
def register_codec():
    class Codec(codecs.Codec):
        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_map)

        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_map)

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    def getregentry(encoding):
        if encoding != 'latscii':
            return None
        return (Codec().encode, Codec().decode, StreamReader, StreamWriter)
    codecs.register(getregentry)

    def latscii_error(uerr):
        key = ord(uerr.object[uerr.start:uerr.end])
        try:
            return unichr(decoding_map[key]), uerr.end
        except KeyError:
            handler = codecs.lookup_error('replace')
            return handler(uerr)
    codecs.register_error('replacelatscii', latscii_error)
def test_decodeunicodeinternal(self):
    if test_support.due_to_ironpython_bug("http://www.codeplex.com/IronPython/WorkItem/View.aspx?WorkItemId=15506"):
        return
    self.assertRaises(
        UnicodeDecodeError,
        "\x00\x00\x00\x00\x00".decode,
        "unicode-internal",
    )
    if sys.maxunicode > 0xffff:
        def handler_unicodeinternal(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            return (u"\x01", 1)

        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
            u"\u0000"
        )
        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
            u"\u0000\ufffd"
        )

        codecs.register_error("test.hui", handler_unicodeinternal)

        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
            u"\u0000\u0001\u0000"
        )
def test_decoding_callbacks(self):
    if test_support.due_to_ironpython_bug("http://www.codeplex.com/IronPython/WorkItem/View.aspx?WorkItemId=15544"):
        return
    # This is a test for a decoding callback handler
    # that allows the decoding of the invalid sequence
    # "\xc0\x80" and returns "\x00" instead of raising an error.
    # All other illegal sequences will be handled strictly.
    def relaxedutf8(exc):
        if not isinstance(exc, UnicodeDecodeError):
            raise TypeError("don't know how to handle %r" % exc)
        if exc.object[exc.start:exc.start+2] == "\xc0\x80":
            return (u"\x00", exc.start+2)  # retry after two bytes
        else:
            raise exc

    codecs.register_error("test.relaxedutf8", relaxedutf8)

    # all the "\xc0\x80" will be decoded to "\x00"
    sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
    sout = u"a\x00b\x00c\xfc\x00\x00"
    self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)

    # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
    sin = "\xc0\x80\xc0\x81"
    self.assertRaises(UnicodeDecodeError, sin.decode, "utf-8", "test.relaxedutf8")
def chapter1_23():
    data = u'''\
<html>
<head>
<title>Encoding Test</title>
</head>
<body>
<p>accented characters:
<ul>
<li>\xe0 (a + grave)
<li>\xe7 (c + cedilla)
<li>\xe9 (e + acute)
</ul>
<p>symbols:
<ul>
<li>\xa3 (British pound)
<li>\u20ac (Euro)
<li>\u221e (infinity)
</ul>
</body>
</html>'''
    print encode_for_xml(data)
    codecs.register_error('html_replace', html_replace)
    print encode_for_html(data)
def get_template(template_name, dirs=_dirs_undefined):
    """
    Returns a compiled Template object for the given template name,
    handling template inheritance recursively.
    """
    # Implementation Note:
    # If we do this earlier (i.e. when the module is imported), there
    # is a chance our hook gets overwritten somewhere depending on the
    # order in which the modules are imported.
    loader.get_template_from_string = get_template_from_string
    loader.make_origin = make_origin

    def fake_strict_errors(exception):  # pylint: disable=unused-argument
        return ("", -1)

    if template_name.endswith('.pdf'):
        # HACK: Ignore UnicodeError, due to PDF file read
        codecs.register_error('strict', fake_strict_errors)

    if dirs is _dirs_undefined:
        template = loader.get_template(template_name)
    else:
        if django.VERSION[0] >= 1 and django.VERSION[1] >= 8:
            warnings.warn(
                "The dirs argument of get_template is deprecated.",
                RemovedInDjango110Warning, stacklevel=2)
        # pylint:disable=unexpected-keyword-arg
        template = loader.get_template(template_name, dirs=dirs)

    if template_name.endswith('.pdf'):
        # HACK: Ignore UnicodeError, due to PDF file read
        codecs.register_error('strict', codecs.strict_errors)

    return template
def test_xmlcharnamereplace(self):
    # This time use a named character entity for unencodable
    # characters, if one is available.

    def xmlcharnamereplace(exc):
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = []
        for c in exc.object[exc.start:exc.end]:
            try:
                l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
            except KeyError:
                l.append(u"&#%d;" % ord(c))
        return (u"".join(l), exc.end)

    codecs.register_error(
        "test.xmlcharnamereplace", xmlcharnamereplace)

    sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
    sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
    self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
    sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
    self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
    sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
    self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, str) -> nodes.document
    """Parse a document and convert to doctree."""
    # set up error_handler for the target document
    error_handler = UnicodeDecodeErrorHandler(env.docname)
    codecs.register_error('sphinx', error_handler)  # type: ignore

    filetype = get_filetype(app.config.source_suffix, filename)
    input_class = app.registry.get_source_input(filetype)
    reader = SphinxStandaloneReader(app)
    source = input_class(app, env, source=None, source_path=filename,  # type: ignore
                         encoding=env.config.source_encoding)
    parser = app.registry.create_source_parser(app, filetype)
    if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
        # a workaround for recommonmark
        #   If recommonmark.AutoStrictify is enabled, the parser invokes reST parser
        #   internally.  But recommonmark-0.4.0 does not provide settings_spec for reST
        #   parser.  As a workaround, this copies settings_spec for RSTParser to the
        #   CommonMarkParser.
        parser.settings_spec = RSTParser.settings_spec

    pub = Publisher(reader=reader,  # type: ignore
                    parser=parser,
                    writer=SphinxDummyWriter(),
                    source_class=SphinxDummySourceClass,
                    destination=NullOutput())
    pub.process_programmatic_settings(None, env.settings, None)
    pub.set_source(source, filename)
    pub.publish()
    return pub.document
def test_mutatingdecodehandler(self):
    baddata = [
        ("ascii", b"\xff"),
        ("utf-7", b"++"),
        ("utf-8", b"\xff"),
        ("utf-16", b"\xff"),
        ("utf-32", b"\xff"),
        ("unicode-escape", b"\\u123g"),
        ("raw-unicode-escape", b"\\u123g"),
        ("unicode-internal", b"\xff"),
    ]

    def replacing(exc):
        if isinstance(exc, UnicodeDecodeError):
            exc.object = 42
            return ("\u4242", 0)
        else:
            raise TypeError("don't know how to handle %r" % exc)
    codecs.register_error("test.replacing", replacing)

    for (encoding, data) in baddata:
        self.assertRaises(TypeError, data.decode, encoding, "test.replacing")

    def mutating(exc):
        if isinstance(exc, UnicodeDecodeError):
            exc.object[:] = b""
            return ("\u4242", 0)
        else:
            raise TypeError("don't know how to handle %r" % exc)
    codecs.register_error("test.mutating", mutating)

    # If the decoder doesn't pick up the modified input the following
    # will lead to an endless loop
    for (encoding, data) in baddata:
        self.assertRaises(TypeError, data.decode, encoding, "test.mutating")
def test_decodeunicodeinternal(self):
    self.assertRaises(
        UnicodeDecodeError,
        "\x00\x00\x00\x00\x00".decode,
        "unicode-internal",
    )
    if sys.maxunicode > 0xffff:
        def handler_unicodeinternal(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            return (u"\x01", 1)

        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
            u"\u0000"
        )
        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
            u"\u0000\ufffd"
        )

        codecs.register_error("test.hui", handler_unicodeinternal)

        self.assertEqual(
            "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
            u"\u0000\u0001\u0000"
        )
def test_uninamereplace(self):
    # We're using the names from the unicode database this time,
    # and we're doing "syntax highlighting" here, i.e. we include
    # the replaced text in ANSI escape sequences. For this it is
    # useful that the error handler is not called for every single
    # unencodable character, but for a complete sequence of
    # unencodable characters, otherwise we would output many
    # unnecessary escape sequences.

    def uninamereplace(exc):
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = []
        for c in exc.object[exc.start:exc.end]:
            l.append(unicodedata.name(c, u"0x%x" % ord(c)))
        return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)

    codecs.register_error(
        "test.uninamereplace", uninamereplace)

    sin = u"\xac\u1234\u20ac\u8000"
    sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
    self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)

    sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
    self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)

    sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
    self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
def test_badhandlerresults(self):
    results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",),
                ("foo", 1, 3), ("foo", None), ("foo",) )
    encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

    for res in results:
        codecs.register_error("test.badhandler", lambda x: res)
        for enc in encs:
            self.assertRaises(
                TypeError,
                u"\u3042".encode,
                enc,
                "test.badhandler"
            )
        for (enc, bytes) in (
            ("ascii", "\xff"),
            ("utf-8", "\xff"),
            ("utf-7", "+x-"),
            # ("unicode-internal", "\x00"), - not valid for Jython because
            # PyUnicode/PyString share internal representation
        ):
            self.assertRaises(
                TypeError,
                bytes.decode,
                enc,
                "test.badhandler"
            )
def test_customreplace_encode(self):
    if self.has_iso10646:
        self.skipTest('encoding contains full ISO 10646 map')

    from html.entities import codepoint2name

    def xmlcharnamereplace(exc):
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = []
        for c in exc.object[exc.start:exc.end]:
            if ord(c) in codepoint2name:
                l.append("&%s;" % codepoint2name[ord(c)])
            else:
                l.append("&#%d;" % ord(c))
        return ("".join(l), exc.end)

    codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)

    if self.xmlcharnametest:
        sin, sout = self.xmlcharnametest
    else:
        sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
        sout = b"&laquo;&real;&raquo; = &lang;&#4660;&rang;"
    self.assertEqual(self.encode(sin, "test.xmlcharnamereplace")[0], sout)
def test_badhandlerresults(self):
    results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",),
                ("foo", 1, 3), ("foo", None), ("foo",) )
    encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

    for res in results:
        codecs.register_error("test.badhandler", lambda x: res)
        for enc in encs:
            self.assertRaises(
                TypeError,
                "\u3042".encode,
                enc,
                "test.badhandler"
            )
        for (enc, bytes) in (
            ("ascii", b"\xff"),
            ("utf-8", b"\xff"),
            ("utf-7", b"+x-"),
            ("unicode-internal", b"\x00"),
        ):
            with test.support.check_warnings():
                # unicode-internal has been deprecated
                self.assertRaises(
                    TypeError,
                    bytes.decode,
                    enc,
                    "test.badhandler"
                )
def __init__(self, log, encoding='utf-8', replacement=u'\ufffd'):
    self.log = log
    self.encoding = encoding
    self.replacement = replacement
    self.columns = None
    self.error_count = 0
    codecs.register_error("error_handler", self._error_handler)
def test_customreplace(self):
    if self.has_iso10646:
        return

    import htmlentitydefs

    names = {}
    for (key, value) in htmlentitydefs.entitydefs.items():
        if len(value) == 1:
            names[value.decode("latin-1")] = self.decode(key)[0]
        else:
            names[unichr(int(value[2:-1]))] = self.decode(key)[0]

    def xmlcharnamereplace(exc):
        if not isinstance(exc, UnicodeEncodeError):
            raise TypeError("don't know how to handle %r" % exc)
        l = []
        for c in exc.object[exc.start:exc.end]:
            try:
                l.append(u"&%s;" % names[c])
            except KeyError:
                l.append(u"&#%d;" % ord(c))
        return (u"".join(l), exc.end)

    codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)

    if self.xmlcharnametest:
        sin, sout = self.xmlcharnametest
    else:
        sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
        sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
    self.assertEqual(self.encode(sin, "test.xmlcharnamereplace")[0], sout)
def test_decodeunicodeinternal(self):
    with test.support.check_warnings(('unicode_internal codec has been '
                                      'deprecated', DeprecationWarning)):
        self.assertRaises(
            UnicodeDecodeError,
            b"\x00\x00\x00\x00\x00".decode,
            "unicode-internal",
        )
    if SIZEOF_WCHAR_T == 4:
        def handler_unicodeinternal(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            return ("\x01", 1)

        with test.support.check_warnings(('unicode_internal codec has been '
                                          'deprecated', DeprecationWarning)):
            self.assertEqual(
                b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
                "\u0000"
            )
            self.assertEqual(
                b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
                "\u0000\ufffd"
            )

            codecs.register_error("test.hui", handler_unicodeinternal)

            self.assertEqual(
                b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
                "\u0000\u0001\u0000"
            )
def test_badhandler_longindex(self):
    import codecs
    import sys
    errors = 'test.badhandler_longindex'
    codecs.register_error(errors, lambda x: ('', sys.maxsize + 1))
    # CPython raises OverflowError here
    raises((IndexError, OverflowError),
           b'apple\x92ham\x93spam'.decode, 'utf-8', errors)
def test_unicode_internal(self):
    import codecs
    import sys
    try:
        '\x00'.decode('unicode-internal')
    except UnicodeDecodeError:
        pass
    else:
        raise Exception("DID NOT RAISE")

    res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace")
    if sys.maxunicode > 65535:
        assert res == u"\u0000\ufffd"    # UCS4 build
    else:
        assert res == u"\x00\x00\ufffd"  # UCS2 build

    res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore")
    if sys.maxunicode > 65535:
        assert res == u"\u0000"    # UCS4 build
    else:
        assert res == u"\x00\x00"  # UCS2 build

    def handler_unicodeinternal(exc):
        if not isinstance(exc, UnicodeDecodeError):
            raise TypeError("don't know how to handle %r" % exc)
        return (u"\x01", 1)

    codecs.register_error("test.hui", handler_unicodeinternal)
    res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
    if sys.maxunicode > 65535:
        assert res == u"\u0000\u0001\u0000"    # UCS4 build
    else:
        assert res == u"\x00\x00\x01\x00\x00"  # UCS2 build
def decode_file(filename, codepage=None):
    """Read 'filename', strip the BOM if present, strip any leading or
    trailing whitespace, return a list of Python strings.

    In order to read the file, we need to know the file's encoding (i.e. how
    the writer represented the characters contained therein -- ASCII, UTF-8,
    or whatever). The caller can specify the file encoding explicitly, or set
    'codepage' to None to have this function try to deduce the file encoding.

    TODO: Document in more detail exactly what is returned -- i.e. what
    exactly does readlines return when the file is opened with an 'encoding'
    parameter?
    """
    codecs.register_error('fb_cp1252', handle_decode_err_by_fb_cp1252)
    ext = (os.path.splitext(filename))[1]
    if codepage:
        incp = codepage
        errs = 'strict'
    elif '.m3u8' == ext:
        incp = 'utf_8'
        errs = 'fb_cp1252'
    else:
        incp = 'cp1252'
        errs = 'strict'
    with open(filename, 'r', -1, incp, errs) as fh:
        lines = fh.readlines()
    lines[0] = maybe_remove_bom(lines[0])
    return lines
def initbotscharsets():
    '''set up right charset handling for specific charsets (UNOA, UNOB, UNOC, etc).'''
    codecs.register(codec_search_function)   # tell python how to search a codec defined by bots. These are the codecs in usersys/charset
    botsglobal.botsreplacechar = unicode(botsglobal.ini.get('settings', 'botsreplacechar', u' '))
    codecs.register_error('botsreplace', botscharsetreplace)   # define the 'botsreplace' error handling for codecs/charsets.
    for key, value in botsglobal.ini.items('charsets'):   # set aliases for charsets in bots.ini
        encodings.aliases.aliases[key] = value
def test_encode_custom_error_handler_type(self):
    import codecs
    import sys
    codecs.register_error("test.test_encode_custom_error_handler_type",
                          lambda e: ('\xc3', e.end))
    raises(TypeError, u"\uDDA1".encode, "gbk",
           "test.test_encode_custom_error_handler_type")
def test_decode_custom_error_handler_overflow(self):
    import codecs
    import sys
    codecs.register_error("test.test_decode_custom_error_handler_overflow",
                          lambda e: (u'', sys.maxint + 1))
    raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
           "test.test_decode_custom_error_handler_overflow")
def conv2ASCII(bigstring):
    def convHandler(error):
        return ('1FOREIGN', error.start + 1)
    codecs.register_error('foreign', convHandler)
    bigstring = bigstring.encode('ascii', 'foreign')
    newstring = bigstring.decode('ascii', 'foreign')
    return newstring
def register_strwidth_error(strwidth):
    '''Create new encode errors handling method similar to ``replace``

    Like ``replace`` this method uses question marks in place of the characters
    that cannot be represented in the requested encoding. Unlike ``replace`` the
    amount of question marks is identical to the amount of display cells the
    offending character occupies. Thus encoding ``…`` (U+2026, HORIZONTAL
    ELLIPSIS) to ``latin1`` will emit one question mark, but encoding ``Ａ``
    (U+FF21, FULLWIDTH LATIN CAPITAL LETTER A) will emit two question marks.

    Since the width of some characters depends on the terminal settings and
    powerline knows how to respect them, a single error handling method cannot
    be used. Instead, this generator function takes the ``strwidth`` function
    (a function that knows how to compute string width respecting all needed
    settings) and returns the name of a newly registered error handling method.

    :param function strwidth:
        Function that computes string width measured in display cells the
        string occupies when displayed.

    :return: New error handling method name.
    '''
    global last_swe_idx
    last_swe_idx += 1

    def powerline_encode_strwidth_error(e):
        if not isinstance(e, UnicodeEncodeError):
            raise NotImplementedError
        return ('?' * strwidth(e.object[e.start:e.end]), e.end)

    ename = 'powerline_encode_strwidth_error_{0}'.format(last_swe_idx)
    codecs.register_error(ename, powerline_encode_strwidth_error)
    return ename
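# A hedged usage sketch for the handler factory above. `cell_width` is an
# illustrative stand-in for powerline's real strwidth function (which also
# honours terminal-specific settings), and `last_swe_idx` is the module-level
# counter register_strwidth_error() expects to exist.
import codecs
import unicodedata

last_swe_idx = 0

def cell_width(s):
    # crude width estimate: East Asian wide/fullwidth characters take 2 cells
    return sum(2 if unicodedata.east_asian_width(c) in 'WF' else 1 for c in s)

errors_name = register_strwidth_error(cell_width)
print('x\u2026'.encode('latin1', errors_name))  # b'x?'  - ellipsis occupies one cell
print('x\uff21'.encode('latin1', errors_name))  # b'x??' - fullwidth A occupies two cells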
def get_sanitized_filename(self):
    """Create a sanitized version of the filename.

    :return: Portable and secure version of filename.
    """
    codecs.register_error("replace_", self._replace_under_error_handler)

    ascii_strip_re = re.compile(r'[^A-Za-z0-9_.-]')
    windows_device_files = ('CON', 'AUX', 'COM1', 'COM2', 'COM3', 'COM4',
                            'LPT1', 'LPT2', 'LPT3', 'PRN', 'NUL')

    filename = self.filename
    if isinstance(filename, unicode):
        from unicodedata import normalize
        filename = normalize('NFKD', filename).encode('ascii', 'replace_')

    for sep in os.path.sep, os.path.altsep:
        if sep:
            filename = filename.replace(sep, ' ')
    filename = str(ascii_strip_re.sub('_', '_'.join(
        filename.split()))).strip('._')

    if os.name == 'nt' and filename and \
            filename.split('.')[0].upper() in windows_device_files:
        filename = '_' + filename

    return filename
def test_bmp(self):
    codecs.register_error('cssypy', stringutil.css_unicode_error_handler)
    Writer = codecs.getwriter('ascii')
    stream = StringIO()
    writer = Writer(stream, errors='cssypy')
    writer.write(u'ab\uABCDcd')
    self.assertEqual('ab\\00ABCDcd', stream.getvalue())
def test_callback_returns_bytes(self):
    def myreplace(exc):
        return (b"1234", exc.end)
    codecs.register_error("test.cjktest", myreplace)
    enc = self.encode("abc" + self.unmappedunicode + "def", "test.cjktest")[0]
    self.assertEqual(enc, b"abc1234def")
def test_badhandlerresults(self):
    results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",),
                ("foo", 1, 3), ("foo", None), ("foo",) )
    encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

    for res in results:
        codecs.register_error("test.badhandler", lambda x: res)
        for enc in encs:
            self.assertRaises(
                TypeError,
                u"\u3042".encode,
                enc,
                "test.badhandler"
            )
        for (enc, bytes) in (
            ("ascii", "\xff"),
            ("utf-8", "\xff"),
            ("utf-7", "+x-"),
            ("unicode-internal", "\x00"),
        ):
            self.assertRaises(
                TypeError,
                bytes.decode,
                enc,
                "test.badhandler"
            )
from importlib.util import cache_from_source

if PY2:
    # In Python 2.7, backslashreplace exists
    # but does not support use for decoding.
    # We implement our own replace handler for this
    # situation, so that we can consistently use
    # backslash replacement for all versions.
    def backslashreplace_decode_fn(err):
        raw_bytes = (err.object[i] for i in range(err.start, err.end))
        # Python 2 gave us characters - convert to numeric bytes
        raw_bytes = (ord(b) for b in raw_bytes)
        return u"".join(map(u"\\x{:x}".format, raw_bytes)), err.end
    codecs.register_error(
        "backslashreplace_decode",
        backslashreplace_decode_fn,
    )
    backslashreplace_decode = "backslashreplace_decode"
else:
    backslashreplace_decode = "backslashreplace"


def has_tls():
    # type: () -> bool
    try:
        import _ssl  # noqa: F401  # ignore unused
        return True
    except ImportError:
        pass

    from pip._vendor.urllib3.util import IS_PYOPENSSL
    return IS_PYOPENSSL
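# A brief usage sketch (illustrative input): with the error-handler name
# selected above, undecodable bytes come out as \x escapes on both Python 2
# (via the registered handler) and Python 3 (via the built-in handler).
raw = b'caf\xe9 menu'
print(raw.decode('ascii', backslashreplace_decode))  # -> caf\xe9 menu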
def get_temp_dir():
    return temp_dir


def mixed_decoder(unicode_error):
    err_str = unicode_error[1]
    err_len = unicode_error.end - unicode_error.start
    next_position = unicode_error.start + err_len
    replacement = err_str[unicode_error.start:unicode_error.end].decode('cp1252')
    return u'%s' % replacement, next_position


codecs.register_error('mixed', mixed_decoder)


def json_request(method, params=None, host='localhost', port=8080,
                 username=None, password=None):
    # e.g. KodiJRPC_Get("PVR.GetProperties", {"properties": ["recording"]})
    url = 'http://{}:{}/jsonrpc'.format(host, port)
    header = {'Content-Type': 'application/json'}
    jsondata = {'jsonrpc': '2.0',
                'method': method,
                'id': method}
#!/usr/bin/env python3
import os
import sys
import argparse
import json
from hexdump import hexdump
import codecs
codecs.register_error("strict", codecs.backslashreplace_errors)

from cereal import log
import cereal.messaging as messaging
from cereal.services import service_list

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Dump communication sockets. See cereal/services.py for a complete list of available sockets.'
    )
    parser.add_argument('--pipe', action='store_true')
    parser.add_argument('--raw', action='store_true')
    parser.add_argument('--json', action='store_true')
    parser.add_argument('--dump-json', action='store_true')
    parser.add_argument('--no-print', action='store_true')
    parser.add_argument('--addr', default='127.0.0.1')
    parser.add_argument('--values', help='values to monitor (instead of entire event)')
    parser.add_argument("socket", type=str, nargs='*',
        self.log('TRIAL FINISHED')
        self.flush_log()
        """
        self.experiment._log.flush()
        os.fsync(self.experiment._log)


def osreplace(exc):
    """
    desc:
        A replacement function to allow opensesame-style replacement of
        unicode characters.

    arguments:
        exc:
            type: UnicodeEncodeError

    returns:
        desc: A (replacement, end) tuple.
        type: tuple
    """
    _s = u''
    for ch in exc.object[exc.start:exc.end]:
        _s += u'U+%.4X' % ord(ch)
    return _s, exc.end


codecs.register_error(u'osreplace', osreplace)
from collections import namedtuple, OrderedDict
import six
from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit, urldefrag,
                                    urlencode, urlparse, quote, parse_qs, parse_qsl,
                                    ParseResult, unquote, urlunparse)
from six.moves.urllib.request import pathname2url, url2pathname
from w3lib.util import to_bytes, to_native_str, to_unicode


# error handling function for bytes-to-Unicode decoding errors with URLs
def _quote_byte(error):
    return (to_unicode(quote(error.object[error.start:error.end])), error.end)

codecs.register_error('percentencode', _quote_byte)

# constants from RFC 3986, Section 2.2 and 2.3
RFC3986_GEN_DELIMS = b':/?#[]@'
RFC3986_SUB_DELIMS = b"!$&'()*+,;="
RFC3986_RESERVED = RFC3986_GEN_DELIMS + RFC3986_SUB_DELIMS
RFC3986_UNRESERVED = (string.ascii_letters + string.digits + "-._~").encode('ascii')
EXTRA_SAFE_CHARS = b'|'  # see https://github.com/scrapy/w3lib/pull/25

_safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b'%'


def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
    """Convert the given URL into a legal URL by escaping unsafe
    characters according to RFC-3986.
#!/usr/bin/env python3
# Command line interface to compile ElasticSearch queries from EQUEL expressions and perform queries against an ES instance.

import argparse

# change default decoding error behaviour to less strict 'replace', globally
import codecs
codecs.register_error('strict', codecs.replace_errors)

from elasticsearch import Elasticsearch
from equel.engine import EQUELEngine, EQUELTimeRange
import arrow
import json
import sys

argparser = argparse.ArgumentParser(description="EQUEL Command Line Interface")
argparser.add_argument("--server", "-s", action="append", default="localhost", help="ElasticSearch server")
argparser.add_argument("--index", "-i", default="*", help="ElasticSearch index pattern to query")
argparser.add_argument("--max-results", "-m", type=int, default=1000, help="Maximum returned documents")
argparser.add_argument("--timeout",
def test_decodehelper(self):
    # enhance coverage of:
    # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
    # and callers
    self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown")

    def baddecodereturn1(exc):
        return 42
    codecs.register_error("test.baddecodereturn1", baddecodereturn1)
    self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn1")
    self.assertRaises(TypeError, b"\\".decode, "unicode-escape", "test.baddecodereturn1")
    self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape", "test.baddecodereturn1")
    self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
    self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
    self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")

    def baddecodereturn2(exc):
        return ("?", None)
    codecs.register_error("test.baddecodereturn2", baddecodereturn2)
    self.assertRaises(TypeError, b"\xff".decode, "ascii", "test.baddecodereturn2")

    handler = PosReturn()
    codecs.register_error("test.posreturn", handler.handle)

    # Valid negative position
    handler.pos = -1
    self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")

    # Valid negative position
    handler.pos = -2
    self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>")

    # Negative position out of bounds
    handler.pos = -3
    self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")

    # Valid positive position
    handler.pos = 1
    self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")

    # Largest valid positive position (one beyond end of input)
    handler.pos = 2
    self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>")

    # Invalid positive position
    handler.pos = 3
    self.assertRaises(IndexError, b"\xff0".decode, "ascii", "test.posreturn")

    # Restart at the "0"
    handler.pos = 6
    self.assertEqual(
        b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"),
        "<?>0")

    class D(dict):
        def __getitem__(self, key):
            raise ValueError
    self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff", "strict", {0xff: None})
    self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
    self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode + 1})
        return vim_bufname(segment_info['bufnr'])
    else:
        return name.encode(segment_info['encoding']) if name else None


vim_strtrans = vim_get_func('strtrans', rettype='unicode')


def powerline_vim_strtrans_error(e):
    if not isinstance(e, UnicodeDecodeError):
        raise NotImplementedError
    text = vim_strtrans(e.object[e.start:e.end])
    return (text, e.end)


codecs.register_error('powerline_vim_strtrans_error', powerline_vim_strtrans_error)


did_autocmd = False
buffer_caches = []


def register_buffer_cache(cachedict):
    global did_autocmd
    global buffer_caches
    from powerline.vim import get_default_pycmd, pycmd
    if not did_autocmd:
        import __main__
        __main__.powerline_on_bwipe = on_bwipe
        vim.command('augroup Powerline')
        vim.command('	autocmd! BufWipeout * :{pycmd} powerline_on_bwipe()'.format(
    This python `codecs`_ error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references.

    >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
    'The cost was &euro;12.'

    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start:ex.end]
        text = _html_entities_escaper.escape(bad_text)
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error('htmlentityreplace', htmlentityreplace_errors)


# TODO: options to make this dynamic per-compilation will be added in a later
# release
DEFAULT_ESCAPES = {
    'x': 'filters.xml_escape',
    'h': 'filters.html_escape',
    'u': 'filters.url_escape',
    'trim': 'filters.trim',
    'entity': 'filters.html_entities_escape',
    'unicode': 'unicode',
    'decode': 'decode',
    'str': 'str',
    'n': 'n'
}
_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe])


def _codec_error_url_quote(e):
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    out = _fast_url_quote(e.object[e.start : e.end])

    if PY2:
        out = out.decode("utf-8")

    return out, e.end


codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)


def uri_to_iri(uri, charset="utf-8", errors="werkzeug.url_quote"):
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.
    sequences encoded in %XX format, but as part of a unicode string.

    :param exc:
        The UnicodeDecodeError exception

    :return:
        A 2-element tuple of (replacement unicode string, integer index to
        resume at)
    """

    bytes_as_ints = bytes_to_list(exc.object[exc.start:exc.end])
    replacements = ['%%%02x' % num for num in bytes_as_ints]
    return (''.join(replacements), exc.end)


codecs.register_error('iriutf8', _iri_utf8_errors_handler)


def _urlquote(string, safe=''):
    """
    Quotes a unicode string for use in a URL

    :param string:
        A unicode string

    :param safe:
        A unicode string of characters to not encode

    :return:
        None (if string is None) or an ASCII byte string of the quoted string
    """
# -*- coding: utf-8 -*-


from oleander import app
from itertools import groupby as _groupby
from jinja2.filters import _GroupTuple
from operator import itemgetter
import codecs
import string


codecs.register_error('alphabetical_directory',
                      lambda error: (u'#', error.start + 1))


class Letter(str):
    """Alphabet letter. Implements comparing where '#' is at the end of
    the alphabet."""

    def __lt__(self, other):
        if self == '#':
            return False
        if other == '#':
            return True
        return str(self) < other

    def __le__(self, other):
        if other == '#':
            return True
        elif self == '#':
            return False
        return str(self) <= other

    def __gt__(self, other):
        if other == '#':
            codepoints.append(codepoint)
        for cp in codepoints:
            e = _encode_entity_map.get(cp)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append("&#x%s;" % (hex(cp)[2:]))
        return ("".join(res), exc.end)
    else:
        return xmlcharrefreplace_errors(exc)


register_error("htmlentityreplace", htmlentityreplace_errors)


def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string
import o2on_config
from o2on_const import regHosts, ProtocolVer, AppName
import o2on_node
import o2on_dat
from o2on_node import ip2e, port2e, e2ip
import o2on_key
import o2on_im
import o2on_util


def my_replace_handler(inst):
    return ((u"\u30fb", inst.start + 2))

try:
    codecs.lookup_error('opy2on_replace')
except LookupError:
    codecs.register_error('opy2on_replace', my_replace_handler)


class O2ONServer(BaseHTTPServer.HTTPServer):
    def __init__(self, handler, port, g):
        BaseHTTPServer.HTTPServer.__init__(self, ('', port), handler)
        self.glob = g
        self.requests = []
        self.__is_shut_down = threading.Event()
        self.__serving = False

    def serve_forever(self, poll_interval=0.5):
        # hasattr(BaseHTTPServer.HTTPServer, '_handle_request_noblock'):
        if sys.hexversion >= 0x020600f0:
            BaseHTTPServer.HTTPServer.serve_forever(self, poll_interval)  # 2.6
        else:
    This python codecs error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references::

        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
        'The cost was &euro;12.'
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start:ex.end]
        text = _html_entities_escaper.escape(bad_text)
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error("htmlentityreplace", htmlentityreplace_errors)

# TODO: options to make this dynamic per-compilation will be added in a later
# release
DEFAULT_ESCAPES = {
    "x": "filters.xml_escape",
    "h": "filters.html_escape",
    "u": "filters.url_escape",
    "trim": "filters.trim",
    "entity": "filters.html_entities_escape",
    "unicode": "unicode",
    "decode": "decode",
    "str": "str",
    "n": "n",
}
import codecs
import os
import unicodedata
import logging
from tempfile import NamedTemporaryFile
from typing import Any, Dict, Generator, List, Optional, Tuple

import demistomock as demisto  # noqa: F401
import pyshark
from CommonServerPython import *  # noqa: F401

TCP_FLAG_FIN = 0x01
TCP_FLAG_SYN = 0x02
TCP_FLAG_ACK = 0x10

codecs.register_error('replace_with_space', lambda x: (u' ', x.start + 1))  # type: ignore[attr-defined]


def from_bytes_to_text(mode: str, binary: bytes) -> str:
    """
    Make a text from a binary.
    :param mode: How to convert the binary to text.
    :return: A text converted from the binary.
    """
    if mode == 'text-based-protocol':
        # Keep all the characters used in text based protocols
        # * The unicodedata category names of control code start with C
        return ''.join(' '
                       if c == u'\ufffd'
                       or (c not in ('\n', '\r', '\t') and unicodedata.category(c)[0] == 'C')
                       else c
                       for c in binary.decode('utf-8', errors='replace'))
from ezdxf.lldxf.const import (
    DXFError, DXFStructureError, DXFVersionError, DXFTableEntryError,
    DXFAppDataError, DXFXDataError, DXFAttributeError, DXFValueError,
    DXFKeyError, DXFIndexError, DXFTypeError, DXFBlockInUseError,
    InvalidGeoDataException, InsertUnits, ACI,
    DXF12, DXF2000, DXF2004, DXF2007, DXF2010, DXF2013, DXF2018,
)

# name space imports - do not remove
import codecs
from ezdxf.lldxf.encoding import (
    dxf_backslash_replace, has_dxf_unicode, decode_dxf_unicode,
)

# setup DXF unicode encoder -> '\U+nnnn'
codecs.register_error('dxfreplace', dxf_backslash_replace)

# Load font support automatically:
if EZDXF_AUTO_LOAD_FONTS:
    fonts.load()

YES_NO = {True: 'yes', False: 'no'}


def print_config(func=print, verbose=False):
    from pathlib import Path
    from ezdxf.acc import USE_C_EXT
    func(f"ezdxf v{__version__} @ {Path(__file__).parent}")
    func(f"Python version: {sys.version}")
    func(f"using C-extensions: {YES_NO[USE_C_EXT]}")
def read_doc(self, docname, app=None):
    # type: (unicode, Sphinx) -> None
    """Parse a file and add/update inventory entries for the doctree."""
    self.temp_data['docname'] = docname
    # defaults to the global default, but can be re-set in a document
    self.temp_data['default_domain'] = \
        self.domains.get(self.config.primary_domain)

    self.settings['input_encoding'] = self.config.source_encoding
    self.settings['trim_footnote_reference_space'] = \
        self.config.trim_footnote_reference_space
    self.settings['gettext_compact'] = self.config.gettext_compact

    language = self.config.language or 'en'
    self.settings['language_code'] = language
    self.settings['smart_quotes'] = True
    for tag in normalize_language_tag(language):
        if tag in smartchars.quotes:
            break
    else:
        self.settings['smart_quotes'] = False

    docutilsconf = path.join(self.srcdir, 'docutils.conf')
    # read docutils.conf from source dir, not from current dir
    OptionParser.standard_config_files[1] = docutilsconf
    if path.isfile(docutilsconf):
        self.note_dependency(docutilsconf)

    with sphinx_domains(self):
        if self.config.default_role:
            role_fn, messages = roles.role(self.config.default_role, english,
                                           0, dummy_reporter)
            if role_fn:
                roles._roles[''] = role_fn
            else:
                logger.warning('default role %s not found',
                               self.config.default_role,
                               location=docname)

        codecs.register_error('sphinx', self.warn_and_replace)  # type: ignore

        # publish manually
        reader = SphinxStandaloneReader(
            self.app, parsers=self.app.registry.get_source_parsers())
        pub = Publisher(reader=reader,
                        writer=SphinxDummyWriter(),
                        destination_class=NullOutput)
        pub.set_components(None, 'restructuredtext', None)
        pub.process_programmatic_settings(None, self.settings, None)
        src_path = self.doc2path(docname)
        source = SphinxFileInput(app, self, source=None, source_path=src_path,
                                 encoding=self.config.source_encoding)
        pub.source = source
        pub.settings._source = src_path
        pub.set_destination(None, None)
        pub.publish()
        doctree = pub.document

    # post-processing
    for domain in itervalues(self.domains):
        domain.process_doc(self, docname, doctree)

    # allow extension-specific post-processing
    if app:
        app.emit('doctree-read', doctree)

    # store time of reading, for outdated files detection
    # (Some filesystems have coarse timestamp resolution;
    # therefore time.time() can be older than filesystem's timestamp.
    # For example, FAT32 has 2sec timestamp resolution.)
    self.all_docs[docname] = max(time.time(),
                                 path.getmtime(self.doc2path(docname)))

    if self.versioning_condition:
        old_doctree = None
        if self.versioning_compare:
            # get old doctree
            try:
                with open(self.doc2path(docname,
                                        self.doctreedir, '.doctree'), 'rb') as f:
                    old_doctree = pickle.load(f)
            except EnvironmentError:
                pass

        # add uids for versioning
        if not self.versioning_compare or old_doctree is None:
            list(add_uids(doctree, self.versioning_condition))
        else:
            list(merge_doctrees(old_doctree, doctree, self.versioning_condition))

    # make it picklable
    doctree.reporter = None
    doctree.transformer = None
    doctree.settings.warning_stream = None
    doctree.settings.env = None
    doctree.settings.record_dependencies = None

    # cleanup
    self.temp_data.clear()
    self.ref_context.clear()
    roles._roles.pop('', None)  # if a document has set a local default role

    # save the parsed doctree
    doctree_filename = self.doc2path(docname, self.doctreedir, '.doctree')
    ensuredir(path.dirname(doctree_filename))
    with open(doctree_filename, 'wb') as f:
        pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL)
def _escapecss(e):
    """
    Escapes characters not allowed in the current encoding the CSS way
    with a backslash followed by a uppercase hex code point

    E.g. the german umlaut 'ä' is escaped as \E4
    """
    s = e.object[e.start:e.end]
    return u''.join([
        ur'\%s ' % str(hex(ord(x)))[2:]  # remove 0x from hex
        .upper()
        for x in s
    ]), e.end


codecs.register_error('escapecss', _escapecss)


class Preferences(object):
    """Control output of CSSSerializer.

    defaultAtKeyword = True
        Should the literal @keyword from src CSS be used or the default
        form, e.g. if ``True``: ``@import`` else: ``@i\mport``

    defaultPropertyName = True
        Should the normalized propertyname be used or the one given in
        the src file, e.g. if ``True``: ``color`` else: ``c\olor``

        Only used if ``keepAllProperties==False``.

    defaultPropertyPriority = True
    If encoding fails, \\uxxxx must be emitted. This is similar to the
    "backslashreplace" handler, only that we never emit \\xnn since this
    is not legal according to the JSON syntax specs.
    '''
    if isinstance(exc, UnicodeEncodeError):
        part = exc.object[exc.start]
        # repr(part) will convert u'\unnnn' to u'u\\nnnn'
        return u'\\u%04x' % ord(part), exc.start + 1
    else:
        raise exc

# register the error handler
codecs.register_error('jsonreplace', jsonreplace_handler)


### Writer

def write(input, encoding='utf-8', outputEncoding=None):
    writer = JsonWriter(input_encoding=encoding, output_encoding=outputEncoding)
    writer.write(input)
    return writer.getvalue()


re_strmangle = re.compile('"|\b|\f|\n|\r|\t|\\\\')


def func_strmangle(match):
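# A minimal usage sketch for the 'jsonreplace' handler registered above (the
# input string is made up for illustration): characters outside the target
# encoding are emitted as \uXXXX escapes, never as \xNN.
print(u'price: \u20ac12'.encode('ascii', 'jsonreplace'))  # -> 'price: \u20ac12'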
#!/usr/bin/env python
# UTF-8 encoding
# Python script getting, parsing and saving (in csv format) data from https://www.peakware.com.
#
# Author: Alexandre Louisnard [email protected]
# 2017

import sys, codecs
import urllib.request
import re

sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
codecs.register_error("strict", codecs.ignore_errors)

listUrl = "https://www.peakware.com/peaks.php?choice=SoE"
outputFile = "peakware.csv"

print("GETTING: {}".format(listUrl))
listPage = urllib.request.urlopen(listUrl).read().decode('utf-8', 'ignore')
# (link, name, altitude)
matchLinks = re.findall(r"""\<li\>\<a\ href\=\"(peaks\.php\?pk\=[0-9]+)\"\>(.*)\<\/a\>\ \(.*\)\<br\/\>[0-9]+\ ft\/([0-9]+)\ m\<\/li\>""", listPage)
print("Found {} summits\n".format(len(matchLinks)))
sys.stdout.flush()

f = codecs.open(outputFile, "w", encoding='utf8')
f.write("name;elevation;latitude;longitude;continent;country;range\n")
from hdt import HDTDocument, IdentifierPosition
from collections import deque
import pandas as pd
import numpy as np
import rocksdb
import codecs
import datetime


def strict_handler(exception):
    return u"", exception.end

codecs.register_error("strict", strict_handler)

PATH_LOD = "/scratch/wbeek/data/LOD-a-lot/data.hdt"
PATH_SAMEAS_NETWORK = "/home/jraad/ssd/data/identity-data/"
PATH_ID2TERMS_099 = "/home/jraad/ssd/data/identity-data-0_99/id2terms_0-99.csv"
PATH_TERM2ID_099 = "/home/jraad/ssd/data/identity-data-0_99/term2id_0-99.csv"

# load the LOD-a-lot HDT file
hdt_lod = HDTDocument(PATH_LOD)

# these identifiers will be used later to query the HDT file using their IDs
id_type = hdt_lod.convert_term("http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
                               IdentifierPosition.Predicate)
id_sameAs = hdt_lod.convert_term("http://www.w3.org/2002/07/owl#sameAs",
                                 IdentifierPosition.Predicate)
id_subClassOf = hdt_lod.convert_term("http://www.w3.org/2000/01/rdf-schema#subClassOf",
# Register a 'repr' error strategy.
# =================================
# Sometimes we want to echo bytestrings back to a user, and we don't know what
# encoding will work. This error strategy replaces non-decodable bytes with
# their Python representation, so that they are human-visible.
#
# See also:
#  - https://github.com/dcrosta/mongo/commit/e1ac732
#  - http://www.crummy.com/software/BeautifulSoup/bs4/doc/#unicode-dammit

def replace_with_repr(unicode_error):
    offender = unicode_error.object[unicode_error.start:unicode_error.end]
    return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end)

codecs.register_error('repr', replace_with_repr)


def unicode_dammit(s, encoding="UTF-8"):
    """Given a bytestring, return a unicode decoded with `encoding`. Any
    bytes not decodable with UTF-8 will be replaced with their Python
    representation, so you'll get something like u"foo\\xefbar".
    """
    if not isinstance(s, str):
        raise TypeError("I got %s, but I want <type 'str'>." % s.__class__)

    errors = 'repr'
    return s.decode(encoding, errors)
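# A quick usage sketch (Python 2, matching the snippet above; the byte string
# is made up for illustration): undecodable bytes stay visible as their Python
# escape instead of raising or being silently dropped.
print(unicode_dammit('foo\xefbar'))    # -> foo\xefbar  (bad byte shown as \xef)
print(unicode_dammit('plain ascii'))   # -> plain ascii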
                if not 0xD800 <= code <= 0xDCFF:
                    raise NotASurrogateError()
                if 0xDC00 <= code <= 0xDC7F:
                    decoded.append(unichr(code - 0xDC00))
                elif code <= 0xDCFF:
                    decoded.append(unichr(code - 0xDC00))
                else:
                    raise NotASurrogateError()
            decoded = str().join(decoded)
        else:
            raise exc
    except NotASurrogateError:
        raise exc
    return (decoded, exc.end)


codecs.register_error('surrogateescape', surrogateescape_handler)


def exception_encode(ex, codec):
    if str == bytes:
        reduced = ex.__reduce__()
        ex = reduced[0](*tuple(map(
            lambda arg: codec.decode(arg)[0] if isinstance(arg, bytes) else arg,
            reduced[1])))
    return ex


def sql_commands(read_line):
    delims = ['"', "'", ';', '--']
    counter = 0
def test_encodehelper(self):
    # enhance coverage of:
    # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
    # and callers
    self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown")

    def badencodereturn1(exc):
        return 42
    codecs.register_error("test.badencodereturn1", badencodereturn1)
    self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1")

    def badencodereturn2(exc):
        return ("?", None)
    codecs.register_error("test.badencodereturn2", badencodereturn2)
    self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2")

    handler = PosReturn()
    codecs.register_error("test.posreturn", handler.handle)

    # Valid negative position
    handler.pos = -1
    self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")

    # Valid negative position
    handler.pos = -2
    self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>")

    # Negative position out of bounds
    handler.pos = -3
    self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")

    # Valid positive position
    handler.pos = 1
    self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")

    # Largest valid positive position (one beyond end of input)
    handler.pos = 2
    self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>")

    # Invalid positive position
    handler.pos = 3
    self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")

    handler.pos = 0

    class D(dict):
        def __getitem__(self, key):
            raise ValueError
    for err in ("strict", "replace", "xmlcharrefreplace",
                "backslashreplace", "test.posreturn"):
        self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
        self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
        self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
if hasattr(config, "WEBIRC_MODE") and config.WEBIRC_MODE == "hmac":
    HMACKEY = hmac.HMAC(key=config.HMACKEY)


def hmacfn(*args):
    h = HMACKEY.copy()
    h.update("%d %s" % (int(time.time() / HMACTEMPORAL), " ".join(args)))
    return h.hexdigest()


def utf8_iso8859_1(data, table=dict((x, bytes(x, "iso-8859-1").decode("iso-8859-1"))
                                    for x in map(chr, range(0, 256)))):
    return (table.get(data.object[data.start]), data.start + 1)

codecs.register_error("mixed-iso-8859-1", utf8_iso8859_1)


def irc_decode(x):
    try:
        return x.decode("utf-8", "mixed-iso-8859-1")
    except UnicodeDecodeError:
        return x.decode("iso-8859-1", "ignore")


class QWebIRCClient(basic.LineReceiver):
    delimiter = b'\n'

    def __init__(self, *args, **kwargs):
        self.__nickname = "(unregistered)"
def test_errorcallback_longindex(self):
    dec = codecs.getdecoder('euc-kr')
    myreplace = lambda exc: ('', sys.maxsize + 1)
    codecs.register_error('test.cjktest', myreplace)
    self.assertRaises(IndexError, dec,
                      b'apple\x92ham\x93spam', 'test.cjktest')
font-size:12px;
}
</style>"""

# Leaving (dirty) possibility to change values from here (e.g. `export SQLMAP__MAX_NUMBER_OF_THREADS=20`)
for key, value in os.environ.items():
    if key.upper().startswith("%s_" % SQLMAP_ENVIRONMENT_PREFIX):
        _ = key[len(SQLMAP_ENVIRONMENT_PREFIX) + 1:].upper()
        if _ in globals():
            original = globals()[_]
            if isinstance(original, int):
                try:
                    globals()[_] = int(value)
                except ValueError:
                    pass
            elif isinstance(original, bool):
                globals()[_] = value.lower() in ('1', 'true')
            elif isinstance(original, (list, tuple)):
                globals()[_] = [__.strip() for __ in value.split(',')]
            else:
                globals()[_] = value

# Installing "reversible" unicode (decoding) error handler
def _reversible(ex):
    if INVALID_UNICODE_PRIVATE_AREA:
        return (u"".join(_unichr(int('000f00%2x' % (_ if isinstance(_, int) else ord(_)), 16)) for _ in ex.object[ex.start:ex.end]), ex.end)
    else:
        return (u"".join(INVALID_UNICODE_CHAR_FORMAT % (_ if isinstance(_, int) else ord(_)) for _ in ex.object[ex.start:ex.end]), ex.end)

codecs.register_error("reversible", _reversible)
    if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
        res = []
        for c in exc.object[exc.start:exc.end]:
            e = encode_entity_map.get(c)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append(c.encode(exc.encoding, "xmlcharrefreplace"))
        return (u"".join(res), exc.end)
    else:
        return xmlcharrefreplace_errors(exc)

register_error(unicode_encode_errors, htmlentityreplace_errors)

del register_error


def encode(text, encoding):
    return text.encode(encoding, unicode_encode_errors)


class HTMLSerializer(object):

    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True
    minimize_boolean_attributes = True
]


def escape_ascii(bytes_data):
    return u''.join(ASCII_ESCAPE_LOOKUP[bval(ch)] for ch in bytes_data)


def escape_ascii_bytes(bytes_data):
    return b''.join(ASCII_ESCAPE_LOOKUP_BYTES[bval(ch)] for ch in bytes_data)


def escape_utf8_error(err):
    return escape_ascii(err.object[err.start:err.end]), err.end

codecs.register_error('rdbslashescape', escape_utf8_error)


def escape_utf8(byte_data):
    return byte_data.decode('utf-8', 'rdbslashescape')


def bytes_to_unicode(byte_data, escape, skip_printable=False):
    """
    Decode given bytes using specified escaping method.

    :param byte_data: The byte-like object with bytes to decode.
    :param escape: The escape method to use.
    :param skip_printable: If True, don't escape byte_data with all 'printable ASCII' bytes.
        Defaults to False.
    :return: New unicode string, escaped with the specified method if needed.
    """
    if isnumber(byte_data):