def test_callbacks(self):
        """Exercise custom codec error handlers registered via codecs.register_error.

        handler1 renders each offending byte/character as "<ordinal>" wrapped
        in brackets and resumes at exc.end; handler2 does the same for decode
        errors but resumes one unit past exc.end (swallowing one extra byte).
        """
        def handler1(exc):
            # Works for both encode and decode errors: encode errors carry a
            # str object (index yields a 1-char string, hence ord()), decode
            # errors carry bytes (index yields an int directly).
            r = range(exc.start, exc.end)
            if isinstance(exc, UnicodeEncodeError):
                l = ["<%d>" % ord(exc.object[pos]) for pos in r]
            elif isinstance(exc, UnicodeDecodeError):
                l = ["<%d>" % exc.object[pos] for pos in r]
            else:
                raise TypeError("don't know how to handle %r" % exc)
            return ("[%s]" % "".join(l), exc.end)

        codecs.register_error("test.handler1", handler1)

        def handler2(exc):
            # Decode-only variant: same rendering, but resumes at exc.end + 1.
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = ["<%d>" % exc.object[pos] for pos in range(exc.start, exc.end)]
            return ("[%s]" % "".join(l), exc.end + 1)  # skip one character

        codecs.register_error("test.handler2", handler2)

        s = b"\x00\x81\x7f\x80\xff"

        self.assertEqual(s.decode("ascii", "test.handler1"), "\x00[<129>]\x7f[<128>][<255>]")
        self.assertEqual(s.decode("ascii", "test.handler2"), "\x00[<129>][<128>]")

        # BUG FIX: the originals wrote "\u3..." with a single backslash inside
        # a bytes literal; "\u" is not a recognized bytes escape, so the value
        # was the same but Python emits a DeprecationWarning for the invalid
        # escape.  "\\u" spells the intended bytes explicitly.
        self.assertEqual(b"\\u3042\\u3xxx".decode("unicode-escape", "test.handler1"), "\u3042[<92><117><51><120>]xx")

        self.assertEqual(b"\\u3042\\u3xx".decode("unicode-escape", "test.handler1"), "\u3042[<92><117><51><120><120>]")

        self.assertEqual(codecs.charmap_decode(b"abc", "test.handler1", {ord("a"): "z"})[0], "z[<98>][<99>]")

        self.assertEqual("g\xfc\xdfrk".encode("ascii", "test.handler1"), b"g[<252><223>]rk")

        self.assertEqual("g\xfc\xdf".encode("ascii", "test.handler1"), b"g[<252><223>]")
示例#2
0
    def test_badhandlerresults(self):
        """Handlers returning malformed results must cause TypeError.

        A valid error-handler result is a (replacement, new_position) 2-tuple;
        every entry in *results* violates that contract in some way.
        (Python 2 code: u"" literals, str inputs to decode.)
        """
        if test_support.due_to_ironpython_bug("http://tkbgitvstfat01:8080/WorkItemTracking/WorkItem.aspx?artifactMoniker=304331"):
            return
        # Each of these is an invalid handler return value (wrong type, wrong
        # arity, or wrong element types).
        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

        for res in results:
            codecs.register_error("test.badhandler", lambda x: res)
            # Encoding side: U+3042 is unencodable in all listed codecs, so
            # the bad handler result is always hit.
            for enc in encs:
                self.assertRaises(
                    TypeError,
                    u"\u3042".encode,
                    enc,
                    "test.badhandler"
                )
            # Decoding side: each codec is paired with input that triggers a
            # decode error.
            for (enc, bytes) in (
                ("ascii", "\xff"),
                ("utf-8", "\xff"),
                ("utf-7", "+x-"),
                ("unicode-internal", "\x00"),
            ):
                self.assertRaises(
                    TypeError,
                    bytes.decode,
                    enc,
                    "test.badhandler"
                )
示例#3
0
def register_codec():
    """Register the custom 'latscii' codec and its 'replacelatscii' handler.

    Relies on module-level decoding_map/encoding_map tables defined elsewhere
    in this file — presumably standard charmap tables; verify at call site.
    (Python 2 code: uses unichr.)
    """
    class Codec(codecs.Codec):
        # Stateless charmap codec backed by the module-level map tables.
        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_map)

        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_map)


    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    def getregentry(encoding):
        # Codec search function: answers only for the name 'latscii'.
        if encoding != 'latscii':
            return None
        # Old-style 4-tuple registry entry (encode, decode, reader, writer).
        return (Codec().encode,
                Codec().decode,
                StreamReader,
                StreamWriter)
    codecs.register(getregentry)

    def latscii_error(uerr):
        # Error handler: map the single offending code point through
        # decoding_map; fall back to the standard 'replace' handler when the
        # code point is unmapped.
        key = ord(uerr.object[uerr.start:uerr.end])
        try:
            return unichr(decoding_map[key]), uerr.end
        except KeyError:
            handler = codecs.lookup_error('replace')
            return handler(uerr)
    codecs.register_error('replacelatscii', latscii_error)
示例#4
0
    def test_decodeunicodeinternal(self):
        """unicode-internal decoding of truncated input with error handlers.

        A 5-byte input is one complete internal unit plus one trailing byte
        on UCS-4 builds, so the handler-dependent asserts only run when
        sys.maxunicode > 0xffff.
        """
        if test_support.due_to_ironpython_bug("http://www.codeplex.com/IronPython/WorkItem/View.aspx?WorkItemId=15506"):
            return
        # The default 'strict' handler must reject the truncated input.
        self.assertRaises(
            UnicodeDecodeError,
            "\x00\x00\x00\x00\x00".decode,
            "unicode-internal",
        )
        if sys.maxunicode > 0xffff:
            def handler_unicodeinternal(exc):
                # Substitute U+0001 and restart decoding at absolute byte
                # offset 1 (re-reading bytes 1-4 as a second U+0000 unit, per
                # the expected result below).
                if not isinstance(exc, UnicodeDecodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                return (u"\x01", 1)

            # 'ignore' drops the dangling byte, keeping the complete unit.
            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
                u"\u0000"
            )

            # 'replace' substitutes U+FFFD for the dangling byte.
            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
                u"\u0000\ufffd"
            )

            codecs.register_error("test.hui", handler_unicodeinternal)

            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
                u"\u0000\u0001\u0000"
            )
示例#5
0
    def test_decoding_callbacks(self):
        """A selective UTF-8 handler: tolerate only the overlong NUL sequence."""
        if test_support.due_to_ironpython_bug("http://www.codeplex.com/IronPython/WorkItem/View.aspx?WorkItemId=15544"):
            return
        # This is a test for a decoding callback handler
        # that allows the decoding of the invalid sequence
        # "\xc0\x80" and returns "\x00" instead of raising an error.
        # All other illegal sequences will be handled strictly.
        def relaxedutf8(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            # Only the two-byte overlong encoding of NUL is accepted; any
            # other bad sequence re-raises the original exception (strict).
            if exc.object[exc.start:exc.start+2] == "\xc0\x80":
                return (u"\x00", exc.start+2) # retry after two bytes
            else:
                raise exc

        codecs.register_error("test.relaxedutf8", relaxedutf8)

        # all the "\xc0\x80" will be decoded to "\x00"
        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
        sout = u"a\x00b\x00c\xfc\x00\x00"
        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)

        # "\xc0\x81" is not valid and a UnicodeDecodeError will be raised
        sin = "\xc0\x80\xc0\x81"
        self.assertRaises(UnicodeDecodeError, sin.decode,
                          "utf-8", "test.relaxedutf8")
示例#6
0
def chapter1_23():
	"""Demo: encode an HTML page for XML vs. HTML output (Python 2).

	Relies on encode_for_xml/encode_for_html and the html_replace error
	handler, all defined elsewhere in this file — TODO confirm they are
	importable here.
	"""
	data = u'''\
<html>
	<head>
		<title>Encoding Test</title>
	</head>
	<body>
		<p>accented characters:
		<ul>
			<li>\xe0 (a + grave)
			<li>\xe7 (c + cedilla)
			<li>\xe9 (e + acute)
		</ul> 
		<p>symbols:
		<ul>
			<li>\xa3 (British pound)
			<li>\u20ac (Euro)
			<li>\u221e (infinity)
		</ul>
	</body>
</html>''' 
	# Print the XML-safe rendering, then register the HTML entity handler
	# and print the HTML-safe rendering of the same document.
	print encode_for_xml(data)
	codecs.register_error('html_replace', html_replace)
	print encode_for_html(data)

	pass
def get_template(template_name, dirs=_dirs_undefined):
    """
    Returns a compiled Template object for the given template name,
    handling template inheritance recursively.
    """
    # Implementation Note:
    # If we do this earlier (i.e. when the module is imported), there
    # is a chance our hook gets overwritten somewhere depending on the
    # order in which the modules are imported.
    loader.get_template_from_string = get_template_from_string
    loader.make_origin = make_origin

    def fake_strict_errors(exception): #pylint: disable=unused-argument
        # Swallow the error entirely: empty replacement, position -1.
        return ("", -1)

    if template_name.endswith('.pdf'):
        # HACK: Ignore UnicodeError, due to PDF file read
        codecs.register_error('strict', fake_strict_errors)

    if dirs is _dirs_undefined:
        template = loader.get_template(template_name)
    else:
        # BUG FIX: compare the full version tuple instead of testing major
        # and minor independently — the old check
        # (VERSION[0] >= 1 and VERSION[1] >= 8) wrongly skipped the warning
        # for Django 2.0+ (where VERSION[1] is 0 again).
        if django.VERSION >= (1, 8):
            warnings.warn(
                "The dirs argument of get_template is deprecated.",
                RemovedInDjango110Warning, stacklevel=2)
        #pylint:disable=unexpected-keyword-arg
        template = loader.get_template(template_name, dirs=dirs)

    if template_name.endswith('.pdf'):
        # HACK: Restore the default strict handler after loading the PDF.
        codecs.register_error('strict', codecs.strict_errors)

    return template
示例#8
0
    def test_xmlcharnamereplace(self):
        # This time use a named character entity for unencodable
        # characters, if one is available.
        # (Python 2 code: u"" literals, str expected outputs.)

        def xmlcharnamereplace(exc):
            # Named HTML entity when the code point has one, numeric
            # character reference (&#NNN;) otherwise.
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = []
            for c in exc.object[exc.start:exc.end]:
                try:
                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
                except KeyError:
                    l.append(u"&#%d;" % ord(c))
            return (u"".join(l), exc.end)

        codecs.register_error(
            "test.xmlcharnamereplace", xmlcharnamereplace)

        # Wider target charsets leave more characters unreplaced, so the
        # expected output differs per codec.
        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
示例#9
0
def read_doc(app, env, filename):
    # type: (Sphinx, BuildEnvironment, str) -> nodes.document
    """Parse a document and convert to doctree."""
    # set up error_handler for the target document
    # (process-global registration under the name 'sphinx'; the handler is
    # parameterized with the current docname)
    error_handler = UnicodeDecodeErrorHandler(env.docname)
    codecs.register_error('sphinx', error_handler)  # type: ignore

    # Select the source input class and parser from the registry based on
    # the file's type (derived from the configured source suffixes).
    filetype = get_filetype(app.config.source_suffix, filename)
    input_class = app.registry.get_source_input(filetype)
    reader = SphinxStandaloneReader(app)
    source = input_class(app, env, source=None, source_path=filename,  # type: ignore
                         encoding=env.config.source_encoding)
    parser = app.registry.create_source_parser(app, filetype)
    if parser.__class__.__name__ == 'CommonMarkParser' and parser.settings_spec == ():
        # a workaround for recommonmark
        #   If recommonmark.AutoStrictify is enabled, the parser invokes reST parser
        #   internally.  But recommonmark-0.4.0 does not provide settings_spec for reST
        #   parser.  As a workaround, this copies settings_spec for RSTParser to the
        #   CommonMarkParser.
        parser.settings_spec = RSTParser.settings_spec

    # Drive a docutils Publisher with dummy writer/destination so only the
    # parsed doctree is produced (no output is written).
    pub = Publisher(reader=reader,  # type: ignore
                    parser=parser,
                    writer=SphinxDummyWriter(),
                    source_class=SphinxDummySourceClass,
                    destination=NullOutput())
    pub.process_programmatic_settings(None, env.settings, None)
    pub.set_source(source, filename)
    pub.publish()
    return pub.document
    def test_mutatingdecodehandler(self):
        """Error handlers that tamper with exc.object must be rejected.

        A decoder could loop forever if it kept reading stale input after a
        handler rebound or mutated exc.object; both cases must raise
        TypeError instead.
        """
        baddata = [
            ("ascii", b"\xff"),
            ("utf-7", b"++"),
            ("utf-8",  b"\xff"),
            ("utf-16", b"\xff"),
            ("utf-32", b"\xff"),
            ("unicode-escape", b"\\u123g"),
            ("raw-unicode-escape", b"\\u123g"),
            ("unicode-internal", b"\xff"),
        ]

        def replacing(exc):
            # Rebinds exc.object to a non-bytes value before returning.
            if isinstance(exc, UnicodeDecodeError):
                exc.object = 42
                return ("\u4242", 0)
            else:
                raise TypeError("don't know how to handle %r" % exc)
        codecs.register_error("test.replacing", replacing)
        for (encoding, data) in baddata:
            self.assertRaises(TypeError, data.decode, encoding, "test.replacing")

        def mutating(exc):
            # Tries to empty exc.object in place and restart at position 0.
            if isinstance(exc, UnicodeDecodeError):
                exc.object[:] = b""
                return ("\u4242", 0)
            else:
                raise TypeError("don't know how to handle %r" % exc)
        codecs.register_error("test.mutating", mutating)
        # If the decoder doesn't pick up the modified input the following
        # will lead to an endless loop
        for (encoding, data) in baddata:
            # BUG FIX: this loop previously passed "test.replacing" again, so
            # the "test.mutating" handler registered above was never used.
            self.assertRaises(TypeError, data.decode, encoding, "test.mutating")
示例#11
0
    def test_decodeunicodeinternal(self):
        """unicode-internal decoding of truncated input with error handlers.

        Only the handler-dependent asserts run on UCS-4 builds
        (sys.maxunicode > 0xffff), where 5 bytes are one complete 4-byte
        unit plus one trailing byte.
        """
        # The default 'strict' handler must reject the truncated input.
        self.assertRaises(
            UnicodeDecodeError,
            "\x00\x00\x00\x00\x00".decode,
            "unicode-internal",
        )
        if sys.maxunicode > 0xffff:
            def handler_unicodeinternal(exc):
                # Substitute U+0001 and restart decoding at absolute byte
                # offset 1 (re-reading bytes 1-4 as another U+0000 unit, per
                # the expected result below).
                if not isinstance(exc, UnicodeDecodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                return (u"\x01", 1)

            # 'ignore' drops the dangling byte, keeping the complete unit.
            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
                u"\u0000"
            )

            # 'replace' substitutes U+FFFD for the dangling byte.
            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
                u"\u0000\ufffd"
            )

            codecs.register_error("test.hui", handler_unicodeinternal)

            self.assertEqual(
                "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
                u"\u0000\u0001\u0000"
            )
示例#12
0
    def test_uninamereplace(self):
        # We're using the names from the unicode database this time,
        # and we're doing "syntax highlighting" here, i.e. we include
        # the replaced text in ANSI escape sequences. For this it is
        # useful that the error handler is not called for every single
        # unencodable character, but for a complete sequence of
        # unencodable characters, otherwise we would output many
        # unneccessary escape sequences.

        def uninamereplace(exc):
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = []
            for c in exc.object[exc.start:exc.end]:
                # Fall back to the hex code point when the Unicode database
                # has no name for the character.
                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
            # One bold (ESC[1m ... ESC[0m) span per contiguous error run.
            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)

        codecs.register_error(
            "test.uninamereplace", uninamereplace)

        # Wider target charsets encode more characters directly, so the
        # highlighted span shrinks per codec.
        sin = u"\xac\u1234\u20ac\u8000"
        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)

        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)

        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
示例#13
0
    def test_badhandlerresults(self):
        """Handlers returning malformed results must cause TypeError.

        A valid error-handler result is a (replacement, new_position)
        2-tuple; every entry in *results* violates that contract.
        (Jython variant: unicode-internal case removed, see comment below.)
        """
        # Each of these is an invalid handler return value.
        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

        for res in results:
            codecs.register_error("test.badhandler", lambda x: res)
            # Encoding side: U+3042 is unencodable in all listed codecs.
            for enc in encs:
                self.assertRaises(
                    TypeError,
                    u"\u3042".encode,
                    enc,
                    "test.badhandler"
                )
            # Decoding side: inputs chosen to trigger a decode error per codec.
            for (enc, bytes) in (
                ("ascii", "\xff"),
                ("utf-8", "\xff"),
                ("utf-7", "+x-"),
                # ("unicode-internal", "\x00"), - not valid for Jython because PyUnicode/PyString share internal representation
            ):
                self.assertRaises(
                    TypeError,
                    bytes.decode,
                    enc,
                    "test.badhandler"
                )
    def test_customreplace_encode(self):
        """Encode through a handler that emits HTML character references."""
        if self.has_iso10646:
            self.skipTest('encoding contains full ISO 10646 map')

        from html.entities import codepoint2name

        def xmlcharnamereplace(exc):
            # Named entity when one exists for the code point, numeric
            # character reference otherwise.
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            pieces = []
            for ch in exc.object[exc.start:exc.end]:
                name = codepoint2name.get(ord(ch))
                pieces.append("&%s;" % name if name is not None else "&#%d;" % ord(ch))
            return ("".join(pieces), exc.end)

        codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)

        if self.xmlcharnametest:
            sin, sout = self.xmlcharnametest
        else:
            # Default fixture: mixed named-entity and numeric-reference input.
            sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
            sout = b"&laquo;&real;&raquo; = &lang;&#4660;&rang;"
        self.assertEqual(self.encode(sin,
                                    "test.xmlcharnamereplace")[0], sout)
示例#15
0
    def test_badhandlerresults(self):
        """Handlers returning malformed results must cause TypeError.

        A valid error-handler result is a (replacement, new_position)
        2-tuple; every entry in *results* violates that contract.
        """
        # Each of these is an invalid handler return value.
        results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

        for res in results:
            codecs.register_error("test.badhandler", lambda x: res)
            # Encoding side: U+3042 is unencodable in all listed codecs.
            for enc in encs:
                self.assertRaises(
                    TypeError,
                    "\u3042".encode,
                    enc,
                    "test.badhandler"
                )
            # Decoding side: inputs chosen to trigger a decode error per codec.
            for (enc, bytes) in (
                ("ascii", b"\xff"),
                ("utf-8", b"\xff"),
                ("utf-7", b"+x-"),
                ("unicode-internal", b"\x00"),
            ):
                with test.support.check_warnings():
                    # unicode-internal has been deprecated
                    self.assertRaises(
                        TypeError,
                        bytes.decode,
                        enc,
                        "test.badhandler"
                    )
示例#16
0
 def __init__(self, log, encoding = 'utf-8', replacement = u'\ufffd'):
     """Initialize and register this instance's decode-error handler.

     :param log: logger used by the error handler (via self._error_handler).
     :param encoding: expected input encoding.
     :param replacement: character substituted for undecodable input.
     """
     self.log = log
     self.encoding = encoding
     self.replacement = replacement
     # columns starts unset — presumably populated later by other methods
     # of this class; verify against the rest of the class.
     self.columns = None
     self.error_count = 0
     # NOTE(review): "error_handler" is a process-global registration shared
     # by all instances — the most recently constructed instance wins.
     codecs.register_error("error_handler", self._error_handler)
        def test_customreplace(self):
            """Encode through a handler that emits HTML entities (Python 2)."""
            if self.has_iso10646:
                return

            import htmlentitydefs

            # Build a map: unicode character -> entity name decoded with this
            # codec.  entitydefs values are either a single latin-1 character
            # or a '&#NNN;' numeric reference.
            names = {}
            for (key, value) in htmlentitydefs.entitydefs.items():
                if len(value) == 1:
                    names[value.decode("latin-1")] = self.decode(key)[0]
                else:
                    # Extract the code point from the '&#NNN;' form.
                    names[unichr(int(value[2:-1]))] = self.decode(key)[0]

            def xmlcharnamereplace(exc):
                # Named entity when known, numeric character reference
                # otherwise.
                if not isinstance(exc, UnicodeEncodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                l = []
                for c in exc.object[exc.start : exc.end]:
                    try:
                        l.append(u"&%s;" % names[c])
                    except KeyError:
                        l.append(u"&#%d;" % ord(c))
                return (u"".join(l), exc.end)

            codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)

            if self.xmlcharnametest:
                sin, sout = self.xmlcharnametest
            else:
                # Default fixture shared by codecs without a specific one.
                sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
                sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
            self.assertEqual(self.encode(sin, "test.xmlcharnamereplace")[0], sout)
示例#18
0
    def test_decodeunicodeinternal(self):
        """unicode-internal decoding of truncated input with error handlers.

        The handler-dependent asserts only run when the build uses 4-byte
        wchar_t units (SIZEOF_WCHAR_T == 4), so 5 bytes are one complete
        unit plus one trailing byte.
        """
        with test.support.check_warnings(('unicode_internal codec has been '
                                          'deprecated', DeprecationWarning)):
            # The default 'strict' handler must reject the truncated input.
            self.assertRaises(
                UnicodeDecodeError,
                b"\x00\x00\x00\x00\x00".decode,
                "unicode-internal",
            )
        if SIZEOF_WCHAR_T == 4:
            def handler_unicodeinternal(exc):
                # Substitute U+0001 and restart decoding at absolute byte
                # offset 1 (re-reading bytes 1-4 as another U+0000 unit, per
                # the expected result below).
                if not isinstance(exc, UnicodeDecodeError):
                    raise TypeError("don't know how to handle %r" % exc)
                return ("\x01", 1)

            with test.support.check_warnings(('unicode_internal codec has been '
                                              'deprecated', DeprecationWarning)):
                # 'ignore' drops the dangling byte.
                self.assertEqual(
                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
                    "\u0000"
                )

                # 'replace' substitutes U+FFFD for the dangling byte.
                self.assertEqual(
                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
                    "\u0000\ufffd"
                )

                codecs.register_error("test.hui", handler_unicodeinternal)

                self.assertEqual(
                    b"\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
                    "\u0000\u0001\u0000"
                )
示例#19
0
 def test_badhandler_longindex(self):
     """A handler returning an out-of-range resume position must fail."""
     import codecs
     import sys

     name = 'test.badhandler_longindex'

     def bad_resume(exc):
         # sys.maxsize + 1 can never be a valid resume position.
         return ('', sys.maxsize + 1)

     codecs.register_error(name, bad_resume)
     # CPython raises OverflowError here
     raises((IndexError, OverflowError), b'apple\x92ham\x93spam'.decode, 'utf-8', name)
示例#20
0
    def test_unicode_internal(self):
        """unicode-internal decoding under replace/ignore/custom handlers.

        Expected values differ between UCS-4 and UCS-2 builds because the
        internal unit size differs (4 vs 2 bytes), as distinguished below by
        sys.maxunicode.  (Python 2 code: str.decode.)
        """
        import codecs
        import sys
        # A single byte cannot form a complete internal unit.
        try:
            '\x00'.decode('unicode-internal')
        except UnicodeDecodeError:
            pass
        else:
            raise Exception("DID NOT RAISE")

        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace")
        if sys.maxunicode > 65535:
            assert res == u"\u0000\ufffd"    # UCS4 build
        else:
            assert res == u"\x00\x00\ufffd"  # UCS2 build

        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore")
        if sys.maxunicode > 65535:
            assert res == u"\u0000"   # UCS4 build
        else:
            assert res == u"\x00\x00" # UCS2 build

        def handler_unicodeinternal(exc):
            # Substitute U+0001 and restart decoding at absolute offset 1
            # (re-reading overlapping bytes, per the expected results below).
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            return (u"\x01", 1)
        codecs.register_error("test.hui", handler_unicodeinternal)
        res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
        if sys.maxunicode > 65535:
            assert res == u"\u0000\u0001\u0000"   # UCS4 build
        else:
            assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
示例#21
0
文件: decode.py 项目: sp1ff/rubepl
def decode_file(filename, codepage=None):
    """Read 'filename', strip the BOM if present, strip any leading or trailing
    whitespace, return a list of Python strings.

    In order to read the file, we need to know the file's encoding (i.e. how
    the writer represented the characters contained therein-- ASCII, UTF-8, or
    whatever).  The caller can specify the file encoding explicitly, or set
    'codepage' to None to have this function try to deduce the file encoding.

    TODO: Document in more detail exactly what is returned-- i.e. what exactly
    does readlines return when the file is opened with an 'encoding' parameter?
    """

    # Fallback handler: on decode errors, reinterpret offending bytes as
    # cp1252 (handler defined elsewhere in this module).
    codecs.register_error('fb_cp1252', handle_decode_err_by_fb_cp1252)

    ext = os.path.splitext(filename)[1]
    if codepage:
        # Caller-specified encoding: decode strictly.
        incp = codepage
        errs = 'strict'
    elif '.m3u8' == ext:
        # .m3u8 playlists are nominally UTF-8; fall back to cp1252 for
        # stray bytes rather than failing the whole read.
        incp = 'utf_8'
        errs = 'fb_cp1252'
    else:
        incp = 'cp1252'
        errs = 'strict'

    with open(filename, 'r', -1, incp, errs) as fh:
        lines = fh.readlines()
    # BUG FIX: guard against an empty file — lines[0] raised IndexError.
    if lines:
        lines[0] = maybe_remove_bom(lines[0])

    return lines
示例#22
0
def initbotscharsets():
    '''set up right charset handling for specific charsets (UNOA, UNOB, UNOC, etc).'''
    # (Python 2 code: uses the unicode() builtin.)
    codecs.register(codec_search_function)  #tell python how to search a codec defined by bots. These are the codecs in usersys/charset
    # Replacement character used by the 'botsreplace' handler; configurable
    # in bots.ini, defaulting to a single space.
    botsglobal.botsreplacechar = unicode(botsglobal.ini.get('settings','botsreplacechar',u' '))
    codecs.register_error('botsreplace', botscharsetreplace)    #define the ' botsreplace' error handling for codecs/charsets.
    for key, value in botsglobal.ini.items('charsets'): #set aliases for charsets in bots.ini
        encodings.aliases.aliases[key]=value
示例#23
0
 def test_encode_custom_error_handler_type(self):
     """Encoding a lone surrogate with this handler must raise TypeError.

     The handler returns ('\\xc3', position); the assertion below pins that
     gbk rejects this replacement with TypeError.
     """
     import codecs
     import sys
     codecs.register_error("test.test_encode_custom_error_handler_type",
                           lambda e: ('\xc3', e.end))
     raises(TypeError, u"\uDDA1".encode, "gbk",
            "test.test_encode_custom_error_handler_type")
示例#24
0
 def test_decode_custom_error_handler_overflow(self):
     """A resume position past any possible input must fail.

     The handler resumes at sys.maxint + 1; CPython raises OverflowError,
     other implementations may use IndexError.  (Python 2: sys.maxint.)
     """
     import codecs
     import sys
     codecs.register_error("test.test_decode_custom_error_handler_overflow",
                           lambda e: (u'', sys.maxint + 1))
     raises((IndexError, OverflowError), "abc\xDD".decode, "hz",
            "test.test_decode_custom_error_handler_overflow")
示例#25
0
def conv2ASCII(bigstring):
	"""Force *bigstring* into ASCII, marking each unencodable char '1FOREIGN'."""
	def mark_foreign(error):
		# Insert the marker and step one unit past the start of the error.
		return ('1FOREIGN', error.start + 1)

	codecs.register_error('foreign', mark_foreign)
	encoded = bigstring.encode('ascii', 'foreign')
	return encoded.decode('ascii', 'foreign')
示例#26
0
def register_strwidth_error(strwidth):
	'''Create new encode errors handling method similar to ``replace``

	Like ``replace`` the generated handler substitutes question marks for
	characters that cannot be represented in the requested encoding, but it
	emits one question mark per display cell the offending characters occupy
	as measured by *strwidth* (so a fullwidth character yields two).

	Because character width depends on terminal settings, a fresh handler
	with a unique name is registered for each *strwidth* function.

	:param function strwidth:
		Function that computes string width measured in display cells.

	:return: New error handling method name.
	'''
	global last_swe_idx
	last_swe_idx += 1

	def handler(err):
		if not isinstance(err, UnicodeEncodeError):
			raise NotImplementedError
		offending = err.object[err.start:err.end]
		return ('?' * strwidth(offending), err.end)

	ename = 'powerline_encode_strwidth_error_{0}'.format(last_swe_idx)
	codecs.register_error(ename, handler)
	return ename
示例#27
0
文件: py7file.py 项目: titusz/py7file
    def get_sanitized_filename(self):
        """Create a sanatized version of the filename.

        :return: Portable and secure version of filename.
        """
        codecs.register_error("replace_", self._replace_under_error_handler)
        ascii_strip_re = re.compile(r'[^A-Za-z0-9_.-]')
        windows_device_files = ('CON', 'AUX', 'COM1', 'COM2', 'COM3', 'COM4',
                                'LPT1', 'LPT2', 'LPT3', 'PRN', 'NUL')

        # BUG FIX: start from self.filename once and keep transforming the
        # local value — the original loop read self.filename each iteration,
        # discarding the NFKD normalization and any prior separator
        # replacement (and left `filename` unbound for non-unicode input
        # when no separator matched).
        filename = self.filename
        if isinstance(filename, unicode):
            from unicodedata import normalize
            filename = normalize('NFKD', filename).encode('ascii',
                                                          'replace_')
        for sep in os.path.sep, os.path.altsep:
            if sep:
                filename = filename.replace(sep, ' ')
        # Collapse whitespace to underscores, strip disallowed characters,
        # then trim leading/trailing dots and underscores.
        filename = str(ascii_strip_re.sub('_', '_'.join(
            filename.split()))).strip('._')

        # Reserved device names on Windows get a leading underscore.
        if os.name == 'nt' and filename and\
           filename.split('.')[0].upper() in windows_device_files:
            filename = '_' + filename

        return filename
示例#28
0
 def test_bmp(self):
     """A BMP character is written as a CSS escape by the 'cssypy' handler."""
     codecs.register_error('cssypy', stringutil.css_unicode_error_handler)
     Writer = codecs.getwriter('ascii')
     stream = StringIO()
     writer = Writer(stream, errors='cssypy')
     writer.write(u'ab\uABCDcd')
     # U+ABCD is rendered as the 6-digit CSS escape \00ABCD.
     self.assertEqual('ab\\00ABCDcd', stream.getvalue())
示例#29
0
    def test_callback_returns_bytes(self):
        """An encode error handler may return bytes to splice in verbatim."""
        def myreplace(exc):
            # Replace the unmappable character with the raw bytes b"1234".
            return (b"1234", exc.end)

        codecs.register_error("test.cjktest", myreplace)
        enc = self.encode("abc" + self.unmappedunicode + "def", "test.cjktest")[0]
        self.assertEqual(enc, b"abc1234def")
示例#30
0
    def test_badhandlerresults(self):
        """Handlers returning malformed results must cause TypeError.

        A valid error-handler result is a (replacement, new_position)
        2-tuple; every entry in *results* violates that contract.
        """
        # Each of these is an invalid handler return value.
        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

        for res in results:
            # BUG FIX: error handlers are called with the exception as an
            # argument; the original `lambda: res` had the wrong arity, so
            # TypeError came from the call itself and the bad *results*
            # values were never actually returned to the codec machinery.
            codecs.register_error("test.badhandler", lambda x: res)
            # Encoding side: U+3042 is unencodable in all listed codecs.
            for enc in encs:
                self.assertRaises(
                    TypeError,
                    u"\u3042".encode,
                    enc,
                    "test.badhandler"
                )
            # Decoding side: inputs chosen to trigger a decode error per codec.
            for (enc, bytes) in (
                ("ascii", "\xff"),
                ("utf-8", "\xff"),
                ("utf-7", "+x-"),
                ("unicode-internal", "\x00"),
            ):
                self.assertRaises(
                    TypeError,
                    bytes.decode,
                    enc,
                    "test.badhandler"
                )
    from importlib.util import cache_from_source


if PY2:
    # In Python 2.7, backslashreplace exists
    # but does not support use for decoding.
    # We implement our own replace handler for this
    # situation, so that we can consistently use
    # backslash replacement for all versions.
    def backslashreplace_decode_fn(err):
        """Decode error handler: render each offending byte as a \\xNN escape."""
        raw_bytes = (err.object[i] for i in range(err.start, err.end))
        # Python 2 gave us characters - convert to numeric bytes
        raw_bytes = (ord(b) for b in raw_bytes)
        return u"".join(map(u"\\x{:x}".format, raw_bytes)), err.end
    codecs.register_error(
        "backslashreplace_decode",
        backslashreplace_decode_fn,
    )
    backslashreplace_decode = "backslashreplace_decode"
else:
    # Python 3's built-in handler already supports decoding.
    backslashreplace_decode = "backslashreplace"


def has_tls():
    # type: () -> bool
    try:
        import _ssl  # noqa: F401  # ignore unused
        return True
    except ImportError:
        pass

    from pip._vendor.urllib3.util import IS_PYOPENSSL
示例#32
0
def get_temp_dir():
    # Accessor for the module-level temp_dir path (set elsewhere at startup).
    return temp_dir


def mixed_decoder(unicode_error):
    """Decode error handler: re-decode the offending bytes as cp1252.

    Useful for byte streams that are mostly UTF-8 but contain stray
    cp1252-encoded characters (e.g. Windows "smart quotes").

    Returns a (replacement, resume_position) tuple per the codecs
    error-handler protocol.
    """
    # BUG FIX: use the .object attribute instead of indexing the exception
    # (unicode_error[1]); tuple-style exception indexing is Python-2-only
    # and raises TypeError on Python 3.  .object is the documented,
    # version-independent accessor for the input being decoded.
    err_str = unicode_error.object
    err_len = unicode_error.end - unicode_error.start
    next_position = unicode_error.start + err_len
    replacement = err_str[unicode_error.start:unicode_error.end].decode(
        'cp1252')

    return u'%s' % replacement, next_position


codecs.register_error('mixed', mixed_decoder)


def json_request(method,
                 params=None,
                 host='localhost',
                 port=8080,
                 username=None,
                 password=None):
    # e.g. KodiJRPC_Get("PVR.GetProperties", {"properties": ["recording"]})

    url = 'http://{}:{}/jsonrpc'.format(host, port)
    header = {'Content-Type': 'application/json'}

    jsondata = {'jsonrpc': '2.0', 'method': method, 'id': method}
示例#33
0
#!/usr/bin/env python3
import os
import sys
import argparse
import json
from hexdump import hexdump
import codecs
# Globally soften decode failures: the built-in 'strict' handler is replaced
# with backslashreplace so dumping arbitrary binary payloads never raises.
# NOTE(review): this affects every codec operation in the process.
codecs.register_error("strict", codecs.backslashreplace_errors)

from cereal import log
import cereal.messaging as messaging
from cereal.services import service_list

if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description=
        'Dump communication sockets. See cereal/services.py for a complete list of available sockets.'
    )
    parser.add_argument('--pipe', action='store_true')
    parser.add_argument('--raw', action='store_true')
    parser.add_argument('--json', action='store_true')
    parser.add_argument('--dump-json', action='store_true')
    parser.add_argument('--no-print', action='store_true')
    parser.add_argument('--addr', default='127.0.0.1')
    parser.add_argument('--values',
                        help='values to monitor (instead of entire event)')
    parser.add_argument(
        "socket",
        type=str,
        nargs='*',
示例#34
0
			self.log('TRIAL FINISHED')
			self.flush_log()
		"""

        self.experiment._log.flush()
        os.fsync(self.experiment._log)


def osreplace(exc):
    """
	desc:
		A replacement function to allow opensame-style replacement of unicode
		characters.

	arguments:
		exc:
			type:	UnicodeEncodeError

	returns:
		desc:	A (replacement, end) tuple.
		type:	tuple
	"""

    # Render each unencodable character as 'U+XXXX' (zero-padded hex).
    replacement = u''.join(u'U+%.4X' % ord(char)
                           for char in exc.object[exc.start:exc.end])
    return replacement, exc.end


codecs.register_error(u'osreplace', osreplace)
示例#35
0
from collections import namedtuple, OrderedDict
import six
from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit, urldefrag,
                                    urlencode, urlparse, quote, parse_qs,
                                    parse_qsl, ParseResult, unquote,
                                    urlunparse)
from six.moves.urllib.request import pathname2url, url2pathname
from w3lib.util import to_bytes, to_native_str, to_unicode


# error handling function for bytes-to-Unicode decoding errors with URLs
def _quote_byte(error):
    # Percent-encode the undecodable byte run and splice it back into the
    # text, resuming right after the bad bytes.
    bad_bytes = error.object[error.start:error.end]
    return to_unicode(quote(bad_bytes)), error.end


codecs.register_error('percentencode', _quote_byte)

# constants from RFC 3986, Section 2.2 and 2.3
RFC3986_GEN_DELIMS = b':/?#[]@'
RFC3986_SUB_DELIMS = b"!$&'()*+,;="
RFC3986_RESERVED = RFC3986_GEN_DELIMS + RFC3986_SUB_DELIMS
RFC3986_UNRESERVED = (string.ascii_letters + string.digits +
                      "-._~").encode('ascii')
EXTRA_SAFE_CHARS = b'|'  # see https://github.com/scrapy/w3lib/pull/25

_safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b'%'


def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
    """Convert the given URL into a legal URL by escaping unsafe characters
    according to RFC-3986.
示例#36
0
#!/usr/bin/env python3
# Command line interface to compile ElasticSearch queries from EQUEL expressions and perform queries against an ES instance.

import argparse

# change default decoding error behaviour to less strict 'replace', globally
import codecs
codecs.register_error('strict', codecs.replace_errors)

from elasticsearch import Elasticsearch
from equel.engine import EQUELEngine, EQUELTimeRange
import arrow
import json
import sys

argparser = argparse.ArgumentParser(description="EQUEL Command Line Interface")
# NOTE(review): combining action="append" with a *string* default is a known
# argparse pitfall — when -s is given argparse tries to append to the default,
# and when it is not, args.server is the bare string "localhost" rather than a
# list.  Consider default=["localhost"]; confirm how args.server is consumed.
argparser.add_argument("--server",
                       "-s",
                       action="append",
                       default="localhost",
                       help="ElasticSearch server")
argparser.add_argument("--index",
                       "-i",
                       default="*",
                       help="ElasticSearch index pattern to query")
argparser.add_argument("--max-results",
                       "-m",
                       type=int,
                       default=1000,
                       help="Maximum returned documents")
argparser.add_argument("--timeout",
示例#37
0
    def test_decodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
        # and callers
        self.assertRaises(LookupError, b"\xff".decode, "ascii", "test.unknown")

        # A handler must return a (replacement, new_position) tuple;
        # returning anything else is a TypeError.
        def baddecodereturn1(exc):
            return 42

        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
        self.assertRaises(TypeError, b"\xff".decode, "ascii",
                          "test.baddecodereturn1")
        self.assertRaises(TypeError, b"\\".decode, "unicode-escape",
                          "test.baddecodereturn1")
        self.assertRaises(TypeError, b"\\x0".decode, "unicode-escape",
                          "test.baddecodereturn1")
        self.assertRaises(TypeError, b"\\x0y".decode, "unicode-escape",
                          "test.baddecodereturn1")
        self.assertRaises(TypeError, b"\\Uffffeeee".decode, "unicode-escape",
                          "test.baddecodereturn1")
        self.assertRaises(TypeError, b"\\uyyyy".decode, "raw-unicode-escape",
                          "test.baddecodereturn1")

        # The resume position must be an integer, not None.
        def baddecodereturn2(exc):
            return ("?", None)

        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
        self.assertRaises(TypeError, b"\xff".decode, "ascii",
                          "test.baddecodereturn2")

        # PosReturn lets each call dictate the resume position, so we can
        # probe how the decoder validates it.
        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")

        # Valid negative position
        handler.pos = -2
        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, b"\xff0".decode, "ascii",
                          "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEqual(b"\xff0".decode("ascii", "test.posreturn"), "<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, b"\xff0".decode, "ascii",
                          "test.posreturn")

        # Restart at the "0"
        handler.pos = 6
        self.assertEqual(
            b"\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0")

        # charmap_decode error paths: exceptions raised while looking up the
        # mapping must propagate unchanged.
        class D(dict):
            def __getitem__(self, key):
                raise ValueError

        self.assertRaises(UnicodeError, codecs.charmap_decode, b"\xff",
                          "strict", {0xff: None})
        self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict",
                          D())
        self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict",
                          {0xff: sys.maxunicode + 1})
示例#38
0
            return vim_bufname(segment_info['bufnr'])
        else:
            return name.encode(segment_info['encoding']) if name else None


vim_strtrans = vim_get_func('strtrans', rettype='unicode')


def powerline_vim_strtrans_error(e):
    """Codec error handler that renders undecodable bytes via Vim's strtrans().

    Only decode errors are supported; any other Unicode error is refused.
    """
    if not isinstance(e, UnicodeDecodeError):
        raise NotImplementedError
    bad = e.object[e.start:e.end]
    return (vim_strtrans(bad), e.end)


codecs.register_error('powerline_vim_strtrans_error',
                      powerline_vim_strtrans_error)

did_autocmd = False
buffer_caches = []


def register_buffer_cache(cachedict):
    global did_autocmd
    global buffer_caches
    from powerline.vim import get_default_pycmd, pycmd
    if not did_autocmd:
        import __main__
        __main__.powerline_on_bwipe = on_bwipe
        vim.command('augroup Powerline')
        vim.command(
            '	autocmd! BufWipeout * :{pycmd} powerline_on_bwipe()'.format(
示例#39
0
    This python `codecs`_ error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references.

    >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
    'The cost was &euro;12.'
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start:ex.end]
        text = _html_entities_escaper.escape(bad_text)
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error('htmlentityreplace', htmlentityreplace_errors)

# TODO: options to make this dynamic per-compilation will be added in a later
# release
DEFAULT_ESCAPES = {
    'x': 'filters.xml_escape',
    'h': 'filters.html_escape',
    'u': 'filters.url_escape',
    'trim': 'filters.trim',
    'entity': 'filters.html_entities_escape',
    'unicode': 'unicode',
    'decode': 'decode',
    'str': 'str',
    'n': 'n'
}
示例#40
0
文件: urls.py 项目: zengke123/EbOps
_to_iri_unsafe = "".join([chr(c) for c in range(128) if c not in _always_safe])


def _codec_error_url_quote(e):
    """Used in :func:`uri_to_iri` after unquoting to re-quote any
    invalid bytes.
    """
    quoted = _fast_url_quote(e.object[e.start : e.end])

    # On Python 2 the quoter returns bytes; normalize to text.
    return (quoted.decode("utf-8") if PY2 else quoted, e.end)


codecs.register_error("werkzeug.url_quote", _codec_error_url_quote)


def uri_to_iri(uri, charset="utf-8", errors="werkzeug.url_quote"):
    """Convert a URI to an IRI. All valid UTF-8 characters are unquoted,
    leaving all reserved and invalid characters quoted. If the URL has
    a domain, it is decoded from Punycode.

    >>> uri_to_iri("http://xn--n3h.net/p%C3%A5th?q=%C3%A8ry%DF")
    'http://\\u2603.net/p\\xe5th?q=\\xe8ry%DF'

    :param uri: The URI to convert.
    :param charset: The encoding to encode unquoted bytes with.
    :param errors: Error handler to use during ``bytes.encode``. By
        default, invalid bytes are left quoted.
示例#41
0
    sequences encoded in %XX format, but as part of a unicode string.

    :param exc:
        The UnicodeDecodeError exception

    :return:
        A 2-element tuple of (replacement unicode string, integer index to
        resume at)
    """

    bytes_as_ints = bytes_to_list(exc.object[exc.start:exc.end])
    replacements = ['%%%02x' % num for num in bytes_as_ints]
    return (''.join(replacements), exc.end)


codecs.register_error('iriutf8', _iri_utf8_errors_handler)


def _urlquote(string, safe=''):
    """
    Quotes a unicode string for use in a URL

    :param string:
        A unicode string

    :param safe:
        A unicode string of character to not encode

    :return:
        None (if string is None) or an ASCII byte string of the quoted string
    """
示例#42
0
# -*- coding: utf-8 -*-

from oleander import app
from itertools import groupby as _groupby
from jinja2.filters import _GroupTuple
from operator import itemgetter
import codecs
import string

codecs.register_error('alphabetical_directory', lambda error:
                      (u'#', error.start + 1))


class Letter(str):
    """Alphabet letter. Implements comparing where '#' is at the end of alhpabet."""
    def __lt__(self, other):
        """'#' sorts after every letter, so it is never less than anything."""
        if self == '#':
            return False
        return True if other == '#' else str(self) < other

    def __le__(self, other):
        """Every letter compares <= '#'; '#' itself only <= another '#'."""
        if other == '#':
            return True
        if self == '#':
            return False
        return str(self) <= other

    def __gt__(self, other):
        if other == '#':
示例#43
0
            codepoints.append(codepoint)
        for cp in codepoints:
            e = _encode_entity_map.get(cp)
            if e:
                res.append("&")
                res.append(e)
                if not e.endswith(";"):
                    res.append(";")
            else:
                res.append("&#x%s;" % (hex(cp)[2:]))
        return ("".join(res), exc.end)
    else:
        return xmlcharrefreplace_errors(exc)


register_error("htmlentityreplace", htmlentityreplace_errors)


def serialize(input, tree="etree", encoding=None, **serializer_opts):
    """Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string
示例#44
0
import o2on_config
from o2on_const import regHosts, ProtocolVer, AppName
import o2on_node
import o2on_dat
from o2on_node import ip2e, port2e, e2ip
import o2on_key
import o2on_im
import o2on_util

def my_replace_handler(inst):
    # Replace the offending sequence with a katakana middle dot (U+30FB) and
    # resume two bytes past the error start — assumes a double-byte encoding
    # such as Shift_JIS; TODO confirm against the callers.
    return (u"\u30fb", inst.start + 2)

try: 
    codecs.lookup_error('opy2on_replace')
except LookupError:
    codecs.register_error('opy2on_replace', my_replace_handler) 

class O2ONServer(BaseHTTPServer.HTTPServer):
    def __init__(self, handler, port, g):
        BaseHTTPServer.HTTPServer.__init__(self,
                                           ('', port), 
                                           handler)
        self.glob = g
        self.requests = []
        self.__is_shut_down = threading.Event()
        self.__serving = False
    def serve_forever(self, poll_interval=0.5):
        #hasattr(BaseHTTPServer.HTTPServer, '_handle_request_noblock'):
        if sys.hexversion >= 0x020600f0:
            BaseHTTPServer.HTTPServer.serve_forever(self, poll_interval) # 2.6
        else:
示例#45
0
    This python codecs error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references::

        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
        'The cost was &euro;12.'
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start:ex.end]
        text = _html_entities_escaper.escape(bad_text)
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error("htmlentityreplace", htmlentityreplace_errors)

# TODO: options to make this dynamic per-compilation will be added in a later
# release
DEFAULT_ESCAPES = {
    "x": "filters.xml_escape",
    "h": "filters.html_escape",
    "u": "filters.url_escape",
    "trim": "filters.trim",
    "entity": "filters.html_entities_escape",
    "unicode": "unicode",
    "decode": "decode",
    "str": "str",
    "n": "n",
}
import codecs
import os
import unicodedata
import logging
from tempfile import NamedTemporaryFile
from typing import Any, Dict, Generator, List, Optional, Tuple

import demistomock as demisto  # noqa: F401
import pyshark
from CommonServerPython import *  # noqa: F401

TCP_FLAG_FIN = 0x01
TCP_FLAG_SYN = 0x02
TCP_FLAG_ACK = 0x10

codecs.register_error('replace_with_space', lambda x:
                      (u' ', x.start + 1))  # type: ignore[attr-defined]


def from_bytes_to_text(mode: str, binary: bytes) -> str:
    """
    Make a text from a binary.

    :param mode: How to convert the binary to text.
    :return: A text converted from the binary.
    """
    if mode == 'text-based-protocol':
        # Keep all the characters used in text based protocols
        # * The unicodedata category names of control code start with C
        return ''.join(' ' if c == u'\ufffd' or (c not in (
            '\n', '\r', '\t') and unicodedata.category(c)[0] == 'C') else c
                       for c in binary.decode('utf-8', errors='replace'))
示例#47
0
from ezdxf.lldxf.const import (
    DXFError, DXFStructureError, DXFVersionError, DXFTableEntryError,
    DXFAppDataError, DXFXDataError, DXFAttributeError, DXFValueError,
    DXFKeyError, DXFIndexError, DXFTypeError, DXFBlockInUseError,
    InvalidGeoDataException, InsertUnits,
    ACI, DXF12, DXF2000, DXF2004, DXF2007, DXF2010, DXF2013, DXF2018,
)
# name space imports - do not remove

import codecs
from ezdxf.lldxf.encoding import (
    dxf_backslash_replace, has_dxf_unicode, decode_dxf_unicode,
)

# setup DXF unicode encoder -> '\U+nnnn'
codecs.register_error('dxfreplace', dxf_backslash_replace)

# Load font support automatically:
if EZDXF_AUTO_LOAD_FONTS:
    fonts.load()

YES_NO = {True: 'yes', False: 'no'}


def print_config(func=print, verbose=False):
    from pathlib import Path
    from ezdxf.acc import USE_C_EXT

    func(f"ezdxf v{__version__} @ {Path(__file__).parent}")
    func(f"Python version: {sys.version}")
    func(f"using C-extensions: {YES_NO[USE_C_EXT]}")
示例#48
0
    def read_doc(self, docname, app=None):
        # type: (unicode, Sphinx) -> None
        """Parse a file and add/update inventory entries for the doctree."""

        self.temp_data['docname'] = docname
        # defaults to the global default, but can be re-set in a document
        self.temp_data['default_domain'] = \
            self.domains.get(self.config.primary_domain)

        # Propagate per-project configuration into the docutils settings.
        self.settings['input_encoding'] = self.config.source_encoding
        self.settings['trim_footnote_reference_space'] = \
            self.config.trim_footnote_reference_space
        self.settings['gettext_compact'] = self.config.gettext_compact

        # Enable smart quotes only when the language has a quote mapping.
        language = self.config.language or 'en'
        self.settings['language_code'] = language
        self.settings['smart_quotes'] = True
        for tag in normalize_language_tag(language):
            if tag in smartchars.quotes:
                break
        else:
            self.settings['smart_quotes'] = False

        docutilsconf = path.join(self.srcdir, 'docutils.conf')
        # read docutils.conf from source dir, not from current dir
        OptionParser.standard_config_files[1] = docutilsconf
        if path.isfile(docutilsconf):
            self.note_dependency(docutilsconf)

        with sphinx_domains(self):
            if self.config.default_role:
                role_fn, messages = roles.role(self.config.default_role,
                                               english, 0, dummy_reporter)
                if role_fn:
                    roles._roles[''] = role_fn
                else:
                    logger.warning('default role %s not found',
                                   self.config.default_role,
                                   location=docname)

            # Route decode errors through Sphinx's warn-and-replace handler.
            codecs.register_error('sphinx',
                                  self.warn_and_replace)  # type: ignore

            # publish manually
            reader = SphinxStandaloneReader(
                self.app, parsers=self.app.registry.get_source_parsers())
            pub = Publisher(reader=reader,
                            writer=SphinxDummyWriter(),
                            destination_class=NullOutput)
            pub.set_components(None, 'restructuredtext', None)
            pub.process_programmatic_settings(None, self.settings, None)
            src_path = self.doc2path(docname)
            source = SphinxFileInput(app,
                                     self,
                                     source=None,
                                     source_path=src_path,
                                     encoding=self.config.source_encoding)
            pub.source = source
            pub.settings._source = src_path
            pub.set_destination(None, None)
            pub.publish()
            doctree = pub.document

        # post-processing
        for domain in itervalues(self.domains):
            domain.process_doc(self, docname, doctree)

        # allow extension-specific post-processing
        if app:
            app.emit('doctree-read', doctree)

        # store time of reading, for outdated files detection
        # (Some filesystems have coarse timestamp resolution;
        # therefore time.time() can be older than filesystem's timestamp.
        # For example, FAT32 has 2sec timestamp resolution.)
        self.all_docs[docname] = max(time.time(),
                                     path.getmtime(self.doc2path(docname)))

        if self.versioning_condition:
            old_doctree = None
            if self.versioning_compare:
                # get old doctree
                try:
                    with open(
                            self.doc2path(docname, self.doctreedir,
                                          '.doctree'), 'rb') as f:
                        old_doctree = pickle.load(f)
                except EnvironmentError:
                    pass

            # add uids for versioning
            if not self.versioning_compare or old_doctree is None:
                list(add_uids(doctree, self.versioning_condition))
            else:
                list(
                    merge_doctrees(old_doctree, doctree,
                                   self.versioning_condition))

        # make it picklable
        doctree.reporter = None
        doctree.transformer = None
        doctree.settings.warning_stream = None
        doctree.settings.env = None
        doctree.settings.record_dependencies = None

        # cleanup
        self.temp_data.clear()
        self.ref_context.clear()
        roles._roles.pop('',
                         None)  # if a document has set a local default role

        # save the parsed doctree
        doctree_filename = self.doc2path(docname, self.doctreedir, '.doctree')
        ensuredir(path.dirname(doctree_filename))
        with open(doctree_filename, 'wb') as f:
            pickle.dump(doctree, f, pickle.HIGHEST_PROTOCOL)
示例#49
0
def _escapecss(e):
    """
    Escapes characters not allowed in the current encoding the CSS way
    with a backslash followed by a uppercase hex code point

    E.g. the german umlaut 'ä' is escaped as \E4
    """
    s = e.object[e.start:e.end]
    return u''.join([
        ur'\%s ' % str(hex(ord(x)))[2:]  # remove 0x from hex
        .upper() for x in s
    ]), e.end


codecs.register_error('escapecss', _escapecss)


class Preferences(object):
    """Control output of CSSSerializer.

    defaultAtKeyword = True
        Should the literal @keyword from src CSS be used or the default
        form, e.g. if ``True``: ``@import`` else: ``@i\mport``
    defaultPropertyName = True
        Should the normalized propertyname be used or the one given in
        the src file, e.g. if ``True``: ``color`` else: ``c\olor``

        Only used if ``keepAllProperties==False``.

    defaultPropertyPriority = True
示例#50
0
文件: minjson.py 项目: 2050utopia/hwr
    If encoding fails, \\uxxxx must be emitted. This
    is similar to the "backshashreplace" handler, only
    that we never emit \\xnn since this is not legal
    according to the JSON syntax specs.
    '''
    if isinstance(exc, UnicodeEncodeError):
        part = exc.object[exc.start]
        # repr(part) will convert u'\unnnn' to u'u\\nnnn'
        return u'\\u%04x' % ord(part), exc.start + 1
    else:
        raise exc


# register the error handler
codecs.register_error('jsonreplace', jsonreplace_handler)

### Writer


def write(input, encoding='utf-8', outputEncoding=None):
    # Serialize ``input`` to JSON text using JsonWriter (defined elsewhere in
    # this module) and return the accumulated output.
    writer = JsonWriter(input_encoding=encoding,
                        output_encoding=outputEncoding)
    writer.write(input)
    return writer.getvalue()


re_strmangle = re.compile('"|\b|\f|\n|\r|\t|\\\\')


def func_strmangle(match):
示例#51
0
#!/usr/bin/env python

# UTF-8 encoding

# Python script getting, parsing and saving (in csv format) data from https://www.peakware.com.
#
# Author: Alexandre Louisnard [email protected]
# 2017

import sys, codecs
import urllib.request
import re

sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach())
codecs.register_error("strict", codecs.ignore_errors)

listUrl = "https://www.peakware.com/peaks.php?choice=SoE"
outputFile = "peakware.csv"

print("GETTING: {}".format(listUrl))

listPage = urllib.request.urlopen(listUrl).read().decode('utf-8', 'ignore')

# (link, name, altitude)
matchLinks = re.findall(r"""\<li\>\<a\ href\=\"(peaks\.php\?pk\=[0-9]+)\"\>(.*)\<\/a\>\ \(.*\)\<br\/\>[0-9]+\ ft\/([0-9]+)\ m\<\/li\>""", listPage)

print("Found {} summits\n".format(len(matchLinks)))
sys.stdout.flush()

f = codecs.open(outputFile,"w",encoding='utf8')
f.write("name;elevation;latitude;longitude;continent;country;range\n")
from hdt import HDTDocument, IdentifierPosition
from collections import deque
import pandas as pd
import numpy as np
import rocksdb
import codecs
import datetime


def strict_handler(exception):
    # Error handler that silently drops the offending bytes: substitute an
    # empty string and resume after the error.  Registered below under the
    # name "strict", i.e. it *replaces* Python's default strict behaviour
    # globally for this process.
    return u"", exception.end


codecs.register_error("strict", strict_handler)

PATH_LOD = "/scratch/wbeek/data/LOD-a-lot/data.hdt"
PATH_SAMEAS_NETWORK = "/home/jraad/ssd/data/identity-data/"
PATH_ID2TERMS_099 = "/home/jraad/ssd/data/identity-data-0_99/id2terms_0-99.csv"
PATH_TERM2ID_099 = "/home/jraad/ssd/data/identity-data-0_99/term2id_0-99.csv"

# load the LOD-a-lot HDT file
hdt_lod = HDTDocument(PATH_LOD)

# these identifiers will be used later to query the HDT file using their IDs
id_type = hdt_lod.convert_term(
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
    IdentifierPosition.Predicate)
id_sameAs = hdt_lod.convert_term("http://www.w3.org/2002/07/owl#sameAs",
                                 IdentifierPosition.Predicate)
id_subClassOf = hdt_lod.convert_term(
    "http://www.w3.org/2000/01/rdf-schema#subClassOf",
示例#53
0
# Register a 'repr' error strategy.
# =================================
# Sometimes we want to echo bytestrings back to a user, and we don't know what
# encoding will work. This error strategy replaces non-decodable bytes with
# their Python representation, so that they are human-visible.
#
# See also:
#   - https://github.com/dcrosta/mongo/commit/e1ac732
#   - http://www.crummy.com/software/BeautifulSoup/bs4/doc/#unicode-dammit

def replace_with_repr(unicode_error):
    # Replace undecodable bytes with their Python repr (surrounding quotes
    # stripped) so they remain human-visible in the decoded text.
    # NOTE(review): ``unicode`` exists only on Python 2 — this handler raises
    # NameError on Python 3; confirm the target interpreter.
    offender = unicode_error.object[unicode_error.start:unicode_error.end]
    return (unicode(repr(offender).strip("'").strip('"')), unicode_error.end)

codecs.register_error('repr', replace_with_repr)


def unicode_dammit(s, encoding="UTF-8"):
    """Given a bytestring, return a unicode decoded with `encoding`.

    Any bytes not decodable with UTF-8 will be replaced with their Python
    representation, so you'll get something like u"foo\\xefbar".

    """
    if not isinstance(s, str):
        raise TypeError("I got %s, but I want <type 'str'>." % s.__class__)
    # Delegate the hard cases to the 'repr' error handler registered above.
    return s.decode(encoding, 'repr')

示例#54
0
                    if not 0xD800 <= code <= 0xDCFF:
                        raise NotASurrogateError()
                    if 0xDC00 <= code <= 0xDC7F:
                        decoded.append(unichr(code - 0xDC00))
                    elif code <= 0xDCFF:
                        decoded.append(unichr(code - 0xDC00))
                    else:
                        raise NotASurrogateError()
                decoded = str().join(decoded)
            else:
                raise exc
        except NotASurrogateError:
            raise exc
        return (decoded, exc.end)

    codecs.register_error('surrogateescape', surrogateescape_handler)


def exception_encode(ex, codec):
    # On Python 2 (where str is bytes), rebuild the exception with all of its
    # bytestring arguments decoded through ``codec`` so the message is text.
    # On Python 3 the exception is returned untouched.
    if str == bytes:
        # __reduce__ yields (exception_class, args); decode each bytes arg.
        reduced = ex.__reduce__()
        ex = reduced[0](*tuple(
            map(
                lambda arg: codec.decode(arg)[0]
                if isinstance(arg, bytes) else arg, reduced[1])))
    return ex


def sql_commands(read_line):
    delims = ['"', "'", ';', '--']
    counter = 0
示例#55
0
    def test_encodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers
        self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown")

        # A handler must return a (replacement, new_position) tuple;
        # anything else is a TypeError.
        def badencodereturn1(exc):
            return 42

        codecs.register_error("test.badencodereturn1", badencodereturn1)
        self.assertRaises(TypeError, "\xff".encode, "ascii",
                          "test.badencodereturn1")

        # The resume position must be an integer, not None.
        def badencodereturn2(exc):
            return ("?", None)

        codecs.register_error("test.badencodereturn2", badencodereturn2)
        self.assertRaises(TypeError, "\xff".encode, "ascii",
                          "test.badencodereturn2")

        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")

        # Valid negative position
        handler.pos = -2
        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, "\xff0".encode, "ascii",
                          "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEqual("\xff0".encode("ascii", "test.posreturn"), b"<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, "\xff0".encode, "ascii",
                          "test.posreturn")

        handler.pos = 0

        # charmap_encode error paths: exceptions raised while looking up the
        # mapping must propagate unchanged, for every error-handler name.
        class D(dict):
            def __getitem__(self, key):
                raise ValueError

        for err in ("strict", "replace", "xmlcharrefreplace",
                    "backslashreplace", "test.posreturn"):
            self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err,
                              {0xff: None})
            self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err,
                              D())
            self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err,
                              {0xff: 300})
示例#56
0
if hasattr(config, "WEBIRC_MODE") and config.WEBIRC_MODE == "hmac":
    HMACKEY = hmac.HMAC(key=config.HMACKEY)


def hmacfn(*args):
    # Derive a time-bucketed HMAC over the joined arguments; the bucket is
    # int(time / HMACTEMPORAL), so tokens expire as the bucket rolls over.
    # NOTE(review): on Python 3 ``h.update`` requires bytes, not str —
    # confirm this targets Python 2 or that an encode step exists elsewhere.
    h = HMACKEY.copy()
    h.update("%d %s" % (int(time.time() / HMACTEMPORAL), " ".join(args)))
    return h.hexdigest()


def utf8_iso8859_1(data, table=dict((x, bytes(x, "iso-8859-1").decode("iso-8859-1")) for x in map(chr, range(0, 256)))):
    """Codec error handler: decode the single offending byte as ISO-8859-1.

    :param data: the UnicodeDecodeError raised while decoding UTF-8.
    :param table: precomputed one-character ISO-8859-1 decode table.
    :return: (replacement character, position just after the bad byte)
    """
    ch = data.object[data.start]
    # Bug fix: on Python 3 indexing a bytes object yields an *int*, while the
    # table is keyed by one-character strings, so the lookup always missed and
    # the handler returned (None, ...), crashing the codec with TypeError.
    # Normalize the int to its one-character key first.
    if isinstance(ch, int):
        ch = chr(ch)
    return (table.get(ch), data.start + 1)


codecs.register_error("mixed-iso-8859-1", utf8_iso8859_1)


def irc_decode(x):
    # Decode a raw IRC line: first try UTF-8, letting the "mixed-iso-8859-1"
    # handler (registered above) patch isolated non-UTF-8 bytes; if decoding
    # still fails outright, fall back to ISO-8859-1, dropping unmappable bytes.
    try:
        return x.decode("utf-8", "mixed-iso-8859-1")
    except UnicodeDecodeError:
        return x.decode("iso-8859-1", "ignore")


class QWebIRCClient(basic.LineReceiver):
    delimiter = b'\n'

    def __init__(self, *args, **kwargs):
        self.__nickname = "(unregistered)"
示例#57
0
 def test_errorcallback_longindex(self):
     # A handler returning a resume position beyond the input length must
     # make the codec raise IndexError rather than read out of bounds.
     dec = codecs.getdecoder('euc-kr')
     myreplace = lambda exc: ('', sys.maxsize + 1)
     codecs.register_error('test.cjktest', myreplace)
     self.assertRaises(IndexError, dec, b'apple\x92ham\x93spam',
                       'test.cjktest')
示例#58
0
    font-size:12px;
}
</style>"""

# Leaving (dirty) possibility to change values from here (e.g. `export SQLMAP__MAX_NUMBER_OF_THREADS=20`)
for key, value in os.environ.items():
    if key.upper().startswith("%s_" % SQLMAP_ENVIRONMENT_PREFIX):
        _ = key[len(SQLMAP_ENVIRONMENT_PREFIX) + 1:].upper()
        if _ in globals():
            original = globals()[_]
            if isinstance(original, int):
                try:
                    globals()[_] = int(value)
                except ValueError:
                    pass
            elif isinstance(original, bool):
                globals()[_] = value.lower() in ('1', 'true')
            elif isinstance(original, (list, tuple)):
                globals()[_] = [__.strip() for __ in _.split(',')]
            else:
                globals()[_] = value

# Installing "reversible" unicode (decoding) error handler
def _reversible(ex):
    """Map undecodable bytes to reversible placeholder characters.

    Depending on INVALID_UNICODE_PRIVATE_AREA, each bad byte becomes either a
    private-use-area code point (0x0F00xx) or a formatted marker string; the
    original byte value stays recoverable from the replacement.
    """
    bad = ex.object[ex.start:ex.end]
    codes = [b if isinstance(b, int) else ord(b) for b in bad]
    if INVALID_UNICODE_PRIVATE_AREA:
        replacement = u"".join(_unichr(int('000f00%2x' % code, 16)) for code in codes)
    else:
        replacement = u"".join(INVALID_UNICODE_CHAR_FORMAT % code for code in codes)
    return (replacement, ex.end)

codecs.register_error("reversible", _reversible)
示例#59
0
        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
            res = []
            for c in exc.object[exc.start:exc.end]:
                e = encode_entity_map.get(c)
                if e:
                    res.append("&")
                    res.append(e)
                    if not e.endswith(";"):
                        res.append(";")
                else:
                    res.append(c.encode(exc.encoding, "xmlcharrefreplace"))
            return (u"".join(res), exc.end)
        else:
            return xmlcharrefreplace_errors(exc)

    register_error(unicode_encode_errors, htmlentityreplace_errors)

    del register_error


def encode(text, encoding):
    # Encode using the module's html-entity-replacing error mode (the handler
    # name stored in ``unicode_encode_errors``, registered above).
    return text.encode(encoding, unicode_encode_errors)


class HTMLSerializer(object):

    quote_attr_values = False
    quote_char = '"'
    use_best_quote_char = True
    minimize_boolean_attributes = True
示例#60
0
]


def escape_ascii(bytes_data):
    # Map every byte through the precomputed per-byte escape table,
    # producing a unicode string.
    return u''.join(ASCII_ESCAPE_LOOKUP[bval(ch)] for ch in bytes_data)


def escape_ascii_bytes(bytes_data):
    # Same as escape_ascii, but joins the bytes-valued escape table instead.
    return b''.join(ASCII_ESCAPE_LOOKUP_BYTES[bval(ch)] for ch in bytes_data)


def escape_utf8_error(err):
    """Codec error handler: render undecodable bytes via escape_ascii."""
    bad = err.object[err.start:err.end]
    return escape_ascii(bad), err.end


codecs.register_error('rdbslashescape', escape_utf8_error)


def escape_utf8(byte_data):
    # Decode as UTF-8; bytes that are not valid UTF-8 are escaped by the
    # 'rdbslashescape' handler registered above.
    return byte_data.decode('utf-8', 'rdbslashescape')


def bytes_to_unicode(byte_data, escape, skip_printable=False):
    """
    Decode given bytes using specified escaping method.
    :param byte_data: The byte-like object with bytes to decode.
    :param escape: The escape method to use.
    :param skip_printable: If True, don't escape byte_data with all 'printable ASCII' bytes. Defaults to False.
    :return: New unicode string, escaped with the specified method if needed.
    """
    if isnumber(byte_data):