Exemplo n.º 1
0
    def test_newline_decoder(self):
        import codecs
        decoder = codecs.getincrementaldecoder("utf-8")()
        decoder = io.IncrementalNewlineDecoder(decoder, translate=True)

        self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")

        self.assertEquals(decoder.decode(b'\xe8'), u"")
        self.assertEquals(decoder.decode(b'\xa2'), u"")
        self.assertEquals(decoder.decode(b'\x88'), u"\u8888")

        self.assertEquals(decoder.decode(b'\xe8'), u"")
        self.assertRaises(UnicodeDecodeError, decoder.decode, b'', final=True)

        decoder.setstate((b'', 0))
        self.assertEquals(decoder.decode(b'\n'), u"\n")
        self.assertEquals(decoder.decode(b'\r'), u"")
        self.assertEquals(decoder.decode(b'', final=True), u"\n")
        self.assertEquals(decoder.decode(b'\r', final=True), u"\n")

        self.assertEquals(decoder.decode(b'\r'), u"")
        self.assertEquals(decoder.decode(b'a'), u"\na")

        self.assertEquals(decoder.decode(b'\r\r\n'), u"\n\n")
        self.assertEquals(decoder.decode(b'\r'), u"")
        self.assertEquals(decoder.decode(b'\r'), u"\n")
        self.assertEquals(decoder.decode(b'\na'), u"\na")

        self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r\n'), u"\u8888\n")
        self.assertEquals(decoder.decode(b'\xe8\xa2\x88'), u"\u8888")
        self.assertEquals(decoder.decode(b'\n'), u"\n")
        self.assertEquals(decoder.decode(b'\xe8\xa2\x88\r'), u"\u8888")
        self.assertEquals(decoder.decode(b'\n'), u"\n")

        decoder = codecs.getincrementaldecoder("utf-8")()
        decoder = io.IncrementalNewlineDecoder(decoder, translate=True)
        self.assertEquals(decoder.newlines, None)
        decoder.decode(b"abc\n\r")
        self.assertEquals(decoder.newlines, u'\n')
        decoder.decode(b"\nabc")
        self.assertEquals(decoder.newlines, ('\n', '\r\n'))
        decoder.decode(b"abc\r")
        self.assertEquals(decoder.newlines, ('\n', '\r\n'))
        decoder.decode(b"abc")
        self.assertEquals(decoder.newlines, ('\r', '\n', '\r\n'))
        decoder.decode(b"abc\r")
        decoder.reset()
        self.assertEquals(decoder.decode(b"abc"), "abc")
        self.assertEquals(decoder.newlines, None)
Exemplo n.º 2
0
def source_to_unicode(txt, errors='replace', skip_encoding_cookie=True):
    """Convert Python source code given as bytes into a unicode string.

    Unicode strings are passed through unchanged. Byte strings are checked
    for the Python source file encoding cookie to determine the encoding.

    Parameters
    ----------
    txt
        A unicode string, a bytes string, or a bytes buffer (an object
        offering ``readline`` and ``seek``) containing the source code.
    errors
        Error handler handed to ``io.TextIOWrapper`` for undecodable bytes.
    skip_encoding_cookie
        When true, the decoded lines are passed through
        ``strip_encoding_cookie`` and joined; otherwise the decoded text is
        returned as read.

    Returns
    -------
    The source code as a unicode string.
    """
    if isinstance(txt, six.text_type):
        return txt
    if isinstance(txt, six.binary_type):
        stream = io.BytesIO(txt)
    else:
        stream = txt
    try:
        encoding, _ = detect_encoding(stream.readline)
    except SyntaxError:
        # The encoding could not be determined; fall back to ASCII and let
        # the ``errors`` handler deal with anything outside that range.
        encoding = "ascii"
    # detect_encoding consumed part of the stream; rewind before decoding.
    stream.seek(0)

    text = io.TextIOWrapper(stream, encoding, errors=errors, line_buffering=True)
    # NOTE(review): presumably a consumer expects a file-like ``mode``
    # attribute on the wrapper -- confirm before removing.
    text.mode = 'r'
    if skip_encoding_cookie:
        return u"".join(strip_encoding_cookie(text))
    else:
        return text.read()
Exemplo n.º 3
0
    def get_source(self, fullname):
        """Concrete implementation of InspectLoader.get_source.

        Fetches the raw bytes for ``fullname`` via ``get_filename`` and
        ``get_data``. When ``py3k`` is true the bytes are decoded using the
        encoding reported by ``tokenize.detect_encoding`` and line endings are
        translated to ``'\\n'``; otherwise the raw bytes are returned as-is.

        Raises ImportError when the data cannot be read, the encoding cannot
        be detected, or the bytes cannot be decoded.
        """
        path = self.get_filename(fullname)
        try:
            source_bytes = self.get_data(path)
        except IOError:
            raise ImportError("source not available through get_data()")

        if not py3k:
            return source_bytes  # XXX proper encoding

        import io
        import tokenize

        readsource = io.BytesIO(source_bytes).readline
        try:
            encoding = tokenize.detect_encoding(readsource)
        except SyntaxError:
            raise ImportError("Failed to detect encoding")

        newline_decoder = io.IncrementalNewlineDecoder(None, True)
        try:
            # final=True: the whole source is decoded in one call, so a
            # trailing lone '\r' must be translated now rather than held back
            # as a pending character and silently dropped.
            return newline_decoder.decode(source_bytes.decode(encoding[0]),
                                          final=True)
        except UnicodeDecodeError:
            raise ImportError("Failed to decode source file")
Exemplo n.º 4
0
 def test_universal_newlines(self):
     # get_source() is expected to return the text with line endings
     # translated the same way io.IncrementalNewlineDecoder translates them.
     module_name = 'mod'
     raw_text = 'x = 42\r\ny = -13\r\n'
     loader = self.SourceOnlyLoaderMock('mod.file')
     loader.source = raw_text.encode('utf-8')
     translator = io.IncrementalNewlineDecoder(None, True)
     expected = translator.decode(raw_text)
     self.assertEqual(loader.get_source(module_name), expected)
Exemplo n.º 5
0
 def test_universal_newlines(self):
     # PEP 302 says universal newlines should be used, so the loader's
     # get_source() result is compared against the newline-translated text.
     crlf_source = "x = 42\r\ny = -13\r\n"
     loader_mock = SourceOnlyLoaderMock('mod.file')
     loader_mock.source = crlf_source.encode('utf-8')
     wanted = io.IncrementalNewlineDecoder(None, True).decode(crlf_source)
     actual = loader_mock.get_source('mod')
     self.assertEqual(actual, wanted)
Exemplo n.º 6
0
def decode_source(source_bytes):
    """Decode bytes representing source code and return the string.

    The bytes are converted to text by ``source_to_unicode``; universal
    newline support is then applied, so ``'\\r\\n'`` and ``'\\r'`` both
    become ``'\\n'`` in the returned string.
    """
    text = source_to_unicode(source_bytes)
    newline_decoder = io.IncrementalNewlineDecoder(None, True)
    # final=True: the complete source is supplied in one call, so a trailing
    # lone '\r' must be translated now instead of being held back as a
    # pending character and lost.
    return newline_decoder.decode(text, final=True)
Exemplo n.º 7
0
 def test_newline_decoder(self):
     # Runs the shared newline-decoder checks against a translating
     # io.IncrementalNewlineDecoder for every listed codec, then runs the
     # UTF-8 specific checks on a fresh decoder.
     def build(codec_name):
         # A new incremental decoder wrapped for newline translation.
         inner = codecs.getincrementaldecoder(codec_name)()
         return io.IncrementalNewlineDecoder(inner, translate=True)

     codec_names = (
         'utf-8',
         'latin-1',
         'utf-16',
         'utf-16-le',
         'utf-16-be',
         'utf-32',
         'utf-32-le',
         'utf-32-be',
     )
     for codec_name in codec_names:
         self.check_newline_decoder(build(codec_name), codec_name)
     self.check_newline_decoder_utf8(build("utf-8"))
Exemplo n.º 8
0
def decode_source(source_bytes: bytes) -> str:
    """Decode bytes representing source code and return the string.

    Adapted from importlib._bootstrap_external. The encoding is taken from
    ``tokenize.detect_encoding`` (BOM or coding cookie, UTF-8 otherwise) and
    universal newlines are applied, so ``'\\r\\n'`` and ``'\\r'`` both become
    ``'\\n'``.

    Unlike a plain ``decode()`` call, the text is decoded with ``final=True``
    so that a trailing lone ``'\\r'`` is translated instead of being held
    back as a pending character and dropped.

    Raises SyntaxError if the encoding declaration is invalid and
    UnicodeDecodeError if the bytes do not match the detected encoding.
    """
    source_bytes_readline = io.BytesIO(source_bytes).readline
    encoding, _ = tokenize.detect_encoding(source_bytes_readline)
    newline_decoder = io.IncrementalNewlineDecoder(None, True)
    return newline_decoder.decode(source_bytes.decode(encoding), final=True)