示例#1
0
    def find_sourceline_and_wrap_info(self, space, source=None):
        """Find the offending source line and build wrapped SyntaxError args.

        Returns a wrapped 2-tuple ``(msg, (filename, lineno, offset, text,
        lastlineno))`` that can be used to construct a wrapped SyntaxError.

        The source line is looked up, in order of preference, from:

        * ``self.text``, when this instance was constructed with one;
        * line ``self.lineno`` of ``source``, when ``source`` is passed and
          really contains that line;
        * the file named ``self.filename``, read at application level
          (best effort: any failure there yields None).

        When the line is available as text it is decoded as UTF-8 with
        undecodable bytes replaced by U+FFFD, and the offset into the
        encoded text is converted to an offset into the decoded text.
        """
        text = self.text
        if text is None and source is not None and self.lineno:
            lines = source.splitlines(True)
            # lineno is 1-based.  Only index when the line really exists:
            # a line number past the end of source (or a negative one) must
            # leave text unset so that the fallbacks below apply, instead
            # of indexing out of range.
            if 1 <= self.lineno <= len(lines):
                text = lines[self.lineno - 1]
        w_text = w_filename = space.w_None
        offset = self.offset
        w_lineno = space.newint(self.lineno)
        if self.filename is not None:
            w_filename = space.newfilename(self.filename)
        if text is None and self.filename is not None:
            # No text available: try to read the line from the file at
            # application level.  The helper returns None on any failure.
            w_text = space.appexec([w_filename, w_lineno],
                                   """(filename, lineno):
                    try:
                        with open(filename) as f:
                            for _ in range(lineno - 1):
                                f.readline()
                            return f.readline()
                    except:  # we can't allow any exceptions here!
                        return None""")
        elif text is not None:
            from rpython.rlib.runicode import str_decode_utf_8_impl

            # text may not be UTF-8 in case of decoding errors.
            # adjust the encoded text offset to a decoded offset
            # XXX do the right thing about continuation lines, which
            # XXX are their own fun, sometimes giving offset >
            # XXX len(text) for example (right now, avoid crashing)
            def replace_error_handler(errors, encoding, msg, s, startpos,
                                      endpos):
                # must return unicode
                return u'\ufffd', endpos

            if offset > len(text):
                offset = len(text)
            # Decode only the first `offset` bytes: the length of the
            # result is the offset expressed in decoded characters.
            replacedtext, _ = str_decode_utf_8_impl(text, offset, 'replace',
                                                    False,
                                                    replace_error_handler,
                                                    True)
            offset = len(replacedtext)
            if len(text) != offset:
                # The prefix decoded above is not known to be the whole
                # line, so decode the complete text for w_text.
                replacedtext, _ = str_decode_utf_8_impl(
                    text, len(text), 'replace', False, replace_error_handler,
                    True)
            w_text = space.newtext(replacedtext.encode('utf8'),
                                   len(replacedtext))
        return space.newtuple([
            space.newtext(self.msg),
            space.newtuple([
                w_filename, w_lineno,
                space.newint(offset), w_text,
                space.newint(self.lastlineno)
            ])
        ])
示例#2
0
def fsdecode(space, w_string):
    """Decode the wrapped bytes object w_string into a wrapped unicode
    string, choosing the decoder according to the platform and to whether
    the filesystem codec can already be used."""
    from pypy.module._codecs import interp_codecs
    codec_state = space.fromcache(interp_codecs.CodecState)

    if _WIN32:
        # strict MBCS decoding
        raw = space.bytes_w(w_string)
        decoded = str_decode_mbcs(
            raw, len(raw), 'strict',
            errorhandler=decode_error_handler(space),
            force_ignore=False)[0]
        return space.newunicode(decoded)

    if _MACOSX:
        # UTF-8 decoding with the 'surrogateescape' error policy
        raw = space.bytes_w(w_string)
        decoded = runicode.str_decode_utf_8_impl(
            raw, len(raw), 'surrogateescape',
            final=True,
            errorhandler=codec_state.decode_error_handler,
            allow_surrogates=False)[0]
        return space.newunicode(decoded)

    codec_not_ready = (space.sys.filesystemencoding is None or
                       codec_state.codec_need_encodings)
    if not codec_not_ready:
        # regular case: let the bytes object decode itself with the
        # filesystem encoding
        from pypy.module.sys.interp_encoding import getfilesystemencoding
        w_encoding = getfilesystemencoding(space)
        w_errors = space.newtext('surrogateescape')
        return space.call_method(w_string, 'decode', w_encoding, w_errors)

    # bootstrap check: the filesystemencoding isn't initialized yet, or
    # the filesystem codec is implemented in Python, so it cannot be used
    # before the codecs are ready.  Use the locale codec instead.
    from pypy.module._codecs.locale import (
        str_decode_locale_surrogateescape)
    raw = space.bytes_w(w_string)
    decoded = str_decode_locale_surrogateescape(
        raw, errorhandler=decode_error_handler(space))
    return space.newunicode(decoded)
示例#3
0
def utf_8_decode(space, string, errors="strict", w_final=None):
    """Decode `string` as UTF-8 and return a wrapped tuple
    (unicode result, number of bytes consumed).

    An `errors` value of None is treated as 'strict'; `w_final` is
    interpreted through its truth value.
    """
    errors = 'strict' if errors is None else errors
    is_final = space.is_true(w_final)
    codec_state = space.fromcache(CodecState)
    # NB. str_decode_utf_8() can't be called directly because it is an
    # @elidable function nowadays; the _impl() variant is needed instead.
    # (The problem is the errorhandler, which calls arbitrary Python.)
    decoded, n_consumed = runicode.str_decode_utf_8_impl(
        string, len(string), errors, is_final,
        codec_state.decode_error_handler,
        allow_surrogates=True)
    w_decoded = space.newunicode(decoded)
    w_consumed = space.newint(n_consumed)
    return space.newtuple([w_decoded, w_consumed])