def find_sourceline_and_wrap_info(self, space, source=None):
    """Find the offending source line and build wrapped SyntaxError args.

    Returns a wrapped 2-tuple ``(msg, (filename, lineno, offset, text,
    lastlineno))`` that can be used to construct a wrapped SyntaxError.

    ``source`` is optional.  When this instance was constructed without a
    source line (``self.text`` is None), the line is looked up in
    ``source`` to get a better error message.  If ``self.lineno`` is not
    a valid line number of ``source``, that lookup is skipped (instead of
    indexing out of range) and the file named by ``self.filename`` is
    tried next, if there is one.
    """
    text = self.text
    if text is None and source is not None and self.lineno:
        lines = source.splitlines(True)
        # Guard the index: lineno may lie outside the source we were given.
        if 0 < self.lineno <= len(lines):
            text = lines[self.lineno - 1]
    w_text = w_filename = space.w_None
    offset = self.offset
    w_lineno = space.newint(self.lineno)
    if self.filename is not None:
        w_filename = space.newfilename(self.filename)
    if text is None and self.filename is not None:
        # Last resort: read the line from the file at application level.
        # The snippet swallows every exception and yields None instead.
        w_text = space.appexec([w_filename, w_lineno],
            """(filename, lineno):
            try:
                with open(filename) as f:
                    for _ in range(lineno - 1):
                        f.readline()
                    return f.readline()
            except:  # we can't allow any exceptions here!
                return None""")
    elif text is not None:
        from rpython.rlib.runicode import str_decode_utf_8_impl
        # text may not be UTF-8 in case of decoding errors.
        # adjust the encoded text offset to a decoded offset
        # XXX do the right thing about continuation lines, which
        # XXX are their own fun, sometimes giving offset >
        # XXX len(text) for example (right now, avoid crashing)
        def replace_error_handler(errors, encoding, msg, s, startpos, endpos):
            # must return unicode
            return u'\ufffd', endpos
        if offset > len(text):
            offset = len(text)
        # Decode only the first `offset` bytes; the length of the result
        # is the offset expressed in decoded characters.
        replacedtext, _ = str_decode_utf_8_impl(
            text, offset, 'replace', False, replace_error_handler, True)
        offset = len(replacedtext)
        # Unless the prefix decoded above already has as many characters
        # as the line has bytes, decode the complete line for the text
        # field.
        if len(text) != offset:
            replacedtext, _ = str_decode_utf_8_impl(
                text, len(text), 'replace', False, replace_error_handler,
                True)
        w_text = space.newtext(replacedtext.encode('utf8'),
                               len(replacedtext))
    return space.newtuple([
        space.newtext(self.msg),
        space.newtuple([
            w_filename, w_lineno, space.newint(offset),
            w_text, space.newint(self.lastlineno)
        ])
    ])
def fsdecode(space, w_string):
    """Decode the wrapped byte string ``w_string`` with a filesystem codec.

    The decoder is picked as follows:

    * ``_WIN32``: mbcs with the 'strict' error mode;
    * ``_MACOSX``: UTF-8 with the 'surrogateescape' error mode;
    * otherwise, while ``space.sys.filesystemencoding`` is still None or
      the codec state reports ``codec_need_encodings``: the locale codec;
    * otherwise: the ``decode`` method of ``w_string`` itself, called
      with the encoding from ``getfilesystemencoding`` and
      'surrogateescape'.

    The first three paths wrap their result with ``space.newunicode``;
    the last one returns whatever ``decode`` returned.
    """
    from pypy.module._codecs import interp_codecs
    codec_state = space.fromcache(interp_codecs.CodecState)

    if _WIN32:
        raw = space.bytes_w(w_string)
        decoded = str_decode_mbcs(
            raw, len(raw), 'strict',
            errorhandler=decode_error_handler(space),
            force_ignore=False)
        return space.newunicode(decoded[0])

    if _MACOSX:
        raw = space.bytes_w(w_string)
        decoded = runicode.str_decode_utf_8_impl(
            raw, len(raw), 'surrogateescape', final=True,
            errorhandler=codec_state.decode_error_handler,
            allow_surrogates=False)
        return space.newunicode(decoded[0])

    bootstrapping = (space.sys.filesystemencoding is None or
                     codec_state.codec_need_encodings)
    if not bootstrapping:
        from pypy.module.sys.interp_encoding import getfilesystemencoding
        return space.call_method(w_string, 'decode',
                                 getfilesystemencoding(space),
                                 space.newtext('surrogateescape'))

    # bootstrap check: if the filesystemencoding isn't initialized
    # or the filesystem codec is implemented in Python we cannot
    # use it before the codecs are ready. use the locale codec
    # instead
    from pypy.module._codecs.locale import (
        str_decode_locale_surrogateescape)
    raw = space.bytes_w(w_string)
    return space.newunicode(
        str_decode_locale_surrogateescape(
            raw, errorhandler=decode_error_handler(space)))
def utf_8_decode(space, string, errors="strict", w_final=None):
    """Decode the UTF-8 byte string ``string``.

    ``errors`` names the error mode; None is treated as 'strict'.
    ``w_final`` is evaluated for truth with ``space.is_true`` and passed
    on as the decoder's ``final`` flag.

    Returns a wrapped tuple ``(decoded unicode, consumed)``, where
    ``consumed`` is the second value returned by the decoder.
    """
    if errors is None:
        errors = 'strict'
    is_final = space.is_true(w_final)
    handler = space.fromcache(CodecState).decode_error_handler
    # NB. can't call str_decode_utf_8() directly because that's
    # an @elidable function nowadays.  Instead, we need the _impl().
    # (The problem is the errorhandler, which calls arbitrary Python.)
    decoded, consumed = runicode.str_decode_utf_8_impl(
        string, len(string), errors,
        final=is_final,
        errorhandler=handler,
        allow_surrogates=True)
    w_decoded = space.newunicode(decoded)
    w_consumed = space.newint(consumed)
    return space.newtuple([w_decoded, w_consumed])