def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises( TypeError, codecs.backslashreplace_errors, 42 ) # "backslashreplace" complains about the wrong exception types self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeError("ouch") ) # Use the correct exception tests = [ ("\u3042", "\\u3042"), ("\n", "\\x0a"), ("a", "\\x61"), ("\x00", "\\x00"), ("\xff", "\\xff"), ("\u0100", "\\u0100"), ("\uffff", "\\uffff"), ("\U00010000", "\\U00010000"), ("\U0010ffff", "\\U0010ffff"), # Lone surrogates ("\ud800", "\\ud800"), ("\udfff", "\\udfff"), ("\ud800\udfff", "\\ud800\\udfff"), ] for s, r in tests: with self.subTest(str=s): self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "a" + s + "b", 1, 1 + len(s), "ouch")), (r, 1 + len(s)) ) self.assertEqual( codecs.backslashreplace_errors( UnicodeTranslateError("a" + s + "b", 1, 1 + len(s), "ouch")), (r, 1 + len(s)) ) tests = [ (b"a", "\\x61"), (b"\n", "\\x0a"), (b"\x00", "\\x00"), (b"\xff", "\\xff"), ] for b, r in tests: with self.subTest(bytes=b): self.assertEqual( codecs.backslashreplace_errors( UnicodeDecodeError("ascii", bytearray(b"a" + b + b"b"), 1, 2, "ouch")), (r, 2) )
def backslashescape_errors(exception):
    r"""Codec error handler that backslash-escapes the offending data.

    For a UnicodeDecodeError every item in the failing slice of
    ``exception.object`` is rendered as ``\xNN``.  Any other argument is
    passed on to ``codecs.backslashreplace_errors``.

    Returns a ``(replacement, resume_position)`` tuple.
    """
    # Everything except decode errors is handled by the stdlib handler
    if not isinstance(exception, UnicodeDecodeError):
        return codecs.backslashreplace_errors(exception)

    resume_at = exception.end
    failing = exception.object[exception.start:resume_at]
    pieces = [r"\x%02x" % item for item in failing]
    return "".join(pieces), resume_at
def escape_invis(decode_error):
    """Codec error handler that escapes only items matched by CONTROL_CHARS.

    The error range is first narrowed to the single item at
    ``decode_error.start``.  If the module-level ``CONTROL_CHARS`` pattern
    (defined outside this block) matches that item, the result of
    ``codecs.backslashreplace_errors`` is returned; otherwise the item is
    returned encoded as UTF-8.

    Returns a ``(replacement, resume_position)`` tuple.
    """
    # Handle exactly one item per call (this mutates the exception)
    decode_error.end = decode_error.start + 1
    first, last = decode_error.start, decode_error.end
    fragment = decode_error.object[first:last]

    if CONTROL_CHARS.match(fragment):
        return codecs.backslashreplace_errors(decode_error)
    return fragment.encode('utf-8'), last
def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises(TypeError, codecs.backslashreplace_errors, 42) # "backslashreplace" complains about the wrong exception types self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeError("ouch")) # "backslashreplace" can only be used for encoding self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch")) self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeTranslateError("\u3042", 0, 1, "ouch")) # Use the correct exception self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), ("\\u3042", 1)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), ("\\x00", 1)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), ("\\xff", 1)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), ("\\u0100", 1)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), ("\\uffff", 1)) # 1 on UCS-4 builds, 2 on UCS-2 len_wide = len("\U00010000") self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\U00010000", 0, len_wide, "ouch")), ("\\U00010000", len_wide)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\U0010ffff", 0, len_wide, "ouch")), ("\\U0010ffff", len_wide)) # Lone surrogates (regardless of unicode width) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), ("\\ud800", 1)) self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), ("\\udfff", 1))
def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises(TypeError, codecs.backslashreplace_errors, 42) # "backslashreplace" complains about the wrong exception types self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeError("ouch")) # "backslashreplace" can only be used for encoding self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")) self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeTranslateError(u"\u3042", 0, 1, "ouch")) # Use the correct exception self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), (u"\\u3042", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), (u"\\x00", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), (u"\\xff", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), (u"\\u0100", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), (u"\\uffff", 1) ) if sys.maxunicode > 0xFFFF: self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), (u"\\U00010000", 1), ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), (u"\\U0010ffff", 1), )
def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises( TypeError, codecs.backslashreplace_errors, 42 ) # "backslashreplace" complains about the wrong exception types self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeError("ouch") ) # "backslashreplace" can only be used for encoding self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") ) self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeTranslateError(u"\u3042", 0, 1, "ouch") ) # Use the correct exception tests = [ (u"\u3042", u"\\u3042"), (u"\n", u"\\x0a"), (u"a", u"\\x61"), (u"\x00", u"\\x00"), (u"\xff", u"\\xff"), (u"\u0100", u"\\u0100"), (u"\uffff", u"\\uffff"), # Lone surrogates (u"\ud800", u"\\ud800"), (u"\udfff", u"\\udfff"), ] if sys.maxunicode > 0xffff: tests += [ (u"\U00010000", u"\\U00010000"), (u"\U0010ffff", u"\\U0010ffff"), ] else: tests += [ (u"\U00010000", u"\\ud800\\udc00"), (u"\U0010ffff", u"\\udbff\\udfff"), ] for s, r in tests: self.assertEqual( codecs.backslashreplace_errors( UnicodeEncodeError("ascii", u"a" + s + u"b", 1, 1 + len(s), "ouch")), (r, 1 + len(s)) )
def test_badandgoodbackslashreplaceexceptions(self):
    """Check codecs.backslashreplace_errors with bad and good arguments.

    The argument-validation checks always run.  The positive encode checks
    are skipped when test_support.due_to_ironpython_bug() returns a true
    value for the referenced work item (helper defined outside this block;
    presumably true on affected IronPython versions -- confirm there).
    """
    # "backslashreplace" complains about a non-exception passed in
    self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        42
    )
    # "backslashreplace" complains about the wrong exception types
    self.assertRaises(
        TypeError,
        codecs.backslashreplace_errors,
        UnicodeError("ouch")
    )

    # b"\xff" is a plain str on Python 2 and the bytes object that
    # Python 3's UnicodeDecodeError constructor requires.
    decode_exc = UnicodeDecodeError("ascii", b"\xff", 0, 1, "ouch")
    translate_exc = UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
    if sys.version_info >= (3, 5):
        # "backslashreplace" also works for decoding and translating
        self.assertEqual(
            codecs.backslashreplace_errors(decode_exc),
            (u"\\xff", 1)
        )
        self.assertEqual(
            codecs.backslashreplace_errors(translate_exc),
            (u"\\u3042", 1)
        )
    else:
        # "backslashreplace" can only be used for encoding
        self.assertRaises(
            TypeError,
            codecs.backslashreplace_errors,
            decode_exc
        )
        self.assertRaises(
            TypeError,
            codecs.backslashreplace_errors,
            translate_exc
        )

    # Early exit: the remaining assertions are not executed in this case
    if test_support.due_to_ironpython_bug("http://tkbgitvstfat01:8080/WorkItemTracking/WorkItem.aspx?artifactMoniker=303935"):
        return

    # Use the correct exception
    self.assertEqual(
        codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
        (u"\\u3042", 1)
    )
    self.assertEqual(
        codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
        (u"\\x00", 1)
    )
    self.assertEqual(
        codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
        (u"\\xff", 1)
    )
    self.assertEqual(
        codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
        (u"\\u0100", 1)
    )
    self.assertEqual(
        codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
        (u"\\uffff", 1)
    )
    # Non-BMP characters are a single code unit only when maxunicode
    # exceeds 0xffff, so the end index of 1 is valid only in that case.
    if sys.maxunicode > 0xffff:
        self.assertEqual(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
            (u"\\U00010000", 1)
        )
        self.assertEqual(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
            (u"\\U0010ffff", 1)
        )
def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises( TypeError, codecs.backslashreplace_errors, 42 ) # "backslashreplace" complains about the wrong exception types self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeError("ouch") ) # "backslashreplace" can only be used for encoding self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch") ) self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeTranslateError(u"\u3042", 0, 1, "ouch") ) # Use the correct exception self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")), (u"\\u3042", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")), (u"\\x00", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")), (u"\\xff", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")), (u"\\u0100", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")), (u"\\uffff", 1) ) if sys.maxunicode>0xffff: self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")), (u"\\U00010000", 1) ) self.assertEquals( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")), (u"\\U0010ffff", 1) )
def test_badandgoodbackslashreplaceexceptions(self): # "backslashreplace" complains about a non-exception passed in self.assertRaises(TypeError, codecs.backslashreplace_errors, 42) # "backslashreplace" complains about the wrong exception types self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeError("ouch")) # "backslashreplace" can only be used for encoding self.assertRaises( TypeError, codecs.backslashreplace_errors, UnicodeDecodeError("ascii", bytearray(b"\xff"), 0, 1, "ouch") ) self.assertRaises(TypeError, codecs.backslashreplace_errors, UnicodeTranslateError("\u3042", 0, 1, "ouch")) # Use the correct exception self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")), ("\\u3042", 1) ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")), ("\\x00", 1) ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")), ("\\xff", 1) ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")), ("\\u0100", 1) ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")), ("\\uffff", 1) ) # 1 on UCS-4 builds, 2 on UCS-2 len_wide = len("\U00010000") self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U00010000", 0, len_wide, "ouch")), ("\\U00010000", len_wide), ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U0010ffff", 0, len_wide, "ouch")), ("\\U0010ffff", len_wide), ) # Lone surrogates (regardless of unicode width) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\ud800", 0, 1, "ouch")), ("\\ud800", 1) ) self.assertEqual( codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\udfff", 0, 1, "ouch")), ("\\udfff", 1) )