Пример #1
0
def encode_basestring(s):
    """Return the JSON string literal for the Python string ``s``.

    A byte string in which ``HAS_UTF8`` finds a match is decoded as UTF-8
    first.  Every substring matched by ``ESCAPE`` is then replaced by its
    entry in ``ESCAPE_DCT`` and the result is wrapped in double quotes.
    """
    if isinstance(s, binary_type):
        if HAS_UTF8.search(s) is not None:
            s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    quote = u('"')
    return quote + escaped + quote
Пример #2
0
    def encode(self, o):
        """Serialize ``o`` and return the result as a single JSON string.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for bare strings (extremely simple cases and benchmarks).
        if isinstance(o, string_types):
            if isinstance(o, binary_type):
                source_encoding = self.encoding
                # Bytes in a non-UTF-8 encoding are decoded to text here;
                # UTF-8 bytes are handed to the string encoders unchanged.
                if (source_encoding is not None
                        and source_encoding != 'utf-8'):
                    o = o.decode(source_encoding)
            encode_str = (encode_basestring_ascii if self.ensure_ascii
                          else encode_basestring)
            return encode_str(o)
        # The chunk iterator is materialized here instead of being handed
        # straight to join(), because the exceptions aren't as detailed
        # otherwise.  list() should be roughly equivalent to the
        # PySequence_Fast call that join() would make.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        joiner = '' if self.ensure_ascii else u('')
        return joiner.join(pieces)
Пример #3
0
 def encode(self, o):
     # JSONEncoder.encode is overridden because its performance hacks
     # make things more complicated; this version simply joins the
     # chunks that iterencode() produces.
     pieces = self.iterencode(o, True)
     joiner = '' if self.ensure_ascii else u('')
     return joiner.join(pieces)
Пример #4
0
 def test_encoding2(self):
     # dumps() must produce the same JSON for a text string and for its
     # UTF-8 encoded bytes.
     # This test is not appropriate for Python 3: You're not supposed
     # to pass bytes to JSON APIs.
     if not PY3:
         uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
         s = uu.encode('utf-8')
         ju = json.dumps(uu, encoding='utf-8')
         js = json.dumps(s, encoding='utf-8')
         self.assertEqual(ju, js)
Пример #5
0
 def test_encoding2(self):
     # dumps() must produce the same JSON for a text string and for its
     # UTF-8 encoded bytes.
     # This test is not appropriate for Python 3: You're not supposed
     # to pass bytes to JSON APIs.
     if not PY3:
         uu = u(
             '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
         s = uu.encode('utf-8')
         ju = json.dumps(uu, encoding='utf-8')
         js = json.dumps(s, encoding='utf-8')
         self.assertEqual(ju, js)
Пример #6
0
 def test_ensure_ascii_false_bytestring_encoding(self):
     # http://code.google.com/p/simplejson/issues/detail?id=48
     # A UTF-8 byte-string value and the equivalent text value must
     # serialize identically, both by default and with ensure_ascii=False.
     from_bytes = {u'quux': b('Arr\xc3\xaat sur images')}
     from_text = {u'quux': u('Arr\xeat sur images')}
     escaped = '{"quux": "Arr\\u00eat sur images"}'
     unescaped = u'{"quux": "Arr\xeat sur images"}'
     for doc in (from_bytes, from_text):
         self.assertEqual(json.dumps(doc), escaped)
     for doc in (from_bytes, from_text):
         self.assertEqual(json.dumps(doc, ensure_ascii=False), unescaped)
Пример #7
0
 def test_encoding1(self):
     # A JSONEncoder configured with encoding='utf-8' must produce the same
     # JSON for a text string and for its UTF-8 encoded bytes.
     # This test is not appropriate for Python 3: You're not supposed
     # to pass bytes to JSON APIs.
     if not PY3:
         encoder = json.JSONEncoder(encoding='utf-8')
         uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
         s = uu.encode('utf-8')
         ju = encoder.encode(uu)
         js = encoder.encode(s)
         self.assertEqual(ju, js)
Пример #8
0
 def test_ensure_ascii_false_bytestring_encoding(self):
     # http://code.google.com/p/simplejson/issues/detail?id=48
     # A UTF-8 byte-string value and the equivalent text value must
     # serialize identically, both by default and with ensure_ascii=False.
     from_bytes = {u'quux': b('Arr\xc3\xaat sur images')}
     from_text = {u'quux': u('Arr\xeat sur images')}
     escaped = '{"quux": "Arr\\u00eat sur images"}'
     unescaped = u'{"quux": "Arr\xeat sur images"}'
     for doc in (from_bytes, from_text):
         self.assertEqual(json.dumps(doc), escaped)
     for doc in (from_bytes, from_text):
         self.assertEqual(json.dumps(doc, ensure_ascii=False), unescaped)
Пример #9
0
    def test_dump_load(self):
        # Each sample, wrapped in WonkyTextSubclass, must survive a
        # dumps()/loads() round trip with the default settings and with
        # ensure_ascii=False.
        for text in ['', '"hello"', 'text', u('\u005c')]:
            default_dump = simplejson.dumps(WonkyTextSubclass(text))
            self.assertEqual(text, simplejson.loads(default_dump))

            unescaped_dump = simplejson.dumps(WonkyTextSubclass(text),
                                              ensure_ascii=False)
            self.assertEqual(text, simplejson.loads(unescaped_dump))
Пример #10
0
 def test_encoding1(self):
     # A JSONEncoder configured with encoding='utf-8' must produce the same
     # JSON for a text string and for its UTF-8 encoded bytes.
     # This test is not appropriate for Python 3: You're not supposed
     # to pass bytes to JSON APIs.
     if not PY3:
         encoder = json.JSONEncoder(encoding='utf-8')
         uu = u(
             '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
         s = uu.encode('utf-8')
         ju = encoder.encode(uu)
         js = encoder.encode(s)
         self.assertEqual(ju, js)
Пример #11
0
 def test_object_pairs_hook_with_unicode(self):
     # Decode a text document and check what object_pairs_hook receives
     # and how it interacts with object_hook.
     doc = u('{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}')
     pairs = [
         (u("xkd"), 1),
         (u("kcw"), 2),
         (u("art"), 3),
         (u("hxm"), 4),
         (u("qrt"), 5),
         (u("pad"), 6),
         (u("hoy"), 7),
     ]
     # eval() is applied to the hard-coded literal above only.
     self.assertEqual(json.loads(doc), eval(doc))
     self.assertEqual(json.loads(doc, object_pairs_hook=lambda x: x), pairs)
     od = json.loads(doc, object_pairs_hook=json.OrderedDict)
     self.assertEqual(od, json.OrderedDict(pairs))
     self.assertEqual(type(od), json.OrderedDict)
     # the object_pairs_hook takes priority over the object_hook
     with_both_hooks = json.loads(doc,
                                  object_pairs_hook=json.OrderedDict,
                                  object_hook=lambda x: None)
     self.assertEqual(with_both_hooks, json.OrderedDict(pairs))
Пример #12
0
 def test_object_pairs_hook_with_unicode(self):
     # Decode a text document and check what object_pairs_hook receives
     # and how it interacts with object_hook.
     doc = u('{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}')
     pairs = [
         (u("xkd"), 1),
         (u("kcw"), 2),
         (u("art"), 3),
         (u("hxm"), 4),
         (u("qrt"), 5),
         (u("pad"), 6),
         (u("hoy"), 7),
     ]
     # eval() is applied to the hard-coded literal above only.
     self.assertEqual(json.loads(doc), eval(doc))
     self.assertEqual(json.loads(doc, object_pairs_hook=lambda x: x), pairs)
     od = json.loads(doc, object_pairs_hook=json.OrderedDict)
     self.assertEqual(od, json.OrderedDict(pairs))
     self.assertEqual(type(od), json.OrderedDict)
     # the object_pairs_hook takes priority over the object_hook
     with_both_hooks = json.loads(doc,
                                  object_pairs_hook=json.OrderedDict,
                                  object_hook=lambda x: None)
     self.assertEqual(with_both_hooks, json.OrderedDict(pairs))
Пример #13
0
 def test_array_decoder_issue46(self):
     # http://code.google.com/p/simplejson/issues/detail?id=46
     # '[,]' must be rejected with a JSONDecodeError located at
     # pos 1 / line 1 / column 1, for both text and plain-str input.
     for doc in [u('[,]'), '[,]']:
         try:
             json.loads(doc)
         except json.JSONDecodeError:
             # sys.exc_info() is used instead of "except ... as e"
             e = sys.exc_info()[1]
             self.assertEqual(e.pos, 1)
             self.assertEqual(e.lineno, 1)
             self.assertEqual(e.colno, 1)
         except Exception:
             e = sys.exc_info()[1]
             self.fail("Unexpected exception raised %r %s" % (e, e))
         else:
             self.fail("Unexpected success parsing '[,]'")
Пример #14
0
 def test_array_decoder_issue46(self):
     # http://code.google.com/p/simplejson/issues/detail?id=46
     # '[,]' must be rejected with a JSONDecodeError located at
     # pos 1 / line 1 / column 1, for both text and plain-str input.
     for doc in [u('[,]'), '[,]']:
         try:
             json.loads(doc)
         except json.JSONDecodeError:
             # sys.exc_info() is used instead of "except ... as e"
             e = sys.exc_info()[1]
             self.assertEqual(e.pos, 1)
             self.assertEqual(e.lineno, 1)
             self.assertEqual(e.colno, 1)
         except Exception:
             e = sys.exc_info()[1]
             self.fail("Unexpected exception raised %r %s" % (e, e))
         else:
             self.fail("Unexpected success parsing '[,]'")
Пример #15
0
 def test_encoding6(self):
     # With ensure_ascii=False the non-ASCII characters are written
     # through unescaped and the result compares equal to text.
     uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
     j = json.dumps([uu], ensure_ascii=False)
     self.assertEqual(j, u('["') + uu + u('"]'))
Пример #16
0
 def test_encoding4(self):
     # With the default settings non-ASCII characters are written as
     # \uXXXX escapes.
     uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
     j = json.dumps([uu])
     self.assertEqual(j, '["\\u03b1\\u03a9"]')
Пример #17
0
 def test_keys_reuse_unicode(self):
     # Run check_keys_reuse with json.loads on a text document.
     doc = u('[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]')
     loader = json.loads
     self.check_keys_reuse(doc, loader)
Пример #18
0
def py_scanstring(s,
                  end,
                  encoding=None,
                  strict=True,
                  _b=BACKSLASH,
                  _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences. If strict is False
    then literal control characters are allowed in the string.

    Raises JSONDecodeError when the string is unterminated, when it
    contains a literal control character in strict mode, or when an escape
    sequence is invalid (unknown escape character, malformed \\uXXXX, or a
    malformed surrogate pair).

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, text_type):
                content = text_type(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator, )
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            if len(esc) != 4:
                raise JSONDecodeError(msg, s, end)
            try:
                uni = int(esc, 16)
            except ValueError:
                # Report non-hex digits with a position instead of letting
                # int()'s bare ValueError escape
                raise JSONDecodeError(msg, s, end)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                try:
                    uni2 = int(esc2, 16)
                except ValueError:
                    raise JSONDecodeError(msg, s, end)
                # The second escape must be a low surrogate; combining any
                # other value would silently yield a wrong code point
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u('').join(chunks), end
Пример #19
0
    # http://json.org/JSON_checker/test/fail18.json
    '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
    # http://json.org/JSON_checker/test/fail19.json
    '{"Missing colon" null}',
    # http://json.org/JSON_checker/test/fail20.json
    '{"Double colon":: null}',
    # http://json.org/JSON_checker/test/fail21.json
    '{"Comma instead of colon", null}',
    # http://json.org/JSON_checker/test/fail22.json
    '["Colon instead of comma": false]',
    # http://json.org/JSON_checker/test/fail23.json
    '["Bad value", truth]',
    # http://json.org/JSON_checker/test/fail24.json
    "['single quote']",
    # http://code.google.com/p/simplejson/issues/detail?id=3
    u('["A\u001FZ control characters in string"]'),
]

# 1-based positions in JSONDOCS of the documents that test_failures passes
# straight to json.loads() instead of treating them as failure cases.  The
# value is a human-readable reason for the exemption.
SKIPS = {
    1: "why not have a string payload?",
    18: "spec doesn't specify any nesting limitations",
}

class TestFail(TestCase):
    def test_failures(self):
        for idx, doc in enumerate(JSONDOCS):
            idx = idx + 1
            if idx in SKIPS:
                json.loads(doc)
                continue
            try:
Пример #20
0
 def test_default_encoding(self):
     # UTF-8 encoded input is decoded correctly when loads() is called
     # without an explicit encoding.
     self.assertEqual(json.loads(u('{"a": "\xe9"}').encode('utf-8')),
         {'a': u('\xe9')})
Пример #21
0
 def test_big_unicode_decode(self):
     # U+1D120 decodes to the same text whether it appears literally or
     # as an escaped \ud834\udd20 surrogate pair.
     uu = u('z\U0001d120x')
     self.assertEqual(json.loads('"' + uu + '"'), uu)
     self.assertEqual(json.loads('"z\\ud834\\udd20x"'), uu)
from unittest import TestCase

from simplejson.compat import u, b, binary_type, PY3
import simplejson.encoder

# Pairs of (input string, expected JSON string literal with non-ASCII
# characters escaped).  Inputs are text via u() or UTF-8 bytes via b().
# NOTE(review): presumably consumed by the encode_basestring_ascii tests;
# the consumer is not visible in this excerpt -- confirm.
CASES = [
    (u('/\u005c"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?'
       ),
     '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'
     ),
    (u('\u0123\u4567\u89ab\ucdef\uabcd\uef4a'),
     '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
    (u('controls'), '"controls"'),
    (u('\x08\x0c\n\r\t'), '"\\b\\f\\n\\r\\t"'),
    (u('{"object with 1 member":["array with 1 element"]}'),
     '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
    (u(' s p a c e d '), '" s p a c e d "'),
    (u('\U0001d120'), '"\\ud834\\udd20"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (u("`1~!@#$%^&*()_+-={':[,]}|;.</>?"),
     '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
    (u('\x08\x0c\n\r\t'), '"\\b\\f\\n\\r\\t"'),
    (u('\u0123\u4567\u89ab\ucdef\uabcd\uef4a'),
     '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
]
Пример #23
0
    # http://json.org/JSON_checker/test/fail18.json
    '[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]',
    # http://json.org/JSON_checker/test/fail19.json
    '{"Missing colon" null}',
    # http://json.org/JSON_checker/test/fail20.json
    '{"Double colon":: null}',
    # http://json.org/JSON_checker/test/fail21.json
    '{"Comma instead of colon", null}',
    # http://json.org/JSON_checker/test/fail22.json
    '["Colon instead of comma": false]',
    # http://json.org/JSON_checker/test/fail23.json
    '["Bad value", truth]',
    # http://json.org/JSON_checker/test/fail24.json
    "['single quote']",
    # http://code.google.com/p/simplejson/issues/detail?id=3
    u('["A\u001FZ control characters in string"]'),
]

# 1-based positions in JSONDOCS of the documents that test_failures passes
# straight to json.loads() instead of treating them as failure cases.  The
# value is a human-readable reason for the exemption.
SKIPS = {
    1: "why not have a string payload?",
    18: "spec doesn't specify any nesting limitations",
}


class TestFail(TestCase):
    def test_failures(self):
        for idx, doc in enumerate(JSONDOCS):
            idx = idx + 1
            if idx in SKIPS:
                json.loads(doc)
                continue
Пример #24
0
    endlineno, endcolno = linecol(doc, end)
    #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)


# Maps the literal names '-Infinity', 'Infinity' and 'NaN' to the NegInf,
# PosInf and NaN objects defined outside this excerpt.
# NOTE(review): presumably used by the decoder for these non-standard
# number literals -- the consumer is not visible here; confirm.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matcher used by py_scanstring: group 1 is the (possibly empty) text before
# the terminator, group 2 the terminator itself -- a double quote, a
# backslash, or a character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: maps the character that follows a backslash to
# the text it stands for.  py_scanstring looks up every escape other than
# \u in this table.
BACKSLASH = {
    '"': u('"'), '\\': u('\u005c'), '/': u('/'),
    'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}

# Encoding py_scanstring uses when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"

def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
Пример #25
0
 def test_empty_strings(self):
     # An empty JSON string decodes to an empty string, both at the top
     # level and inside an array, for plain-str and u() documents alike.
     expectations = [
         ('""', ""),
         (u('""'), u("")),
         ('[""]', [""]),
         (u('[""]'), [u("")]),
     ]
     for doc, decoded in expectations:
         self.assertEqual(json.loads(doc), decoded)
Пример #26
0
 def test_keys_reuse_unicode(self):
     # Run check_keys_reuse with json.loads on a text document.
     doc = u('[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]')
     loader = json.loads
     self.check_keys_reuse(doc, loader)
Пример #27
0
 def test_keys_reuse_str(self):
     # Run check_keys_reuse with json.loads on a UTF-8 encoded document.
     text = u('[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]')
     encoded = text.encode('utf8')
     self.check_keys_reuse(encoded, json.loads)
Пример #28
0
 def test_big_unicode_encode(self):
     # U+1D120 is written as an escaped surrogate pair by default and
     # literally when ensure_ascii=False.
     uu = u('\U0001d120')
     self.assertEqual(json.dumps(uu), '"\\ud834\\udd20"')
     self.assertEqual(json.dumps(uu, ensure_ascii=False), u('"\U0001d120"'))
Пример #29
0
 def test_default_encoding(self):
     # UTF-8 encoded input is decoded correctly when loads() is called
     # without an explicit encoding.
     self.assertEqual(json.loads(u('{"a": "\xe9"}').encode('utf-8')),
                      {'a': u('\xe9')})
Пример #30
0
 def test_big_unicode_decode(self):
     # U+1D120 decodes to the same text whether it appears literally or
     # as an escaped \ud834\udd20 surrogate pair.
     uu = u('z\U0001d120x')
     self.assertEqual(json.loads('"' + uu + '"'), uu)
     self.assertEqual(json.loads('"z\\ud834\\udd20x"'), uu)
Пример #31
0
 def test_encoding6(self):
     # With ensure_ascii=False the non-ASCII characters are written
     # through unescaped and the result compares equal to text.
     uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
     j = json.dumps([uu], ensure_ascii=False)
     self.assertEqual(j, u('["') + uu + u('"]'))
Пример #32
0
 def test_big_unicode_encode(self):
     # U+1D120 is written as an escaped surrogate pair by default and
     # literally when ensure_ascii=False.
     uu = u('\U0001d120')
     self.assertEqual(json.dumps(uu), '"\\ud834\\udd20"')
     self.assertEqual(json.dumps(uu, ensure_ascii=False),
                      u('"\U0001d120"'))
Пример #33
0
    endlineno, endcolno = linecol(doc, end)
    #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
    #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
    fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
    return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)


# Maps the literal names '-Infinity', 'Infinity' and 'NaN' to the NegInf,
# PosInf and NaN objects defined outside this excerpt.
# NOTE(review): presumably used by the decoder for these non-standard
# number literals -- the consumer is not visible here; confirm.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matcher used by py_scanstring: group 1 is the (possibly empty) text before
# the terminator, group 2 the terminator itself -- a double quote, a
# backslash, or a character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: maps the character that follows a backslash to
# the text it stands for.  py_scanstring looks up every escape other than
# \u in this table.
BACKSLASH = {
    '"': u('"'),
    '\\': u('\u005c'),
    '/': u('/'),
    'b': u('\b'),
    'f': u('\f'),
    'n': u('\n'),
    'r': u('\r'),
    't': u('\t'),
}

# Encoding py_scanstring uses when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"


def py_scanstring(s,
                  end,
                  encoding=None,
Пример #34
0
 def test_unicode_preservation(self):
     # Strings decoded from a text document are exactly text_type,
     # whether empty, non-empty, or nested inside an array.
     self.assertEqual(type(json.loads(u('""'))), text_type)
     self.assertEqual(type(json.loads(u('"a"'))), text_type)
     self.assertEqual(type(json.loads(u('["a"]'))[0]), text_type)
Пример #35
0
 def test_unicode_preservation(self):
     # Strings decoded from a text document are exactly text_type,
     # whether empty, non-empty, or nested inside an array.
     self.assertEqual(type(json.loads(u('""'))), text_type)
     self.assertEqual(type(json.loads(u('"a"'))), text_type)
     self.assertEqual(type(json.loads(u('["a"]'))[0]), text_type)
Пример #36
0
 def test_keys_reuse_str(self):
     # Run check_keys_reuse with json.loads on a UTF-8 encoded document.
     text = u('[{"a_key": 1, "b_\xe9": 2}, {"a_key": 3, "b_\xe9": 4}]')
     encoded = text.encode('utf8')
     self.check_keys_reuse(encoded, json.loads)
Пример #37
0
 def test_dump_load(self):
     # Each sample, wrapped in WonkyTextSubclass, must survive a
     # dumps()/loads() round trip.
     for text in ['', '"hello"', 'text', u('\u005c')]:
         dumped = simplejson.dumps(WonkyTextSubclass(text))
         self.assertEqual(text, simplejson.loads(dumped))
from unittest import TestCase

from simplejson.compat import u, b, binary_type, PY3
import simplejson.encoder

# Pairs of (input string, expected JSON string literal with non-ASCII
# characters escaped).  Inputs are text via u() or UTF-8 bytes via b().
# NOTE(review): presumably consumed by the encode_basestring_ascii tests;
# the consumer is not visible in this excerpt -- confirm.
CASES = [
    (u('/\u005c"\ucafe\ubabe\uab98\ufcde\ubcda\uef4a\x08\x0c\n\r\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?'), '"/\\\\\\"\\ucafe\\ubabe\\uab98\\ufcde\\ubcda\\uef4a\\b\\f\\n\\r\\t`1~!@#$%^&*()_+-=[]{}|;:\',./<>?"'),
    (u('\u0123\u4567\u89ab\ucdef\uabcd\uef4a'), '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
    (u('controls'), '"controls"'),
    (u('\x08\x0c\n\r\t'), '"\\b\\f\\n\\r\\t"'),
    (u('{"object with 1 member":["array with 1 element"]}'), '"{\\"object with 1 member\\":[\\"array with 1 element\\"]}"'),
    (u(' s p a c e d '), '" s p a c e d "'),
    (u('\U0001d120'), '"\\ud834\\udd20"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (b('\xce\xb1\xce\xa9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (u('\u03b1\u03a9'), '"\\u03b1\\u03a9"'),
    (u("`1~!@#$%^&*()_+-={':[,]}|;.</>?"), '"`1~!@#$%^&*()_+-={\':[,]}|;.</>?"'),
    (u('\x08\x0c\n\r\t'), '"\\b\\f\\n\\r\\t"'),
    (u('\u0123\u4567\u89ab\ucdef\uabcd\uef4a'), '"\\u0123\\u4567\\u89ab\\ucdef\\uabcd\\uef4a"'),
]

class TestEncodeBaseStringAscii(TestCase):
    def test_py_encode_basestring_ascii(self):
        self._test_encode_basestring_ascii(simplejson.encoder.py_encode_basestring_ascii)

    def test_c_encode_basestring_ascii(self):
        if not simplejson.encoder.c_encode_basestring_ascii:
            return
Пример #39
0
 def test_empty_strings(self):
     # An empty JSON string decodes to an empty string, both at the top
     # level and inside an array, for plain-str and u() documents alike.
     expectations = [
         ('""', ""),
         (u('""'), u("")),
         ('[""]', [""]),
         (u('[""]'), [u("")]),
     ]
     for doc, decoded in expectations:
         self.assertEqual(json.loads(doc), decoded)
Пример #40
0
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences. If strict is False
    then literal control characters are allowed in the string.

    Raises JSONDecodeError when the string is unterminated, when it
    contains a literal control character in strict mode, or when an escape
    sequence is invalid (unknown escape character, malformed \\uXXXX, or a
    malformed surrogate pair).

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, text_type):
                content = text_type(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            if len(esc) != 4:
                raise JSONDecodeError(msg, s, end)
            try:
                uni = int(esc, 16)
            except ValueError:
                # Report non-hex digits with a position instead of letting
                # int()'s bare ValueError escape
                raise JSONDecodeError(msg, s, end)
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                esc2 = s[end + 7:end + 11]
                if len(esc2) != 4:
                    raise JSONDecodeError(msg, s, end)
                try:
                    uni2 = int(esc2, 16)
                except ValueError:
                    raise JSONDecodeError(msg, s, end)
                # The second escape must be a low surrogate; combining any
                # other value would silently yield a wrong code point
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u('').join(chunks), end
Пример #41
0
 def test_encoding4(self):
     # With the default settings non-ASCII characters are written as
     # \uXXXX escapes.
     uu = u('\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}')
     j = json.dumps([uu])
     self.assertEqual(j, '["\\u03b1\\u03a9"]')
Пример #42
0
    def _test_scanstring(self, scanstring):
        """Check a scanstring implementation against known inputs.

        ``scanstring`` is called as ``scanstring(doc, start, None, True)``
        and must return ``(decoded_text, end_index)`` for every case below.
        """
        # On narrow builds (sys.maxunicode == 65535) the literal U+1D120
        # occupies two code units, so the expected end index is one larger.
        if sys.maxunicode == 65535:
            astral_end = 6
        else:
            astral_end = 5

        # (document, start index, expected text, expected end index)
        cases = [
            ('"z\\ud834\\udd20x"', 1, u('z\U0001d120x'), 16),
            (u('"z\U0001d120x"'), 1, u('z\U0001d120x'), astral_end),
            ('"\\u007b"', 1, u('{'), 8),
            ('"A JSON payload should be an object or array, not a string."',
             1,
             u('A JSON payload should be an object or array, not a string.'),
             60),
            ('["Unclosed array"', 2, u('Unclosed array'), 17),
            ('["extra comma",]', 2, u('extra comma'), 14),
            ('["double extra comma",,]', 2, u('double extra comma'), 21),
            ('["Comma after the close"],', 2,
             u('Comma after the close'), 24),
            ('["Extra close"]]', 2, u('Extra close'), 14),
            ('{"Extra comma": true,}', 2, u('Extra comma'), 14),
            ('{"Extra value after close": true} "misplaced quoted value"',
             2, u('Extra value after close'), 26),
            ('{"Illegal expression": 1 + 2}', 2,
             u('Illegal expression'), 21),
            ('{"Illegal invocation": alert()}', 2,
             u('Illegal invocation'), 21),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             u('Numbers cannot have leading zeroes'), 37),
            ('{"Numbers cannot be hex": 0x14}', 2,
             u('Numbers cannot be hex'), 24),
            ('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21,
             u('Too deep'), 30),
            ('{"Missing colon" null}', 2, u('Missing colon'), 16),
            ('{"Double colon":: null}', 2, u('Double colon'), 15),
            ('{"Comma instead of colon", null}', 2,
             u('Comma instead of colon'), 25),
            ('["Colon instead of comma": false]', 2,
             u('Colon instead of comma'), 25),
            ('["Bad value", truth]', 2, u('Bad value'), 12),
        ]
        for doc, start, text, stop in cases:
            self.assertEqual(scanstring(doc, start, None, True),
                             (text, stop))
Пример #43
0
    def _test_scanstring(self, scanstring):
        """Check a scanstring implementation against known inputs.

        ``scanstring`` is called as ``scanstring(doc, start, None, True)``
        and must return ``(decoded_text, end_index)`` for every case below.
        """
        # On narrow builds (sys.maxunicode == 65535) the literal U+1D120
        # occupies two code units, so the expected end index is one larger.
        if sys.maxunicode == 65535:
            astral_end = 6
        else:
            astral_end = 5

        # (document, start index, expected text, expected end index)
        cases = [
            ('"z\\ud834\\udd20x"', 1, u('z\U0001d120x'), 16),
            (u('"z\U0001d120x"'), 1, u('z\U0001d120x'), astral_end),
            ('"\\u007b"', 1, u('{'), 8),
            ('"A JSON payload should be an object or array, not a string."',
             1,
             u('A JSON payload should be an object or array, not a string.'),
             60),
            ('["Unclosed array"', 2, u('Unclosed array'), 17),
            ('["extra comma",]', 2, u('extra comma'), 14),
            ('["double extra comma",,]', 2, u('double extra comma'), 21),
            ('["Comma after the close"],', 2,
             u('Comma after the close'), 24),
            ('["Extra close"]]', 2, u('Extra close'), 14),
            ('{"Extra comma": true,}', 2, u('Extra comma'), 14),
            ('{"Extra value after close": true} "misplaced quoted value"',
             2, u('Extra value after close'), 26),
            ('{"Illegal expression": 1 + 2}', 2,
             u('Illegal expression'), 21),
            ('{"Illegal invocation": alert()}', 2,
             u('Illegal invocation'), 21),
            ('{"Numbers cannot have leading zeroes": 013}', 2,
             u('Numbers cannot have leading zeroes'), 37),
            ('{"Numbers cannot be hex": 0x14}', 2,
             u('Numbers cannot be hex'), 24),
            ('[[[[[[[[[[[[[[[[[[[["Too deep"]]]]]]]]]]]]]]]]]]]]', 21,
             u('Too deep'), 30),
            ('{"Missing colon" null}', 2, u('Missing colon'), 16),
            ('{"Double colon":: null}', 2, u('Double colon'), 15),
            ('{"Comma instead of colon", null}', 2,
             u('Comma instead of colon'), 25),
            ('["Colon instead of comma": false]', 2,
             u('Colon instead of comma'), 25),
            ('["Bad value", truth]', 2, u('Bad value'), 12),
        ]
        for doc, start, text, stop in cases:
            self.assertEqual(scanstring(doc, start, None, True),
                             (text, stop))