def __init__(self):
    """Install wrapped parsers and re-route the scanner's recursion.

    ``parse_object``, ``parse_array`` and ``parse_string`` are replaced by
    callables obtained from ``_create_object_hook``.  A scanner is then built
    with ``json.scanner.py_make_scanner`` and the closure cell that holds its
    inner ``_scan_once`` function is rebound to the wrapper returned by
    ``_create_scanner_wrapper``; the same wrapper becomes ``self.scan_once``.

    :raises ValueError: if the scanner exposes no closure cell holding a
        callable named ``_scan_once``.
    """
    from json.decoder import JSONArray, JSONObject, scanstring

    super().__init__()
    self.parse_object = _create_object_hook(JSONObject)
    self.parse_array = _create_object_hook(JSONArray)
    # JSONObject/JSONArray receive the text and index as one (s, end) pair,
    # whereas scanstring takes them separately.  The string parser is
    # adapted to the pair form for wrapping and adapted back afterwards.
    str_parser_wrapper = _create_object_hook(
        lambda s_with_end, strict: scanstring(*s_with_end, strict))
    self.parse_string = lambda s, end, strict: str_parser_wrapper(
        (s, end), strict)

    # The scanner closure is patched here because it refers to itself
    # internally and offers no configuration point.
    # Layout: 'py_make_scanner' defines '_scan_once', which is referenced by
    # 'scan_once', the function that 'py_make_scanner()' returns.
    orig_scanner = copy.deepcopy(json.scanner.py_make_scanner(self))
    try:
        # getattr() keeps callables without a __name__ from aborting the
        # search, and "or ()" covers a scanner that has no closure at all.
        cell = next(
            candidate for candidate in (orig_scanner.__closure__ or ())
            if callable(candidate.cell_contents)
            and getattr(candidate.cell_contents, '__name__', None)
            == '_scan_once')
    except StopIteration:
        raise ValueError(
            f'Failed to patch {orig_scanner.__name__}, '
            'probably the internals have been changed') from None
    self.scan_once = _create_scanner_wrapper(cell.cell_contents)
    # Closure cells are read-only before Python 3.7, so the assignment is
    # delegated to the _cell_set helper.
    _cell_set(cell, self.scan_once)
def custom_parse_string(string, idx, encoding, strict):
    """Scan a JSON string and turn ISO-formatted timestamps into datetimes.

    The arguments are forwarded unchanged to ``scanstring``.

    :return: ``[datetime, end_index]`` when the decoded text matches
        ``ISO_TIMESTAMP_FORMAT``; otherwise the untouched result of
        ``scanstring``.
    """
    obj = scanstring(string, idx, encoding, strict)
    if isinstance(obj[0], (str, unicode)):
        try:
            dt = datetime.datetime.strptime(obj[0], ISO_TIMESTAMP_FORMAT)
        except ValueError:
            # strptime signals "does not match the format" with ValueError;
            # such text is an ordinary string and is returned as scanned.
            # Anything else (e.g. KeyboardInterrupt) must propagate.
            pass
        else:
            return [dt, obj[1]]
    return obj
def _parse_string(*args, **kwargs):
    """Scan a JSON string and decode an embedded binary payload, if any.

    All arguments go straight to ``scanstring``.  ``self`` is not a
    parameter; it is picked up from the enclosing scope.

    When ``self._use_binary_hex_encoding`` is true, text starting with
    ``0x`` is hex-decoded; otherwise text starting with a NUL character is
    base64-decoded.  Any other text is returned as scanned.

    :return: ``(value, end_index)``.
    """
    text, end = scanstring(*args, **kwargs)
    hex_mode = self._use_binary_hex_encoding
    if hex_mode:
        if text.startswith('0x'):
            text = a2b_hex(text[2:])
    elif text.startswith('\x00'):
        text = base64.b64decode(text[1:])
    return text, end
def parse_object(data):
    """Yield parse events for a JSON object whose ``{`` was already consumed.

    ``data`` is an iterator of ``(position, lexeme)`` pairs.  The generator
    emits ``('start_map', None)``, then for every member a ``('map_key',
    key)`` event followed by the events of ``parse_value``, and finally
    ``('end_map', None)``.
    """
    yield ('start_map', None)
    _, token = next(data)
    if token != '}':
        more_members = True
        while more_members:
            yield ('map_key', scanstring(token, 1)[0])
            # The lexeme after the key is consumed; only its position is
            # used, handed on to parse_value.
            separator_pos, _ = next(data)
            for item in parse_value(data, None, separator_pos):
                yield item
            _, token = next(data)
            if token == '}':
                more_members = False
            else:
                # Not the closing brace: fetch the lexeme of the next key.
                _, token = next(data)
    yield ('end_map', None)
def _parse_string(*args, **kwargs):
    """Scan a JSON string and convert binary or decimal payloads.

    All arguments go straight to ``scanstring``.  ``self`` is not a
    parameter; it is picked up from the enclosing scope.

    Conversions, first match wins:

    * ``self._use_binary_hex_encoding`` true and text starts with ``0x``:
      the remainder is hex-decoded.
    * ``self._use_binary_hex_encoding`` false and text starts with a NUL
      character: the remainder is base64-decoded.
    * ``self._use_decimal_from_str`` true and ``_DEC_MATCH`` matches: the
      text becomes a ``decimal.Decimal`` unless that conversion is invalid.

    :return: ``(value, end_index)``.
    """
    text, end = scanstring(*args, **kwargs)

    if self._use_binary_hex_encoding:
        if text.startswith('0x'):
            return a2b_hex(text[2:]), end
    elif text.startswith('\x00'):
        return base64.b64decode(text[1:]), end

    if self._use_decimal_from_str and _DEC_MATCH.match(text):
        try:
            return decimal.Decimal(text), end
        except decimal.InvalidOperation:
            # Matched the pattern but is not a valid decimal: keep the text.
            pass
    return text, end
def parse_value(data, sym=None, pos=0):
    """Yield parse events for one JSON value.

    :param data: iterator of ``(position, lexeme)`` pairs.
    :param sym: lexeme to interpret; when ``None`` the next pair is pulled
        from ``data``.
    :param pos: position belonging to ``sym``.

    Scalars produce a single ``(type, value)`` event.  ``[`` and ``{``
    delegate to ``parse_array`` and ``parse_object``.  Any lexeme that is
    not recognised otherwise is converted with ``decimal.Decimal``.
    """
    if sym is None:
        pos, sym = next(data)

    if sym == 'null':
        yield ('null', None)
        return
    if sym in ('true', 'false'):
        yield ('boolean', sym == 'true')
        return
    if sym == '[':
        for item in parse_array(data):
            yield item
        return
    if sym == '{':
        for item in parse_object(data):
            yield item
        return
    if sym[0] == '"':
        # Index 1 skips the opening quote, as scanstring expects.
        yield ('string', scanstring(sym, 1)[0])
        return
    yield ('number', decimal.Decimal(sym))
def _get_loads(strict=STRICT):
    """Return a ``loads`` callable suited to the requested strictness.

    Non-strict: ``simplejson.loads`` when importable, else ``json.loads``.

    Strict: if ``json`` cannot be imported, the module-level ``STRICT`` flag
    is cleared and ``simplejson.loads`` is returned.  Otherwise the string
    scanner is probed; when it already yields ``unicode`` the stock
    ``json.loads`` is returned, and when it does not, a ``loads`` wrapper is
    returned that defaults ``cls`` to a decoder rebuilt on the pure-Python
    scanner.
    """
    if not strict:
        try:
            from simplejson import loads
        except ImportError:
            from json import loads
        return loads

    # If we don't have json, we can only fall back to simplejson, non-strict
    try:
        from json import decoder
    except ImportError:
        global STRICT
        STRICT = False
        from simplejson import loads
        return loads

    try:
        probe = decoder.c_scanstring('"str"', 1)
    except TypeError:
        # github issue #33: pypy may not have c_scanstring
        probe = decoder.scanstring('"str"', 1)

    if type(probe[0]) is unicode:
        from json import loads
        return loads

    import json as _myjson
    from json import scanner

    class MyJSONDecoder(_myjson.JSONDecoder):
        def __init__(self, *args, **kwargs):
            _myjson.JSONDecoder.__init__(self, *args, **kwargs)
            # reset scanner to python-based one using python scanstring
            self.parse_string = decoder.py_scanstring
            self.scan_once = scanner.py_make_scanner(self)

    def loads(s, *args, **kwargs):
        # Only supply the custom decoder when the caller chose none.
        kwargs.setdefault('cls', MyJSONDecoder)
        return _myjson.loads(s, *args, **kwargs)

    return loads
def JSONObject(match, context, _w=decoder.WHITESPACE.match):
    """Parse a JSON object into an ``OrderedDict``, preserving key order.

    :param match: regex match whose ``string`` is the JSON text and whose
        ``end()`` is where parsing of the object body starts.
    :param context: object optionally carrying ``encoding``, ``strict`` and
        ``object_hook`` attributes.
    :return: ``(mapping, index)``; for a non-empty object the mapping has
        been passed through ``context.object_hook`` when one is set.  An
        empty object is returned without consulting the hook.
    :raises ValueError: on a missing property name, ``:`` or ``,`` delimiter,
        or member value.
    """
    def syntax_error(message, position):
        return ValueError(decoder.errmsg(message, s, position))

    members = OrderedDict()  # ordered mapping in place of a plain dict
    s = match.string
    pos = _w(s, match.end()).end()
    ch = s[pos:pos + 1]
    # Trivial empty object
    if ch == '}':
        return members, pos + 1
    if ch != '"':
        raise syntax_error("Expecting property name", pos)
    pos += 1

    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    iterscan = JSONScanner.iterscan
    while True:
        key, pos = decoder.scanstring(s, pos, encoding, strict)
        pos = _w(s, pos).end()
        if s[pos:pos + 1] != ':':
            raise syntax_error("Expecting : delimiter", pos)
        pos = _w(s, pos + 1).end()
        try:
            value, pos = iterscan(s, idx=pos, context=context).next()
        except StopIteration:
            raise syntax_error("Expecting object", pos)
        members[key] = value

        pos = _w(s, pos).end()
        ch = s[pos:pos + 1]
        pos += 1
        if ch == '}':
            break
        if ch != ',':
            raise syntax_error("Expecting , delimiter", pos - 1)

        pos = _w(s, pos).end()
        ch = s[pos:pos + 1]
        pos += 1
        if ch != '"':
            raise syntax_error("Expecting property name", pos - 1)

    object_hook = getattr(context, 'object_hook', None)
    if object_hook is not None:
        members = object_hook(members)
    return members, pos
def _TokenizingJSONObject(s_and_end, strict, scan_once, memo, _w=WHITESPACE.match, _ws=WHITESPACE_STR): s, end = s_and_end pairs = [] pairs_append = pairs.append memo_get = memo.setdefault # Use a slice to prevent IndexError from being raised, the following # check will raise a more specific ValueError if the string is empty nextchar = s[end:end + 1] # Normally we expect nextchar == '"' if nextchar != '"': if nextchar in _ws: end = _w(s, end).end() nextchar = s[end:end + 1] # Trivial empty object if nextchar == '}': return {}, end + 1 elif nextchar != '"': raise JSONDecodeError( "Expecting property name enclosed in double quotes", s, end) end += 1 while True: start = end - 1 key, end = scanstring(s, end, strict) key = memo_get(key, key) key = ScalarToken(memo_get(key, key), start, end - 1) # To skip some function call overhead we optimize the fast paths where # the JSON key separator is ": " or just ":". if s[end:end + 1] != ':': end = _w(s, end).end() if s[end:end + 1] != ':': raise JSONDecodeError("Expecting ':' delimiter", s, end) end += 1 try: if s[end] in _ws: end += 1 if s[end] in _ws: end = _w(s, end + 1).end() except IndexError: pass try: value, end = scan_once(s, end) except StopIteration as err: raise JSONDecodeError("Expecting value", s, err.value) from None pairs_append((key, value)) try: nextchar = s[end] if nextchar in _ws: end = _w(s, end + 1).end() nextchar = s[end] except IndexError: nextchar = '' end += 1 if nextchar == '}': break elif nextchar != ',': raise JSONDecodeError("Expecting ',' delimiter", s, end - 1) end = _w(s, end).end() nextchar = s[end:end + 1] end += 1 if nextchar != '"': raise JSONDecodeError( "Expecting property name enclosed in double quotes", s, end - 1) return dict(pairs), end
def _parse_string(*args, **kwargs): s, idx = scanstring(*args, **kwargs) if s and s[0] == u'\x00': s = base64.b64decode(s[1:]) return s, idx
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and record each key's span on its value.

    :param s_and_end: ``(s, end)`` pair; ``end`` is the index right after
        the opening ``{``.
    :param strict: forwarded to ``scanstring``.
    :param scan_once: callable ``(s, index) -> (value, index)`` that parses
        one member value and raises ``StopIteration`` when none is found.
        Returned values must accept attribute assignment.
    :param object_hook: optional callable applied to the resulting dict.
    :param object_pairs_hook: optional callable applied to the list of
        ``(key, value)`` pairs; takes precedence over ``object_hook``.
    :param memo: optional mapping used to reuse key strings.
    :return: ``(result, index)``, where ``index`` is the position right
        after the closing ``}``.
    :raises JSONDecodeError: on malformed input.

    Every member value receives ``key_start`` (index of the key's opening
    quote) and ``key_end`` (index just past its closing quote).
    """
    s, end = s_and_end
    members = []
    # Backwards compatibility
    if memo is None:
        memo = {}
    remember = memo.setdefault

    def finish(stop):
        # Shared by the empty-object and the regular exit.
        if object_pairs_hook is not None:
            return object_pairs_hook(members), stop
        mapping = dict(members)
        if object_hook is not None:
            mapping = object_hook(mapping)
        return mapping, stop

    # Slicing avoids IndexError on truncated input; the checks below then
    # raise the more specific decode error.
    nextchar = s[end:end + 1]
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        if nextchar == '}':
            # Trivial empty object
            return finish(end + 1)
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1

    while True:
        key_start = end - 1
        key, end = scanstring(s, end, strict)
        key_end = end
        key = remember(key, key)

        # Fast path for the common ":" and ": " separators, which skips the
        # regex call.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        value.key_start = key_start
        value.key_end = key_end
        members.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        if nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)

    return finish(end)
def unquote_parse_string(*args, **kwargs):
    """Scan a JSON string and pass the decoded text through ``unquote``.

    All arguments go straight to ``scanstring``.

    :return: ``(unquoted_text, end_index)``.
    """
    text, end_index = scanstring(*args, **kwargs)
    return unquote(text), end_index
def parse_string(pinfo, string, index):
    """Parse a double-quoted JSON string located at ``string[index]``.

    :param pinfo: not used by this function.
    :param string: text being parsed.
    :param index: position at which the opening quote is expected.
    :return: the ``(text, end_index)`` result of ``scanstring`` when a quote
        is found at ``index``; otherwise ``(NotImplemented, index)``.
    """
    if index >= len(string) or string[index] != '"':
        return NotImplemented, index
    return scanstring(string, index + 1)
def parse_string(symbol):
    """Decode a quoted JSON string lexeme into its text.

    The first character of ``symbol`` (the opening quote) is dropped before
    the remainder is handed to ``scanstring``.
    """
    unopened = symbol[1:]
    text, _ = scanstring(unopened, 0)
    return text
def _skip_whitespace_and_comments(s, end, _w, _ws, _ilcs, _ilc, _mlcs, _mlc):
    '''
    Return the first index at or after ``end`` that starts neither
    whitespace nor a comment.  At end of input ``end`` is returned as is.
    '''
    while True:
        nextchar = s[end:end + 1]
        # '' in _ws is True, so the emptiness test must come first;
        # otherwise this loop would never terminate at end of input.
        if nextchar and nextchar in _ws:
            end = _w(s, end).end()
        # startswith(prefix, start) tests in place, without copying the tail
        elif s.startswith(_ilcs, end):
            end = _ilc(s, end).end()
        elif s.startswith(_mlcs, end):
            end = _mlc(s, end).end()
        else:
            return end


def parse_object(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
                 memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR,
                 _ilcs=INLINE_COMMENT_STRING_START, _ilc=INLINE_COMMENT.match,
                 _mlcs=MULTILINE_COMMENT_STRING_START,
                 _mlc=MULTILINE_COMMENT.match):
    '''
    Modified json.decoder.JSONObject function from standard json module
    (python 3.7.7).

    Whitespace, inline comments and multiline comments are skipped wherever
    the stock parser would skip whitespace only.

    s_and_end is the (s, end) pair; end is the index right after the
    opening brace.  scan_once(s, index) parses one member value and raises
    StopIteration when none is found.  object_pairs_hook, when given,
    receives the list of (key, value) pairs and takes precedence over
    object_hook, which receives the resulting dict.

    Returns (result, index), index being the position right after the
    closing brace.  Raises JSONDecodeError on malformed or truncated input.
    '''
    # pylint: disable=invalid-name
    s, end = s_and_end
    pairs = []
    pairs_append = pairs.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault

    def skip(position):
        return _skip_whitespace_and_comments(
            s, position, _w, _ws, _ilcs, _ilc, _mlcs, _mlc)

    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific error if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        end = skip(end)
        nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1

    while True:
        key, end = scanstring(s, end, strict)
        key = memo_get(key, key)

        # Fast path: no skipping when the separator follows the key directly
        if s[end:end + 1] != ':':
            end = skip(end)
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        end = skip(end)
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        pairs_append((key, value))

        end = skip(end)
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        elif nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # Input that stops right after the comma now reaches the property
        # name check below instead of raising IndexError.
        end = skip(end)
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
def parse_string(symbol):
    """Decode a quoted JSON string lexeme into its text.

    Scanning starts at index 1, just past the opening quote of ``symbol``.
    """
    text, _ = scanstring(symbol, 1)
    return text
def _starts_surrogate_pair(string, i):
    """Tell whether the escape at ``string[i]`` may open a surrogate pair.

    True when ``string[i:]`` begins with a ``\\uD800``-``\\uDBFF`` escape
    that is directly followed by another ``\\u`` escape.
    """
    if string[(i + 6):(i + 8)] != '\\u':
        return False
    try:
        code_unit = int(string[(i + 2):(i + 6)], 16)
    except ValueError:
        return False
    return 0xD800 <= code_unit <= 0xDBFF


def parse_escape_sequences(string):
    """Parse a string for possible escape sequences.

    Sample usage:

    >>> parse_escape_sequences('foo\\\\nbar')
    'foo\\nbar'
    >>> parse_escape_sequences('foo\\\\u0256')
    'foo\\u0256'

    :param string:
        Any string.
    :type string:
        `basestring`
    :raises:
        :class:`ValueError` if a backslash character is found, but it doesn't
        form a proper escape sequence with the character(s) that follow.
    :return:
        The parsed string. Will parse the standard escape sequences, and also
        basic \\\\uxxxx escape sequences. A high surrogate escape directly
        followed by another \\\\uxxxx escape is decoded together with it, so
        that a surrogate pair yields a single character where the platform
        supports it.
        \\\\uxxxxxxxxxx escape sequences are not currently supported.
    :rtype:
        `unicode`
    """
    string = safe_unicode(string)
    characters = []
    i = 0
    string_len = len(string)
    while i < string_len:
        character = string[i]
        if character != '\\':
            characters.append(character)
            i += 1
            continue

        # Figure out the size of the escape sequence. Most escape sequences
        # are two characters (e.g. '\\' and 'n'); \uxxxx escape sequences are
        # six characters, or twelve when two of them form a surrogate pair
        # that has to be decoded in one go.
        if string[(i + 1):(i + 2)] != 'u':
            offset = 2
        elif _starts_surrogate_pair(string, i):
            offset = 12
        else:
            offset = 6

        # `json.decoder.scanstring()` mostly does what we want, but it also
        # parses quote characters, which would get in the way here. The
        # escape sequence is therefore isolated and wrapped in quotes of its
        # own, and scanning starts at index 1, right after the opening quote.
        json_string = '"' + string[i:(i + offset)] + '"'
        try:
            character = scanstring(json_string, 1)[0]
        except ValueError:
            # A new `ValueError` is raised instead of re-raising: in
            # Python 3 the original is a JSON-specific subclass, and its
            # message refers to the synthetic string built above, which
            # means nothing to the caller.
            raise_from(ValueError(string), None)
        characters.append(character)
        i += offset
    return ''.join(characters)