def parse(self, input_stream, alias_map=None, schema=None, append_to=None,
          buf_size=65536, **kwargs):
    """Stream-parse JSON from ``input_stream``.

    Args:
        input_stream: Source handed to the parser's ``parse``; must be truthy.
        alias_map: ``None`` or a dict mapping short names to full names.
        schema: Query tree used to determine the PK. When it differs from
            ``self.schema`` it is stored, ``self.schema_map`` is cleared and
            ``self._traverse_query_paths`` is run on it.
        append_to: Optional object stored as both ``self.root`` and
            ``self.current``; ``None`` resets both.
        buf_size: Second argument given to ``YajlParser``.
        **kwargs: Kept on ``self.kwargs``; variables meant for the callback.

    Returns:
        ``True`` if the parser finished without raising. ``False`` if it
        raised ``YajlParseCancelled`` or ``YajlError``; the exception is then
        printed and stored on ``self.error``.

    Raises:
        AssertionError: ``input_stream`` is falsy, or ``alias_map`` is neither
            ``None`` nor a dict.
    """
    assert input_stream, 'Не задан источник данных'

    # Variables that will be passed on to the callback.
    self.kwargs = kwargs

    # Dict holding the query tree used to determine the PK; rebuilt only
    # when a different schema is supplied.
    if schema != self.schema:
        self.schema = schema
        self.schema_map.clear()
        self._traverse_query_paths(schema)

    # map(short name -> full name)
    alias_map_ok = alias_map is None or isinstance(alias_map, dict)
    assert alias_map_ok, 'Неверный тип справочника aliasов'
    self.alias_map = alias_map

    # Our hero - the C stream JSON parser.
    yajl = self.parser = YajlParser(self, buf_size)
    yajl.allow_multiple_values = True
    yajl.dont_validate_strings = True

    # Initialise state so the instance can be reused.
    self.root = self.current = append_to

    # Parse.
    try:
        yajl.parse(input_stream)
    except (YajlParseCancelled, YajlError) as err:
        print(err)
        self.error = err
        return False
    return True
def get_path(json_stream):
    """Parse ``json_stream`` and return the handler's ``get_path()`` result.

    A ``YajlError`` whose ``value`` contains ``'premature EOF'`` is ignored,
    so whatever the handler collected up to that point is still returned.
    Any other ``YajlError`` makes the function return the string
    ``'invalid json'`` instead.
    """
    handler = ContentHandler()
    parser = YajlParser(handler)
    try:
        parser.parse(f=json_stream)
    except YajlError as exc:
        # Truncated input is tolerated; everything else is reported.
        if 'premature EOF' not in exc.value:
            return 'invalid json'
    return handler.get_path()
def main(args):
    """Run a ``YajlParser`` over each file named in ``args``.

    Args:
        args: Sequence of file names. When empty, ``parser.parse()`` is called
            once with no argument (presumably the parser's default input,
            e.g. stdin - confirm against ``YajlParser``).

    Returns:
        Always ``0``.
    """
    parser = YajlParser(ContentHandler())
    parser.allow_multiple_values = True

    if not args:
        parser.parse()
        return 0

    for fn in args:
        # The context manager closes the file even when parsing raises,
        # which the former open()/close() pair did not.
        with open(fn) as f:
            parser.parse(f=f)
    return 0
def parse_json_utf8_bytes(bytesio: io.BytesIO) -> ParseJsonResult:
    """
    Parse JSON text into columns, returning them alongside any error text.

    * On invalid JSON the error text is returned together with all data
      parsed before the error. (Many JSON parse errors mean "the JSON was
      truncated"; in that case the partial data is still wanted.)
    * str/int/float values are parsed as-is; for everything else the JSON
      tokens are concatenated as str.
    * Rows starting at MAX_BYTES_PER_TABLE are dropped (with a warning).
    * Columns after MAX_COLUMNS_PER_TABLE are dropped (with a warning).
    * Rows starting at MAX_ROWS_PER_TABLE are dropped (with a warning).

    The messages collected here are joined with newlines into the second
    argument of the returned ``ParseJsonResult``.
    """
    handler = JsonContentHandler()  # accumulates the parsed data
    parser = YajlParser(handler)
    messages = []

    try:
        parser.parse(bytesio)
    except (JsonRootIsNotArray, JsonRecordIsNotObject):
        messages.append(
            "Workbench cannot import this JSON file. The JSON file "
            "must be an Array of Objects for Workbench to import it."
        )
    except JsonNumberTooLarge as err:
        messages.append(
            f'Stopped parsing JSON because the number "{err.value_str}" '
            "is too large."
        )
    except JsonTooManyRows:
        messages.append("The input had too many rows, so we removed rows.")
    except JsonTooManyBytes:
        messages.append(
            "The input was too large, so we stopped before reading the whole "
            "file."
        )
    except YajlError as err:
        # e.g., 'lexical error: ...\n   blah\n    ^^here' - keep line one only.
        first_line = str(err).partition("\n")[0]
        columns = handler.columns
        # Word the message differently when the first column already holds
        # values, i.e. some data was parsed before the error.
        has_data = columns and next(iter(columns.values())).values
        prefix = "Stopped parsing after JSON " if has_data else "JSON "
        messages.append(prefix + first_line)

    if handler.truncated_columns:
        messages.append("The input had too many columns, so we removed some.")

    return ParseJsonResult(handler.columns, "\n".join(messages))
def main():
    """Reformat JSON according to command-line options.

    Options:
        -m  minimize the JSON rather than beautify it (beautify is default)
        -u  allow invalid UTF8 inside strings during parsing
        -e  escape forward slashes
        -s  treat the input as a stream of multiple JSON entities

    The parser is run via ``yajl_parser.parse()`` with no argument; the
    option help text describes the input as stdin.
    """
    opt_parser = optparse.OptionParser(
        description='reformat json from stdin',
        version='Yajl-Py for Yajl %s' % yajl_version)
    opt_parser.add_option(
        "-m", dest="beautify", action="store_false", default=True,
        help="minimize json rather than beautify (default)")
    opt_parser.add_option(
        "-u", dest="dont_validate_strings", action='store_true',
        default=False,
        help="allow invalid UTF8 inside strings during parsing")
    opt_parser.add_option(
        "-e", dest="escape_solidus", action='store_true', default=False,
        help="escape any forward slashes (for embedding in HTML)")
    opt_parser.add_option(
        "-s", dest="stream", action='store_true', default=False,
        help="reformat a stream of multiple json entites")
    options, _args = opt_parser.parse_args()

    # Initialize the content handler (creates a Yajl Gen).
    # NOTE(review): options.escape_solidus is parsed above but is not passed
    # on anywhere in this function - confirm whether the handler should
    # receive it.
    ch = ReformatContentHandler(
        beautify=options.beautify,
        stream=options.stream,
    )

    # Initialize the parser.
    yajl_parser = YajlParser(ch)
    yajl_parser.allow_comments = True  # let's allow comments by default
    yajl_parser.dont_validate_strings = options.dont_validate_strings
    # Multiple top-level values are accepted only in stream mode. An earlier
    # unconditional "= True" was overwritten by this line and has been
    # dropped as a dead store.
    yajl_parser.allow_multiple_values = options.stream
    yajl_parser.parse()
def parse(self):
    """Parse ``self.filename`` and return the callbacks' result.

    ``self.filename`` may be a path or an object with a ``read`` attribute;
    a path is opened in text mode. The handle is always closed before
    returning, including a handle supplied by the caller.

    Returns:
        ``None`` unless parsing is stopped by ``KillParse``, in which case the
        exception's ``ret`` is used. If ``self.callbacks`` has a
        ``"FinalCallback"`` entry, that callable's return value is returned
        instead; it receives the current result and, when ``"CallbackData"``
        is also a key, ``self.callbacks`` as a second argument.
    """
    ret = None
    # The handler is built from the instance's callbacks mapping (the bare
    # name ``callbacks`` is not defined in this function).
    self.handler = ParseContentHandler(self.callbacks)
    self.parser = YajlParser(content_handler=self.handler)
    self.parser.dont_validate_strings = self.utf8

    # Detect a file-like object without calling read(): reading here would
    # consume the stream and leave nothing for the parser.
    source = self.filename
    fh = source if hasattr(source, 'read') else open(source, 'r')

    try:
        try:
            self.parser.parse(fh)
        except KillParse as e:
            # A callback aborted the parse; carry its result forward.
            ret = e.ret

        if "FinalCallback" in self.callbacks:
            final_callback = self.callbacks["FinalCallback"]
            if "CallbackData" in self.callbacks:
                ret = final_callback(ret, self.callbacks)
            else:
                ret = final_callback(ret)
    finally:
        # Close the handle even if parsing or the final callback raised.
        fh.close()
    return ret
def parseToQueue(stream, queue, column_map):
    """Parse ``stream`` with a ``ContentHandler`` built on ``queue``.

    The handler is constructed from ``queue`` and ``column_map``. Once the
    parser returns, ``None`` is put on ``queue`` (presumably an end-of-data
    marker for whoever consumes the queue - confirm against the consumer).
    If parsing raises, the exception propagates and no ``None`` is enqueued.
    """
    handler = ContentHandler(queue, column_map)
    YajlParser(handler).parse(stream)
    queue.put(None)