Пример #1
0
 def parse(self, input_stream, alias_map=None, schema=None, append_to=None,
           buf_size=65536, **kwargs):
     """Stream-parse JSON from ``input_stream`` with a YAJL parser.

     Args:
         input_stream: Source handed to ``YajlParser.parse``; must be truthy.
         alias_map: Optional dict (short name -> full name) stored on
             ``self.alias_map``; anything other than ``None`` or a dict
             fails an assertion.
         schema: Query tree used to determine the PK. When it differs from
             ``self.schema`` it is stored, ``self.schema_map`` is cleared
             and ``self._traverse_query_paths(schema)`` is called.
         append_to: Optional object used as the initial value of both
             ``self.root`` and ``self.current``.
         buf_size: Passed as the second argument to ``YajlParser``.
         **kwargs: Stored on ``self.kwargs`` so they can be passed on to
             the callback.

     Returns:
         ``True`` when parsing finishes without error. ``False`` when
         ``YajlParseCancelled`` or ``YajlError`` is raised; the exception
         is printed and stored on ``self.error``.
     """
     assert input_stream, 'Не задан источник данных'

     # Variables that will be passed to the callback.
     self.kwargs = kwargs

     # Dictionary with the query tree used to determine the PK; the cached
     # map is only refreshed when a different schema is supplied.
     schema_changed = schema != self.schema
     if schema_changed:
         self.schema = schema
         self.schema_map.clear()
         self._traverse_query_paths(schema)

     # map(short name -> full name)
     aliases_ok = alias_map is None or isinstance(alias_map, dict)
     assert aliases_ok, 'Неверный тип справочника aliasов'
     self.alias_map = alias_map

     # The C streaming JSON parser, with this object as content handler.
     yajl = YajlParser(self, buf_size)
     yajl.allow_multiple_values = True
     yajl.dont_validate_strings = True
     self.parser = yajl

     # Reset the tree pointers so the instance can be reused.
     self.root = append_to
     self.current = append_to

     # Run the parse; both YAJL failure modes are reported the same way.
     try:
         self.parser.parse(input_stream)
     except (YajlParseCancelled, YajlError) as failure:
         print(failure)
         self.error = failure
         return False
     return True
Пример #2
0
def get_path(json_stream):
    """Parse ``json_stream`` and return the handler's collected path.

    A ``YajlError`` whose ``value`` contains ``'premature EOF'`` is
    ignored, so truncated input still yields whatever path the handler
    gathered. Any other ``YajlError`` makes the function return the string
    ``'invalid json'``.
    """
    handler = ContentHandler()
    yajl = YajlParser(handler)
    try:
        yajl.parse(f=json_stream)
    except YajlError as exc:
        # Only a truncated stream is tolerated; everything else is invalid.
        if 'premature EOF' not in exc.value:
            return 'invalid json'
    return handler.get_path()
Пример #3
0
def main(args):
    """Parse every file named in ``args`` with one shared ``YajlParser``.

    Args:
        args: Sequence of file names. When empty, ``parser.parse()`` is
            called with no argument (presumably the parser's default
            input source -- confirm against yajl-py).

    Returns:
        Always ``0``.
    """
    parser = YajlParser(ContentHandler())
    parser.allow_multiple_values = True
    if not args:
        parser.parse()
        return 0
    for fn in args:
        # The context manager closes the file even when parsing raises,
        # which the previous explicit open()/close() pair did not.
        with open(fn) as f:
            parser.parse(f=f)
    return 0
Пример #4
0
def parse_json_utf8_bytes(bytesio: io.BytesIO) -> ParseJsonResult:
    """
    Parse JSON text into columns, collecting human-readable warnings.

    * On invalid JSON, return the error text together with all the data
      parsed up to the error. (Many JSON parse errors mean "JSON was
      truncated"; in that case the partial data is still wanted.)
    * Parse str/int/float as-is; for the rest, concatenate JSON tokens as
      str.
    * Drop every row starting at MAX_BYTES_PER_TABLE (and warn)
    * Drop every column after MAX_COLUMNS_PER_TABLE (and warn)
    * Drop every row starting at MAX_ROWS_PER_TABLE (and warn)

    Returns a ``ParseJsonResult`` built from the handler's columns and the
    warnings joined with newlines.
    """
    handler = JsonContentHandler()  # accumulates the parsed columns
    yajl = YajlParser(handler)
    messages = []

    try:
        yajl.parse(bytesio)
    except (JsonRootIsNotArray, JsonRecordIsNotObject):
        messages.append(
            "Workbench cannot import this JSON file. The JSON file "
            "must be an Array of Objects for Workbench to import it.")
    except JsonNumberTooLarge as err:
        messages.append(
            f'Stopped parsing JSON because the number "{err.value_str}" '
            "is too large.")
    except JsonTooManyRows:
        messages.append("The input had too many rows, so we removed rows.")
    except JsonTooManyBytes:
        messages.append(
            "The input was too large, so we stopped before reading the whole "
            "file.")
    except YajlError as err:
        # The message is multi-line, e.g.
        # 'lexical error: ...\n    blah\n    ^^here'; keep the first line.
        first_line = str(err).split("\n")[0]
        # Word the message differently when the first column already
        # holds values, i.e. some data was parsed before the error.
        if (handler.columns
                and next(iter(handler.columns.values())).values):
            prefix = "Stopped parsing after JSON "
        else:
            prefix = "JSON "
        messages.append(prefix + first_line)

    if handler.truncated_columns:
        messages.append("The input had too many columns, so we removed some.")

    return ParseJsonResult(handler.columns, "\n".join(messages))
Пример #5
0
def main():
    """Reformat JSON using a YAJL parser driven by command-line options.

    Options (parsed with ``optparse`` from the process arguments):
        -m  minimize instead of beautify (stored as ``beautify=False``)
        -u  do not validate UTF-8 inside strings
        -e  escape forward slashes
        -s  treat the input as a stream of multiple JSON values

    ``yajl_parser.parse()`` is called without an argument, so the input is
    whatever the parser reads by default (the tool description says stdin).
    """
    opt_parser = optparse.OptionParser(description='reformat json from stdin',
                                       version='Yajl-Py for Yajl %s' %
                                       yajl_version)
    opt_parser.add_option("-m",
                          dest="beautify",
                          action="store_false",
                          default=True,
                          help="minimize json rather than beautify (default)")
    opt_parser.add_option(
        "-u",
        dest="dont_validate_strings",
        action='store_true',
        default=False,
        help="allow invalid UTF8 inside strings during parsing")
    # NOTE(review): ``escape_solidus`` is parsed but never used below --
    # confirm whether the content handler should receive it.
    opt_parser.add_option(
        "-e",
        dest="escape_solidus",
        action='store_true',
        default=False,
        help="escape any forward slashes (for embedding in HTML)")
    opt_parser.add_option("-s",
                          dest="stream",
                          action='store_true',
                          default=False,
                          help="reformat a stream of multiple json entites")
    (options, args) = opt_parser.parse_args()
    # initialize the content handler (creates a Yajl Gen)
    ch = ReformatContentHandler(
        beautify=options.beautify,
        stream=options.stream,
    )
    # initialize the parser
    yajl_parser = YajlParser(ch)
    yajl_parser.allow_comments = True  # let's allow comments by default
    yajl_parser.dont_validate_strings = options.dont_validate_strings
    # Multiple top-level values are accepted only in stream mode. The old
    # unconditional ``= True`` set just before this was dead: this
    # assignment always overwrote it.
    yajl_parser.allow_multiple_values = options.stream
    yajl_parser.parse()
Пример #6
0
    def parse(self):
        """Parse ``self.filename`` with YAJL and return the callback result.

        ``self.filename`` may be a path or an object with a ``read``
        attribute; a path is opened in text mode. The input is closed
        before returning, whether it was opened here or supplied by the
        caller.

        Returns:
            ``None`` unless the handler raises ``KillParse``, in which case
            the exception's ``ret`` is used. If ``self.callbacks`` has a
            ``"FinalCallback"`` entry, the value is passed through it
            (with ``self.callbacks`` as a second argument when a
            ``"CallbackData"`` entry exists) and its result is returned.
        """
        ret = None
        # NOTE(review): ``callbacks`` is not defined in this method;
        # presumably ``self.callbacks`` was intended -- confirm before
        # changing, as it may be a module-level name.
        self.handler = ParseContentHandler(callbacks)
        self.parser = YajlParser(content_handler=self.handler)
        self.parser.dont_validate_strings = self.utf8
        fh = self.filename
        # Detect a file-like object without calling read(): reading here
        # would consume the stream and leave nothing for the parser.
        if not hasattr(fh, 'read'):
            fh = open(self.filename, 'r')

        try:
            try:
                self.parser.parse(fh)
            except KillParse as e:
                # The handler stopped the parse early and carries a result.
                ret = e.ret
            if "FinalCallback" in self.callbacks:
                if "CallbackData" in self.callbacks:
                    # The whole callbacks mapping is passed as the second
                    # argument.
                    ret = self.callbacks["FinalCallback"](ret, self.callbacks)
                else:
                    ret = self.callbacks["FinalCallback"](ret)
        finally:
            # Close the input even when parsing or the final callback
            # raises.
            fh.close()
        return ret
def parseToQueue(stream, queue, column_map):
    """Parse ``stream`` with a handler built from ``queue`` and ``column_map``.

    After ``YajlParser.parse`` returns, ``None`` is put on ``queue``
    (presumably an end-of-data marker for the consumer -- confirm against
    the reader side). If parsing raises, nothing is put on the queue.
    """
    handler = ContentHandler(queue, column_map)
    yajl = YajlParser(handler)
    yajl.parse(stream)
    queue.put(None)