Пример #1
0
def test_ijson_parser():
    """Smoke-test ``IjsonParser`` by printing everything it produces.

    Two in-memory JSON documents are parsed in turn: a single object with
    nested containers, then an array of small objects.  The function makes
    no assertions; it only fails if construction or parsing raises.
    """
    from io import BytesIO

    documents = (
        """
    {
        "foo": "bar",
        "foo2": [1, 2, 3],
        "bar": false,
        "something": null,
        "dct": { "foo": [[[], [[]]]], "bar": {"a": {}, "b": { "c": {}}} }
    }
    """,
        """
    [
    { "id": 1 },
    { "id": 2 },
    { "id": 3 }
    ]
    """,
    )

    for text in documents:
        # ijson is fed a binary stream, hence the UTF-8 encoded buffer.
        stream = BytesIO(bytearray(text, 'utf-8'))
        parser = IjsonParser(ijson.basic_parse(stream))
        for item in parser.parse_all():
            print(item)
Пример #2
0
def test_ijson_parser():
    """Print every value ``IjsonParser`` yields for two sample documents.

    The first document is a single object containing nested lists and maps,
    the second is a top-level array of objects.  Nothing is asserted; the
    test passes as long as no exception is raised.
    """
    from io import BytesIO

    def dump(text):
        # Encode to UTF-8 so the parser receives a binary file-like object.
        source = BytesIO(bytearray(text, 'utf-8'))
        wrapped = IjsonParser(ijson.basic_parse(source))
        for parsed in wrapped.parse_all():
            print(parsed)

    dump("""
    {
        "foo": "bar",
        "foo2": [1, 2, 3],
        "bar": false,
        "something": null,
        "dct": { "foo": [[[], [[]]]], "bar": {"a": {}, "b": { "c": {}}} }
    }
    """)

    dump("""
    [
    { "id": 1 },
    { "id": 2 },
    { "id": 3 }
    ]
    """)
Пример #3
0
    def __iter__(self):
        """Iterate over the entities found in ``self._stream``.

        The stream is consumed incrementally with ``ijson.basic_parse``.  A
        map is yielded as soon as it closes when it is either the top-level
        value or a direct element of a top-level array; every other value is
        attached to the container that encloses it.

        When ``args.do_transit_decode`` is truthy, each string value is
        passed through ``transit_decode_string`` before being stored.

        :return: a generator of dicts
        :raises AssertionError: if ``transit_decode_string`` fails; the
            original exception is chained as ``__cause__``.
        :raises IndexError: if a scalar value appears outside any container.
        """
        context = []       # stack of dicts/lists that are still open
        name_context = []  # map keys whose values have not arrived yet

        def attach(item):
            # Store a finished value in the innermost open container.
            parent = context[-1]
            if isinstance(parent, list):
                parent.append(item)
            elif isinstance(parent, dict):
                parent[name_context.pop()] = item
            else:
                # Only dicts and lists are ever pushed onto the stack.
                raise Exception("WAT!")

        for event, value in ijson.basic_parse(self._stream):
            if event == 'start_map':
                context.append({})
            elif event == 'start_array':
                context.append([])
            elif event == 'map_key':
                name_context.append(value)
            elif event == 'string':
                if args.do_transit_decode:
                    try:
                        value = transit_decode_string(value)
                    except Exception as e:
                        # Catch Exception, not BaseException, so that
                        # KeyboardInterrupt/SystemExit still propagate, and
                        # keep the real cause attached for debugging.
                        raise AssertionError("Decode error!") from e
                attach(value)
            # NOTE(review): 'integer' and 'double' are included on the
            # assumption that some ijson backends report numbers under those
            # names -- confirm against the ijson version in use.
            elif event in {'number', 'integer', 'double', 'boolean', 'null'}:
                attach(value)
            elif event == 'end_map':
                entity = context.pop()
                at_top_level = not context  # allow reading a single entity
                in_top_level_list = (
                    len(context) == 1 and isinstance(context[0], list))
                if at_top_level or in_top_level_list:
                    yield entity
                else:
                    attach(entity)
            elif event == 'end_array':
                finished = context.pop()
                # A closed top-level array has no parent to attach to.
                if context:
                    attach(finished)
Пример #4
0
def _decode_transit_string(value):
    """Return *value* with its ``~``-prefixed tag decoded, if it has one.

    Recognised two-character prefixes:

    * ``~t`` -- remainder parsed with ``datetime_parse``
    * ``~b`` -- remainder decoded with ``b64decode``
    * ``~u`` -- remainder converted to ``UUID``
    * ``~f`` -- remainder converted to ``Decimal``
    * ``~~`` -- escaped tilde; one leading ``~`` is stripped
    * ``~r`` -- returned unchanged, prefix included

    Strings shorter than two characters, strings that do not start with
    ``~`` and strings with any other tag are returned unchanged.
    """
    if len(value) > 1 and value[0] == "~":
        tag = value[1]
        if tag == "t":
            return datetime_parse(value[2:])
        if tag == "b":
            return b64decode(value[2:])
        if tag == "u":
            return UUID(value[2:])
        if tag == "f":
            return Decimal(value[2:])
        if tag == "~":
            return value[1:]
    return value


def _attach_to_parent(parent, name_context, item):
    """Store *item* in *parent*, popping a key from *name_context* for dicts."""
    if isinstance(parent, list):
        parent.append(item)
    elif isinstance(parent, dict):
        parent[name_context.pop()] = item
    else:
        # Only dicts and lists are ever pushed onto the container stack.
        raise Exception("WAT!")


def parse_json_stream(stream):
    """Parse a stream of JSON entities incrementally.

    The stream can contain either a single JSON object or a list of JSON
    objects.  Each such object is yielded as soon as its closing brace has
    been read; nested maps and arrays are attached to their parents.
    String values are run through ``_decode_transit_string``.

    :param stream: file-like object handed to ``ijson.basic_parse``
    :return: a generator of entity objects (dicts)
    :raises IndexError: if a scalar value appears outside any container
    """
    context = []       # stack of dicts/lists that are still open
    name_context = []  # map keys whose values have not arrived yet
    for event, value in ijson.basic_parse(stream):
        if event == 'start_map':
            context.append({})
        elif event == 'start_array':
            context.append([])
        elif event == 'map_key':
            name_context.append(value)
        elif event == 'string':
            parent = context[-1]
            _attach_to_parent(
                parent, name_context, _decode_transit_string(value))
        # NOTE(review): 'integer' and 'double' are included on the assumption
        # that some ijson backends report numbers under those names --
        # confirm against the ijson version in use.
        elif event in {'number', 'integer', 'double', 'boolean', 'null'}:
            _attach_to_parent(context[-1], name_context, value)
        elif event == 'end_map':
            entity = context.pop()
            at_top_level = not context  # allow reading a single entity
            in_top_level_list = (
                len(context) == 1 and isinstance(context[0], list))
            if at_top_level or in_top_level_list:
                yield entity
            else:
                _attach_to_parent(context[-1], name_context, entity)
        elif event == 'end_array':
            finished = context.pop()
            # A closed top-level array has no parent to attach to.
            if context:
                _attach_to_parent(context[-1], name_context, finished)
Пример #5
0
    def Iterate(self, inpath, outpath=None, rfrom=1, rto=0):
        """Stream the JSON file at ``inpath`` and yield processed records.

        This is a generator: it yields whatever ``self._OnObjEnd_Iter`` and
        ``self._OnData_Iter`` produce while the file is being parsed.

        :param inpath: Path to the input file; opened in binary mode.
        :param outpath: Accepted but not referenced in this method body.
        :param rfrom: Stored in ``self._rfrom``.
        :param rto: Stored in ``self._rto``.

        The writer's header is emitted before parsing and its footer is
        always emitted in the ``finally`` clause.  ``FilterBreak`` and
        ``ToLimitBreak`` end the iteration silently.
        """
        # Select the ijson backend requested by the configuration.
        if self._backend == 'yajl2_cffi':
            import ijson.backends.yajl2_cffi as ijson
        elif self._backend == 'yajl2':
            import ijson.backends.yajl2 as ijson
        else:
            import ijson

        self._rfrom = rfrom
        self._rto = rto

        self._recno = 1
        self._lp_rec = 0
        progress_wanted = self._lp_step > 0 and Log.isEnabledFor(logging.INFO)
        if progress_wanted:
            self._lp_rec = self._lp_step

        # Parsing mode shared with the event handlers:
        #   0 - outside element (e.g. between records)
        #   2 - inside record while reading
        #   3 - inside record while skipping
        self._mode = 0
        current_key = self._rec_tag
        key_stack = deque()

        # State used for records that are read
        self._rectag = None
        self._curtag = None

        # State used for records that are skipped
        self._s_nestlev = 0

        try:
            # OBLIGATORY
            header = []
            if self._flt is not None and hasattr(self._flt, 'setHeader'):
                self._flt.setHeader(header)
            self._wri.writeHeader(header)

            # binary mode required by C-based backends
            with open(inpath, 'rb') as fd:
                for position, (event, value) in enumerate(ijson.basic_parse(fd)):
                    if event == 'map_key':
                        current_key = value
                    elif event == 'start_map':
                        key_stack.append(current_key)
                        if current_key is not None:
                            self._OnObjStart(current_key)
                    elif event == 'end_map':
                        current_key = key_stack.pop()
                        if current_key is not None:
                            for obj in self._OnObjEnd_Iter(current_key):
                                yield obj
                    elif event == 'start_array':
                        # A keyless array counts as an 'arr' object unless
                        # it is the very first event of the document.
                        if position > 0 and current_key is None:
                            key_stack.append('arr')
                            self._OnObjStart('arr')
                    elif event == 'end_array':
                        # Close only arrays that were opened as 'arr'.
                        if key_stack and key_stack[-1] == 'arr':
                            key_stack.pop()
                            for obj in self._OnObjEnd_Iter('arr'):
                                yield obj
                    else:
                        # Scalar value: name it by its key, else by event type.
                        label = event if current_key is None else current_key
                        for obj in self._OnData_Iter(label, value):
                            yield obj
        except (FilterBreak, ToLimitBreak):
            pass
        finally:
            # OBLIGATORY
            footer = []
            if self._flt is not None and hasattr(self._flt, 'setFooter'):
                self._flt.setFooter(footer)
            self._wri.writeFooter(footer)
Пример #6
0
    def Process(self, inpath, outpath=None, rfrom=1, rto=0):
        """Parse the JSON file at ``inpath`` and dispatch its events.

        Parameters are usually passed from YAML file as subkeys of
        ``Reader:PArg`` key.

        :param inpath: Path to input file; opened in binary mode.
        :param outpath: Path to output file for the Writer (fall-back if
            output connector is not defined); not referenced in this
            method body.
        :param rfrom: Start of the record scope, stored in ``self._rfrom``.
        :param rto: End of the record scope, stored in ``self._rto``.

        The writer's header is emitted before parsing and its footer is
        always emitted in the ``finally`` clause.  ``FilterBreak`` and
        ``ToLimitBreak`` stop processing silently.

        For more detailed descriptions see :ref:`readers_conf_template`.
        """
        # Select the ijson backend requested by the configuration.
        if self._backend == 'yajl2_cffi':
            import ijson.backends.yajl2_cffi as ijson
        elif self._backend == 'yajl2':
            import ijson.backends.yajl2 as ijson
        else:
            import ijson

        self._rfrom = rfrom
        self._rto = rto

        self._recno = 1
        self._lp_rec = 0
        progress_wanted = self._lp_step > 0 and Log.isEnabledFor(logging.INFO)
        if progress_wanted:
            self._lp_rec = self._lp_step

        # Parsing mode shared with the event handlers:
        #   0 - outside element (e.g. between records)
        #   2 - inside record while reading
        #   3 - inside record while skipping
        self._mode = 0
        current_key = self._rec_tag
        key_stack = deque()

        # State used for records that are read
        self._rectag = None
        self._curtag = None

        # State used for records that are skipped
        self._s_nestlev = 0

        try:
            # OBLIGATORY
            header = []
            if self._flt is not None and hasattr(self._flt, 'setHeader'):
                self._flt.setHeader(header)
            self._wri.writeHeader(header)

            # binary mode required by C-based backends
            with open(inpath, 'rb') as fd:
                for position, (event, value) in enumerate(ijson.basic_parse(fd)):
                    if event == 'map_key':
                        current_key = value
                    elif event == 'start_map':
                        key_stack.append(current_key)
                        if current_key is not None:
                            self._OnObjStart(current_key)
                    elif event == 'end_map':
                        current_key = key_stack.pop()
                        if current_key is not None:
                            self._OnObjEnd(current_key)
                    elif event == 'start_array':
                        # A keyless array counts as an 'arr' object unless
                        # it is the very first event of the document.
                        if position > 0 and current_key is None:
                            key_stack.append('arr')
                            self._OnObjStart('arr')
                    elif event == 'end_array':
                        # Close only arrays that were opened as 'arr'.
                        if key_stack and key_stack[-1] == 'arr':
                            key_stack.pop()
                            self._OnObjEnd('arr')
                    else:
                        # Scalar value: name it by its key, else by event type.
                        label = event if current_key is None else current_key
                        self._OnData(label, value)
        except (FilterBreak, ToLimitBreak):
            pass
        finally:
            # OBLIGATORY
            footer = []
            if self._flt is not None and hasattr(self._flt, 'setFooter'):
                self._flt.setFooter(footer)
            self._wri.writeFooter(footer)