def entries(self):
    """Yield one parsed entry for every object under the ``entries.item`` prefix.

    Events are pulled from ``self._parser``. Each time an object opens at
    the ``entries.item`` prefix a fresh ``ijson.ObjectBuilder`` collects its
    events; when that object closes, the built value is handed to
    ``self._parse_entry`` and the result is yielded.
    """
    collector = None
    for path, kind, payload in self._parser:
        at_entry = path == 'entries.item'

        # A new entry object begins: start collecting its events.
        if at_entry and kind == 'start_map':
            collector = ijson.ObjectBuilder()

        # Events outside an entry object are ignored.
        if collector is not None:
            collector.event(kind, payload)

        # The entry object is complete: emit it and stop collecting.
        if at_entry and kind == 'end_map':
            yield self._parse_entry(collector.value)
            collector = None
def streamJsonArrayItems(f):
    """Yield the objects of a JSON array one at a time.

    The input is consumed as a stream of ijson events, so the whole
    document is never loaded into memory at once.

    :param f: source passed straight to ``ijson.parse``.
    """
    events = ijson.parse(f)

    # The first event is consumed and checked to be the opening of the
    # array. ``assertEqual`` is defined outside this block -- presumably it
    # raises on mismatch; verify against its definition.
    first = next(events)
    assertEqual('start_array', first[1])

    # ``depth`` counts open maps so that objects nested inside an item do
    # not end the item early.
    depth = 0
    builder = ijson.ObjectBuilder()
    for _prefix, kind, payload in events:
        builder.event(kind, payload)
        if kind == 'start_map':
            depth += 1
            continue
        if kind != 'end_map':
            continue
        depth -= 1
        if depth == 0:
            # Outermost map just closed: the item is complete.
            yield builder.value
            builder = ijson.ObjectBuilder()
def jsonObjectReader(filepath):
    """
    Creates a generator that parses an array of json objects from a valid
    json array file, yielding each top level json object in the array.

    The file is opened in binary mode and is closed as soon as the
    generator is exhausted, closed, or garbage collected. An empty array
    yields nothing.

    :param filepath: path to json file.
    :raises Exception: if an ``end_map`` or ``end_array`` event is seen
        while no top level object / array is open.
    """
    top_level_array = False
    array_stack = 0
    top_level_object = False
    object_stack = 0
    # No builder exists until a top level object starts; events seen while
    # it is None (e.g. the closing of an empty array) are not forwarded.
    builder = None

    with open(filepath, 'rb') as json_file:
        for _, event, value in ijson.parse(json_file):
            if event == 'start_array':
                if not top_level_array:
                    # The enclosing array itself is not part of any object.
                    top_level_array = True
                    continue
                array_stack += 1
            elif event == 'start_map':
                if not top_level_object:
                    top_level_object = True
                    builder = ijson.ObjectBuilder()
                else:
                    object_stack += 1
            elif event == 'end_map':
                if not top_level_object:
                    raise Exception('end_map without a top level object')
                if object_stack == 0:
                    # The top level object is complete; its value was fully
                    # populated by the events forwarded so far.
                    top_level_object = False
                    yield builder.value
                    builder = None
                    continue
                object_stack -= 1
            elif event == 'end_array':
                if not top_level_array:
                    raise Exception('end_array without a top level array')
                if array_stack == 0:
                    top_level_array = False
                else:
                    array_stack -= 1

            if builder is None:
                continue

            # convert Decimal to float because mongo can't serialize Decimal
            # TODO is this the right place to do this? Should it be done
            # instead upon save?
            if isinstance(value, decimal.Decimal):
                # TODO this has different behavior on python 2.6 vs 2.7 due
                # to different rounding behavior
                value = float(value)
            builder.event(event, value)
def _read_file_generator(self, f, skip):
    """Yield each map or array that starts at a non-empty, dot-free prefix.

    Events come from ``ijson.parse(f)``. When a ``start_map`` or
    ``start_array`` event arrives at such a prefix, every following event
    up to (but not including) the matching end event at a dot-free prefix
    is fed to an ``ijson.ObjectBuilder`` and the built value is yielded.
    Running out of events, even in the middle of a value, simply ends the
    generator.

    :param f: source passed to ``ijson.parse``.
    :param skip: not used by this function.
    """
    try:
        stream = iter(ijson.parse(f))
        for path, kind, payload in stream:
            # Skip the root prefix ('') and anything nested deeper.
            if not path or '.' in path:
                continue
            if kind not in ('start_map', 'start_array'):
                continue

            collector = ijson.ObjectBuilder()
            closing = kind.replace('start', 'end')
            # Drain the shared stream until the container opened above
            # closes at a dot-free prefix.
            while '.' in path or kind != closing:
                collector.event(kind, payload)
                path, kind, payload = next(stream)
            yield collector.value
    except StopIteration:
        # Raised by the inner next() when the stream ends mid-value.
        pass
def _parse_defs(self):
    """Consume the definitions that precede the ``entries`` array.

    Reads events from ``self._parser`` until one with the prefix
    ``entries`` is seen. Along the way the value found at ``tsc_rate`` is
    stored in ``self._tsc_rate``, and the complete ``tpoints`` array is
    built and passed to ``self._parse_tpoints``.
    """
    collector = None
    for path, kind, payload in self._parser:
        # If we reach entries array, there are no more tracepoint definitions
        if path == 'entries':
            break
        if path == 'tsc_rate':
            self._tsc_rate = payload
            continue

        in_tpoints = path == 'tpoints'

        # Opening of the tpoints array: start collecting events.
        if in_tpoints and kind == 'start_array':
            collector = ijson.ObjectBuilder()

        if collector is not None:
            collector.event(kind, payload)

        # Closing of the tpoints array: hand over the built list.
        if in_tpoints and kind == 'end_array':
            self._parse_tpoints(collector.value)
            collector = None
def ijson_top_level_items(file, local_streaming_backend):
    """Yield ``(prefix, value)`` for each map or array found at a non-root prefix.

    The backend returned by ``get_ijson(local_streaming_backend)`` parses
    ``file`` into events. Whenever a ``start_map`` or ``start_array`` event
    arrives at a non-empty prefix, the following events up to the matching
    end event at that same prefix are fed to ``ijson_mod.ObjectBuilder``
    and the built value is yielded together with the prefix. Running out of
    events, even in the middle of a value, simply ends the generator.

    :param file: source passed to the backend's ``parse``.
    :param local_streaming_backend: forwarded to ``get_ijson``.
    """
    backend = get_ijson(local_streaming_backend)
    stream = iter(backend.parse(file))
    target = None
    try:
        for path, kind, payload in stream:
            # Events at the root prefix never start a value.
            if path == '':
                continue
            target = path
            if kind not in ('start_map', 'start_array'):
                continue

            # Note: the builder comes from ``ijson_mod``, not from the
            # backend selected above.
            collector = ijson_mod.ObjectBuilder()
            closing = kind.replace('start', 'end')
            # Drain the shared stream until the container opened at
            # ``target`` closes.
            while (path, kind) != (target, closing):
                collector.event(kind, payload)
                path, kind, payload = next(stream)
            yield path, collector.value
    except StopIteration:
        # Raised by the inner next() when the stream ends mid-value.
        pass