def objectify_write(path_buf_stream, buf, as_json=False, encoding=_DEFAULT_ENCODING):
    """Write `buf` to a file path or to an already-open writable stream

    path_buf_stream:
        Either an object exposing a `write` attribute (used as-is) or a file
        path, which is opened in text mode 'w' with `encoding`
    buf:
        The data to write; when `as_json` is exactly True it is handed to
        dump() together with the output stream instead of being written raw
    as_json:
        Only the literal value True selects the dump() path
    encoding:
        Text encoding used when a path has to be opened

    Returns whatever dump() or the stream's write() returns on success.

    On failure the problem is reported through error_frame()/error() and the
    process is terminated with exit(1).

    Note that the target is always driven through a `with` statement, so a
    caller-supplied stream has its context manager exited on return (for
    ordinary file objects that means it is closed).
    """
    has_write = getattr(path_buf_stream, 'write', None)
    try:
        if has_write:
            target = path_buf_stream
        else:
            target = open(path_buf_stream, 'w', encoding=encoding)
        with target as outfd:
            if as_json is True:
                try:
                    return dump(buf, outfd)
                except Exception:
                    error_frame('Unable to write JSON to file, invalid JSON?')
                    exit(1)
            return outfd.write(buf)
    except OSError as os_err:
        error_frame('Problem writing to file')
        error('OSError({0}): {1}'.format(os_err.errno, os_err.strerror))
    except Exception as any_err:
        error_frame(repr(any_err))
    # Only reached when one of the handlers above has logged a failure
    exit(1)
def _load_yaml_ordered(yamlstring):
    """Load a YaML document as an OrderedDict keyed by its top-level keys

    Preserving the order of the top-level keys is important if you plan to do
    nested templating on the result using a templating engine like Jinja2.

    Calling objectify_yaml() is functionally equivalent to calling this
    function directly, unless template=True is set. In other words, use
    objectify_yaml, do *not* use _load_yaml_ordered directly as it may change!

    yamlstring:
        The complete YaML document as a str

    Returns:
        None on a YaML parse error or on any other exception while loading
        OrderedDict() when no top-level keys are found (e.g. an empty file)
        OrderedDict({contents}) on properly formatted YaML file

    Exits the process with status 1 when duplicate top-level keys are found.

    Limitation: only top-level keys made of letters, digits and underscores
    (the pattern used by the sanity check) are carried into the result.
    """

    def _sanity_check(yamlstring):
        """Sanity check to identify duplicate top-level keys

        If there are duplicate top-level keys, they will simply overwrite
        one-another as they are loaded into a Python dict() so it is a good
        idea to keep this logic, though it isn't very pretty

        Returns the unchanged string and the top-level keys in file order.
        """
        yaml_key_compiled = regex_compile(r'^([A-Za-z0-9_]+) *:')
        top_level_keys = []
        seen_keys = set()  # O(1) membership; the list above keeps the order
        duped_keys = []
        for line in yamlstring.splitlines():
            matched = yaml_key_compiled.search(line)
            if not matched:
                continue
            key = matched.group(1)
            if key not in seen_keys:
                seen_keys.add(key)
                top_level_keys.append(key)
            elif key not in duped_keys:
                # Report each offending key once, however often it repeats
                duped_keys.append(key)
        if duped_keys:
            # write() accepts a single string, so format before writing
            stderr.write(
                'YaML file contains duplicate top-level keys: %s\n'
                % ', '.join(duped_keys))
            exit(1)
        return yamlstring, top_level_keys

    yamlstring, top_level_keys = _sanity_check(yamlstring)
    yamlfd = StringIO(yamlstring)
    # 2nd pass to set up the OrderedDict
    # NOTE(review): if load_yaml_plain is an unsafe YAML loader, do not feed
    # it untrusted input -- confirm which loader it is bound to
    try:
        dict_tmp = load_yaml_plain(yamlfd)
        return OrderedDict([(key, dict_tmp[key]) for key in top_level_keys])
    except YAMLException as err:
        error('Parse error, invalid YaML')
        error_frame(repr(err))
        return None
    except Exception as err:
        error('Unknown exception when parsing YaML')
        error_frame(repr(err))
        return None
def objectify_read(path_buf_stream, encoding=_DEFAULT_ENCODING):
    """Read and return the full contents of a file path or readable stream

    path_buf_stream:
        Either an object exposing a `read` attribute (used as-is) or a file
        path, which is opened in text mode 'r' with `encoding`
    encoding:
        Text encoding used when a path has to be opened

    Returns whatever the stream's read() returns.

    On failure the problem is reported through error_frame()/error() and the
    process is terminated with exit(1).

    Note that the source is always driven through a `with` statement, so a
    caller-supplied stream has its context manager exited on return (for
    ordinary file objects that means it is closed).
    """
    has_read = getattr(path_buf_stream, 'read', None)
    try:
        if has_read:
            source = path_buf_stream
        else:
            source = open(path_buf_stream, 'r', encoding=encoding)
        with source as infd:
            return infd.read()
    except OSError as os_err:
        error_frame('Problem reading from file')
        error('OSError({0}): {1}'.format(os_err.errno, os_err.strerror))
    except Exception as any_err:
        error_frame(repr(any_err))
    # Only reached when one of the handlers above has logged a failure
    exit(1)
def objectify_json(path_buf_stream, encoding=_DEFAULT_ENCODING, from_string=False,
                   ensure_ascii=False, encode_html_chars=False):
    """Return a native Python object from a JSON file path, stream or string

    path_buf_stream:
        A file path, an object exposing a `read` attribute, or (only when
        `from_string` is exactly True) a str holding the JSON text itself
    encoding:
        Text encoding used when a path has to be opened
    from_string:
        When exactly True, `path_buf_stream` is wrapped in a StringIO
    ensure_ascii, encode_html_chars:
        Forwarded unchanged as keyword arguments to load()

    Returns the object produced by load().

    Error handling differs by source: for a file path, an exception raised
    by load() is reported through error() and None is returned; for a stream
    or string, exceptions from load() propagate to the caller.
    """
    if from_string is True:
        path_buf_stream = StringIO(path_buf_stream)

    if getattr(path_buf_stream, 'read', None) is not None:
        # Hand load() the stream itself, exactly as the file-path branch
        # below does, rather than the text already read out of it
        return load(path_buf_stream,
                    ensure_ascii=ensure_ascii,
                    encode_html_chars=encode_html_chars)

    with open(path_buf_stream, encoding=encoding) as infd:
        try:
            return load(infd,
                        ensure_ascii=ensure_ascii,
                        encode_html_chars=encode_html_chars)
        except Exception as err:
            error(repr(err))
            return None
def _open_json_lines_source(path_buf_stream, from_string, encoding):
    """Return a context-managed readable stream for a path, stream or string"""
    if from_string is True:
        # If caller specifies path_buf_stream is a string, turn it into
        # a stream to avoid an extra set of logic in the consumers
        assert isinstance(path_buf_stream, str)
        return StringIO(path_buf_stream)
    # If path_buf_stream has a read method, it is effectively a stream
    if getattr(path_buf_stream, 'read', None):
        return path_buf_stream
    return open(path_buf_stream, 'r', encoding=encoding)


def _iter_json_lines(path_buf_stream, from_string, fatal_errors, encoding):
    """Lazily yield one decoded object per non-blank line of the source"""
    with _open_json_lines_source(path_buf_stream, from_string, encoding) as infd:
        # Iterate the stream directly; readlines() would pull the whole file
        # into memory and defeat the purpose of using a generator
        for line in infd:
            line = line.strip()
            if not line:
                continue
            if fatal_errors is True:
                # No handler at all: a bad line propagates to the caller
                yield loads(line)
                continue
            # Decode inside the handler but yield outside of it, so that an
            # exception raised by the consumer is never swallowed here
            try:
                obj = loads(line)
            except Exception as err:
                error('bad JSON-line line: {}'.format(repr(err)))
                continue
            yield obj


def _json_lines_as_list(path_buf_stream, from_string, fatal_errors, encoding):
    """Eagerly decode every non-blank line of the source into a list"""
    with _open_json_lines_source(path_buf_stream, from_string, encoding) as infd:
        lines = [line for line in infd.read().splitlines() if line.strip()]
    if fatal_errors is True:
        try:
            return [loads(line) for line in lines]
        except JSONDecodeError:
            return None
    obj_list = []
    for line in lines:
        try:
            obj_list.append(loads(line))
        except JSONDecodeError:
            # Silently ignore bad lines ..
            continue
    return obj_list


def objectify_json_lines(path_buf_stream, from_string=False, fatal_errors=True,
                         encoding=_DEFAULT_ENCODING, ensure_ascii=False,
                         encode_html_chars=False, avoid_memory_pressure=True):
    """Return an object for each line of JSON in a file, stream or string

    in:
        path_buf_stream:
            (str) A string file path containing JSON
            (stream) An open readable stream from a file containing JSON
            (str) A string of JSON content (also requires `from_string=True`)
        from_string:
            Must be exactly True for `path_buf_stream` to be treated as
            JSON content rather than as a path
        fatal_errors:
            Generator mode: when exactly True a bad line raises whatever
            loads() raises; otherwise the error is reported through error()
            and the line is skipped
            List mode: when exactly True a JSONDecodeError makes the call
            return None; otherwise bad lines are silently skipped
        encoding:
            Text encoding used when a path has to be opened
        ensure_ascii, encode_html_chars:
            Accepted for interface compatibility; not used by this function
        avoid_memory_pressure:
            When exactly False a fully built list is returned instead of a
            generator. Most of the time nobody cares about memory, and
            having to work with a generator can be annoying for the caller

    By default this function intentionally hands back a generator, to avoid
    using huge amounts of memory when loading a very large file- after all,
    this is the primary benefit of the JSON lines format. It is meant to be
    consumed many times in succession, sometimes up to millions of times, so
    it is important that it is relatively quick/simple.

    Nothing is opened or validated in generator mode until the first item is
    requested. Blank lines are skipped in every mode. The source is driven
    through a `with` statement, so a caller-supplied stream has its context
    manager exited once it has been consumed.

    There are three ways to invoke this function
    Each of them returns a native Python object

    for obj in objectify_json_lines('file.json'):
        print(obj.items())

    json_fd = open('file.json', 'r', encoding='utf-8')
    for obj in objectify_json_lines(json_fd):
        print(obj.items())

    json_str = '{"A": "B"}\\n{"C": "D"}'
    for obj in objectify_json_lines(json_str, from_string=True):
        print(obj.items())
    """
    # This entry point must not contain `yield` itself: inside a generator
    # function a `return <list>` is never delivered to the caller, which is
    # why the two modes live in separate helpers
    if avoid_memory_pressure is False:
        return _json_lines_as_list(
            path_buf_stream, from_string, fatal_errors, encoding)
    return _iter_json_lines(
        path_buf_stream, from_string, fatal_errors, encoding)