# Exemplo n.º 1
# 0
def objectify_write(path_buf_stream,
                    buf,
                    as_json=False,
                    encoding=_DEFAULT_ENCODING):
    """Write str or bytes to a file path or an already-open writable stream

    path_buf_stream: a writable stream-like object or a file path
    buf: the str/bytes payload (or, with as_json=True, an object to dump)
    encoding: text encoding used only when opening a file path

    Exits the process on JSON serialization failure or unexpected errors;
    logs and returns None on OSError.
    """
    # Anything exposing write() is treated as an already-open stream;
    # everything else is assumed to be a path we must open ourselves
    is_stream = hasattr(path_buf_stream, 'write')
    try:
        target = path_buf_stream if is_stream else open(
            path_buf_stream, 'w', encoding=encoding)
        with target as infd:
            if as_json is True:
                try:
                    return dump(buf, infd)
                except Exception:
                    error_frame('Unable to write JSON to file, invalid JSON?')
                    exit(1)
            return infd.write(buf)
    except OSError as err:
        error_frame('Problem writing to file')
        error('OSError({0}): {1}'.format(err.errno, err.strerror))
    except Exception as err:
        error_frame(repr(err))
        exit(1)
# Exemplo n.º 2
# 0
def _load_yaml_ordered(yamlstring):
    """Load a YaML string as an OrderedDict, which is important if you plan to
    do nested templating on it using a templating engine like Jinja2

    Calling objectify_yaml() is functionally equivalent to calling this
    function directly, unless template=True is set. In other words, use
    objectify_yaml, do *not* use _load_yaml_ordered directly as it may
    change!

    Returns:
      None on error
      OrderedDict({}) on empty file or general (non-YaML) exception
      OrderedDict({contents}) on properly formatted YaML file
    """
    def _sanity_check(yamlstring):
        """Sanity check to identify duplicate top-level keys

        If there are duplicate top-level keys, they will simply overwrite
        one-another as they are loaded into a Python dict() so it is a
        good idea to keep this logic, though it isn't very pretty
        """
        lines = yamlstring.splitlines()
        top_level_keys = []
        duped_keys = []

        yaml_key_compiled = regex_compile(r'^([A-Za-z0-9_]+) *:')

        for line in lines:
            matched = yaml_key_compiled.search(line)
            if matched:
                key = matched.group(1)
                if key in top_level_keys:
                    duped_keys.append(key)
                else:
                    top_level_keys.append(key)
        if duped_keys:
            # BUG FIX: stderr.write() accepts a single string argument; the
            # old call passed the list as a second positional arg, raising
            # TypeError instead of printing the intended diagnostic
            stderr.write('YaML file %s contains duplicate top-level keys\n'
                         % duped_keys)
            exit(1)
        return yamlstring, top_level_keys

    yamlstring, top_level_keys = _sanity_check(yamlstring)
    yamlfd = StringIO(yamlstring)

    # 2nd pass to set up the OrderedDict
    try:
        dict_tmp = load_yaml_plain(yamlfd)
        if dict_tmp is None:
            # BUG FIX: an empty YaML document loads as None; honor the
            # documented contract of returning an empty OrderedDict instead
            # of raising and falling into the generic handler
            return OrderedDict()
        return OrderedDict([(key, dict_tmp[key]) for key in top_level_keys])
    except YAMLException as err:
        error('Parse error, invalid YaML')
        error_frame(repr(err))
        return None
    except Exception as err:
        error('Unknown exception when parsing YaML')
        error_frame(repr(err))
        return None
# Exemplo n.º 3
# 0
def objectify_read(path_buf_stream, encoding=_DEFAULT_ENCODING):
    """Read and return the full contents (str or bytes) of a file or stream

    path_buf_stream: a readable stream-like object or a file path
    encoding: text encoding used only when opening a file path

    Logs and returns None on OSError; exits the process on any other error.
    """
    # Anything exposing read() is treated as an already-open stream;
    # everything else is assumed to be a path we must open ourselves
    is_stream = hasattr(path_buf_stream, 'read')
    try:
        source = path_buf_stream if is_stream else open(
            path_buf_stream, 'r', encoding=encoding)
        with source as infd:
            return infd.read()
    except OSError as err:
        error_frame('Problem reading from file')
        error('OSError({0}): {1}'.format(err.errno, err.strerror))
    except Exception as err:
        error_frame(repr(err))
        exit(1)
# Exemplo n.º 4
# 0
def objectify_json(path_buf_stream,
                   encoding=_DEFAULT_ENCODING,
                   from_string=False,
                   ensure_ascii=False,
                   encode_html_chars=False):
    """Return a native Python object from a JSON file path, stream or string

    path_buf_stream: a readable stream-like object, a file path, or (with
        from_string=True) a raw string of JSON content
    encoding: text encoding used only when opening a file path
    ensure_ascii / encode_html_chars: passed through to the JSON loader

    Returns None (after logging the error) if the JSON cannot be parsed.
    """
    if from_string is True:
        # Wrap the raw JSON text in a stream so both branches below are uniform
        path_buf_stream = StringIO(path_buf_stream)

    read = getattr(path_buf_stream, 'read', None)

    if read is not None:
        # BUG FIX: the loader expects a file-like object; the old code called
        # load(read(), ...), handing it the already-read *string* instead of
        # the stream. Also mirror the file branch's error handling so a bad
        # stream logs instead of raising.
        try:
            return load(path_buf_stream,
                        ensure_ascii=ensure_ascii,
                        encode_html_chars=encode_html_chars)
        except Exception as err:
            error(repr(err))
            return None
    with open(path_buf_stream, encoding=encoding) as infd:
        try:
            return load(infd,
                        ensure_ascii=ensure_ascii,
                        encode_html_chars=encode_html_chars)
        except Exception as err:
            error(repr(err))
# Exemplo n.º 5
# 0
def objectify_json_lines(path_buf_stream,
                         from_string=False,
                         fatal_errors=True,
                         encoding=_DEFAULT_ENCODING,
                         ensure_ascii=False,
                         encode_html_chars=False,
                         avoid_memory_pressure=True):
    """Yield an object for each line of JSON in a file, stream or string

    in: path_buf_stream:
      (str) A string file path containing JSON
      (stream) An open readable stream from a file containing JSON
      (stream) A string of JSON content (also requires `from_string=True`)

    By default this operates lazily (a generator), to avoid using huge
    amounts of memory when loading a very large file- afterall, this is the
    primary benefit of the JSON lines format. With
    avoid_memory_pressure=False it instead returns a plain list of objects
    (or None on a decode error when fatal_errors is True).

    There are three ways to invoke this function
    Each of them returns a native Python object

    for obj in objectify_json_lines('file.json'):
        print(obj.items())

    json_fd = open('file.json', 'r', encoding='utf-8')
    for obj in objectify_json_lines(json_fd):
        print(obj.items())

    json_str = '{"A": "B"}\n{"C": "D"}'
    for obj in objectify_json_lines(json_str, from_string=True):
        print(obj.items())
    """
    if from_string is True:
        # If caller specifies path_buf_stream is a string, turn it into
        # a stream to avoid an extra set of logic below
        assert isinstance(path_buf_stream, str)
        path_buf_stream = StringIO(path_buf_stream)

    # If path_buf_stream has a read method, it is effectively a stream
    reader = getattr(path_buf_stream, 'read', None)

    def _open_fd():
        """One place to choose between caller's stream and opening a path"""
        return path_buf_stream if reader else open(
            path_buf_stream, 'r', encoding=encoding)

    def _generate():
        """Lazy path: one object per stripped line, handle open while iterating"""
        with _open_fd() as infd:
            for line in infd.readlines():
                line = line.strip()
                # Exception handlers are expensive to set up and even more
                # expensive when they fire. If errors should be fatal, don't
                # bother setting one up at all
                if fatal_errors is True:
                    yield loads(line)
                else:
                    # The more expensive path, preparing to catch an exception
                    # and continue gracefully if fatal_errors is False
                    try:
                        yield loads(line)
                    except Exception as err:
                        error('bad JSON-line line: {}'.format(repr(err)))
                        continue

    if avoid_memory_pressure is False:
        # Eager path: materialize every object up front as a regular list.
        # BUG FIX: this branch used to live inside the generator body itself,
        # where `return [...]` is swallowed by the generator protocol and the
        # caller received an empty iteration; it also called
        # `infd.read.splitlines()` on the bound method instead of
        # `infd.read().splitlines()` (TypeError at runtime)
        with _open_fd() as infd:
            if fatal_errors is True:
                try:
                    return [loads(line)
                            for line in infd.read().splitlines() if line]
                except JSONDecodeError:
                    return None
            obj_list = []
            for line in infd.read().splitlines():
                try:
                    obj_list.append(loads(line))
                except JSONDecodeError:
                    # Silently ignore bad lines ..
                    continue
            return obj_list

    return _generate()