Пример #1
0
def read_json_lines(finput, logger=logging, encoding='utf-8'):
    """Lazily read JSON objects from a line-delimited source

    Lines that cannot be decoded or parsed are reported through
    ``logger.error`` together with their 1-based line number and skipped.

    :param finput: input source; when ``isiterable(finput)`` is true it is
        wrapped with ``joint_context``, otherwise it is handed to
        ``open_gz`` in ``'r'`` mode
    :param logger: object whose ``error`` method receives parse failures
        (default: the ``logging`` module)
    :param encoding: encoding used to decode byte-string lines
        (default: UTF-8)
    :type encoding: str
    :rtype: generator
    """
    ctx = joint_context(finput) \
        if isiterable(finput) \
        else open_gz(finput, 'r')
    with ctx as fhandle:
        for idx, line in enumerate(fhandle, start=1):
            try:
                # json.loads() dropped its ``encoding`` keyword in
                # Python 3.9, so byte strings are decoded here instead.
                # UnicodeDecodeError is a ValueError subclass, so undecodable
                # lines are still logged and skipped below.
                if isinstance(line, bytes):
                    line = line.decode(encoding or 'utf-8')
                obj = json.loads(line)
            except ValueError as err:
                logger.error("Could not parse line %d: %s", idx, err)
                continue
            yield obj
Пример #2
0
def read_json_lines(finput, logger=logging, encoding='utf-8'):
    """Lazily read JSON objects from a line-delimited source

    Lines that cannot be decoded or parsed are reported through
    ``logger.error`` together with their 1-based line number and skipped.

    :param finput: input source; when ``isiterable(finput)`` is true it is
        wrapped with ``joint_context``, otherwise it is handed to
        ``open_gz`` in ``'r'`` mode
    :param logger: object whose ``error`` method receives parse failures
        (default: the ``logging`` module)
    :param encoding: encoding used to decode byte-string lines
        (default: UTF-8)
    :type encoding: str
    :rtype: generator
    """
    ctx = joint_context(finput) \
        if isiterable(finput) \
        else open_gz(finput, 'r')
    with ctx as fhandle:
        for idx, line in enumerate(fhandle, start=1):
            try:
                # json.loads() dropped its ``encoding`` keyword in
                # Python 3.9, so byte strings are decoded here instead.
                # UnicodeDecodeError is a ValueError subclass, so undecodable
                # lines are still logged and skipped below.
                if isinstance(line, bytes):
                    line = line.decode(encoding or 'utf-8')
                obj = json.loads(line)
            except ValueError as err:
                logger.error("Could not parse line %d: %s", idx, err)
                continue
            yield obj
Пример #3
0
def read_json_lines(finput, logger=logging, show_progress=None):
    """Lazily read JSON objects from a line-delimited source

    Lines that fail to parse are reported through ``logger.error`` together
    with their 1-based line number and skipped.

    :param finput: input source; when ``isiterable(finput)`` is true it is
        wrapped with ``joint_context``, otherwise it is handed to
        ``open_gz`` in ``'r'`` mode
    :param logger: object whose ``info`` and ``error`` methods receive
        progress and failure messages (default: the ``logging`` module)
    :param show_progress: when truthy, a progress message is logged at
        every line number greater than one that is a multiple of this value
    :rtype: generator
    """
    if isiterable(finput):
        source = joint_context(finput)
    else:
        source = open_gz(finput, 'r')

    with source as stream:
        line_no = 0
        for raw in stream:
            line_no += 1
            # Short-circuit order matters: the modulo must not be evaluated
            # when show_progress is None or 0.
            progress_due = (show_progress
                            and line_no % show_progress == 0
                            and line_no > 1)
            if progress_due:
                logger.info("Processed %d lines", line_no)
            try:
                parsed = json.loads(raw)
            except ValueError as err:
                logger.error("Could not parse line %d: %s", line_no, err)
            else:
                yield parsed
Пример #4
0
def read_json_lines(finput, logger=logging, show_progress=None):
    """Lazily read JSON objects from a line-delimited source

    Lines that fail to parse are reported through ``logger.error`` together
    with their 1-based line number and skipped.

    :param finput: input source; when ``isiterable(finput)`` is true it is
        wrapped with ``joint_context``, otherwise it is handed to
        ``open_gz`` in ``'r'`` mode
    :param logger: object whose ``info`` and ``error`` methods receive
        progress and failure messages (default: the ``logging`` module)
    :param show_progress: when truthy, a progress message is logged at
        every line number greater than one that is a multiple of this value
    :rtype: generator
    """
    if isiterable(finput):
        source = joint_context(finput)
    else:
        source = open_gz(finput, 'r')

    with source as stream:
        line_no = 0
        for raw in stream:
            line_no += 1
            # Short-circuit order matters: the modulo must not be evaluated
            # when show_progress is None or 0.
            progress_due = (show_progress
                            and line_no % show_progress == 0
                            and line_no > 1)
            if progress_due:
                logger.info("Processed %d lines", line_no)
            try:
                parsed = json.loads(raw)
            except ValueError as err:
                logger.error("Could not parse line %d: %s", line_no, err)
            else:
                yield parsed
Пример #5
0
def read_text_resource(finput, encoding='utf-8', ignore_prefix='#'):
    """Read a text resource, dropping comments and blank lines

    Each line is cut at the first occurrence of ``ignore_prefix`` (so
    trailing comments are removed as well as whole-line ones), stripped of
    surrounding whitespace, and yielded only if something is left.

    :param finput: path or file handle; when ``isiterable(finput)`` is true
        it is decoded with ``codecs.iterdecode`` and wrapped with
        ``joint_context``, otherwise it is opened with ``codecs.open``
    :type finput: str, file
    :param encoding: which encoding to use (default: UTF-8)
    :type encoding: str
    :param ignore_prefix: comment marker; text from its first occurrence to
        the end of the line is discarded. ``None`` or an empty string
        disables comment removal
    :type ignore_prefix: str, unicode
    :rtype: generator
    """
    ctx = joint_context(codecs.iterdecode(finput, encoding=encoding)) \
        if isiterable(finput) \
        else codecs.open(finput, 'r', encoding=encoding)
    with ctx as fhandle:
        for line in fhandle:
            # An empty marker would raise ValueError ("empty separator"),
            # so it is treated the same as None: no comment removal.
            if ignore_prefix:
                line = line.partition(ignore_prefix)[0]
            line = line.strip()
            if line:
                yield line
Пример #6
0
def read_text_resource(finput, encoding='utf-8', ignore_prefix='#'):
    """Read a text resource, dropping comments and blank lines

    Each line is cut at the first occurrence of ``ignore_prefix`` (so
    trailing comments are removed as well as whole-line ones), stripped of
    surrounding whitespace, and yielded only if something is left.

    :param finput: path or file handle; when ``isiterable(finput)`` is true
        it is decoded with ``codecs.iterdecode`` and wrapped with
        ``joint_context``, otherwise it is opened with ``codecs.open``
    :type finput: str, file
    :param encoding: which encoding to use (default: UTF-8)
    :type encoding: str
    :param ignore_prefix: comment marker; text from its first occurrence to
        the end of the line is discarded. ``None`` or an empty string
        disables comment removal
    :type ignore_prefix: str, unicode
    :rtype: generator
    """
    ctx = joint_context(codecs.iterdecode(finput, encoding=encoding)) \
        if isiterable(finput) \
        else codecs.open(finput, 'r', encoding=encoding)
    with ctx as fhandle:
        for line in fhandle:
            # An empty marker would raise ValueError ("empty separator"),
            # so it is treated the same as None: no comment removal.
            if ignore_prefix:
                line = line.partition(ignore_prefix)[0]
            line = line.strip()
            if line:
                yield line