Exemplo n.º 1
0
def recordlookup(table, key, dictionary=None):
    """
    Build a lookup from key values to lists of record objects.

    The first row of `table` is taken as the header. Every following row is
    wrapped in a `Record` and appended to the list stored under the value(s)
    of the field(s) selected by `key`.

    If `dictionary` is given it is filled in place and returned, otherwise a
    new `dict` is created and returned.

    """

    lookup = dict() if dictionary is None else dictionary

    rows = iter(table)
    header = next(rows)
    fieldnames = [text_type(name) for name in header]
    indices = asindices(header, key)
    assert len(indices) > 0, 'no key selected'
    keyof = operator.itemgetter(*indices)

    for row in rows:
        keyval = keyof(row)
        record = Record(row, fieldnames)
        if keyval not in lookup:
            lookup[keyval] = [record]
            continue
        # fetch, extend and store back (rather than mutating the stored list
        # in place) so that this works properly with shelve
        bucket = lookup[keyval]
        bucket.append(record)
        lookup[keyval] = bucket

    return lookup
Exemplo n.º 2
0
def iteraddfield(source, field, value, index):
    """
    Yield the rows of `source` with one extra field inserted.

    The first row yielded is the header with `field` inserted at position
    `index` (appended at the end if `index` is None). For every data row the
    new value is inserted at the same position: if `value` is callable it is
    called with the row wrapped as a `Record` and its result is used,
    otherwise `value` itself is used.

    A completely empty `source` (no header row) is treated as a table with no
    fields, so only the header containing `field` is yielded.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # a StopIteration escaping a generator body becomes a RuntimeError
        # (PEP 479), so an empty source has to be handled explicitly
        hdr = []
    flds = list(map(text_type, hdr))

    # determine index of new field
    if index is None:
        index = len(hdr)

    # construct output fields
    outhdr = list(hdr)
    outhdr.insert(index, field)
    yield tuple(outhdr)

    if callable(value):
        for row in it:
            # wrap rows as records if using calculated value
            rec = Record(row, flds)
            outrow = list(rec)
            outrow.insert(index, value(rec))
            yield tuple(outrow)
    else:
        for row in it:
            outrow = list(row)
            outrow.insert(index, value)
            yield tuple(outrow)
Exemplo n.º 3
0
def iteraddfieldusingcontext(table, field, query):
    """
    Yield the rows of `table` with a trailing field computed from context.

    The header is yielded first with `field` appended. For each data row,
    `query(prv, cur, nxt)` is called and its result is appended to the row:

    * `prv` is the previous *output* row as a `Record` (i.e. including the
      value already computed for it), or None for the first row;
    * `cur` is the current input row as a `Record`;
    * `nxt` is the next input row as a `Record`, or None for the last row.

    A table with a header but no data rows yields just the extended header.
    A completely empty table is treated as having no fields, so only
    ``(field,)`` is yielded.

    """
    it = iter(table)
    try:
        hdr = tuple(next(it))
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        hdr = tuple()
    flds = list(map(text_type, hdr))
    yield hdr + (field, )
    flds.append(field)
    it = (Record(row, flds) for row in it)
    prv = None
    try:
        cur = next(it)
    except StopIteration:
        # header only, no data rows to process
        return
    for nxt in it:
        v = query(prv, cur, nxt)
        yield tuple(cur) + (v, )
        # the next call sees this row together with its computed value
        prv = Record(tuple(cur) + (v, ), flds)
        cur = nxt
    # handle last row
    v = query(prv, cur, None)
    yield tuple(cur) + (v, )
Exemplo n.º 4
0
def iterrowselect(source, where, missing, complement):
    """
    Yield the header of `source` followed by the rows selected by `where`.

    Each data row is wrapped as a `Record` (constructed with the given
    `missing` value) and passed to `where`. The row is yielded, as a plain
    tuple, when the truth value of `where(row)` differs from `complement`;
    i.e. matching rows are kept when `complement` is false and non-matching
    rows are kept when it is true.

    A completely empty `source` yields nothing.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    yield tuple(hdr)

    # compare truth values, not raw objects: a predicate returning e.g. None
    # or '' must count as "no match" (None != False would otherwise be True)
    complement = bool(complement)
    for row in it:
        rec = Record(row, flds, missing=missing)
        if bool(where(rec)) != complement:  # XOR
            yield tuple(rec)
Exemplo n.º 5
0
def iterrowmap(source, rowmapper, header, failonerror):
    """
    Yield `header` followed by the result of `rowmapper` for each data row.

    The first row of `source` is consumed as the input header; each
    following row is wrapped as a `Record` and passed to `rowmapper`, whose
    result is yielded as a tuple. If mapping a row raises an exception it is
    re-raised when `failonerror` is true, otherwise the row is skipped.

    A completely empty `source` yields nothing.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    yield tuple(header)
    for row in it:
        rec = Record(row, flds)
        try:
            outrow = tuple(rowmapper(rec))
        except Exception:
            if failonerror:
                # bare raise keeps the original traceback intact
                raise
            continue
        # yield outside the try so only mapping failures are handled above
        yield outrow
Exemplo n.º 6
0
    def __iter__(self):
        """
        Iterate over the underlying table, writing it out as HTML on the way.

        The header and every data row are written to `self.source` via the
        module's `_write_begin` / `_write_row` / `_write_end` helpers and are
        also yielded to the caller. If `self.tr_style` is callable, data rows
        are wrapped (and yielded) as `Record` objects.

        The closing markup is only written once the iterator is exhausted.
        A completely empty table (no header row) writes nothing and yields
        nothing.

        """
        table = self.table
        source = self.source
        encoding = self.encoding
        errors = self.errors
        lineterminator = self.lineterminator
        caption = self.caption
        index_header = self.index_header
        tr_style = self.tr_style
        td_styles = self.td_styles
        vrepr = self.vrepr
        truncate = self.truncate

        with source.open('wb') as buf:

            # deal with text encoding
            if PY2:
                codec = getcodec(encoding)
                f = codec.streamwriter(buf, errors=errors)
            else:
                f = io.TextIOWrapper(buf,
                                     encoding=encoding,
                                     errors=errors,
                                     newline='')

            # write the table
            try:
                it = iter(table)

                # write header
                try:
                    hdr = next(it)
                except StopIteration:
                    # StopIteration must not escape a generator body
                    # (PEP 479); the finally clause below still runs
                    return
                _write_begin(f, hdr, lineterminator, caption, index_header,
                             truncate)
                yield hdr

                # write body
                if tr_style and callable(tr_style):
                    # wrap as records
                    it = (Record(row, hdr) for row in it)
                for row in it:
                    _write_row(f, hdr, row, lineterminator, vrepr,
                               tr_style, td_styles, truncate)
                    yield row

                # finish up
                _write_end(f, lineterminator)
                f.flush()

            finally:
                if not PY2:
                    # separate the text wrapper from `buf`; presumably so the
                    # wrapper cannot close the stream owned by the `with`
                    f.detach()
Exemplo n.º 7
0
def iterrowmapmany(source, rowgenerator, header, failonerror):
    """
    Yield `header` followed by every row produced by `rowgenerator`.

    The first row of `source` is consumed as the input header; each
    following row is wrapped as a `Record` and passed to `rowgenerator`,
    and every item it produces is yielded as a tuple. If an exception is
    raised while generating output for a row it is re-raised when
    `failonerror` is true; otherwise the remaining output for that row is
    dropped and processing continues with the next row.

    A completely empty `source` yields nothing.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    yield tuple(header)
    for row in it:
        rec = Record(row, flds)
        try:
            # the loop stays inside the try because rowgenerator may fail
            # lazily, part-way through producing its output
            for outrow in rowgenerator(rec):
                yield tuple(outrow)
        except Exception:
            if failonerror:
                # bare raise keeps the original traceback intact
                raise
Exemplo n.º 8
0
def iterselectusingcontext(table, query):
    """
    Yield the header of `table` followed by rows selected using context.

    For each data row, `query(prv, cur, nxt)` is called with the previous,
    current and next rows wrapped as `Record` objects (`prv` is None for the
    first row, `nxt` is None for the last row). The current row is yielded
    when the result is true.

    A table with a header but no data rows yields just the header. A
    completely empty table yields nothing.

    """
    it = iter(table)
    try:
        hdr = tuple(next(it))
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    yield hdr
    it = (Record(row, flds) for row in it)
    prv = None
    try:
        cur = next(it)
    except StopIteration:
        # header only, no data rows to select from
        return
    for nxt in it:
        if query(prv, cur, nxt):
            yield cur
        prv = cur
        cur = nxt
    # handle last row
    if query(prv, cur, None):
        yield cur
Exemplo n.º 9
0
def iterfieldmap(source, mappings, failonerror, errorvalue):
    """
    Yield a table whose fields are computed from `source` via `mappings`.

    `mappings` maps each output field name to one of:

    * a value found in the source header, or an integer smaller than the
      header length - the value is fetched from each row with
      `operator.itemgetter`;
    * a string - turned into a function with `expr`;
    * a callable - called with each row;
    * a 2-item tuple/list ``(srcfld, fm)`` where `fm` is a callable or a
      dict - combined via `composefun` / `composedict`.

    Anything else raises `ArgumentError`. Rows are passed to the mapping
    functions wrapped as `Record` objects.

    If computing a value raises an exception: with ``failonerror ==
    'inline'`` the exception object itself becomes the value; with any other
    true `failonerror` the exception is re-raised; otherwise `errorvalue` is
    used.

    A completely empty `source` yields nothing.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    # snapshot the output fields so the header and every row stay in step
    # even if `mappings` is modified while this generator is being consumed
    outhdr = tuple(mappings.keys())
    yield outhdr

    mapfuns = dict()
    for outfld, m in mappings.items():
        if m in hdr:
            mapfuns[outfld] = operator.itemgetter(m)
        elif isinstance(m, int) and m < len(hdr):
            mapfuns[outfld] = operator.itemgetter(m)
        elif isinstance(m, string_types):
            mapfuns[outfld] = expr(m)
        elif callable(m):
            mapfuns[outfld] = m
        elif isinstance(m, (tuple, list)) and len(m) == 2:
            srcfld = m[0]
            fm = m[1]
            if callable(fm):
                mapfuns[outfld] = composefun(fm, srcfld)
            elif isinstance(fm, dict):
                mapfuns[outfld] = composedict(fm, srcfld)
            else:
                raise ArgumentError('expected callable or dict')
        else:
            raise ArgumentError('invalid mapping %r: %r' % (outfld, m))

    for row in it:
        # wrap rows as records
        rec = Record(row, flds)
        outrow = list()
        for outfld in outhdr:
            try:
                val = mapfuns[outfld](rec)
            except Exception as e:
                if failonerror == 'inline':
                    val = e
                elif failonerror:
                    # bare raise keeps the original traceback intact
                    raise
                else:
                    val = errorvalue
            outrow.append(val)
        yield tuple(outrow)
Exemplo n.º 10
0
def iteraddfields(source, field_defs):
    """
    Yield the rows of `source` with several extra fields inserted.

    Each item of `field_defs` is either ``(name, value)`` - the field is
    appended after the fields present so far - or ``(name, value, index)`` -
    the field is inserted at `index`. Definitions are applied in order, to
    the header and to every data row alike. A callable `value` is called
    with the original row wrapped as a `Record` and its result is inserted;
    any other `value` is inserted as is.

    A completely empty `source` (no header row) is treated as a table with
    no fields, so only a header made of the new fields is yielded.

    """
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        hdr = []
    flds = list(map(text_type, hdr))

    # initialize output fields and indices
    outhdr = list(hdr)
    value_indexes = []

    for fdef in field_defs:
        # determine the defined field index
        if len(fdef) == 2:
            name, value = fdef
            index = len(outhdr)
        else:
            name, value, index = fdef

        # insert the name into the header at the appropriate index
        outhdr.insert(index, name)

        # remember the value/index pairs for later
        value_indexes.append((value, index))
    yield tuple(outhdr)

    for row in it:
        outrow = list(row)
        rec = None

        # add each defined field into the row at the appropriate index
        for value, index in value_indexes:
            if callable(value):
                # wrap row as record if using calculated value; do it at
                # most once per row instead of once per calculated field
                if rec is None:
                    rec = Record(row, flds)
                outrow.insert(index, value(rec))
            else:
                outrow.insert(index, value)

        yield tuple(outrow)
Exemplo n.º 11
0
def recordlookupone(table, key, dictionary=None, strict=False):
    """
    Build a lookup from key values to single record objects.

    The first row of `table` is taken as the header. For each following row
    the value(s) of the field(s) selected by `key` are used as the lookup
    key and the row, wrapped in a `Record`, as the value. Only the first row
    seen for a key is stored: a later row with the same key raises
    `DuplicateKeyError` if `strict` is true and is ignored otherwise.

    If `dictionary` is given it is filled in place and returned, otherwise a
    new `dict` is created and returned.

    """

    lookup = dict() if dictionary is None else dictionary

    rows = iter(table)
    header = next(rows)
    fieldnames = [text_type(name) for name in header]
    indices = asindices(header, key)
    assert len(indices) > 0, 'no key selected'
    keyof = operator.itemgetter(*indices)

    for row in rows:
        keyval = keyof(row)
        if keyval in lookup:
            # key already taken: fail in strict mode, else keep first record
            if strict:
                raise DuplicateKeyError(keyval)
            continue
        lookup[keyval] = Record(row, fieldnames)

    return lookup
Exemplo n.º 12
0
def iterproblems(table, constraints, expected_header):
    """
    Yield a table of the problems found when validating `table`.

    The first row yielded is the header ``('name', 'row', 'field', 'value',
    'error')``; each following row describes one problem:

    * ``'__header__'`` (row 0) - `expected_header` was given and differs
      from the actual header (both compared as text);
    * ``'__len__'`` - a data row's length differs from the number of fields;
      the actual length is reported as the value;
    * a constraint name - the constraint's getter, ``test`` or ``assertion``
      raised an exception, or the ``assertion`` returned a false value.

    The ``error`` column holds the name of the exception type; failed
    header, length and assertion checks are reported as ``'AssertionError'``.
    Data rows are numbered from 1.

    The checks are written as explicit comparisons rather than ``assert``
    statements so that they are still performed when Python runs with
    assertions disabled (``-O``).

    """

    outhdr = ('name', 'row', 'field', 'value', 'error')
    yield outhdr

    it = iter(table)
    try:
        actual_header = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479); treat a
        # completely empty table as having an empty header
        actual_header = []

    if expected_header is None:
        flds = list(map(text_type, actual_header))
    else:
        expected_flds = list(map(text_type, expected_header))
        actual_flds = list(map(text_type, actual_header))
        if expected_flds != actual_flds:
            yield ('__header__', 0, None, None, 'AssertionError')
        flds = expected_flds

    local_constraints = normalize_constraints(constraints, flds)

    # setup getters
    for constraint in local_constraints:
        if 'getter' not in constraint:
            if 'field' in constraint:
                # should ensure FieldSelectionError if bad field in constraint
                indices = asindices(flds, constraint['field'])
                getter = operator.itemgetter(*indices)
                constraint['getter'] = getter

    # generate problems
    expected_len = len(flds)
    for i, row in enumerate(it):
        row = tuple(row)

        # row length constraint
        actual_len = len(row)
        if actual_len != expected_len:
            yield ('__len__', i + 1, None, actual_len, 'AssertionError')

        # user defined constraints
        row = Record(row, flds)
        for constraint in local_constraints:
            name = constraint.get('name', None)
            field = constraint.get('field', None)
            assertion = constraint.get('assertion', None)
            test = constraint.get('test', None)
            # without a getter the whole row is the target
            getter = constraint.get('getter', lambda x: x)
            try:
                target = getter(row)
            except Exception as e:
                # getting target value failed, report problem
                yield (name, i + 1, field, None, type(e).__name__)
            else:
                # only report a value for constraints tied to a field
                value = target if field else None
                if test is not None:
                    try:
                        test(target)
                    except Exception as e:
                        # test raised exception, report problem
                        yield (name, i + 1, field, value, type(e).__name__)
                if assertion is not None:
                    try:
                        passed = assertion(target)
                    except Exception as e:
                        # assertion raised exception, report problem
                        yield (name, i + 1, field, value, type(e).__name__)
                    else:
                        if not passed:
                            # assertion returned a false value
                            yield (name, i + 1, field, value,
                                   'AssertionError')
Exemplo n.º 13
0
def iterfieldconvert(source, converters, failonerror, errorvalue, where,
                     pass_row):
    """
    Yield the rows of `source` with selected field values converted.

    `converters` maps a field (name or integer index) to a converter
    specification, which may be:

    * a callable - called with the value;
    * a string - the name of a method to call on the value;
    * a tuple/list starting with a string - a method name followed by
      arguments for the call;
    * a dict - turned into a function with `dictconverter`;
    * None - ignored.

    Anything else raises `ArgumentError`; an unknown field name raises
    `FieldSelectionError`.

    If a conversion raises an exception it is re-raised when `failonerror`
    is true, otherwise `errorvalue` is used in place of the value. If
    `pass_row` is true the row is passed to the converter as an additional
    argument. If `where` is given (a callable, or a string that is turned
    into one with `expr`), only rows for which it returns true are
    converted; other rows are yielded as they are. When `pass_row` or
    `where` is used, rows are wrapped as `Record` objects.

    The header is yielded unchanged. A completely empty `source` yields
    nothing.

    """

    # grab the fields in the source table
    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # StopIteration must not escape a generator body (PEP 479)
        return
    flds = list(map(text_type, hdr))
    yield tuple(hdr)  # these are not modified

    # build converter functions
    converter_functions = dict()
    for k, c in converters.items():

        # turn field names into row indices
        if not isinstance(k, integer_types):
            try:
                k = flds.index(k)
            except ValueError:  # not in list
                raise FieldSelectionError(k)
        assert isinstance(k, int), 'expected integer, found %r' % k

        # is converter a function?
        if callable(c):
            converter_functions[k] = c

        # is converter a method name?
        elif isinstance(c, string_types):
            converter_functions[k] = methodcaller(c)

        # is converter a method name with arguments?
        elif isinstance(c, (tuple, list)) and isinstance(c[0], string_types):
            methnm = c[0]
            methargs = c[1:]
            converter_functions[k] = methodcaller(methnm, *methargs)

        # is converter a dictionary?
        elif isinstance(c, dict):
            converter_functions[k] = dictconverter(c)

        # is it something else?
        elif c is None:
            pass  # ignore
        else:
            raise ArgumentError(
                'unexpected converter specification on field %r: %r' % (k, c))

    # define a function to transform a value
    def transform_value(i, v, *args):
        if i not in converter_functions:
            # no converter defined on this field, return value as-is
            return v
        try:
            return converter_functions[i](v, *args)
        except Exception:
            if failonerror:
                # bare raise keeps the original traceback intact
                raise
            return errorvalue

    # define a function to transform a row
    if pass_row:

        def transform_row(_row):
            return tuple(
                transform_value(i, v, _row) for i, v in enumerate(_row))
    else:

        def transform_row(_row):
            return tuple(transform_value(i, v) for i, v in enumerate(_row))

    # prepare where function
    if isinstance(where, string_types):
        where = expr(where)
    elif where is not None:
        # format via a 1-tuple so a tuple-valued `where` is reported
        # instead of breaking the % formatting itself
        assert callable(where), 'expected callable for "where" argument, ' \
                                'found %r' % (where,)

    # prepare iterator
    if pass_row or where:
        # wrap rows as records
        it = (Record(row, flds) for row in it)

    # construct the data rows
    if where is None:
        # simple case, transform all rows
        for row in it:
            yield transform_row(row)
    else:
        # conditionally transform rows
        for row in it:
            if where(row):
                yield transform_row(row)
            else:
                yield row
Exemplo n.º 14
0
def tohtml(table, source=None, encoding=None, errors=None, caption=None,
           vrepr=text_type, lineterminator='\n', index_header=False,
           tr_style=None, td_styles=None, truncate=None):
    """
    Write the table as HTML to a file. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 2],
        ...           ['c', 2]]
        >>> etl.tohtml(table1, 'example.html', caption='example table')
        >>> print(open('example.html').read())
        <table class='petl'>
        <caption>example table</caption>
        <thead>
        <tr>
        <th>foo</th>
        <th>bar</th>
        </tr>
        </thead>
        <tbody>
        <tr>
        <td>a</td>
        <td style='text-align: right'>1</td>
        </tr>
        <tr>
        <td>b</td>
        <td style='text-align: right'>2</td>
        </tr>
        <tr>
        <td>c</td>
        <td style='text-align: right'>2</td>
        </tr>
        </tbody>
        </table>

    The `caption` keyword argument is used to provide a table caption
    in the output HTML.

    The `encoding` and `errors` arguments configure the text layer placed
    over the binary output stream. If `tr_style` is callable, data rows are
    wrapped as `Record` objects before being handed to the row writer. The
    remaining keyword arguments are forwarded to the module's HTML writing
    helpers.

    """

    source = write_source_from_arg(source)
    with source.open('wb') as binary:

        # layer a text writer over the binary stream
        if PY2:
            out = getcodec(encoding).streamwriter(binary, errors=errors)
        else:
            out = io.TextIOWrapper(binary, encoding=encoding, errors=errors,
                                   newline='')

        try:
            rows = iter(table)

            # opening markup and header row
            header = next(rows)
            _write_begin(out, header, lineterminator, caption, index_header,
                         truncate)

            # data rows, as records when a callable row style is in use
            use_records = tr_style and callable(tr_style)
            if use_records:
                rows = (Record(row, header) for row in rows)
            for row in rows:
                _write_row(out, header, row, lineterminator, vrepr,
                           tr_style, td_styles, truncate)

            # closing markup
            _write_end(out, lineterminator)
            out.flush()

        finally:
            if not PY2:
                # separate the text wrapper from the binary stream, which is
                # managed by the enclosing `with`
                out.detach()
Exemplo n.º 15
0
 def accept(self, row):
     """
     Wrap `row` as a `Record` over `self.fields`, compute its key with
     `self.discriminator` and pass both on to `self.broadcast`.

     """
     record = Record(row, self.fields)
     routing_key = self.discriminator(record)
     self.broadcast(routing_key, record)