Пример #1
0
def test_values():
    """Exercise ``values`` with single fields, field pairs and a short row."""

    table = (('foo', 'bar', 'baz'),
             ('a', 1, True),
             ('b', 2),
             ('b', 7, False))

    def check(expect, *fields):
        actual = values(table, *fields)
        # compared twice on purpose -- presumably to confirm the result can
        # be iterated more than once
        ieq(expect, actual)
        ieq(expect, actual)

    check(('a', 'b', 'b'), 'foo')
    check((1, 2, 7), 'bar')

    foo_bar = (('a', 1), ('b', 2), ('b', 7))

    # old style signature for multiple fields, still supported
    check(foo_bar, ('foo', 'bar'))

    # as of 0.24 new style signature for multiple fields
    check(foo_bar, 'foo', 'bar')

    # the short row ('b', 2) has no 'baz' value and is expected to give None
    check((True, None, False), 'baz')
Пример #2
0
def typecounter(table, field):
    """
    Count the number of values found for each Python type.

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', u'xyz', 9.0],
        ...          ['E', 42]]
        >>> etl.typecounter(table, 'foo')
        Counter({'str': 5})
        >>> etl.typecounter(table, 'bar')
        Counter({'str': 3, 'int': 2})
        >>> etl.typecounter(table, 'baz')
        Counter({'str': 2, 'int': 1, 'float': 1, 'NoneType': 1})

    The `field` argument can be a field name or index (starting from zero).

    Returns a ``Counter`` keyed by type name (``v.__class__.__name__``).
    Rows too short to hold the field are not skipped: as the ``'baz'``
    example shows, they are counted under ``'NoneType'``.

    """

    # Incrementing a Counter cannot raise IndexError, so no handler is needed
    # here; short rows arrive from ``values`` as ordinary values.
    return Counter(v.__class__.__name__ for v in values(table, field))
Пример #3
0
def valuecounter(table, *field, **kwargs):
    """
    Find distinct values for the given field and count the number of
    occurrences. Returns a :class:`collections.Counter` (a :class:`dict`
    subclass) mapping values to counts. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['a', True],
        ...          ['b'],
        ...          ['b', True],
        ...          ['c', False]]
        >>> etl.valuecounter(table, 'foo')
        Counter({'b': 2, 'a': 1, 'c': 1})

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    The optional `missing` keyword argument (default ``None``) is passed
    through to ``values``; other keyword arguments are ignored.

    """

    missing = kwargs.get('missing', None)
    counter = Counter()
    # Incrementing a Counter cannot raise IndexError, so every value yielded
    # by ``values`` is counted without a guard.
    for v in values(table, field, missing=missing):
        counter[v] += 1
    return counter
Пример #4
0
def valuecount(table, field, value, missing=None):
    """
    Count the number of occurrences of `value` under the given field. Returns
    the absolute count and relative frequency as a pair. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'],
        ...          ['a', 1],
        ...          ['b', 2],
        ...          ['b', 7]]
        >>> etl.valuecount(table, 'foo', 'b')
        (2, 0.6666666666666666)

    The `field` argument can be a single field name or index (starting from
    zero) or a tuple of field names and/or indexes.

    The `missing` argument is passed through to ``values``. If no values are
    found at all (e.g. the table has no data rows) the result is
    ``(0, 0.0)`` rather than a division-by-zero error.

    """

    total = 0
    matches = 0
    for v in values(table, field, missing=missing):
        total += 1
        if v == value:
            matches += 1
    if total == 0:
        # nothing to count against: avoid ZeroDivisionError on empty input
        return 0, 0.0
    return matches, float(matches) / total
Пример #5
0
def typeset(table, field):
    """
    Return a set containing the names of all Python types found for values
    in the given field. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, '2'],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', '7.8', True],
        ...          ['D', u'xyz', 9.0],
        ...          ['E', 42]]
        >>> sorted(etl.typeset(table, 'foo'))
        ['str']
        >>> sorted(etl.typeset(table, 'bar'))
        ['int', 'str']
        >>> sorted(etl.typeset(table, 'baz'))
        ['NoneType', 'float', 'str']

    The `field` argument can be a field name or index (starting from zero).

    Rows too short to hold the field are not skipped: as the ``'baz'``
    example shows, they contribute ``'NoneType'``.

    """

    # Adding to a set cannot raise IndexError, so no handler is needed here;
    # short rows arrive from ``values`` as ordinary values.
    return {type(v).__name__ for v in values(table, field)}
Пример #6
0
def limits(table, field):
    """
    Return the smallest and largest values under the given field as a
    ``(min, max)`` pair. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar'], ['a', 1], ['b', 2], ['b', 3]]
        >>> minv, maxv = etl.limits(table, 'bar')
        >>> minv
        1
        >>> maxv
        3

    The `field` argument can be a field name or index (starting from zero).

    If there are no values at all, ``(None, None)`` is returned.

    """

    stream = iter(values(table, field))
    nothing = object()  # private marker for "no first value"
    first = next(stream, nothing)
    if first is nothing:
        return None, None

    lowest = highest = first
    for item in stream:
        if item < lowest:
            lowest = item
        if item > highest:
            highest = item
    return lowest, highest
Пример #7
0
 def __init__(self, *args, **kwargs):
     """
     Store the input, the period and the `missing` value.

     Accepts either two positional arguments ``(input, period)`` or three
     ``(table, field, period)``; in the three-argument form the input is
     obtained via ``values(table, field)``. The optional `missing` keyword
     argument defaults to ``None``.

     Raises ``AssertionError`` for any other number of positional arguments.
     """
     if len(args) == 2:
         self.input, self.period = args
     elif len(args) == 3:
         self.input = values(args[0], args[1])
         self.period = args[2]
     else:
         # raised explicitly (not via ``assert``) so the check is still
         # enforced when Python runs with -O
         raise AssertionError('invalid arguments')
     self.missing = kwargs.get('missing', None)
Пример #8
0
def diffvalues(t1, t2, f):
    """
    Compare the sets of values found under field `f` in two tables. Returns
    a pair ``(added, removed)``: values present only in `t2`, then values
    present only in `t1`, e.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 3]]
        >>> table2 = [['bar', 'foo'],
        ...           [1, 'a'],
        ...           [3, 'c']]
        >>> add, sub = etl.diffvalues(table1, table2, 'foo')
        >>> add
        {'c'}
        >>> sub
        {'b'}

    """

    first = set(values(t1, f))
    second = set(values(t2, f))
    added = second.difference(first)
    removed = first.difference(second)
    return added, removed
Пример #9
0
def stringpatterncounter(table, field):
    """
    Profile string patterns in the given field, returning a ``Counter``
    (a :class:`dict` subclass) mapping patterns to counts.

    Each value is converted with ``str`` and then translated so that every
    ASCII upper-case letter becomes ``'A'``, every ASCII lower-case letter
    becomes ``'a'`` and every ASCII digit becomes ``'9'``; other characters
    are left as they are.

    """

    pattern_table = maketrans(
        'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        'AAAAAAAAAAAAAAAAAAAAAAAAAAaaaaaaaaaaaaaaaaaaaaaaaaaa9999999999'
    )
    return Counter(
        str(item).translate(pattern_table) for item in values(table, field)
    )
Пример #10
0
def parsecounter(table, field, parsers=(('int', int), ('float', float))):
    """
    Count the number of `str` or `unicode` values under the given fields that
    can be parsed as ints, floats or via custom parser functions. Return a
    pair of `Counter` objects, the first mapping parser names to the number of
    strings successfully parsed, the second mapping parser names to the
    number of errors. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 'aaa', 2],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', '3.7', 9.0],
        ...          ['E', 42]]
        >>> counter, errors = etl.parsecounter(table, 'bar')
        >>> counter
        Counter({'float': 3, 'int': 2})
        >>> errors
        Counter({'int': 2, 'float': 1})

    The `field` argument can be a field name or index (starting from zero).

    The `parsers` argument is either a mapping of names to parser callables
    or a list/tuple of ``(name, parser)`` pairs. A parser counts as failed
    when it raises any ``Exception``; non-string values are not passed to the
    parsers at all.

    """

    if isinstance(parsers, (list, tuple)):
        parsers = dict(parsers)
    counter, errors = Counter(), Counter()
    # initialise explicitly so every parser name appears in both counters,
    # even with a zero count
    for name in parsers:
        counter[name] = 0
        errors[name] = 0
    for v in values(table, field):
        if isinstance(v, string_types):
            for name, parser in parsers.items():
                try:
                    parser(v)
                except Exception:
                    # any parser failure is an error for this value, but
                    # KeyboardInterrupt/SystemExit must still propagate
                    errors[name] += 1
                else:
                    counter[name] += 1
    return counter, errors
Пример #11
0
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table.

    Without a facet `key`, the table is sorted by the `start` field and the
    generator yields `(start, stop)` tuples.

    With a facet `key`, the table is sorted by `key` then `start`, rows are
    grouped by `key`, and the generator yields `(key, start, stop)` tuples.

    """

    bounds = (start, stop)
    if key is not None:
        ordered = sort(table, key=(key, start))
        for facet_value, group in rowgroupby(ordered, key=key, value=bounds):
            for interval in _collapse(group):
                # prefix each collapsed interval with its facet value
                yield (facet_value,) + interval
    else:
        ordered = sort(table, key=start)
        for interval in _collapse(values(ordered, bounds)):
            yield interval
Пример #12
0
def facet(table, key):
    """
    Split a table by the distinct values of a field, returning a dictionary
    that maps each value to the table selected for it. E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 4, 9.3],
        ...           ['a', 2, 88.2],
        ...           ['b', 1, 23.3],
        ...           ['c', 8, 42.0],
        ...           ['d', 7, 100.9],
        ...           ['c', 2]]
        >>> foo = etl.facet(table1, 'foo')
        >>> sorted(foo.keys())
        ['a', 'b', 'c', 'd']
        >>> foo['a']
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'a' |   4 |  9.3 |
        +-----+-----+------+
        | 'a' |   2 | 88.2 |
        +-----+-----+------+

        >>> foo['c']
        +-----+-----+------+
        | foo | bar | baz  |
        +=====+=====+======+
        | 'c' |   8 | 42.0 |
        +-----+-----+------+
        | 'c' |   2 |      |
        +-----+-----+------+

    See also :func:`petl.util.materialise.facetcolumns`.

    """

    distinct = set(values(table, key))
    return {item: selecteq(table, key, item) for item in distinct}
Пример #13
0
def stats(table, field):
    """
    Compute basic descriptive statistics for the values under a field. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, 2],
        ...          ['B', '2', '3.4'],
        ...          [u'B', u'3', u'7.8', True],
        ...          ['D', 'xyz', 9.0],
        ...          ['E', None]]
        >>> etl.stats(table, 'bar')
        stats(count=3, errors=2, sum=6.0, min=1.0, max=3.0, mean=2.0, pvariance=0.6666666666666666, pstdev=0.816496580927726)

    The `field` argument can be a field name or index (starting from zero).

    Every value is converted with ``float``; values for which that raises
    ``ValueError`` or ``TypeError`` are tallied as errors and otherwise
    ignored.

    """

    count = 0
    errors = 0
    total = 0
    lowest = None
    highest = None
    mean = 0
    variance = 0
    for raw in values(table, field):
        try:
            number = float(raw)
        except (ValueError, TypeError):
            errors += 1
            continue
        count += 1
        if lowest is None or number < lowest:
            lowest = number
        if highest is None or number > highest:
            highest = number
        total += number
        # running mean/variance are updated one value at a time
        mean, variance = onlinestats(number, count, mean=mean,
                                     variance=variance)
    stdev = variance ** 0.5
    return _stats(count, errors, total, lowest, highest, mean, variance,
                  stdev)