Пример #1
0
def test_rowgroupby():
    """Exercise rowgroupby() with a field key, a field value and callables.

    The last data row is deliberately one value short, so the test also
    checks what comes back for the missing 'baz' value.
    """

    table = (('foo', 'bar', 'baz'),
             ('a', 1, True),
             ('b', 2, True),
             ('b', 3))

    # simplest form: whole rows grouped by the 'foo' field

    g = rowgroupby(table, 'foo')

    # each group is materialised before the next one is requested
    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(('a', 1, True), vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(('b', 2, True), vals[0])
    eq_(('b', 3), vals[1])  # the short row is returned as-is

    # specify value: only the 'bar' field is collected for each group

    g = rowgroupby(table, 'foo', 'bar')

    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(1, vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(2, vals[0])
    eq_(3, vals[1])

    # callable key and callable value, both looking fields up by name

    g = rowgroupby(table, lambda r: r['foo'], lambda r: r['baz'])

    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(True, vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(True, vals[0])
    eq_(None, vals[1])  # gets padded
Пример #2
0
def iterrowreduce(source, key, reducer, fields):
    """Yield a header row, then one reduced row per group of `source`.

    When `fields` is None the header is taken from `iterpeek(source)`;
    otherwise `fields` is used as given. Rows are grouped with
    `rowgroupby(source, key)` and every group is passed to
    `reducer(groupkey, rows)`, whose result is emitted as a tuple.
    """
    header = fields
    if header is None:
        # no explicit output fields: fall back to the source's own
        header, source = iterpeek(source)
    yield tuple(header)

    grouped = rowgroupby(source, key)
    for groupkey, grouprows in grouped:
        reduced = reducer(groupkey, grouprows)
        yield tuple(reduced)
Пример #3
0
def itermergeduplicates(table, key, missing):
    """Yield one merged row per distinct `key` value found in `table`.

    The header consists of the key field(s) followed by every other field.
    Rows are grouped with `rowgroupby`; for each non-key field the distinct
    values in the group that are not equal to `missing` are collected, and
    the output holds:

    * that value, when exactly one distinct value was seen;
    * `missing`, when none was seen;
    * `Conflict(values)`, when more than one was seen.
    """
    fields, it = iterpeek(iter(table))

    # a single field name and a sequence of names are handled differently
    singlekey = isinstance(key, basestring)
    if singlekey:
        outflds = [key]
        keyflds = set([key])
    else:
        outflds = list(key)
        keyflds = set(key)

    # non-key fields keep their source order
    valflds = [f for f in fields if f not in keyflds]
    valfldidxs = [fields.index(f) for f in valflds]
    yield tuple(outflds + valflds)

    for k, grp in rowgroupby(it, key):
        grp = list(grp)  # iterated once per value field
        outrow = [k] if singlekey else list(k)
        for i in valfldidxs:
            distinct = set()
            for row in grp:
                # rows too short to hold this field contribute nothing
                if len(row) > i and row[i] != missing:
                    distinct.add(row[i])
            if len(distinct) == 1:
                outrow.append(distinct.pop())
            elif len(distinct) == 0:
                outrow.append(missing)
            else:
                outrow.append(Conflict(distinct))
        yield tuple(outrow)
Пример #4
0
def itermultiaggregate(source, key, aggregation):
    aggregation = OrderedDict(aggregation.items()) # take a copy
    it = iter(source)
    srcflds = it.next()
    it = itertools.chain([srcflds], it)  # push back header to ensure we iterate only once

    # normalise aggregators
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, basestring):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], basestring):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass # no need to normalise
        else:
            raise Exception('invalid aggregation: %r, %r' % (outfld, agg))

    # determine output header
    if isinstance(key, (list, tuple)):
        outflds = list(key)
    elif callable(key):
        outflds = ['key']
    else:
        outflds = [key]
    for outfld in aggregation:
        outflds.append(outfld)
    yield tuple(outflds)
    
    # generate data
    for k, rows in rowgroupby(it, key):
        rows = list(rows) # may need to iterate over these more than once
        # handle compound key
        if isinstance(key, (list, tuple)):
            outrow = list(k)
        else:
            outrow = [k]
        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                aggval = aggfun(rows)
                outrow.append(aggval)
            elif isinstance(srcfld, (list, tuple)):
                idxs = [srcflds.index(f) for f in srcfld]
                valgetter = operator.itemgetter(*idxs)
                vals = (valgetter(row) for row in rows)
                aggval = aggfun(vals)
                outrow.append(aggval)
            else:
                idx = srcflds.index(srcfld)
                # try using generator comprehension
                vals = (row[idx] for row in rows)
                aggval = aggfun(vals)
                outrow.append(aggval)
        yield tuple(outrow)
Пример #5
0
def itersimpleaggregate(table, key, aggregation, value):
    """Yield a header row, then one `key..., value` row per group.

    Groups come from `rowgroupby(table, key, value)` and each one is
    reduced with `aggregation`. The header is the key field(s) followed by
    'value'; a callable `key` is labelled 'key'. For a list/tuple `key`
    the group key is spread over the leading output columns.
    """
    # special case: count items by iterating instead of calling len()
    if aggregation == len:
        aggregation = lambda g: sum(1 for _ in g)

    compound = isinstance(key, (list, tuple))

    # header
    if compound:
        header = tuple(key) + ('value',)
    elif callable(key):
        header = ('key', 'value')
    else:
        header = (key, 'value')
    yield header

    # data
    for groupkey, group in rowgroupby(table, key, value):
        if compound:
            yield tuple(groupkey) + (aggregation(group),)
        else:
            yield groupkey, aggregation(group)
Пример #6
0
 def __iter__(self):
     """Yield the table's rows with the `self.value` field replaced by an id.

     The header comes first, with the value field renamed to '<value>_id'.
     Rows are then grouped with `rowgroupby` on that field, and every row
     of the n-th group (counting from zero) gets `n * multiplier + offset`
     in that position, where `(offset, multiplier)` is
     `self.autoincrement`. Nothing is yielded for an empty table.
     """
     it = iter(self.table)
     try:
         fields = next(it)
     except StopIteration:
         # empty table; return explicitly rather than letting
         # StopIteration escape from the generator body
         return
     # push the header back so the table is only iterated once
     table = itertools.chain([fields], it)
     value = self.value
     vidx = fields.index(value)
     outflds = list(fields)
     outflds[vidx] = '%s_id' % value
     yield tuple(outflds)
     offset, multiplier = self.autoincrement
     for n, (_, group) in enumerate(rowgroupby(table, value)):
         for row in group:
             outrow = list(row)
             outrow[vidx] = (n * multiplier) + offset
             yield tuple(outrow)
Пример #7
0
def collapsedintervals(tbl, start='start', stop='stop', facet=None):
    """
    Collapse the intervals held in a table.

    Without a facet key, iterate over `(start, stop)` tuples.

    With a facet key, the intervals are collapsed separately for each facet
    value, iterating over `(key, start, stop)` tuples.

    .. versionadded:: 0.5.5

    """

    if facet is not None:
        # order by facet first so each facet's rows form one group
        ordered = sort(tbl, key=(facet, start))
        groups = rowgroupby(ordered, key=facet, value=(start, stop))
        for facetval, intervals in groups:
            for collapsed in _collapse(intervals):
                yield (facetval,) + collapsed
        return

    ordered = sort(tbl, key=start)
    for collapsed in _collapse(values(ordered, (start, stop))):
        yield collapsed
Пример #8
0
def collapsedintervals(tbl, start='start', stop='stop', facet=None):
    """
    Collapse the intervals held in a table.

    Without a facet key, iterate over `(start, stop)` tuples.

    With a facet key, the intervals are collapsed separately for each facet
    value, iterating over `(key, start, stop)` tuples.

    .. versionadded:: 0.5.5

    """

    if facet is not None:
        # order by facet first so each facet's rows form one group
        bykey = sort(tbl, key=(facet, start))
        for fkey, ivs in rowgroupby(bykey, key=facet, value=(start, stop)):
            for merged in _collapse(ivs):
                yield (fkey,) + merged
        return

    bystart = sort(tbl, key=start)
    pairs = values(bystart, (start, stop))
    for merged in _collapse(pairs):
        yield merged
Пример #9
0
def iterfold(table, key, f, value):
    """Yield a ('key', 'value') header, then one folded value per group.

    Groups come from `rowgroupby(table, key, value)`; each group is
    combined into a single value with `reduce(f, group)`.
    """
    header = ('key', 'value')
    yield header
    for groupkey, group in rowgroupby(table, key, value):
        folded = reduce(f, group)
        yield groupkey, folded
Пример #10
0
 def __iter__(self):
     """Yield an ('id', <value>) header, then one row per group key.

     Rows of `self.table` are grouped on `self.value` with `rowgroupby`.
     The n-th group key (counting from zero) is paired with the id
     `n * multiplier + offset`, where `(offset, multiplier)` is
     `self.autoincrement`.
     """
     start, step = self.autoincrement
     yield ('id', self.value)
     groups = rowgroupby(self.table, self.value)
     for position, (val, _) in enumerate(groups):
         ident = (position * step) + start
         yield (ident, val)
Пример #11
0
def iterrowgroupmap(source, key, mapper, fields):
    """Yield `fields` as the header, then every row the mapper produces.

    `source` is grouped with `rowgroupby(source, key)`; each group is
    handed to `mapper(groupkey, rows)` and the rows it returns are passed
    through one by one.
    """
    header = tuple(fields)
    yield header
    for groupkey, grouprows in rowgroupby(source, key):
        mapped = mapper(groupkey, grouprows)
        for outrow in mapped:
            yield outrow
Пример #12
0
def iterrowgroupmap(source, key, mapper, fields):
    """Yield `fields` as the header, then every row the mapper produces.

    `source` is grouped with `rowgroupby(source, key)`; each group is
    handed to `mapper(groupkey, rows)` and the rows it returns are passed
    through one by one.
    """
    yield tuple(fields)
    groups = rowgroupby(source, key)
    for grpkey, grprows in groups:
        for produced in mapper(grpkey, grprows):
            yield produced