def test_rowgroupby():
    """Exercise rowgroupby with a field-name key, an explicit value field,
    and callable key/value selectors."""
    tbl = (('foo', 'bar', 'baz'),
           ('a', 1, True),
           ('b', 2, True),
           ('b', 3))

    # simplest form: group whole rows by the 'foo' field
    groups = rowgroupby(tbl, 'foo')
    k, rows = groups.next()
    rows = list(rows)
    eq_('a', k)
    eq_(1, len(rows))
    eq_(('a', 1, True), rows[0])
    k, rows = groups.next()
    rows = list(rows)
    eq_('b', k)
    eq_(2, len(rows))
    eq_(('b', 2, True), rows[0])
    eq_(('b', 3), rows[1])

    # specify value: yield only the 'bar' field within each group
    groups = rowgroupby(tbl, 'foo', 'bar')
    k, vals = groups.next()
    vals = list(vals)
    eq_('a', k)
    eq_(1, len(vals))
    eq_(1, vals[0])
    k, vals = groups.next()
    vals = list(vals)
    eq_('b', k)
    eq_(2, len(vals))
    eq_(2, vals[0])
    eq_(3, vals[1])

    # callable key and callable value selectors
    groups = rowgroupby(tbl, lambda r: r['foo'], lambda r: r['baz'])
    k, vals = groups.next()
    vals = list(vals)
    eq_('a', k)
    eq_(1, len(vals))
    eq_(True, vals[0])
    k, vals = groups.next()
    vals = list(vals)
    eq_('b', k)
    eq_(2, len(vals))
    eq_(True, vals[0])
    eq_(None, vals[1])  # short row gets padded
def iterrowreduce(source, key, reducer, fields):
    """Group rows from `source` by `key` and yield one reduced row per group.

    `reducer` is called as reducer(key, rows) and its result is yielded as a
    tuple. If `fields` is None, the output header is taken from the source.
    """
    if fields is None:
        # no output fields supplied, so reuse the source header
        fields, source = iterpeek(source)
    yield tuple(fields)
    for groupkey, rows in rowgroupby(source, key):
        yield tuple(reducer(groupkey, rows))
def itermergeduplicates(table, key, missing):
    """Merge rows sharing the same key value into a single row.

    For each non-key field, a group's distinct non-missing values collapse
    to the single value if unanimous, `missing` if none were present, or a
    Conflict of all values otherwise.
    """
    it = iter(table)
    fields, it = iterpeek(it)  # peek at the header to plan the output

    # key field(s) lead the output header, followed by all remaining fields
    singlekey = isinstance(key, basestring)
    if singlekey:
        outflds = [key]
        keyflds = set([key])
    else:
        outflds = list(key)
        keyflds = set(key)
    valflds = [f for f in fields if f not in keyflds]
    valfldidxs = [fields.index(f) for f in valflds]
    outflds.extend(valflds)
    yield tuple(outflds)

    # merge each group of rows with a common key
    for k, grp in rowgroupby(it, key):
        grp = list(grp)
        outrow = [k] if singlekey else list(k)
        for i in valfldidxs:
            # distinct non-missing values observed for this field (short
            # rows simply contribute nothing)
            seen = set(row[i] for row in grp if len(row) > i and row[i] != missing)
            if len(seen) == 1:
                outrow.append(seen.pop())
            elif not seen:
                outrow.append(missing)
            else:
                outrow.append(Conflict(seen))
        yield tuple(outrow)
def itermultiaggregate(source, key, aggregation): aggregation = OrderedDict(aggregation.items()) # take a copy it = iter(source) srcflds = it.next() it = itertools.chain([srcflds], it) # push back header to ensure we iterate only once # normalise aggregators for outfld in aggregation: agg = aggregation[outfld] if callable(agg): aggregation[outfld] = None, agg elif isinstance(agg, basestring): aggregation[outfld] = agg, list # list is default elif len(agg) == 1 and isinstance(agg[0], basestring): aggregation[outfld] = agg[0], list # list is default elif len(agg) == 1 and callable(agg[0]): aggregation[outfld] = None, agg[0] # aggregate whole rows elif len(agg) == 2: pass # no need to normalise else: raise Exception('invalid aggregation: %r, %r' % (outfld, agg)) # determine output header if isinstance(key, (list, tuple)): outflds = list(key) elif callable(key): outflds = ['key'] else: outflds = [key] for outfld in aggregation: outflds.append(outfld) yield tuple(outflds) # generate data for k, rows in rowgroupby(it, key): rows = list(rows) # may need to iterate over these more than once # handle compound key if isinstance(key, (list, tuple)): outrow = list(k) else: outrow = [k] for outfld in aggregation: srcfld, aggfun = aggregation[outfld] if srcfld is None: aggval = aggfun(rows) outrow.append(aggval) elif isinstance(srcfld, (list, tuple)): idxs = [srcflds.index(f) for f in srcfld] valgetter = operator.itemgetter(*idxs) vals = (valgetter(row) for row in rows) aggval = aggfun(vals) outrow.append(aggval) else: idx = srcflds.index(srcfld) # try using generator comprehension vals = (row[idx] for row in rows) aggval = aggfun(vals) outrow.append(aggval) yield tuple(outrow)
def itersimpleaggregate(table, key, aggregation, value):
    """Group `table` by `key` and yield (key, aggregated value) rows, with a
    header row first."""
    # special case counting: len() cannot consume an iterator directly
    if aggregation == len:
        aggregation = lambda g: sum(1 for _ in g)
    # determine output header
    compound = isinstance(key, (list, tuple))
    if compound:
        outfields = tuple(key) + ('value',)
    elif callable(key):
        outfields = ('key', 'value')
    else:
        outfields = (key, 'value')
    yield outfields
    # generate data; a compound key is spread across several output fields
    for k, grp in rowgroupby(table, key, value):
        if compound:
            yield tuple(k) + (aggregation(grp),)
        else:
            yield k, aggregation(grp)
def __iter__(self):
    """Yield the source table with the value field replaced by a numeric id.

    Ids are assigned per distinct value, in group order, as
    (group index * multiplier) + offset.
    """
    it = iter(self.table)
    fields = it.next()
    table = itertools.chain([fields], it)  # push header back for rowgroupby
    vidx = fields.index(self.value)
    # output header: same fields, with the value field renamed '<value>_id'
    outflds = list(fields)
    outflds[vidx] = '%s_id' % self.value
    yield tuple(outflds)
    offset, multiplier = self.autoincrement
    for n, (_, group) in enumerate(rowgroupby(table, self.value)):
        rowid = (n * multiplier) + offset
        for row in group:
            outrow = list(row)
            outrow[vidx] = rowid
            yield tuple(outrow)
def collapsedintervals(tbl, start='start', stop='stop', facet=None):
    """
    Utility function to collapse intervals in a table. If no facet key is
    given, returns an iterator over `(start, stop)` tuples. If facet key is
    given, returns an iterator over `(key, start, stop)` tuples.

    .. versionadded:: 0.5.5

    """
    if facet is None:
        # one global stream of intervals, ordered by start
        for iv in _collapse(values(sort(tbl, key=start), (start, stop))):
            yield iv
    else:
        # collapse separately within each facet key
        for k, g in rowgroupby(sort(tbl, key=(facet, start)),
                               key=facet, value=(start, stop)):
            for iv in _collapse(g):
                yield (k,) + iv
def collapsedintervals(tbl, start='start', stop='stop', facet=None):
    """
    Utility function to collapse intervals in a table. If no facet key is
    given, returns an iterator over `(start, stop)` tuples. If facet key is
    given, returns an iterator over `(key, start, stop)` tuples.

    .. versionadded:: 0.5.5

    """
    if facet is None:
        # single stream: sort by interval start, then collapse
        ordered = sort(tbl, key=start)
        for interval in _collapse(values(ordered, (start, stop))):
            yield interval
    else:
        # per-facet: sort by (facet, start) and collapse each group
        ordered = sort(tbl, key=(facet, start))
        grouped = rowgroupby(ordered, key=facet, value=(start, stop))
        for k, ivs in grouped:
            for interval in _collapse(ivs):
                yield (k, ) + interval
def iterfold(table, key, f, value):
    """Group the table by `key` and left-fold `f` over each group's values,
    yielding a ('key', 'value') header first."""
    yield ('key', 'value')
    for groupkey, groupvals in rowgroupby(table, key, value):
        folded = reduce(f, groupvals)
        yield groupkey, folded
def __iter__(self):
    """Yield an ('id', value-field) header, then one (id, value) row per
    distinct value, with ids generated as (index * multiplier) + offset."""
    offset, multiplier = self.autoincrement
    yield ('id', self.value)
    n = 0
    for v, _ in rowgroupby(self.table, self.value):
        yield ((n * multiplier) + offset, v)
        n += 1
def iterrowgroupmap(source, key, mapper, fields):
    """Yield the given header, then every row produced by calling
    mapper(key, rows) on each key group of the source."""
    yield tuple(fields)
    for groupkey, rows in rowgroupby(source, key):
        for outrow in mapper(groupkey, rows):
            yield outrow