def itermultiaggregate(source, key, aggregation):
    """Yield a header row followed by one aggregated row per key group.

    `source` is an iterable of rows whose first row is the header.

    `key` selects the grouping key and is passed through to `rowgroupby`.
    It also determines the leading output header fields: a list/tuple
    contributes each of its items, a callable contributes the single field
    name ``'key'``, and anything else is used as a single field name.

    `aggregation` maps each output field name to an aggregator spec, which
    is normalised to a ``(srcfld, aggfun)`` pair:

    * a callable            -> ``(None, callable)``; called on the group's rows
    * a string              -> ``(string, list)``; collect that field's values
    * ``(string,)``         -> ``(string, list)``
    * ``(callable,)``       -> ``(None, callable)``
    * any 2-item sequence   -> used as given; ``srcfld`` may be ``None``, a
      single field name, or a list/tuple of field names

    The mapping passed in is copied, never modified.

    If `source` yields nothing at all (not even a header) the generator
    simply finishes without yielding anything.

    Raises ArgumentError for an aggregator spec matching none of the forms
    above.
    """
    aggregation = OrderedDict(aggregation.items())  # take a copy

    it = iter(source)
    try:
        hdr = next(it)
    except StopIteration:
        # Empty source: there is no header to work from. Letting
        # StopIteration escape a generator body is turned into a
        # RuntimeError by PEP 479 (Python 3.7+), so end cleanly instead.
        return
    # push back header to ensure we iterate only once
    it = itertools.chain([hdr], it)

    # normalise aggregators (values are replaced in place; the key set does
    # not change, so iterating the dict while doing so is safe)
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, string_types):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], string_types):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass  # no need to normalise
        else:
            raise ArgumentError('invalid aggregation: %r, %r' % (outfld, agg))

    # determine output header
    if isinstance(key, (list, tuple)):
        outhdr = list(key)
    elif callable(key):
        outhdr = ['key']
    else:
        outhdr = [key]
    outhdr.extend(aggregation)
    yield tuple(outhdr)

    # generate data
    for k, rows in rowgroupby(it, key):
        rows = list(rows)  # may need to iterate over these more than once

        # handle compound key
        if isinstance(key, (list, tuple)):
            outrow = list(k)
        else:
            outrow = [k]

        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                # aggregate over whole rows
                aggval = aggfun(rows)
            elif isinstance(srcfld, (list, tuple)):
                # aggregate over the values of several source fields
                idxs = [hdr.index(f) for f in srcfld]
                valgetter = operator.itemgetter(*idxs)
                aggval = aggfun(valgetter(row) for row in rows)
            else:
                # aggregate over the values of a single source field
                idx = hdr.index(srcfld)
                aggval = aggfun(row[idx] for row in rows)
            outrow.append(aggval)
        yield tuple(outrow)
def itermultirangeaggregate(source, key, width, aggregation, minv, maxv):
    """Yield a header row followed by one aggregated row per bin.

    `source` is an iterable of rows whose first row is the header. Rows are
    grouped by ``rowgroupbybin(it, key, width, minv=minv, maxv=maxv)``; the
    first item of each pair it yields becomes the first value of the output
    row, under the header field `key`.

    `aggregation` maps each output field name to an aggregator spec, which
    is normalised to a ``(srcfld, aggfun)`` pair:

    * a callable            -> ``(None, callable)``; called on the bin's rows
    * a string              -> ``(string, list)``; collect that field's values
    * ``(string,)``         -> ``(string, list)``
    * ``(callable,)``       -> ``(None, callable)``
    * any 2-item sequence   -> used as given; ``srcfld`` is either ``None``
      or a single source field name

    The mapping passed in is copied, never modified.

    If `source` yields nothing at all (not even a header) the generator
    simply finishes without yielding anything.

    Raises Exception for an aggregator spec matching none of the forms
    above.
    """
    aggregation = OrderedDict(aggregation.items())  # take a copy

    it = iter(source)
    try:
        # builtin next() works on both Python 2.6+ and Python 3, whereas the
        # iterator's .next() method exists on Python 2 only
        srcflds = next(it)
    except StopIteration:
        # Empty source: there is no header to work from. Letting
        # StopIteration escape a generator body is turned into a
        # RuntimeError by PEP 479 (Python 3.7+), so end cleanly instead.
        return
    # push back header to ensure we iterate only once
    it = itertools.chain([srcflds], it)

    # normalise aggregators (values are replaced in place; the key set does
    # not change, so iterating the dict while doing so is safe)
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, string_types):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], string_types):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass  # no need to normalise
        else:
            raise Exception('invalid aggregation: %r, %r' % (outfld, agg))

    # determine output header
    outflds = [key]
    outflds.extend(aggregation)
    yield tuple(outflds)

    # generate data
    for k, rows in rowgroupbybin(it, key, width, minv=minv, maxv=maxv):
        # Each aggregator walks the bin's rows, so they may be iterated more
        # than once. NOTE(review): rowgroupbybin is defined elsewhere; in
        # case it hands back a one-shot iterator, materialise it here.
        if not isinstance(rows, (list, tuple)):
            rows = list(rows)

        outrow = [k]
        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                # aggregate over whole rows
                aggval = aggfun(rows)
            else:
                # aggregate over the values of a single source field
                idx = srcflds.index(srcfld)
                aggval = aggfun(row[idx] for row in rows)
            outrow.append(aggval)
        yield tuple(outrow)