Example #1
def itersimpleaggregate(table, key, aggregation, value, field):

    # special case counting
    if aggregation == len and key is not None:
        aggregation = lambda g: sum(1 for _ in g)  # count length of iterable

    # special case where length of key is 1
    if isinstance(key, (list, tuple)) and len(key) == 1:
        key = key[0]

    # determine output header
    if isinstance(key, (list, tuple)):
        outhdr = tuple(key) + (field, )
    elif callable(key):
        outhdr = ('key', field)
    elif key is None:
        outhdr = field,
    else:
        outhdr = (key, field)
    yield outhdr

    # generate data
    if isinstance(key, (list, tuple)):
        for k, grp in rowgroupby(table, key, value):
            yield tuple(k) + (aggregation(grp), )
    elif key is None:
        # special case counting
        if aggregation == len:
            yield nrows(table),
        else:
            yield aggregation(values(table, value)),
    else:
        for k, grp in rowgroupby(table, key, value):
            yield k, aggregation(grp)
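For orientation, a minimal usage sketch of the grouping this generator is built on. It drives rowgroupby directly (assumed importable from the top-level petl namespace, as in recent petl releases); note the input must already be sorted by the key, since rowgroupby is a thin adapter around itertools.groupby.

import petl as etl

# toy table, already sorted by the grouping key 'foo'
table = [('foo', 'bar'),
         ('a', 3),
         ('a', 7),
         ('b', 2)]

# hand-rolled equivalent of itersimpleaggregate(table, 'foo', sum, 'bar', 'total'):
# group on 'foo', pull the 'bar' values out of each group, reduce with sum
print(('foo', 'total'))  # output header
for k, grp in etl.rowgroupby(table, 'foo', 'bar'):
    print((k, sum(grp)))
# ('a', 10)
# ('b', 2)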
Example #2
def test_rowgroupby():

    table = (('foo', 'bar', 'baz'),
             ('a', 1, True),
             ('b', 2, True),
             ('b', 3))

    # simplest form

    g = rowgroupby(table, 'foo')

    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(('a', 1, True), vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(('b', 2, True), vals[0])
    eq_(('b', 3), vals[1])

    # specify value

    g = rowgroupby(table, 'foo', 'bar')

    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(1, vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(2, vals[0])
    eq_(3, vals[1])

    # callable key

    g = rowgroupby(table, lambda r: r['foo'], lambda r: r['baz'])

    key, vals = next(g)
    vals = list(vals)
    eq_('a', key)
    eq_(1, len(vals))
    eq_(True, vals[0])

    key, vals = next(g)
    vals = list(vals)
    eq_('b', key)
    eq_(2, len(vals))
    eq_(True, vals[0])
    eq_(None, vals[1])  # gets padded
Example #3
def iterrowreduce(source, key, reducer, header):
    if header is None:
        # output header from source
        header, source = iterpeek(source)
    yield tuple(header)
    for key, rows in rowgroupby(source, key):
        yield tuple(reducer(key, rows))
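In petl this generator sits behind the public rowreduce() transform; a small usage sketch, assuming rowreduce(table, key, reducer, header=...) as documented, with the header passed explicitly so the iterpeek branch above is not taken:

import petl as etl

table = [('foo', 'bar'),
         ('a', 3),
         ('a', 7),
         ('b', 2)]

def sumbar(key, rows):
    # the reducer receives the group key and an iterator of whole rows,
    # and returns a single output row for the group
    return key, sum(row[1] for row in rows)

result = etl.rowreduce(table, 'foo', sumbar, header=['foo', 'barsum'])
print(list(result))
# [('foo', 'barsum'), ('a', 10), ('b', 2)]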
Example #4
def itermergeduplicates(table, key, missing):
    it = iter(table)
    hdr, it = iterpeek(it)
    flds = list(map(text_type, hdr))

    # determine output fields
    if isinstance(key, string_types):
        outhdr = [key]
        keyflds = set([key])
    else:
        outhdr = list(key)
        keyflds = set(key)
    valflds = [f for f in flds if f not in keyflds]
    valfldidxs = [flds.index(f) for f in valflds]
    outhdr.extend(valflds)
    yield tuple(outhdr)

    # do the work
    for k, grp in rowgroupby(it, key):
        grp = list(grp)
        if isinstance(key, string_types):
            outrow = [k]
        else:
            outrow = list(k)
        mergedvals = [set(row[i] for row in grp
                          if len(row) > i and row[i] != missing)
                      for i in valfldidxs]
        normedvals = [vals.pop() if len(vals) == 1
                      else missing if len(vals) == 0
                      else Conflict(vals)
                      for vals in mergedvals]
        outrow.extend(normedvals)
        yield tuple(outrow)
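The public mergeduplicates() transform is the usual entry point for this generator; a hedged sketch of the per-field merge rules it applies within each key group:

import petl as etl

table = [('foo', 'bar', 'baz'),
         ('A', 1, 2.0),
         ('A', 1, None),
         ('A', 2, 2.0)]

# within each 'foo' group: a field with exactly one distinct non-missing value
# keeps that value, an all-missing field stays missing, and a field with
# several distinct values is wrapped in a Conflict
merged = etl.mergeduplicates(table, 'foo')
print(list(merged))
# expected: one row for key 'A', with bar -> Conflict({1, 2}) and baz -> 2.0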
Example #5
def itermultiaggregate(source, key, aggregation):
    aggregation = OrderedDict(aggregation.items())  # take a copy
    it = iter(source)
    hdr = next(it)
    # push back header to ensure we iterate only once
    it = itertools.chain([hdr], it)

    # normalise aggregators
    for outfld in aggregation:
        agg = aggregation[outfld]
        if callable(agg):
            aggregation[outfld] = None, agg
        elif isinstance(agg, string_types):
            aggregation[outfld] = agg, list  # list is default
        elif len(agg) == 1 and isinstance(agg[0], string_types):
            aggregation[outfld] = agg[0], list  # list is default
        elif len(agg) == 1 and callable(agg[0]):
            aggregation[outfld] = None, agg[0]  # aggregate whole rows
        elif len(agg) == 2:
            pass  # no need to normalise
        else:
            raise ArgumentError('invalid aggregation: %r, %r' % (outfld, agg))

    # determine output header
    if isinstance(key, (list, tuple)):
        outhdr = list(key)
    elif callable(key):
        outhdr = ['key']
    else:
        outhdr = [key]
    for outfld in aggregation:
        outhdr.append(outfld)
    yield tuple(outhdr)
    
    # generate data
    for k, rows in rowgroupby(it, key):
        rows = list(rows)  # may need to iterate over these more than once
        # handle compound key
        if isinstance(key, (list, tuple)):
            outrow = list(k)
        else:
            outrow = [k]
        for outfld in aggregation:
            srcfld, aggfun = aggregation[outfld]
            if srcfld is None:
                aggval = aggfun(rows)
                outrow.append(aggval)
            elif isinstance(srcfld, (list, tuple)):
                idxs = [hdr.index(f) for f in srcfld]
                valgetter = operator.itemgetter(*idxs)
                vals = (valgetter(row) for row in rows)
                aggval = aggfun(vals)
                outrow.append(aggval)
            else:
                idx = hdr.index(srcfld)
                # try using generator comprehension
                vals = (row[idx] for row in rows)
                aggval = aggfun(vals)
                outrow.append(aggval)
        yield tuple(outrow)
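In petl, passing a mapping as the aggregation argument of aggregate() routes through this generator; a sketch of the spec shapes the normalisation step above accepts (bare callables, field names, and (field, callable) pairs):

from collections import OrderedDict
import petl as etl

table = [('foo', 'bar'),
         ('a', 3),
         ('a', 7),
         ('b', 2)]

aggregation = OrderedDict()
aggregation['count'] = len          # bare callable: applied to the whole rows
aggregation['bars'] = 'bar'         # field name only: list is the default
aggregation['barsum'] = 'bar', sum  # (source field, callable) pair

print(list(etl.aggregate(table, 'foo', aggregation)))
# expected: [('foo', 'count', 'bars', 'barsum'), ('a', 2, [3, 7], 10), ('b', 1, [2], 2)]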
Example #6
def itersimpleaggregate(table, key, aggregation, value):

    # special case counting
    if aggregation == len:
        aggregation = lambda g: sum(1 for _ in g)  # count length of iterable

    # determine output header
    if isinstance(key, (list, tuple)):
        outhdr = tuple(key) + ('value', )
    elif callable(key):
        outhdr = ('key', 'value')
    else:
        outhdr = (key, 'value')
    yield outhdr

    # generate data
    if isinstance(key, (list, tuple)):
        for k, grp in rowgroupby(table, key, value):
            yield tuple(k) + (aggregation(grp), )
    else:
        for k, grp in rowgroupby(table, key, value):
            yield k, aggregation(grp)
Example #7
File: joins.py  Project: Mgutjahr/petl
def __iter__(self):
    it = iter(self.table)
    hdr = next(it)
    table = itertools.chain([hdr], it)
    value = self.value
    vidx = hdr.index(value)
    outhdr = list(hdr)
    outhdr[vidx] = '%s_id' % value
    yield tuple(outhdr)
    offset, multiplier = self.autoincrement
    for n, (_, group) in enumerate(rowgroupby(table, value)):
        for row in group:
            outrow = list(row)
            outrow[vidx] = (n * multiplier) + offset
            yield tuple(outrow)
Example #8
File: intervals.py  Project: DeanWay/petl
def collapsedintervals(table, start='start', stop='stop', key=None):
    """
    Utility function to collapse intervals in a table. 
    
    If no facet `key` is given, returns an iterator over `(start, stop)` tuples.
    
    If facet `key` is given, returns an iterator over `(key, start, stop)`
    tuples.
    
    """
    
    if key is None:
        table = sort(table, key=start)
        for iv in _collapse(values(table, (start, stop))):
            yield iv
    else:
        table = sort(table, key=(key, start))
        for k, g in rowgroupby(table, key=key, value=(start, stop)):
            for iv in _collapse(g):
                yield (k,) + iv
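_collapse is not shown here; purely as a point of reference, a self-contained sketch of the same facet-then-collapse idea, where merge_overlaps is a hypothetical stand-in for _collapse (not petl API):

import petl as etl

def merge_overlaps(intervals):
    # hypothetical stand-in for _collapse: merge overlapping (start, stop)
    # pairs, assuming they arrive sorted by start
    merged = []
    for start, stop in intervals:
        if merged and start <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], stop))
        else:
            merged.append((start, stop))
    return merged

table = [('key', 'start', 'stop'),
         ('x', 1, 4),
         ('x', 3, 7),
         ('x', 10, 12),
         ('y', 2, 5)]

for k, g in etl.rowgroupby(etl.sort(table, key=('key', 'start')),
                           key='key', value=('start', 'stop')):
    for iv in merge_overlaps(g):
        print((k,) + iv)
# ('x', 1, 7)
# ('x', 10, 12)
# ('y', 2, 5)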
Example #9
def iterfold(table, key, f, value):
    yield ('key', 'value')
    for k, grp in rowgroupby(table, key, value):
        yield k, reduce(f, grp)
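This backs petl's fold() transform; a short usage sketch, assuming fold(table, key, f, value) as in the petl documentation:

import operator
import petl as etl

table = [('id', 'count'),
         (1, 3),
         (1, 5),
         (2, 4),
         (2, 8)]

# reduce the 'count' values within each 'id' group with a binary operator,
# exactly the reduce(f, grp) call in iterfold
folded = etl.fold(table, 'id', operator.add, 'count')
print(list(folded))
# [('key', 'value'), (1, 8), (2, 12)]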
Example #10
File: maps.py  Project: DeanWay/petl
def iterrowgroupmap(source, key, mapper, header):
    yield tuple(header)
    for key, rows in rowgroupby(source, key):
        for row in mapper(key, rows):
            yield row
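This is the engine behind petl's rowgroupmap() transform; a small sketch, assuming rowgroupmap(table, key, mapper, header=...) as documented. The mapper is free to yield any number of rows per group:

import petl as etl

table = [('foo', 'bar'),
         ('a', 3),
         ('a', 7),
         ('b', 2)]

def summarise(key, rows):
    # one output row per group here, but a mapper may yield several
    rows = list(rows)
    yield (key, len(rows), sum(r[1] for r in rows))

result = etl.rowgroupmap(table, 'foo', summarise,
                         header=['foo', 'count', 'barsum'])
print(list(result))
# [('foo', 'count', 'barsum'), ('a', 2, 10), ('b', 1, 2)]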
Example #11
def __iter__(self):
    offset, multiplier = self.autoincrement
    yield ('id', self.value)
    for n, (v, _) in enumerate(rowgroupby(self.table, self.value)):
        yield ((n * multiplier) + offset, v)
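The two __iter__ methods in Examples #7 and #11 read like the two halves of petl's unjoin() transform: the first rewrites the chosen column to generated ids, and this one emits the matching ('id', value) lookup table. A minimal sketch of the lookup half, built directly on rowgroupby:

import petl as etl

table = [('name', 'city'),
         ('alice', 'Lisbon'),
         ('bob', 'Lisbon'),
         ('cara', 'Porto')]

# one row per distinct 'city', numbered in group order with the
# (offset, multiplier) autoincrement scheme used above
offset, multiplier = 1, 1
lookup = [('id', 'city')]
for n, (v, _) in enumerate(etl.rowgroupby(etl.sort(table, 'city'), 'city')):
    lookup.append((n * multiplier + offset, v))
print(lookup)
# [('id', 'city'), (1, 'Lisbon'), (2, 'Porto')]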