def selectrangeclosed(table, field, minv, maxv, complement=False):
    """Select rows whose `field` value is greater than `minv` and less than
    `maxv`.

    Despite the function's name, both bounds are *exclusive*: a value passes
    only when ``minv < value < maxv``. The bounds and every tested value are
    wrapped in `Comparable` before being compared.

    :param table: table passed through to `select`
    :param field: field whose value is tested, passed through to `select`
    :param minv: exclusive lower bound
    :param maxv: exclusive upper bound
    :param complement: forwarded unchanged to `select`
    :returns: whatever `select` returns for the range predicate
    """
    lower = Comparable(minv)
    upper = Comparable(maxv)

    def _strictly_between(v):
        # chained comparison: the value is wrapped once and tested on both sides
        return lower < Comparable(v) < upper

    return select(table, field, _strictly_between, complement=complement)
def selectrangeopen(table, field, minv, maxv, complement=False):
    """Select rows where the given field is greater than or equal to `minv`
    and less than or equal to `maxv`.

    Despite the function's name, both bounds are *inclusive*: a value passes
    when ``minv <= value <= maxv``.

    :param table: table passed through to `select`
    :param field: field whose value is tested, passed through to `select`
    :param minv: inclusive lower bound
    :param maxv: inclusive upper bound
    :param complement: forwarded unchanged to `select`
    :returns: whatever `select` returns for the range predicate
    """
    minv = Comparable(minv)
    maxv = Comparable(maxv)
    # Wrap the field value as well (as selectrangeclosed does) so that both
    # halves of the chained comparison are evaluated between Comparable
    # objects; with a raw value, `v <= maxv` would first be dispatched to the
    # value's own `__le__`.
    return select(table, field,
                  lambda v: minv <= Comparable(v) <= maxv,
                  complement=complement)
def iterantijoin(left, right, lkey, rkey):
    """Yield the left header followed by the left rows whose key does not
    occur in the right table.

    Both tables are walked in step, one key group at a time, advancing
    whichever side currently has the smaller key.

    NOTE(review): this stepping only makes sense if both tables are already
    ordered by their key fields -- presumably the caller guarantees that;
    confirm.

    :param left: iterable of rows, the first row being the header
    :param right: iterable of rows, the first row being the header
    :param lkey: key field specification for `left` (resolved by `asindices`)
    :param rkey: key field specification for `right` (resolved by `asindices`)
    """
    lit = iter(left)
    rit = iter(right)
    lhdr = next(lit)
    rhdr = next(rit)
    yield tuple(lhdr)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # construct functions to extract key values from both tables
    lgetk = comparable_itemgetter(*lkind)
    rgetk = comparable_itemgetter(*rkind)

    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)

    # Track the current (key, rows) pair of each side explicitly; None means
    # that side is exhausted. This replaces comparing against a placeholder
    # key after the loop, which could not tell "left group still pending"
    # apart from "nothing fetched" when the first left key did not compare
    # greater than the placeholder.
    lcur = next(lgit, None)
    rcur = next(rgit, None)

    # loop until *either* of the iterators is exhausted
    while lcur is not None and rcur is not None:
        lkval, lrowgrp = lcur
        rkval = rcur[0]
        if lkval < rkval:
            # key only exists on the left: these rows belong in the output
            for row in lrowgrp:
                yield tuple(row)
            lcur = next(lgit, None)
        elif lkval > rkval:
            # right is behind, let it catch up
            rcur = next(rgit, None)
        else:
            # key present on both sides: drop the left group
            lcur = next(lgit, None)
            rcur = next(rgit, None)

    # right ran out first: everything still on the left is unmatched
    if lcur is not None:
        for row in lcur[1]:
            yield tuple(row)
        for _, lrowgrp in lgit:
            for row in lrowgrp:
                yield tuple(row)
def itercomplement(ta, tb):
    """Yield the header of `ta` (field names converted with `str`) followed
    by the rows of `ta` that are not matched by a row of `tb`.

    The two row streams are walked in step, advancing whichever side is
    currently smaller according to `Comparable`. Each equal pair consumes one
    row from each side.

    NOTE(review): the stepping assumes both inputs are already sorted --
    presumably arranged by the caller; confirm.

    :param ta: iterable of rows, the first row being the header
    :param tb: iterable of rows, the first row (its header) is discarded
    """
    # coerce rows to tuples to ensure hashable and comparable
    rows_a = (tuple(r) for r in iter(ta))
    rows_b = (tuple(r) for r in iter(tb))

    header = tuple(str(name) for name in next(rows_a))
    next(rows_b)  # ignore b fields
    yield header

    exhausted = object()  # marks the end of `a`; rows are tuples, never this

    cur_a = next(rows_a, exhausted)
    if cur_a is exhausted:
        debug('a is empty, nothing to yield')
        return

    cur_b = next(rows_b, exhausted)
    if cur_b is exhausted:
        debug('b is empty, just iterate through a')
        yield cur_a
        for remaining in rows_a:
            yield remaining
        return

    # we want the elements in a that are not in b; once b runs out, cur_b
    # becomes None and every remaining row of a is emitted
    while True:
        debug('current rows: %r %r', cur_a, cur_b)
        if cur_b is None or Comparable(cur_a) < Comparable(cur_b):
            yield cur_a
            debug('advance a')
            cur_a = next(rows_a, exhausted)
            if cur_a is exhausted:
                break
        elif cur_a == cur_b:
            debug('advance both')
            cur_a = next(rows_a, exhausted)
            if cur_a is exhausted:
                break
            cur_b = next(rows_b, None)
        else:
            debug('advance b')
            cur_b = next(rows_b, None)
def itercomplement(ta, tb, strict):
    """Yield the header of `ta` followed by the rows of `ta` that are not
    matched by a row of `tb`.

    The two row streams are walked in step, advancing whichever side is
    currently smaller according to `Comparable`. When a row of `ta` equals
    the current row of `tb` it is dropped; the `tb` row is consumed as well
    unless `strict` is true, in which case it stays in place and can
    eliminate further equal rows of `ta`.

    NOTE(review): the stepping assumes both inputs are already sorted --
    presumably arranged by the caller; confirm.

    :param ta: iterable of rows, the first row being the header
    :param tb: iterable of rows, the first row (its header) is discarded
    :param strict: if true, a matched row of `tb` is not consumed
    """
    # coerce rows to tuples to ensure hashable and comparable
    rows_a = (tuple(r) for r in iter(ta))
    rows_b = (tuple(r) for r in iter(tb))

    header = tuple(next(rows_a))
    next(rows_b)  # ignore b fields
    yield header

    exhausted = object()  # marks the end of `a`; rows are tuples, never this

    cur_a = next(rows_a, exhausted)
    if cur_a is exhausted:
        return

    cur_b = next(rows_b, exhausted)
    if cur_b is exhausted:
        # nothing to subtract: pass every row of a through
        yield cur_a
        for remaining in rows_a:
            yield remaining
        return

    # we want the elements in a that are not in b; once b runs out, cur_b
    # becomes None and every remaining row of a is emitted
    while True:
        if cur_b is None or Comparable(cur_a) < Comparable(cur_b):
            yield cur_a
            cur_a = next(rows_a, exhausted)
            if cur_a is exhausted:
                break
        elif cur_a == cur_b:
            cur_a = next(rows_a, exhausted)
            if cur_a is exhausted:
                break
            if not strict:
                cur_b = next(rows_b, None)
        else:
            cur_b = next(rows_b, None)
def iterintersection(a, b):
    """Yield the header of `a` followed by the rows found in both tables.

    The two row streams are walked in step: the side that is smaller
    according to `Comparable` is advanced, and when the current rows are
    equal the row is emitted once and both sides advance. Iteration ends as
    soon as either table runs out of rows.

    NOTE(review): the stepping assumes both inputs are already sorted --
    presumably arranged by the caller; confirm.

    :param a: iterable of rows, the first row being the header
    :param b: iterable of rows, the first row (its header) is discarded
    """
    source_a = iter(a)
    source_b = iter(b)

    header = next(source_a)
    next(source_b)  # ignore b header
    yield tuple(header)

    def _fetch(source):
        # next row as a tuple; StopIteration propagates to the try below
        return tuple(next(source))

    try:
        row_a = _fetch(source_a)
        row_b = _fetch(source_b)
        while True:
            if Comparable(row_a) < Comparable(row_b):
                row_a = _fetch(source_a)
            elif row_a == row_b:
                yield row_a
                row_a = _fetch(source_a)
                row_b = _fetch(source_b)
            else:
                row_b = _fetch(source_b)
    except StopIteration:
        # either side exhausted: no further common rows are possible
        pass
def test_select():
    """Exercise `select` with record predicates, expression strings and
    single-field predicates, with and without ``complement``."""
    header = ('foo', 'bar', 'baz')
    table = (header,
             ('a', 4, 9.3),
             ('a', 2, 88.2),
             ('b', 1, 23.3),
             ('c', 8, 42.0),
             ('d', 7, 100.9),
             ('c', 2))  # deliberately short row

    foo_is_a = (header,
                ('a', 4, 9.3),
                ('a', 2, 88.2))
    foo_is_not_a = (header,
                    ('b', 1, 23.3),
                    ('c', 8, 42.0),
                    ('d', 7, 100.9),
                    ('c', 2))
    foo_is_a_and_bar_gt_3 = (header,
                             ('a', 4, 9.3))

    # record predicates: access by position, by field name, by attribute
    record_predicates = (
        lambda rec: rec[0] == 'a',
        lambda rec: rec['foo'] == 'a',
        lambda rec: rec.foo == 'a',
    )
    for predicate in record_predicates:
        actual = select(table, predicate)
        ieq(foo_is_a, actual)
        ieq(foo_is_a, actual)  # check can iterate twice

    # check select complement
    actual = select(table, lambda rec: rec['foo'] == 'a', complement=True)
    ieq(foo_is_not_a, actual)
    ieq(foo_is_not_a, actual)  # check can iterate twice

    # compound record predicate
    actual = select(table, lambda rec: rec['foo'] == 'a' and rec['bar'] > 3)
    ieq(foo_is_a_and_bar_gt_3, actual)

    # expression strings
    actual = select(table, "{foo} == 'a'")
    ieq(foo_is_a, actual)

    actual = select(table, "{foo} == 'a' and {bar} > 3")
    ieq(foo_is_a_and_bar_gt_3, actual)

    # check error handling on short rows
    actual = select(table, lambda rec: Comparable(rec['baz']) > 88.1)
    expect = (header,
              ('a', 2, 88.2),
              ('d', 7, 100.9))
    ieq(expect, actual)

    # check single field tests
    actual = select(table, 'foo', lambda v: v == 'a')
    ieq(foo_is_a, actual)
    ieq(foo_is_a, actual)  # check can iterate twice

    # check select complement
    actual = select(table, 'foo', lambda v: v == 'a', complement=True)
    ieq(foo_is_not_a, actual)
    ieq(foo_is_not_a, actual)  # check can iterate twice
def iterjoin(left, right, lkey, rkey, leftouter=False, rightouter=False,
             missing=None, lprefix=None, rprefix=None):
    """Yield the rows of a join of `left` and `right` on their key fields.

    The first row yielded is the output header: all left fields followed by
    the non-key right fields. Both tables are then walked in step, one key
    group at a time, advancing whichever side currently has the smaller key.

    NOTE(review): this stepping only makes sense if both tables are already
    ordered by their key fields -- presumably the caller guarantees that;
    confirm.

    :param left: iterable of rows, the first row being the header
    :param right: iterable of rows, the first row being the header
    :param lkey: key field specification for `left` (resolved by `asindices`)
    :param rkey: key field specification for `right` (resolved by `asindices`)
    :param leftouter: if true, also yield left rows without a right match,
        padded with `missing`
    :param rightouter: if true, also yield right rows without a left match,
        padded with `missing` (key values are copied into the left key
        positions)
    :param missing: padding value for the absent side of an outer row
    :param lprefix: if not None, prepended to every left field name
    :param rprefix: if not None, prepended to every non-key right field name
    """
    lit = iter(left)
    rit = iter(right)
    lhdr = next(lit)
    rhdr = next(rit)

    # determine indices of the key fields in left and right tables
    lkind = asindices(lhdr, lkey)
    rkind = asindices(rhdr, rkey)

    # construct functions to extract key values from both tables
    lgetk = comparable_itemgetter(*lkind)
    rgetk = comparable_itemgetter(*rkind)

    # determine indices of non-key fields in the right table
    # (in the output, we only include key fields from the left table - we
    # don't want to duplicate fields)
    rvind = [i for i in range(len(rhdr)) if i not in rkind]
    rgetv = rowgetter(*rvind)

    # determine the output fields
    if lprefix is None:
        outhdr = list(lhdr)
    else:
        outhdr = [(text_type(lprefix) + text_type(f)) for f in lhdr]
    if rprefix is None:
        outhdr.extend(rgetv(rhdr))
    else:
        outhdr.extend([(text_type(rprefix) + text_type(f))
                       for f in rgetv(rhdr)])
    yield tuple(outhdr)

    # define a function to join two groups of rows
    def joinrows(_lrowgrp, _rrowgrp):
        if _rrowgrp is None:
            for lrow in _lrowgrp:
                outrow = list(lrow)  # start with the left row
                # extend with missing values in place of the right row
                outrow.extend([missing] * len(rvind))
                yield tuple(outrow)
        elif _lrowgrp is None:
            for rrow in _rrowgrp:
                # start with missing values in place of the left row
                outrow = [missing] * len(lhdr)
                # set key values
                for li, ri in zip(lkind, rkind):
                    outrow[li] = rrow[ri]
                # extend with non-key values from the right row
                outrow.extend(rgetv(rrow))
                yield tuple(outrow)
        else:
            _rrowgrp = list(_rrowgrp)  # may need to iterate more than once
            for lrow in _lrowgrp:
                for rrow in _rrowgrp:
                    # start with the left row
                    outrow = list(lrow)
                    # extend with non-key values from the right row
                    outrow.extend(rgetv(rrow))
                    yield tuple(outrow)

    # construct group iterators for both tables
    lgit = itertools.groupby(lit, key=lgetk)
    rgit = itertools.groupby(rit, key=rgetk)

    # Track the current (key, rows) pair of each side explicitly; None means
    # that side is exhausted. This replaces comparing against a placeholder
    # key after the loop, which could not tell "group still pending" apart
    # from "nothing fetched" when a real first key did not compare as
    # different from the placeholder (so an outer join against a table with
    # no data rows could lose its first key group).
    lcur = next(lgit, None)
    rcur = next(rgit, None)

    # loop until *either* of the iterators is exhausted
    while lcur is not None and rcur is not None:
        lkval, lrowgrp = lcur
        rkval, rrowgrp = rcur
        if lkval < rkval:
            # key only exists on the left
            if leftouter:
                for row in joinrows(lrowgrp, None):
                    yield tuple(row)
            lcur = next(lgit, None)
        elif lkval > rkval:
            # key only exists on the right
            if rightouter:
                for row in joinrows(None, rrowgrp):
                    yield tuple(row)
            rcur = next(rgit, None)
        else:
            # matching keys: emit every left/right row combination; the
            # groups are fully consumed here, before either groupby advances
            for row in joinrows(lrowgrp, rrowgrp):
                yield tuple(row)
            lcur = next(lgit, None)
            rcur = next(rgit, None)

    # make sure any left rows remaining are yielded
    if leftouter:
        if lcur is not None:
            # the group that was fetched but never matched
            for row in joinrows(lcur[1], None):
                yield tuple(row)
        # yield the rest
        for _, lrowgrp in lgit:
            for row in joinrows(lrowgrp, None):
                yield tuple(row)

    # make sure any right rows remaining are yielded
    if rightouter:
        if rcur is not None:
            # the group that was fetched but never matched
            for row in joinrows(None, rcur[1]):
                yield tuple(row)
        # yield the rest
        for _, rrowgrp in rgit:
            for row in joinrows(None, rrowgrp):
                yield tuple(row)
def selectge(table, field, value, complement=False):
    """Select rows where the given field is greater than or equal to the
    given value.

    The value is wrapped in `Comparable` and handed to `selectop` together
    with `operator.ge`.

    :param table: table passed through to `selectop`
    :param field: field whose value is tested, passed through to `selectop`
    :param value: the threshold to compare against
    :param complement: forwarded unchanged to `selectop`
    :returns: whatever `selectop` returns
    """
    return selectop(table, field, Comparable(value), operator.ge,
                    complement=complement)
def selectlt(table, field, value, complement=False):
    """Select rows where the given field is less than the given value.

    The value is wrapped in `Comparable` and handed to `selectop` together
    with `operator.lt`.

    :param table: table passed through to `selectop`
    :param field: field whose value is tested, passed through to `selectop`
    :param value: the threshold to compare against
    :param complement: forwarded unchanged to `selectop`
    :returns: whatever `selectop` returns
    """
    return selectop(table, field, Comparable(value), operator.lt,
                    complement=complement)