def test_intervalantijoin_faceted():
    """Check `intervalantijoin` with key fields given via `lkey`/`rkey`."""
    lhs = (
        ('fruit', 'begin', 'end'),
        ('apple', 1, 2),
        ('apple', 2, 4),
        ('apple', 2, 5),
        ('orange', 2, 5),
        ('orange', 9, 14),
        ('orange', 19, 140),
        ('apple', 1, 1),
        ('apple', 2, 2),
        ('apple', 4, 4),
        ('apple', 5, 5),
        ('orange', 5, 5),
    )
    rhs = (
        ('type', 'start', 'stop', 'value'),
        ('apple', 1, 4, 'foo'),
        ('apple', 3, 7, 'bar'),
        ('orange', 4, 9, 'baz'),
    )
    expect = (
        ('fruit', 'begin', 'end'),
        ('orange', 9, 14),
        ('orange', 19, 140),
        ('apple', 1, 1),
        ('apple', 2, 2),
        ('apple', 4, 4),
        ('apple', 5, 5),
        ('orange', 5, 5),
    )

    actual = intervalantijoin(
        lhs, rhs,
        lstart='begin', lstop='end',
        rstart='start', rstop='stop',
        lkey='fruit', rkey='type',
    )

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_intervaljoin_prefixes():
    """Check `intervaljoin` output field names when `lprefix`/`rprefix` are set."""
    lhs = (
        ('begin', 'end', 'quux'),
        (1, 2, 'a'),
        (2, 4, 'b'),
        (2, 5, 'c'),
        (9, 14, 'd'),
        (9, 140, 'e'),
        (1, 1, 'f'),
        (2, 2, 'g'),
        (4, 4, 'h'),
        (5, 5, 'i'),
        (1, 8, 'j'),
    )
    rhs = (
        ('start', 'stop', 'value'),
        (1, 4, 'foo'),
        (3, 7, 'bar'),
        (4, 9, 'baz'),
    )

    actual = intervaljoin(
        lhs, rhs,
        lstart='begin', lstop='end',
        rstart='start', rstop='stop',
        lprefix='l_', rprefix='r_',
    )

    expect = (
        ('l_begin', 'l_end', 'l_quux', 'r_start', 'r_stop', 'r_value'),
        (1, 2, 'a', 1, 4, 'foo'),
        (2, 4, 'b', 1, 4, 'foo'),
        (2, 4, 'b', 3, 7, 'bar'),
        (2, 5, 'c', 1, 4, 'foo'),
        (2, 5, 'c', 3, 7, 'bar'),
        (2, 5, 'c', 4, 9, 'baz'),
        (1, 8, 'j', 1, 4, 'foo'),
        (1, 8, 'j', 3, 7, 'bar'),
        (1, 8, 'j', 4, 9, 'baz'),
    )

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_intervalantijoin_include_stop():
    """Check `intervalantijoin` when called with `include_stop=True`."""
    lhs = (
        ('begin', 'end', 'quux'),
        (1, 2, 'a'),
        (2, 4, 'b'),
        (2, 5, 'c'),
        (9, 14, 'd'),
        (9, 140, 'e'),
        (10, 140, 'e'),
        (1, 1, 'f'),
        (2, 2, 'g'),
        (4, 4, 'h'),
        (5, 5, 'i'),
        (1, 8, 'j'),
    )
    rhs = (
        ('start', 'stop', 'value'),
        (1, 4, 'foo'),
        (3, 7, 'bar'),
        (4, 9, 'baz'),
    )

    actual = intervalantijoin(
        lhs, rhs,
        lstart='begin', lstop='end',
        rstart='start', rstop='stop',
        include_stop=True,
    )
    expect = (
        ('begin', 'end', 'quux'),
        (10, 140, 'e'),
    )

    # Dump the actual table through the debug hook before asserting.
    debug(lookall(actual))

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_integration():
    """Check `intervaljoin` when invoked as a method on an `etl.wrap` table."""
    lhs = etl.wrap((
        ('begin', 'end', 'quux'),
        (1, 2, 'a'),
        (2, 4, 'b'),
        (2, 5, 'c'),
        (9, 14, 'd'),
        (9, 140, 'e'),
        (1, 1, 'f'),
        (2, 2, 'g'),
        (4, 4, 'h'),
        (5, 5, 'i'),
        (1, 8, 'j'),
    ))
    rhs = etl.wrap((
        ('start', 'stop', 'value'),
        (1, 4, 'foo'),
        (3, 7, 'bar'),
        (4, 9, 'baz'),
    ))

    actual = lhs.intervaljoin(
        rhs,
        lstart='begin', lstop='end',
        rstart='start', rstop='stop',
    )

    expect = (
        ('begin', 'end', 'quux', 'start', 'stop', 'value'),
        (1, 2, 'a', 1, 4, 'foo'),
        (2, 4, 'b', 1, 4, 'foo'),
        (2, 4, 'b', 3, 7, 'bar'),
        (2, 5, 'c', 1, 4, 'foo'),
        (2, 5, 'c', 3, 7, 'bar'),
        (2, 5, 'c', 4, 9, 'baz'),
        (1, 8, 'j', 1, 4, 'foo'),
        (1, 8, 'j', 3, 7, 'bar'),
        (1, 8, 'j', 4, 9, 'baz'),
    )

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_operator_overload():
    """Build a sort -> duplicates -> topickle pipeline using the `|` operator."""
    rows = (
        ("foo", "bar", "baz"),
        ("A", 1, 2),
        ("B", "2", "3.4"),
        ("D", "xyz", 9.0),
        ("B", u"3", u"7.8", True),
        ("B", "2", 42),
        ("E", None),
        ("D", 4, 12.3),
    )
    expected_duplicates = (
        ("foo", "bar", "baz"),
        ("B", "2", "3.4"),
        ("B", u"3", u"7.8", True),
        ("B", "2", 42),
        ("D", "xyz", 9.0),
        ("D", 4, 12.3),
    )

    # Only the generated name is kept; the temp-file object itself is dropped.
    pickle_path = NamedTemporaryFile().name

    pipeline = sort("foo")
    pipeline | duplicates("foo") | topickle(pickle_path)
    pipeline.push(rows)

    ieq(expected_duplicates, frompickle(pickle_path))
def test_unique():
    """Push a table through sort -> unique and check both output pipes."""
    rows = (
        ("foo", "bar", "baz"),
        ("A", 1, 2),
        ("B", "2", "3.4"),
        ("D", "xyz", 9.0),
        ("B", u"3", u"7.8", True),
        ("B", "2", 42),
        ("E", None),
        ("D", 4, 12.3),
    )

    # Only the generated names are kept; the temp-file objects are dropped.
    unique_path = NamedTemporaryFile().name
    remainder_path = NamedTemporaryFile().name

    source = sort("foo")
    uniq = source.pipe(unique("foo"))
    uniq.pipe(topickle(unique_path))
    uniq.pipe("remainder", topickle(remainder_path))
    source.push(rows)

    expected_unique = (
        ("foo", "bar", "baz"),
        ("A", 1, 2),
        ("E", None),
    )
    ieq(expected_unique, frompickle(unique_path))

    expected_remainder = (
        ("foo", "bar", "baz"),
        ("B", "2", "3.4"),
        ("B", u"3", u"7.8", True),
        ("B", "2", 42),
        ("D", "xyz", 9.0),
        ("D", 4, 12.3),
    )
    ieq(expected_remainder, frompickle(remainder_path))
def test_constraints():
    """Check the problems table `validate` reports for a set of constraints."""
    constraints = [
        {'name': 'C1', 'field': 'foo', 'test': int},
        {'name': 'C2', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
        {'name': 'C3', 'field': 'baz',
         'assertion': lambda v: v in ['Y', 'N']},
        # No 'field' entry: this assertion receives the whole row.
        {'name': 'C4', 'assertion': lambda row: None not in row},
    ]

    table = (
        ('foo', 'bar', 'baz'),
        (1, '2000-01-01', 'Y'),
        ('x', '2010-10-10', 'N'),
        (2, '2000/01/01', 'Y'),
        (3, '2015-12-12', 'x'),
        (4, None, 'N'),
        ('y', '1999-99-99', 'z'),
    )

    expect = (
        ('name', 'row', 'field', 'value', 'error'),
        ('C1', 2, 'foo', 'x', 'ValueError'),
        ('C2', 3, 'bar', '2000/01/01', 'ValueError'),
        ('C3', 4, 'baz', 'x', 'AssertionError'),
        ('C2', 5, 'bar', None, 'AttributeError'),
        ('C4', 5, None, None, 'AssertionError'),
        ('C1', 6, 'foo', 'y', 'ValueError'),
        ('C2', 6, 'bar', '1999-99-99', 'ValueError'),
        ('C3', 6, 'baz', 'z', 'AssertionError'),
    )

    actual = validate(table, constraints)
    debug(actual)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_duplicates():
    """Check `duplicates` with a single-field key and with a compound key."""
    rows = (
        ('foo', 'bar', 'baz'),
        ('A', 1, 2),
        ('B', '2', '3.4'),
        ('D', 'xyz', 9.0),
        ('B', u'3', u'7.8', True),
        ('B', '2', 42),
        ('E', None),
        ('D', 4, 12.3),
    )

    # Single-field key.
    by_foo = duplicates(rows, 'foo')
    expected_by_foo = (
        ('foo', 'bar', 'baz'),
        ('B', '2', '3.4'),
        ('B', u'3', u'7.8', True),
        ('B', '2', 42),
        ('D', 'xyz', 9.0),
        ('D', 4, 12.3),
    )
    ieq(expected_by_foo, by_foo)

    # Compound key.
    by_foo_bar = duplicates(rows, key=('foo', 'bar'))
    expected_by_foo_bar = (
        ('foo', 'bar', 'baz'),
        ('B', '2', '3.4'),
        ('B', '2', 42),
    )
    ieq(expected_by_foo_bar, by_foo_bar)
def test_fromhdf5sorted():
    """Round-trip rows through an indexed HDF5 table via `fromhdf5sorted`.

    Rows are written to a fresh HDF5 table, a completely sorted index is
    created on ``foo``, and the table read back with ``sortby='foo'`` is
    compared against the in-memory table sorted on the same field.
    """
    # Keep this handle bound for the whole test: NamedTemporaryFile removes
    # the file once the object is closed or garbage-collected.
    f = NamedTemporaryFile()

    # set up a new hdf5 table to work with
    h5file = tables.open_file(f.name, mode='w', title='Test file')
    try:
        h5file.create_group('/', 'testgroup', 'Test Group')
        h5table = h5file.create_table('/testgroup', 'testtable', FooBar,
                                      'Test Table')

        # load some data into the table
        table1 = (('foo', 'bar'),
                  (3, b'asdfgh'),
                  (2, b'qwerty'),
                  (1, b'zxcvbn'))
        for row in table1[1:]:
            # The loop variable must not be called ``f``: rebinding that name
            # would drop the only reference to the temp file while the HDF5
            # file is still open.
            for i, field in enumerate(table1[0]):
                h5table.row[field] = row[i]
            h5table.row.append()
        h5table.cols.foo.create_csindex()
        h5file.flush()

        # verify we can get the data back out (checked twice)
        table2 = fromhdf5sorted(h5table, sortby='foo')
        ieq(sort(table1, 'foo'), table2)
        ieq(sort(table1, 'foo'), table2)
    finally:
        # clean up even when an assertion above fails
        h5file.close()
def _test_rightjoin_multiple(rightjoin_impl):
    """Check a right-join implementation on tables with repeated key values.

    `rightjoin_impl` is called as ``rightjoin_impl(table1, table2, key='id')``.
    """
    colours = (
        ('id', 'color', 'cost'),
        (1, 'blue', 12),
        (1, 'red', 8),
        (2, 'yellow', 15),
        (2, 'orange', 5),
        (3, 'purple', 4),
        (4, 'chartreuse', 42),
    )
    shapes = (
        ('id', 'shape', 'size'),
        (1, 'circle', 'big'),
        (2, 'square', 'tiny'),
        (2, 'square', 'big'),
        (3, 'ellipse', 'small'),
        (3, 'ellipse', 'tiny'),
        (5, 'didodecahedron', 3.14159265),
    )
    expect = (
        ('id', 'color', 'cost', 'shape', 'size'),
        (1, 'blue', 12, 'circle', 'big'),
        (1, 'red', 8, 'circle', 'big'),
        (2, 'yellow', 15, 'square', 'tiny'),
        (2, 'yellow', 15, 'square', 'big'),
        (2, 'orange', 5, 'square', 'tiny'),
        (2, 'orange', 5, 'square', 'big'),
        (3, 'purple', 4, 'ellipse', 'small'),
        (3, 'purple', 4, 'ellipse', 'tiny'),
        (5, None, None, 'didodecahedron', 3.14159265),
    )

    actual = rightjoin_impl(colours, shapes, key='id')

    # N.B., both sides are sorted because hash and sort implementations
    # return rows in a different order.
    ieq(sort(expect), sort(actual))
def test_fromdb_mkcursor():
    """Check `fromdb` when given a zero-argument callable returning a cursor."""
    # initial data
    seed_rows = (("a", 1), ("b", 2), ("c", 2.0))
    connection = sqlite3.connect(":memory:")
    setup_cursor = connection.cursor()
    setup_cursor.execute("create table foobar (foo, bar)")
    for seed_row in seed_rows:
        setup_cursor.execute("insert into foobar values (?, ?)", seed_row)
    connection.commit()
    setup_cursor.close()

    # test the function
    def mkcursor():
        return connection.cursor()

    actual = fromdb(mkcursor, "select * from foobar")
    expect = (
        ("foo", "bar"),
        ("a", 1),
        ("b", 2),
        ("c", 2.0),
    )
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice

    # test iterators are isolated: advancing one must not move the other
    first = iter(actual)
    second = iter(actual)
    eq_(("foo", "bar"), next(first))
    eq_(("a", 1), next(first))
    eq_(("foo", "bar"), next(second))
    eq_(("b", 2), next(first))
def test_outerjoin_multiple():
    """Check `outerjoin` on `id` for tables with repeated key values."""
    colours = (
        ('id', 'color', 'cost'),
        (1, 'blue', 12),
        (1, 'red', 8),
        (2, 'yellow', 15),
        (2, 'orange', 5),
        (3, 'purple', 4),
        (4, 'chartreuse', 42),
    )
    shapes = (
        ('id', 'shape', 'size'),
        (1, 'circle', 'big'),
        (2, 'square', 'tiny'),
        (2, 'square', 'big'),
        (3, 'ellipse', 'small'),
        (3, 'ellipse', 'tiny'),
        (5, 'didodecahedron', 3.14159265),
    )
    expect = (
        ('id', 'color', 'cost', 'shape', 'size'),
        (1, 'blue', 12, 'circle', 'big'),
        (1, 'red', 8, 'circle', 'big'),
        (2, 'yellow', 15, 'square', 'tiny'),
        (2, 'yellow', 15, 'square', 'big'),
        (2, 'orange', 5, 'square', 'tiny'),
        (2, 'orange', 5, 'square', 'big'),
        (3, 'purple', 4, 'ellipse', 'small'),
        (3, 'purple', 4, 'ellipse', 'tiny'),
        (4, 'chartreuse', 42, None, None),
        (5, None, None, 'didodecahedron', 3.14159265),
    )

    actual = outerjoin(colours, shapes, key='id')
    ieq(expect, actual)
def test_todb_appenddb_cursor():
    """Check `todb` and `appenddb` when handed a DB-API cursor.

    A table is written into an existing sqlite table through a cursor, the
    stored rows are checked, then a second table is appended through the same
    cursor and the combined rows are checked.
    """
    f = NamedTemporaryFile(delete=False)
    # Only the path is needed; release the handle before sqlite opens the
    # file, as the other temp-file based tests do.
    f.close()
    conn = sqlite3.connect(f.name)
    try:
        conn.execute("create table foobar (foo, bar)")
        conn.commit()

        # exercise function
        table = (("foo", "bar"),
                 ("a", 1),
                 ("b", 2),
                 ("c", 2))
        cursor = conn.cursor()
        todb(table, cursor, "foobar")

        # check what it did
        actual = conn.execute("select * from foobar")
        expect = (("a", 1),
                  ("b", 2),
                  ("c", 2))
        ieq(expect, actual)

        # try appending
        table2 = (("foo", "bar"),
                  ("d", 7),
                  ("e", 9),
                  ("f", 1))
        appenddb(table2, cursor, "foobar")

        # check what it did
        actual = conn.execute("select * from foobar")
        expect = (("a", 1),
                  ("b", 2),
                  ("c", 2),
                  ("d", 7),
                  ("e", 9),
                  ("f", 1))
        ieq(expect, actual)
    finally:
        # Release the connection even when an assertion above fails.
        conn.close()
def test_addfieldusingcontext_stateful():
    """Check `addfieldusingcontext` with callbacks that read computed fields.

    `upstream` reads the previous row's `baz` and `downstream` reads the
    previous row's `quux`, i.e. the fields being added.
    """
    table1 = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 4),
        ('C', 5),
        ('D', 9),
    )
    expect = (
        ('foo', 'bar', 'baz', 'quux'),
        ('A', 1, 1, 5),
        ('B', 4, 5, 10),
        ('C', 5, 10, 19),
        ('D', 9, 19, 19),
    )

    def upstream(prv, cur, nxt):
        # First row has no predecessor: start from its own `bar`.
        if prv is None:
            return cur.bar
        return cur.bar + prv.baz

    def downstream(prv, cur, nxt):
        # Last row: carry the previous `quux` forward unchanged.
        if nxt is None:
            return prv.quux
        # First row: nothing accumulated yet.
        if prv is None:
            return nxt.bar + cur.bar
        return nxt.bar + prv.quux

    with_baz = addfieldusingcontext(table1, 'baz', upstream)
    with_quux = addfieldusingcontext(with_baz, 'quux', downstream)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, with_quux)
def test_addfieldusingcontext():
    """Check `addfieldusingcontext` with previous/next-row difference callbacks."""
    table1 = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 4),
        ('C', 5),
        ('D', 9),
    )
    expect = (
        ('foo', 'bar', 'baz', 'quux'),
        ('A', 1, None, 3),
        ('B', 4, 3, 1),
        ('C', 5, 1, 4),
        ('D', 9, 4, None),
    )

    def upstream(prv, cur, nxt):
        # Difference to the previous row; None when there is no previous row.
        return None if prv is None else cur.bar - prv.bar

    def downstream(prv, cur, nxt):
        # Difference to the next row; None when there is no next row.
        return None if nxt is None else nxt.bar - cur.bar

    with_baz = addfieldusingcontext(table1, 'baz', upstream)
    with_quux = addfieldusingcontext(with_baz, 'quux', downstream)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, with_quux)
def test_cat_dupfields():
    """Pin down what `cat` does when a header repeats a field name.

    These inputs are pathological and are included only to confirm the
    expected behaviour; a user would need to rename the fields to get
    something sensible.
    """
    table1 = (
        ('foo', 'foo'),
        (1, 'A'),
        (2,),
        (3, 'B', True),
    )

    # Single table.
    expect_single = (
        ('foo', 'foo'),
        (1, 1),
        (2, 2),
        (3, 3),
    )
    ieq(expect_single, cat(table1))

    # Two tables, the second one carrying an extra field.
    table2 = (
        ('foo', 'foo', 'bar'),
        (4, 'C', True),
        (5, 'D', False),
    )
    expect_pair = (
        ('foo', 'foo', 'bar'),
        (1, 1, None),
        (2, 2, None),
        (3, 3, None),
        (4, 4, True),
        (5, 5, False),
    )
    ieq(expect_pair, cat(table1, table2))
def test_rowreduce_empty():
    """Check `rowreduce` on a header-only table."""
    header_only = (('foo', 'bar'),)

    def reducer(key, rows):
        return (key, [r[0] for r in rows])

    actual = rowreduce(header_only, key='foo', reducer=reducer,
                       header=('foo', 'bar'))

    expect = (('foo', 'bar'),)
    ieq(expect, actual)
def test_valuecounts():
    """Check the count and frequency columns produced by `valuecounts`."""
    rows = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
        ('b', 7),
    )
    expect = (
        ('foo', 'count', 'frequency'),
        ('b', 2, 2./3),
        ('a', 1, 1./3),
    )

    actual = valuecounts(rows, 'foo')

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_fold():
    """Check `fold` with `operator.add` over `count`, keyed on `id`."""
    counts = (
        ('id', 'count'),
        (1, 3),
        (1, 5),
        (2, 4),
        (2, 8),
    )
    expect = (
        ('key', 'value'),
        (1, 8),
        (2, 12),
    )

    folded = fold(counts, 'id', operator.add, 'count', presorted=True)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, folded)
def test_fromcsv_gz():
    """Check `fromcsv` on gzip-compressed input for each line terminator.

    For every terminator a fresh ``.gz`` file is written from the same byte
    lines and read back with ``encoding='ascii'``.
    """
    data = [b'foo,bar',
            b'a,1',
            b'b,2',
            b'c,2']

    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # '\r' not supported in PY2 because universal newline mode is
    # not supported by gzip module
    if PY2:
        lts = b'\n', b'\r\n'
    else:
        lts = b'\r', b'\n', b'\r\n'

    for lt in lts:
        # Reserve a unique path, then give it the '.gz' suffix.
        f = NamedTemporaryFile(delete=False)
        f.close()
        fn = f.name + '.gz'
        os.rename(f.name, fn)

        # The context manager closes the archive even if the write fails.
        with gzip.open(fn, 'wb') as fz:
            fz.write(lt.join(data))

        actual = fromcsv(fn, encoding='ascii')
        ieq(expect, actual)
        ieq(expect, actual)  # verify can iterate twice
def test_recordmapmany():
    """Check `rowmapmany` with a generator that reads fields by name.

    The last input row has no `weight` value and the expected table contains
    no `bmi` row for it.
    """
    table = (
        ('id', 'sex', 'age', 'height', 'weight'),
        (1, 'male', 16, 1.45, 62.0),
        (2, 'female', 19, 1.34, 55.4),
        (3, '-', 17, 1.78, 74.4),
        (4, 'male', 21, 1.33),
    )

    def rowgenerator(rec):
        transmf = {'male': 'M', 'female': 'F'}
        sex = rec['sex']
        # Unknown codes are passed through unchanged.
        yield [rec['id'], 'gender', transmf.get(sex, sex)]
        yield [rec['id'], 'age_months', rec['age'] * 12]
        yield [rec['id'], 'bmi', rec['weight'] / rec['height'] ** 2]

    actual = rowmapmany(table, rowgenerator,
                        header=['subject_id', 'variable', 'value'])

    expect = (
        ('subject_id', 'variable', 'value'),
        (1, 'gender', 'M'),
        (1, 'age_months', 16 * 12),
        (1, 'bmi', 62.0 / 1.45 ** 2),
        (2, 'gender', 'F'),
        (2, 'age_months', 19 * 12),
        (2, 'bmi', 55.4 / 1.34 ** 2),
        (3, 'gender', '-'),
        (3, 'age_months', 17 * 12),
        (3, 'bmi', 74.4 / 1.78 ** 2),
        (4, 'gender', 'M'),
        (4, 'age_months', 21 * 12),
    )

    # Second pass checks the result can be iterated twice.
    for _ in range(2):
        ieq(expect, actual)
def test_unflatten():
    """Check `unflatten` of a single column into rows of three values."""
    flat = (
        ('lines',),
        ('A',),
        (1,),
        (True,),
        ('C',),
        (7,),
        (False,),
        ('B',),
        (2,),
        (False,),
        ('C',),
        (9,),
    )
    expect = (
        ('f0', 'f1', 'f2'),
        ('A', 1, True),
        ('C', 7, False),
        ('B', 2, False),
        ('C', 9, None),
    )

    actual = unflatten(flat, 'lines', 3)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def _test_complement_2(complement_impl):
    """Check a complement implementation in both directions.

    `complement_impl` is called as ``complement_impl(a, b)`` and then as
    ``complement_impl(b, a)``.
    """
    tablea = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
        ('B', 2, False),
        ('C', 9, True),
    )
    tableb = (
        ('x', 'y', 'z'),
        ('B', 2, False),
        ('A', 9, False),
        ('B', 3, True),
        ('C', 9, True),
    )

    # a - b
    aminusb = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
    )
    ieq(aminusb, complement_impl(tablea, tableb))

    # b - a
    bminusa = (
        ('x', 'y', 'z'),
        ('A', 9, False),
        ('B', 3, True),
    )
    ieq(bminusa, complement_impl(tableb, tablea))
def test_stringsource():
    """Write, read back and append CSV through a `StringSource` buffer."""
    tbl1 = (
        ('foo', 'bar'),
        ('a', '1'),
        ('b', '2'),
        ('c', '2'),
    )

    def _expected(text):
        # Outside PY2 the buffer value is compared as ASCII-encoded bytes.
        return text if PY2 else text.encode('ascii')

    # test writing to a string buffer
    buf = StringSource()
    etl.tocsv(tbl1, buf)
    expect = _expected("foo,bar\r\na,1\r\nb,2\r\nc,2\r\n")
    actual = buf.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    tbl2 = etl.fromcsv(StringSource(actual))
    ieq(tbl1, tbl2)
    ieq(tbl1, tbl2)

    # test appending
    etl.appendcsv(tbl1, buf)
    actual = buf.getvalue()
    expect = _expected(
        "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n")
    eq_(expect, actual)
def test_recordcomplement_2():
    """Check `recordcomplement` both ways for tables with reordered fields."""
    tablea = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
        ('B', 2, False),
        ('C', 9, True),
    )
    tableb = (
        ('bar', 'foo', 'baz'),
        (2, 'B', False),
        (9, 'A', False),
        (3, 'B', True),
        (9, 'C', True),
    )

    # a - b
    aminusb = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
    )
    ieq(aminusb, recordcomplement(tablea, tableb))

    # b - a
    bminusa = (
        ('bar', 'foo', 'baz'),
        (3, 'B', True),
        (9, 'A', False),
    )
    ieq(bminusa, recordcomplement(tableb, tablea))
def test_recorddiff():
    """Check the (added, subtracted) pair returned by `recorddiff`."""
    tablea = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
        ('B', 2, False),
        ('C', 9, True),
    )
    tableb = (
        ('bar', 'foo', 'baz'),
        (2, 'B', False),
        (9, 'A', False),
        (3, 'B', True),
        (9, 'C', True),
    )
    aminusb = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
    )
    bminusa = (
        ('bar', 'foo', 'baz'),
        (3, 'B', True),
        (9, 'A', False),
    )

    added, subtracted = recorddiff(tablea, tableb)

    ieq(aminusb, subtracted)
    ieq(bminusa, added)
def test_diff():
    """Check the (added, subtracted) pair returned by `diff`."""
    tablea = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
        ('B', 2, False),
        ('C', 9, True),
    )
    tableb = (
        ('x', 'y', 'z'),
        ('B', 2, False),
        ('A', 9, False),
        ('B', 3, True),
        ('C', 9, True),
    )
    aminusb = (
        ('foo', 'bar', 'baz'),
        ('A', 1, True),
        ('C', 7, False),
    )
    bminusa = (
        ('x', 'y', 'z'),
        ('A', 9, False),
        ('B', 3, True),
    )

    added, subtracted = diff(tablea, tableb)

    ieq(bminusa, added)
    ieq(aminusb, subtracted)
def test_hashcomplement_seqtypes():
    """Check `hashcomplement` isn't confused by list rows vs tuple rows."""
    as_lists = [['a', 'b'], ['A', 1], ['B', 2]]
    as_tuples = [('a', 'b'), ('A', 1), ('B', 2)]

    actual = hashcomplement(as_lists, as_tuples)

    # Only the header is expected back.
    header_only = (('a', 'b'),)
    ieq(header_only, actual)
def test_itervalues():
    """Check `itervalues` for single fields and for tuples of fields.

    The second data row is short (no `baz` value); the expected values hold
    ``None`` in that position.
    """
    table = (
        ('foo', 'bar', 'baz'),
        ('a', 1, True),
        ('b', 2),
        ('b', 7, False),
    )

    # (field selection, expected values), checked in this order.
    cases = (
        ('foo', ('a', 'b', 'b')),
        ('bar', (1, 2, 7)),
        (('foo', 'bar'), (('a', 1), ('b', 2), ('b', 7))),
        ('baz', (True, None, False)),
        (('foo', 'baz'), (('a', True), ('b', None), ('b', False))),
    )
    for selection, expect in cases:
        actual = itervalues(table, selection)
        ieq(expect, actual)
def test_select_falsey():
    """Check `select` with the expression '{foo}' on falsey field values."""
    rows = (
        ('foo',),
        ([],),
        ('',),
    )

    actual = select(rows, '{foo}')

    # Only the header is expected back.
    header_only = (('foo',),)
    ieq(header_only, actual)
def test_skipcomments():
    """Check `skipcomments` with the prefix '##'."""
    raw = (
        ('##aaa', 'bbb', 'ccc'),
        ('##mmm', ),
        ('#foo', 'bar'),
        ('##nnn', 1),
        ('a', 1),
        ('b', 2),
    )
    expect = (
        ('#foo', 'bar'),
        ('a', 1),
        ('b', 2),
    )

    cleaned = skipcomments(raw, '##')

    # Second pass checks the result can be iterated twice.
    for _ in range(2):
        ieq(expect, cleaned)
def test_integration():
    """Round-trip a table through `toxlsx` (wrapped-table method) and `etl.fromxlsx`."""
    tbl = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
        (u'é', datetime(2012, 1, 1)),
    )

    # Reserve a path on disk; only the name is used afterwards.
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    path = tmp.name

    etl.wrap(tbl).toxlsx(path, 'Sheet1')
    actual = etl.fromxlsx(path, 'Sheet1')

    ieq(tbl, actual)
def test_integration():
    """Round-trip a table through `toxls` (wrapped-table method) and `etl.fromxls`."""
    expect = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
    )

    # Reserve a path on disk; only the name is used afterwards.
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    path = tmp.name

    etl.wrap(expect).toxls(path, 'Sheet1')
    actual = etl.fromxls(path, 'Sheet1')

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_duplicates_wholerow():
    """Check `duplicates` when no key is given."""
    rows = (
        ('foo', 'bar', 'baz'),
        ('A', 1, 2),
        ('B', '2', '3.4'),
        ('B', '2', '3.4'),
        ('D', 4, 12.3),
    )
    expected = (
        ('foo', 'bar', 'baz'),
        ('B', '2', '3.4'),
        ('B', '2', '3.4'),
    )

    ieq(expected, duplicates(rows))
def test_empty_addcolumn():
    """Build a table from `empty()` by adding two columns with `addcolumn`."""
    with_foo = addcolumn(empty(), 'foo', ['A', 'B'])
    with_foo_bar = addcolumn(with_foo, 'bar', [1, 2])

    expect = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
    )

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, with_foo_bar)
def test_addrownumbers():
    """Check `addrownumbers` called with the table as its only argument."""
    rows = (
        ('foo', 'bar'),
        ('A', 9),
        ('C', 2),
        ('F', 1),
    )
    expect = (
        ('row', 'foo', 'bar'),
        (1, 'A', 9),
        (2, 'C', 2),
        (3, 'F', 1),
    )

    actual = addrownumbers(rows)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_addrownumbers_field_name():
    """Check `addrownumbers` with the numbering field named via `field`."""
    rows = (
        ('foo', 'bar'),
        ('A', 9),
        ('C', 2),
    )
    expect = (
        ('id', 'foo', 'bar'),
        (1, 'A', 9),
        (2, 'C', 2),
    )

    actual = addrownumbers(rows, field='id')

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_skip():
    """Check `skip` with a count of 2."""
    raw = (
        ('#aaa', 'bbb', 'ccc'),
        ('#mmm', ),
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
    )
    expect = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
    )

    trimmed = skip(raw, 2)

    # Second pass checks the result can be iterated twice.
    for _ in range(2):
        ieq(expect, trimmed)
def test_key_distinct_2():
    """Check `distinct` with `key='b'` when a row holds ``None`` values.

    Test for https://github.com/alimanfoo/petl/issues/318
    """
    rows = (
        ('a', 'b'),
        ('x', '1'),
        ('x', '3'),
        ('y', '1'),
        (None, None),
    )
    expect = (
        ('a', 'b'),
        (None, None),
        ('x', '1'),
        ('x', '3'),
    )

    ieq(expect, distinct(rows, key='b'))
def test_fromxlsx_offset():
    """Check `fromxlsx` with `min_row=2` and `min_col=2`."""
    path = _get_test_xlsx()
    # Nothing to test when the helper supplies no workbook.
    if path is None:
        return

    expect = (
        (1, ),
        (2, ),
        (2, ),
        (datetime(2012, 1, 1, 0, 0), ),
    )

    tbl = fromxlsx(path, 'Sheet1', min_row=2, min_col=2)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, tbl)
def test_unique_wholerow():
    """Check `unique` when no key is given."""
    rows = (
        ('foo', 'bar', 'baz'),
        ('A', 1, 2),
        ('B', '2', '3.4'),
        ('B', '2', '3.4'),
        ('D', 4, 12.3),
    )
    expected = (
        ('foo', 'bar', 'baz'),
        ('A', 1, 2),
        ('D', 4, 12.3),
    )

    ieq(expected, unique(rows))
def test_fieldmap_empty():
    """Check `fieldmap` on a header-only table."""
    header_only = (('foo', 'bar'),)

    # Insertion order is foo, then baz.
    mappings = OrderedDict([
        ('foo', 'foo'),
        ('baz', ('bar', lambda v: v * 2)),
    ])

    actual = fieldmap(header_only, mappings)

    expect = (('foo', 'baz'),)
    ieq(expect, actual)
def test_toxlsx_nosheet():
    """Round-trip a table through `toxlsx`/`fromxlsx` without naming a sheet."""
    tbl = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
        (u'é', datetime(2012, 1, 1)),
    )

    # Reserve a '.xlsx' path on disk; only the name is used afterwards.
    tmp = NamedTemporaryFile(delete=False, suffix='.xlsx')
    tmp.close()
    path = tmp.name

    toxlsx(tbl, path)
    actual = fromxlsx(path)

    ieq(tbl, actual)
def test_fromxls_use_view():
    """Check `fromxls` on the packaged test workbook with `use_view=False`."""
    path = pkg_resources.resource_filename('petl', 'test/resources/test.xls')

    expect = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
        (u'é', 40909.0),
    )

    tbl = fromxls(path, 'Sheet1', use_view=False)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, tbl)
def test_transpose():
    """Check `transpose` on a two-column table."""
    colours = (
        ('id', 'colour'),
        (1, 'blue'),
        (2, 'red'),
        (3, 'purple'),
        (5, 'yellow'),
        (7, 'orange'),
    )
    expect = (
        ('id', 1, 2, 3, 5, 7),
        ('colour', 'blue', 'red', 'purple', 'yellow', 'orange'),
    )

    flipped = transpose(colours)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, flipped)
def test_toxls():
    """Round-trip a table through `toxls` and `fromxls` on 'Sheet1'."""
    expect = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
    )

    # Reserve a path on disk; only the name is used afterwards.
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    path = tmp.name

    toxls(expect, path, 'Sheet1')
    actual = fromxls(path, 'Sheet1')

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_convert_with_row_backwards_compat():
    """Check `convert` when the conversion is given as the string 'strip'."""
    padded = (
        ('foo', 'bar'),
        (' a ', 1),
        (' b ', 2),
    )
    expect = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
    )

    ieq(expect, convert(padded, 'foo', 'strip'))
def test_fromdataframe():
    """Check `fromdataframe` against the table the DataFrame was built from."""
    tbl = [
        ('foo', 'bar', 'baz'),
        ('apples', 1, 2.5),
        ('oranges', 3, 4.4),
        ('pears', 7, .1),
    ]
    header, records = tbl[0], tbl[1:]
    df = pd.DataFrame.from_records(records, columns=header)

    # A fresh table view is created for each of the two comparisons.
    for _ in range(2):
        ieq(tbl, fromdataframe(df))
def test_fromxlsx():
    """Check `fromxlsx` on 'Sheet1' of the packaged test workbook."""
    path = pkg_resources.resource_filename('petl', 'test/resources/test.xlsx')

    expect = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
        (u'é', datetime(2012, 1, 1)),
    )

    tbl = fromxlsx(path, 'Sheet1')

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, tbl)
def test_addfield_dupfield():
    """Check `addfield` on a table whose header repeats a field name."""
    rows = (
        ('foo', 'foo'),
        ('M', 12),
        ('F', 34),
        ('-', 56),
    )
    expected = (
        ('foo', 'foo', 'bar'),
        ('M', 12, 42),
        ('F', 34, 42),
        ('-', 56, 42),
    )

    extended = addfield(rows, 'bar', 42)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expected, extended)
def test_sort_4():
    """Check `sort` on the `bar` field."""
    rows = (
        ('foo', 'bar'),
        ('C', 2),
        ('A', 9),
        ('A', 6),
        ('F', 1),
        ('D', 10),
    )
    expected = (
        ('foo', 'bar'),
        ('F', 1),
        ('C', 2),
        ('A', 6),
        ('A', 9),
        ('D', 10),
    )

    ieq(expected, sort(rows, 'bar'))
def test_toxlsx():
    """Round-trip a table through `toxlsx` and `fromxlsx` on 'Sheet1'."""
    tbl = (
        ('foo', 'bar'),
        ('A', 1),
        ('B', 2),
        ('C', 2),
        (u'é', datetime(2012, 1, 1)),
    )

    # Reserve a path on disk; only the name is used afterwards.
    tmp = NamedTemporaryFile(delete=False)
    tmp.close()
    path = tmp.name

    toxlsx(tbl, path, 'Sheet1')
    actual = fromxlsx(path, 'Sheet1')

    ieq(tbl, actual)
def test_convert_translate():
    """Check `convert` when the conversion is given as a dictionary."""
    rows = (
        ('foo', 'bar'),
        ('M', 12),
        ('F', 34),
        ('-', 56),
    )
    # '-' has no entry in the dictionary and is expected back unchanged.
    lookup = {'M': 'male', 'F': 'female'}
    expected = (
        ('foo', 'bar'),
        ('male', 12),
        ('female', 34),
        ('-', 56),
    )

    ieq(expected, convert(rows, 'foo', lookup))
def test_tail():
    """Check `tail` with a count of 4."""
    rows = (
        ('foo', 'bar'),
        ('a', 1),
        ('b', 2),
        ('c', 5),
        ('d', 7),
        ('f', 42),
        ('f', 3),
        ('h', 90),
        ('k', 12),
        ('l', 77),
        ('q', 2),
    )
    expect = (
        ('foo', 'bar'),
        ('h', 90),
        ('k', 12),
        ('l', 77),
        ('q', 2),
    )

    ieq(expect, tail(rows, 4))
def test_selectgt():
    """Check `selectgt` on `baz` with threshold 50.

    The input includes a ``None`` value and a short row with no `baz` value.
    """
    rows = (
        ('foo', 'bar', 'baz'),
        ('a', 4, 9.3),
        ('a', 2, 88.2),
        ('b', 1, None),
        ('c', 8, 42.0),
        ('d', 7, 100.9),
        ('c', 2),
    )
    expect = (
        ('foo', 'bar', 'baz'),
        ('a', 2, 88.2),
        ('d', 7, 100.9),
    )

    actual = selectgt(rows, 'baz', 50)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def _test_antijoin_empty(antijoin_impl):
    """Check an anti-join implementation against a header-only right table.

    `antijoin_impl` is called as ``antijoin_impl(table1, table2, key='id')``
    and the left table is expected back unchanged.
    """
    colours = (
        ('id', 'colour'),
        (0, 'black'),
        (1, 'blue'),
        (2, 'red'),
        (4, 'yellow'),
        (5, 'white'),
    )
    no_shapes = (('id', 'shape'), )

    actual = antijoin_impl(colours, no_shapes, key='id')

    ieq(colours, actual)
def test_recast4():
    """Check `recast` keyed on `id` when some variables are missing."""
    # deal with missing data
    molten = (
        ('id', 'variable', 'value'),
        (1, 'gender', 'F'),
        (2, 'age', 17),
        (1, 'age', 12),
        (3, 'gender', 'M'),
    )
    expect = (
        ('id', 'age', 'gender'),
        (1, 12, 'F'),
        (2, 17, None),
        (3, None, 'M'),
    )

    ieq(expect, recast(molten, key='id'))
def test_annex_uneven_rows():
    """Check `annex` when the first table has over-long and short rows."""
    ragged = (
        ('foo', 'bar'),
        ('A', 9, True),
        ('C', 2),
        ('F', ),
    )
    other = (
        ('foo', 'baz'),
        ('B', 3),
        ('D', 10),
    )
    expect = (
        ('foo', 'bar', 'foo', 'baz'),
        ('A', 9, 'B', 3),
        ('C', 2, 'D', 10),
        ('F', None, None, None),
    )

    actual = annex(ragged, other)

    # Compared twice -- presumably to confirm the result can be re-iterated.
    for _ in range(2):
        ieq(expect, actual)
def test_rowreduce_empty():
    """Check `rowreduce` on a header-only table."""
    header_only = (('foo', 'bar'), )

    def reducer(key, rows):
        return (key, [r[0] for r in rows])

    actual = rowreduce(header_only, key='foo', reducer=reducer,
                       header=('foo', 'bar'))

    expect = (('foo', 'bar'), )
    ieq(expect, actual)
def test_wrap_tuple_return():
    """Check both tables returned by `diff` on a wrapped table.

    Each returned table is used through its `header()` and `data()` methods.
    """
    tablea = etl.wrap((
        ('foo', 'bar'),
        ('A', 1),
        ('C', 7),
    ))
    tableb = etl.wrap((
        ('foo', 'bar'),
        ('B', 5),
        ('C', 7),
    ))

    added, removed = tablea.diff(tableb)
    parts = (added, removed)

    # Headers first, for both parts ...
    for part in parts:
        eq_(('foo', 'bar'), part.header())

    # ... then the method form of `data` against the function form.
    for part in parts:
        ieq(etl.data(part), part.data())