def test_teehtml():
    """Exercise ``teehtml`` in a fluent chain.

    The full table is expected in the tee'd HTML file, while the rows that
    survive the downstream ``selectgt('bar', 1)`` are expected in the pickle
    file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # delete=False keeps the paths on disk after the handles are closed;
    # only the names are needed, so release the handles straight away.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teehtml(f1.name).selectgt('bar', 1).topickle(f2.name)

        # The tee target should hold every row of the input table.
        ieq(t1,
            etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
        # The end of the chain should hold only the selected rows.
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_teepickle():
    """Exercise ``teepickle`` in a fluent chain.

    The full table is expected in the tee'd pickle file, while the rows that
    survive the downstream ``selectgt('bar', 1)`` are expected in the second
    pickle file.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teepickle(f1.name).selectgt('bar', 1).topickle(f2.name)

        ieq(t1, etl.frompickle(f1.name))
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_container():
    """Check indexing and ``len`` on a wrapped table.

    Integer indexing is expected to give a row, string indexing the values
    of the named field, and ``len`` the row count including the header.
    """
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # Integer index 0 -> the header row.
    first_row = etl.wrap(table)[0]
    eq_(('foo', 'bar'), first_row)

    # Field-name index -> the values of that field.
    bar_values = etl.wrap(table)['bar']
    ieq((1, 2, 2), bar_values)

    # Length counts the header row as well as the three data rows.
    row_count = len(etl.wrap(table))
    eq_(4, row_count)
def test_teetsv():
    """Exercise ``teetsv`` in a fluent chain.

    The full table is expected in the tee'd TSV file, while the rows that
    survive the downstream ``selectgt('bar', 1)`` are expected in the TSV
    file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teetsv(f1.name).selectgt('bar', 1).totsv(f2.name)

        # Values are converted back to numbers before comparing.
        ieq(t1, etl.fromtsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1),
            etl.fromtsv(f2.name).convertnumbers())
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_teeuhtml():
    """Exercise ``teeuhtml`` with non-ASCII text in a fluent chain.

    The full table is expected in the tee'd HTML file, while the rows that
    survive the downstream ``selectgt('bar', 1)`` are expected in the pickle
    file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teeuhtml(f1.name).selectgt('bar', 1).topickle(f2.name)

        ieq(t1,
            etl.fromxml(f1.name, './/tr', ('th', 'td')).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_teetext():
    """Exercise ``teetext`` with a prologue, row template and epilogue.

    The tee'd text file is read back as CSV and is expected to contain the
    input table plus the extra ``('d', 4)`` row supplied by the epilogue.
    The rows that survive the downstream ``selectgt('bar', 1)`` are expected
    in the pickle file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        (etl
         .wrap(t1)
         .teetext(f1.name,
                  template=template,
                  prologue=prologue,
                  epilogue=epilogue)
         .selectgt('bar', 1)
         .topickle(f2.name))

        # The epilogue contributes one extra row to the text output.
        ieq(t1 + (('d', 4),), etl.fromcsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_values_container_convenience_methods():
    """Check the conversion helpers on values and data containers.

    Covers ``set()``, ``list()``, ``tuple()`` and ``sum()`` on
    ``table.values(...)`` and ``dict()`` on ``table.data()``.
    """
    table = etl.wrap((('foo', 'bar'),
                      ('a', 1),
                      ('b', 2),
                      ('c', 2)))

    foo_as_set = table.values('foo').set()
    eq_({'a', 'b', 'c'}, foo_as_set)

    foo_as_list = table.values('foo').list()
    eq_(['a', 'b', 'c'], foo_as_list)

    foo_as_tuple = table.values('foo').tuple()
    eq_(('a', 'b', 'c'), foo_as_tuple)

    bar_total = table.values('bar').sum()
    eq_(5, bar_total)

    # Data rows are two-element tuples, which map to key/value pairs.
    rows_as_dict = table.data().dict()
    eq_({'a': 1, 'b': 2, 'c': 2}, rows_as_dict)
def test_teeutext():
    """Exercise ``teeutext`` with a unicode prologue, template and epilogue.

    The tee'd text file is read back as CSV and is expected to contain the
    input table plus the extra row supplied by the epilogue. The rows that
    survive the downstream ``selectgt('bar', 1)`` are expected in the pickle
    file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    prologue = u'foo,bar\n'
    template = u'{foo},{bar}\n'
    epilogue = u'章子怡,4'

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        (etl
         .wrap(t1)
         .teeutext(f1.name,
                   template=template,
                   prologue=prologue,
                   epilogue=epilogue)
         .selectgt('bar', 1)
         .topickle(f2.name))

        # The epilogue contributes one extra row to the text output.
        ieq(t1 + ((u'章子怡', 4),),
            etl.fromucsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_teeutsv():
    """Exercise ``teeutsv`` with non-ASCII text in a fluent chain.

    The full table is expected in the tee'd TSV file, while the rows that
    survive the downstream ``selectgt('id', 1)`` are expected in the TSV
    file written at the end of the chain.

    The temporary files are closed before petl reopens them by name and are
    removed afterwards, so the test leaves nothing behind.
    """
    import os

    t1 = (
        (u'name', u'id'),
        (u'Արամ Խաչատրյան', 1),
        (u'Johann Strauß', 2),
        (u'Вагиф Сәмәдоғлу', 3),
        (u'章子怡', 4),
    )

    # Only the paths are needed; close the handles so the files can be
    # reopened by name on every platform.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teeutsv(f1.name).selectgt('id', 1).toutsv(f2.name)

        # Values are converted back to numbers before comparing.
        ieq(t1, etl.fromutsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('id', 1),
            etl.fromutsv(f2.name).convertnumbers())
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_valuesarray_explicit_dtype():
    """Check that ``values(...).array(dtype=...)`` honours the given dtype.

    The ``bar`` field is converted with ``dtype="i2"`` and compared against
    a NumPy array built with the same dtype, both on dtype and on contents.
    """
    rows = [("foo", "bar", "baz"),
            ("apples", 1, 2.5),
            ("oranges", 3, 4.4),
            ("pears", 7, 0.1)]

    wanted = np.array([1, 3, 7], dtype="i2")
    got = etl.wrap(rows).values("bar").array(dtype="i2")

    eq_(wanted.dtype, got.dtype)
    assert np.all(wanted == got)
def test_teeutsv():
    """Exercise ``teeutsv`` with non-ASCII text in a fluent chain.

    The tee'd TSV file is expected to hold the whole input table; the TSV
    file written at the end of the chain is expected to hold only the rows
    selected by ``selectgt('id', 1)``.

    Both temporary files are closed before being handed to petl by name and
    are deleted when the test finishes, whether it passes or fails.
    """
    import os

    t1 = ((u'name', u'id'),
          (u'Արամ Խաչատրյան', 1),
          (u'Johann Strauß', 2),
          (u'Вагиф Сәмәдоғлу', 3),
          (u'章子怡', 4))

    # delete=False keeps the paths on disk once the handles are closed.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        etl.wrap(t1).teeutsv(f1.name).selectgt('id', 1).toutsv(f2.name)

        ieq(t1, etl.fromutsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('id', 1),
            etl.fromutsv(f2.name).convertnumbers())
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_teetext():
    """Exercise ``teetext`` with a prologue, row template and epilogue.

    The tee'd text file is read back as CSV and is expected to hold the
    input table followed by the ``('d', 4)`` row that the epilogue adds.
    The pickle file written at the end of the chain is expected to hold only
    the rows selected by ``selectgt('bar', 1)``.

    Both temporary files are closed before being handed to petl by name and
    are deleted when the test finishes, whether it passes or fails.
    """
    import os

    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'

    # delete=False keeps the paths on disk once the handles are closed.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        teed = etl.wrap(t1).teetext(f1.name,
                                    template=template,
                                    prologue=prologue,
                                    epilogue=epilogue)
        teed.selectgt('bar', 1).topickle(f2.name)

        ieq(t1 + (('d', 4), ), etl.fromcsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_valuesarray_explicit_dtype():
    """Check that an explicit dtype is applied by ``values(...).array``.

    Builds the ``bar`` field as an ``'i2'`` array and verifies both the
    resulting dtype and the element values against a reference array.
    """
    source = [('foo', 'bar', 'baz'),
              ('apples', 1, 2.5),
              ('oranges', 3, 4.4),
              ('pears', 7, .1)]

    reference = np.array([1, 3, 7], dtype='i2')
    result = etl.wrap(source).values('bar').array(dtype='i2')

    eq_(reference.dtype, result.dtype)
    assert np.all(reference == result)
def test_teeutext():
    """Exercise ``teeutext`` with a unicode prologue, template and epilogue.

    The tee'd text file is read back as CSV and is expected to hold the
    input table followed by the row that the epilogue adds. The pickle file
    written at the end of the chain is expected to hold only the rows
    selected by ``selectgt('bar', 1)``.

    Both temporary files are closed before being handed to petl by name and
    are deleted when the test finishes, whether it passes or fails.
    """
    import os

    t1 = ((u'foo', u'bar'),
          (u'Արամ Խաչատրյան', 2),
          (u'Johann Strauß', 1),
          (u'Вагиф Сәмәдоғлу', 3))

    prologue = u'foo,bar\n'
    template = u'{foo},{bar}\n'
    epilogue = u'章子怡,4'

    # delete=False keeps the paths on disk once the handles are closed.
    f1 = NamedTemporaryFile(delete=False)
    f2 = NamedTemporaryFile(delete=False)
    f1.close()
    f2.close()
    try:
        teed = etl.wrap(t1).teeutext(f1.name,
                                     template=template,
                                     prologue=prologue,
                                     epilogue=epilogue)
        teed.selectgt('bar', 1).topickle(f2.name)

        ieq(t1 + ((u'章子怡', 4), ),
            etl.fromucsv(f1.name).convertnumbers())
        ieq(etl.wrap(t1).selectgt('bar', 1), etl.frompickle(f2.name))
    finally:
        os.remove(f1.name)
        os.remove(f2.name)
def test_integration():
    """Join SNP positions to gene features through the fluent ``gff3join``.

    Two positions on ``apidb|MAL1`` are joined against the ``gene`` features
    read from ``plasmodb_gff3_file``; a single joined row, for position
    56915, is expected.
    """
    # GFF3 record that the expected joined row corresponds to:
    # apidb|MAL1 ApiDB gene 56913 57116 . - . ID=apidb|PFA0035c;Name=PFA0035c;description=hypothetical+protein%2C+conserved+in+P.+falciparum;size=204;web_id=PFA0035c;locus_tag=PFA0035c;size=204;Alias=MAL1P4.06b
    snps = etl.wrap((('chr', 'pos'),
                     ('apidb|MAL1', 56911),
                     ('apidb|MAL1', 56915)))
    genes = etl.fromgff3(plasmodb_gff3_file.name).selecteq('type', 'gene')
    actual = snps.gff3join(genes, seqid='chr', start='pos', end='pos')

    expected_header = ('chr', 'pos', 'seqid', 'source', 'type', 'start',
                       'end', 'score', 'strand', 'phase', 'attributes')
    expected_row = (
        'apidb|MAL1', 56915, 'apidb|MAL1', 'ApiDB', 'gene', 56913, 57116,
        '.', '-', '.',
        gff3_parse_attributes("ID=apidb|PFA0035c;Name=PFA0035c;description=hypothetical+protein%2C+conserved+in+P.+falciparum;size=204;web_id=PFA0035c;locus_tag=PFA0035c;size=204;Alias=MAL1P4.06b"),
    )
    expect = (expected_header, expected_row)

    # NOTE(review): the comparison is performed twice, as in the original;
    # presumably to confirm the result can be iterated more than once.
    ieq(expect, actual)
    ieq(expect, actual)
def test_integration():
    """Join two interval tables through the fluent ``intervaljoin``.

    The left table's ``begin``/``end`` intervals are joined against the
    right table's ``start``/``stop`` intervals and the result is compared
    with a hand-written table of expected pairings.
    """
    left = etl.wrap((
        ('begin', 'end', 'quux'),
        (1, 2, 'a'),
        (2, 4, 'b'),
        (2, 5, 'c'),
        (9, 14, 'd'),
        (9, 140, 'e'),
        (1, 1, 'f'),
        (2, 2, 'g'),
        (4, 4, 'h'),
        (5, 5, 'i'),
        (1, 8, 'j'),
    ))
    right = etl.wrap((
        ('start', 'stop', 'value'),
        (1, 4, 'foo'),
        (3, 7, 'bar'),
        (4, 9, 'baz'),
    ))

    actual = left.intervaljoin(right,
                               lstart='begin', lstop='end',
                               rstart='start', rstop='stop')

    expect = (
        ('begin', 'end', 'quux', 'start', 'stop', 'value'),
        (1, 2, 'a', 1, 4, 'foo'),
        (2, 4, 'b', 1, 4, 'foo'),
        (2, 4, 'b', 3, 7, 'bar'),
        (2, 5, 'c', 1, 4, 'foo'),
        (2, 5, 'c', 3, 7, 'bar'),
        (2, 5, 'c', 4, 9, 'baz'),
        (2, 2, 'g', 1, 4, 'foo'),
        (4, 4, 'h', 3, 7, 'bar'),
        (5, 5, 'i', 3, 7, 'bar'),
        (5, 5, 'i', 4, 9, 'baz'),
        (1, 8, 'j', 1, 4, 'foo'),
        (1, 8, 'j', 3, 7, 'bar'),
        (1, 8, 'j', 4, 9, 'baz'),
    )

    # NOTE(review): the comparison is performed twice, as in the original;
    # presumably to confirm the result can be iterated more than once.
    ieq(expect, actual)
    ieq(expect, actual)
def test_basics():
    """Check that fluent methods agree with the plain ``petl`` functions.

    Covers ``header()``, ``data()`` and ``cut()`` on a wrapped table,
    including chaining two ``cut`` calls to get back the original layout.
    """
    t1 = (('foo', 'bar'),
          ('A', 1),
          ('B', 2))
    w1 = etl.wrap(t1)

    # header(): literal expectation, then agreement with petl.header().
    eq_(('foo', 'bar'), w1.header())
    eq_(petl.header(w1), w1.header())

    # data(): literal expectation, then agreement with petl.data().
    ieq((('A', 1), ('B', 2)), w1.data())
    ieq(petl.data(w1), w1.data())

    # cut(): reorder the two fields.
    w2 = w1.cut('bar', 'foo')
    swapped = (('bar', 'foo'),
               (1, 'A'),
               (2, 'B'))
    ieq(swapped, w2)
    ieq(petl.cut(w1, 'bar', 'foo'), w2)

    # Cutting twice restores the original field order.
    w3 = w1.cut('bar', 'foo').cut('foo', 'bar')
    ieq(t1, w3)
def test_integration():
    """Round-trip a wrapped table through an HDF5 table.

    Creates ``test4.h5`` with an empty ``/testgroup/testtable``, loads rows
    with ``tohdf5``, reads them back with ``fromhdf5``, then appends the
    same rows with ``appendhdf5`` and checks that the data rows appear twice.
    """
    # set up a new hdf5 table to work with
    h5file = openFile("test4.h5", mode="w", title="Test file")
    try:
        h5file.createGroup('/', 'testgroup', 'Test Group')

        class FooBar(IsDescription):
            foo = Int32Col(pos=0)
            bar = StringCol(6, pos=2)

        h5file.createTable('/testgroup', 'testtable', FooBar, 'Test Table')
        h5file.flush()
    finally:
        # Release the handle even if the setup fails, so the file is not
        # left open for the petl calls below or for later tests.
        h5file.close()

    # load some initial data via tohdf5()
    table1 = etl.wrap((('foo', 'bar'),
                       (1, 'asdfgh'),
                       (2, 'qwerty'),
                       (3, 'zxcvbn')))
    table1.tohdf5('test4.h5', '/testgroup', 'testtable')
    ieq(table1, etl.fromhdf5('test4.h5', '/testgroup', 'testtable'))

    # append some more data; the header is skipped on the second copy
    table1.appendhdf5('test4.h5', '/testgroup', 'testtable')
    ieq(chain(table1, table1[1:]),
        etl.fromhdf5('test4.h5', '/testgroup', 'testtable'))
def test_integration():
    """Round-trip a wrapped table through ``toarray`` and ``fromarray``.

    The table is converted to an array and read back; the ``bar`` field is
    converted with ``int`` before the result is compared with the original.
    """
    rows = [("foo", "bar", "baz"),
            ("apples", 1, 2.5),
            ("oranges", 3, 4.4),
            ("pears", 7, 0.1)]
    original = etl.wrap(rows)

    as_array = original.toarray()
    restored = etl.fromarray(as_array).convert("bar", int)

    ieq(original, restored)
m = self.regex.match(line) if m: yield m.groups() else: raise ValueError("Failed to match regex on line: "+line) def cachetag(self): try: return hash((self.source.checksum(), self.args, tuple(self.kwargs.items()))) except Exception as e: raise Exception(e) # # Fluentize # import sys from petl.fluent import FluentWrapper, wrap # # Add all of the functions in this module into the FluentWrapper as # methods # for n, c in sys.modules[__name__].__dict__.items(): if callable(c): setattr(FluentWrapper, n, wrap(c))
for line in f: m = self.regex.match(line) if m: yield m.groups() else: raise ValueError("Failed to match regex on line: " + line) def cachetag(self): try: return hash((self.source.checksum(), self.args, tuple(self.kwargs.items()))) except Exception as e: raise Exception(e) # # Fluentize # import sys from petl.fluent import FluentWrapper, wrap # # Add all of the functions in this module into the FluentWrapper as # methods # for n, c in sys.modules[__name__].__dict__.items(): if callable(c): setattr(FluentWrapper, n, wrap(c))
def test_integration():
    """Check that ``toarray`` followed by ``fromarray`` preserves a table.

    After the round trip the ``bar`` field is converted with ``int`` and the
    result is compared row by row with the wrapped input.
    """
    wrapped = etl.wrap([
        ('foo', 'bar', 'baz'),
        ('apples', 1, 2.5),
        ('oranges', 3, 4.4),
        ('pears', 7, .1),
    ])

    array = wrapped.toarray()
    round_tripped = etl.fromarray(array).convert('bar', int)

    ieq(wrapped, round_tripped)