def test_teecsv():
    """Check that ``teecsv`` writes the table to a file and still passes rows on.

    The first file must hold the whole of ``t1``; the second must hold only
    the rows kept by ``selectgt('bar', 1)``, which runs after the tee.
    """
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # Only the paths are needed: close the handles straight away so the files
    # can be reopened by name (required on Windows) and no descriptor leaks.
    # delete=False keeps the files on disk after closing.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    (etl
     .wrap(t1)
     .teecsv(f1.name)
     .selectgt('bar', 1)
     .tocsv(f2.name))

    # CSV values are read back as text, so convert them before comparing.
    ieq(t1,
        etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.fromcsv(f2.name).convertnumbers())
def build(self):
    """Download every configured source CSV and append it to the 'businesses' table.

    For each URL in ``self.sources`` the file is downloaded, reshaped to the
    column layout of the partition's table, type-converted, and appended to
    the partition's SQLite database.

    Returns:
        True after all sources have been processed.
    """
    import petl
    import petl.fluent as petlf

    def zip5(row):
        # First five characters of ZIP, or None when ZIP is shorter than five.
        return row['ZIP'][0:5] if len(row['ZIP']) >= 5 else None

    def zip4(row):
        # Last four characters of ZIP, only when ZIP is exactly ten characters
        # long (presumably the "12345-6789" form -- confirm against the data).
        return row['ZIP'][-4:] if len(row['ZIP']) == 10 else None

    p = self.partitions.find_or_new(table='businesses')
    parse_date = petl.dateparser('%m/%d/%Y')

    for _name, url in self.sources.items():
        self.log("Converting: {}".format(url))
        downloaded = self.filesystem.download(url)

        table = petlf.fromcsv(downloaded)

        # Insert the extra columns at fixed positions so the field count and
        # order line up with the partition table before the header is replaced.
        table = table.addfield('businesses_id', None, index=0)
        table = table.addfield('zip5', zip5, index=7)
        table = table.addfield('zip4', zip4, index=8)
        table = table.setheader([c.name for c in p.table.columns])

        # Per-column type and case conversions, keyed on the new header names.
        table = table.convert(('naics', 'acct_no'), int)
        table = table.convert(('created', 'started', 'expires'), parse_date)
        table = table.convert(('dba', 'address', 'city', 'owner', 'desc'),
                              str.title)
        table = table.convert('incorp_type', str.lower)
        text_columns = [c.name for c in p.table.columns
                        if c.datatype == 'text']
        table = table.convert(text_columns, unicode)

        table.appendsqlite3(p.database.path, p.table.name)

    return True
def test_teetext():
    """Check that ``teetext`` writes templated text and still passes rows on.

    The text file is written with a prologue (the header line), one templated
    line per row, and an epilogue holding an extra ``d,4`` record, so reading
    it back as CSV must give ``t1`` plus ``('d', 4)``. The pickle file must
    hold only the rows kept by ``selectgt('bar', 1)``, which runs after the tee.
    """
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # Only the paths are needed: close the handles straight away so the files
    # can be reopened by name (required on Windows) and no descriptor leaks.
    # delete=False keeps the files on disk after closing.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    prologue = 'foo,bar\n'
    template = '{foo},{bar}\n'
    epilogue = 'd,4'

    (etl
     .wrap(t1)
     .teetext(f1.name,
              template=template,
              prologue=prologue,
              epilogue=epilogue)
     .selectgt('bar', 1)
     .topickle(f2.name))

    # CSV values are read back as text, so convert them before comparing.
    ieq(t1 + (('d', 4),),
        etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.frompickle(f2.name))
def handle(self, *args, **options):
    """Load product data from a semicolon-delimited CSV file into ``stock_product``.

    The first positional argument is the path of the CSV file. Its columns
    are renamed to the target names before the rows are written through a
    cursor on ``connection``.

    Raises:
        CommandError: if no filename argument was supplied.
    """
    try:
        filename = args[0]
    except IndexError:
        raise CommandError('You must provide a filename')

    # Mapping passed to rename(): CSV header name -> target column name.
    column_names = {'COD_LOCAL': 'retail_code',
                    'CodMaterial': 'material_code',
                    'DESCRIPCION': 'description',
                    'Instock': 'in_stock',
                    'CodInterno': 'internal_code'}

    products = fromcsv(filename, delimiter=';')
    products = products.rename(column_names)

    # progress() wraps the table before the load into the database.
    products.progress().todb(connection.cursor(), 'stock_product')

    self.stdout.write('Successfully loaded product data')
def test_staticmethods():
    """Check that ``etl.fromcsv`` reads a tab-delimited file, repeatably.

    A small table is written with ``csv.writer`` and read back through
    ``etl.fromcsv``. Values come back as strings, and the resulting table
    must give the same rows when iterated a second time.
    """
    table = (('foo', 'bar'),
             ('a', 1),
             ('b', 2),
             ('c', 2))

    # The context manager closes (and so flushes) the file even if writing
    # raises; delete=False keeps it on disk so it can be reopened by name.
    # NOTE(review): the default temp-file mode is binary, which csv.writer
    # only accepts on Python 2 -- confirm the target Python version.
    with NamedTemporaryFile(delete=False) as f:
        csv.writer(f, delimiter='\t').writerows(table)

    actual = etl.fromcsv(f.name, delimiter='\t')
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))
    ieq(expect, actual)
    ieq(expect, actual)  # verify can iterate twice
def test_teetext():
    """Verify ``teetext`` output alongside the downstream pickle output.

    Expected results, as asserted below:
      * the text file, read back as CSV, equals ``t1`` plus the ``('d', 4)``
        record supplied through the epilogue;
      * the pickle file equals ``t1`` filtered by ``selectgt('bar', 1)``.
    """
    t1 = (('foo', 'bar'),
          ('a', 2),
          ('b', 1),
          ('c', 3))

    # Close each temporary file as soon as it exists: only its name is used
    # afterwards, and an open handle would leak a descriptor and prevent
    # reopening by name on Windows. delete=False leaves the file on disk.
    f1 = NamedTemporaryFile(delete=False)
    f1.close()
    f2 = NamedTemporaryFile(delete=False)
    f2.close()

    prologue = 'foo,bar\n'      # header line written before the rows
    template = '{foo},{bar}\n'  # one line per row
    epilogue = 'd,4'            # extra record written after the rows

    pipeline = etl.wrap(t1).teetext(f1.name,
                                    template=template,
                                    prologue=prologue,
                                    epilogue=epilogue)
    pipeline.selectgt('bar', 1).topickle(f2.name)

    # CSV values are read back as text, so convert them before comparing.
    ieq(t1 + (('d', 4),),
        etl.fromcsv(f1.name).convertnumbers())
    ieq(etl.wrap(t1).selectgt('bar', 1),
        etl.frompickle(f2.name))