Пример #1
0
def test_teecsv():
    """Check ``teecsv`` used as an intermediate step of a fluent pipeline.

    The file written by ``teecsv`` must read back as the full input table,
    while the file written by the final ``tocsv`` must contain only the rows
    selected after the tee.
    """

    source = (('foo', 'bar'),
              ('a', 2),
              ('b', 1),
              ('c', 3))

    # delete=False keeps the files on disk; only their names are used below.
    tee_file = NamedTemporaryFile(delete=False)
    out_file = NamedTemporaryFile(delete=False)

    # Tee every row to the first file, then keep rows with bar > 1 and write
    # those to the second file.
    teed = etl.wrap(source).teecsv(tee_file.name)
    filtered = teed.selectgt('bar', 1)
    filtered.tocsv(out_file.name)

    # CSV values come back as text, hence convertnumbers() before comparing.
    tee_readback = etl.fromcsv(tee_file.name).convertnumbers()
    ieq(source, tee_readback)

    expected_filtered = etl.wrap(source).selectgt('bar', 1)
    filtered_readback = etl.fromcsv(out_file.name).convertnumbers()
    ieq(expected_filtered, filtered_readback)
Пример #2
0
    def build(self):
        """Convert each configured source CSV and append it to ``businesses``.

        For every ``(name, url)`` pair in ``self.sources`` the file is
        downloaded, reshaped with petl (extra id/zip columns, header replaced
        by the partition table's column names, type and case conversions) and
        appended to the partition's SQLite table.

        Returns:
            True once every source has been processed.
        """
        import petl
        import petl.fluent as petlf

        partition = self.partitions.find_or_new(table='businesses')

        parse_date = petl.dateparser('%m/%d/%Y')

        def first_five(row):
            # Leading five characters of ZIP, or None when ZIP is shorter.
            zip_code = row['ZIP']
            if len(zip_code) >= 5:
                return zip_code[0:5]
            return None

        def last_four(row):
            # Trailing four characters, only for ZIP values exactly 10 long
            # (presumably the "12345-6789" form -- confirm against the data).
            zip_code = row['ZIP']
            if len(zip_code) == 10:
                return zip_code[-4:]
            return None

        for _name, url in self.sources.items():

            self.log("Converting: {}".format(url))

            downloaded = self.filesystem.download(url)

            # Steps are applied in the same order as a single fluent chain.
            table = petlf.fromcsv(downloaded)
            table = table.addfield('businesses_id', None, index=0)
            table = table.addfield('zip5', first_five, index=7)
            table = table.addfield('zip4', last_four, index=8)
            # From here on fields are addressed by the partition's column names.
            table = table.setheader(
                [col.name for col in partition.table.columns])
            table = table.convert(('naics', 'acct_no'), int)
            table = table.convert(('created', 'started', 'expires'), parse_date)
            table = table.convert(
                ('dba', 'address', 'city', 'owner', 'desc'), str.title)
            # A single field name is passed as a plain string.
            table = table.convert('incorp_type', str.lower)
            text_columns = [col.name for col in partition.table.columns
                            if col.datatype == 'text']
            table = table.convert(text_columns, unicode)

            table.appendsqlite3(partition.database.path, partition.table.name)

        return True
Пример #3
0
def test_teecsv():
    """Verify both outputs of a pipeline that tees to CSV mid-stream.

    The tee file is expected to hold every input row; the final file is
    expected to hold only the rows where ``bar`` is greater than 1.
    """

    rows = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    # Only the names of these temporary files are used; delete=False keeps
    # them on disk.
    full_copy = NamedTemporaryFile(delete=False)
    selected_copy = NamedTemporaryFile(delete=False)

    wrapped = etl.wrap(rows)
    wrapped.teecsv(full_copy.name).selectgt('bar', 1).tocsv(selected_copy.name)

    # Values are read back from CSV, so convert them to numbers to compare.
    full_actual = etl.fromcsv(full_copy.name).convertnumbers()
    ieq(rows, full_actual)

    selected_expect = etl.wrap(rows).selectgt('bar', 1)
    selected_actual = etl.fromcsv(selected_copy.name).convertnumbers()
    ieq(selected_expect, selected_actual)
Пример #4
0
def test_teetext():
    """Check ``teetext`` used as an intermediate step of a fluent pipeline.

    The text file is written with a prologue, one templated line per row and
    an epilogue; read back as CSV it must equal the input plus the epilogue
    row. The rows selected after the tee are pickled to a second file.
    """

    source = (('foo', 'bar'), ('a', 2), ('b', 1), ('c', 3))

    text_file = NamedTemporaryFile(delete=False)
    pickle_file = NamedTemporaryFile(delete=False)

    # The prologue doubles as the CSV header and the epilogue as a final row.
    text_options = dict(
        template='{foo},{bar}\n',
        prologue='foo,bar\n',
        epilogue='d,4',
    )

    teed = etl.wrap(source).teetext(text_file.name, **text_options)
    teed.selectgt('bar', 1).topickle(pickle_file.name)

    expected_text_rows = source + (('d', 4),)
    actual_text_rows = etl.fromcsv(text_file.name).convertnumbers()
    ieq(expected_text_rows, actual_text_rows)

    expected_selected = etl.wrap(source).selectgt('bar', 1)
    actual_selected = etl.frompickle(pickle_file.name)
    ieq(expected_selected, actual_selected)
Пример #5
0
 def handle(self, *args, **options):
     """Load product data from a semicolon-delimited CSV file.

     The first positional argument is the CSV filename. Source column names
     are renamed before the table is passed to ``todb`` with the
     ``stock_product`` table name.

     Raises:
         CommandError: if no filename argument was supplied.
     """
     try:
         filename = args[0]
     except IndexError:
         raise CommandError('You must provide a filename')

     # Source CSV header -> destination column name.
     column_names = {'COD_LOCAL': 'retail_code',
                     'CodMaterial': 'material_code',
                     'DESCRIPCION': 'description',
                     'Instock': 'in_stock',
                     'CodInterno': 'internal_code'}

     source = fromcsv(filename, delimiter=';')
     table = source.rename(column_names)

     cursor = connection.cursor()
     table.progress().todb(cursor, 'stock_product')
     self.stdout.write('Successfully loaded product data')
Пример #6
0
def test_staticmethods():
    """Check that ``etl.fromcsv`` reads a tab-delimited file written by ``csv``.

    Values are expected back as strings, and the resulting table must be
    iterable more than once.
    """

    rows = (('foo', 'bar'),
            ('a', 1),
            ('b', 2),
            ('c', 2))

    # Write the fixture with the csv module, then close it so the data is
    # flushed before it is read back by name.
    tmp = NamedTemporaryFile(delete=False)
    csv.writer(tmp, delimiter='\t').writerows(rows)
    tmp.close()

    actual = etl.fromcsv(tmp.name, delimiter='\t')
    expect = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # Compare twice to verify the table can be iterated twice.
    for _ in range(2):
        ieq(expect, actual)
Пример #7
0
def test_teetext():
    """Verify both outputs of a pipeline that tees to a text file mid-stream.

    The teed text file, read back as CSV, must equal the input table plus the
    row supplied by the epilogue; the pickled file must hold only the rows
    where ``bar`` is greater than 1.
    """

    rows = (('foo', 'bar'),
            ('a', 2),
            ('b', 1),
            ('c', 3))

    teed_file = NamedTemporaryFile(delete=False)
    pickled_file = NamedTemporaryFile(delete=False)

    header_line = 'foo,bar\n'
    row_format = '{foo},{bar}\n'
    trailer = 'd,4'

    pipeline = etl.wrap(rows)
    pipeline = pipeline.teetext(teed_file.name,
                                template=row_format,
                                prologue=header_line,
                                epilogue=trailer)
    pipeline = pipeline.selectgt('bar', 1)
    pipeline.topickle(pickled_file.name)

    # The epilogue contributes one extra data row to the text file.
    with_trailer = rows + (('d', 4),)
    ieq(with_trailer, etl.fromcsv(teed_file.name).convertnumbers())

    selected = etl.wrap(rows).selectgt('bar', 1)
    ieq(selected, etl.frompickle(pickled_file.name))
Пример #8
0
def test_staticmethods():
    """Round-trip a small tab-delimited file through ``etl.fromcsv``.

    The fixture is written with ``csv.writer``; reading it back must yield
    the same rows with every value as a string, on repeated iteration.
    """

    fixture = NamedTemporaryFile(delete=False)
    tab_writer = csv.writer(fixture, delimiter='\t')
    data = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    tab_writer.writerows(data)
    # Close before reading so the written rows are flushed to disk.
    fixture.close()

    expect = (('foo', 'bar'), ('a', '1'), ('b', '2'), ('c', '2'))
    actual = etl.fromcsv(fixture.name, delimiter='\t')

    ieq(expect, actual)
    # Second pass verifies the table can be iterated twice.
    ieq(expect, actual)