Exemplo n.º 1
0
def test_toxlsx_with_non_str_header(xlsx_table_with_non_str_header):
    f = NamedTemporaryFile(delete=True, suffix='.xlsx')
    f.close()

    toxlsx(xlsx_table_with_non_str_header, f.name, 'Sheet1')
    actual = etl.fromxlsx(f.name, 'Sheet1')
    ieq(xlsx_table_with_non_str_header, actual)
Exemplo n.º 2
0
 def test_integration():
     tbl = (('foo', 'bar'), ('A', 1), ('B', 2), ('C', 2),
            (u'é', datetime(2012, 1, 1)))
     f = NamedTemporaryFile(delete=False)
     f.close()
     etl.wrap(tbl).toxlsx(f.name, 'Sheet1')
     actual = etl.fromxlsx(f.name, 'Sheet1')
     ieq(tbl, actual)
Exemplo n.º 3
0
def test_integration(xlsx_test_table):
    f = NamedTemporaryFile(delete=True, suffix='.xlsx')
    f.close()
    tbl = etl.wrap(xlsx_test_table)
    tbl.toxlsx(f.name, 'Sheet1')
    actual = etl.fromxlsx(f.name, 'Sheet1')
    ieq(tbl, actual)
    tbl.appendxlsx(f.name, 'Sheet1')
    expect = tbl.cat(tbl)
    ieq(expect, actual)
Exemplo n.º 4
0
 def test_integration():
     tbl = (('foo', 'bar'),
            ('A', 1),
            ('B', 2),
            ('C', 2),
            (u'é', datetime(2012, 1, 1)))
     f = NamedTemporaryFile(delete=False)
     f.close()
     etl.wrap(tbl).toxlsx(f.name, 'Sheet1')
     actual = etl.fromxlsx(f.name, 'Sheet1')
     ieq(tbl, actual)
Exemplo n.º 5
0
 def test_integration():
     tbl = (('foo', 'bar'), ('A', 1), ('B', 2), ('C', 2),
            (u'é', datetime(2012, 1, 1)))
     f = NamedTemporaryFile(delete=True, suffix='.xlsx')
     f.close()
     tbl = etl.wrap(tbl)
     tbl.toxlsx(f.name, 'Sheet1')
     actual = etl.fromxlsx(f.name, 'Sheet1')
     ieq(tbl, actual)
     tbl.appendxlsx(f.name, 'Sheet1')
     expect = tbl.cat(tbl)
     ieq(expect, actual)
Exemplo n.º 6
0
def main():
    """The main function which extracts the data from the sample-data
    spreadsheet and splits it to a csv per account"""

    bank_lookup = {
        'Spending': bank_main,
        'Income': bank_main,
        'Saving': bank_savings,
        'Credit card': bank_credit,
    }

    sheet_data = petl.fromxlsx('sample-data.xlsx', sheet='Data')
    data = sheet_data.cut(*range(5))
    early_data = data.select('Date', lambda r: r.month <= 2)

    for account, table in split_table(early_data, 'Account'):
        modified_table = bank_lookup[account](table)
        # modified_table.tocsv(table['Account'][0]+'.csv')
        print(modified_table)
Exemplo n.º 7
0
    def etl(self, *args, **kw):
        table = petl.fromxlsx(self._src_path)

        model = DEPTH_TO_WATER
        self._update_model(model, self._vocab)

        # group table by sys_loc_code
        header = petl.header(table)
        for name, records in petl.rowgroupby(petl.sort(table, 'sys_loc_code'),
                                             'sys_loc_code'):
            records = [dict(zip(header, record)) for record in records]
            record = records[0]
            location_id = self._post_location(record, model)
            thing_id = self._post_thing(record, model, location_id)

            print('---------------')
            print(f'len records {len(records)}')
            # self.add_package(record)
            self.observation.set_records(records)
            self.observation.etl(tids=self._make_tids(thing_id, record),
                                 models=(model, ))
Exemplo n.º 8
0
def kcmo_convert(filepath, xtrapath):
    """
    Takes the file path to a csv in the format used by Kansas City proper
    converts to universal format 
    outputs csv.
    """
    kcmo = etl.fromcsv(filepath)
    kcx = etl.fromxlsx(xtrapath)
    kcjoin = etl.join(kcmo, kcx, lkey='POLEID', rkey='IDNumber')
    del kcmo
    del kcx

    kcjoin = etl.addfield(kcjoin, 'PoleID', lambda x: x['POLEID'])
    kcjoin = etl.addfield(kcjoin, 'Longitude',
                          lambda x: geom_to_tuple(x['the_geom'])[0])
    kcjoin = etl.addfield(kcjoin, 'Latitude',
                          lambda x: geom_to_tuple(x['the_geom'])[1])
    kcjoin = etl.addfield(kcjoin, 'LightbulbType',
                          lambda x: x['LUMINAIRE TYPE'])
    kcjoin = etl.addfield(kcjoin, 'Wattage', lambda x: x['WATTS'])
    kcjoin = etl.addfield(kcjoin, 'Lumens', None)
    kcjoin = etl.addfield(
        kcjoin, 'LightAttributes', lambda x: make_a_list(
            x['ATTACHMENT 10'], x['ATTACHMENT 9'], x['ATTACHMENT 8'], x[
                'ATTACHMENT 7'], x['ATTACHMENT 6'], x['ATTACHMENT 5'], x[
                    'ATTACHMENT 4'], x['ATTACHMENT 3'], x['ATTACHMENT 2'], x[
                        'ATTACHMENT 1'], x['SPECIAL_N2'], x['SPECIAL_NO']))
    kcjoin = etl.addfield(kcjoin, 'AttachedTech',
                          lambda x: bool(x['LightAttributes']))
    kcjoin = etl.addfield(
        kcjoin, 'FiberWiFiEnable', lambda x: find_wifi(*x[
            'LightAttributes'], x['SPECIAL_N2'], x['SPECIAL_NO']))
    kcjoin = etl.addfield(kcjoin, 'PoleType', lambda x: x['POLE TYPE'])
    kcjoin = etl.addfield(kcjoin, 'PoleOwner', lambda x: x['POLE OWNER'])
    kcjoin = etl.addfield(kcjoin, 'DataSource', 'Kansas City')
    kcjoin = etl.cut(kcjoin, 'PoleID', 'Longitude', 'Latitude',
                     'LightbulbType', 'Wattage', 'Lumens', 'AttachedTech',
                     'LightAttributes', 'FiberWiFiEnable', 'PoleType',
                     'PoleOwner', 'DataSource')
    etl.tocsv(kcjoin, 'data/kcmo_clean.csv')
Exemplo n.º 9
0
# To run (ideally using screen):
# python $HOME/src/github/malariagen/methods-dev/pf3k_techbm/scripts/gatk_pipeline/run_all_pipelines.py >> $HOME/src/github/malariagen/methods-dev/pf3k_techbm/log/gatk_pipeline_20150917.log
import os
import petl as etl
import subprocess

PIPELINES_FN='%s/src/github/malariagen/methods-dev/pf3k_techbm/scripts/gatk_pipeline/pipeline_parameters.xlsx' % os.environ['HOME']
SCRIPT_FN='%s/src/github/malariagen/methods-dev/pf3k_techbm/scripts/gatk_pipeline/gatk_pipeline_20150917.sh' % os.environ['HOME']

tbl_pipelines = etl.fromxlsx(PIPELINES_FN)
var_names = tbl_pipelines.header()
for rec in tbl_pipelines.data():
    print(rec)
    for i, var_name in enumerate(var_names):
        print(var_name, rec[i])
        os.environ[var_name] = str(rec[i])
    subprocess.call(SCRIPT_FN)