def lookup_and_transform(ts_kv_table):
    """Pivot a key/value time-series table into one wide table per device.

    The input table has rows of the form::

        | entity_id                       | key           | ts            | value |
        | 1ea47494dc14d40bd76a73c738b665f | Temperature   | 1583010011665 | -1.8  |
        | 1ea47494dc14d40bd76a73c738b665f | WindDirection | 1583010000692 | 227   |

    The result is a dictionary ``{device_id: table}`` where each table has a
    ``ts`` column followed by one column per key::

        | ts            | Temperature | WindDirection |
        | 1583010011665 | -1.8        | 230           |
        | 1583010000692 | -2.5        | 227           |
    """
    per_device = petl.lookup(ts_kv_table, 'entity_id', value=('key', 'ts', 'value'))
    header = ('key', 'ts', 'value')
    for device_id in per_device:
        # Rebuild a petl table for this device and pivot keys into columns.
        wide = petl.recast([header] + per_device[device_id],
                           variablefield='key', valuefield='value')
        # Drop unwanted keys, but only those actually present for this device.
        unwanted = KEYS_TO_REMOVE & set(petl.fieldnames(wide))
        wide = petl.cutout(wide, *unwanted)
        # Alphabetize the columns, then pin 'ts' back as the first one.
        wide = petl.sortheader(wide)
        wide = petl.movefield(wide, 'ts', 0)
        per_device[device_id] = petl.sort(wide, 'ts')
    return per_device
# NOTE(review): this chunk reads like pasted petl ``recast()`` documentation
# examples. ``table1`` and ``table3`` are used below but are not defined
# earlier in this chunk — presumably they are defined elsewhere in the file;
# confirm before running this section standalone.

# Long-format sample with repeated (id, variable) pairs — used below to
# demonstrate listing and reducing of duplicate values.
table6 = [['id', 'time', 'variable', 'value'],
          [1, 11, 'weight', 66.4],
          [1, 14, 'weight', 55.2],
          [2, 12, 'weight', 53.2],
          [2, 16, 'weight', 43.3],
          [3, 12, 'weight', 34.5],
          [3, 17, 'weight', 49.4]]

# Long-format sample with some (id, variable) combinations missing.
table9 = [['id', 'variable', 'value'],
          [1, 'gender', 'F'],
          [2, 'age', 17],
          [1, 'age', 12],
          [3, 'gender', 'M']]

from petl import recast, look

# Basic pivot using the default 'variable'/'value' field names.
look(table1)
table2 = recast(table1)
look(table2)

# specifying variable and value fields
look(table3)
table4 = recast(table3, variablefield='vars', valuefield='vals')
look(table4)

# if there are multiple values for each key/variable pair, and no reducers
# function is provided, then all values will be listed
look(table6)
table7 = recast(table6, key='id')
look(table7)

# multiple values can be reduced via an aggregation function
def mean(values):
    # Plain arithmetic mean; ``values`` is the list of collected duplicates
    # for one (key, variable) pair.
    return float(sum(values)) / len(values)

table8 = recast(table6, key='id', reducers={'weight': mean})
# recast() ##########
import petl as etl

# Long-format ("melted") sample data: one row per (id, variable) observation.
table1 = [
    ['id', 'variable', 'value'],
    [3, 'age', 16],
    [1, 'gender', 'F'],
    [2, 'gender', 'M'],
    [2, 'age', 17],
    [1, 'age', 12],
    [3, 'gender', 'M'],
]
# With no arguments, recast() pivots on the default 'variable'/'value' fields.
table2 = etl.recast(table1)
table2

# The variable and value fields can also be named explicitly.
table3 = [
    ['id', 'vars', 'vals'],
    [3, 'age', 16],
    [1, 'gender', 'F'],
    [2, 'gender', 'M'],
    [2, 'age', 17],
    [1, 'age', 12],
    [3, 'gender', 'M'],
]
table4 = etl.recast(table3, variablefield='vars', valuefield='vals')
table4

# if there are multiple values for each key/variable pair, and no
# reducers function is provided, then all values will be listed
"""Pivot a CANSIM statistics CSV from long to wide format.

Reads ./35100077.csv, keeps the first column plus GEO/DGUID/Statistics/VALUE,
pivots each distinct 'Statistics' value into its own column, and writes the
result to output.csv.
"""
import os
import sys
import logging as log

import petl

log.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

input_file = './35100077.csv'

# petl.fromcsv() is lazy: it does not open the file here, so a missing or
# unreadable file surfaces only when the table is first iterated (in tocsv
# below). The original try/except around this call could never catch the
# error — and, had it fired, execution would have continued with the table
# variable unbound.
cansim_data = petl.fromcsv(input_file)

# Keep the leading reference-period column plus the fields needed for the pivot.
cansim_data = petl.cut(cansim_data, 0, 'GEO', 'DGUID', 'Statistics', 'VALUE')

# Pivot: one output column per distinct value of 'Statistics'.
output_data = petl.recast(cansim_data, variablefield='Statistics', valuefield='VALUE')

try:
    # Iteration (and therefore reading input_file) actually happens here.
    petl.tocsv(output_data, 'output.csv')
except OSError as e:
    # Fail loudly instead of silently continuing in an undefined state.
    log.error(e)
    sys.exit(1)