# conflicts
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.7],
          ['B', 2, None],
          ['D', 3, 9.4],
          ['B', None, 7.8],
          ['E', None],
          ['D', 3, 12.3],
          ['A', 2, None]]

from petl import conflicts, look
look(table1)
table2 = conflicts(table1, 'foo')
look(table2)

# complement
a = [['foo', 'bar', 'baz'],
     ['A', 1, True],
     ['C', 7, False],
     ['B', 2, False],
     ['C', 9, True]]
b = [['x', 'y', 'z'],
     ['B', 2, False],
     ['A', 9, False],
     ['B', 3, True],
     ['C', 9, True]]
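# The complement example above stops after defining the two tables. A minimal
# sketch of how it would typically continue (the variable names here are
# illustrative, not from the original):
from petl import complement
aminusb = complement(a, b)   # rows in a that are not present in b
bminusa = complement(b, a)   # rows in b that are not present in a
look(aminusb)
look(bminusa)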
# Write to a local file (the .gz extension tells petl to gzip the output).
# Assumes `datroot`, `fn` and the source table `src` are defined earlier in the script.
import datetime
import os

import petl as etl

etl.tocsv(src,
          source=os.path.join(datroot, '.'.join([fn, 'tmp', 'csv', 'gz'])),
          encoding='utf8', write_header=True)
print "File saved locally to avoid holding too much in memory..."
del src

# Tidy up some of the fields so that they're db-friendly
tidy = (etl.fromcsv(os.path.join(datroot, '.'.join([fn, 'tmp', 'csv', 'gz'])))
        .convert('transaction_id', 'replace', '{', '')
        .convert('transaction_id', 'replace', '}', '')
        .convert('price_int', int)
        .convert('completion_dt',
                 lambda v: datetime.datetime.strptime(v, "%Y-%m-%d 00:00").date())
        .sort('completion_dt'))

# Summarise what's there (helpful for tracking changes to the format,
# especially the status codes).
print "There are {} rows of data.".format(etl.nrows(tidy))
counts = etl.valuecounts(tidy, 'status_cd')
print "I found the following record types and counts:"
print counts

confs = etl.conflicts(tidy, key='transaction_id')
if confs.nrows() > 0:
    print "I found the following conflicts:"
    print confs
else:
    print "I found no conflicting Transaction IDs"

proceed = raw_input('Given these stats should I proceed with the processing [y/n]: ')
if proceed == 'y':
    print("OK, will load the data.")
else:
    print("OK, stopping.")
    exit()

etl.totsv(tidy,
          source=os.path.join(datroot, '.'.join([fn, 'csv'])),
          encoding='utf-8', write_header=True)
print "Foo!"
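# The next snippet starts mid-example: the single-key duplicates() call and the
# table1 it operates on are not shown here. A minimal stand-in so the
# compound-key call below runs (this table is an assumption, not the original data):
import petl as etl
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.0],
          ['B', 2, 3.4],
          ['B', 2, 7.8],
          ['D', 6, 9.3],
          ['E', None, 1.3]]
table2 = etl.duplicates(table1, 'foo')
table2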
# compound keys are supported
table3 = etl.duplicates(table1, key=['foo', 'bar'])
table3

# unique()
##########
import petl as etl
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2],
          ['B', '2', '3.4'],
          ['D', 'xyz', 9.0],
          ['B', u'3', u'7.8'],
          ['B', '2', 42],
          ['E', None, None],
          ['D', 4, 12.3],
          ['F', 7, 2.3]]
table2 = etl.unique(table1, 'foo')
table2

# conflicts()
#############
import petl as etl
table1 = [['foo', 'bar', 'baz'],
          ['A', 1, 2.7],
          ['B', 2, None],
          ['D', 3, 9.4],
          ['B', None, 7.8],
          ['E', None],
          ['D', 3, 12.3],
          ['A', 2, None]]
table2 = etl.conflicts(table1, 'foo')
table2

# isunique()
############
import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b'],
          ['b', 2],
          ['c', 3, True]]
etl.isunique(table1, 'foo')
etl.isunique(table1, 'bar')
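# Note on the conflicts() examples above: the `missing` value (None by default)
# is treated as "no data" rather than as a conflicting value, which is why rows
# that differ only by None do not count as conflicts. A minimal sketch (the
# table and variable names here are illustrative, not from the original):
t = [['k', 'v'],
     ['A', 1],
     ['A', None],
     ['B', 1],
     ['B', 2]]
etl.conflicts(t, 'k')                # only the 'B' rows conflict
etl.conflicts(t, 'k', missing='NA')  # None now counts as a value, so 'A' conflicts too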