def run(self, driver, task, log):
    # Resolve the source file path under input/
    inp = task["source"]["file"]
    inp = compat.translate_unicode(inp)
    inp = "input/{}".format(inp)
    sheet = task["source"].get("sheet", None)
    use_view = task["source"].get("use_view", True)

    record_set = etl.fromxls(inp, sheet, use_view=use_view)

    if not etl.data(record_set).any():
        log.write("Task skipped. No rows on source")
    else:
        transform = TransformSubTask(task, log)
        record_set = transform.get_result(record_set)

        # Resolve the target file path under output/
        out = task["target"]["file"]
        out = compat.translate_unicode(out)
        out = "output/{}".format(out)

        separator = task["target"].get("delimiter", ";")
        separator = compat.translate_unicode(separator)
        enc = task["target"].get("encoding", "utf-8")

        task_log = "log/xls-csv_{}_{}.log".format(task["name"], get_time_filename())
        with open(task_log, "w") as lg:
            # "truncate" overwrites the target CSV; otherwise rows are appended
            if task["target"].get("truncate"):
                record_set.progress(10000, out=lg).tocsv(out, encoding=enc, delimiter=separator)
            else:
                record_set.progress(10000, out=lg).appendcsv(out, encoding=enc, delimiter=separator)
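# For context, a minimal sketch of the `task` mapping that run() reads above,
# inferred from the keys it accesses; every value shown here is an illustrative
# assumption, not part of the original project.
task = {
    "name": "monthly-report",      # used in the log file name
    "source": {
        "file": "report.xls",      # read from input/report.xls
        "sheet": "Sheet1",         # optional; defaults to the first sheet
        "use_view": True,          # optional; passed through to etl.fromxls
    },
    "target": {
        "file": "report.csv",      # written to output/report.csv
        "delimiter": ";",          # optional; defaults to ";"
        "encoding": "utf-8",       # optional; defaults to "utf-8"
        "truncate": True,          # True -> tocsv (overwrite); otherwise appendcsv
    },
}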
from tempfile import NamedTemporaryFile

import petl as etl
from petl.test.helpers import ieq


def test_integration():
    # round-trip: write a table to .xls and read it back
    expect = (('foo', 'bar'),
              ('A', 1),
              ('B', 2),
              ('C', 2))
    f = NamedTemporaryFile(delete=False)
    f.close()
    etl.wrap(expect).toxls(f.name, 'Sheet1')
    actual = etl.fromxls(f.name, 'Sheet1')
    # compare twice to verify the table can be iterated more than once
    ieq(expect, actual)
    ieq(expect, actual)
import petl


def import_ksh_settlements(xlsfilename, output_csv):
    (petl.fromxls(xlsfilename)
         # keep only the settlement column
         .cut(0)
         # drop the two header rows and supply our own header
         .skip(2)
         .pushheader(('telepules',))
         # drop the empty row at the end, lowercase, and write out
         .selecttrue(0)
         .convert(0, 'lower')
         .tocsv(output_csv, encoding='utf-8'))
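# A hedged usage sketch for the function above; the file names are assumptions.
# It produces a one-column CSV headed 'telepules' with lower-cased settlement names.
import_ksh_settlements('ksh_settlements.xls', 'settlements.csv')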
def etl(file):
    import petl as etl
    tb1 = None
    # dispatch on the file extension
    if file.endswith(".csv"):
        tb1 = etl.fromcsv(file)
    elif file.endswith(".json"):
        tb1 = etl.fromjson(file)
    elif file.endswith(".xls"):
        tb1 = etl.fromxls(file)
    elif file.endswith(".xml"):
        tb1 = etl.fromxml(file, "row")
    return tb1
import os

import petl
from sqlalchemy import func
from sqlalchemy.dialects.postgresql import insert

# Animal, normalize_string and to_bool are defined elsewhere in this project.


def process_animal_extended(shelter_id, session, input_directory):
    table = petl.fromxls(
        os.path.join(input_directory, 'AnimalIntakeExtended.xls'),
        sheet='AnimalIntakeExtended')

    ## Because an animal can appear in the intake report more than once,
    ## we must sort the table in order to upsert the latest value
    table_sorted = petl.sort(table, key='Intake Date/Time')

    for row in petl.dicts(table_sorted):
        animal_id = row['Animal ID']
        set_values = {
            'arn': normalize_string(row['ARN']),
            'name': normalize_string(row['Animal Name']),
            'species': normalize_string(row['Species']),
            'primary_breed': normalize_string(row['Primary Breed']),
            'secondary_breed': normalize_string(row['Secondary Breed']),
            'gender': normalize_string(row['Gender']),
            'pre_altered': to_bool(row['Pre Altered']),
            'altered': to_bool(row['Altered']),
            'primary_color': normalize_string(row['Primary Colour']),
            'secondary_color': normalize_string(row['Secondary Colour']),
            'third_color': normalize_string(row['Third Colour']),
            'color_pattern': normalize_string(row['Colour Pattern']),
            'second_color_pattern': normalize_string(row['Second Colour Pattern']),
            'size': normalize_string(row['Size'])
        }
        insert_stmt = insert(Animal)\
            .values(
                id=animal_id,
                shelter_id=shelter_id,  ## TODO: add to index for constraint? make composite pk?
                **set_values)\
            .on_conflict_do_update(
                constraint='animals_pkey',
                set_={
                    'shelter_id': shelter_id,
                    **set_values,
                    'updated_at': func.now()
                })
        session.execute(insert_stmt)
        session.commit()
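# normalize_string and to_bool are project helpers not shown in this excerpt.
# Below is a minimal sketch of plausible implementations, purely as an
# assumption about their behavior; the real project may differ.
def normalize_string(value):
    # Hypothetical: collapse empty or whitespace-only cells to None.
    if value is None:
        return None
    text = str(value).strip()
    return text or None


def to_bool(value):
    # Hypothetical: map common spreadsheet yes/no markers to booleans.
    if value is None or str(value).strip() == '':
        return None
    return str(value).strip().lower() in ('yes', 'y', 'true', '1')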
def fromxlswithheader(filename, sheet, **kwargs):
    """
    Read an Excel sheet with `petl.fromxls` and automatically parse the
    metadata header if present.
    """
    # preliminary open for inspection
    rawtable = petl.fromxls(filename, sheet, header=None, **kwargs)
    metadata, header, data = parse_metadata_header(rawtable)

    # transfer the data rows to an in-memory buffer
    databuffer = petl.MemorySource()
    petl.tocsv(data, databuffer, write_header=True, encoding='utf-8')
    databuffer.s = databuffer.getvalue()

    # re-open with the right header and attach the metadata
    table = petl.fromcsv(databuffer, header=header)
    table.metadata = metadata
    return table
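# A usage sketch for fromxlswithheader, assuming a workbook whose sheet begins
# with metadata rows in the format parse_metadata_header expects; the file and
# sheet names here are illustrative.
table = fromxlswithheader('measurements.xls', 'data')
print(table.metadata)    # metadata parsed from the leading rows
print(petl.look(table))  # data rows with the proper header applied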
def extract(file):
    tb = list()
    if file == "sql":
        host = input("Enter Host:")
        user = input("Enter Username:")
        pwd = input("Enter pwd:")
        dtb = input("Enter Database Name:")
        table = input("Enter Table Name:")
        conn = pymysql.connect(host=host, user=user, password=pwd, db=dtb,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)
        temp = etl.fromdb(conn, "SELECT * FROM " + table)
        tb = d2l(temp)
    elif ".csv" in file:
        tb = etl.fromcsv(file)
    elif ".xlsx" in file:
        # .xlsx workbooks need the openpyxl-based reader, not fromxls
        tb = etl.fromxlsx(file)
    elif ".xls" in file:
        tb = etl.fromxls(file)
    elif ".json" in file:
        tb = etl.fromjson(file)
        print(tb)
    elif ".xml" in file:
        with open(file, 'r') as fh:
            content = fh.read()
        options = optparse.Values({"pretty": True})
        # xml2json returns a JSON string; parse it into a dict
        ob = json.loads(xml2json.xml2json(content, options))
        # keep only the innermost nested object (the rows) and re-read it with petl
        temp = dict()
        for key in ob.keys():
            for skey in ob[key].keys():
                temp = json.dumps(ob[key][skey])
        with open("temp.json", "w") as tmp:
            tmp.write(temp)
        tb = etl.fromjson("temp.json")
        print(tb[0])
        # tb = etl.fromxml(file, './/ROW', {'Service_Name': 'Service_Name', 'Status': 'Status', 'Service_Type': 'Service_Type', 'Time': 'Time'})
    elif ".txt" in file:
        tb = etl.fromtext(file)
        print(tb)
    return tb
    # print('%s %s' % (place['name'].encode(), place['location'].get('zip')))


if __name__ == '__main__':
    main()

##################### SQL DB Read ############################
import sqlite3

connection = sqlite3.connect('sqlite.db')
table7 = etl.fromdb(connection, 'SELECT * FROM demo')
print(table7)

#################### EXCEL File Read ########################
# 'Financial Sample.xlsx' is an .xlsx workbook, so use the openpyxl-based
# fromxlsx reader; fromxls only handles legacy .xls files.
table8 = etl.fromxlsx('Financial Sample.xlsx')
print(table8)

################### Get data from wiki ####################
# Documentation: https://pypi.org/project/Wikipedia-API/
import wikipediaapi

wiki_wiki = wikipediaapi.Wikipedia('en')
page_py = wiki_wiki.page('Python_(programming_language)')
print(page_py)
print("Page - Summary: %s" % page_py.summary)

#################### GoogleAnalytics data ##########################