示例#1
0
    def run(self, driver, task, log):
        inp = task["source"]["file"]
        inp = compat.translate_unicode(inp)
        inp = "input/{}".format(inp)
        sheet = task["source"].get("sheet", None)
        use_view = task["source"].get("use_view", True)

        record_set = etl.fromxls(inp, sheet, use_view=use_view)
        if not etl.data(record_set).any():
            log.write("Task skipped. No rows on source")
        else:
            transform = TransformSubTask(task, log)
            record_set = transform.get_result(record_set)

            out = task["target"]["file"]
            out = compat.translate_unicode(out)
            out = "output/{}".format(out)
            separator = task["target"].get("delimiter", ";")
            separator = compat.translate_unicode(separator)
            enc = task["target"].get("encoding", "utf-8")

            task_log = "log/xls-csv_{}_{}.log".format(task["name"],
                                                      get_time_filename())
            with open(task_log, "w") as lg:
                if "truncate" in task["target"] and task["target"]["truncate"]:
                    record_set.progress(10000,
                                        out=lg).tocsv(out,
                                                      encoding=enc,
                                                      delimiter=separator)
                else:
                    record_set.progress(10000,
                                        out=lg).appendcsv(out,
                                                          encoding=enc,
                                                          delimiter=separator)
示例#2
0
 def test_integration():
     expect = (('foo', 'bar'), ('A', 1), ('B', 2), ('C', 2))
     f = NamedTemporaryFile(delete=False)
     f.close()
     etl.wrap(expect).toxls(f.name, 'Sheet1')
     actual = etl.fromxls(f.name, 'Sheet1')
     ieq(expect, actual)
     ieq(expect, actual)
示例#3
0
def import_ksh_settlements(xlsfilename, output_csv):
    (petl.fromxls(xlsfilename)
     # keep only the settlement column
     .cut(0)
     # replace header rows
     .skip(2).pushheader(('telepules', ))
     # skip empty row at the end
     .selecttrue(0).convert(0, 'lower').tocsv(output_csv, encoding='utf-8'))
示例#4
0
文件: test_xls.py 项目: DeanWay/petl
 def test_integration():
     expect = (('foo', 'bar'),
               ('A', 1),
               ('B', 2),
               ('C', 2))
     f = NamedTemporaryFile(delete=False)
     f.close()
     etl.wrap(expect).toxls(f.name, 'Sheet1')
     actual = etl.fromxls(f.name, 'Sheet1')
     ieq(expect, actual)
     ieq(expect, actual)
def etl(file):
    tb1 = list()
    import petl as etl
    import sqllite3
    if ".csv" in file:
    	tb1 = etl.fromcsv(file)
    elif ".json" in file:
    	tb1 = etl.fromjson(file)	
    elif ".xls" in file:
    	tb1 = etl.fromxls(file)
    elif ".xml" in file:
    	tb1 = etl.fromxml(file,"row")
示例#6
0
def process_animal_extended(shelter_id, session, input_directory):
    table = petl.fromxls(os.path.join(input_directory,
                                      'AnimalIntakeExtended.xls'),
                         sheet='AnimalIntakeExtended')

    ## Because an animal can appear in the intake report more than once,
    ## we must sort the table in order to upsert the latest value
    table_sorted = petl.sort(table, key='Intake Date/Time')

    for row in petl.dicts(table_sorted):
        id = row['Animal ID']

        set_values = {
            'arn': normalize_string(row['ARN']),
            'name': normalize_string(row['Animal Name']),
            'species': normalize_string(row['Species']),
            'primary_breed': normalize_string(row['Primary Breed']),
            'secondary_bred': normalize_string(row['Secondary Breed']),
            'gender': normalize_string(row['Gender']),
            'pre_altered': to_bool(row['Pre Altered']),
            'altered': to_bool(row['Altered']),
            'primary_color': normalize_string(row['Primary Colour']),
            'secondary_color': normalize_string(row['Secondary Colour']),
            'third_color': normalize_string(row['Third Colour']),
            'color_pattern': normalize_string(row['Colour Pattern']),
            'second_color_pattern':
            normalize_string(row['Second Colour Pattern']),
            'size': normalize_string(row['Size'])
        }

        insert_stmt = insert(Animal)\
            .values(
                id=id,
                shelter_id=shelter_id, ## TODO: add to index for constraint? make composite pk?
                **set_values)\
            .on_conflict_do_update(
                constraint='animals_pkey',
                set_={
                    'shelter_id': shelter_id,
                    **set_values,
                    'updated_at': func.now()
                })

        session.execute(insert_stmt)
        session.commit()
示例#7
0
def fromxlswithheader(filename, sheet, **kwargs):
    """
    Call `petl.fromcsv` and automatically parse metadata header if present.
    """
    # preliminary open for inspection
    rawtable = petl.fromxls(filename, sheet, header=None, **kwargs)
    metadata, header, data = parse_metadata_header(rawtable)

    # transfer data to a in-memory data buffer
    databuffer = petl.MemorySource()
    petl.tocsv(data, databuffer, write_header=True, encoding='utf-8')
    databuffer.s = databuffer.getvalue()

    # re-open with right headers and add metadata
    table = petl.fromcsv(databuffer, header=header)
    table.metadata = metadata

    return table
示例#8
0
def extract(file):
    tb = list()
    if file == "sql":
        host = raw_input("Enter Host:")
        user = raw_input("Enter Username:"******"Enter pwd:")
        dtb = raw_input("Enter Database Name:")
        table = raw_input("Enter Table Name:")
        conn = pymysql.connect(host=host,
                               user=user,
                               password=pwd,
                               db=dtb,
                               charset='utf8mb4',
                               cursorclass=pymysql.cursors.DictCursor)
        temp = etl.fromdb(conn, "SELECT * FROM " + table)
        tb = d2l(temp)
    elif ".csv" in file:
        tb = etl.fromcsv(file)
    elif ".xlsx" in file:
        tb = etl.fromxls(file)
    elif ".json" in file:
        tb = etl.fromjson(file)
        print tb
    elif ".xml" in file:
        f = open(file, 'r').read()
        options = optparse.Values({"pretty": True})
        jsn = json.dumps(xml2json.xml2json(f, options))
        ob = json.loads(jsn.decode('string-escape').strip('"'))
        temp = dict()
        for key in ob.keys():
            for skey in ob[key].keys():
                temp = json.dumps(ob[key][skey])
        with open("temp.json", "w") as tmp:
            tmp.write(temp)
        tb = etl.fromjson("temp.json")
        print tb[0]
        #tb = etl.fromxml(file,'.//ROW',{'Service_Name':'Service_Name','Status':'Status','Service_Type':'Service_Type','Time':'Time'})
    elif ".txt" in file:
        tb = etl.fromtext(file)
        print tb
    return tb
示例#9
0
    #  print('%s %s' % (place['name'].encode(), place['location'].get('zip')))


if __name__ == '__main__':
    main()

#####################SQL DB Read############################

import sqlite3
connection = sqlite3.connect('sqlite.db')
table7 = etl.fromdb(connection, 'SELECT * FROM demo')
print(table7)

####################EXCEL File Read########################

table8 = etl.fromxls('Financial Sample.xlsx')
print(table8)

###################Get data from wiki####################

#Documentation- https://pypi.org/project/Wikipedia-API/

import wikipediaapi
wiki_wiki = wikipediaapi.Wikipedia('en')

page_py = wiki_wiki.page('Python_(programming_language)')

print(page_py)
print("Page - Summary: %s" % page_py.summary)

####################GoogleAnalytics data##########################