def execute(self, context):
    try:
        nm_arq = 'CONSULTAS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')
        table1 = etl.convert(table, {
            'fk_servico': int,
            'fk_operadora': int
        })
        etl.tocsv(table1, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
                self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')
def test_tocsv_appendcsv_gz():
    """Test the tocsv and appendcsv function."""
    # exercise function
    table = (("foo", "bar"), ("a", 1), ("b", 2), ("c", 2))
    f = NamedTemporaryFile(delete=False)
    fn = f.name + ".gz"
    f.close()
    tocsv(table, fn, delimiter="\t")

    # check what it did
    with gzip.open(fn, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [["foo", "bar"], ["a", "1"], ["b", "2"], ["c", "2"]]
        ieq(expect, actual)

    # check appending
    table2 = (("foo", "bar"), ("d", 7), ("e", 9), ("f", 1))
    appendcsv(table2, fn, delimiter="\t")

    # check what it did
    with gzip.open(fn, "rb") as o:
        actual = csv.reader(o, delimiter="\t")
        expect = [["foo", "bar"], ["a", "1"], ["b", "2"], ["c", "2"],
                  ["d", "7"], ["e", "9"], ["f", "1"]]
        ieq(expect, actual)
def tocsvwithheader(table, source, **kwargs):
    """
    Use `petl.tocsv` to write CSV data in `table` to file `source`, including
    a key-value metadata header if passed in as the keyword argument
    `metadata`.

    The first row in `table` is assumed to contain the header columns.
    """
    metadata = kwargs.pop("metadata", {})
    kwargs.pop("write_header", None)  # make sure write_header not in kwargs

    # prepare header
    header = petl.header(table)

    # prepare metadata rows using #-prefix, and :-suffix for keys
    metadata_rows = []
    for key, value in metadata.items():
        metadata_row = [''] * len(header)
        metadata_row[0] = '#' + str(key) + ':'
        metadata_row[1] = str(value)
        metadata_rows.append(metadata_row)

    # prepare data (stripped of header)
    data = petl.data(table)

    # combine metadata + header + data, then write out
    combined = metadata_rows + [header] + list(data)
    petl.tocsv(combined, source, write_header=True, **kwargs)
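# A minimal usage sketch for tocsvwithheader() above. The example table and the
# output path 'example_with_header.csv' are made up for illustration; only
# petl.tocsv and the function defined above are assumed.
example_table = [['foo', 'bar'],
                 ['a', 1],
                 ['b', 2]]
tocsvwithheader(example_table, 'example_with_header.csv',
                metadata={'source': 'demo', 'version': 1})
# The file then starts with metadata rows such as "#source:,demo" and
# "#version:,1", followed by the normal "foo,bar" header and the data rows.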
def test_stringsource():
    table1 = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    etl.tocsv(table1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    table2 = etl.fromcsv(StringSource(actual))
    ieq(table1, table2)
    ieq(table1, table2)

    # test appending
    etl.appendcsv(table1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    eq_(expect, actual)
def test_stringsource():
    tbl1 = (('foo', 'bar'),
            ('a', '1'),
            ('b', '2'),
            ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    etl.tocsv(tbl1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    tbl2 = etl.fromcsv(StringSource(actual))
    ieq(tbl1, tbl2)
    ieq(tbl1, tbl2)

    # test appending
    etl.appendcsv(tbl1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    if not PY2:
        expect = expect.encode('ascii')
    eq_(expect, actual)
def download_new_collection(cls) -> None:
    # store a small dictionary for the later transformation
    planets_arr = {}
    for planets in SWAPI.fetch_data(settings.SW_PLANETS_URL):
        planets_arr.update({i['url']: i['name'] for i in planets})

    create = True
    file_name = '{}.csv'.format(time())
    csv_path = Path(CSV_PATH, file_name)
    for people in SWAPI.fetch_data(settings.SW_PEOPLE_URL):
        table = etl.fromdicts(
            people,
            header=[
                'name', 'height', 'mass', 'hair_color', 'skin_color',
                'eye_color', 'birth_year', 'gender', 'homeworld', 'edited'
            ]).convert('edited', lambda v: v[0:10]).convert(
                'homeworld', lambda v: planets_arr.get(v, '')).rename('edited', 'date')
        if create:
            etl.tocsv(table, source=csv_path, write_header=True)
            create = False
        else:
            etl.appendcsv(table, source=csv_path)

    c = SWPeopleCollection()
    c.file.name = file_name
    c.save()
def to_csv(self, local_path=None, temp_file_compression=None, encoding=None,
           errors='strict', write_header=True, csv_name=None, **csvargs):
    """
    Outputs table to a CSV. Additional keyword arguments are passed to
    ``csv.writer()``. So, e.g., to override the delimiter from the default
    CSV dialect, provide the delimiter keyword argument.

    .. warning::
        If a file already exists at the given location, it will be overwritten.

    `Args:`
        local_path: str
            The path to write the csv locally. If it ends in ".gz" or ".zip",
            the file will be compressed. If not specified, a temporary file
            will be created and returned, and that file will be removed
            automatically when the script is done running.
        temp_file_compression: str
            If a temp file is requested (ie. no ``local_path`` is specified),
            the compression type for that file. Currently "None", "gzip" or
            "zip" are supported. If a ``local_path`` is specified, this
            argument is ignored.
        encoding: str
            The CSV encoding type for `csv.writer()
            <https://docs.python.org/2/library/csv.html#csv.writer/>`_
        errors: str
            Raise an Error if encountered
        write_header: boolean
            Include header in output
        csv_name: str
            If ``zip`` compression (either specified or inferred), the name of
            the csv file within the archive.
        \**csvargs: kwargs
            ``csv_writer`` optional arguments

    `Returns:`
        str
            The path of the new file
    """  # noqa: W605

    # If a zip archive.
    if files.zip_check(local_path, temp_file_compression):
        return self.to_zip_csv(archive_path=local_path,
                               encoding=encoding,
                               errors=errors,
                               write_header=write_header,
                               csv_name=csv_name,
                               **csvargs)

    if not local_path:
        suffix = '.csv' + files.suffix_for_compression_type(temp_file_compression)
        local_path = files.create_temp_file(suffix=suffix)

    # Create normal csv/.gzip
    petl.tocsv(self.table,
               source=local_path,
               encoding=encoding,
               errors=errors,
               write_header=write_header,
               **csvargs)

    return local_path
def test_append_to_file_for_csv(user_export_file, tmpdir, media_root):
    # given
    export_data = [
        {"id": "123", "name": "test1", "collections": "coll1"},
        {"id": "345", "name": "test2"},
    ]
    headers = ["id", "name", "collections"]
    delimiter = ";"
    file_name = "test.csv"

    table = etl.fromdicts([{"id": "1", "name": "A"}], header=headers, missing=" ")

    with NamedTemporaryFile() as temp_file:
        etl.tocsv(table, temp_file.name, delimiter=delimiter)
        user_export_file.content_file.save(file_name, temp_file)

    # when
    append_to_file(export_data, headers, user_export_file, FileTypes.CSV, delimiter)

    # then
    user_export_file.refresh_from_db()
    csv_file = user_export_file.content_file
    file_content = csv_file.read().decode().split("\r\n")

    assert ";".join(headers) in file_content
    assert ";".join(export_data[0].values()) in file_content
    assert (";".join(export_data[1].values()) + "; ") in file_content

    shutil.rmtree(tmpdir)
def test_tocsv_appendcsv_gz():
    """Test the tocsv and appendcsv function."""
    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    f = NamedTemporaryFile(delete=False)
    fn = f.name + '.gz'
    f.close()
    tocsv(table, fn, delimiter='\t')

    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2']]
        ieq(expect, actual)
    finally:
        o.close()

    # check appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appendcsv(table2, fn, delimiter='\t')

    # check what it did
    o = gzip.open(fn, 'rb')
    try:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2'],
                  ['d', '7'], ['e', '9'], ['f', '1']]
        ieq(expect, actual)
    finally:
        o.close()
def DataIntegration(clinics_LOC, Services_LOC, Location_LOC):
    # Read the clinics.csv file
    fileData = pt.fromcsv(clinics_LOC)
    # Read the clinic_services.csv file
    servicesData = pt.fromcsv(Services_LOC)
    # Read the xml file cliniclocations.xml
    locationXML = pt.fromxml(Location_LOC, 'clinic', {
        "ClinicID": "ClinicID",
        "Lat": "Lat",
        "Lon": "Lon"
    })
    # Join the two csv tables using the built-in join function, with ClinicID as the key
    fileJoin = pt.join(servicesData, fileData, key="ClinicID")
    # Join the result with the XML data, again keyed on ClinicID
    MainJoin = pt.join(fileJoin, locationXML, key="ClinicID")
    # Acquire the required columns
    result = pt.cut(MainJoin, 'ClinicServiceID', 'Service', 'ClinicID',
                    'Suburb', 'Postcode', 'Lat', 'Lon')
    # Create the final csv file, clinic_service_locations.csv
    pt.tocsv(result, "clinic_service_locations.csv")
    print('Csv file generated.!!!')
def export(data, output_file, source, csv_arg, errors, write_header, append):
    """Export the specified table of data to a csv file."""
    existing_data = data.get(source)
    if append is True:
        petl.appendcsv(existing_data, output_file, errors=errors, **dict(csv_arg))
    else:
        petl.tocsv(existing_data, output_file, errors=errors,
                   write_header=write_header, **dict(csv_arg))
def test_export_gift_cards_in_batches_to_csv(
    gift_card, gift_card_expiry_date, gift_card_used, tmpdir,
):
    # given
    gift_cards = GiftCard.objects.exclude(id=gift_card_used.id).order_by("pk")

    table = etl.wrap([["code"]])

    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=",")

    # when
    export_gift_cards_in_batches(
        gift_cards,
        ["code"],
        ",",
        temp_file,
        "csv",
    )

    # then
    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in the file
    assert "code" in file_content

    for card in gift_cards:
        assert card.code in file_content

    shutil.rmtree(tmpdir)
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument("--attendance-filename", required=True, nargs='+', action='append',
                        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)")
    parser.add_argument("--mapping-filename", required=True,
                        help="'Mapping' filename (CSV mapping file with "
                             "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)")
    parser.add_argument("--output-filename", required=True,
                        help="'Output' filename (output loading CSV file "
                             "containing resulting <date>, <time>, <ccb_event_id>, <sk_indiv_id> data)")
    parser.add_argument('--emit-data-csvs', action='store_true',
                        help="If specified, output a CSV file per input attendance data text file")
    parser.add_argument('--add-extra-fields', action='store_true',
                        help="If specified, emit attender's full name, event name, and Servant Keeper week number "
                             "in addition to base fields into loading CSV file")
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    with open(args.mapping_filename, 'rb') as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            full_name2sk_indiv_id[row[0] + ', ' + row[1]] = row[2]

    if args.emit_data_csvs:
        output_csv_filebase = os.path.dirname(args.output_filename)
    else:
        output_csv_filebase = None

    attendance_table = join_tables(args.attendance_filename[0], output_csv_filebase, args.add_extra_fields)

    petl.tocsv(attendance_table, args.output_filename)
def test_export_products_in_batches_for_csv(
    product_list, user_export_file, tmpdir, media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [
            ProductFieldEnum.NAME.value,
            ProductFieldEnum.DESCRIPTION.value,
            ProductFieldEnum.VARIANT_SKU.value,
        ],
        "warehouses": [],
        "attributes": [],
        "channels": [],
    }
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])

    temp_file = NamedTemporaryFile()
    etl.tocsv(table, temp_file.name, delimiter=";")

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        temp_file,
        FileTypes.CSV,
    )

    # then
    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        id = graphene.Node.to_global_id("Product", product.pk)
        product_data.append(id)
        product_data.append(product.name)

        for variant in product.variants.all():
            product_data.append(str(variant.sku))

        expected_data.append(product_data)

    file_content = temp_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)
def test_stdoutsource_unicode():
    tbl = [('foo', 'bar'),
           (u'Արամ Խաչատրյան', 1),
           (u'Johann Strauß', 2)]
    etl.tocsv(tbl, StdoutSource(), encoding='utf-8')
    etl.tohtml(tbl, StdoutSource(), encoding='utf-8')
    etl.topickle(tbl, StdoutSource())
def createDimCampaign():
    try:
        tbl_campaign = [['campaign_name', 'campaign_started', 'campaign_ended'],
                        ['none', '2014-04-28T00:00:00', '2018-09-30T00:00:00']]
        dim_campaign = etl.head(tbl_campaign, 1)

        # Export as csv to load folder
        etl.tocsv(dim_campaign, 'load/dim_campaign.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
def get_load_result(nameFile):
    table1 = etl.fromjson('./static/data/tabalaElegidaCalculadora.json')
    tocsv(table1, './exelFiles/' + str(nameFile) + '.csv')
    etl.tohtml(table1, './exelFiles/' + str(nameFile) + '.html', caption=str(nameFile))
    return jsonify(True)
def createDimSubscriptions(events):
    try:
        dim_subscriptions_cut = etl.cut(events, 'type')
        dim_subscriptions_rename = etl.rename(dim_subscriptions_cut, {'type': 'subscription_name'})
        dim_subscriptions = etl.distinct(dim_subscriptions_rename)

        # Export as csv to load folder
        etl.tocsv(dim_subscriptions, 'load/dim_subscriptions.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
def createDimMedium(events):
    try:
        dim_medium_cut = etl.cut(events, 'utm_medium')
        dim_medium_rename = etl.rename(dim_medium_cut, {'utm_medium': 'medium'})
        dim_medium = etl.distinct(dim_medium_rename)

        # Export as csv to load folder
        etl.tocsv(dim_medium, 'load/dim_medium.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
def save_data_to_csv(data, record_count):
    db_meta = dict()
    transformed_data = transform_data(data)
    csv_filename = f'{datetime.now().ctime()}.csv'
    csv_file = CSV_DIR + csv_filename
    db_meta['name'] = csv_filename
    db_meta['count'] = record_count
    petl.tocsv(transformed_data, csv_file)
    add_metadata(db_meta)
def createDimCampaignType(events):
    try:
        dim_campaigntype_cut = etl.cut(events, 'utm_campaign')
        dim_campaigntype_rename = etl.rename(dim_campaigntype_cut, {'utm_campaign': 'campaign_type'})
        dim_campaigntype = etl.distinct(dim_campaigntype_rename)

        # Export as csv to load folder
        etl.tocsv(dim_campaigntype, 'load/dim_campaigntype.csv')
    except Exception as e:
        print("Something went wrong. Error {0}".format(e))
def test_export_products_in_batches_for_csv(
    product_list, user_export_file, tmpdir, media_root,
):
    # given
    qs = Product.objects.all()
    export_info = {
        "fields": [ProductFieldEnum.NAME.value, ProductFieldEnum.VARIANT_SKU.value],
        "warehouses": [],
        "attributes": [],
    }
    file_name = "test.csv"
    export_fields = ["id", "name", "variants__sku"]
    expected_headers = ["id", "name", "variant sku"]

    table = etl.wrap([expected_headers])

    with NamedTemporaryFile() as temp_file:
        etl.tocsv(table, temp_file.name, delimiter=";")
        user_export_file.content_file.save(file_name, temp_file)

    assert user_export_file.content_file

    # when
    export_products_in_batches(
        qs,
        export_info,
        set(export_fields),
        export_fields,
        ";",
        user_export_file,
        FileTypes.CSV,
    )

    # then
    user_export_file.refresh_from_db()
    csv_file = user_export_file.content_file
    assert csv_file

    expected_data = []
    for product in qs.order_by("pk"):
        product_data = []
        product_data.append(str(product.pk))
        product_data.append(product.name)

        for variant in product.variants.all():
            product_data.append(str(variant.sku))

        expected_data.append(product_data)

    file_content = csv_file.read().decode().split("\r\n")

    # ensure headers are in file
    assert ";".join(expected_headers) in file_content

    for row in expected_data:
        assert ";".join(row) in file_content

    shutil.rmtree(tmpdir)
def execute(self, context):
    try:
        nm_arq = 'RECEITAS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}
            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row[4].strip(),
                strnull[row['nr_centro_custo'].strip()]
                if row['nr_centro_custo'].strip() in strnull
                else row['nr_centro_custo'].strip(),
                strnull[row['descricao_centro_custo'].strip()]
                if row['descricao_centro_custo'].strip() in strnull
                else row['descricao_centro_custo'].strip(),
                row[7].strip(),
                row[8].strip(),
                row[9].strip(),
                strnull[row['tipo_cobranca_sub'].strip()]
                if row['tipo_cobranca_sub'].strip() in strnull
                else row['tipo_cobranca_sub'].strip()
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'mes_competencia', 'fk_beneficiario', 'fk_empresa',
            'dt_geracao_titulo', 'dt_pgto', 'nr_centro_custo',
            'descricao_centro_custo', 'tipo_cobranca', 'vl_cobranca',
            'vl_pago', 'tipo_cobranca_sub'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
                self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')
def setup(self):
    # write headers to CSV file
    etl.tocsv([self.csv_file_columns], self.csv_file_name)

    # create collection in database
    self.person_collection = PersonCollection(
        file_name=self.file_name,
        date=datetime.datetime.now(datetime.timezone.utc),
    )
    self.person_collection.save()
def full_export(data, output_dir, csv_arg, errors, write_header, exclude_empty):
    """Export all data tables as CSV files."""
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    for name in data.registry:
        current_data = data.get(name)
        if exclude_empty and current_data.nrows() <= 0:
            continue
        output_file = os.path.join(output_dir, data.filename(name, 'csv'))
        petl.tocsv(current_data, output_file, errors=errors,
                   write_header=write_header, **dict(csv_arg))
def create_file_with_headers(file_headers: List[str], delimiter: str, file_type: str):
    table = etl.wrap([file_headers])

    if file_type == FileTypes.CSV:
        temp_file = NamedTemporaryFile("ab+", suffix=".csv")
        etl.tocsv(table, temp_file.name, delimiter=delimiter)
    else:
        temp_file = NamedTemporaryFile("ab+", suffix=".xlsx")
        etl.io.xlsx.toxlsx(table, temp_file.name)

    return temp_file
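# Hedged usage sketch for create_file_with_headers() above. FileTypes and the
# header names are taken from the surrounding snippets; the ";" delimiter is an
# illustrative choice, not a requirement.
temp_csv = create_file_with_headers(["id", "name", "variant sku"], ";", FileTypes.CSV)
print(temp_csv.name)  # path of the temporary .csv file, header row already written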
def run_backup(sqlite_db, backup_path):
    """backs-up each table in the inventory database to a csv, zips them all
    up, and saves the zip with a timestamp-derived name.
    """
    ts = timestamp()

    # SET UP THE FOLDERS -----------------------------------------------------
    # check for backup folder, make if it doesn't exist
    if not os.path.exists(backup_path):
        os.makedirs(backup_path)
    # make a folder for this backup
    this_backup_path = os.path.join(backup_path, "backup_{0}".format(ts))
    if not os.path.exists(this_backup_path):
        os.makedirs(this_backup_path)
    click.echo(this_backup_path)

    # GET THE DATA OUT -------------------------------------------------------
    # temporarily store extracted csv files. (use this to delete them later)
    csvs = []
    # connect to the DB, get each table, save out as a csv.
    conn = sqlite3.connect(sqlite_db)
    for table in ['product', 'product_tags', 'sale', 'staff', 'supplier', 'tag']:
        t = etl.fromdb(lambda: conn.cursor(), """SELECT * FROM {0}""".format(table))
        out_csv = os.path.join(this_backup_path, '{0}.csv'.format(table))
        etl.tocsv(t, out_csv)
        csvs.append(out_csv)

    # ZIP THE DATA UP --------------------------------------------------------
    # make a zip file in the main backup location
    zipfile_directory = os.path.join(backup_path, "inventory_backup_{0}.zip".format(ts))
    # create a zip file object
    zf = zipfile.ZipFile(zipfile_directory, mode="w")
    for each in csvs:
        click.echo(each)
        zf.write(filename=each, arcname=os.path.basename(each), compress_type=compression)
    zf.close()

    # REMOVE TEMP FILES ------------------------------------------------------
    for each in csvs:
        os.remove(each)
    os.rmdir(this_backup_path)
def run(argv=None):
    parser = argparse.ArgumentParser()
    parser.add_argument("--filename", type=str, help='Input filename', required=True)
    # parser.add_argument("--max_iterations", type=int, help='Max number of requests', default=1000)
    known_args, _ = parser.parse_known_args()

    file_content = [line for line in read_file(known_args.filename)]
    table = petl.fromdicts(file_content)
    tokenized_table = tokenize(table)
    petl.tocsv(tokenized_table, 'words.csv')
def execute(self, context):
    try:
        nm_arq = 'SERVICOS.csv'
        with open(f'{_PROC_FILES}/{nm_arq}', 'wb') as data_from:
            data_from.write(
                self.client_from.get_blob_client(
                    nm_arq).download_blob().readall())
        table = etl.fromcsv(f'{_PROC_FILES}/{nm_arq}', delimiter='|')

        def rowmapper(row):
            strnull = {'NULL': ''}
            return [
                row[0].strip(),
                row[1].strip(),
                row[2].strip(),
                row[3].strip(),
                row[4].strip(),
                strnull[row['subgrupo'].strip()]
                if row['subgrupo'].strip() in strnull
                else row['subgrupo'].strip(),
                row[6].strip(),
                strnull[row['dt_alteracao'].strip()]
                if row['dt_alteracao'].strip() in strnull
                else row['dt_alteracao'].strip(),
                strnull[row['ind_cirurgico'].strip()]
                if row['ind_cirurgico'].strip() in strnull
                else row['ind_cirurgico'].strip()
            ]

        table1 = etl.rowmap(table, rowmapper, header=[
            'pk_servico', 'tipo', 'descricao', 'capitulo', 'grupo',
            'subgrupo', 'dt_inclusao', 'dt_alteracao', 'ind_cirurgico'
        ])
        table2 = etl.addfields(table1, [('fk_operadora', _SOURCE)])
        etl.tocsv(table2, f'{_PROC_FILES}/t{nm_arq}', delimiter='|')
        self.logger.info(f"Destino do arquivo {self.client_to}")
        self.logger.info(f"Container destino {self.container_to}")
    except azure.core.exceptions.ResourceNotFoundError:
        print('Entrou na exceção :)')

    upload_file = f'{_PROC_FILES}/t{nm_arq}'
    try:
        if os.path.isfile(upload_file):
            with open(upload_file, "rb") as data:
                self.client_to.upload_blob(nm_arq, data, overwrite=True)
                self.logger.info(f'{data} carregado')
        else:
            self.logger.info(f't{nm_arq} não foi encontrado no container')
    finally:
        self.logger.info('Tudo Carregado')
def save_as_csv(data: List[Dict], file_path: str) -> None:
    """
    Saves a CSV to a filesystem.
    """
    csv_header = data[0].keys()
    csv_table = [csv_header]
    for row in data:
        csv_row = [row[row_name] for row_name in csv_header]
        csv_table.append(csv_row)
    petl.tocsv(csv_table, file_path)
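# A small usage sketch for save_as_csv() above, assuming list-of-dict rows that
# all share the same keys; the rows and the 'people.csv' path are illustrative.
rows = [
    {"name": "Ada", "age": 36},
    {"name": "Grace", "age": 45},
]
save_as_csv(rows, "people.csv")  # writes a "name,age" header row plus one row per dict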
def test_issue_231():
    table = [['foo', 'bar'], ['a', '1'], ['b', '2']]
    t = cut(table, 'foo')

    totsv(t, 'tmp/issue_231.tsv')
    u = fromtsv('tmp/issue_231.tsv')
    ieq(t, u)

    tocsv(t, 'tmp/issue_231.csv')
    u = fromcsv('tmp/issue_231.csv')
    ieq(t, u)

    topickle(t, 'tmp/issue_231.pickle')
    u = frompickle('tmp/issue_231.pickle')
    ieq(t, u)
def transform_xls(hires_and_promotions_excel, separations_excel, exempt_roster_excel, output_file):
    hires_and_promotions = petl.io.xls \
        .fromxls(hires_and_promotions_excel, sheet='Data') \
        .rename(column_map_shared)

    separations = petl.io.xls \
        .fromxls(separations_excel, sheet='Data') \
        .rename({**column_map_shared, **column_map_separations})

    def dedup_separations(payroll_number, rows):
        rows_sorted = sorted(rows, key=lambda x: x['termination_date'])
        return rows_sorted[-1]

    separations_deduped = petl.rowreduce(separations, 'payroll_number', dedup_separations)

    exempt_roster = petl.io.xls \
        .fromxls(exempt_roster_excel, sheet='Data') \
        .rename(column_map_roster)

    merged = petl.mergesort(hires_and_promotions, separations_deduped, exempt_roster,
                            key='payroll_number')

    def dedup_merged(payroll_number, rows):
        rows_sorted = sorted(rows, key=lambda x: x['latest_start_date'])
        if len(rows_sorted) == 1:
            return rows_sorted[-1]
        merged_row = []
        for i in range(0, len(rows_sorted[0]) - 1):
            if (rows_sorted[0][i] == '' or rows_sorted[0][i] == None) \
                    and rows_sorted[1][i] != '' and rows_sorted[1][i] != None:
                merged_row.append(rows_sorted[1][i])
            elif (rows_sorted[1][i] == '' or rows_sorted[1][i] == None) \
                    and rows_sorted[0][i] != '' and rows_sorted[0][i] != None:
                merged_row.append(rows_sorted[0][i])
            elif rows_sorted[0][i] == rows_sorted[1][i]:
                merged_row.append(rows_sorted[0][i])
            else:
                merged_row.append(rows_sorted[1][i])  # take latest value by start date
        return merged_row

    merged_deduped = petl.rowreduce(merged, 'payroll_number', dedup_merged)

    petl.tocsv(merged_deduped, source=output_file)
def main(argv):
    global full_name2sk_indiv_id

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--attendance-filename",
        required=True,
        nargs="+",
        action="append",
        help="Attendance filename (input Servant Keeper attendance report file(s)...can be wildcard)",
    )
    parser.add_argument(
        "--mapping-filename",
        required=True,
        help="'Mapping' filename (CSV mapping file with "
        "'Last Name', 'Preferred Name' and 'Individual ID' Servant Keeper data columns)",
    )
    parser.add_argument(
        "--output-filename",
        required=True,
        help="'Output' filename (output loading CSV file "
        "containing resulting <date>, <time>, <ccb_event_id>, <sk_indiv_id> data)",
    )
    parser.add_argument(
        "--emit-data-csvs",
        action="store_true",
        help="If specified, output a CSV file per input attendance data text file",
    )
    parser.add_argument(
        "--add-extra-fields",
        action="store_true",
        help="If specified, emit attender's full name, "
        "event name, and Servant Keeper week number in addition to base fields into loading CSV file",
    )
    args = parser.parse_args()

    # Load up mapping matrix to map from Servant Keeper full_name's to Servant Keeper individual_id's
    full_name2sk_indiv_id = {}
    with open(args.mapping_filename, "rb") as csvfile:
        csvreader = csv.reader(csvfile)
        for row in csvreader:
            full_name2sk_indiv_id[row[0] + ", " + row[1]] = row[2]

    if args.emit_data_csvs:
        output_csv_filebase = os.path.dirname(args.output_filename)
    else:
        output_csv_filebase = None

    attendance_table = join_tables(args.attendance_filename[0], output_csv_filebase, args.add_extra_fields)

    petl.tocsv(attendance_table, args.output_filename)
def load(tables_by_id, output_folder, devices):
    for device_id in tables_by_id:
        name = valid_name(devices[device_id]['name'])
        tbl_device_file = path.join(output_folder, f"{name}.csv")
        if path.isfile(tbl_device_file):
            tbl_old = petl.fromcsv(tbl_device_file, delimiter=';')
            old_header = petl.header(tbl_old)
            new_header = petl.header(tables_by_id[device_id])
            if old_header == new_header:
                petl.appendcsv(tables_by_id[device_id], source=tbl_device_file, delimiter=';')
            else:
                # TODO: write to the new file
                raise ValueError(f"Incompatible headers:\n old={old_header}\n new={new_header}")
        else:
            petl.tocsv(tables_by_id[device_id], tbl_device_file, delimiter=';')
def test_gzipsource():
    # setup
    tbl = [('foo', 'bar'), ('a', '1'), ('b', '2')]
    fn = NamedTemporaryFile().name + '.gz'
    expect = b"foo,bar\na,1\nb,2\n"

    # write explicit
    etl.tocsv(tbl, GzipSource(fn), lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)

    # write implicit
    etl.tocsv(tbl, fn, lineterminator='\n')
    actual = gzip.open(fn).read()
    eq_(expect, actual)

    # read explicit
    tbl2 = etl.fromcsv(GzipSource(fn))
    ieq(tbl, tbl2)

    # read implicit
    tbl2 = etl.fromcsv(fn)
    ieq(tbl, tbl2)
def test_tocsv_appendcsv():
    """Test the tocsv and appendcsv function."""
    # exercise function
    table = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    f = NamedTemporaryFile(delete=False)
    tocsv(table, f.name, delimiter='\t')

    # check what it did
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2']]
        ieq(expect, actual)

    # check appending
    table2 = (('foo', 'bar'), ('d', 7), ('e', 9), ('f', 1))
    appendcsv(table2, f.name, delimiter='\t')

    # check what it did
    with open(f.name, 'rb') as o:
        actual = csv.reader(o, delimiter='\t')
        expect = [['foo', 'bar'], ['a', '1'], ['b', '2'], ['c', '2'],
                  ['d', '7'], ['e', '9'], ['f', '1']]
        ieq(expect, actual)
def test_StringSource():
    table1 = (('foo', 'bar'),
              ('a', '1'),
              ('b', '2'),
              ('c', '2'))

    # test writing to a string buffer
    ss = StringSource()
    tocsv(table1, ss)
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\n"
    actual = ss.getvalue()
    eq_(expect, actual)

    # test reading from a string buffer
    table2 = fromcsv(StringSource(actual))
    ieq(table1, table2)
    ieq(table1, table2)

    # test appending
    appendcsv(table1, ss)
    actual = ss.getvalue()
    expect = "foo,bar\r\na,1\r\nb,2\r\nc,2\r\na,1\r\nb,2\r\nc,2\r\n"
    eq_(expect, actual)
        error = [e, '', '']
        query_errors[url] = error
    except JSONDecodeError as e:
        error = [e, r.raw.data, r.raw.read(100)]
        query_errors[url] = error

read_conn = psycopg2.connect("dbname=ais_engine user=ais_engine")
address_count = etl.fromdb(read_conn, 'select count(*) as N from {}'.format(warmup_address_table_name))
n = list(address_count.values('n'))[0]
warmup_rows = etl.fromdb(
    read_conn,
    'select {address_field} from {table} OFFSET floor(random()*{n}) limit {limit}'.format(
        address_field=warmup_address_field,
        table=warmup_address_table_name,
        n=n,
        limit=warmup_row_limit))
# print(etl.look(warmup_rows))
responses = warmup_rows.addfield('response_status', (lambda a: query_address(a['street_address']))).progress(100)
# print(etl.look(responses))
eval = responses.aggregate('response_status', len)
print(etl.look(eval))
f_200 = [(count / warmup_row_limit) for status, count in eval[1:] if status == 200][0]
print(f_200)

############################
# WRITE ERRORS OUT TO FILE #
############################
print("Writing errors to file...")
error_table = []
for url, error_vals in query_errors.items():
    error_table.append([url, error_vals[0], error_vals[1]])
etl.tocsv(error_table, error_file)

exit(0) if f_200 > warmup_fraction_success else exit(1)
        attr = data['attibutes'][x]['attrName']
        rules = data['attibutes'][x]['rules']
        rulesListSize = len(rules)
        for y in range(rulesListSize):
            if rules[y] == "Remove Null Value Rows":
                cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
            if rules[y] == "Remove Duplicates":
                cleansedTable = etl.aggregate(cleansedTable, attr)
            if rules[y] == "Sort":
                cleansedTable = etl.mergesort(cleansedTable, key=attr)
            if rules[y] == "Number Validation":
                cleansedTable = etl.select(cleansedTable, attr)
            if rules[y] == "Fill Missing Values":
                cleansedTable = etl.filldown(cleansedTable, attr)

    etl.tocsv(cleansedTable, 'src/etl/outputs/cleansed.csv')

    # Create rawData Table
    dataTable = cleansedTable
    rawDataTable = cleansedTable

    reasonUniqueValues = etl.aggregate(dataTable, dataTable[0][20])

    mappings = OrderedDict()

    # mapping attributes
    # go through each column (c = table)
    for i in range(length):
        # get unique values for each column
        uniqueValues = etl.aggregate(dataTable, dataTable[0][i])
        # create unique value for each column
#!/usr/bin/env python
import methods_raw_input
import preprocessing
from petl import tocsv

# Main code for sentiment classifier

# Setting parameters
data_filename = "Tweets.csv"
p_train_data = 0.7
split_mode = 'normal'

train_data, test_data = methods_raw_input.really_read_filelines(data_filename, p_train_data, split_mode)

train_data = preprocessing.tokenise_data(train_data)
train_data = preprocessing.word_frequency(train_data, 'tweet_tokenized')

tocsv(train_data, 'New_table.csv')
        attr = data['attibutes'][x]['attrName']
        rules = data['attibutes'][x]['rules']
        rulesListSize = len(rules)
        for y in range(rulesListSize):
            if rules[y] == "Remove Null Value Rows":
                cleansedTable = etl.select(cleansedTable, attr, lambda v: v != '')
            if rules[y] == "Remove Duplicates":
                cleansedTable = etl.aggregate(cleansedTable, attr)
            if rules[y] == "Sort":
                cleansedTable = etl.mergesort(cleansedTable, key=attr)
            if rules[y] == "Number Validation":
                cleansedTable = etl.select(cleansedTable, attr)
            if rules[y] == "Fill Missing Values":
                cleansedTable = etl.filldown(cleansedTable, attr)

    etl.tocsv(cleansedTable, 'src/etl/outputs/cleansed.csv')

    # Create rawData Table
    dataTable = cleansedTable
    rawDataTable = cleansedTable

    mappings = OrderedDict()

    # mapping attributes
    # go through each column (c = table)
    for i in range(length):
        # get unique values for each column
        uniqueValues = etl.aggregate(dataTable, dataTable[0][i])
        # create unique value for each column
        uniqueValArr = []
        k = 0
        for iterating_var in uniqueValues:
# a table and field name can also be provided as arguments
look(table1)
table2 = unflatten(table1, 'lines', 3)
look(table2)


# tocsv
table = [['foo', 'bar'],
         ['a', 1],
         ['b', 2],
         ['c', 2]]

from petl import tocsv, look
look(table)
tocsv(table, 'test.csv', delimiter='\t')
# look what it did
from petl import fromcsv
look(fromcsv('test.csv', delimiter='\t'))


# appendcsv
table = [['foo', 'bar'],
         ['d', 7],
         ['e', 42],
         ['f', 12]]

# look at an existing CSV file
from petl import look, fromcsv
testcsv = fromcsv('test.csv', delimiter='\t')
def main(argv):
    urls = {
        'INDIVIDUALS': {
            'xmlroot': 'response/individuals/individual',
            'parse_dict': {
                'Family ID': ('family', 'id'),
                'Individual ID': ('.', 'id'),
                'Family Position': 'family_position',
                'Prefix': 'salutation',
                'First Name': 'first_name',
                'Middle Name': 'middle_name',
                'Last Name': 'last_name',
                'Legal Name': 'legal_first_name',
                'Active': 'active',
                'Campus': 'campus',
                'Email': 'email',
                'Mailing Street': ".//address[@type='mailing']/street_address",
                'Mailing City': ".//address[@type='mailing']/city",
                'Mailing State': ".//address[@type='mailing']/state",
                'Mailing Postal Code': ".//address[@type='mailing']/zip",
                'Mailing Country': ".//address[@type='mailing']/country",
                'Home Street': ".//address[@type='home']/street_address",
                'Home City': ".//address[@type='home']/city",
                'Home State': ".//address[@type='home']/state",
                'Home Postal Code': ".//address[@type='home']/zip",
                'Home Country': ".//address[@type='home']/country",
                'Other Street': ".//address[@type='other']/street_address",
                'Other City': ".//address[@type='other']/city",
                'Other State': ".//address[@type='other']/state",
                'Other Postal Code': ".//address[@type='other']/zip",
                'Other Country': ".//address[@type='other']/country",
                'Contact Phone': ".//phone[@type='contact']",
                'Home Phone': ".//phone[@type='home']",
                'Work Phone': ".//phone[@type='work']",
                'Mobile Phone': ".//phone[@type='mobile']",
                'Emergency Phone': ".//phone[@type='emergency']",
                'Birthday': 'birthday',
                'Anniversary': 'anniversary',
                'Gender': 'gender',
                'Giving Number': 'giving_number',
                'Marital Status': 'marital_status',
                'Membership Start Date': 'membership_date',
                'Membership End Date': 'membership_end',
                'Membership Type': 'membership_type',
                'Baptized': 'baptized',
                # 'School District': ??,
                # 'How They Heard': ??,
                # 'How They Joined': ??,
                # 'Reason Left Church': ??,
                # 'Job Title': ??,
                'Deceased': 'deceased',  # !!!
                'Baptism Date': ".//user_defined_date_fields/user_defined_date_field[label='Baptism Date']/date",
                'Baptized By': ".//user_defined_text_fields/user_defined_text_field[label='Baptized By']/text",
                'Confirmed Date': ".//user_defined_date_fields/user_defined_date_field[label='Confirmed Date']/date",
                'Confirmed': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Confirmed']/selection",
                'Mailbox Number': ".//user_defined_text_fields/user_defined_text_field[label='Mailbox Number']/text",
                'Spirit Mailing': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Spirit Mailing']/selection",
                'Photo Release': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Photo Release']/selection",
                'Ethnicity': ".//user_defined_pulldown_fields/user_defined_pulldown_field[label='Ethnicity']/selection",
                'Transferred Frm': ".//user_defined_text_fields/user_defined_text_field[label='Transferred Frm']/text",
                'Transferred To': ".//user_defined_text_fields/user_defined_text_field[label='Transferred To']/text",
                'Pastr When Join': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Join']/text",
                'Pastr When Leav': ".//user_defined_text_fields/user_defined_text_field[label='Pastr When Leav']/text",
                'SK Indiv ID': ".//user_defined_text_fields/user_defined_text_field[label='SK Indiv ID']/text"
            }
        },
        'GROUPS': 'https://ingomar.ccbchurch.com/api.php?srv=group_profiles',
        'ACCOUNTS': 'https://ingomar.ccbchurch.com/api.php?srv=transaction_detail_type_list',
        'TRANSACTIONS': {
            'xmlroot': 'response/batches/batch/transactions/transaction',
            'parse_dict': {
                'Date': 'date',
                'Payment Type': 'payment_type',
                'Check Number': 'check_number',
                'Individual ID': ('individual', 'id'),
                'Account': './/transaction_details/transaction_detail/coa',
                'Amount': './/transaction_details/transaction_detail/amount',
                'Tax Deductible': './/transaction_details/transaction_detail/tax_deductible',
                'Note': './/transaction_details/transaction_detail/note'
            }
        }
    }

    parser = argparse.ArgumentParser(description="Parses XML file into CSV output")
    parser.add_argument("--type", required=True, help='One of ' + ', '.join(urls.keys()))
    parser.add_argument("--xml-input-filename", required=True, help="XML file to parse")
    parser.add_argument("--csv-output-filename", required=True, help="CSV output file")
    args = parser.parse_args()

    table1 = petl.fromxml(args.xml_input_filename, urls[args.type]['xmlroot'], urls[args.type]['parse_dict'])

    petl.tocsv(table1, args.csv_output_filename)
# fromcsv()
###########

import petl as etl
import csv

# set up a CSV file to demonstrate with
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
with open('example.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(table1)

# now demonstrate the use of fromcsv()
table2 = etl.fromcsv('example.csv')
table2


# tocsv()
#########

import petl as etl
table1 = [['foo', 'bar'],
          ['a', 1],
          ['b', 2],
          ['c', 2]]
etl.tocsv(table1, 'example.csv')
# look what it did
print(open('example.csv').read())
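# Round-trip note (a sketch, not part of the original example): values written
# by tocsv() come back as strings when re-read with fromcsv(), so a convert()
# step is needed to restore numeric types.
table3 = etl.fromcsv('example.csv')
table4 = etl.convert(table3, 'bar', int)
print(etl.look(table4))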
import petl as etl
import re
from collections import OrderedDict
import pymysql
import dbconfig

read_db_conn = pymysql.connect(host=dbconfig.db_host,
                               port=dbconfig.db_port,
                               charset="utf8",
                               user=dbconfig.db_user,
                               password=dbconfig.db_pass,
                               db=dbconfig.db_name)

products = etl.fromdb(read_db_conn,
                      "SELECT id,name,description FROM {} limit 5000".format(dbconfig.db_table_items))


# remove non-alphanumeric characters
def cleanString(val):
    nonewline = val.replace('\n', " ")
    return re.sub(r'\W+', ' ', nonewline).lower()


mappings = OrderedDict()
mappings['id'] = 'id'
mappings['item_description'] = lambda val: cleanString(val['name'] + " " + val['description'])

products = etl.fieldmap(products, mappings)

etl.tocsv(products, 'query_result.csv')
def writeDataToCsv(path, output):
    etl.tocsv(getTable(path), output)
def save(self):
    if self.rows is not None:
        etl.tocsv(self.rows, self.datafile, encoding='utf8')
from __future__ import division, print_function, absolute_import


# progress()
############

import petl as etl
table = etl.dummytable(100000)
table.progress(10000).tocsv('example.csv')


# clock()
#########

import petl as etl
t1 = etl.dummytable(100000)
c1 = etl.clock(t1)
t2 = etl.convert(c1, 'foo', lambda v: v**2)
c2 = etl.clock(t2)
p = etl.progress(c2, 10000)
etl.tocsv(p, 'example.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
c2.time - c1.time
def attendance_file2table(filename, output_csv_filebase, add_extra_fields):
    global full_name2sk_indiv_id

    print "*** Parsing file: " + filename
    print

    attendance_dicts = []

    # CCB's Worship Service event IDs...
    event_ids = {}
    event_ids["8"] = 6
    event_ids["9"] = 7
    event_ids["10"] = 8
    event_ids["11:15"] = 9
    event_ids["Christmas"] = 13

    # The following are used to create CSV output filenames and to emit human-readable event name if add_extra_fields
    # flag is on
    event_names = {}
    event_names[6] = "08am"
    event_names[7] = "09am"
    event_names[8] = "10am"
    event_names[9] = "11_15am"
    event_names[13] = "Christmas Eve"

    # Time of event in Excel-parseable format
    event_times = {}
    event_times[6] = "08:00 AM"
    event_times[7] = "09:00 AM"
    event_times[8] = "10:00 AM"
    event_times[9] = "11:15 AM"
    event_times[13] = "04:00 PM"

    # Starting state...
    prior_line = None
    matched_month_year = None
    matched_service_time = None
    month = None
    year = None
    service_time = None
    line_number = 1
    total_row_dict = None
    event_id = None
    accumulated_row_totals_dict = {"week1": 0, "week2": 0, "week3": 0, "week4": 0, "week5": 0, "week6": 0, "total": 0}
    full_name = None
    phone = None
    num_processed_lines = 0

    for line in open(filename):

        # First pick off line at front of file indicating month and year that this attendance file is for...
        if not matched_month_year:
            matched_month_year = re.search("For the month of ([A-Z][a-z]+), ([0-9]{4})", line)
            if matched_month_year:
                month = string2monthnum(matched_month_year.group(1))
                year = string2yearnum(matched_month_year.group(2))
                if not (month and year):
                    print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                    print >> sys.stderr, "*** ERROR! Invalid month or year found"
                    print >> sys.stderr, line
                    print >> sys.stderr
                    sys.exit(1)
                first_day_in_month, num_days_in_month = calendar.monthrange(year, month)

                # Create list of 6 date objects, month_sundays, representing week1, week2, ... week6 Sunday dates
                # If a week has no Sunday, it is None
                day_countup = 1
                day_countup += 6 - first_day_in_month
                month_sundays = []
                if first_day_in_month != 6:
                    month_sundays.append(None)
                while day_countup <= num_days_in_month:
                    month_sundays.append(datetime.date(year, month, day_countup))
                    day_countup += 7
                while len(month_sundays) < 6:
                    month_sundays.append(None)
                christmas_eve_date = datetime.date(year, month, 24)

        # Second pick off line at front of file indicating worship service time that this attendance file is for...
        elif not matched_service_time:
            matched_service_time = re.search("Worship Service - (Sunday |Summer )?([^ ]*)", line)
            if matched_service_time:
                service_time = matched_service_time.group(2)
                if service_time in event_ids:
                    event_id = event_ids[service_time]
                    event_name = event_names[event_id]
                else:
                    print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                    print >> sys.stderr, '*** ERROR! Unrecognized service_time: "' + service_time + '"'
                    print >> sys.stderr
                    sys.exit(1)

        # ...then match attendance (row per person with weeks they attended) and total (summary at bottom) rows
        else:
            # Once we found row with totals...we're done, that's last line in attendance file we need to parse
            matched_total_line = re.search("^ {18}Total: {13}(?P<attendance>( +[0-9]+)+)\r?$", line)
            if matched_total_line:
                totals_attendance_dict = attendance_str2dict(
                    matched_total_line.group("attendance"), [-3, -9, -15, -20, -24, -29, -35], 3
                )
                break

            matched_attendance_line = re.search(
                "^ {6}"
                + "(?P<full_name>(?P<last_name>[A-Za-z]+([ \-'][A-Za-z]+)*), "
                + "(?P<first_name>([A-Za-z]+\.?)+([\-' ][A-Za-z]+)*)( \((?P<nick_name>[A-Za-z]+)\))?\.?)?\r?"
                + "(?P<phone>( +)?([0-9]{3}-[0-9]{3}-[0-9]{4}|Unlisted))?"
                + "(?P<attendance> +(1 +)+[1-6])?\r?$",
                line,
            )
            if matched_attendance_line:
                if matched_attendance_line.group("full_name"):
                    full_name = matched_attendance_line.group("full_name").strip()
                if matched_attendance_line.group("phone"):
                    phone = matched_attendance_line.group("phone").strip()
                if matched_attendance_line.group("attendance"):
                    if full_name:
                        attendance = matched_attendance_line.group("attendance").strip()
                        row_dict = attendance_str2dict(attendance, [-1, -7, -13, -18, -22, -27, -33], 1)
                        row_dict["full_name"] = full_name
                        if phone:
                            row_dict["phone"] = phone
                        else:
                            row_dict["phone"] = ""
                        num_processed_lines += 1
                        full_name = None
                        phone = None
                        if row_dict["total"] != (
                            row_dict["week1"]
                            + row_dict["week2"]
                            + row_dict["week3"]
                            + row_dict["week4"]
                            + row_dict["week5"]
                            + row_dict["week6"]
                        ):
                            print >> sys.stderr, "*** Filename: " + filename + ", line number: " + str(line_number)
                            print >> sys.stderr, "*** ERROR! Bad row total, doesn't match sum of weeks 1-6"
                            print >> sys.stderr, row_dict
                            print >> sys.stderr
                            break
                        for key in accumulated_row_totals_dict:
                            accumulated_row_totals_dict[key] += row_dict[key]
                        attendance_dicts.append(row_dict)

        # Buffer the current line for line folding if needed (see 'line folding' above)
        prior_line = line
        line_number += 1

    print "*** Number of attendance lines processed: " + str(num_processed_lines)
    print "*** Number of attendees: " + str(accumulated_row_totals_dict["total"])
    print

    if output_csv_filebase and event_id:
        output_csv_filename = (
            output_csv_filebase + "/" + str(year) + format(month, "02d") + "_" + str(event_names[event_id]) + ".csv"
        )
        all_columns_table = petl.fromdicts(attendance_dicts)
        petl.tocsv(all_columns_table, output_csv_filename)

    # Build 2nd list of dicts, where each list item is dict of individual date/event attendance. I.e. a row per
    # worship service date vs original attendance dicts format of a row per attendee across all weeks in month.
    # This is the actual one returned and eventually emitted into output file
    attendance_dicts2 = []
    for attendance_dict in attendance_dicts:
        for key in attendance_dict:
            if key[:4] == "week" and attendance_dict[key] != 0:
                week_index = int(key[4:5]) - 1
                if month_sundays[week_index] is not None:
                    attendance_dict2 = {}
                    full_name = attendance_dict["full_name"]
                    if full_name in full_name2sk_indiv_id:
                        attendance_dict2["Individual ID"] = full_name2sk_indiv_id[full_name]
                        if event_name == "Christmas Eve":
                            attendance_dict2["Date"] = christmas_eve_date
                        else:
                            attendance_dict2["Date"] = month_sundays[week_index]
                        attendance_dict2["Event ID"] = event_id
                        if add_extra_fields:
                            attendance_dict2["Time"] = event_times[event_id]
                            attendance_dict2["Full Name"] = full_name
                            attendance_dict2["Event Name"] = event_name
                            attendance_dict2["Week Num"] = week_index + 1
                        attendance_dicts2.append(attendance_dict2)
                    else:
                        print >> sys.stderr, '*** WARNING! Cannot find "' + full_name + '" in map'
                        print >> sys.stderr
                else:
                    print >> sys.stderr, '*** WARNING! Cannot find Sunday date for week index "' + str(week_index) + '"'
                    print >> sys.stderr

    # Check if numbers on Servant Keeper's reported Total: line match the totals we've been accumulating
    # per attendance row entry. If they don't match, show WARNING (not ERROR, since via manual checks, it appears
    # that Servant Keeper totals are buggy)
    if totals_attendance_dict:
        for key in accumulated_row_totals_dict:
            if accumulated_row_totals_dict[key] != totals_attendance_dict[key]:
                pp = pprint.PrettyPrinter(stream=sys.stderr)
                print >> sys.stderr, "*** WARNING! Servant Keeper reported totals do not match data totals"
                print >> sys.stderr, "Servant Keeper Totals:"
                pp.pprint(totals_attendance_dict)
                print >> sys.stderr, "Data Totals:"
                pp.pprint(accumulated_row_totals_dict)
                print >> sys.stderr
                break

    return_table = petl.fromdicts(attendance_dicts2)
    header = petl.header(return_table)
    if "Event Name" in header:
        return_table = petl.cut(
            return_table, "Full Name", "Event Name", "Time", "Week Num", "Date", "Event ID", "Individual ID"
        )
    else:
        return_table = petl.cut(return_table, "Date", "Event ID", "Individual ID")

    return return_table
def test_stdoutsource():
    tbl = [('foo', 'bar'), ('a', 1), ('b', 2)]
    etl.tocsv(tbl, StdoutSource(), encoding='ascii')
    etl.tohtml(tbl, StdoutSource(), encoding='ascii')
    etl.topickle(tbl, StdoutSource())
import csv
import petl as etl
import os


def call():
    input_filename = os.path.abspath(os.path.join(os.path.dirname(__file__), 'data.csv'))
    return (
        etl
        .fromcsv(input_filename)
        .rename('intptlat', 'Latitude')
        .rename('intptlong', 'Longitude')
        .rename('zcta5', 'ZCTA')
        .convert('Latitude', float)
        .convert('Longitude', float)
    )


if __name__ == '__main__':
    etl.tocsv(call())