def get_bu_graph(**options):
    """
    Build the business-unit export graph.

    A trunk chain feeds ``get_business_units`` through the cost-center join
    and the two Centerstone remap steps. Two branches then hang off
    ``centerstone_BussUnit_remap``; each one ends in a tab-delimited CSV
    writer on the ``brickftp`` filesystem.

    :param options: must contain ``suffix``, which is appended to every
        output path.
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    # Settings shared by both export writers.
    tsv_on_brickftp = dict(lineterminator="\n", delimiter="\t", fs="brickftp")

    graph = bonobo.Graph()

    # Trunk: extract, join and remap the business units.
    trunk = (
        get_business_units,
        join_cost_centers,
        centerstone_BU_SupOrg_Merge_remap,
        centerstone_BussUnit_remap,
    )
    graph.add_chain(*trunk)

    # Branch 1: product lines (printed to the console and written to disk).
    product_line_nodes = (
        productLineLevel1_remap,
        unique_product_line,
        bonobo.UnpackItems(0),
        bonobo.PrettyPrinter(),
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/ProductLineLevel1.txt' + suffix,
            **tsv_on_brickftp),
    )
    graph.add_chain(*product_line_nodes, _input=centerstone_BussUnit_remap)

    # Branch 2: teams.
    team_nodes = (
        teamLevel3_remap,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/TeamLevel3.txt' + suffix,
            **tsv_on_brickftp),
    )
    graph.add_chain(*team_nodes, _input=centerstone_BussUnit_remap)

    return graph
def get_costcenter_graph(**options):
    """
    Build the cost-center export graph.

    The single chain (named ``main``) pulls cost centers, caches them, remaps
    them for Centerstone and writes the same tab-delimited file twice: once
    on the ``brickftp`` filesystem and once on the ``centerstone``
    filesystem. ``bonobo.count`` terminates the chain.

    :param options: must contain ``suffix``, which is appended to both
        output file names.
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    tsv = {"lineterminator": "\n", "delimiter": "\t"}

    # Original author's note on these writers:
    # "Can't skip the header, but must".
    brickftp_copy = bonobo.CsvWriter(
        '/etl/centerstone/downloads/CostCenterLevel2.txt' + suffix,
        fs="brickftp",
        **tsv)
    centerstone_copy = bonobo.CsvWriter(
        'CostCenterLevel2.txt' + suffix,
        fs="centerstone",
        **tsv)

    graph = bonobo.Graph()
    graph.add_chain(
        get_cost_centers,
        cache_cost_centers,
        centerstone_CostCenter_remap,
        bonobo.UnpackItems(0),
        brickftp_copy,
        centerstone_copy,
        bonobo.count,
        _name="main",
    )
    return graph
def get_workday_employee_graph(**options):
    """
    Build the Workday employee export graph.

    The trunk chain remaps Workday users and writes ``workday-users.csv`` to
    both the ``brickftp`` and ``centerstone`` filesystems. Two branches start
    from ``workday_centerstone_employee_remap`` (one through
    ``split_active_employee``, one through ``split_termed_employee``) and
    each writes its file to both filesystems with ``HeaderlessCsvWriter``.

    :param options: must contain ``suffix``, which is appended to every
        output path.
    :return: bonobo.Graph
    """
    suffix = options['suffix']
    tsv = dict(lineterminator="\n", delimiter="\t")

    graph = bonobo.Graph()

    # Trunk: full user export.
    graph.add_chain(
        get_workday_users,
        workday_centerstone_employee_remap,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter(
            '/etl/centerstone/downloads/workday-users.csv' + suffix,
            fs="brickftp", **tsv),
        bonobo.CsvWriter(
            'workday-users.csv' + suffix,
            fs="centerstone", **tsv),
    )

    # (splitter, brickftp path, centerstone path) for each branch, in the
    # order the branches are attached to the graph.
    branches = (
        (split_active_employee,
         '/etl/centerstone/downloads/Mozilla_Active_Users.txt',
         'Mozilla_Active_Users.txt'),
        (split_termed_employee,
         '/etl/centerstone/downloads/Mozilla_Termed_Users.txt',
         'Mozilla_Termed_Users.txt'),
    )
    for splitter, brickftp_path, centerstone_path in branches:
        graph.add_chain(
            splitter,
            bonobo.UnpackItems(0),
            HeaderlessCsvWriter(brickftp_path + suffix,
                                fs="brickftp", **tsv),
            HeaderlessCsvWriter(centerstone_path + suffix,
                                fs="centerstone", **tsv),
            _input=workday_centerstone_employee_remap,
        )

    return graph
def get_graph(**options):
    """
    Build the AWS accounts graph.

    The chain named ``main`` runs ``extract_accounts`` -> ``transform`` ->
    ``aws_accounts_ex.json`` writer -> ``valid_aws_account``. Three more
    chains are attached: a JSON writer fed from ``main``, and a CSV writer
    plus a database upsert that are both fed from ``valid_aws_account``.

    :param options: must contain ``table_suffix``, which is appended to the
        ``aws_accounts`` table name.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    graph.add_chain(*main_nodes, _name="main")

    # JSON dump attached to the chain registered under the name "main".
    json_sink = bonobo.JsonWriter('aws_accounts.json')
    graph.add_chain(json_sink, _input="main")

    # CSV dump of the rows coming out of valid_aws_account.
    csv_sink = bonobo.CsvWriter('aws_accounts.csv')
    graph.add_chain(
        bonobo.UnpackItems(0),
        csv_sink,
        _input=valid_aws_account,
    )

    # Database upsert of the same rows, keyed on account_id.
    db_sink = bonobo_sqlalchemy.InsertOrUpdate(
        table_name='aws_accounts' + options['table_suffix'],
        discriminant=('account_id', ),
        engine='db')
    graph.add_chain(
        bonobo.UnpackItems(0),
        db_sink,
        _input=valid_aws_account,
    )

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Extract the list of one-euro cafes in Paris, rename the name, address and
    zipcode fields, reorder the fields and write the result to CSV, JSON and
    line-delimited JSON files.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        when it is falsy.
    :param _limit: nodes wrapped in a ``PartialGraph`` right after the reader.
    :param _print: nodes wrapped in a ``PartialGraph`` at the end of the
        producer chain.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    # Producer chain, built one step at a time with the cursor API.
    producer = graph.get_cursor() >> ODSReader(
        dataset="liste-des-cafes-a-un-euro", netloc="opendata.paris.fr")
    producer = producer >> PartialGraph(*_limit)
    producer = producer >> bonobo.UnpackItems(0)
    producer = producer >> bonobo.Rename(
        name="nom_du_cafe", address="adresse", zipcode="arrondissement")
    producer = producer >> bonobo.Format(city="Paris", country="France")
    producer = producer >> bonobo.OrderFields([
        "name", "address", "zipcode", "city", "country", "geometry", "geoloc"
    ])
    producer = producer >> PartialGraph(*_print)

    sinks = (
        # Comma separated values.
        bonobo.CsvWriter(
            "coffeeshops.csv",
            fields=["name", "address", "zipcode", "city"],
            delimiter=","),
        # Standard JSON
        bonobo.JsonWriter(path="coffeeshops.json"),
        # Line-delimited JSON
        bonobo.LdjsonWriter(path="coffeeshops.ldjson"),
    )
    # Every sink hangs off the producer's output.
    for sink in sinks:
        graph.get_cursor(producer.output) >> sink

    return graph
def get_graph(graph=None, *, _limit=(), _print=()):
    """
    Extract the list of one-euro cafes in Paris, rename the name, address and
    zipcode fields, reorder the fields and write the result to CSV, JSON and
    line-delimited JSON files.

    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        when it is falsy.
    :param _limit: extra nodes spliced in right after the reader.
    :param _print: extra nodes spliced in at the end of the producer chain.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    producer_nodes = [
        ODSReader(dataset='liste-des-cafes-a-un-euro',
                  netloc='opendata.paris.fr'),
        *_limit,
        bonobo.UnpackItems(0),
        bonobo.Rename(name='nom_du_cafe',
                      address='adresse',
                      zipcode='arrondissement'),
        bonobo.Format(city='Paris', country='France'),
        bonobo.OrderFields([
            'name', 'address', 'zipcode', 'city', 'country', 'geometry',
            'geoloc'
        ]),
        *_print,
    ]
    producer = graph.add_chain(*producer_nodes)

    sinks = (
        # Comma separated values.
        bonobo.CsvWriter('coffeeshops.csv',
                         fields=['name', 'address', 'zipcode', 'city'],
                         delimiter=','),
        # Standard JSON
        bonobo.JsonWriter(path='coffeeshops.json'),
        # Line-delimited JSON
        bonobo.LdjsonWriter(path='coffeeshops.ldjson'),
    )
    # Every sink is its own one-node chain fed by the producer's output.
    for sink in sinks:
        graph.add_chain(sink, _input=producer.output)

    return graph
def get_graph(job, graph=None, *, _limit=(), _print=()):
    """
    Build the execution graph for one job.

    Rows are read from ``job.input_file`` on the ``FS_IN_SERVICE_ID``
    filesystem (first row skipped, fixed field list), passed through
    ``search`` and written to ``job.output_file`` on the
    ``FS_OUT_SERVICE_ID`` filesystem.

    :param job: object providing ``input_file`` and ``output_file``.
    :param graph: existing graph to extend; a new ``bonobo.Graph`` is created
        when it is falsy.
    :param _limit: extra nodes spliced in right after the reader.
    :param _print: extra nodes spliced in after the writer.
    :return: bonobo.Graph
    """
    if not graph:
        graph = bonobo.Graph()

    source = bonobo.CsvReader(
        job.input_file,
        fs=FS_IN_SERVICE_ID,
        fields=['integration_id', 'site_name', 'address', 'borough', 'status'],
        skip=1,
    )
    nodes = [source, *_limit, search, bonobo.UnpackItems(0)]
    nodes.append(bonobo.CsvWriter(job.output_file, fs=FS_OUT_SERVICE_ID))
    nodes.extend(_print)

    graph.add_chain(*nodes)
    return graph
def get_graph(**options):
    """
    Build the wishlist export graph.

    A single chain named ``main`` runs ``get_cards`` through ``wishlist_map``
    and writes the result to ``Deckbox-wishlist.csv``. ``options`` is
    accepted but not used here.

    :return: bonobo.Graph
    """
    pipeline = (
        get_cards,
        wishlist_map,
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('Deckbox-wishlist.csv'),
    )

    graph = bonobo.Graph()
    graph.add_chain(*pipeline, _name='main')
    return graph
def get_graph(*, _limit=None, _print=False):
    """
    Build a graph that copies ``coffeeshops.csv`` to the ``fs.output``
    filesystem.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node is inserted
        after the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node is inserted
        before the writer.
    :return: bonobo.Graph
    """
    nodes = [bonobo.CsvReader("coffeeshops.csv")]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter("coffeeshops.csv", fs="fs.output"))
    return bonobo.Graph(*nodes)
def get_graph(**options):
    """
    Build the AWS billing import graph.

    The chain named ``main`` has no input of its own (``_input=None``); it is
    fed by one ``AwsBillingReader`` chain per month, built in the loop below.
    A second database writer is attached to the output of ``parse_dates``.

    :param options: reads ``table_suffix``, ``now``, ``months``, ``limit``,
        ``aws_account_id`` and ``table``.
    :return: bonobo.Graph
    """
    graph = bonobo.Graph()

    # Shared processing chain; the readers are plugged into it further down.
    main_nodes = [
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' +
            options['table_suffix'],
            discriminant=('productname', 'date_sk', 'account_name_sk'),
            engine='database'),
    ]
    graph.add_chain(*main_nodes, _name="main", _input=None)

    # Go to beginning of month
    month = options['now']
    month += relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0)

    # Walk backwards one month at a time, starting with the previous month.
    for index in range(options['months']):
        month = month + relativedelta(months=-1)
        tstamp = month.strftime("%Y-%m")
        print("# %d Processing %s" % (index, tstamp))

        # A fresh Limit node per reader chain, only when a limit is set.
        limit_nodes = (
            (bonobo.Limit(options['limit']), ) if options['limit'] else ())

        reader = AwsBillingReader(
            '%s-aws-cost-allocation-%s.csv' %
            (options['aws_account_id'], tstamp),
            fs='s3',
            skip=1)
        graph.add_chain(reader, *limit_nodes, _output="main")

    # Second database writer, fed by the rows leaving parse_dates.
    detail_writer = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                      'recordid'),
        engine='database')
    graph.add_chain(detail_writer, _input=parse_dates)

    return graph
def get_graph(*, _limit=None, _print=False):
    """
    Build a graph that reads ``datasets/coffeeshops.txt`` as CSV and writes
    ``coffeeshops.csv`` to the ``fs.output`` filesystem.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node is inserted
        after the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node is inserted
        before the writer.
    :return: bonobo.Graph
    """
    nodes = [bonobo.CsvReader('datasets/coffeeshops.txt')]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter('coffeeshops.csv', fs='fs.output'))
    return bonobo.Graph(*nodes)
def get_graph(**options):
    """
    Build the card-collection conversion graph.

    The chain named ``main`` (``DeckedBuilder.csv`` writer -> ``metadata`` ->
    ``split``) has no input of its own; it is fed by one CSV reader chain per
    source file. Export chains then branch off ``split``: EchoMTG and MTG
    Studio when their module-level flags are set, and Deckbox always.
    ``options`` is accepted but not used here.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    # Pass-through node that serves as the fan-out point for the exports.
    split = bonobo.noop

    graph.add_chain(
        bonobo.CsvWriter('DeckedBuilder.csv'),
        metadata,
        split,
        _input=None,
        _name='main',
    )

    # (source file, value for the Language field), in the order the reader
    # chains are added to the graph.
    sources = (
        ('main-en.csv', 'English'),
        ('main-de.csv', 'German'),
        ('main-ru.csv', 'Russian'),
        ('main-it.csv', 'Italian'),
        ('main-jp.csv', 'Japanese'),
        ('main-fr.csv', 'French'),
        ('main-kr.csv', 'Korean'),
        ('main-cs.csv', 'Chinese'),
        ('Deckbox-extras.csv', 'English'),
    )
    for filename, language in sources:
        graph.add_chain(
            bonobo.CsvReader(filename),
            bonobo.Format(Language=language),
            _output='main',
        )

    if ECHO_MTG:
        # Reg Qty,Foil Qty,Name,Set,Acquired,Language
        echomtg = {'Acquired For': '0.004', 'Language': 'en'}
        graph.add_chain(
            # echomtg specific fiddling
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Name='Card'),
            bonobo.Format(**echomtg),
            bonobo.CsvWriter('EchoMTG.csv'),
            _input=split,
        )

    # MTG Studio
    if MTG_STUDIO:
        graph.add_chain(
            mtg_studio,
            remove_metadata,
            bonobo.UnpackItems(0),
            bonobo.Rename(Edition='Set'),
            bonobo.CsvWriter('MTG-Studio.csv'),
            _input=split,
        )

    # NOTE: a trade-list / buy-list export chain (tradeable ->
    # DeckedBuilder-tradelist.csv, CardKingdom-buylist.csv and a
    # tab-delimited mtgprice-buylist.csv) used to branch off `split` here.
    # It is currently disabled.

    # Deckbox export. The former `if DECKBOX:` guard is disabled, so this
    # part always runs.
    csv_out = bonobo.CsvWriter('Deckbox-inventory.csv')
    graph.add_chain(
        deckbox,
        bonobo.UnpackItems(0),
        csv_out,
        _input=split,
    )
    # Rows from Deckbox-specials.csv go straight into the same writer.
    graph.add_chain(bonobo.CsvReader('Deckbox-specials.csv'), _output=csv_out)

    return graph
import bonobo


def guess_email(**row):
    """
    Return a copy of the row with an added ``email`` field.

    The address is built as ``<name>@<domain>`` from the row's ``name`` and
    ``domain`` values.

    :param row: row fields passed as keyword arguments; ``name`` and
        ``domain`` are required.
    :return: dict with every original field plus ``email``.
    :raises KeyError: if ``name`` or ``domain`` is missing.
    """
    return {**row, 'email': row['name'] + '@' + row['domain']}


graph = bonobo.Graph(
    bonobo.CsvReader('employees.csv'),
    # The row must be collected as keyword arguments (``**row``) so it can be
    # looked up by field name. With ``*row`` it is a tuple, and
    # ``row['position']`` raises TypeError on the first row. This matches how
    # guess_email above receives its row.
    bonobo.Filter(lambda **row: row['position'] != 'CEO'),
    # guess_email is currently left out of the pipeline.
    bonobo.CsvWriter('employees.output.csv'),
)

if __name__ == "__main__":
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser):
        bonobo.run(graph)