def test_postgres(postgres):
    # bonobo.settings.QUIET.set(True)
    db_name = 'my_db'
    port = postgres['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']
    wait_for_postgres(port)

    root_engine = create_root_engine(port)
    _execute_sql(root_engine, "CREATE ROLE my_user WITH LOGIN PASSWORD '';")
    _execute_sql(
        root_engine,
        'CREATE DATABASE {name} WITH OWNER=my_user TEMPLATE=template0 ENCODING="utf-8"'.format(name=db_name),
    )

    engine = create_engine('my_user', db_name, port)
    metadata.create_all(engine)
    services = {'sqlalchemy.engine': engine}

    # Load the rows produced by the extract generator into TABLE_1.
    graph = bonobo.Graph()
    graph.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate(TABLE_1))
    assert bonobo.run(graph, services=services)

    # Read TABLE_1 back and check its contents.
    buf = Bufferize()
    graph = bonobo.Graph()
    graph.add_chain(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        buf,
    )
    assert bonobo.run(graph, services=services)
    assert buf.buffer == [
        ((0, 'value for 0'), {}), ((1, 'value for 1'), {}),
        ((2, 'value for 2'), {}), ((3, 'value for 3'), {}),
        ((4, 'value for 4'), {}), ((5, 'value for 5'), {}),
        ((6, 'value for 6'), {}), ((7, 'value for 7'), {}),
        ((8, 'value for 8'), {}), ((9, 'value for 9'), {}),
    ]

    # Copy TABLE_1 into TABLE_2.
    graph = bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        bonobo_sqlalchemy.InsertOrUpdate(TABLE_2),
    )
    assert bonobo.run(graph, services=services)

    # Read TABLE_2 back and check that the copy is identical.
    buf = Bufferize()
    graph = bonobo.Graph()
    graph.add_chain(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_2),
        buf,
    )
    assert bonobo.run(graph, services=services)
    assert buf.buffer == [
        ((0, 'value for 0'), {}), ((1, 'value for 1'), {}),
        ((2, 'value for 2'), {}), ((3, 'value for 3'), {}),
        ((4, 'value for 4'), {}), ((5, 'value for 5'), {}),
        ((6, 'value for 6'), {}), ((7, 'value for 7'), {}),
        ((8, 'value for 8'), {}), ((9, 'value for 9'), {}),
    ]

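# The test above relies on helpers that are not shown (wait_for_postgres,
# create_root_engine, create_engine, _execute_sql, metadata, TABLE_1/TABLE_2).
# A minimal sketch of the two that matter most for the assertions, matching the
# names used above; this is illustrative, not the original code:


def extract():
    # Produce the ten rows the assertions expect.
    for i in range(10):
        yield i, 'value for %d' % i


class Bufferize:
    # Terminal node that records every (args, kwargs) pair it receives so the
    # test can inspect what flowed through the graph.
    def __init__(self):
        self.buffer = []

    def __call__(self, *args, **kwargs):
        self.buffer.append((args, kwargs))
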
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    graph.add_chain(
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
        _name="main",
    )
    graph.add_chain(
        bonobo.JsonWriter('aws_accounts.json'),
        _input="main",
    )
    graph.add_chain(
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('aws_accounts.csv'),
        _input=valid_aws_account,
    )
    graph.add_chain(
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='aws_accounts' + options['table_suffix'],
            discriminant=('account_id',),
            engine='db',
        ),
        _input=valid_aws_account,
    )
    return graph

def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop
    graph.add_chain(
        extract_accounts,
        transform,
        valid_aws_account,
        bonobo.UnpackItems(0),
        split_dbs,
        _name="main",
    )
    for engine in list(set(options['engine'])):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('linked_account_number',),
                engine=engine,
            ),
            _input=split_dbs,
        )
    return graph

def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop
    graph.add_chain(
        bonobo.CsvReader('/etl/metrics-insights/workday-users.csv', fs='brickftp'),
        employee_active,
        find_badge_id,
        bonobo.UnpackItems(0),
        split_dbs,
    )
    for engine in list(set(options['engine'])):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('badgeid',),
                buffer_size=10,
                engine=engine,
            ),
            _input=split_dbs,
        )
    return graph

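# The two graphs above expect a 'brickftp' filesystem service plus one
# SQLAlchemy engine service per name in options['engine']. A minimal sketch of
# a matching service provider; the FTP URL, database URLs and engine names
# ('warehouse', 'reporting') are placeholders rather than values from the
# original project, and the ftp:// opener assumes the relevant fs plugin is
# installed:
import bonobo
import sqlalchemy


def get_services(**options):
    return {
        'brickftp': bonobo.open_fs('ftp://user:password@ftp.example.com'),
        'warehouse': sqlalchemy.create_engine('postgresql://localhost/warehouse'),
        'reporting': sqlalchemy.create_engine('postgresql://localhost/reporting'),
    }
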
def get_graph(**options):
    return bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM table', engine='sqlalchemy.pgengine'),
        bonobo_sqlalchemy.InsertOrUpdate(table_name='table_1', engine='sqlalchemy.pgengine'),
    )

def get_graph(**options):
    """This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    return bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM table', engine='sqlalchemy.pgengine'),
        bonobo_sqlalchemy.InsertOrUpdate(table_name='table_1', engine='sqlalchemy.pgengine'),
    )

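# Both variants above read from and write to the same 'sqlalchemy.pgengine'
# service. A minimal sketch of providing that service and running the graph
# with bonobo's standard CLI boilerplate; the connection URL is a placeholder:
import bonobo
import sqlalchemy


def get_services(**options):
    return {
        'sqlalchemy.pgengine': sqlalchemy.create_engine('postgresql://localhost/example'),
    }


if __name__ == '__main__':
    parser = bonobo.get_argument_parser()
    with bonobo.parse_args(parser) as options:
        bonobo.run(get_graph(**options), services=get_services(**options))
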
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop
    graph.add_chain(
        GetOrderXML(
            prefix="/etl/ivm",
            glob=['Mozilla_Corporation{timestamp:%Y_%m_%d}*.xml'.format(timestamp=options['now'])],
        ),
        ParseDates(['Transactionlog_Tranenddatetime']),
        truncate_description,
        bonobo.UnpackItems(0),
        bonobo.Rename(
            transaction_date='Transactionlog_Tranenddatetime',
            item_number='Transactionlog_Itemnumber',
            transaction_id='Transactionlog_Tlid',
            item_description='Transactionlog_Itemdesc',
        ),
        bonobo.Rename(
            user_id='Transactionlog_User',
            quantity='Transactionlog_Qty',
            transaction_code='Transactionlog_Transcode',
            description='Vendingmachines_Descr',
        ),
        split_dbs,
        _name="main",
    )
    # insert into ivm (description, transaction_id, item_number, item_description,
    # user_id, quantity, transaction_date, transaction_code) values
    for engine in list(set(options['engine'])):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('transaction_id',),
                engine=engine,
            ),
            _input=split_dbs,
        )
    return graph

def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    split_dbs = bonobo.noop
    graph.add_chain(
        bonobo.CsvReader(
            options['input_file'],
            delimiter='|',
            fields=('Admitted', 'blank1', 'Timestamp', 'blank2', 'Name', 'card_id', 'Location'),
            fs='brickftp',
        ),
        timestamp,
        card_id,
        map_fields,
        bonobo.UnpackItems(0),
        split_dbs,
        _name="main",
    )
    for engine in list(set(options['engine'])):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('activitydate', 'badgeid', 'username', 'location'),
                engine=engine,
            ),
            _input=split_dbs,
        )
    return graph

def get_graph(**options):
    graph = bonobo.Graph()
    graph.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate('example'))
    return graph

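# The minimal example above assumes an `extract` generator and, since no
# engine name is passed to InsertOrUpdate, the default 'sqlalchemy.engine'
# service (the same name used in the test_postgres services dict). A possible
# sketch of those pieces; the rows and the SQLite URL are made up for
# illustration:
import sqlalchemy


def extract():
    # Yield a couple of dict rows whose keys match columns of the 'example' table.
    yield {'id': 1, 'value': 'first'}
    yield {'id': 2, 'value': 'second'}


def get_services(**options):
    return {'sqlalchemy.engine': sqlalchemy.create_engine('sqlite:///example.db')}
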
def get_graph(**options):
    """
    This function builds the graph that needs to be executed.

    :return: bonobo.Graph
    """
    graph = bonobo.Graph()
    graph.add_chain(
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        # bonobo.PrettyPrinter(),
        filter_summary,
        # bonobo.PrettyPrinter(),
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' + options['table_suffix'],
            discriminant=('productname', 'date_sk', 'account_name_sk'),
            engine='database',
        ),
        _name="main",
        _input=None,
    )

    now = options['now']
    # Go to beginning of month
    now += relativedelta(day=1, hour=0, minute=0, second=0, microsecond=0)
    when = now
    for log in range(0, options['months']):
        when = when + relativedelta(months=-1)
        tstamp = when.strftime("%Y-%m")
        print("# %d Processing %s" % (log, tstamp))
        if options['limit']:
            _limit = (bonobo.Limit(options['limit']),)
        else:
            _limit = ()
        graph.add_chain(
            AwsBillingReader(
                '%s-aws-cost-allocation-%s.csv' % (options['aws_account_id'], tstamp),
                fs='s3',
                skip=1,
            ),
            *_limit,
            _output="main",
        )

    graph.add_chain(
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name=options['table'] + options['table_suffix'],
            discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid', 'recordid'),
            engine='database',
        ),
        _input=parse_dates,
    )
    return graph
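
# The billing graph above needs an 's3' filesystem service, a 'database'
# SQLAlchemy engine, and relativedelta from python-dateutil. A minimal sketch
# of the supporting pieces; the bucket name and connection URL are
# placeholders, and the s3:// opener assumes the fs-s3fs plugin is installed:
import bonobo
import sqlalchemy
from dateutil.relativedelta import relativedelta  # used by get_graph above


def get_services(**options):
    return {
        's3': bonobo.open_fs('s3://example-billing-bucket'),
        'database': sqlalchemy.create_engine('postgresql://localhost/warehouse'),
    }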