示例#1
0
def test_postgres(postgres):
    """Round-trip rows through PostgreSQL with bonobo_sqlalchemy nodes.

    Steps, in order:

    1. Wait for the server on the host port published for ``5432/tcp`` and,
       through the root engine, create role ``my_user`` and database ``my_db``.
    2. Create the schema from ``metadata`` using an engine for ``my_user``.
    3. Run ``extract`` into ``InsertOrUpdate(TABLE_1)`` and check the rows
       read back from ``TABLE_1``.
    4. Copy ``TABLE_1`` into ``TABLE_2`` with a ``Select`` -> ``InsertOrUpdate``
       graph and check the rows read back from ``TABLE_2``.

    :param postgres: fixture value; indexed like a container inspection
        mapping (``NetworkSettings`` / ``Ports`` / ``5432/tcp``).
    """
    host_port = postgres['NetworkSettings']['Ports']['5432/tcp'][0]['HostPort']
    database = 'my_db'

    wait_for_postgres(host_port)
    admin_engine = create_root_engine(host_port)
    _execute_sql(admin_engine, "CREATE ROLE my_user WITH LOGIN PASSWORD '';")
    create_db_sql = (
        'CREATE DATABASE {name} WITH OWNER=my_user TEMPLATE=template0 ENCODING="utf-8"'
        .format(name=database))
    _execute_sql(admin_engine, create_db_sql)

    user_engine = create_engine('my_user', database, host_port)
    metadata.create_all(user_engine)

    services = {'sqlalchemy.engine': user_engine}

    # Both tables are expected to end up with exactly these ten rows.
    expected_rows = [
        ((0, 'value for 0'), {}),
        ((1, 'value for 1'), {}),
        ((2, 'value for 2'), {}),
        ((3, 'value for 3'), {}),
        ((4, 'value for 4'), {}),
        ((5, 'value for 5'), {}),
        ((6, 'value for 6'), {}),
        ((7, 'value for 7'), {}),
        ((8, 'value for 8'), {}),
        ((9, 'value for 9'), {}),
    ]

    def _read_back(table):
        # Select everything from `table` into a fresh buffer and return it.
        sink = Bufferize()
        reader = bonobo.Graph()
        reader.add_chain(
            bonobo_sqlalchemy.Select('SELECT * FROM ' + table),
            sink,
        )
        assert bonobo.run(reader, services=services)
        return sink.buffer

    # Load TABLE_1 from the extractor.
    loader = bonobo.Graph()
    loader.add_chain(extract, bonobo_sqlalchemy.InsertOrUpdate(TABLE_1))
    assert bonobo.run(loader, services=services)
    assert _read_back(TABLE_1) == expected_rows

    # Copy TABLE_1 into TABLE_2.
    copier = bonobo.Graph(
        bonobo_sqlalchemy.Select('SELECT * FROM ' + TABLE_1),
        bonobo_sqlalchemy.InsertOrUpdate(TABLE_2),
    )
    assert bonobo.run(copier, services=services)
    assert _read_back(TABLE_2) == expected_rows
def get_graph(**options):
    """
    Assemble the AWS accounts graph.

    The chain registered under the name "main" is
    ``extract_accounts`` -> ``transform`` -> JSON writer
    (``aws_accounts_ex.json``) -> ``valid_aws_account``.  Three further chains
    are attached: a JSON writer (``aws_accounts.json``) fed from "main", and a
    CSV writer plus a database ``InsertOrUpdate`` that are both fed from
    ``valid_aws_account`` after ``UnpackItems(0)``.

    :param options: keyword options; ``table_suffix`` is appended to the
        ``aws_accounts`` table name.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    main_nodes = (
        extract_accounts,
        transform,
        bonobo.JsonWriter('aws_accounts_ex.json'),
        valid_aws_account,
    )
    pipeline.add_chain(*main_nodes, _name="main")

    # JSON dump attached to the chain named "main".
    pipeline.add_chain(bonobo.JsonWriter('aws_accounts.json'), _input="main")

    # CSV export of whatever valid_aws_account emits.
    csv_nodes = (
        bonobo.UnpackItems(0),
        bonobo.CsvWriter('aws_accounts.csv'),
    )
    pipeline.add_chain(*csv_nodes, _input=valid_aws_account)

    # Database load of the same output, keyed on account_id.
    db_nodes = (
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='aws_accounts' + options['table_suffix'],
            discriminant=('account_id', ),
            engine='db',
        ),
    )
    pipeline.add_chain(*db_nodes, _input=valid_aws_account)

    return pipeline
def get_graph(**options):
    """
    Build the graph that loads account rows into every configured engine.

    The "main" chain is ``extract_accounts`` -> ``transform`` ->
    ``valid_aws_account`` -> ``UnpackItems(0)`` -> ``bonobo.noop``.  The noop
    node is the fan-out point: one ``InsertOrUpdate`` chain is attached to it
    for each distinct entry of ``options['engine']``.

    :param options: keyword options; reads ``engine`` (iterable of hashable
        engine service names), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    split_dbs = bonobo.noop

    graph.add_chain(extract_accounts,
                    transform,
                    valid_aws_account,
                    bonobo.UnpackItems(0),
                    split_dbs,
                    _name="main")

    # dict.fromkeys drops duplicate engines while keeping the order they were
    # given in, so the chains are attached in a reproducible order (iterating
    # a set gives an arbitrary order) and no throwaway list is built.
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('linked_account_number', ),
                engine=engine),
            _input=split_dbs)

    return graph
示例#4
0
def get_graph(**options):
    """
    Build the graph that loads the workday users CSV into every configured engine.

    The first chain reads ``/etl/metrics-insights/workday-users.csv`` through
    the ``brickftp`` filesystem service, then runs ``employee_active`` ->
    ``find_badge_id`` -> ``UnpackItems(0)`` -> ``bonobo.noop``.  The noop node
    is the fan-out point: one ``InsertOrUpdate`` chain (``buffer_size=10``,
    keyed on ``badgeid``) is attached to it for each distinct entry of
    ``options['engine']``.

    :param options: keyword options; reads ``engine`` (iterable of hashable
        engine service names), ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """

    graph = bonobo.Graph()

    split_dbs = bonobo.noop

    graph.add_chain(
        bonobo.CsvReader('/etl/metrics-insights/workday-users.csv',
                         fs='brickftp'),
        employee_active,
        find_badge_id,
        bonobo.UnpackItems(0),
        split_dbs)

    # dict.fromkeys drops duplicate engines while keeping the order they were
    # given in, so the chains are attached in a reproducible order (iterating
    # a set gives an arbitrary order) and no throwaway list is built.
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('badgeid', ),
                buffer_size=10,
                engine=engine),
            _input=split_dbs)

    return graph
示例#5
0
def get_graph(**options):
    """Return a graph chaining a ``Select`` on ``table`` into an ``InsertOrUpdate`` on ``table_1``.

    Both nodes use the ``sqlalchemy.pgengine`` engine service; ``options`` is
    accepted but not read.
    """
    reader = bonobo_sqlalchemy.Select('SELECT * FROM table',
                                      engine='sqlalchemy.pgengine')
    writer = bonobo_sqlalchemy.InsertOrUpdate(table_name='table_1',
                                              engine='sqlalchemy.pgengine')
    return bonobo.Graph(reader, writer)
示例#6
0
def get_graph(**options):
    """
    Build the graph that needs to be executed.

    The graph has two nodes, both bound to the ``sqlalchemy.pgengine`` engine
    service: a ``Select`` over ``table`` followed by an ``InsertOrUpdate``
    into ``table_1``.  ``options`` is accepted but not read.

    :return: bonobo.Graph
    """
    nodes = (
        bonobo_sqlalchemy.Select('SELECT * FROM table',
                                 engine='sqlalchemy.pgengine'),
        bonobo_sqlalchemy.InsertOrUpdate(table_name='table_1',
                                         engine='sqlalchemy.pgengine'),
    )
    return bonobo.Graph(*nodes)
示例#7
0
def get_graph(**options):
    """
    Build the graph that loads IVM order XML into every configured engine.

    The "main" chain runs ``GetOrderXML`` over files under ``/etl/ivm`` whose
    name matches the date of ``options['now']``, then ``ParseDates`` on
    ``Transactionlog_Tranenddatetime``, ``truncate_description``,
    ``UnpackItems(0)``, two ``Rename`` nodes mapping the ``Transactionlog_*`` /
    ``Vendingmachines_Descr`` fields to snake_case names, and finally
    ``bonobo.noop``.  The noop node is the fan-out point: one
    ``InsertOrUpdate`` chain keyed on ``transaction_id`` is attached to it for
    each distinct entry of ``options['engine']``.

    :param options: keyword options; reads ``now`` (formatted with
        ``%Y_%m_%d``), ``engine`` (iterable of hashable engine service names),
        ``table_name`` and ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    split_dbs = bonobo.noop

    graph.add_chain(
        GetOrderXML(prefix="/etl/ivm",
                    glob=[
                        'Mozilla_Corporation{timestamp:%Y_%m_%d}*.xml'.format(
                            timestamp=options['now'])
                    ]),
        ParseDates(['Transactionlog_Tranenddatetime']),
        truncate_description,
        bonobo.UnpackItems(0),
        bonobo.Rename(transaction_date='Transactionlog_Tranenddatetime',
                      item_number='Transactionlog_Itemnumber',
                      transaction_id='Transactionlog_Tlid',
                      item_description='Transactionlog_Itemdesc'),
        bonobo.Rename(
            user_id='Transactionlog_User',
            quantity='Transactionlog_Qty',
            transaction_code='Transactionlog_Transcode',
            description='Vendingmachines_Descr',
        ),
        split_dbs,
        _name="main")

    # Reference shape of the resulting insert:
    # insert into ivm (description, transaction_id, item_number, item_description,
    #                  user_id, quantity, transaction_date, transaction_code) values ...

    # dict.fromkeys drops duplicate engines while keeping the order they were
    # given in, so the chains are attached in a reproducible order (iterating
    # a set gives an arbitrary order) and no throwaway list is built.
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=('transaction_id', ),
                engine=engine),
            _input=split_dbs)

    return graph
示例#8
0
def get_graph(**options):
    """
    Build the graph that loads a pipe-delimited badge activity file into every
    configured engine.

    The "main" chain reads ``options['input_file']`` through the ``brickftp``
    filesystem service with a fixed seven-column field list, then runs
    ``timestamp`` -> ``card_id`` -> ``map_fields`` -> ``UnpackItems(0)`` ->
    ``bonobo.noop``.  The noop node is the fan-out point: one
    ``InsertOrUpdate`` chain keyed on (``activitydate``, ``badgeid``,
    ``username``, ``location``) is attached to it for each distinct entry of
    ``options['engine']``.

    :param options: keyword options; reads ``input_file``, ``engine``
        (iterable of hashable engine service names), ``table_name`` and
        ``table_suffix``.
    :return: bonobo.Graph

    """
    graph = bonobo.Graph()

    split_dbs = bonobo.noop

    graph.add_chain(bonobo.CsvReader(options['input_file'],
                                     delimiter='|',
                                     fields=('Admitted', 'blank1', 'Timestamp',
                                             'blank2', 'Name', 'card_id',
                                             'Location'),
                                     fs='brickftp'),
                    timestamp,
                    card_id,
                    map_fields,
                    bonobo.UnpackItems(0),
                    split_dbs,
                    _name="main")

    # dict.fromkeys drops duplicate engines while keeping the order they were
    # given in, so the chains are attached in a reproducible order (iterating
    # a set gives an arbitrary order) and no throwaway list is built.
    for engine in dict.fromkeys(options['engine']):
        graph.add_chain(
            bonobo_sqlalchemy.InsertOrUpdate(
                table_name=options['table_name'] + options['table_suffix'],
                discriminant=(
                    'activitydate',
                    'badgeid',
                    'username',
                    'location',
                ),
                engine=engine),
            _input=split_dbs)

    return graph
示例#9
0
def get_graph(**options):
    """Return a graph that feeds ``extract`` into ``InsertOrUpdate('example')``.

    ``options`` is accepted but not read.
    """
    pipeline = bonobo.Graph()
    loader = bonobo_sqlalchemy.InsertOrUpdate('example')
    pipeline.add_chain(extract, loader)
    return pipeline
示例#10
0
def get_graph(**options):
    """
    Assemble the AWS billing graph.

    The chain named "main" is created with ``_input=None``; it starts at a CSV
    writer (``billing.csv``) and a JSON writer (``billing.json``), runs the
    cleaning, lookup and summarising callables, and ends in an
    ``InsertOrUpdate`` on ``fact_itsm_aws_historical_cost`` + suffix.

    One ``AwsBillingReader`` chain is then added per month, each with
    ``_output="main"``, for the ``options['months']`` months preceding the
    month of ``options['now']``.  When ``options['limit']`` is truthy a
    ``bonobo.Limit`` node follows each reader.  A last chain attaches an
    ``InsertOrUpdate`` on ``options['table']`` + suffix to the output of
    ``parse_dates``.

    A progress line is printed for each month while the graph is built.

    :param options: keyword options; reads ``table_suffix``, ``now``,
        ``months``, ``limit``, ``aws_account_id`` and ``table``.
    :return: bonobo.Graph

    """
    pipeline = bonobo.Graph()

    main_nodes = [
        bonobo.CsvWriter('billing.csv'),
        bonobo.JsonWriter('billing.json'),
        invalid_entries,
        fix_numbers,
        parse_dates,
        filter_summary,
        lookup_account_sk,
        lookup_date_sk,
        summarize_costs,
        bonobo.UnpackItems(0),
        bonobo_sqlalchemy.InsertOrUpdate(
            table_name='fact_itsm_aws_historical_cost' +
            options['table_suffix'],
            discriminant=(
                'productname',
                'date_sk',
                'account_name_sk',
            ),
            engine='database'),
    ]
    pipeline.add_chain(*main_nodes, _name="main", _input=None)

    # Start from the first instant of the current month, then step backwards.
    when = options['now'] + relativedelta(
        day=1, hour=0, minute=0, second=0, microsecond=0)

    for index in range(options['months']):
        when += relativedelta(months=-1)
        tstamp = when.strftime("%Y-%m")
        print("# %d Processing %s" % (index, tstamp))

        # Optional row cap; a new Limit node is built for every reader chain.
        _limit = (bonobo.Limit(options['limit']), ) if options['limit'] else ()

        report_name = '%s-aws-cost-allocation-%s.csv' % (
            options['aws_account_id'], tstamp)
        pipeline.add_chain(
            AwsBillingReader(report_name, fs='s3', skip=1),
            *_limit,
            _output="main",
        )

    # Detail rows: taken from the output of parse_dates.
    detail_loader = bonobo_sqlalchemy.InsertOrUpdate(
        table_name=options['table'] + options['table_suffix'],
        discriminant=('invoiceid', 'linkedaccountid', 'payeraccountid',
                      'recordid'),
        engine='database')
    pipeline.add_chain(detail_loader, _input=parse_dates)

    return pipeline