Exemplo n.º 1
0
def create_connection(
        configuration: dict, isolation_level: str = None) -> Connection:
    """Open a database connection described by ``configuration``.

    An engine is created from ``configuration['connection_url']`` using
    ``NullPool`` and a connection is opened from it. When the configuration
    holds a non-``None`` ``'connection_query'``, that statement is logged and
    executed on the new connection before the connection is returned.

    :param configuration: must contain ``'connection_url'``; may contain
        ``'connection_query'``.
    :param isolation_level: passed to ``create_engine`` when truthy. When it
        is ``None`` or empty the argument is omitted altogether, so the
        backend's default isolation level applies. Check your backend DB
        documentation to know which one is the default, as it varies between
        databases.
    :return: the open connection. Closing it is the caller's job.
    :raises Exception: whatever the connection query raises; the connection
        is closed before the error is propagated.
    """
    # todo: a user may prefer to run a session that commits data only at the
    #  very end of the ETL instead of an auto commit execution at engine level.

    # Only forward isolation_level when one was requested, so that the engine
    # is otherwise built exactly as if the parameter did not exist.
    engine_options = {'poolclass': NullPool}
    if isolation_level:
        engine_options['isolation_level'] = isolation_level
    engine = create_engine(configuration['connection_url'], **engine_options)

    connection = engine.connect()
    connection_query = configuration.get('connection_query')
    if connection_query is not None:
        logger.info(f'Running connection query: '
                    f'{connection_query}')
        try:
            connection.execute(text(connection_query))
        except Exception:
            # The caller never receives the connection when the set-up query
            # fails, so it has to be released here.
            connection.close()
            raise

    return connection
Exemplo n.º 2
0
    def render_queries(self) -> None:
        """Log each query of the configured order that is inside the step range.

        Positions in ``self.configuration["order"]`` are zero-based and are
        kept when they lie between ``self.from_step_index`` and
        ``self.to_step_index`` (both inclusive). For every kept entry the
        query text is read from ``self.configuration["queries"]`` and logged
        under a one-based step number. Nothing is executed.
        """
        for position, name in enumerate(self.configuration["order"]):
            # only positions inside the requested window are rendered
            if self.from_step_index <= position <= self.to_step_index:
                sql = self.configuration["queries"][name]
                step_number = position + 1
                logger.info(f'\n##### Query {step_number}: {name} #####\n'
                            f'\n{sql}\n')
Exemplo n.º 3
0
    def run_job(sqlbucket, name, db, fstep, tstep, to_date, from_date,
                from_days, to_days, group, isolation, verbose, rendering, all,
                edb, args):
        """Run (or only render) project ``name`` on one or several connections.

        :param sqlbucket: object exposing ``connections``,
            ``connection_variables`` and ``load_project``.
        :param name: project name passed to ``load_project``.
        :param db: comma separated connection names to include.
        :param fstep: forwarded as ``from_step``.
        :param tstep: forwarded as ``to_step``.
        :param to_date: value stored under the ``"to"`` variable.
        :param from_date: value stored under the ``"from"`` variable.
        :param from_days: when not ``None``, replaces ``from_date`` with
            ``n_days_ago(int(from_days))``.
        :param to_days: when not ``None``, replaces ``to_date`` with
            ``n_days_ago(int(to_days))``.
        :param group: forwarded to ``render`` / ``run``.
        :param isolation: isolation level requested on the command line; when
            ``None``, each connection's own ``'isolation_level'`` variable
            (if any) is used for that connection only.
        :param verbose: forwarded to ``run``.
        :param rendering: when truthy the project is rendered, not run.
        :param all: when truthy and ``db`` is empty, every key of
            ``sqlbucket.connections`` is used.
        :param edb: comma separated connection names to exclude.
        :param args: raw extra arguments parsed by ``cli_variables_parser``.
        """
        submitted_variables = cli_variables_parser(args)

        # a relative "N days ago" option takes precedence over explicit dates
        if from_days is not None:
            from_date = n_days_ago(int(from_days))

        if to_days is not None:
            to_date = n_days_ago(int(to_days))

        submitted_variables["to"] = to_date
        submitted_variables["from"] = from_date

        logger.info('Variables used')
        logger.info(submitted_variables)

        # included dbs
        if db:
            dbs = db.split(',')
        elif all:
            dbs = list(sqlbucket.connections.keys())
        else:
            print("Either parameter db (-b) or (--all) flag is required")
            # no connection was selected, so there is nothing to run
            return

        # excluded dbs
        if edb:
            ex_dbs = edb.split(',')
            dbs = [item for item in dbs if item not in ex_dbs]

        for dbi in dbs:
            connection_variables = sqlbucket.connection_variables[dbi]

            # Resolve the isolation level per connection. The command-line
            # value is never overwritten, otherwise the level configured for
            # the first connection would be reused for all following ones.
            db_isolation = isolation
            if db_isolation is None \
                    and 'isolation_level' in connection_variables:
                db_isolation = connection_variables['isolation_level']

            etl = sqlbucket.load_project(project_name=name,
                                         connection_name=dbi,
                                         variables=submitted_variables)
            if rendering:
                etl.render(from_step=fstep, to_step=tstep, group=group)
            else:
                if db_isolation:
                    db_isolation = db_isolation.upper()

                etl.run(from_step=fstep,
                        to_step=tstep,
                        group=group,
                        verbose=verbose,
                        isolation_level=db_isolation)
Exemplo n.º 4
0
    def run_integrity(sqlbucket, name, db, prefix, verbose, args):
        """Run the integrity checks of project ``name`` on connection ``db``.

        The extra command-line ``args`` are parsed into variables, logged and
        handed to ``sqlbucket.load_project``. The loaded project's
        ``run_integrity`` is then called with ``prefix`` and ``verbose``; when
        it returns a truthy value the process exits with status code 3.
        """
        variables = cli_variables_parser(args)

        logger.info('Variables used')
        logger.info(variables)

        project = sqlbucket.load_project(
            project_name=name,
            connection_name=db,
            variables=variables,
        )

        # a truthy result means errors were reported: signal it to the shell
        if project.run_integrity(prefix=prefix, verbose=verbose):
            sys.exit(3)
Exemplo n.º 5
0
    def run_project(self) -> None:
        """Execute the selected queries of the project on a single connection.

        A connection is opened with ``create_connection`` using
        ``self.configuration`` and ``self.isolation_level``. Every entry of
        ``self.configuration["order"]`` whose zero-based position lies between
        ``self.from_step_index`` and ``self.to_step_index`` (inclusive) is
        executed in order, and the time each query takes is logged. The
        connection is closed when the run ends, whether it succeeded or a
        query raised.

        :raises Exception: whatever a query execution raises; remaining
            queries are not run and the ending logs are not written.
        """
        self.starting_logs()

        start = datetime.now()
        connection = create_connection(
            self.configuration, isolation_level=self.isolation_level
        )

        try:
            for i, query in enumerate(self.configuration["order"]):

                # we skip the queries out of index
                if i < self.from_step_index or i > self.to_step_index:
                    continue

                # we run the query and monitor the time it takes
                query_start = datetime.now()
                logger.info(f"Now running query {str(i + 1)}: '{query}'...")
                rendered_query = self.configuration["queries"][query]
                if self.verbose:
                    logger.info(f'\n\n{rendered_query}\n')
                connection.execute(text(rendered_query))

                query_end = datetime.now()
                timing = str(query_end - query_start)
                logger.info(
                    f"Query '{query}' successfully executed in {timing}.")
        finally:
            # the connection is owned by this method and is not handed to
            # anyone else, so it must be released here in every case
            connection.close()

        end = datetime.now()
        self.ending_logs(start, end)
Exemplo n.º 6
0
def create_connection(configuration: dict) -> Connection:
    """Open a database connection described by ``configuration``.

    The engine is created from ``configuration['connection_url']`` with
    ``NullPool`` and the isolation level ``"AUTOCOMMIT"``, except when the URL
    starts with ``sqlite``, in which case ``'SERIALIZABLE'`` is used. When the
    configuration holds a non-``None`` ``'connection_query'``, that statement
    is logged and executed on the new connection before it is returned.

    :param configuration: must contain ``'connection_url'``; may contain
        ``'connection_query'``.
    :return: the open connection. Closing it is the caller's job.
    :raises Exception: whatever the connection query raises; the connection
        is closed before the error is propagated.
    """
    # todo: isolation parameter consider possibility to set different isolation
    #  level.
    # todo: a user may prefer to run a session that commits data only at the
    #  very end of the ETL instead of an auto commit execution at engine level.

    isolation_level = "AUTOCOMMIT"

    # Original author's note: SQLite does not have autocommit, so SERIALIZABLE
    # is used for it instead.
    # NOTE(review): confirm against the SQLAlchemy version in use.
    if configuration['connection_url'][:6] == 'sqlite':
        isolation_level = 'SERIALIZABLE'

    engine = create_engine(configuration['connection_url'],
                           poolclass=NullPool,
                           isolation_level=isolation_level)
    connection = engine.connect()
    connection_query = configuration.get('connection_query')
    if connection_query is not None:
        logger.info(f'Running connection query: '
                    f'{connection_query}')
        try:
            connection.execute(text(connection_query))
        except Exception:
            # The caller never receives the connection when the set-up query
            # fails, so it has to be released here.
            connection.close()
            raise

    return connection
Exemplo n.º 7
0
    def starting_logs(self):
        """Log the banner, the project/connection names and the query plan.

        The project and connection names are logged upper-cased, followed by
        the ``'context'`` entry of the configuration. The entries of
        ``self.configuration["order"]`` whose zero-based position lies between
        ``self.from_step_index`` and ``self.to_step_index`` (inclusive) are
        then listed, one per line.
        """
        logger.info(sqlbucket_logo)

        project = self.configuration['project_name'].upper()
        target = self.configuration['connection_name'].upper()
        logger.info(f"Starting project {project} for connection {target}")
        logger.info(f"Variables: {self.configuration['context']}")

        # keep only the queries that fall inside the requested step window
        selected = [
            name
            for position, name in enumerate(self.configuration["order"])
            if self.from_step_index <= position <= self.to_step_index
        ]

        listing = "\n\t".join(selected)
        logger.info(f"\n\nRunning the following queries:\n\t{listing}\n")
Exemplo n.º 8
0
 def create_project(sqlbucket, name):
     """Create project ``name`` via ``sqlbucket`` and log that it succeeded."""
     sqlbucket.create_project(name)

     # only reached when the creation above did not raise
     success_message = f'Project "{name}" successfully created!'
     logger.info(success_message)
Exemplo n.º 9
0
 def ending_logs(self, start, end):
     """Log that the project completed and how long it took.

     :param start: value taken when the run began.
     :param end: value taken when the run finished; ``end - start`` is
         logged as the duration.
     """
     project = self.configuration['project_name']
     database = self.configuration['connection_name']
     logger.info(
         f"Project '{project}' successfully completed for database "
         f"'{database}'"
     )

     elapsed = end - start
     logger.info(f"Project completed in {elapsed}")