Exemplo n.º 1
0
 def execute_pipeline(self):
     """Build the graph and services from the parsed CLI options and run them.

     The bonobo argument parser is stored on ``self.bonobo_parser`` before
     the command line is parsed.
     """
     self.bonobo_parser = bonobo.get_argument_parser()
     with bonobo.parse_args(self.bonobo_parser) as options:
         # Graph first, then services: same evaluation order as a single call.
         graph = self.build_graph(**options)
         services = self.get_services(**options)
         bonobo.run(graph, services=services)
Exemplo n.º 2
0
def run_etl(input_file_path, config_file, output_cube_path=None, cube_config=None):
    """
    Run the ETL process for the given Excel file.

    :param input_file_path: excel file path (required)

    :param config_file: config file path (YAML); only read when ``cube_config`` is not given

    example of config::

        # in the config file you specify for each table, columns associate with it.
        Facts: [Price, Quantity]
        Accounts: ['Source Account', 'Destination Account']
        Client: ['Client Activity', 'Client Role']

    :param output_cube_path: cube folder path; when empty, defaults to
        ``~/olapy-data/cubes/<input file name without extension>``

    :param cube_config: if you want to call run_etl as function, you can pass dict config directly as param,
    there an example::

        @click.command()
        @click.pass_context
        def myETL(ctx):
            # demo run_etl as function with config as dict
            config = {
                'Facts': ['Amount', 'Count'],
                'Geography': ['Continent', 'Country', 'City'],
                'Product': ['Company', 'Article', 'Licence'],
                'Date': ['Year', 'Quarter', 'Month', 'Day']
            }
            ctx.invoke(run_etl, input_file_path='sales.xlsx', cube_config=config, output_cube_path='cube2')

    :raises Exception: if neither ``cube_config`` nor ``config_file`` is given,
        or if ``input_file_path`` is empty.
    """
    parser = bonobo.get_argument_parser()
    parser.add_argument("-in", "--input_file_path", help="Input file")
    parser.add_argument("-cf", "--config_file", help="Configuration file path")
    parser.add_argument("-out", "--output_cube_path", help="Cube export path")
    with bonobo.parse_args(parser) as options:

        if cube_config:
            options["cube_config"] = cube_config
        elif config_file:
            # Use a distinct name for the handle so the path parameter is not shadowed.
            with open(config_file) as config_stream:
                # safe_load: the config is plain YAML (lists of column names), and
                # yaml.load() without an explicit Loader is unsafe on old PyYAML
                # releases and a TypeError on PyYAML >= 6.
                options["cube_config"] = yaml.safe_load(config_stream)
        else:
            raise Exception("Config file is not specified")

        if not input_file_path:
            raise Exception("Excel file is not specified")
        options["input_file_path"] = input_file_path

        # Fall back to a per-input folder under the user's home directory.
        options["output_cube_path"] = output_cube_path or os.path.join(
            expanduser("~"), "olapy-data", "cubes", Path(input_file_path).stem
        )

        bonobo.run(get_graph(**options), services=get_services(**options))
Exemplo n.º 3
0
def main(setting, ckan_portal, dataset_id, ressource, namespace, filename):
    """Parse the bonobo CLI options, then build and run the graph.

    The positional arguments are forwarded to ``get_graph`` (all but
    ``setting``) and ``get_services`` (``setting``), together with the
    parsed options as keyword arguments.
    """
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(ckan_portal, dataset_id, ressource, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
Exemplo n.º 4
0
def get_argument_parser(parser=None):
    """Return the CLI parser provided by bonobo, extended with file options.

    Adds ``--input_file``/``-i`` and ``--output_file``/``-o`` (both strings,
    defaulting to ``None``).
    """
    extended = bonobo.get_argument_parser(parser=parser)

    file_options = (
        (("--input_file", "-i"), "Path of the input file."),
        (("--output_file", "-o"), "Path of the output file."),
    )
    for flags, description in file_options:
        extended.add_argument(*flags, type=str, default=None, help=description)

    return extended
def main():
    """Parse the bonobo CLI options and execute the pipeline graph."""
    # NOTE: a file-logging setup (handler writing to "wh.log") was sketched
    # here as commented-out code; it is not active.
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
def get_argument_parser(parser=None):
    """Return the CLI parser provided by bonobo, extended with file and S3 options.

    Every added option is a string that defaults to ``None``.
    """
    extended = bonobo.get_argument_parser(parser=parser)

    string_options = (
        (("--input_file", "-i"), "Path of the input file."),
        (("--output_file", "-o"), "Path of the output file."),
        # The remaining options are used to reach different S3 services.
        (("--bucket", "-b"), "Bucket name in S3 service."),
        (("--key", "-k"), "Key to access S3 service."),
        (("--secret_key", "-sk"), "Secret key to access the S3 service."),
        (("--endpoint_url", "-ep"), "Endpoint URL for S3 service."),
    )
    for flags, description in string_options:
        extended.add_argument(*flags, type=str, default=None, help=description)

    return extended
Exemplo n.º 7
0
def get_argument_parser(parser=None):
    """Return bonobo's argument parser with ``--limit`` and ``--print`` added."""
    extended = bonobo.get_argument_parser(parser=parser)

    limit_settings = dict(
        type=int,
        default=None,
        help='If set, limits the number of processed lines.',
    )
    extended.add_argument('--limit', '-l', **limit_settings)

    print_settings = dict(
        action='store_true',
        default=False,
        help='If set, pretty prints before writing to output file.',
    )
    extended.add_argument('--print', '-p', **print_settings)

    return extended
Exemplo n.º 8
0
def get_argument_parser(parser=None):
    """Extend bonobo's argument parser with line-limit and pretty-print flags."""
    cli = bonobo.get_argument_parser(parser=parser)

    # Optional integer cap on the number of processed lines.
    cli.add_argument(
        '--limit', '-l', type=int, default=None,
        help='If set, limits the number of processed lines.',
    )
    # Boolean switch, off unless given on the command line.
    cli.add_argument(
        '--print', '-p', action='store_true', default=False,
        help='If set, pretty prints before writing to output file.',
    )

    return cli
Exemplo n.º 9
0
def parse_args(parser=None):
    """Parse CLI options, optionally drop/create the database, then yield the options.

    Adds the ``--drop``/``-D``, ``--create``/``-C`` and ``--echo`` flags to
    the given parser (or to a fresh bonobo parser when none is given).
    This is a generator function; presumably it is wrapped as a context
    manager by a decorator outside this view (TODO confirm).
    """
    if not parser:
        parser = bonobo.get_argument_parser()

    for flags in (('--drop', '-D'), ('--create', '-C'), ('--echo', )):
        parser.add_argument(*flags, action='store_true')

    with bonobo.parse_args(parser) as options:
        # Imported only once the options (and environment) have been parsed.
        import models
        import settings
        import services

        if options['echo']:
            sql_logger = logging.getLogger('sqlalchemy.engine')
            sql_logger.setLevel(logging.INFO)

        if options['drop'] or options['create']:
            # Database and role management runs through the superuser engine.
            root_engine = services.create_engine(superuser=True)

            if options['drop']:
                drop_database = "DROP DATABASE {}".format(settings.DATABASE_NAME)
                _execute_sql(root_engine, drop_database)
                drop_role = "DROP ROLE {}".format(settings.DATABASE_USERNAME)
                _execute_sql(root_engine, drop_role)

            if options['create']:
                create_role = 'CREATE ROLE {} WITH LOGIN PASSWORD \'{}\';'.format(
                    settings.DATABASE_USERNAME, settings.DATABASE_PASSWORD)
                _execute_sql(root_engine, create_role)
                create_database = (
                    'CREATE DATABASE {} WITH OWNER={} TEMPLATE=template0 ENCODING="utf-8";'
                    .format(settings.DATABASE_NAME, settings.DATABASE_USERNAME))
                _execute_sql(root_engine, create_database)

                # Tables are created through the regular (non-superuser) engine.
                user_engine = services.create_engine()
                models.metadata.create_all(user_engine)

        yield options
Exemplo n.º 10
0
def get_argument_parser(parser=None):
    """Return bonobo's argument parser with limit, print and strategy options added."""
    cli = bonobo.get_argument_parser(parser=parser)

    cli.add_argument(
        "--limit", "-l", type=int, default=None,
        help="If set, limits the number of processed lines.",
    )
    cli.add_argument(
        "--print", "-p", action="store_true", default=False,
        help="If set, pretty prints before writing to output file.",
    )
    # The strategy must be one of the STRATEGIES keys.
    cli.add_argument(
        "--strategy", "-s", type=str,
        choices=STRATEGIES.keys(),
        default=DEFAULT_STRATEGY,
    )

    return cli
Exemplo n.º 11
0
import bonobo
from bonobo.examples.files._services import get_services


def get_graph(*, _limit=None, _print=False):
    """Build the coffeeshops graph: CSV reader, optional nodes, CSV writer.

    :param _limit: when truthy, a ``bonobo.Limit(_limit)`` node follows the reader.
    :param _print: when truthy, a ``bonobo.PrettyPrinter()`` node precedes the writer.
    """
    nodes = [bonobo.CsvReader('datasets/coffeeshops.txt')]
    if _limit:
        nodes.append(bonobo.Limit(_limit))
    if _print:
        nodes.append(bonobo.PrettyPrinter())
    nodes.append(bonobo.CsvWriter('coffeeshops.csv', fs='fs.output'))
    return bonobo.Graph(*nodes)


if __name__ == '__main__':
    # Script entry point: start from bonobo's parser and register the two
    # options this example understands.
    parser = bonobo.get_argument_parser()

    limit_help = 'If set, limits the number of processed lines.'
    parser.add_argument('--limit', '-l', type=int, default=None, help=limit_help)

    print_help = 'If set, pretty prints before writing to output file.'
    parser.add_argument('--print', '-p', action='store_true', default=False, help=print_help)
Exemplo n.º 12
0
 def create_parser(self, prog_name, subcommand):
     """Return the parent class's parser after passing it through bonobo."""
     base_parser = super().create_parser(prog_name, subcommand)
     return bonobo.get_argument_parser(base_parser)
Exemplo n.º 13
0
def parse_args(mixed=None):
    """
    Context manager to extract and apply environment related options from the provided argparser result.

    A dictionary with the remaining (non environment related) options is yielded, so they can be used by the
    caller. Every environment variable changed here is restored to its previous state on exit.

    NOTE(review): this is a generator function; it is presumably wrapped by ``contextlib.contextmanager`` via a
    decorator that is not visible here -- confirm.

    :api: bonobo.patch_environ

    :param mixed: ArgumentParser instance, Namespace, or dict. When None, a new bonobo argument parser is used.
    :return: yields a dict of options, without the ``default_env``, ``default_env_file``, ``env_file`` and
        ``env`` keys.
    """

    if mixed is None:
        global _parser
        if _parser is not None:
            warnings.warn(
                'You are calling bonobo.parse_args() without a parser argument, but it looks like you created a parser before. You probably want to pass your parser to this call, or if creating a new parser here is really what you want to do, please create a new one explicitely to silence this warning.'
            )
        # use the api from bonobo namespace, in case a command patched it.
        import bonobo
        mixed = bonobo.get_argument_parser()

    if isinstance(mixed, argparse.ArgumentParser):
        options = mixed.parse_args()
    else:
        options = mixed

    if not isinstance(options, dict):
        options = vars(options)

    # make a copy so we don't pollute our parent variables.
    options = dict(options)

    # storage for values before patch (None means "was not set").
    _backup = {}

    def _setenv(name, value, *, override):
        # Set one environment variable, remembering its original value the first time it is touched.
        if not override and name in os.environ:
            return
        if name not in _backup:
            _backup[name] = os.environ.get(name, None)
        os.environ[name] = value

    # Priority order: --env > --env-file > system > --default-env > --default-env-file
    #
    # * The code below is reading default-env before default-env-file as if the first sets something, default-env-file
    #   won't override it.
    # * Then, env-file is read from before env, as the behaviour will be the opposite (env will override a var even if
    #   env-file sets something.)
    try:
        # Set default environment
        for name, value in map(parse_var, options.pop('default_env', []) or []):
            _setenv(name, value, override=False)

        # Read and set default environment from file(s)
        for filename in options.pop('default_env_file', []) or []:
            for name, value in load_env_from_file(filename):
                _setenv(name, value, override=False)

        # Read and set environment from file(s)
        for filename in options.pop('env_file', []) or []:
            for name, value in load_env_from_file(filename):
                _setenv(name, value, override=True)

        # Set environment
        for name, value in map(parse_var, options.pop('env', []) or []):
            _setenv(name, value, override=True)

        yield options
    finally:
        for name, value in _backup.items():
            if value is None:
                # pop() instead of del: the managed body may already have removed the variable, and a KeyError
                # raised here would mask whatever exception is propagating.
                os.environ.pop(name, None)
            else:
                os.environ[name] = value
Exemplo n.º 14
0
Arquivo: etl.py Projeto: mlipper/bray
def get_argument_parser(argparser):
    """Return the given ArgumentParser after bonobo has augmented it for its ETL framework."""
    augmented = bonobo.get_argument_parser(parser=argparser)
    return augmented
Exemplo n.º 15
0
def main(setting, query, namespace, filename):
    """Parse the bonobo CLI options, then build and run the graph.

    ``query``, ``namespace`` and ``filename`` go to ``get_graph``;
    ``setting`` goes to ``get_services``. Both also receive the parsed
    options as keyword arguments.
    """
    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(query, namespace, filename, **options)
        services = get_services(setting, **options)
        bonobo.run(graph, services=services)
Exemplo n.º 16
0
 def create_parser(self, prog_name, subcommand):
     """Build the parent class's parser and hand it to bonobo for extension."""
     parent_parser = super().create_parser(prog_name, subcommand)
     return bonobo.get_argument_parser(parent_parser)
Exemplo n.º 17
0
def load(result):
    """Store one result through ``cinco.insert_multiplo`` and print 'Fin!'."""
    # Exercise notes (translated from Spanish): every result that reaches
    # this point should be persisted, either appended as a new line to a
    # text file (open with 'a' and write) or inserted into a database of
    # your choice. The goal is to end up with the 5 times table stored in
    # a file or a database.
    cinco.insert_multiplo(result)

    print('Fin!')


def get_graph(**options):
    """Return a graph with a single extract -> transform -> load chain.

    The keyword options are accepted but not used.
    """
    pipeline = bonobo.Graph()
    pipeline.add_chain(extract, transform, load)
    return pipeline


def get_services(**options):
    """Return the services for the graph: always a new, empty dict.

    The keyword options are accepted but not used.
    """
    services = {}
    return services


if __name__ == "__main__":
    # The schema is created before any command-line parsing takes place.
    cinco.create_schema()

    with bonobo.parse_args(bonobo.get_argument_parser()) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)
def main():
    """Parse the bonobo CLI options and execute the pipeline graph."""
    cli_parser = bonobo.get_argument_parser()
    with bonobo.parse_args(cli_parser) as options:
        graph = get_graph(**options)
        services = get_services(**options)
        bonobo.run(graph, services=services)