Пример #1
0
def project_init(ctx, overwrite, dbnd_home, dbnd_system):
    """Initialize the project structure"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    #
    # Copies the packaged "conf/project_init" tree into ``dbnd_home``.
    # A "project.cfg" already present in ``dbnd_home`` means the project was
    # initialized before: without ``overwrite`` a DatabandSystemError is raised,
    # with it a warning is logged and the copy goes ahead.
    # ``ctx`` is not used in this function.

    from dbnd._core.errors import DatabandSystemError
    from dbnd import databand_lib_path

    os.environ["SKIP_DAGS_PARSING"] = "True"  # Exclude airflow dag examples

    template_dir = databand_lib_path("conf/project_init")
    already_initialized = os.path.exists(os.path.join(dbnd_home, "project.cfg"))

    if already_initialized and not overwrite:
        refusal = (
            "You are trying to re-initialize your project. You already have dbnd configuration at %s. "
            "You can force project-init by providing --overwrite flag. "
            "If you need to create/update database use `dbnd db init` instead"
        )
        # NOTE(review): the message reports dbnd_system although the check
        # above looks at dbnd_home - confirm this is intended.
        raise DatabandSystemError(refusal % dbnd_system)

    if already_initialized:
        overwrite_notice = "You are re-initializing your project, all files at %s are going to be over written!"
        logger.warning(overwrite_notice % dbnd_home)

    copy_tree(template_dir, dbnd_home)
    click.echo("Databand project has been initialized at %s" % dbnd_home)
Пример #2
0
def project_init(ctx, no_init_db, overwrite, dbnd_home, dbnd_system):
    """Initialize the project structure and local db"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    #
    # Steps:
    #   1. copy the packaged "conf/project_init" tree into ``dbnd_home``
    #      (refused with DatabandSystemError when "project.cfg" already exists
    #      there and ``overwrite`` is falsy),
    #   2. force a reload of the system configs,
    #   3. unless ``no_init_db`` is set, invoke the dbnd_web ``db init`` command
    #      through ``ctx`` when is_web_enabled() reports true.

    from dbnd._core.errors import DatabandSystemError
    from dbnd import databand_lib_path

    os.environ["SKIP_DAGS_PARSING"] = "True"  # Exclude airflow dag examples

    conf_folder = databand_lib_path("conf/project_init")

    if os.path.exists(os.path.join(dbnd_home, "project.cfg")):
        if not overwrite:
            # NOTE(review): the message reports dbnd_system although the check
            # above looks at dbnd_home - confirm this is intended.
            raise DatabandSystemError(
                "You are trying to re-initialize your project. You already have dbnd configuration at %s. "
                "You can force project-init by providing --overwrite flag. "
                "If you need to create/update database use `dbnd db init` instead"
                % dbnd_system)

        logger.warning(
            "You are re-initializing your project, all files at %s are going to be over written!"
            % dbnd_home)

    copy_tree(conf_folder, dbnd_home)
    click.echo("Databand project has been initialized at %s" % dbnd_home)

    # The files copied above have to be picked up before anything reads config.
    config.load_system_configs(force=True)
    if no_init_db:
        click.echo("Don't forget to run `dbnd db init` ")
        return

    if is_web_enabled():
        # Imported lazily: dbnd_web is only needed on this branch.
        from dbnd_web.cmd_db import init as db_init

        ctx.invoke(db_init)
Пример #3
0
def _click_echo_jobs(jobs):
    """Echo the given scheduled jobs as a table.

    Each item of ``jobs`` must be a mapping that holds a nested
    "DbndScheduledJob" mapping next to its other top-level keys. Both levels
    are flattened into one ``ScheduledJobNamedTuple``; on a name clash the
    top-level value wins. The table columns come from the "headers" entry of
    the current click context object.

    The input mappings are not modified, so the same list can be echoed more
    than once.

    Raises:
        KeyError: if an item has no "DbndScheduledJob" entry.
    """
    from dbnd._core.utils.object_utils import tabulate_objects

    headers = get_current_context().obj["headers"]

    job_objects = []
    for job in jobs:
        # Start from a copy of the nested record, then overlay the remaining
        # top-level keys, leaving the caller's mapping untouched.
        fields = dict(job["DbndScheduledJob"])
        fields.update(
            (key, value) for key, value in job.items() if key != "DbndScheduledJob"
        )
        job_objects.append(ScheduledJobNamedTuple(**fields))

    click.echo(tabulate_objects(job_objects, headers=headers))
Пример #4
0
def delete(name, force):
    """Delete scheduled job"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    #
    # With ``force`` the deletion happens right away. Otherwise the jobs
    # matching ``name`` are listed and the user has to confirm first.
    if force:
        delete_scheduled_job(name)
        return

    matching_jobs = get_scheduled_jobs(name_pattern=name)
    if not matching_jobs:
        click.echo("no jobs found matching the given name pattern")
        return

    click.echo("the following jobs will be deleted:")
    _click_echo_jobs(matching_jobs)

    # abort=True: a negative answer aborts instead of returning False.
    click.confirm("are you sure?", abort=True)

    delete_scheduled_job(name)
Пример #5
0
def print_help(ctx, task_cls):
    """Echo the help of the ``run`` command, optionally followed by task parameters.

    Args:
        ctx: click context; supplies the formatter and the ``color`` setting.
        task_cls: task class whose parameters are listed in a "Task" section,
            or a falsy value to print only the command help.

    Parameters flagged ``system`` and parameters of kind ``task_output`` are
    left out. The "Task" section is skipped entirely when nothing remains to
    list.
    """
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()
    run.format_help(ctx, formatter)
    if task_cls:
        dl = []
        for (param_name, param_obj) in task_cls.task_definition.task_params.items():
            if param_obj.system or param_obj.kind == _ParameterKind.task_output:
                continue

            param_help = _help(param_obj.description)
            dl.append(("-r " + param_name, param_help))

        # click's write_dl() raises TypeError for an empty list, so a task
        # without visible parameters must not open the section at all.
        if dl:
            with formatter.section("Task"):
                formatter.write_dl(dl)
    click.echo(formatter.getvalue().rstrip("\n"), color=ctx.color)
Пример #6
0
def _list_tasks(ctx, module, search, is_config):
    """Echo the parameters of every registered task (or config) class matching ``search``.

    Args:
        ctx: click context used to create the help formatter.
        module: passed to ``load_user_modules`` as ``modules``.
        search: prefix tested against both the short and the full task family.
        is_config: when true only ``Config`` subclasses are listed, otherwise
            only classes that are not ``Config`` subclasses.

    One section per class is written, titled "<task_family> (<full_task_family>)".
    Parameters flagged ``system``, parameters of kind ``task_output`` and, for
    non-config listings, names found in ``COMMON_PARAMS`` are skipped. Classes
    left with no parameters produce no section.
    """
    from dbnd import Config
    from dbnd._core.context.databand_context import new_dbnd_context
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()

    load_user_modules(config, modules=module)

    with new_dbnd_context():
        registered_classes = get_task_registry().list_dbnd_task_classes()

    for task_cls in registered_classes:
        definition = task_cls.task_definition
        full_name = definition.full_task_family
        short_name = definition.task_family

        name_matches = short_name.startswith(search) or full_name.startswith(search)
        if not name_matches:
            continue

        if issubclass(task_cls, Config) != is_config:
            continue

        rows = [
            (param_name, _help(param_obj.description))
            for param_name, param_obj in definition.task_param_defs.items()
            if not (param_obj.system or param_obj.kind == _ParameterKind.task_output)
            and (is_config or param_name not in COMMON_PARAMS)
        ]
        if not rows:
            continue

        heading = "{task_family} ({full_task_family})".format(
            task_family=short_name, full_task_family=full_name
        )
        with formatter.section(heading):
            formatter.write_dl(rows)

    click.echo(formatter.getvalue().rstrip("\n"))
Пример #7
0
def pause(name):
    """Pause scheduled job"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    # Marks the job called ``name`` as inactive, then confirms on stdout.
    set_scheduled_job_active(name, False)

    confirmation = 'Scheduled job "%s" paused' % name
    click.echo(confirmation)
Пример #8
0
def enable(name):
    """Enable scheduled job"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    # Marks the job called ``name`` as active, then confirms on stdout.
    set_scheduled_job_active(name, True)

    confirmation = 'Scheduled job "%s" is enabled' % name
    click.echo(confirmation)
Пример #9
0
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """
    # NOTE(review): the docstring above is left untouched because it presumably
    # doubles as the CLI help text of this command - confirm before rewording.
    #
    # Flow of this function:
    #   1. fold the dedicated command line switches into a config store,
    #   2. layer --set / --set-config / --set-override (plus a few flag-driven
    #      values) on top of it and hand the result to the dbnd `config` object,
    #   3. bootstrap dbnd and open a new databand context,
    #   4. resolve the task class, then either print help, describe the task,
    #      or run it through context.dbnd_run_task().
    #
    # Returns None when only help is printed, the built root task in describe
    # mode, and otherwise whatever context.dbnd_run_task() returns.

    from dbnd._core.context.databand_context import new_dbnd_context, DatabandContext
    from dbnd._core.utils.structures import combine_mappings
    from dbnd import config

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # we filter out false flags since otherwise they will always override the config with their falseness
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                task_band=print_task_band,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )),
    )

    # None is treated as "not specified": only explicit values are forwarded,
    # and an explicit False is kept (these bypass the false-value filter above).
    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(source="cli",
                                                   config_values=main_switches)

    # Materialize as lists: they are iterated and truth-tested below.
    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    # Merge all root-task mappings into a single one.
    # NOTE(review): presumably values from later mappings win - confirm
    # against combine_mappings.
    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        # iterate over a snapshot, entries are deleted from _set inside the loop
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    # Each source is parsed separately and merged into cmd_line_config in this
    # order, so the `source` label of every value stays identifiable.
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True))
    if interactive:
        cmd_line_config.update(
            _parse_cli([{
                "run.interactive": True
            }], source="--interactive"))
    # a verbosity above 1 additionally turns on task_build.verbose
    if verbose > 1:
        cmd_line_config.update(
            _parse_cli([{
                "task_build.verbose": True
            }], source="-v -v"))

    # Nothing is pushed into config when no command line value was collected.
    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info("CLI config: \n%s",
                    pformat_config_store_as_table(cmd_line_config))

    # double checking on bootstrap, as we can run from all kind of locations
    # usually we should be bootstrapped already as we run from cli.
    dbnd_bootstrap()
    # basic logging is configured unless the "log.disabled" config value is set
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    # Scheduling details are only attached when a scheduled job name is given.
    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name,
            scheduled_date=scheduled_date)

    # Everything below runs with tracking mode switched off and inside a fresh
    # databand context named "run".
    with tracking_mode_context(tracking=False), new_dbnd_context(
            name="run", module=module) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        tasks = task_registry.list_dbnd_task_classes()
        # NOTE(review): presumably refreshes the tab-completion data - confirm.
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                # from here on the task is known under the alternative name
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name)
                task_name = alternative_task_name

        # --set-root
        # now we can get it config, as it's not main task, we can load config after the configuration is loaded
        if task_cls is not None and root_task_config:
            # adding root task to configuration
            config.set_values(
                {
                    task_cls.task_definition.task_config_section:
                    root_task_config
                },
                source="--set-root",
            )

        # Help is printed when requested explicitly or when no task was named;
        # task_cls may be None here, print_help() receives it as is.
        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        if open_web_tab:
            config.set_values({"run": {"open_web_tracker_in_browser": "True"}})

        if docker_build_tag:
            config.set_values(
                {"kubernetes": {
                    "docker_build_tag": docker_build_tag
                }})

        if context.settings.system.describe:
            # we want to print describe without triggering real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            click.echo("Task %s has been described!" % task_name)
            return root_task

        # Regular execution; run_driver is forwarded as the run uid.
        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
Пример #10
0
def pause(scheduled_job_service, name):
    """Pause scheduled job"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    # Asks the service to mark the job called ``name`` as inactive, then
    # confirms on stdout.
    scheduled_job_service.set_active(name, False)

    confirmation = 'Scheduled job "%s" paused' % name
    click.echo(confirmation)
Пример #11
0
def enable(scheduled_job_service, name):
    """Enable scheduled job"""
    # NOTE(review): docstring kept verbatim - it presumably doubles as the CLI
    # help text of this command; confirm before rewording it.
    # Asks the service to mark the job called ``name`` as active, then
    # confirms on stdout.
    scheduled_job_service.set_active(name, True)

    confirmation = 'Scheduled job "%s" is enabled' % name
    click.echo(confirmation)