Example #1
    def __init__(self, task_class, classdict):
        super(TaskDefinition, self).__init__()

        self.task_definition_uid = get_uuid()
        self.hidden = False

        self.task_class = task_class  # type: Type[Task]

        self.task_passport = TaskPassport.from_task_cls(task_class)

        # TODO: maybe use properties or another way to delegate those...
        self.full_task_family = self.task_passport.full_task_family
        self.full_task_family_short = self.task_passport.full_task_family_short
        self.task_family = self.task_passport.task_family
        self.task_config_section = self.task_passport.task_config_section

        # all the attributes that point to a Parameter
        self.task_params = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_params, self.defaults = self._calculate_task_class_values(classdict)

        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the function's return value can not be used as the result
        self.single_result_output = self._is_result_single_output(self.task_params)

        defaults = {
            p.name: p.default
            for p in self.task_params.values()
            if is_defined(p.default)
        }
        self.task_defaults_config_store = parse_and_build_config_store(
            source=self.task_passport.format_source_name("defaults"),
            config_values={self.task_config_section: defaults},
            set_if_not_exists_only=True,
        )

        self.task_defaults_config_store.update(
            parse_and_build_config_store(
                source=self.task_passport.format_source_name("defaults_section"),
                config_values=self.defaults,
            )
        )
        # now, if we have overloads in code (calculated in task_definition):
        # class T(BaseT):
        #     some_base_t_property = new_value
        if self.task_class._conf__track_source_code:
            self.task_source_code = _get_task_source_code(self.task_class)
            self.task_module_code = _get_task_module_source_code(self.task_class)
            self.task_source_file = _get_source_file(self.task_class)
        else:
            self.task_source_code = None
            self.task_module_code = ""
            self.task_source_file = None
Example #2
    def _calculate_and_add_layer_of_task_config(self, param_task_config):
        """
        Calculate any task-class-level params and update the config with their values.
        @pipeline(task_config={CoreConfig.tracker: ["console"]})
        <or>
        class a(Task):
            task_config = {"core": {"tracker": ["console"]}}
            <or>
            task_config = {CoreConfig.tracker: ["console"]}
        """
        # calculate the value of `Task.task_config` using its definition
        # (check the whole inheritance chain and find the value for `task_config`)
        param_task_config_value = self._build_parameter_value(
            param_task_config)
        if param_task_config_value.value:
            # Support two modes:
            # 1. Task.param_name:333
            # 2. {"section": {"key":"value"}}
            # a dict parameter value can't have a non-string key
            param_task_config_value.value = parse_and_build_config_store(
                config_values=param_task_config_value.value,
                source=self._source_name("task_config"),
            )
            # merging `Task.task_config` into the current configuration
            # we are adding an "ultimate" layer on top of all other layers
            self.config.set_values(param_task_config_value.value)

        return param_task_config_value
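
A minimal sketch of the class-level form described in the docstring above, assuming the top-level Task import (the task name and values are hypothetical):

from dbnd import Task


class PrepareData(Task):  # hypothetical task
    # plain "section"/"key" form; the parameter-object form from the docstring
    # would be task_config = {CoreConfig.tracker: ["console"]}
    task_config = {"core": {"tracker": ["console"]}}
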
Example #3
    def _new_config_layer(self,
                          config_values,
                          source=None,
                          override=False,
                          merge_settings=None):
        # let's validate that we are initialized
        # the user can call this function out of nowhere, so we will create a layer and override it
        # the moment more layers are created on config.system_load
        dbnd_system_bootstrap()

        if not config_values:
            return self.config_layer
        if not isinstance(config_values, _ConfigStore):
            if not source:
                source = "{sig}".format(sig=id(config_values))
            config_values = parse_and_build_config_store(
                config_values=config_values, source=source,
                override=override)  # type: _ConfigStore

        source = source or config_values.source
        if not source:
            source = "{sig}".format(sig=id(config_values))
        return self.config_layer.create_layer(name=source,
                                              config_values=config_values,
                                              merge_settings=merge_settings)
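
_new_config_layer accepts either a ready _ConfigStore or a plain dict; in the dict case it builds the store itself via parse_and_build_config_store and falls back to an id-based source name. A minimal sketch of the dict form (the owning config object and the values are hypothetical):

# `config` stands for whatever object exposes _new_config_layer above
new_layer = config._new_config_layer(
    config_values={"core": {"tracker": ["console"]}},
    source="my_experiment",  # hypothetical source label
)
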
Example #4
def _parse_cli(configs, source, override=False):
    """
    Parse every item in configs, joining them into one big ConfigStore.
    """
    config_values_list = [
        parse_and_build_config_store(
            config_values=c, source=source, override=override, auto_section_parse=True
        )
        for c in configs
    ]
    return functools.reduce((lambda x, y: x.update(y)), config_values_list)
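
_parse_cli simply folds the per-item stores together with update(). A minimal sketch of a call, mirroring the --set handling in the CLI examples below (the dict payloads are hypothetical):

# each dict is one --set payload; "section.key" keys rely on
# auto_section_parse=True inside _parse_cli
cmd_line_config = _parse_cli(
    [{"run.parallel": "True"}, {"core.tracker": "console"}], source="--set"
)
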
Example #5
    def get_autoloaded_config(self):
        # override values for task task_auto_config
        override_values = {
            "task_auto_config": {
                "param_int": self.param_int,
                "param_datetime": self.param_datetime,
            }
        }
        return parse_and_build_config_store(
            config_values=override_values, override=True, source="user_config"
        )
Example #6
    def get_autoloaded_config(self):
        # override values for task task_auto_config
        override_values = {
            "task_auto_config": {
                "param_int": self.param_int,
                "param_datetime": self.param_datetime,
            }
        }
        return parse_and_build_config_store(
            config_values=override_values,
            priority=ConfigValuePriority.OVERRIDE,
            source="user_config",
        )
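
Examples #5 and #6 build the same kind of override store; the boolean override flag and the explicit ConfigValuePriority.OVERRIDE argument appear to be the older and newer spellings of the same intent. A minimal sketch of applying such a store, reusing the config.set_values pattern from the other snippets (the values are hypothetical; parse_and_build_config_store and ConfigValuePriority are imported as in the snippets above):

from dbnd import config

override_store = parse_and_build_config_store(
    config_values={"task_auto_config": {"param_int": 42}},
    priority=ConfigValuePriority.OVERRIDE,
    source="user_config",
)
config.set_values(override_store, source="user_config")
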
Example #7
    def get_and_process_dbnd_dag_config(self):
        dag = self.dag
        if not dag.default_args:
            dag_dbnd_config = {}
        else:
            dag_dbnd_config = dag.default_args.get("dbnd_config", {})

        config_store = parse_and_build_config_store(
            source="%s default args" % dag.dag_id,
            config_values=dag_dbnd_config,
            auto_section_parse=True,
        )

        # the config can have problems around serialization,
        # so let's override it with the "normalized" config
        if dag.default_args:
            dag.default_args["dbnd_config"] = config_store

        config_store = _default_dbnd_dag_context_config.merge(config_store)
        logger.debug("Config store for %s: %s", self.dag.dag_id, config_store)
        return config_store
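
On the DAG side, get_and_process_dbnd_dag_config expects the dbnd settings under the dbnd_config key of the DAG's default_args. A minimal sketch of what a DAG author might pass (the DAG id and values are hypothetical):

from airflow import DAG

default_args = {
    # picked up by get_and_process_dbnd_dag_config above
    "dbnd_config": {"log": {"disabled": False}},
}
dag = DAG("my_dag", default_args=default_args)
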
Example #8
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """

    from dbnd._core.context.databand_context import new_dbnd_context, DatabandContext
    from dbnd._core.utils.structures import combine_mappings
    from dbnd import config

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # we filter out falsy flags, since otherwise they would always override the config with their false values
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )
        ),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )
        ),
    )

    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True)
        )
    if interactive:
        cmd_line_config.update(
            _parse_cli([{"run.interactive": True}], source="--interactive")
        )
    if verbose > 1:
        cmd_line_config.update(
            _parse_cli([{"task_build.verbose": True}], source="-v -v")
        )

    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we should be bootstrapped already, as we run from the CLI
    dbnd_bootstrap()
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    with new_dbnd_context(
        name="run", module=module
    ) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # now we can get its config; since it's not the main task, we can load its config after the configuration is loaded
        if task_cls is not None:
            if root_task_config:
                # adding root task to configuration
                config.set_values(
                    {task_cls.task_definition.task_config_section: root_task_config},
                    source="--set-root",
                )

        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
Example #9
    # dbnd code captures inline operator creation with a Catcher; that is not airflow mode.
    return not safe_isinstance(context_manager_dag, "DatabandOpCatcherDag")


@dbnd_handle_errors(exit_on_error=False)
def build_task_at_airflow_dag_context(task_cls, call_args, call_kwargs):
    dag = safe_get_context_manager_dag()
    dag_ctrl = DagFuncOperatorCtrl.build_or_get_dag_ctrl(dag)
    return dag_ctrl.build_airflow_operator(
        task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
    )


_default_dbnd_dag_context_config = parse_and_build_config_store(
    source="airflow_defaults",
    config_values={"log": {"disabled": True, "capture_task_run_log": False}},
)


class DagFuncOperatorCtrl(object):
    dag_to_context = {}

    def __init__(self, dag):
        self.dag = dag
        self.dbnd_airflow_name = {}
        config_store = self.get_and_process_dbnd_dag_config()
        with dbnd_config(
            config_values=config_store, source="airflow"
        ) as current_config:
            self.dbnd_context = DatabandContext(name="airflow__%s" % self.dag.dag_id)
            with DatabandContext.context(_context=self.dbnd_context):
Example #10
def cmd_run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    _extend,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project,
    name,
    description,
    run_driver,
    override_run_uid,
    alternative_task_name,
    job_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see all available tasks use `dbnd show-tasks` (tab completion is available).
    `dbnd show-configs` will print all available configs.
    """

    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_registry = get_task_registry()

    # we need to do this before we look up the task cls
    load_user_modules(dbnd_config=config, modules=module)

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project
    # we filter out falsy flags, since otherwise they would always override the config with their false values
    main_switches = dict(
        databand=dict(
            verbose=verbose > 0,
            print_task_band=print_task_band,
            describe=describe,
            env=env,
            conf_file=conf_file,
            project=project,
        ),
        run=dict(
            name=name,
            parallel=parallel,
            interactive=interactive,
            description=description,
            is_archived=describe,
            open_web_tracker_in_browser=open_web_tab,
            submit_driver=_nullable_flag(submit_driver),
            submit_tasks=_nullable_flag(submit_tasks),
        ),
        kubernetes=dict(docker_build_tag=docker_build_tag),
        task=dict(task_version=task_version),
        task_build=dict(verbose=True if verbose > 1 else None),
        core=dict(tracker_api="disabled" if disable_web_tracker else None),
    )

    main_switches = cleanup_empty_switches(main_switches)

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task
    # add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    cmd_line_config = parse_and_build_config_store(source="cli",
                                                   config_values=main_switches)
    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _extend:
        cmd_line_config.update(
            _parse_cli(_extend, source="--extend-config", extend=True))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(
                _overrides,
                source="--set-override",
                priority=ConfigValuePriority.OVERRIDE,
            ))

    # --set-root
    if root_task_config:
        task_cls = task_registry.get_task_cls(task_name)
        task_section = task_cls.task_definition.task_config_section
        # adding root task to configuration
        cmd_line_config.update(
            parse_and_build_config_store(
                config_values={task_section: root_task_config},
                source="--set-root"))

    # UPDATE CURRENT CONFIG with CLI values
    if cmd_line_config:
        if verbose:
            logger.info("CLI config: \n%s",
                        pformat_config_store_as_table(cmd_line_config))
        config.set_values(cmd_line_config, source="cmdline")

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we should be bootstrapped already, as we run from the CLI
    dbnd_bootstrap()

    # initialize basic logging (until we get to the context logging)
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name,
            scheduled_date=scheduled_date)

    # update completer
    if config.getboolean("databand", "completer"):
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

    # bootstrap and modules are loaded, we can load the task
    task_cls = None
    if task_name:
        task_cls = task_registry.get_task_cls(task_name)

    if not task_name:
        print_help(ctx, None)
        return

    if is_help:
        print_help(ctx, task_cls)
        return

    with tracking_mode_context(tracking=False), new_dbnd_context(
            name="run") as context:  # type: DatabandContext
        if context.settings.system.describe:
            # we want to print describe without triggering real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            # currently there is a bug with the click version we have when using python 2,
            # so we don't use the click.echo function
            # https://github.com/pallets/click/issues/564
            print("Task %s has been described!" % task_name)
            return root_task
        return context.dbnd_run_task(
            task_or_task_name=task_name,
            force_task_name=alternative_task_name,
            job_name=job_name or alternative_task_name or task_name,
            run_uid=run_driver or override_run_uid,
            existing_run=run_driver is not None,
            scheduled_run_info=scheduled_run_info,
            project=project,
        )
Example #11
    def __init__(self, task_class, classdict, namespace_at_class_time):
        super(TaskDefinition, self).__init__()

        self.task_definition_uid = get_uuid()
        self.hidden = False

        self.task_class = task_class  # type: Type[Task]
        self.namespace_at_class_time = namespace_at_class_time
        if self.task_class._conf__decorator_spec:
            cls_name = self.task_class._conf__decorator_spec.name
        else:
            cls_name = self.task_class.__name__

        self.full_task_family = "%s.%s" % (task_class.__module__, cls_name)
        self.full_task_family_short = "%s.%s" % (
            _short_name(task_class.__module__),
            cls_name,
        )

        self.task_family = self._build_user_task_family()
        if not self.task_family:
            self.task_family = cls_name
            self.task_config_section = self.full_task_family
        else:
            self.task_config_section = self.task_family

        # all the attributes that point to a Parameter
        self.task_params = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_params, self.defaults = self._calculate_task_class_values(
            classdict)

        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the function's return value can not be used as the result
        self.single_result_output = self._is_result_single_output(
            self.task_params)

        defaults = {
            p.name: p.default
            for p in self.task_params.values() if is_defined(p.default)
        }
        self.task_defaults_config_store = parse_and_build_config_store(
            source="%s[defaults]" % self.full_task_family_short,
            config_values={self.task_config_section: defaults},
            set_if_not_exists_only=True,
        )

        self.task_defaults_config_store.update(
            parse_and_build_config_store(
                source="%s[defaults_section]" % self.full_task_family_short,
                config_values=self.defaults,
            ))
        # now, if we have overloads in code (calculated in task_definition):
        # class T(BaseT):
        #     some_base_t_property = new_value
        if self.task_class._conf__track_source_code:
            self.task_source_code = _get_task_source_code(self.task_class)
            self.task_module_code = _get_task_module_source_code(
                self.task_class)
            self.task_source_file = _get_source_file(self.task_class)
        else:
            self.task_source_code = None
            self.task_module_code = ""
            self.task_source_file = None
Example #12
    def __init__(
            self,
            task_passport,  # type: TaskPassport
            classdict=None,  # type: Optional[Dict[str, Any]]
            base_task_definitions=None,  # type: Optional[List[TaskDefinition]]
            defaults=None,  # type: Optional[Dict[ParameterDefinition, Any]]
            task_decorator=None,  # type: Optional[TaskDecorator]
            source_code=None,  # type: Optional[TaskSourceCode]
            external_parameters=None,  # type: Optional[Parameters]
            task_definition_uid=None,  # type: Optional[UUID]
    ):
        super(TaskDefinition, self).__init__()

        self.hidden = False

        self.task_passport = task_passport
        self.source_code = source_code
        self.task_decorator = task_decorator
        self.base_task_definitions = (
            base_task_definitions or []
        )  # type: List[TaskDefinition]

        # TODO: maybe use properties or another way to delegate those...
        self.full_task_family = self.task_passport.full_task_family
        self.full_task_family_short = self.task_passport.full_task_family_short
        self.task_family = self.task_passport.task_family
        self.task_config_section = self.task_passport.task_config_section

        # all the attributes that point to a Parameter
        self.task_param_defs = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_param_defs = self._calculate_task_class_values(
            classdict, external_parameters)
        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the function's return value can not be used as the result
        self.single_result_output = self._is_result_single_output(
            self.task_param_defs)

        self.param_defaults = {
            p.name: p.default
            for p in self.task_param_defs.values() if is_defined(p.default)
        }

        # TODO: consider joining with task_config
        # TODO: calculate defaults value as _ConfigStore and merge using standard mechanism
        self.defaults = self._calculate_task_defaults(defaults)
        self.task_defaults_config_store = parse_and_build_config_store(
            source=self.task_passport.format_source_name("task.defaults"),
            config_values=self.defaults,
            priority=ConfigValuePriority.FALLBACK,
        )

        self.task_signature_extra = {}
        if config.getboolean("task_build", "sign_with_full_qualified_name"):
            self.task_signature_extra[
                "full_task_family"] = self.full_task_family
        if config.getboolean("task_build", "sign_with_task_code"):
            self.task_signature_extra[
                "task_code_hash"] = user_friendly_signature(
                    self.source_code.task_source_code)

        if task_definition_uid:
            self.task_definition_uid = task_definition_uid
        else:
            self.task_definition_uid = get_uuid()