Пример #1
0
def task_with_str_param(something=parameter(default=None)[str]):
    """
    Task body that checks how a ``str``-typed parameter is represented.

    Looks up the currently running task, renders it as a command line and
    asserts that:
      * the ``str`` name visible in this module is the ``newstr`` based type
        (presumably the backport installed by the ``future`` package - confirm),
      * the rendered command line contains no "@" character.

    Returns the literal marker string "task_with_str".
    """
    task_ctrl = try_get_current_task().ctrl  # type: TaskCtrl
    cmd_line = task_ctrl.task_repr.calculate_command_line_for_task()
    str_type = type(str)
    logging.info("Str type: %s, task repr: %s", str_type, cmd_line)

    assert "newstr.BaseNewStr" in str(str_type)
    assert "@" not in cmd_line
    return "task_with_str"
Пример #2
0
def task_with_object_param(something=parameter(default=None)[object]):
    """
    Task body that checks how an ``object``-typed parameter is represented.

    Looks up the currently running task, renders it as a command line and
    asserts that:
      * the ``object`` name visible in this module has "newobject" in its
        ``__name__`` (presumably the ``future`` package backport - confirm),
      * the rendered command line contains no "@" character.

    Returns the literal marker string "task_with_object_param".
    """
    task_ctrl = try_get_current_task().ctrl  # type: TaskCtrl
    cmd_line = task_ctrl.task_repr.calculate_command_line_for_task()
    logging.info("Object type: %s, task repr: %s", type(object), cmd_line)

    assert "newobject" in object.__name__
    assert "@" not in cmd_line
    return "task_with_object_param"
Пример #3
0
def _get_or_create_inplace_task():
    """
    Return the current task, falling back to the task of the inplace task run.

    When ``try_get_current_task()`` yields a truthy task it is returned as is.
    Otherwise ``try_get_inplace_task_run()`` is consulted and, if it returns a
    task run, that run's ``task`` is returned. If neither is available, the
    (falsy) result of ``try_get_current_task()`` is returned unchanged.
    """
    task = try_get_current_task()
    if task:
        return task

    # imported lazily: only needed when there is no current task
    from dbnd._core.inplace_run.inplace_run_manager import try_get_inplace_task_run

    inplace_run = try_get_inplace_task_run()
    return inplace_run.task if inplace_run else task
Пример #4
0
    def __init__(
        self,
        task_name,
        task_definition,
        task_params,
        task_signature_obj=None,
        task_version=None,
    ):
        """
        Build a tracking task, attach it to the current task (if any) and register it.

        :param task_name: forwarded to the base-class constructor.
        :param task_definition: forwarded to the base-class constructor and stored
            on the instance (a TaskDefinition according to the type comment below).
        :param task_params: forwarded to the base-class constructor; output
            parameters whose value is not defined are updated on this object
            with a generated tracking output.
        :param task_signature_obj: optional signature object; when falsy a new
            one is produced by ``_generate_unique_tracking_signature()``.
        :param task_version: optional explicit version; when falsy the parent's
            version is used, or a UTC timestamp if there is no parent task.
        """
        # fall back to a generated signature when the caller did not provide one
        task_signature_obj = task_signature_obj or _generate_unique_tracking_signature(
        )

        super(TrackingTask, self).__init__(
            task_name=task_name,
            task_definition=task_definition,
            task_signature_obj=task_signature_obj,
            task_params=task_params,
        )

        self.task_definition = task_definition  # type: TaskDefinition
        # we don't have a separate signature for outputs, so the task signature is reused
        self.task_outputs_signature_obj = self.task_signature_obj
        self.ctrl = TrackingTaskCtrl(self)

        # call-source chain: this task's own user-side frame first,
        # followed by the parent's chain (appended below when a parent exists)
        self.task_call_source = [
            self.dbnd_context.user_code_detector.find_user_side_frame(1)
        ]
        parent_task = try_get_current_task()
        if parent_task:
            # register this task as a child of the task that is current right now
            parent_task.descendants.add_child(self.task_id)
            self.task_call_source.extend(parent_task.task_call_source)

            # inherit from parent if it has it
            self.task_version = task_version or parent_task.task_version
            self.task_target_date = parent_task.task_target_date
            self.task_env = parent_task.task_env
            # pass-through parent children scope params
            # task_children_scope_params will be used in case of any Task inside TrackedTask
            # for example tracked task creates Config objects
            self.task_children_scope_params = parent_task.task_children_scope_params
        else:
            # no parent task: derive version/date from the current UTC time
            # we need better definition of "what we use for tracking"
            self.task_version = task_version or utcnow().strftime(
                "%Y%m%d_%H%M%S")
            self.task_target_date = utcnow().date()
            self.task_env = get_databand_context().env
            self.task_children_scope_params = {}

        # maps output parameter name -> output value (FuncResultParameter outputs are skipped)
        self.task_outputs = dict()
        for parameter, value in self._params.get_params_with_value(
                ParameterFilters.OUTPUTS):
            if is_not_defined(value):
                # no value supplied for this output: generate a tracking output
                # and store it back into task_params
                # NOTE(review): the local `value` is not rebound to
                # `value_as_target`, so the code below still works with the
                # original (not defined) value - confirm this is intended
                value_as_target = self.build_tracking_output(parameter)
                task_params.update_param_value(parameter.name, value_as_target)

            if isinstance(parameter, FuncResultParameter):
                continue

            # This is used to keep backward compatibility for tracking luigi behaviour
            # This is not something we want to keep, at least not in this form
            value = traverse_and_set_target(value,
                                            parameter._target_source(self))
            self.task_outputs[parameter.name] = value

        self.ctrl._initialize_task()

        # so we can be found via task_id
        self.dbnd_context.task_instance_cache.register_task_instance(self)
Пример #5
0
def _call_handler(task_cls, call_user_code, call_args, call_kwargs):
    """
    -= Use "Step into My Code" to get back from Databand code! =-

    Entry point for a call/creation of a decorated object
    ( my_func(), MyDecoratedTask() ).

    Depending on the context the call is:
      * executed directly as user code (forced invoke or Databand disabled),
      * turned into an Airflow DAG task (Airflow DAG build context),
      * executed through FuncCall.invoke() (no current task at all),
      * turned into a task object / its result (BUILD phase),
      * executed as a dynamic task (RUN phase, when enabled and supported),
      * otherwise executed through FuncCall.invoke().

    Note: "__force_invoke" is removed from ``call_kwargs`` in place.
    """
    invoke_directly = call_kwargs.pop("__force_invoke", False)
    project_config = get_dbnd_project_config()
    if invoke_directly or project_config.disabled:
        # 1. Databand is not enabled
        # 2. this call comes from Task.run / Task.band direct invocation
        return call_user_code(*call_args, **call_kwargs)

    pending_call = FuncCall(
        task_cls=task_cls,
        call_args=call_args,
        call_kwargs=call_kwargs,
        call_user_code=call_user_code,
    )

    # Airflow DAG building mode
    if is_in_airflow_dag_build_context():
        return build_task_at_airflow_dag_context(
            task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
        )

    active_task = try_get_current_task()
    if not active_task:
        # no current task: try the task of the inplace tracking task run
        from dbnd._core.tracking.script_tracking_manager import (
            try_get_inplace_tracking_task_run,
        )

        inplace_run = try_get_inplace_tracking_task_run()
        active_task = inplace_run.task if inplace_run else active_task

    if not active_task:
        # direct call to the function
        return pending_call.invoke()

    # From here on there is a current task and we are not in tracking/airflow/luigi:
    # DBND Orchestration mode, inside the context of .run() or .band(),
    # called from user code using some_func() or SomeTask().
    # This call path is not coming from _invoke_func.
    phase = current_phase()

    if phase is TaskContextPhase.BUILD:
        # we are in the @pipeline context, building the execution plan
        built_task = task_cls(*call_args, **call_kwargs)
        # single result output -> hand back the result itself,
        # multiple outputs ( result, another output.. ) -> hand back the task object
        return (
            built_task.result
            if built_task.task_definition.single_result_output
            else built_task
        )

    # Inside the run function a dynamic task is created only when the current
    # task has it enabled and supports inline calls; this is an extra mechanism
    # in addition to __force_invoke (see test_task_decorated_class.py for the
    # @task-on-class case mentioned by the original author).
    run_as_dynamic_task = (
        phase is TaskContextPhase.RUN
        and active_task.settings.dynamic_task.enabled
        and active_task.task_supports_dynamic_tasks
    )
    if run_as_dynamic_task:
        return create_and_run_dynamic_task_safe(
            func_call=pending_call, parent_task_run=active_task
        )

    # we can not call it in the "databand" way, fall back to normal execution
    return pending_call.invoke()
Пример #6
0
    def __init__(
        self,
        task_name,
        task_family,
        task_definition,
        config_layer,
        task_params,
        task_config_override,
        task_enabled=True,
        task_sections=None,
    ):
        """
        Collect the identity, configuration and origin information of a task.

        :param task_name: stored as ``self.task_name``.
        :param task_family: stored as ``self.task_family``.
        :param task_definition: stored as ``self.task_definition``; its
            ``task_class._conf__task_type_name`` becomes ``self.task_type``.
        :param config_layer: stored as ``self.config_layer``.
        :param task_params: mapping of parameter values (``Dict[str, ParameterValue]``
            per the type comment); significant ones feed the task id calculation.
        :param task_config_override: stored as ``self.task_config_override``.
        :param task_enabled: stored as ``self.task_enabled`` (default True).
        :param task_sections: stored as ``self.task_sections`` (default None).
        """
        super(TaskMeta, self).__init__()
        # we should not use it directly, the value in the object can be outdated
        self.task_params = task_params  # type: Dict[str, ParameterValue]

        # passport
        self.task_name = task_name  # type: str
        self.task_family = task_family  # type: str
        self.task_definition = task_definition  # type: TaskDefinition

        # configuration data
        self.config_layer = config_layer  # type: _ConfigLayer
        self.task_sections = task_sections
        self.task_config_override = task_config_override

        self.dbnd_context = get_databand_context()  # type: DatabandContext

        # ids and signatures
        # (placeholders; presumably filled in by initialize_task_id() below - confirm)
        self.task_id = None
        self.task_signature = None
        self.task_signature_source = None
        self.task_outputs_signature = None
        self.task_outputs_signature_source = None

        self.task_enabled = task_enabled

        self.obj_key = self._calculate_task_meta_key()
        # we want to have task id immediately
        # only (name, signature) pairs of significant ParameterDefinition-based
        # parameters take part in the id calculation
        self.initialize_task_id([
            (p_value.name, p_value.parameter.signature(p_value.value))
            for p_value in self.task_params.values()
            if isinstance(p_value.parameter, ParameterDefinition)
            and p_value.parameter.significant
        ])

        # NOTE(review): task_enabled was already assigned above with the same value;
        # this second assignment looks redundant unless the calls in between
        # modify it - confirm
        self.task_enabled = task_enabled  # relevant only for orchestration

        # miscellaneous
        self.task_type = self.task_definition.task_class._conf__task_type_name
        # `username` is neither a parameter nor a local of this method;
        # it is resolved from an enclosing scope that is not visible here
        self.task_user = username

        # call-source chain: this task's own user-side frame first,
        # followed by the chain of the currently running (parent) task, if any
        self.task_call_source = [
            self.dbnd_context.user_code_detector.find_user_side_frame(2)
        ]
        parent_task = try_get_current_task()
        if self.task_call_source and parent_task:
            self.task_call_source.extend(
                parent_task.task_meta.task_call_source)

        # we count task meta creation
        # even if task_meta will not be used by TaskMetaClass when we already have created task
        # we will increase creation id
        # if t2 created after t1, t2.task_meta.task_creation_id > t1.task_meta.task_creation_id
        TaskMeta._current_task_creation_id += 1
        self.task_creation_id = TaskMeta._current_task_creation_id
Пример #7
0
    def handle_callable_call(self, *call_args, **call_kwargs):
        """
        Dispatch a call of the decorated class/function according to the dbnd mode.

        The call is handled, in this order, as:
          * a plain call of ``self.class_or_func`` when dbnd is disabled,
          * a tracked call (inside ``self.tracking_context``) in tracking mode,
          * an Airflow DAG task when called inside an Airflow DAG build context,
          * a plain call (with a one-time warning) when there is no current task,
          * task creation when the current phase is BUILD: returns ``t.result``
            for a single-result task definition, otherwise the task object,
          * when the current phase is RUN: a task run started from the current
            task if that is enabled and supported, otherwise a plain call.

        :param call_args: positional arguments of the user's call.
        :param call_kwargs: keyword arguments of the user's call; for decorated
            classes the "__call_original_cls" key is removed/reset here.
        :return: whatever the selected call path returns.
        :raises Exception: when the current phase is neither BUILD nor RUN.
        """
        dbnd_project_config = get_dbnd_project_config()
        if dbnd_project_config.disabled:
            return self.class_or_func(*call_args, **call_kwargs)

        # we are at tracking mode
        if dbnd_project_config.is_tracking_mode():
            with self.tracking_context(call_args, call_kwargs) as track_result_callback:
                fp_result = self.class_or_func(*call_args, **call_kwargs)
                return track_result_callback(fp_result)

        #### DBND ORCHESTRATION MODE
        #
        #     -= Use "Step into My Code" to get back from dbnd code! =-
        #
        # decorated object call/creation  ( my_func(), MyDecoratedTask()
        # we are at orchestration mode

        task_cls = self.get_task_cls()

        if is_in_airflow_dag_build_context():
            # we are in Airflow DAG building mode - AIP-31
            return build_task_at_airflow_dag_context(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )

        current = try_get_current_task()
        if not current:
            # no tracking/no orchestration,
            # falling back to "natural call" of the class_or_func
            message = (
                "Can't report tracking info. %s is decorated with @task, but no tracking context was found"
                % (self.class_or_func.__name__,)
            )
            get_one_time_logger().log_once(message, "task_decorator", logging.WARNING)
            return self.class_or_func(*call_args, **call_kwargs)

        ######
        # current is not None, and we are not in tracking/airflow/luigi
        # this is DBND Orchestration mode
        # we can be in the context of task.run() or in task.band()
        # called from user code using user_decorated_func()  or UserDecoratedTask()

        if self.is_class:
            # drop the internal marker so it is not passed on as a task argument
            call_kwargs.pop("__call_original_cls", False)

        # we should not get here from _TaskFromTaskDecorator.invoke()
        # at that function we should call user code directly
        phase = current_phase()
        if phase is TaskContextPhase.BUILD:
            # we are in the @pipeline.band() context, we are building execution plan
            t = task_cls(*call_args, **call_kwargs)

            # we are in the band, and if user_code() is called we want to remove redundant
            # `user_code().result` usage
            if t.task_definition.single_result_output:
                return t.result

            # we have multiple outputs (more than one "output" parameter)
            # just return task object, user will use it as `user_code().output_1`
            return t
        elif phase is TaskContextPhase.RUN:
            # we are "running" inside some other task execution (orchestration!)
            #  (inside user_defined_function() or UserDefinedTask.run()

            # if possible we will run it as "orchestration" task
            # with parameters parsing
            if (
                current.settings.run.task_run_at_execution_time_enabled
                and current.task_supports_dynamic_tasks
            ):
                return self._run_task_from_another_task_execution(
                    parent_task=current, call_args=call_args, call_kwargs=call_kwargs
                )
            # we can not call it in "dbnd" way, fallback to normal call
            if self.is_class:
                call_kwargs["__call_original_cls"] = False
            return self.class_or_func(*call_args, **call_kwargs)
        else:
            # neither BUILD nor RUN: report the phase instead of raising a
            # message-less exception that is impossible to diagnose
            raise Exception(
                "Unexpected task context phase %r while handling a call "
                "of a @task decorated object" % (phase,)
            )
Пример #8
0
    def tracking_context(self, call_args, call_kwargs):
        """
        Generator that wraps one call of the user code with tracking.

        It yields a single callable that the caller is expected to apply to the
        result of the user code:
          * ``func_call.set_result`` when tracking was initialized successfully,
          * ``_do_nothing_decorator`` when tracking is skipped (call limit
            exceeded, no tracking task run available) or failed to initialize.

        Exceptions raised by the user code (thrown in at the main ``yield``) mark
        the task run as FAILED and are re-raised. Exceptions raised by the
        tracking code itself are logged and swallowed.

        NOTE(review): presumably this function is wrapped with
        ``contextlib.contextmanager`` (the decorator is not visible here) - confirm.

        NOTE(review): if an exception is thrown into the generator at one of the
        early ``yield _do_nothing_decorator`` statements inside the ``try``, the
        outer handler treats it as a tracking error (``user_code_called`` is
        still False) and execution then reaches the final ``yield`` a second
        time. Under ``contextlib.contextmanager`` that would surface as a
        RuntimeError instead of the user's exception - confirm.

        :param call_args: positional arguments of the user's call (copied, not modified).
        :param call_kwargs: keyword arguments of the user's call (copied, not modified).
        """
        user_code_called = False  # whether we got to executing of user code
        user_code_finished = False  # whether we passed executing of user code
        func_call = None
        try:
            # 1. check that we don't have too many calls
            if self._call_count_limit_exceeded():
                yield _do_nothing_decorator
                return

            # 2. Start or reuse existing "main tracking task" that is root for tracked tasks
            if not try_get_current_task():
                """
                try to get existing task, and if not exists - try to get/create inplace_task_run
                """
                from dbnd._core.tracking.script_tracking_manager import (
                    try_get_inplace_tracking_task_run, )

                inplace_tacking_task = try_get_inplace_tracking_task_run()
                if not inplace_tacking_task:
                    # we didn't manage to start inplace tracking task run, we will not be able to track
                    yield _do_nothing_decorator
                    return

            tracking_task_definition = self.get_tracking_task_definition()
            callable_spec = tracking_task_definition.task_decorator.get_callable_spec(
            )

            func_call = TrackedFuncCallWithResult(
                callable=self.callable,
                call_args=tuple(
                    call_args),  # prevent original call_args modification
                call_kwargs=dict(
                    call_kwargs),  # prevent original kwargs modification
            )
            # replace any positional argument with a kwarg if possible
            args, kwargs = args_to_kwargs(callable_spec.args,
                                          func_call.call_args,
                                          func_call.call_kwargs)

            # instantiate inline task
            task = TrackingTask.for_func(tracking_task_definition, args,
                                         kwargs)

            # update upstream/downstream relations - needed for correct tracking
            # we can have the task as upstream , as it was executed already
            parent_task = current_task_run().task
            if not parent_task.task_dag.has_upstream(task):
                parent_task.set_upstream(task)

            # checking if any of the inputs are the outputs of previous task.
            # we can add that task as upstream.
            dbnd_run = get_databand_run()
            call_kwargs_as_targets = dbnd_run.target_origin.get_for_map(kwargs)
            for value_origin in call_kwargs_as_targets.values():
                up_task = value_origin.origin_target.task
                task.set_upstream(up_task)

            # creating task_run as a task we found mid-run
            task_run = dbnd_run.create_task_run_at_execution_time(
                task, task_engine=current_task_run().task_engine)

            should_capture_log = (
                TrackingConfig.from_databand_context().capture_tracking_log)
            with task_run.runner.task_run_execution_context(
                    handle_sigterm=True, capture_log=should_capture_log):
                task_run.set_task_run_state(state=TaskRunState.RUNNING)

                _log_inputs(task_run)

                # if we reached this line, then all tracking initialization is
                # finished successfully, and we're going to execute user code
                user_code_called = True

                try:
                    # tracking_context is context manager - user code will run on yield
                    yield func_call.set_result

                    # if we reached this line, this means that user code finished
                    # successfully without any exceptions
                    user_code_finished = True
                # We catch BaseException since we want to catch KeyboardInterrupts as well
                except BaseException as ex:
                    task_run.finished_time = utcnow()

                    # report the user code failure on the task run, then propagate it
                    error = TaskRunError.build_from_ex(ex, task_run)
                    task_run.set_task_run_state(TaskRunState.FAILED,
                                                error=error)
                    raise

                else:
                    task_run.finished_time = utcnow()

                    # func_call.result should contain result, log it
                    _log_result(task_run, func_call.result)

                    task_run.set_task_run_state(TaskRunState.SUCCESS)
        except BaseException:
            if user_code_called and not user_code_finished:
                # if we started to call the user code and did not get to the user_code_finished
                # line - it means there was a user code exception - so just re-raise it
                raise
            # else either we didn't reach calling user code, or already passed it
            # then it's some dbnd tracking error - just log it
            if func_call:
                _handle_tracking_error("tracking-init", func_call)
            else:
                log_exception_to_server()
        # if we didn't reach the user_code_called=True line - there was an error during
        # dbnd tracking initialization, so nothing is done - user function wasn't called yet
        if not user_code_called:
            # tracking_context is context manager - user code will run on yield
            yield _do_nothing_decorator
            return