Exemplo n.º 1
0
    def run_airflow_dynamic_task(self, func_call):
        # type: (FuncCall) -> Any
        """
        Run ``func_call`` as a dynamic task, or fall back to invoking it.

        When a task is already current, the call is run as a nested dynamic
        task (``attach_to_monitor_op=False``) only if the current phase is
        RUN, dynamic tasks are enabled in the current task's settings and the
        current task reports support for them; otherwise ``func_call`` is
        invoked directly.

        When no task is current, the run context and the operator's task-run
        execution context are entered first and the dynamic task is run with
        ``attach_to_monitor_op=True``.

        Exceptions raised while checking nested support, or before both
        contexts have been entered, are handed to ``_handle_tracking_error``.
        Exceptions raised after both contexts were entered propagate.
        """
        if has_current_task():
            try:
                active_task = current_task()
                active_phase = current_phase()
                nested_supported = bool(
                    active_phase is TaskContextPhase.RUN
                    and active_task.settings.dynamic_task.enabled
                    and active_task.task_supports_dynamic_tasks
                )
            except Exception:
                return _handle_tracking_error(func_call, "nested-check")

            if not nested_supported:
                # unsupported mode: just call the function
                return func_call.invoke()
            return self._create_and_run_dynamic_task_safe(
                func_call, attach_to_monitor_op=False
            )

        # Flipped once both contexts are active, so the handler below can tell
        # a context-setup failure apart from a failure raised afterwards.
        entered = False
        try:
            with self.dr.run_context():
                runner = self.airflow_operator__task_run.runner
                with runner.task_run_execution_context():
                    entered = True
                    return self._create_and_run_dynamic_task_safe(
                        func_call, attach_to_monitor_op=True
                    )
        except Exception:
            if not entered:
                return _handle_tracking_error(func_call, "context-enter")
            raise
Exemplo n.º 2
0
    def get_context_spawn_env(self):
        """
        Build a dict of environment variables describing this run context.

        The dict always carries the root run uid (as a string) and the root
        run tracker URL. The parent task run uid is added when a task run can
        be resolved for the current task (or for ``self.root_task`` when no
        task is current). The user pre-init entry is added when the
        ``core.user_code_on_fork`` setting is truthy.

        :return: mapping of environment variable name to value
        """
        active = current_task() if has_current_task() else self.root_task

        spawn_env = {}
        if active:
            task_run = self.get_task_run_by_id(active.task_id)
            if task_run:
                spawn_env[DBND_PARENT_TASK_RUN_UID] = str(task_run.task_run_uid)

        spawn_env.update({
            DBND_ROOT_RUN_UID: str(self.root_run_info.root_run_uid),
            DBND_ROOT_RUN_TRACKER_URL: self.root_run_info.root_run_url,
        })

        on_fork_code = self.context.settings.core.user_code_on_fork
        if on_fork_code:
            spawn_env[ENV_DBND__USER_PRE_INIT] = on_fork_code
        return spawn_env
Exemplo n.º 3
0
        def simple_beam(text_input=parameter[PathStr],
                        text_output=beam_output):
            # Builds a small Beam pipeline through the current task, checks
            # that the extra "--param" argument reaches the pipeline options,
            # then reads text_input, pairs every line with 1, groups by key
            # and writes the result to text_output.

            # Options class declaring a mandatory "--param" argument.
            class RequireSomeParameter(PipelineOptions):
                @classmethod
                def _add_argparse_args(cls, parser):
                    parser.add_argument("--param", dest="param", required=True)

            pipeline = current_task().build_pipeline(["--param", "2"])

            # Read the text file[pattern] into a PCollection.
            lines = pipeline | "read" >> ReadFromText(text_input)

            # The argument given to build_pipeline must be visible through
            # the custom options view.
            custom_options = pipeline._options.view_as(RequireSomeParameter)
            assert custom_options.param == "2"

            paired = lines | "pair_with_one" >> beam.Map(
                lambda line: (line, 1))
            counts = paired | "group" >> beam.GroupByKey()

            counts | "write" >> WriteToText(text_output)
            target(text_output).mkdir()

            run_result = pipeline.run()
            run_result.wait_until_finish()
Exemplo n.º 4
0
 def my_task():
     # The task that is current while this body runs must be named "test_name".
     running_task = current_task()
     assert running_task.task_name == "test_name"
Exemplo n.º 5
0
 def my_task():
     # The task that is current while this body runs must have version "2".
     running_task = current_task()
     assert running_task.task_version == "2"
Exemplo n.º 6
0
    def _call_handler(cls, call_user_code, call_args, call_kwargs):
        """
        -= Use "Step into My Code" to get back from Databand code! =-

        Handle a call to / creation of a decorated object
        ( my_func(), MyDecoratedTask() ).

        The call is routed, in this order, to one of:
          * a plain ``call_user_code(*call_args, **call_kwargs)`` when
            ``__force_invoke`` was passed in ``call_kwargs`` (it is popped
            from the dict) or Databand is not enabled;
          * ``build_task_at_airflow_dag_context`` inside an Airflow DAG build
            context;
          * ``track_airflow_dag_run_operator_run`` when there is no current
            task but an Airflow context is available (a plain call otherwise);
          * task construction in the BUILD phase, returning ``task.result``
            for single-result tasks and the task object otherwise;
          * ``run_dynamic_task`` in the RUN phase when the current task allows
            dynamic tasks, returning the loaded result for single-result
            tasks and the task object otherwise;
          * a plain call as the final fallback.
        """
        # 1. the call comes from Task.run / Task.band direct invocation
        # 2. Databand is not enabled
        if call_kwargs.pop("__force_invoke", False) or not is_databand_enabled():
            return call_user_code(*call_args, **call_kwargs)

        if is_in_airflow_dag_build_context():
            return build_task_at_airflow_dag_context(
                task_cls=cls, call_args=call_args, call_kwargs=call_kwargs
            )

        if not has_current_task():
            # DBND handling of a call made outside of any task
            airflow_ctx = try_get_airflow_context()
            if not airflow_ctx:
                # direct call to the function
                return call_user_code(*call_args, **call_kwargs)
            return track_airflow_dag_run_operator_run(
                task_cls=cls,
                call_args=call_args,
                call_kwargs=call_kwargs,
                airflow_task_context=airflow_ctx,
            )

        # From here on the call comes from user code running inside a task
        # ( some_func() or SomeTask() ), not from _invoke_func.
        parent_task = current_task()
        phase = current_phase()

        if phase is TaskContextPhase.BUILD:
            # @pipeline context: the execution plan is being built.
            built_task = cls(*call_args, **call_kwargs)
            # Single-output tasks hand back their result object; tasks with
            # multiple outputs ( result, another output.. ) hand back the task.
            single_output = built_task.task_definition.single_result_output
            return built_task.result if single_output else built_task

        dynamic_allowed = (
            phase is TaskContextPhase.RUN
            and parent_task.settings.dynamic_task.enabled
            and parent_task.task_supports_dynamic_tasks
        )
        if not dynamic_allowed:
            # we can not call it in the "databand" way, fallback to normal execution
            return call_user_code(*call_args, **call_kwargs)

        # Inside a run function whose task supports inline (dynamic) calls.
        # NOTE(review): an earlier comment here mentioned an isinstance() check
        # guarding against infinite recursion for class-decorated tasks; no
        # such check is visible in this block, only __force_invoke above.
        dynamic_run = run_dynamic_task(
            parent_task_run=current_task_run(),
            task_cls=cls,
            call_args=call_args,
            call_kwargs=call_kwargs,
        )
        executed_task = dynamic_run.task
        # inside run we want real values, not deferred ones
        if executed_task.task_definition.single_result_output:
            return executed_task.__class__.result.load_from_target(
                executed_task.result)
        # no single result output: the task object itself is returned
        return executed_task