def wait(timeout):
    with new_dbnd_context(name="new_context") as dbnd_ctx:
        logger.info("Waiting {} seconds for tracker to become ready:".format(timeout))
        is_ready = wait_until(dbnd_ctx.tracking_store.is_ready, timeout)
        if not is_ready:
            logger.error("Tracker is not ready after {} seconds.".format(timeout))
            sys.exit(1)
        logger.info("Tracker is ready.")
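# A minimal sketch of the `wait_until` helper assumed above: poll the predicate
# until it returns truthy or the timeout elapses. The real dbnd helper may
# differ (interval, backoff); this just illustrates the contract `wait` relies on.
import time


def wait_until(predicate, timeout, interval=1):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval)
    return False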
def test_user_code_run(self):
    with new_dbnd_context(
        conf={"core": {"user_init": "test_dbnd.test_task_context._user_code"}}
    ):
        pass
    assert _user_code_run, "user code wasn't executed"
    logger.info("done")
def test_user_code_fail(self):
    with pytest.raises(Exception, match=r"USER_CODE_ERROR"):
        with new_dbnd_context(
            conf={"core": {"user_init": "test_dbnd.test_task_context._user_code_raises"}}
        ):
            pass
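# A minimal sketch of the helpers the two tests above reference through
# `core.user_init` (the names come from the tests; the bodies are an assumption
# inferred from what the tests assert):
_user_code_run = False


def _user_code():
    # flips the module-level flag asserted in test_user_code_run
    global _user_code_run
    _user_code_run = True


def _user_code_raises():
    # raises the error matched by test_user_code_fail
    raise Exception("USER_CODE_ERROR")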
def start(
    self,
    root_task_name,
    in_memory=True,
    run_uid=None,
    airflow_context=False,
    job_name=None,
):
    if try_get_databand_context():
        return

    if not airflow_context and not self._atexit_registered:
        atexit.register(self.stop)
        if is_airflow_enabled():
            from airflow.settings import dispose_orm

            atexit.unregister(dispose_orm)

    c = {
        # we don't want to "check" as script is task_version="now"
        "run": {"skip_completed": False},
        # do not save any outputs
        "task": {"task_in_memory_outputs": in_memory},
    }
    config.set_values(config_values=c, override=True, source="dbnd_start")
    context_kwargs = {"name": "airflow"} if airflow_context else {}

    # create databand context
    dc = self._enter_cm(new_dbnd_context(**context_kwargs))  # type: DatabandContext

    root_task = _build_inline_root_task(root_task_name, airflow_context=airflow_context)

    # create databand run
    dr = self._enter_cm(
        new_databand_run(
            context=dc,
            task_or_task_name=root_task,
            run_uid=run_uid,
            existing_run=False,
            job_name=job_name,
        )
    )  # type: DatabandRun

    if run_uid:
        root_task_run_uid = get_task_run_uid(run_uid, root_task_name)
    else:
        root_task_run_uid = None
    dr._init_without_run(root_task_run_uid=root_task_run_uid)

    self._start_taskrun(dr.driver_task_run)
    self._start_taskrun(dr.root_task_run)
    return dr
def set_context(self, ti):
    """
    Airflow's log handler uses this method to set up the context when running
    a TaskInstance (=ti). We use this method to set up the dbnd context and to
    communicate information to the `<airflow_operator>_execute` task that we
    create in `execute_tracking.py`.
    """
    # we set up only when we are not in our own orchestration dag
    if ti.dag_id.startswith(AD_HOC_DAG_PREFIX):
        return

    if config.getboolean("mlflow_tracking", "databand_tracking"):
        self.airflow_logger.warning(
            "dbnd can't track mlflow and airflow together; please disable the dbnd config "
            "`databand_tracking` in section `mlflow_tracking`"
        )
        return

    # we are not tracking SubDagOperator
    if ti.operator == SubDagOperator.__name__:
        return

    task_key = calc_task_run_attempt_key_from_af_ti(ti)
    env_attempt_uid = os.environ.get(task_key)
    # this key is already set, which means we are in a --raw run
    if env_attempt_uid:
        # no need for further actions inside a --raw run
        return

    # communicate the task_run_attempt_uid to inner processes;
    # it will be used for the task_run of the `<airflow_operator>_execute` task
    self.task_run_attempt_uid = get_uuid()
    self.task_env_key = task_key
    os.environ[self.task_env_key] = str(self.task_run_attempt_uid)

    # airflow calculation for the relevant log_file
    log_relative_path = self.log_file_name_factory(ti, ti.try_number)
    self.log_file = os.path.join(self.airflow_base_log_dir, log_relative_path)

    # make sure we are not polluting the airflow logs
    get_dbnd_project_config().quiet_mode = True

    # tracking msg
    self.airflow_logger.info(
        "Tracked by Databand {version}".format(version=dbnd.__version__)
    )

    # context with disabled logs
    self.dbnd_context_manage = new_dbnd_context(conf={"log": {"disabled": True}})
    self.dbnd_context = self.dbnd_context_manage.__enter__()
def start(self, root_task_name, job_name=None):
    if self._run or self._active or try_get_databand_run():
        return

    airflow_context = try_get_airflow_context()
    set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

    # 1. create a proper DatabandContext so we can create other objects
    dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

    if airflow_context:
        root_task_or_task_name = AirflowOperatorRuntimeTask.build_from_airflow_context(
            airflow_context
        )
        source = UpdateSource.airflow_tracking
        job_name = "{}.{}".format(airflow_context.dag_id, airflow_context.task_id)
    else:
        root_task_or_task_name = _build_inline_root_task(root_task_name)
        source = UpdateSource.dbnd

    # create the databand run; this will create the run with driver and root tasks
    self._run = self._enter_cm(
        new_databand_run(
            context=dc,
            task_or_task_name=root_task_or_task_name,
            job_name=job_name,
            existing_run=False,
            source=source,
            af_context=airflow_context,
        )
    )  # type: DatabandRun

    if not self._atexit_registered:
        _set_process_exit_handler(self.stop)
        self._atexit_registered = True
    sys.excepthook = self.stop_on_exception
    self._active = True

    # now we send data to DB
    self._run._init_without_run()
    self._start_taskrun(self._run.driver_task_run)
    self._start_taskrun(self._run.root_task_run)
    self._task_run = self._run.root_task_run
    return self._task_run
def test_user_config_inject(self):
    with new_dbnd_context(
        conf={
            "core": {"user_init": "test_dbnd.test_task_context.inject_some_params"},
            "MyConfig22": {"config_id": "1"},
        }
    ) as c:
        c.dbnd_run_task(task_or_task_name="user_func")
    logger.info("done")
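# A plausible shape for the MyConfig22 config injected above (an assumption:
# a dbnd Config subclass exposing the single `config_id` parameter the test sets):
from dbnd import Config, parameter


class MyConfig22(Config):
    config_id = parameter[str]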
def start(self, root_task_name=None, airflow_context=None):
    if self._run or self._active or try_get_databand_run():
        return

    # we probably should use only the airflow context passed via the parameter;
    # also, there are mocks that cover only get_dbnd_project_config().airflow_context
    airflow_context = airflow_context or get_dbnd_project_config().airflow_context()
    set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

    dc = self._enter_cm(
        new_dbnd_context(name="inplace_tracking")
    )  # type: DatabandContext

    if airflow_context:
        root_task, job_name, source = build_run_time_airflow_task(airflow_context)
    else:
        root_task = _build_inline_root_task(root_task_name)
        job_name = root_task.task_name
        source = UpdateSource.dbnd

    self._run = run = self._enter_cm(
        new_databand_run(
            context=dc,
            job_name=job_name,
            existing_run=False,
            source=source,
            af_context=airflow_context,
        )
    )  # type: DatabandRun
    self._run.root_task = root_task

    if not self._atexit_registered:
        _set_process_exit_handler(self.stop)
        self._atexit_registered = True
    sys.excepthook = self.stop_on_exception
    self._active = True

    # now we send data to DB
    root_task_run = run._build_and_add_task_run(root_task)
    root_task_run.is_root = True

    # no need to track the state because we track in init_run
    run.root_task_run.set_task_run_state(TaskRunState.RUNNING, track=False)
    run.tracker.init_run()

    self._enter_cm(run.root_task_run.runner.task_run_execution_context())
    self._task_run = run.root_task_run
    return self._task_run
def databand_test_context(
    request, tmpdir, databand_context_kwargs, databand_config
):  # type: (...) -> DatabandContext
    test_config = {
        "run": {
            "name": _run_name_for_test_request(request),
            "heartbeat_interval_s": -1,
        },
        "local": {"root": str(tmpdir.join("local_root"))},
    }
    with config(test_config, source="databand_test_context"), new_dbnd_context(
        **databand_context_kwargs
    ) as t:
        yield t
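# Example of consuming the fixture above from a test (a sketch; the test name
# and assertion are illustrative, not taken from the test suite):
def test_context_is_initialized(databand_test_context):
    # the fixture yields an active DatabandContext with test-friendly config
    assert databand_test_context.tracking_store is not None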
def start(self, root_task_name, job_name=None):
    if self._run:
        return
    if self._started or self._disabled:
        # started or failed
        return

    try:
        if try_get_databand_run():
            return
        self._started = True

        # 1. create a proper DatabandContext so we can create other objects
        set_tracking_config_overide(use_dbnd_log=True)
        # create databand context
        dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

        root_task = _build_inline_root_task(root_task_name)
        # create databand run
        self._run = self._enter_cm(
            new_databand_run(
                context=dc,
                task_or_task_name=root_task,
                existing_run=False,
                job_name=job_name,
            )
        )  # type: DatabandRun
        self._run._init_without_run()

        if not self._atexit_registered:
            atexit.register(self.stop)
        sys.excepthook = self.stop_on_exception

        self._start_taskrun(self._run.driver_task_run)
        self._start_taskrun(self._run.root_task_run)
        self._task_run = self._run.root_task_run
        return self._task_run
    except Exception:
        _handle_inline_error("inline-start")
        self._disabled = True
        return
    finally:
        self._started = True
def start(self, root_task_name=None, airflow_context=None):
    if self._run or self._active or try_get_databand_run():
        return

    airflow_context = airflow_context or try_get_airflow_context()
    set_tracking_config_overide(use_dbnd_log=True, airflow_context=airflow_context)

    dc = self._enter_cm(new_dbnd_context())  # type: DatabandContext

    if airflow_context:
        root_task, job_name, source = build_run_time_airflow_task(airflow_context)
    else:
        root_task = _build_inline_root_task(root_task_name)
        job_name = None
        source = UpdateSource.dbnd

    self._run = self._enter_cm(
        new_databand_run(
            context=dc,
            task_or_task_name=root_task,
            job_name=job_name,
            existing_run=False,
            source=source,
            af_context=airflow_context,
            send_heartbeat=False,
        )
    )  # type: DatabandRun

    if not self._atexit_registered:
        _set_process_exit_handler(self.stop)
        self._atexit_registered = True
    sys.excepthook = self.stop_on_exception
    self._active = True

    # now we send data to DB
    self._run._init_without_run()
    self._start_taskrun(self._run.driver_task_run)
    self._start_taskrun(self._run.root_task_run)
    self._task_run = self._run.root_task_run
    return self._task_run
def _list_tasks(ctx, module, search, is_config):
    from dbnd import Config
    from dbnd._core.context.databand_context import new_dbnd_context
    from dbnd._core.parameter.parameter_definition import _ParameterKind

    formatter = ctx.make_formatter()

    load_user_modules(config, modules=module)

    with new_dbnd_context():
        tasks = get_task_registry().list_dbnd_task_classes()

    for task_cls in tasks:
        td = task_cls.task_definition
        full_task_family = td.full_task_family
        task_family = td.task_family

        if not (task_family.startswith(search) or full_task_family.startswith(search)):
            continue

        if issubclass(task_cls, Config) != is_config:
            continue

        dl = []
        for param_name, param_obj in td.task_param_defs.items():
            if param_obj.system or param_obj.kind == _ParameterKind.task_output:
                continue
            if not is_config and param_name in COMMON_PARAMS:
                continue
            param_help = _help(param_obj.description)
            dl.append((param_name, param_help))

        if dl:
            with formatter.section(
                "{task_family} ({full_task_family})".format(
                    full_task_family=full_task_family, task_family=task_family
                )
            ):
                formatter.write_dl(dl)

    click.echo(formatter.getvalue().rstrip("\n"))
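# Illustrative output shape produced by the formatter sections above
# (task and parameter names are made up):
#
#   prepare_data (my_project.tasks.PrepareData)
#     input_path   Path to the raw input data.
#     sample_rate  Fraction of rows to keep.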
def _get_task_run_mock(tra_uid):
    """
    We need a better implementation for this; currently it is in use only for spark.
    """
    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker

        task_run = TaskRunMock(tra_uid)

        from dbnd import config
        from dbnd._core.settings import CoreConfig

        with config(
            {CoreConfig.tracker_raise_on_error: False}, source="on_demand_tracking"
        ):
            with new_dbnd_context(
                name="fast_dbnd_context", autoload_modules=False
            ) as fast_dbnd_ctx:
                trt = TaskRunTracker(task_run, fast_dbnd_ctx.tracking_store)
                task_run.tracker = trt
                return task_run
    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None
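# Sketch of on-demand usage (the attempt uid value is illustrative, and it is
# an assumption that the returned tracker exposes dbnd's usual metric logging):
task_run = _get_task_run_mock(tra_uid="00000000-0000-0000-0000-000000000000")
if task_run is not None:
    task_run.tracker.log_metric("rows_processed", 1000)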
def run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    verbose,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project_name,
    name,
    description,
    run_driver,
    alternative_task_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
):
    """
    Run a task or a DAG

    To see tasks use `dbnd show-tasks` (tab completion is available).
    """
    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_name = task
    # --verbose, --describe, --env, --parallel, --conf-file and --project-name
    # we filter out false flags, since otherwise they would always override the config with their falseness
    main_switches = dict(
        databand=filter_dict_remove_false_values(
            dict(
                verbose=verbose > 0,
                describe=describe,
                env=env,
                conf_file=conf_file,
                project_name=project_name,
            )
        ),
        run=filter_dict_remove_false_values(
            dict(
                name=name,
                parallel=parallel,
                description=description,
                is_archived=describe,
            )
        ),
    )

    if submit_driver is not None:
        main_switches["run"]["submit_driver"] = bool(submit_driver)
    if submit_tasks is not None:
        main_switches["run"]["submit_tasks"] = bool(submit_tasks)
    if disable_web_tracker:
        main_switches.setdefault("core", {})["tracker_api"] = "disabled"

    if task_version is not None:
        main_switches["task"] = {"task_version": task_version}

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task,
    # and add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(_overrides, source="--set-override", override=True)
        )
    if interactive:
        cmd_line_config.update(
            _parse_cli([{"run.interactive": True}], source="--interactive")
        )
    if verbose > 1:
        cmd_line_config.update(
            _parse_cli([{"task_build.verbose": True}], source="-v -v")
        )

    if cmd_line_config:
        config.set_values(cmd_line_config, source="cmdline")
    if verbose:
        logger.info("CLI config: \n%s", pformat_config_store_as_table(cmd_line_config))

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we should be bootstrapped already, as we run from cli.
    dbnd_bootstrap()

    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    with new_dbnd_context(name="run", module=module) as context:  # type: DatabandContext
        task_registry = get_task_registry()

        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

        # modules are loaded, we can load the task
        task_cls = None
        if task_name:
            task_cls = task_registry.get_task_cls(task_name)
            if alternative_task_name:
                task_cls = build_dynamic_task(
                    original_cls=task_cls, new_cls_name=alternative_task_name
                )
                task_name = alternative_task_name

        # --set-root
        # now we can set its config; as it's not the main task, we can load its config
        # after the configuration is loaded
        if task_cls is not None:
            if root_task_config:
                # adding root task to configuration
                config.set_values(
                    {task_cls.task_definition.task_config_section: root_task_config},
                    source="--set-root",
                )

        if is_help or not task_name:
            print_help(ctx, task_cls)
            return

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            run_uid=run_driver,
            scheduled_run_info=scheduled_run_info,
        )
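# Illustrative CLI invocations that reach the entry point above
# (task and parameter names are made up):
#
#   dbnd run my_project.tasks.PrepareData --set input_path=/data/raw.csv
#   dbnd run my_project.tasks.PrepareData --set-config run.parallel=True -v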
def cmd_run(
    ctx,
    is_help,
    task,
    module,
    _sets,
    _sets_config,
    _sets_root,
    _overrides,
    _extend,
    verbose,
    print_task_band,
    describe,
    env,
    parallel,
    conf_file,
    task_version,
    project,
    name,
    description,
    run_driver,
    override_run_uid,
    alternative_task_name,
    job_name,
    scheduled_job_name,
    scheduled_date,
    interactive,
    submit_driver,
    submit_tasks,
    disable_web_tracker,
    open_web_tab,
    docker_build_tag,
):
    """
    Run a task or a DAG

    To see all available tasks use `dbnd show-tasks` (tab completion is available).
    `dbnd show-configs` will print all available configs.
    """
    from dbnd import config
    from dbnd._core.context.databand_context import DatabandContext, new_dbnd_context
    from dbnd._core.utils.structures import combine_mappings

    task_registry = get_task_registry()

    # we need to do it before we are looking for the task cls
    load_user_modules(dbnd_config=config, modules=module)

    task_name = task

    # --verbose, --describe, --env, --parallel, --conf-file and --project
    # we filter out false flags, since otherwise they would always override the config with their falseness
    main_switches = dict(
        databand=dict(
            verbose=verbose > 0,
            print_task_band=print_task_band,
            describe=describe,
            env=env,
            conf_file=conf_file,
            project=project,
        ),
        run=dict(
            name=name,
            parallel=parallel,
            interactive=interactive,
            description=description,
            is_archived=describe,
            open_web_tracker_in_browser=open_web_tab,
            submit_driver=_nullable_flag(submit_driver),
            submit_tasks=_nullable_flag(submit_tasks),
        ),
        kubernetes=dict(docker_build_tag=docker_build_tag),
        task=dict(task_version=task_version),
        task_build=dict(verbose=True if verbose > 1 else None),
        core=dict(tracker_api="disabled" if disable_web_tracker else None),
    )
    main_switches = cleanup_empty_switches(main_switches)

    _sets = list(_sets)
    _sets_config = list(_sets_config)
    _sets_root = list(_sets_root)

    root_task_config = {}
    for _set in _sets_root:
        root_task_config = combine_mappings(left=root_task_config, right=_set)

    # remove all "first level" config values, assume that they are for the main task,
    # and add them to _sets_root
    for _set in _sets:
        for k, v in list(_set.items()):
            # so json-like values won't be included
            if "." not in k and isinstance(v, six.string_types):
                root_task_config[k] = v
                del _set[k]

    cmd_line_config = parse_and_build_config_store(
        source="cli", config_values=main_switches
    )

    # --set, --set-config
    if _sets:
        cmd_line_config.update(_parse_cli(_sets, source="--set"))
    if _sets_config:
        cmd_line_config.update(_parse_cli(_sets_config, source="--set-config"))
    if _extend:
        cmd_line_config.update(_parse_cli(_extend, source="--extend-config", extend=True))
    if _overrides:
        cmd_line_config.update(
            _parse_cli(
                _overrides,
                source="--set-override",
                priority=ConfigValuePriority.OVERRIDE,
            )
        )

    # --set-root
    if root_task_config:
        task_cls = task_registry.get_task_cls(task_name)
        task_section = task_cls.task_definition.task_config_section
        # adding root task to configuration
        cmd_line_config.update(
            parse_and_build_config_store(
                config_values={task_section: root_task_config}, source="--set-root"
            )
        )

    # UPDATE CURRENT CONFIG with CLI values
    if cmd_line_config:
        if verbose:
            logger.info(
                "CLI config: \n%s", pformat_config_store_as_table(cmd_line_config)
            )
        config.set_values(cmd_line_config, source="cmdline")

    # double checking on bootstrap, as we can run from all kinds of locations;
    # usually we should be bootstrapped already, as we run from cli.
    dbnd_bootstrap()

    # initialize basic logging (until we get to the context logging)
    if not config.getboolean("log", "disabled"):
        configure_basic_logging(None)

    scheduled_run_info = None
    if scheduled_job_name:
        scheduled_run_info = ScheduledRunInfo(
            scheduled_job_name=scheduled_job_name, scheduled_date=scheduled_date
        )

    # update completer
    if config.getboolean("databand", "completer"):
        tasks = task_registry.list_dbnd_task_classes()
        completer.refresh(tasks)

    # bootstrap and modules are loaded, we can load the task
    task_cls = None
    if task_name:
        task_cls = task_registry.get_task_cls(task_name)

    if not task_name:
        print_help(ctx, None)
        return

    if is_help:
        print_help(ctx, task_cls)
        return

    with tracking_mode_context(tracking=False), new_dbnd_context(
        name="run"
    ) as context:  # type: DatabandContext
        if context.settings.system.describe:
            # we want to print describe without triggering a real run
            logger.info("Building main task '%s'", task_name)
            root_task = get_task_registry().build_dbnd_task(task_name)
            root_task.ctrl.describe_dag.describe_dag()
            # currently there is a bug with the click version we have when using python 2,
            # so we don't use the click.echo function
            # https://github.com/pallets/click/issues/564
            print("Task %s has been described!" % task_name)
            return root_task

        return context.dbnd_run_task(
            task_or_task_name=task_name,
            force_task_name=alternative_task_name,
            job_name=job_name or alternative_task_name or task_name,
            run_uid=run_driver or override_run_uid,
            existing_run=run_driver is not None,
            scheduled_run_info=scheduled_run_info,
            project=project,
        )
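# Illustrative invocations exercising the newer flags handled above (the task
# name is made up, and the flag spellings are assumed from the parameter names):
#
#   dbnd run my_project.tasks.PrepareData --extend log.disabled=True
#   dbnd run my_project.tasks.PrepareData --describe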
def start(self, root_task_name=None, project_name=None, airflow_context=None):
    if self._run or self._active or try_get_databand_run():
        return

    # we probably should use only the airflow context passed via the parameter;
    # also, there are mocks that cover only get_dbnd_project_config().airflow_context
    airflow_context = airflow_context or get_dbnd_project_config().airflow_context()
    if airflow_context:
        _set_dbnd_config_from_airflow_connections()

    _set_tracking_config_overide(airflow_context=airflow_context)
    dc = self._enter_cm(
        new_dbnd_context(name="inplace_tracking")
    )  # type: DatabandContext

    if not root_task_name:
        # extract the name of the script we are running
        # (in the Airflow scenario it will be just "airflow")
        root_task_name = sys.argv[0].split(os.path.sep)[-1]

    if airflow_context:
        root_task, job_name, source, run_uid = build_run_time_airflow_task(
            airflow_context, root_task_name
        )
        try_number = airflow_context.try_number
    else:
        root_task = _build_inline_root_task(root_task_name)
        job_name = root_task_name
        source = UpdateSource.generic_tracking
        run_uid = None
        try_number = 1

    tracking_source = (
        None  # TODO_CORE build tracking_source -> typeof TrackingSourceSchema
    )
    self._run = run = self._enter_cm(
        new_databand_run(
            context=dc,
            job_name=job_name,
            run_uid=run_uid,
            existing_run=run_uid is not None,
            source=source,
            af_context=airflow_context,
            tracking_source=tracking_source,
            project_name=project_name,
        )
    )  # type: DatabandRun
    self._run.root_task = root_task

    self.update_run_from_airflow_context(airflow_context)

    if not self._atexit_registered:
        _set_process_exit_handler(self.stop)
        self._atexit_registered = True

    sys.excepthook = self.stop_on_exception
    self._active = True

    # now we send data to DB
    root_task_run = run._build_and_add_task_run(
        root_task, task_af_id=root_task.task_name, try_number=try_number
    )
    root_task_run.is_root = True

    run.tracker.init_run()
    run.root_task_run.set_task_run_state(TaskRunState.RUNNING)

    should_capture_log = TrackingConfig.from_databand_context().capture_tracking_log
    self._enter_cm(
        run.root_task_run.runner.task_run_execution_context(
            capture_log=should_capture_log, handle_sigterm=False
        )
    )
    self._task_run = run.root_task_run
    return self._task_run
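# Sketch of the user-facing flow that lands in start() above: calling a
# @task-decorated function from a plain script triggers in-place tracking
# (assumes dbnd's standard `task` decorator and that tracking is enabled;
# the function name is illustrative):
from dbnd import task


@task
def prepare_data(rows=10):
    return rows * 2


if __name__ == "__main__":
    prepare_data()  # tracked as the root task of an inline run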
def test_foreign_context_should_not_fail(self):
    with new_dbnd_context():
        t = SimplestTask()
        t.dbnd_run()
    TTaskWithInput(t_input=t).dbnd_run()
def set_context(self, ti):
    """
    Airflow's log handler uses this method to set up the context when running
    a TaskInstance (=ti). We use this method to set up the dbnd context and to
    communicate information to the `<airflow_operator>_execute` task that we
    create in `execute_tracking.py`.
    """
    # we set up only when we are not in our own orchestration dag
    if ti.dag_id.startswith(AD_HOC_DAG_PREFIX):
        return

    if not is_dag_eligable_for_tracking(ti.dag_id):
        return

    if config.getboolean("mlflow_tracking", "databand_tracking"):
        self.airflow_logger.warning(
            "dbnd can't track mlflow and airflow together; please disable the dbnd config "
            "`databand_tracking` in section `mlflow_tracking`"
        )
        return

    # we are not tracking SubDagOperator
    if ti.operator is None or ti.operator == SubDagOperator.__name__:
        return

    # Airflow runs with two processes, `run` and `--raw run`,
    # but we want the handler to run only once (idempotency),
    # so we use an environment variable to sync those two processes.
    task_key = calc_task_key_from_af_ti(ti)
    if os.environ.get(task_key, False):
        # this key is already set, which means we are in `--raw run`
        return
    else:
        # we are in the outer `run`
        self.task_env_key = task_key
        # mark the environment with the current key so the inner `--raw run` skips setup
        environ_utils.set_on(task_key)

        from dbnd_airflow.tracking.dbnd_airflow_conf import (
            set_dbnd_config_from_airflow_connections,
        )

        # When we are in `--raw run`, in tracking, airflow runs the main process
        # for every task, which made some of the features run twice:
        # once when the `worker` process ran and once when the `main` one ran,
        # so some features ran with different configurations.
        # It still runs twice, but now with the same configurations.
        set_dbnd_config_from_airflow_connections()

    self.task_run_attempt_uid = get_task_run_attempt_uid_from_af_ti(ti)

    # airflow calculation for the relevant log_file
    log_relative_path = self.log_file_name_factory(ti, ti.try_number)
    self.log_file = os.path.join(self.airflow_base_log_dir, log_relative_path)

    # make sure we are not polluting the airflow logs
    get_dbnd_project_config().quiet_mode = True

    # tracking msg
    self.airflow_logger.info(
        "Databand Tracking Started {version}".format(version=dbnd.__version__)
    )

    # context with disabled logs
    self.dbnd_context_manage = new_dbnd_context(conf={"log": {"disabled": True}})
    self.dbnd_context = self.dbnd_context_manage.__enter__()
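# The run/--raw-run idempotency pattern used above, in isolation
# (the helper name and shape are illustrative, not dbnd API):
import os


def run_once_per_attempt(task_key, setup):
    if os.environ.get(task_key, False):
        # inner `--raw run`: the outer `run` already did the setup
        return False
    os.environ[task_key] = "True"  # visible to child processes
    setup()
    return True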
def __init__(self, af_context):
    # type: (AirflowTaskContext) -> None
    self.run_uid = get_job_run_uid(
        dag_id=af_context.root_dag_id, execution_date=af_context.execution_date
    )
    self.dag_id = af_context.dag_id

    # this is the real operator uid; we need to connect our "tracked" task to it,
    # so the moment the monitor is on -> we can sync
    af_runtime_op_task_id = af_context.task_id
    self.af_operator_sync__task_run_uid = get_task_run_uid(
        self.run_uid, af_context.dag_id, af_runtime_op_task_id
    )

    # 1. create a proper DatabandContext so we can create other objects
    set_tracking_config_overide(use_dbnd_log=override_airflow_log_system_for_tracking())

    # create databand context
    with new_dbnd_context(name="airflow") as dc:  # type: DatabandContext
        # now create an "operator" task for the current task_id;
        # we can't actually run it, and we don't even know when it's going to finish.
        # the current execution is inside the operator; this is the only thing we know.

        # STATE AFTER INIT:
        # AirflowOperator__runtime -> DAG__runtime
        task_target_date = pendulum.parse(af_context.execution_date, tz=pytz.UTC).date()

        # AIRFLOW OPERATOR RUNTIME
        af_runtime_op = AirflowOperatorRuntimeTask(
            task_family=task_name_for_runtime(af_runtime_op_task_id),
            dag_id=af_context.dag_id,
            execution_date=af_context.execution_date,
            task_target_date=task_target_date,
            task_version="%s:%s" % (af_runtime_op_task_id, af_context.execution_date),
        )

        # this is the real operator uid; we need to connect our "tracked" task to it,
        # so the moment the monitor is on -> we can sync
        af_db_op_task_run_uid = get_task_run_uid(
            self.run_uid, af_context.dag_id, af_runtime_op_task_id
        )
        af_runtime_op.task_meta.extra_parents_task_run_uids.add(af_db_op_task_run_uid)
        af_runtime_op.ctrl.force_task_run_uid = TaskRunUidGen_TaskAfId(af_context.dag_id)
        self.af_operator_runtime__task = af_runtime_op

        # AIRFLOW DAG RUNTIME
        self.af_dag_runtime__task = AirflowDagRuntimeTask(
            task_name=task_name_for_runtime(DAG_SPECIAL_TASK_ID),
            dag_id=af_context.root_dag_id,  # <- ROOT DAG!
            execution_date=af_context.execution_date,
            task_target_date=task_target_date,
        )
        _add_child(self.af_dag_runtime__task, self.af_operator_runtime__task)

        # this will create a databand run with driver and root tasks.
        # we need the "root" task to be the same between different airflow task
        # invocations, since in dbnd we must have a single root task,
        # so we create a "dummy" task with the dag_id name.
        with new_databand_run(
            context=dc,
            task_or_task_name=self.af_dag_runtime__task,
            run_uid=self.run_uid,
            existing_run=False,
            job_name=af_context.root_dag_id,
            send_heartbeat=False,  # we don't send heartbeat in tracking
            source=UpdateSource.airflow_tracking,
        ) as dr:
            self.dr = dr
            dr._init_without_run()
            self.airflow_operator__task_run = dr.get_task_run_by_id(af_runtime_op.task_id)