def try_get_databand_run():
    # type: () -> Optional[DatabandRun]
    """Return the current DatabandRun, or None when no instance is active.

    Unlike ``get_databand_run``, this first checks ``DatabandRun.has_instance()``
    and only asks for the run when that check succeeds.
    """
    from dbnd._core.run.databand_run import DatabandRun  # noqa: F811

    return get_databand_run() if DatabandRun.has_instance() else None
def get_databand_run():
    # type: () -> DatabandRun
    """Return the object produced by ``DatabandRun.get_instance()``.

    The import is done lazily inside the function and aliased so it does not
    clash with any module-level ``DatabandRun`` name.
    """
    from dbnd._core.run.databand_run import DatabandRun as _DatabandRun

    return _DatabandRun.get_instance()
def run(self):
    """Run ``self.scheduled_cmd`` via ``bash_cmd.func`` and return its result.

    The child environment is a copy of ``os.environ`` with two extra entries:
    ``DBND_RUN_UID`` (the current run's ``run_uid`` as a string) and
    ``DBND_RESUBMIT_RUN`` set to ``"true"``. ``dbnd_env=False`` is passed to
    ``bash_cmd.func`` along with this explicitly prepared environment.
    """
    child_env = os.environ.copy()
    child_env.update(
        {
            DBND_RUN_UID: str(DatabandRun.get_instance().run_uid),
            DBND_RESUBMIT_RUN: "true",
        }
    )
    return bash_cmd.func(
        cmd=self.scheduled_cmd,
        env=child_env,
        dbnd_env=False,
        shell=self.shell,
    )
def test_save_huge_graph(self):
    """A run built from ``generate_huge_task(200)`` can be saved and loaded back."""
    # Validates that pickling (python2 in particular) copes with a huge graph of tasks.
    # Debugging hint: turn on dill tracing with dill._dill._trace(True).
    saved_run = self._save_graph(generate_huge_task(200))

    reloaded = DatabandRun.load_run(saved_run.driver_dump, False)
    assert reloaded
def execute(ctx, dbnd_run, disable_tracking_api):
    """Execute databand primitives"""
    # NOTE(review): docstring left verbatim; it may be surfaced as CLI help text - confirm.
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    # Load the run from the dump located at ``dbnd_run`` and stash it, together
    # with the tracking flag, on ``ctx.obj``.
    dump_file = target(dbnd_run)
    ctx.obj = dict(
        run=DatabandRun.load_run(
            dump_file=dump_file, disable_tracking_api=disable_tracking_api
        ),
        disable_tracking_api=disable_tracking_api,
    )
def set_active_run_context(run):
    # type: (DatabandRun) -> Iterator[DatabandRun]
    """Enter the contexts for ``run`` and yield the active run.

    ``DatabandContext.context`` is entered first with ``run.context``, then
    ``DatabandRun.context`` with ``run`` itself; the value bound by the latter
    is yielded. Both contexts are exited (in reverse order) when the generator
    resumes and finishes.
    """
    from dbnd._core.context.databand_context import DatabandContext  # noqa: F811

    with DatabandContext.context(_context=run.context), DatabandRun.context(
        _context=run
    ) as active_run:
        yield active_run
def test_task_runner_context(self):
    """Build ``gcp_k8s_engine`` from config inside a task BUILD context and check its values."""
    finished_run = dummy_nested_config_task.dbnd_run(
        "test_limits", config_name="gcp_k8s_engine"
    )
    with DatabandRun.context(finished_run):
        task_run = finished_run.task.current_task_run  # type: TaskRun
        build_phase = task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD)
        with build_phase:
            engine = build_task_from_config(task_name="gcp_k8s_engine")
            assert engine.cluster_context == "test"
            assert engine.container_tag == "test_f_value"
def test_task_runner_context(self):
    """Build ``sub_tconfig`` from config inside a task BUILD context and check its values."""
    # Same scenario as test_task_sub_config_override: here we check that the
    # task run context "puts" us in the right config layer.
    finished_run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
    with DatabandRun.context(finished_run):
        task_run = finished_run.task.current_task_run  # type: TaskRun
        build_phase = task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD)
        with build_phase:
            sub_config = build_task_from_config(task_name="sub_tconfig")
            assert sub_config.config_value_s1 == "override_config_s1"
            assert sub_config.config_value_s2 == "value_sub_from_databand_test_cfg_s2"
def dbnd_operator__execute(dbnd_operator, context):
    """Execute the dbnd task referenced by ``dbnd_operator`` and return its result.

    When a DatabandRun is already active, the task run is looked up by
    ``dbnd_operator.dbnd_task_id`` and executed directly. Otherwise the run is
    loaded from the driver dump named in the operator's
    ``executor_config["DatabandExecutor"]`` and the task is executed inside
    ``run.run_context()``.

    Args:
        dbnd_operator: operator exposing ``executor_config`` and ``dbnd_task_id``.
        context: Airflow context; forwarded as ``airflow_context`` and, on the
            load path, read for ``context["ti"].executor_config`` (logging only).

    Returns:
        Whatever ``task_run.runner.execute`` returns.

    Raises:
        Any exception raised while loading the run is reported to the original
        stderr, followed by ``dump_trace()``, and then re-raised unchanged.
    """
    from dbnd._core.current import try_get_databand_run
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    run = try_get_databand_run()
    if run:
        # Already inside a dbnd run: nothing to load, execute right away.
        task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
        ret_value = task_run.runner.execute(airflow_context=context)
        return ret_value

    # We are not inside a dbnd run, probably running from native airflow;
    # try to load the run from the driver dump.
    try:
        executor_config = dbnd_operator.executor_config
        logger.info("context: %s", context)
        logger.info("task.executor_config: %s", dbnd_operator.executor_config)
        logger.info("ti.executor_config: %s", context["ti"].executor_config)

        databand_executor_config = executor_config["DatabandExecutor"]
        driver_dump = databand_executor_config.get("dbnd_driver_dump")
        print(
            "Running dbnd task %s from %s"
            % (dbnd_operator.dbnd_task_id, driver_dump),
            file=sys.__stderr__,
        )

        if databand_executor_config.get("remove_airflow_std_redirect", False):
            # Point stdout/stderr back at the interpreter's original streams.
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__

        dbnd_bootstrap()
        dbnd_airflow_bootstrap()
        run = DatabandRun.load_run(
            dump_file=target(driver_dump), disable_tracking_api=False
        )
    except Exception as e:
        print(
            "Failed to load dbnd task in native airflow execution! Exception: %s"
            % (e,),
            file=sys.__stderr__,
        )
        dump_trace()
        raise

    with run.run_context():
        task_run = run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
        ret_value = task_run.runner.execute(airflow_context=context)
    return ret_value
def test_task_runner_context(self):
    """Build ``sub_tconfig`` inside a task BUILD context; task-level config wins for s2."""
    # Same scenario as test_task_sub_config_override: here we check that the
    # task run context "puts" us in the right config layer.
    finished_run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
    with DatabandRun.context(finished_run):
        task_run = finished_run.task.current_task_run  # type: TaskRun
        build_phase = task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD)
        with build_phase:
            sub_config = build_task_from_config(task_name="sub_tconfig")
            assert sub_config.config_value_s1 == "override_config_s1"
            # dummy_nested_config_task carries a task_config that overrides the
            # config: that layer ranks above the [sub_tconfig] value from the
            # config file, which sits further down.
            assert sub_config.config_value_s2 == "task_config_regular_s2"
def _save_graph(self, task):
    """Run ``task`` in a fresh dbnd context, save the run, and return it.

    The context overrides the task executor type to ``TaskExecutorType.local``
    and the tracker list to ``["console"]``. After saving, the run is loaded
    back from ``run.driver_dump`` as a sanity check before the original run
    object is returned.
    """
    conf_overrides = {
        RunConfig.task_executor_type: override(TaskExecutorType.local),
        CoreConfig.tracker: override(["console"]),
    }
    with new_dbnd_context(conf=conf_overrides) as dc:
        run = dc.dbnd_run_task(task_or_task_name=task)
        run.save_run()

    reloaded = DatabandRun.load_run(
        dump_file=run.driver_dump, disable_tracking_api=False
    )
    assert reloaded
    return run
def bash_cmd(
    cmd=None,
    args=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
    wait_for_termination_s=5,
    shell=False,
):
    # type: (Optional[str], Optional[List[str]], Optional[int], Optional[str], Optional[Dict[str, str]], bool, str, Optional[Dict[str, Any]], int, bool) -> int
    """Run a command in a subprocess, log its output line by line, return its exit code.

    Args:
        cmd: command line as a single string. Split with ``shlex.split`` unless
            ``shell`` is true, in which case it is passed to Popen as is.
        args: command as a list of arguments (each converted with ``str``).
            Mutually exclusive with ``cmd``.
        check_retcode: expected return code; ``None`` disables the check.
        cwd: working directory for the child process.
        env: environment for the child process. Never mutated by this function.
        dbnd_env: when true and a DatabandRun instance exists, the variables from
            ``get_context_spawn_env()`` are added to the child environment
            (on top of ``env``, or of a copy of ``os.environ`` if ``env`` is empty).
        output_encoding: encoding passed to ``safe_decode`` for each output line.
        popen_kwargs: extra keyword arguments forwarded to ``subprocess.Popen``.
        wait_for_termination_s: timeout passed to ``process.wait`` after
            ``terminate`` when execution fails.
        shell: forwarded to ``subprocess.Popen``; when true the command is passed
            as one string.

    Returns:
        The return code of the process.

    Raises:
        DatabandConfigError: if both ``cmd`` and ``args`` are provided.
        The error built by ``failed_to_run_cmd`` when ``check_retcode`` is not
        ``None`` and differs from the actual return code.
    """
    popen_kwargs = {} if popen_kwargs is None else popen_kwargs.copy()

    if cmd and args:
        raise DatabandConfigError("You should not provide cmd and args ")

    if cmd:
        args = cmd if shell else shlex.split(cmd)
    elif args:
        args = list(map(str, args))
        cmd = list2cmdline(args)
        if shell:
            args = cmd

    logger.info("Running: " + cmd)  # To simplify rerunning failing tests

    if dbnd_env and DatabandRun.has_instance():
        # Work on a copy so the caller's mapping is never modified.
        env = dict(env) if env else os.environ.copy()
        dbnd_env_vars = DatabandRun.get_instance().get_context_spawn_env()
        logger.info(
            "Exporting the following env vars:\n%s",
            "\n".join(["{}={}".format(k, v) for k, v in dbnd_env_vars.items()]),
        )
        # Actually export the variables that were just logged.
        env.update(dbnd_env_vars)

    def preexec_fn():
        if windows_compatible_mode:
            return
        # Restore default signal disposition and invoke setsid.
        # Names missing from the ``signal`` module are skipped by the hasattr check.
        for sig in ("SIGPIPE", "SIGXFZ", "SIGXFSZ"):
            if hasattr(signal, sig):
                safe_signal(getattr(signal, sig), signal.SIG_DFL)
        os.setsid()

    process = subprocess.Popen(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=-1,
        universal_newlines=True,
        env=env,
        preexec_fn=preexec_fn,
        cwd=cwd,
        shell=shell,
        **popen_kwargs
    )

    try:
        task_run = try_get_current_task_run()
        if task_run:
            # Expose the child process on the current task.
            task_run.task.process = process

        logger.info("Process is running, output:")
        # While the command is running, read and log its output.
        while True:
            line = process.stdout.readline()
            if line == "" or line == b"":
                # An empty read means the stream is closed.
                break
            line = safe_decode(line, output_encoding).rstrip()
            logger.info("out: %s", line)

        returncode = process.wait()
        logger.info("Command exited with return code %s", process.returncode)
        if check_retcode is not None and returncode != check_retcode:
            raise failed_to_run_cmd(
                "Bash command failed", cmd_str=cmd, return_code=returncode
            )
        return returncode
    except Exception:
        logger.info("Received interrupt. Terminating subprocess and waiting")
        try:
            process.terminate()
            process.wait(wait_for_termination_s)
        except Exception:
            # Best-effort cleanup: the original error below is the one that matters.
            pass
        raise
def get_databand_run():
    # type: () -> DatabandRun
    """Return the object produced by ``DatabandRun.get_instance()``."""
    from dbnd._core.run.databand_run import DatabandRun  # noqa: F811

    return DatabandRun.get_instance()
def test_save_databand_run(self):
    """A run for a single ``TTask`` can be saved and loaded back from its driver dump."""
    saved_run = self._save_graph(TTask())

    reloaded = DatabandRun.load_run(saved_run.driver_dump, False)
    assert reloaded