Exemplo n.º 1
0
def try_get_databand_run():
    # type: () -> Optional[DatabandRun]
    """Return ``get_databand_run()`` if a DatabandRun instance exists, else ``None``."""
    from dbnd._core.run.databand_run import DatabandRun  # noqa: F811

    # Only ask for the run when one is registered; otherwise report "no run".
    return get_databand_run() if DatabandRun.has_instance() else None
Exemplo n.º 2
0
def get_databand_run():
    # type: () -> DatabandRun
    """Return the object provided by ``DatabandRun.get_instance()``."""
    # Imported locally under an alias, as in the original implementation.
    from dbnd._core.run.databand_run import DatabandRun as _DatabandRun

    return _DatabandRun.get_instance()
Exemplo n.º 3
0
 def run(self):
     """Run ``self.scheduled_cmd`` through ``bash_cmd.func`` with extra env vars.

     The child environment is a copy of ``os.environ`` extended with the
     current run uid (``DBND_RUN_UID``) and ``DBND_RESUBMIT_RUN`` set to "true".
     Returns whatever ``bash_cmd.func`` returns.
     """
     child_env = os.environ.copy()
     current_run_uid = DatabandRun.get_instance().run_uid
     child_env[DBND_RUN_UID] = str(current_run_uid)
     child_env[DBND_RESUBMIT_RUN] = "true"

     # dbnd_env=False: bash_cmd is asked not to add its own dbnd env vars.
     call_kwargs = dict(
         cmd=self.scheduled_cmd,
         env=child_env,
         dbnd_env=False,
         shell=self.shell,
     )
     return bash_cmd.func(**call_kwargs)
Exemplo n.º 4
0
    def test_save_huge_graph(self):
        """Save a run built from ``generate_huge_task(200)`` and load it back."""
        # Validates that pickle (python2 included) can handle a huge graph of tasks.
        huge_task = generate_huge_task(200)

        saved_run = self._save_graph(huge_task)
        # Second positional argument is the disable_tracking_api flag.
        restored_run = DatabandRun.load_run(saved_run.driver_dump, False)
        assert restored_run
Exemplo n.º 5
0
def execute(ctx, dbnd_run, disable_tracking_api):
    """Execute databand primitives"""
    # NOTE(review): the docstring above is kept verbatim; it may be surfaced
    # as user-facing help text by a decorator that is not visible here.
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    # Wrap the given dump location in a target and load the run from it.
    dump_target = target(dbnd_run)
    loaded_run = DatabandRun.load_run(
        dump_file=dump_target, disable_tracking_api=disable_tracking_api
    )

    # Store the loaded run and the tracking flag on the context object.
    ctx.obj = dict(run=loaded_run, disable_tracking_api=disable_tracking_api)
Exemplo n.º 6
0
def set_active_run_context(run):
    # type: (DatabandRun) -> Iterator[DatabandRun]
    """Enter the contexts for ``run`` and yield the entered run context value.

    ``DatabandContext.context`` is entered first with ``run.context``, then
    ``DatabandRun.context`` with ``run`` itself; both are exited (in reverse
    order) once the generator resumes after the ``yield``.
    """
    from dbnd._core.context.databand_context import DatabandContext  # noqa: F811

    # A single ``with`` enters the managers left to right, same as nesting them.
    with DatabandContext.context(_context=run.context), DatabandRun.context(
        _context=run
    ) as active_run:
        yield active_run
Exemplo n.º 7
0
    def test_task_runner_context(self):
        """Build ``gcp_k8s_engine`` from config inside the run's BUILD task context."""
        run = dummy_nested_config_task.dbnd_run(
            "test_limits", config_name="gcp_k8s_engine"
        )
        with DatabandRun.context(run):
            task_run = run.task.current_task_run  # type: TaskRun
            build_phase = task_run.task.ctrl.task_context(
                phase=TaskContextPhase.BUILD
            )

            with build_phase:
                engine_config = build_task_from_config(task_name="gcp_k8s_engine")
                assert engine_config.cluster_context == "test"
                assert engine_config.container_tag == "test_f_value"
Exemplo n.º 8
0
    def test_task_runner_context(self):
        """Build ``sub_tconfig`` from config inside the run's BUILD task context."""
        # Same as test_task_sub_config_override:
        # we check that task_run_context "puts" us in the right config layer.
        run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
        with DatabandRun.context(run):
            task_run = run.task.current_task_run  # type: TaskRun
            build_phase = task_run.task.ctrl.task_context(
                phase=TaskContextPhase.BUILD
            )

            with build_phase:
                sub_config = build_task_from_config(task_name="sub_tconfig")
                assert sub_config.config_value_s1 == "override_config_s1"
                assert (
                    sub_config.config_value_s2
                    == "value_sub_from_databand_test_cfg_s2"
                )
Exemplo n.º 9
0
def dbnd_operator__execute(dbnd_operator, context):
    """Execute the dbnd task run that corresponds to ``dbnd_operator``.

    If a databand run is already available, the task run is looked up by
    ``dbnd_operator.dbnd_task_id`` and executed directly. Otherwise the run is
    loaded from the driver dump referenced in the operator's executor config
    and the task run is executed inside ``run.run_context()``.

    Returns the value produced by ``task_run.runner.execute``. Any exception
    raised while loading the run is reported to ``sys.__stderr__`` and re-raised.
    """
    from dbnd._core.current import try_get_databand_run
    from dbnd._core.run.databand_run import DatabandRun
    from targets import target

    active_run = try_get_databand_run()
    if active_run:
        # Already inside a dbnd run: execute the matching task run right away.
        task_run = active_run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
        return task_run.runner.execute(airflow_context=context)

    # We are not inside a dbnd run, probably we are running from native airflow;
    # let's try to load the run from the driver dump.
    try:
        executor_config = dbnd_operator.executor_config
        logger.info("context: %s", context)
        logger.info("task.executor_config: %s", dbnd_operator.executor_config)
        logger.info("ti.executor_config: %s", context["ti"].executor_config)

        driver_dump = executor_config["DatabandExecutor"].get("dbnd_driver_dump")
        print(
            "Running dbnd task %s from %s"
            % (dbnd_operator.dbnd_task_id, driver_dump),
            file=sys.__stderr__,
        )

        restore_std_streams = executor_config["DatabandExecutor"].get(
            "remove_airflow_std_redirect", False
        )
        if restore_std_streams:
            # Point stdout/stderr back at the interpreter's original streams.
            sys.stdout = sys.__stdout__
            sys.stderr = sys.__stderr__

        dbnd_bootstrap()
        dbnd_airflow_bootstrap()
        loaded_run = DatabandRun.load_run(
            dump_file=target(driver_dump), disable_tracking_api=False
        )
    except Exception as e:
        print(
            "Failed to load dbnd task in native airflow execution! Exception: %s"
            % (e, ),
            file=sys.__stderr__,
        )
        dump_trace()
        raise

    with loaded_run.run_context():
        task_run = loaded_run.get_task_run_by_id(dbnd_operator.dbnd_task_id)
        ret_value = task_run.runner.execute(airflow_context=context)

    return ret_value
Exemplo n.º 10
0
    def test_task_runner_context(self):
        """Build ``sub_tconfig`` from config inside the run's BUILD task context."""
        # Same as test_task_sub_config_override:
        # we check that task_run_context "puts" us in the right config layer.
        run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
        with DatabandRun.context(run):
            task_run = run.task.current_task_run  # type: TaskRun
            build_phase = task_run.task.ctrl.task_context(
                phase=TaskContextPhase.BUILD
            )

            with build_phase:
                sub_config = build_task_from_config(task_name="sub_tconfig")
                assert sub_config.config_value_s1 == "override_config_s1"
                # Because we have task_config in dummy_nested_config_task that
                # overrides config, tconfig is higher than the value for
                # [sub_tconfig] in the config file; the config layer is down.
                assert sub_config.config_value_s2 == "task_config_regular_s2"
Exemplo n.º 11
0
    def _save_graph(self, task):
        """Run ``task`` in a fresh dbnd context, save the run and verify it loads.

        The context overrides the task executor type to ``local`` and the
        tracker to ``["console"]``. Returns the run that was executed and saved
        (not the re-loaded copy).
        """
        conf_overrides = {
            RunConfig.task_executor_type: override(TaskExecutorType.local),
            CoreConfig.tracker: override(["console"]),
        }
        with new_dbnd_context(conf=conf_overrides) as dc:
            saved_run = dc.dbnd_run_task(task_or_task_name=task)
            saved_run.save_run()

        # Sanity check: the dump written above must be loadable.
        reloaded_run = DatabandRun.load_run(
            dump_file=saved_run.driver_dump, disable_tracking_api=False
        )
        assert reloaded_run
        return saved_run
Exemplo n.º 12
0
def bash_cmd(
    cmd=None,
    args=None,
    check_retcode=0,
    cwd=None,
    env=None,
    dbnd_env=True,
    output_encoding="utf-8",
    popen_kwargs=None,
    wait_for_termination_s=5,
    shell=False,
):
    # type:( str, List[str], Optional[int], str, Dict[str,str], bool, str, Dict[str,Any]) -> int
    """Run a command in a subprocess, log its output and return its exit code.

    Exactly one of ``cmd`` (a command string) or ``args`` (a sequence of
    arguments) must be given.

    Args:
        cmd: command line as a single string; split with ``shlex`` unless
            ``shell`` is true.
        args: command as a sequence; every item is converted with ``str``.
        check_retcode: expected return code; pass ``None`` to skip the check.
        cwd: working directory handed to ``subprocess.Popen``.
        env: environment for the child process. It is never mutated; when
            dbnd variables are added, they are added to a copy.
        dbnd_env: when true and a DatabandRun instance exists, the variables
            from ``get_context_spawn_env()`` are added to the child environment.
        output_encoding: encoding passed to ``safe_decode`` for output lines.
        popen_kwargs: extra keyword arguments for ``subprocess.Popen``.
        wait_for_termination_s: timeout passed to ``process.wait`` after
            ``terminate()`` on failure.
        shell: passed to ``subprocess.Popen``; when true the command is given
            to Popen as a single string.

    Returns:
        The process return code.

    Raises:
        DatabandConfigError: if both or neither of ``cmd`` and ``args`` are given.
        The error built by ``failed_to_run_cmd`` when the return code differs
        from ``check_retcode``.
    """
    if popen_kwargs is None:
        popen_kwargs = dict()
    popen_kwargs = popen_kwargs.copy()

    if cmd and args:
        raise DatabandConfigError("You should not provide cmd and args ")
    if not cmd and not args:
        # Previously this surfaced as an opaque TypeError on the log line below.
        raise DatabandConfigError("You should provide cmd or args")

    if cmd:
        if shell:
            args = cmd
        else:
            args = shlex.split(cmd)
    elif args:
        args = list(map(str, args))
        cmd = list2cmdline(args)
        if shell:
            args = cmd

    logger.info("Running: %s", cmd)  # To simplify rerunning failing tests

    if dbnd_env and DatabandRun.has_instance():
        # Work on a copy so the caller's mapping is never modified.
        env = dict(env) if env else os.environ.copy()
        dbnd_env_vars = DatabandRun.get_instance().get_context_spawn_env()
        logger.info(
            "Exporting the following env vars:\n%s",
            "\n".join(["{}={}".format(k, v)
                       for k, v in dbnd_env_vars.items()]),
        )
        # Actually export the variables logged above (the update used to be
        # called without arguments, which made it a no-op).
        env.update(dbnd_env_vars)

    def preexec_fn():
        if windows_compatible_mode:
            return
        # Restore default signal disposition and invoke setsid
        for sig in ("SIGPIPE", "SIGXFZ", "SIGXFSZ"):
            if hasattr(signal, sig):
                safe_signal(getattr(signal, sig), signal.SIG_DFL)
        os.setsid()

    process = subprocess.Popen(args,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               bufsize=-1,
                               universal_newlines=True,
                               env=env,
                               preexec_fn=preexec_fn,
                               cwd=cwd,
                               shell=shell,
                               **popen_kwargs)

    try:
        # Attach the process to the current task run's task, when there is one.
        task_run = try_get_current_task_run()
        if task_run:
            task_run.task.process = process

        logger.info("Process is running, output:")
        # While the command is running, read and log its output line by line
        output = []
        while True:
            line = process.stdout.readline()
            if line == "" or line == b"":
                break
            line = safe_decode(line, output_encoding).rstrip()

            logger.info("out: %s", line)
            # keep last 1000 lines only (trimmed once the buffer exceeds 1500)
            output.append(line)
            if len(output) > 1500:
                output = output[-1000:]

        returncode = process.wait()
        logger.info("Command exited with return code %s", process.returncode)
        if check_retcode is not None and returncode != check_retcode:
            raise failed_to_run_cmd("Bash command failed",
                                    cmd_str=cmd,
                                    return_code=returncode)
        return returncode
    except Exception:
        logger.info("Received interrupt. Terminating subprocess and waiting")
        try:
            # Best-effort cleanup; failures here must not mask the original error.
            process.terminate()
            process.wait(wait_for_termination_s)
        except Exception:
            pass
        raise
Exemplo n.º 13
0
def get_databand_run():
    # type: () -> DatabandRun
    """Return the object provided by ``DatabandRun.get_instance()``."""
    from dbnd._core.run.databand_run import DatabandRun  # noqa: F811

    return DatabandRun.get_instance()
Exemplo n.º 14
0
    def test_save_databand_run(self):
        """Save a run of ``TTask`` via ``_save_graph`` and load it back."""
        saved_run = self._save_graph(TTask())

        # Second positional argument is the disable_tracking_api flag.
        restored_run = DatabandRun.load_run(saved_run.driver_dump, False)
        assert restored_run