Exemplo n.º 1
0
    def _add_tasks_info(self, b):
        """Append task statistics (total, per-state counts, optimizations) to banner *b*.

        Returns *b* for chaining.
        """
        run = self.run
        all_task_runs = run.task_runs

        reused_count = sum(
            1
            for tr in all_task_runs
            if tr.is_reused and not tr.is_skipped_as_not_required
        )
        optimizations = []
        if reused_count:
            optimizations.append(
                "There are {completed} reused tasks.".format(completed=reused_count))

        skipped_count = sum(
            1
            for tr in all_task_runs
            if tr.is_reused and tr.is_skipped_as_not_required
        )
        if skipped_count:
            optimizations.append(
                " {skipped} tasks are not required by any uncompleted task "
                "that is essential for your root task.".format(
                    skipped=skipped_count))

        # In non-verbose mode hide system tasks — unless that would leave nothing.
        task_runs = all_task_runs
        if not is_verbose():
            user_task_runs = run.get_task_runs(without_system=True)
            if user_task_runs:
                task_runs = user_task_runs

        state_counts = Counter(
            tr.task_run_state for tr in task_runs if tr.task_run_state
        )

        b.column("TOTAL TASKS", len(task_runs))
        b.column_properties(
            "STATES", [(state.value, count) for state, count in state_counts.items()]
        )
        if optimizations:
            b.column("RUN OPTIMIZATION", " ".join(optimizations))
        return b
Exemplo n.º 2
0
def get_project_git():
    """Return the project's git commit hash, computed once and cached globally."""
    global _project_git_version
    if not is_defined(_project_git_version):
        # First call: resolve and memoize the commit of the project path.
        _project_git_version = get_git_commit(project_path(), verbose=is_verbose())
    return _project_git_version
Exemplo n.º 3
0
    def banner(
        self,
        msg,
        color=None,
        verbose=False,
        print_task_band=False,
        task_run=None,
        exc_info=None,
    ):
        """Render a text banner for this task; fall back to *msg* on any failure."""
        try:
            text_banner = TextBanner(msg, color)

            # High verbosity when explicitly requested or globally enabled.
            verbosity = (
                FormatterVerbosity.HIGH
                if verbose or is_verbose()
                else FormatterVerbosity.NORMAL
            )

            banner_builder = _TaskBannerBuilder(
                task=self.task,
                banner=text_banner,
                verbosity=verbosity,
                print_task_band=print_task_band,
            )

            built = banner_builder.build_banner(task_run=task_run, exc_info=exc_info)
            return built.get_banner_str()
        except Exception as ex:
            # Banner rendering must never break the caller.
            log_exception(
                "Failed to calculate banner for '%s'" % self.task_id,
                ex,
                non_critical=True,
            )
            return msg + (" ( task_id=%s)" % self.task_id)
Exemplo n.º 4
0
def dbnd_log_debug(msg, *args, **kwargs):
    """Log *msg* at DEBUG level, promoted to INFO when verbose mode is on.

    Logging must never break the caller: any failure while logging is
    reported to stdout and swallowed.
    """
    try:
        if is_verbose():
            logger.info(msg, *args, **kwargs)
        else:
            logger.debug(msg, *args, **kwargs)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrow the guard to ordinary errors only.
        print("Failed to print dbnd info message")
Exemplo n.º 5
0
def dbnd_log_info_error(msg, *args, **kwargs):
    """we show exception only in verbose mode"""
    try:
        # verbose -> attach the current traceback; otherwise plain INFO.
        emit = logger.exception if is_verbose() else logger.info
        emit(msg, *args, **kwargs)
    except Exception:
        print("Failed to print dbnd error message")
Exemplo n.º 6
0
def _handle_tracking_error(msg):
    """Log a tracking failure and move on; traceback only in verbose mode."""
    full_msg = "Failed during dbnd %s, ignoring, and continue without tracking" % msg
    if is_verbose():
        logger.warning(full_msg, exc_info=True)
    else:
        logger.info(full_msg)
Exemplo n.º 7
0
def _handle_dynamic_error(msg, func_call):
    """Log a failure around dynamic task handling; traceback only when verbose.

    NOTE: the two branches intentionally use slightly different messages
    ("tracking/orchestration" vs "tracking") — preserved as-is.
    """
    context = (msg, func_call.task_cls)
    if is_verbose():
        logger.warning(
            "Failed during dbnd %s for %s, ignoring, and continue without tracking/orchestration"
            % context,
            exc_info=True,
        )
    else:
        logger.info(
            "Failed during dbnd %s for %s, ignoring, and continue without tracking"
            % context
        )
Exemplo n.º 8
0
def _handle_tracking_error(msg, func_call=None):
    """Report a tracking failure to the server and the local log, then continue.

    :param msg: short description of the dbnd phase that failed.
    :param func_call: optional; when given, its ``callable`` is named in the
        log message.
    """
    log_exception_to_server()
    location = " for %s" % func_call.callable if func_call else ""
    # BUG FIX: the template previously inserted `location` after a literal
    # "for ", producing "... for  for <callable>, ..." (or a dangling
    # "for ," when func_call is None). `location` already carries " for ".
    msg = "Failed during dbnd %s%s, ignoring, and continue without tracking" % (
        msg,
        location,
    )
    if is_verbose():
        logger.warning(msg, exc_info=True)
    else:
        logger.info(msg)
Exemplo n.º 9
0
    def _run(
        self,
        kube_client: client.CoreV1Api,
        resource_version,
        worker_uuid,
        kube_config: Configuration,
    ):
        """Stream pod events for this Airflow worker and dispatch each one.

        Watches pods labeled ``airflow-worker=<worker_uuid>`` in
        ``self.namespace``, feeding every event through
        ``_extended_process_state``; an ERROR event short-circuits through
        ``process_error``.

        :param kube_client: CoreV1Api used to list/watch namespaced pods.
        :param resource_version: last seen resource version to resume from
            (falsy -> start fresh).
        :param worker_uuid: value of the ``airflow-worker`` pod label.
        :param kube_config: kube client config; its request args extend/override
            the watch kwargs.
        :return: the last processed resource version so the caller can resume.
        """
        from kubernetes import watch

        watcher = watch.Watch()
        request_timeout = self.kube_dbnd.engine_config.watcher_request_timeout_seconds
        # (connect, read) client-side timeouts plus a server-side watch timeout.
        kwargs = {
            "label_selector":
            "airflow-worker={}".format(worker_uuid),
            "_request_timeout": (request_timeout, request_timeout),
            "timeout_seconds":
            self.kube_dbnd.engine_config.watcher_client_timeout_seconds,
        }

        if resource_version:
            kwargs["resource_version"] = resource_version
        # User-supplied client args take precedence over the defaults above.
        if kube_config.kube_client_request_args:
            for key, value in kube_config.kube_client_request_args.items():
                kwargs[key] = value

        for event in watcher.stream(kube_client.list_namespaced_pod,
                                    self.namespace, **kwargs):
            try:
                # DBND PATCH
                # we want to process the message
                task = event["object"]
                self.log.debug(" %s had an event of type %s",
                               task.metadata.name, event["type"])

                if event["type"] == "ERROR":
                    return self.process_error(event)

                self._extended_process_state(event)
                self.resource_version = task.metadata.resource_version

            except Exception as e:
                # One bad event must not kill the watch loop; log it
                # (with traceback only in verbose mode) and continue.
                msg = "Event: Exception raised on specific event: %s, Exception: %s" % (
                    event,
                    e,
                )
                if is_verbose():
                    self.log.exception(msg)
                else:
                    self.log.warning(msg)
        return self.resource_version
Exemplo n.º 10
0
def _create_temp_working_dir(tmp_build_dir=None):
    """Yield a working directory for the build.

    When *tmp_build_dir* is falsy a fresh temp directory is created and,
    unless verbose mode is on, removed afterwards.

    NOTE(review): this is a generator used as a context manager — presumably
    wrapped with ``@contextmanager`` outside this view; confirm at the
    definition site.
    """
    created_here = False
    try:
        if not tmp_build_dir:
            tmp_build_dir = mkdtemp(prefix="dbnd-build-")
            created_here = True
        yield tmp_build_dir
    finally:
        if created_here:
            if is_verbose():  # do not clean build dir in verbose mode
                logger.info("Keeping build dir because verbose mode is on")
            else:
                logger.info("Deleting tmp directory: %s", tmp_build_dir)
                shutil.rmtree(tmp_build_dir, ignore_errors=True)
Exemplo n.º 11
0
    def banner(
        self,
        msg,
        color=None,
        verbose=False,
        print_task_band=False,
        task_run=None,
        exc_info=None,
    ):
        """Build the banner string for this task, picking the builder flavor
        by task essence (tracking / config / orchestration); falls back to
        *msg* if rendering fails."""
        task_id = self.task.task_id
        try:
            # Saving banner for testability
            self._banner = TextBanner(msg, color)

            # High verbosity when explicitly requested or globally enabled.
            verbosity = (
                FormatterVerbosity.HIGH
                if verbose or is_verbose()
                else FormatterVerbosity.NORMAL
            )

            builder = _TaskBannerBuilder(
                task=self.task,
                banner=self._banner,
                verbosity=verbosity,
                print_task_band=print_task_band,
            )

            # different banners for tracking and orchestration
            if TaskEssence.TRACKING.is_instance(self.task):
                builder.build_tracking_banner(task_run=task_run, exc_info=exc_info)
            elif TaskEssence.CONFIG.is_instance(self.task):
                builder.build_config_banner()
            else:
                builder.build_orchestration_banner(
                    task_run=task_run, exc_info=exc_info
                )

            return self._banner.get_banner_str()

        except Exception as ex:
            log_exception(
                "Failed to calculate banner for '%s'" % task_id, ex, non_critical=True
            )
            return msg + (" ( task_id=%s)" % task_id)
Exemplo n.º 12
0
def log_exception(msg, ex, logger_=None, verbose=None, non_critical=False):
    """Log exception *ex* with context *msg*, with verbosity-aware detail.

    :param logger_: target logger (defaults to the module logger).
    :param verbose: None means "use the global verbose flag".
    :param non_critical: log a short INFO line instead of an error.
    """
    logger_ = logger_ or logger

    from dbnd._core.errors.base import DatabandError

    effective_verbose = is_verbose() if verbose is None else verbose

    if effective_verbose:
        # just show the exception
        logger_.exception(msg)
    elif non_critical:
        logger_.info(msg + ": %s" % str(ex))
    elif isinstance(ex, DatabandError):
        # Known, expected errors: message only, no traceback.
        logger_.error(msg + ": %s" % str(ex))
    else:
        # should we? let's show the exception for now so we can debug
        logger_.exception(msg)
Exemplo n.º 13
0
 def __init__(self):
     """Initialize the console store: snapshot verbosity and set up rendering."""
     super(ConsoleStore, self).__init__()
     # Truncate logged values to keep console output readable.
     self.max_log_value_len = 50
     # Verbose flag is captured once at construction time.
     self.verbose = is_verbose()
     self.ascii_graph = Pyasciigraph()
Exemplo n.º 14
0
def log_pod_events_on_sigterm(stack_frame):
    """SIGTERM hook: dump diagnostics (stack trace, driver events, CPU/memory)."""
    print_stack_trace(stack_frame)
    print_driver_events()
    print_cpu_memory_usage()
    # dmesg output is noisy, so only emit it in verbose mode.
    if not is_verbose():
        return
    print_dmesg()
Exemplo n.º 15
0
    def build_pod(
        self,
        task_run: TaskRun,
        cmds: List[str],
        args: Optional[List[str]] = None,
        labels: Optional[Dict[str, str]] = None,
        try_number: Optional[int] = None,
        include_system_secrets: bool = False,
    ) -> k8s.V1Pod:
        """Construct the Kubernetes V1Pod for *task_run*.

        Merges configured and caller labels with dbnd tracking labels
        (sanitized to DNS-1123), assembles env vars, secrets, resources and
        annotations, then reconciles the result against the configured base
        pod.

        :param task_run: dbnd task run this pod will execute.
        :param cmds: container command; may be rewritten below for trap-exit
            or debug modes.
        :param args: container args; replaced when ``trap_exit_file_flag`` is set.
        :param labels: extra labels merged over the configured ones.
        :param try_number: attempt number used when generating the pod name.
        :param include_system_secrets: forwarded to ``get_secrets``.
        :raises DatabandConfigError: when no container tag is configured.
        :return: the reconciled ``k8s.V1Pod``.
        """
        if not self.container_tag:
            raise DatabandConfigError(
                "Your container tag is None, please check your configuration",
                help_msg="Container tag should be assigned",
            )

        pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

        image = self.full_image
        # Caller labels win over configured labels on key collisions.
        labels = combine_mappings(labels, self.labels)
        labels["pod_name"] = pod_name

        # dbnd tracking identity labels.
        labels["dbnd_run_uid"] = task_run.run.run_uid
        labels["dbnd_task_run_uid"] = task_run.task_run_uid
        labels["dbnd_task_run_attempt_uid"] = task_run.task_run_attempt_uid
        labels[
            "dbnd_task_family"] = task_run.task.task_definition.full_task_family_short
        labels["dbnd_task_name"] = task_run.task.task_name
        labels["dbnd_task_af_id"] = task_run.task_af_id

        # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)
        if task_run.task.task_is_system:
            labels["dbnd"] = "dbnd_system_task_run"
        else:
            labels["dbnd"] = "task_run"

        # we need to be sure that the values meet the dns label names RFC
        # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
        labels = {
            label_name: clean_label_name_dns1123(str(label_value))
            for label_name, label_value in six.iteritems(labels)
        }
        if is_verbose():
            logger.info("Build pod with kubernetes labels {}".format(labels))

        # Copy so the shared configured annotations are never mutated.
        annotations = self.annotations.copy()
        if self.gcp_service_account_keys:
            annotations[
                "iam.cloud.google.com/service-account"] = self.gcp_service_account_keys
        annotations["dbnd_tracker"] = task_run.task_tracker_url

        from dbnd_docker.kubernetes.vendorized_airflow.dbnd_extended_resources import (
            DbndExtendedResources, )

        resources = DbndExtendedResources(
            requests=self.requests,
            limits=self.limits,
            request_memory=self.request_memory,
            request_cpu=self.request_cpu,
            limit_memory=self.limit_memory,
            limit_cpu=self.limit_cpu,
        )
        # Environment the dbnd process inside the pod needs to identify itself.
        env_vars = {
            ENV_DBND_POD_NAME: pod_name,
            ENV_DBND_POD_NAMESPACE: self.namespace,
            ENV_DBND_USER: task_run.task_run_env.user,
            ENV_DBND__ENV_IMAGE: image,
            ENV_DBND_ENV: task_run.run.env.task_name,
            ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
        }

        if AIRFLOW_VERSION_2:
            env_vars[
                "AIRFLOW__CORE__TASK_RUNNER"] = "dbnd_airflow.compat.dbnd_task_runner.DbndStandardTaskRunner"

        if self.auto_remove:
            env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
        env_vars[self._params.get_param_env_key(self, "in_cluster")] = "True"
        env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
        env_vars[
            "DBND__RUN_INFO__SOURCE_VERSION"] = task_run.run.context.task_run_env.user_code_version
        env_vars["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
        if not get_dbnd_project_config().is_tracking_mode():
            env_vars[ENV_DBND__TRACKING] = "False"
        # we want that all next runs will be able to use the image that we have in our configuration

        env_vars.update(
            self._params.to_env_map(self, "container_repository",
                                    "container_tag"))

        # User-configured env vars and the run's spawn env override defaults.
        env_vars.update(self.env_vars)
        env_vars.update(task_run.run.get_context_spawn_env())

        secrets = self.get_secrets(
            include_system_secrets=include_system_secrets)

        if self.trap_exit_file_flag:
            # Wrap the command so the trap file is touched on container exit,
            # signalling completion to whoever watches for that file.
            args = [
                textwrap.dedent("""
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                ))
            ]
            # we update cmd now
            cmds = ["/bin/bash", "-c"]

        if self.debug_with_command:
            # Debug escape hatch: replace the real command entirely.
            logger.warning(
                "%s replacing pod %s command with '%s', original command=`%s`",
                task_run,
                pod_name,
                self.debug_with_command,
                subprocess.list2cmdline(cmds),
            )
            cmds = shlex.split(self.debug_with_command)

        base_pod = self._build_base_pod()

        pod = self._to_real_pod(
            cmds=cmds,
            args=args,
            namespace=self.namespace,
            name=pod_name,
            envs=env_vars,
            image=image,
            labels=labels,
            secrets=secrets,
            resources=resources,
            annotations=annotations,
        )

        # Merge with the configured base pod template; base values fill gaps.
        final_pod = reconcile_pods(base_pod, pod)

        return final_pod
Exemplo n.º 16
0
 def __init__(self):
     """Initialize the console store, snapshotting the global verbose flag."""
     super(ConsoleStore, self).__init__()
     # Verbose flag is captured once at construction time.
     self.verbose = is_verbose()
Exemplo n.º 17
0
 def __init__(self, *args, **kwargs):
     """Initialize the console store; forwards *args/**kwargs to the base class."""
     super(ConsoleStore, self).__init__(*args, **kwargs)
     # Truncate logged values to keep console output readable.
     self.max_log_value_len = 50
     # Verbose flag is captured once at construction time.
     self.verbose = is_verbose()
     self.ascii_graph = Pyasciigraph()
     # Cached so per-event code does not re-detect the tracking mode.
     self._is_in_airflow_tracking_mode = in_airflow_tracking_mode()
Exemplo n.º 18
0
    def run_airflow_dag(self, dag, session=None):
        # type:  (DAG, Session) -> None
        """Execute *dag* for the current databand run via a SingleDagRunJob.

        Persists the DAG (pickle + DB sync), creates a RUNNING DagRun with
        matching TaskInstances, relaxes Airflow concurrency limits, then
        runs the job to completion.

        :param dag: the Airflow DAG to execute.
        :param session: SQLAlchemy session used for all DB operations.
        """
        af_dag = dag
        databand_run = self.run
        databand_context = databand_run.context
        execution_date = databand_run.execution_date
        s = databand_context.settings  # type: DatabandSettings
        s_run = s.run  # type: RunConfig

        run_id = s_run.id
        if not run_id:
            # we need this name, otherwise Airflow will try to manage our local jobs at scheduler
            # ..zombies cleanup and so on
            run_id = "backfill_{0}_{1}".format(
                databand_run.name, databand_run.execution_date.isoformat())

        if self.airflow_config.disable_db_ping_on_connect:
            from airflow import settings as airflow_settings

            # Best-effort: drop Airflow's per-connection DB ping listener.
            try:
                remove_listener_by_name(airflow_settings.engine,
                                        "engine_connect", "ping_connection")
            except Exception as ex:
                logger.warning("Failed to optimize DB access: %s" % ex)

        if isinstance(self.airflow_task_executor, InProcessExecutor):
            # In-process execution needs no heartbeat.
            heartrate = 0
        else:
            # we are in parallel mode
            heartrate = airflow_conf.getfloat("scheduler", "JOB_HEARTBEAT_SEC")

        # "Amount of time in seconds to wait when the limit "
        # "on maximum active dag runs (max_active_runs) has "
        # "been reached before trying to execute a dag run "
        # "again.
        delay_on_limit = 1.0

        self._pickle_dag_and_save_pickle_id_for_versioned(af_dag,
                                                          session=session)
        af_dag.sync_to_db(session=session)

        # let create relevant TaskInstance, so SingleDagRunJob will run them
        create_dagrun_from_dbnd_run(
            databand_run=databand_run,
            dag=af_dag,
            run_id=run_id,
            execution_date=execution_date,
            session=session,
            state=State.RUNNING,
            external_trigger=False,
        )

        self.airflow_task_executor.fail_fast = s_run.fail_fast
        # we don't want to be stopped by zombie jobs/tasks
        airflow_conf.set("core", "dag_concurrency", str(10000))
        airflow_conf.set("core", "max_active_runs_per_dag", str(10000))

        job = SingleDagRunJob(
            dag=af_dag,
            execution_date=databand_run.execution_date,
            mark_success=s_run.mark_success,
            executor=self.airflow_task_executor,
            donot_pickle=(s_run.donot_pickle
                          or airflow_conf.getboolean("core", "donot_pickle")),
            ignore_first_depends_on_past=s_run.ignore_first_depends_on_past,
            ignore_task_deps=s_run.ignore_dependencies,
            fail_fast=s_run.fail_fast,
            pool=s_run.pool,
            delay_on_limit_secs=delay_on_limit,
            verbose=s.system.verbose,
            heartrate=heartrate,
            airflow_config=self.airflow_config,
        )

        # we need localDagJob to be available from "internal" functions
        # because of ti_state_manager use
        from dbnd._core.current import is_verbose

        with SingleDagRunJob.new_context(_context=job,
                                         allow_override=True,
                                         verbose=is_verbose()):
            job.run()
Exemplo n.º 19
0
    def _extended_process_state(self, event):
        """Map a pod watch event onto the Airflow watcher queue.

        Handles more cases than stock Airflow: pod deletions, deploy
        errors while Pending, container errors while Running, and the
        terminal Failed/Succeeded phases.

        :param event: raw kubernetes watch event (``{"type", "object", ...}``)
        :return: None; outcomes are pushed onto ``self.watcher_queue``
        """
        pod_data = event["object"]
        pod_id = pod_data.metadata.name
        phase = pod_data.status.phase
        resource_version = pod_data.metadata.resource_version
        labels = pod_data.metadata.labels
        task_id = labels.get("task_id")
        event_msg = "Event from %s(%s)" % (pod_id, task_id)

        try:
            try_num = int(labels.get("try_number", "1"))
            if try_num > 1:
                event_msg += " (try %s)" % try_num
        except ValueError:
            pass

        # Prebuilt FAILED tuple — several branches below end by enqueueing it.
        _fail_event = get_tuple_for_watcher_queue(pod_id, self.namespace,
                                                  State.FAILED, labels,
                                                  resource_version)
        debug_phase = (self.kube_dbnd.engine_config.debug_phase
                       )  # print only if user defined debug phase
        if is_verbose() or (debug_phase and phase == debug_phase):
            self.log.info(
                "Event verbose:%s %s %s: %s",
                pod_id,
                event_msg,
                event.get("type"),
                event.get("raw_object"),
            )

        if event.get("type") == "DELETED" and phase not in {
                "Succeeded", "Failed"
        }:
            # from Airflow 2.0 -> k8s may delete pods (preemption?)
            self.log.info(
                "%s: pod has been deleted: phase=%s deletion_timestamp=%s",
                event_msg,
                phase,
                pod_data.metadata.deletion_timestamp,
            )
            self.watcher_queue.put(_fail_event)
        elif pod_data.metadata.deletion_timestamp:
            # Deletion in progress but not yet a DELETED event.
            self.log.info(
                "%s: pod is being deleted: phase=%s deletion_timestamp=%s ",
                event_msg,
                phase,
                pod_data.metadata.deletion_timestamp,
            )
            self.watcher_queue.put(_fail_event)
        elif phase == "Pending":
            pod_ctrl = self.kube_dbnd.get_pod_ctrl(
                pod_id, namespace=pod_data.metadata.namespace)
            try:
                # now we only fail, we will use the same code to try to rerun at scheduler code
                pod_ctrl.check_deploy_errors(pod_data)
                self.log.info("%s: pod is Pending", event_msg)
            except Exception as ex:
                self.log.info(
                    "Event: %s Pending: failing with %s",
                    pod_id,
                    str(ex),
                )
                self.watcher_queue.put(_fail_event)

        elif phase == "Running":
            pod_ctrl = self.kube_dbnd.get_pod_ctrl(
                pod_id, namespace=pod_data.metadata.namespace)
            try:
                # now we only fail, we will use the same code to try to rerun at scheduler code
                pod_ctrl.check_running_errors(pod_data)
                self.log.info("%s: pod is Running", event_msg)
                self.watcher_queue.put(
                    get_tuple_for_watcher_queue(pod_id, self.namespace,
                                                State.RUNNING, labels,
                                                resource_version))
            except Exception as ex:
                # BUG FIX: this message previously said "Pending" — a
                # copy-paste from the Pending branch above.
                self.log.info(
                    "Event: %s Running: failing with %s",
                    pod_id,
                    str(ex),
                )
                self.watcher_queue.put(_fail_event)
        elif phase == "Failed":
            self.log.info("%s: pod has Failed", event_msg)
            self.watcher_queue.put(_fail_event)
        elif phase == "Succeeded":
            # State None lets the scheduler resolve the final task state.
            self.log.info("%s: pod has Succeeded", event_msg)
            self.watcher_queue.put(
                get_tuple_for_watcher_queue(pod_id, self.namespace, None,
                                            labels, resource_version))
        else:
            self.log.warning(
                "Event: Invalid state: %s on pod: %s with labels: %s with "
                "resource_version: %s",
                phase,
                pod_id,
                labels,
                resource_version,
            )