Example No. 1
def log_exception(msg, ex, logger_=None, verbose=None, non_critical=False):
    logger_ = logger_ or logger

    from dbnd._core.current import try_get_databand_context
    from dbnd._core.errors.base import DatabandError

    if verbose is None:
        dc = try_get_databand_context()
        verbose = (
            dc.system_settings.verbose
            if dc and getattr(dc, "system_settings", None)
            else True
        )

    if verbose:
        # verbose mode: log the full exception with its traceback
        logger_.exception(msg)
        return

    if non_critical:
        logger_.info("%s: %s", msg, ex)
        return

    if isinstance(ex, DatabandError):
        logger_.error("%s: %s", msg, ex)
    else:
        # should we? let's show the exception for now so we can debug
        logger_.exception(msg)
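A minimal usage sketch for log_exception, assuming dbnd is installed and the module defines a logger (as the snippet implies); the message and exception below are made up for illustration:

import logging

logger = logging.getLogger(__name__)

try:
    raise ValueError("unexpected input")
except ValueError as ex:
    # non_critical=True logs at INFO level, without a traceback
    log_exception("failed to parse config", ex, logger_=logger, non_critical=True)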
Example No. 2
    def is_verbose(self):
        from dbnd._core.current import try_get_databand_context

        context = try_get_databand_context()
        if context and getattr(context, "system_settings", None):
            return context.system_settings.verbose

        return self._verbose
Example No. 3
    def build_project_wheel_file_task(cls):
        # Use current_context_uid to make sure this task runs only once per pipeline run.
        # task_target_date is constant so the signature won't change if the user
        # changes the task_target_date parameter.
        fat_wheel_task = cls(
            # we need it to run every time we "rerun" the pipeline
            task_version=try_get_databand_context().current_context_uid,
            # we don't want to inherit from the parent task, as it might have a different target_date
            task_target_date="today",
        )
        return fat_wheel_task
Example No. 4
    def start(
        self,
        root_task_name,
        in_memory=True,
        run_uid=None,
        airflow_context=False,
        job_name=None,
    ):
        if try_get_databand_context():
            return

        if not airflow_context and not self._atexit_registered:
            atexit.register(self.stop)
            if is_airflow_enabled():
                from airflow.settings import dispose_orm

                atexit.unregister(dispose_orm)
        c = {
            # don't skip "completed" tasks: the script runs with task_version="now"
            "run": {"skip_completed": False},
            # keep task outputs in memory instead of persisting them
            "task": {"task_in_memory_outputs": in_memory},
        }
        config.set_values(config_values=c, override=True, source="dbnd_start")
        context_kwargs = {"name": "airflow"} if airflow_context else {}
        # create databand context
        dc = self._enter_cm(
            new_dbnd_context(**context_kwargs))  # type: DatabandContext

        root_task = _build_inline_root_task(root_task_name,
                                            airflow_context=airflow_context)
        # create databand run
        dr = self._enter_cm(
            new_databand_run(
                context=dc,
                task_or_task_name=root_task,
                run_uid=run_uid,
                existing_run=False,
                job_name=job_name,
            ))  # type: DatabandRun

        if run_uid:
            root_task_run_uid = get_task_run_uid(run_uid, root_task_name)
        else:
            root_task_run_uid = None
        dr._init_without_run(root_task_run_uid=root_task_run_uid)

        self._start_taskrun(dr.driver_task_run)
        self._start_taskrun(dr.root_task_run)
        return dr
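A hypothetical invocation of start, assuming it lives on some run-manager object; the enclosing class is not shown in the snippet, so the class name below is made up:

manager = DbndStartManager()  # hypothetical name: the owning class is not shown
run = manager.start(root_task_name="my_root_task", job_name="nightly_job")
if run is not None:
    # start() returns None when a DatabandContext is already active
    print("started run:", run)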
Example No. 5
def dbnd_status():
    report = DoctorStatusReportBuilder("Databand Status")

    report.log("env.DBND_HOME", os.environ.get("DBND_HOME"))
    dc = try_get_databand_context()
    report.log("DatabandContext", dc)
    if dc:
        report.log("initialized", dc)

    # log a couple of metrics to verify metric reporting works
    log_metric("metric_check", "OK")
    log_metric("metric_random_value", random.random())
    return report.get_status_str()
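DoctorStatusReportBuilder's import path isn't shown in the snippet. A dbnd-free sketch of an equivalent builder, inferred only from the methods used in these examples (log, add_sub_report, get_status_str), could look like this:

class StatusReportBuilder(object):  # hypothetical stand-in, not dbnd's class
    def __init__(self, title):
        self.title = title
        self.lines = []

    def log(self, key, value):
        # record a single "key: value" status line
        self.lines.append("%s: %s" % (key, value))

    def add_sub_report(self, text):
        # attach a preformatted sub-report as-is
        self.lines.append(str(text))

    def get_status_str(self):
        return "\n".join(["== %s ==" % self.title] + self.lines)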
Example No. 6
    def build_airflow_operator(self, task_cls, call_args, call_kwargs):
        if try_get_databand_context() is self.dbnd_context:
            # we are already inside the build context
            return self._build_airflow_operator(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )

        # we were called from outside: re-enter the saved config layer and context
        with dbnd_config.config_layer_context(
            self.dbnd_config_layer
        ), DatabandContext.context(_context=self.dbnd_context):
            return self._build_airflow_operator(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )
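The method above implements a "re-enter the saved context when called from outside" pattern. A minimal dbnd-free sketch of the same idea, using contextvars (all names here are illustrative):

import contextlib
import contextvars

_active = contextvars.ContextVar("active_context", default=None)

@contextlib.contextmanager
def enter_context(ctx):
    token = _active.set(ctx)
    try:
        yield ctx
    finally:
        _active.reset(token)

def run_in_context(ctx, fn):
    if _active.get() is ctx:
        return fn()           # already inside the right context
    with enter_context(ctx):  # called from outside: re-enter the saved context
        return fn()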
Example No. 7
    def band(self):
        result = super(_BaseSparkTask, self).band()

        if self.spark_config.include_user_project:
            fat_wheel_task = fat_wheel_building_task(
                task_version=try_get_databand_context().current_context_uid,
                task_target_date="today",
            )
            self.spark_resources = {"user_project": fat_wheel_task}

        if self.spark_engine.disable_task_band:
            logger.debug("Task band is disabled due to disable_task_band flag")
            self.task_band = None

        return result
Example No. 8
    def test_none_string_marshalling(self):
        dc = try_get_databand_context()
        # prevent target caching: force reload from disk
        dc.settings.features.in_memory_cache_target_value = False
        p = NoneStringPipeline()
        p.dbnd_run()
Example No. 9
def is_in_memory_cache_target_value():
    dc = try_get_databand_context()
    if dc:
        return dc.settings.features.in_memory_cache_target_value
    return False
Example No. 10
    def test_none_string_marshalling(self):
        dc = try_get_databand_context()
        # prevent target caching: force reload from disk
        dc.settings.run.target_cache_on_access = False
        p = NoneStringPipeline()
        p.dbnd_run()
Example No. 11
def is_in_memory_cache_target_value():
    dc = try_get_databand_context()
    if dc:
        return dc.settings.run.target_cache_on_access
    return False
Example No. 12
def get_value_preview_max_len():
    dc = try_get_databand_context()

    if dc:
        return dc.settings.core.value_preview_max_len
    return _DEFAULT_VALUE_PREVIEW_MAX_LEN
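Examples 9 through 12 all follow the same guard pattern: try to fetch the global context, read a setting from it, and fall back to a default when no context is active. A minimal dbnd-free sketch of the pattern (names and the default value are illustrative):

_DEFAULT_PREVIEW_MAX_LEN = 10000  # hypothetical default
_current_context = None  # set by whatever code enters/exits the context

def try_get_context():
    return _current_context

def get_preview_max_len():
    ctx = try_get_context()
    if ctx is not None:
        # an active context overrides the module-level default
        return ctx.value_preview_max_len
    return _DEFAULT_PREVIEW_MAX_LEN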
Example No. 13
def logging_status():
    # type: () -> str
    """
    Show the status of the logging system:
    all known loggers, logging configuration, and so on.
    """
    report = DoctorStatusReportBuilder("Logging Status")

    report.log("logging.root", logging.root)
    report.log("logging.root.handlers", logging.root.handlers)
    report.log("logger", logger)
    report.log("logger.handlers", logger.handlers)

    # airflow usually alternates stderr/stdout
    report.log("sys.stderr", sys.stderr)
    report.log("sys.stderr[close]", hasattr(sys.stderr, "close"))
    report.log("sys.stderr", sys.__stderr__)
    report.log("sys.__stderr__[close]", hasattr(sys.__stderr__, "close"))

    dbnd_context = try_get_databand_context()
    if dbnd_context:
        from dbnd._core.task_ctrl.task_visualiser import TaskVisualiser

        report.add_sub_report(
            TaskVisualiser(dbnd_context.settings.log).banner("Log Config"))
    # check airflow logging

    try:
        from logging import Logger

        airflow_task_logger = Logger.manager.loggerDict.get("airflow.task")
        if airflow_task_logger:
            report.log("Airlfow task logger", airflow_task_logger)
            report.log("Airlfow task logger handlers",
                       airflow_task_logger.handlers)
        else:
            report.log("Airlfow task logger", "not found")
    except Exception as ex:
        ex_msg = "Failed to get airlfow.task logger status: %s" % ex
        report.log("Airflow task logger", ex_msg)
        logger.exception(ex_msg)

    logging_status = report.get_status_str()
    logging_status = "\n{sep}\n{msg}\n{sep}s\n".format(msg=logging_status,
                                                       sep="*" * 40)
    logger.info(logging_status)
    # if this check is being run, logging may already be broken,
    # so we don't know where this message will actually show up
    print("\n\nLogging Status (via __stderr__)%s" % logging_status,
          file=sys.__stderr__)

    logger.info(
        "Running logging validation... (you will see a lot of messages)")

    # now print through every channel; if there is a bad log-handler loop,
    # one of these might "kill the process"
    print("Message via print")
    print("Message via print stderr", file=sys.stderr)
    print("Message via print __stderr__", file=sys.__stderr__)
    logging.info("Message via logging root")
    logger.info("Message via logger")

    return logging_status
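A hypothetical invocation, assuming logging_status and its dbnd dependencies are importable; the function both logs the report and returns it as a string:

if __name__ == "__main__":
    status_text = logging_status()
    # the same report was already printed via logger.info and sys.__stderr__ above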