def log_exception(msg, ex, logger_=None, verbose=None, non_critical=False):
    """Log an exception, picking severity from verbosity and criticality.

    :param msg: human-readable context message for the failure
    :param ex: the exception being reported
    :param logger_: logger to emit to; falls back to the module-level logger
    :param verbose: force/suppress full tracebacks; when None, taken from the
        current databand context (defaulting to True if no context is up)
    :param non_critical: when True, log at INFO without a traceback
    """
    logger_ = logger_ or logger
    from dbnd._core.errors.base import DatabandError

    if verbose is None:
        dc = try_get_databand_context()
        if dc and getattr(dc, "system_settings", None):
            verbose = dc.system_settings.verbose
        else:
            verbose = True

    if verbose:
        # verbose mode: always show the full traceback
        logger_.exception(msg)
    elif non_critical:
        logger_.info(msg + ": %s" % str(ex))
    elif isinstance(ex, DatabandError):
        # known error type: a one-line message is enough
        logger_.error(msg + ": %s" % str(ex))
    else:
        # unexpected error type: keep the traceback so it can be debugged
        logger_.exception(msg)
def is_verbose(self):
    """Return effective verbosity: the context's setting when a databand
    context with system settings exists, otherwise the locally stored flag."""
    from dbnd._core.current import try_get_databand_context

    ctx = try_get_databand_context()
    if not (ctx and getattr(ctx, "system_settings", None)):
        return self._verbose
    return ctx.system_settings.verbose
def build_project_wheel_file_task(cls):
    """Instantiate the project fat-wheel task so it runs once per pipeline run.

    task_version is pinned to current_context_uid so the task re-runs every
    time the pipeline is "rerun"; task_target_date is fixed to "today" so the
    signature does not change with the user's task_target_date parameter.
    """
    return cls(
        task_version=try_get_databand_context().current_context_uid,
        task_target_date="today",
    )
def start(
    self,
    root_task_name,
    in_memory=True,
    run_uid=None,
    airflow_context=False,
    job_name=None,
):
    """Bootstrap a databand context and run for an inline (scripted) root task.

    No-op if a databand context already exists. Otherwise builds a fresh
    context + run, initializes them without executing, starts the driver and
    root task runs, and returns the run object.

    :param root_task_name: name for the inline root task to build
    :param in_memory: keep task outputs in memory instead of persisting them
    :param run_uid: optional externally supplied run uid; also used to derive
        the root task-run uid
    :param airflow_context: when True, skip atexit registration and name the
        context "airflow"
    :param job_name: optional job name forwarded to the new run
    :return: the initialized run (DatabandRun), or None if a context existed
    """
    # a context already active means someone else owns the lifecycle -- bail out
    if try_get_databand_context():
        return
    # register our own shutdown hook once (NOTE(review): _atexit_registered is
    # only read here -- presumably set elsewhere on this object; verify)
    if not airflow_context and not self._atexit_registered:
        atexit.register(self.stop)
        if is_airflow_enabled():
            from airflow.settings import dispose_orm

            # airflow's own atexit DB teardown would race with ours -- drop it
            atexit.unregister(dispose_orm)
    c = {
        # we don't want to "check" as script is task_version="now"
        "run": {"skip_completed": False},
        # do not save any outputs
        "task": {"task_in_memory_outputs": in_memory},
    }
    config.set_values(config_values=c, override=True, source="dbnd_start")
    context_kwargs = {"name": "airflow"} if airflow_context else {}
    # create databand context (entered via _enter_cm so stop() can unwind it)
    dc = self._enter_cm(new_dbnd_context(**context_kwargs))  # type: DatabandContext
    root_task = _build_inline_root_task(root_task_name, airflow_context=airflow_context)
    # create databand run
    dr = self._enter_cm(
        new_databand_run(
            context=dc,
            task_or_task_name=root_task,
            run_uid=run_uid,
            existing_run=False,
            job_name=job_name,
        )
    )  # type: DatabandRun
    # derive a stable root task-run uid only when the caller pinned run_uid
    if run_uid:
        root_task_run_uid = get_task_run_uid(run_uid, root_task_name)
    else:
        root_task_run_uid = None
    dr._init_without_run(root_task_run_uid=root_task_run_uid)
    # driver first, then the root task itself
    self._start_taskrun(dr.driver_task_run)
    self._start_taskrun(dr.root_task_run)
    return dr
def dbnd_status():
    """Build a short Databand environment status report and return it as text."""
    status = DoctorStatusReportBuilder("Databand Status")
    status.log("env.DBND_HOME", os.environ.get("DBND_HOME"))

    context = try_get_databand_context()
    status.log("DatabandContext", context)
    if context:
        status.log("initialized", context)
        # emit a couple of metrics to exercise the tracking path
        log_metric("metric_check", "OK")
        log_metric("metric_random_value", random.random())

    return status.get_status_str()
def build_airflow_operator(self, task_cls, call_args, call_kwargs):
    """Build the airflow operator, re-entering our stored config layer and
    databand context first when called from outside the build context."""
    if try_get_databand_context() is self.dbnd_context:
        # already inside our own build context -- build directly
        return self._build_airflow_operator(
            task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
        )
    # coming from the external world: restore config layer, then context
    with dbnd_config.config_layer_context(self.dbnd_config_layer):
        with DatabandContext.context(_context=self.dbnd_context):
            return self._build_airflow_operator(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )
def band(self):
    """Extend the base band: attach the user-project fat wheel when requested
    and clear task_band for engines that disable it."""
    banded = super(_BaseSparkTask, self).band()

    if self.spark_config.include_user_project:
        # pinned version/date so the wheel builds once per pipeline run
        wheel_task = fat_wheel_building_task(
            task_version=try_get_databand_context().current_context_uid,
            task_target_date="today",
        )
        self.spark_resources = {"user_project": wheel_task}

    if self.spark_engine.disable_task_band:
        logger.debug("Task band is disabled due to disable_task_band flag")
        self.task_band = None

    return banded
def test_none_string_marshalling(self):
    """Run NoneStringPipeline with in-memory target caching off, so values
    are reloaded from disk instead of the cache."""
    context = try_get_databand_context()
    # Prevent target caching, force reload from disk
    context.settings.features.in_memory_cache_target_value = False
    NoneStringPipeline().dbnd_run()
def is_in_memory_cache_target_value():
    """Return the in-memory target-cache feature flag from the current
    databand context, or False when no context is active."""
    context = try_get_databand_context()
    if not context:
        return False
    return context.settings.features.in_memory_cache_target_value
def test_none_string_marshalling(self):
    """Run NoneStringPipeline with target-on-access caching off, so values
    are reloaded from disk instead of the cache."""
    context = try_get_databand_context()
    # Prevent target caching, force reload from disk
    context.settings.run.target_cache_on_access = False
    NoneStringPipeline().dbnd_run()
def is_in_memory_cache_target_value():
    """Return the run-level target_cache_on_access flag from the current
    databand context, or False when no context is active."""
    context = try_get_databand_context()
    if not context:
        return False
    return context.settings.run.target_cache_on_access
def get_value_preview_max_len():
    """Return the configured cap on value-preview length, falling back to the
    module default when no databand context is active."""
    context = try_get_databand_context()
    if not context:
        return _DEFAULT_VALUE_PREVIEW_MAX_LEN
    return context.settings.core.value_preview_max_len
def logging_status():
    # type: ()->str
    """
    Shows the status of the logging system.

    All known loggers, logging configuration and so on.

    :return: the rendered status report (also logged and printed as a side effect)
    """
    report = DoctorStatusReportBuilder("Logging Status")
    report.log("logging.root", logging.root)
    report.log("logging.root.handlers", logging.root.handlers)
    report.log("logger", logger)
    report.log("logger.handlers", logger.handlers)
    # airflow usually alternate stderr/stdout
    report.log("sys.stderr", sys.stderr)
    report.log("sys.stderr[close]", hasattr(sys.stderr, "close"))
    # BUGFIX: this entry logged sys.__stderr__ but was mislabeled "sys.stderr"
    report.log("sys.__stderr__", sys.__stderr__)
    report.log("sys.__stderr__[close]", hasattr(sys.__stderr__, "close"))

    dbnd_context = try_get_databand_context()
    if dbnd_context:
        from dbnd._core.task_ctrl.task_visualiser import TaskVisualiser

        report.add_sub_report(
            TaskVisualiser(dbnd_context.settings.log).banner("Log Config")
        )

    # check airflow logging
    try:
        from logging import Logger

        airflow_task_logger = Logger.manager.loggerDict.get("airflow.task")
        if airflow_task_logger:
            # BUGFIX: "Airlfow" typo corrected in the report labels below
            report.log("Airflow task logger", airflow_task_logger)
            report.log("Airflow task logger handlers", airflow_task_logger.handlers)
        else:
            report.log("Airflow task logger", "not found")
    except Exception as ex:
        ex_msg = "Failed to get airflow.task logger status: %s" % ex
        report.log("Airflow task logger", ex_msg)
        logger.exception(ex_msg)

    # local renamed from `logging_status` -- it shadowed this function's name
    status_str = report.get_status_str()
    # BUGFIX: stray "s" after the bottom separator ("{sep}s") removed
    status_str = "\n{sep}\n{msg}\n{sep}\n".format(msg=status_str, sep="*" * 40)
    logger.info(status_str)

    # if we run this check we might have a problem with logs,
    # we don't know how we are going to see the message
    print("\n\nLogging Status (via __stderr__)%s" % status_str, file=sys.__stderr__)

    logger.info("Running logging validation.. (you will see a lot of messages)")
    # now we can print things, it might be that one of them will "kill the process"
    # because of some weird log handlers loop
    print("Message via print")
    print("Message via print stderr", file=sys.stderr)
    print("Message via print __stderr__", file=sys.__stderr__)
    logging.info("Message via logging root")
    logger.info("Message via logger")
    return status_str