def try_get_airflow_context_from_spark_conf():
    if (
        not environ_config.environ_enabled("DBND__ENABLE__SPARK_CONTEXT_ENV")
        or _SPARK_ENV_FLAG not in os.environ
    ):
        return None

    if not _is_dbnd_spark_installed():
        return None
    try:
        from pyspark import SparkContext

        conf = SparkContext.getOrCreate().getConf()

        dag_id = conf.get("spark.env.AIRFLOW_CTX_DAG_ID")
        execution_date = conf.get("spark.env.AIRFLOW_CTX_EXECUTION_DATE")
        task_id = conf.get("spark.env.AIRFLOW_CTX_TASK_ID")

        if dag_id and task_id and execution_date:
            return AirflowTaskContext(
                dag_id=dag_id, execution_date=execution_date, task_id=task_id
            )
    except Exception as ex:
        logger.info("Failed to get airflow context info from spark job: %s", ex)

    return None
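# Hedged sketch (not dbnd's or Airflow's actual submission code): one way the
# AIRFLOW_CTX_* keys could end up in the Spark conf that
# try_get_airflow_context_from_spark_conf reads. The dag/task ids below are
# hypothetical placeholder values; the real wiring may set these differently.
def _example_submit_with_airflow_context():
    from pyspark import SparkConf, SparkContext

    conf = SparkConf()
    conf.set("spark.env.AIRFLOW_CTX_DAG_ID", "example_dag")
    conf.set("spark.env.AIRFLOW_CTX_TASK_ID", "example_task")
    conf.set("spark.env.AIRFLOW_CTX_EXECUTION_DATE", "2021-01-01T00:00:00+00:00")
    return SparkContext.getOrCreate(conf=conf)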
def is_verbose():
    context = try_get_databand_context()
    if context and getattr(context, "system_settings", None):
        return context.system_settings.verbose

    from dbnd._core.configuration import environ_config

    return environ_config.environ_enabled(environ_config.ENV_DBND__VERBOSE)
def get_dags():
    if environ_enabled(ENV_DBND_DISABLE_SCHEDULED_DAGS_LOAD):
        return None
    from dbnd._core.errors.base import DatabandApiError, DatabandConnectionException

    try:
        # make sure the system configs are loaded
        config.load_system_configs()
        dags = DbndSchedulerDBDagsProvider().get_dags()

        if not in_quiet_mode():
            logger.info("providing %s dags from scheduled jobs", len(dags))
        return dags
    except (DatabandConnectionException, DatabandApiError) as e:
        logger.error(str(e))
    except Exception:
        raise
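# Hedged usage sketch: a scheduler DAG file could expose the scheduled dbnd jobs to
# Airflow's DagBag roughly like this, e.g. _example_expose_dags_to_airflow(globals())
# at the bottom of the file. The loop is an assumption about the calling site, not
# part of get_dags itself.
def _example_expose_dags_to_airflow(namespace):
    dags = get_dags()
    if dags:
        for dag in dags:
            # Airflow discovers DAGs by scanning module globals
            namespace[dag.dag_id] = dag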
def safe_tabulate(tabular_data, headers, **kwargs):
    terminal_columns, _ = get_terminal_size()
    # the fancy_grid format uses utf-8 characters (in the corners of the table),
    # which cp1252 fails to encode
    fancy_grid = not windows_compatible_mode and not environ_enabled(ENV_DBND__NO_TABLES)
    tablefmt = "fancy_grid" if fancy_grid else "grid"
    table = tabulate(tabular_data, headers=headers, tablefmt=tablefmt, **kwargs)
    # fall back to the plain format if any rendered line is wider than the terminal
    if table and max(map(len, table.split("\n"))) >= terminal_columns:
        table = tabulate(tabular_data, headers=headers, tablefmt="plain", **kwargs)
    return table
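# Hedged usage sketch for safe_tabulate: the rows and headers are hypothetical data,
# and it assumes this module's helpers (tabulate, get_terminal_size, etc.) are available.
def _example_print_run_table():
    rows = [
        ["dbnd_sanity_check", "dbnd_sanity_check__4ff5ec38", "success"],
        ["predict_wine_quality", "predict_wine_quality__a1b2c3d4", "failed"],
    ]
    print(safe_tabulate(rows, headers=["job", "run", "state"]))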
def cleanup_after_run(self):
    # this run was submitted by task_run_async - we need to clean up after ourselves
    if not environ_enabled(ENV_DBND_AUTO_REMOVE_POD):
        return

    if ENV_DBND_POD_NAME in environ and ENV_DBND_POD_NAMESPACE in environ:
        try:
            logger.warning(
                "Auto-deleting pod according to the '%s' env variable"
                % ENV_DBND_AUTO_REMOVE_POD
            )
            kube_dbnd = self.build_kube_dbnd()
            kube_dbnd.delete_pod(
                name=environ[ENV_DBND_POD_NAME],
                namespace=environ[ENV_DBND_POD_NAMESPACE],
            )
        except Exception as e:
            logger.warning("Tried to delete this pod but failed: %s" % e)
    else:
        logger.warning(
            "Auto delete pod is set, but the pod name and namespace are not defined"
        )
def try_get_airflow_context_from_spark_conf():
    if (
        not environ_config.environ_enabled("DBND__ENABLE__SPARK_CONTEXT_ENV")
        or _SPARK_ENV_FLAG not in os.environ
    ):
        return None

    try:
        from pyspark import SparkContext
    except Exception:
        return None

    conf = SparkContext.getOrCreate().getConf()

    dag_id = conf.get("spark.env.AIRFLOW_CTX_DAG_ID")
    execution_date = conf.get("spark.env.AIRFLOW_CTX_EXECUTION_DATE")
    task_id = conf.get("spark.env.AIRFLOW_CTX_TASK_ID")

    if dag_id and task_id and execution_date:
        return AirflowTaskContext(
            dag_id=dag_id, execution_date=execution_date, task_id=task_id
        )
    return None
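# Hedged sketch of the consumer side: inside a Spark job submitted from an Airflow task,
# recover the tracking context. The attribute names follow the AirflowTaskContext
# constructor above; what they contain depends on what the submitter put into the conf.
def _example_report_airflow_context():
    ctx = try_get_airflow_context_from_spark_conf()
    if ctx is not None:
        logger.info(
            "Tracking under %s.%s at %s", ctx.dag_id, ctx.task_id, ctx.execution_date
        )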
def disable_airflow_subdag_tracking():
    return environ_config.environ_enabled(
        environ_config.ENV_DBND__DISABLE_AIRFLOW_SUBDAG_TRACKING, False
    )
def override_airflow_log_system_for_tracking():
    return environ_config.environ_enabled(
        environ_config.ENV_DBND__OVERRIDE_AIRFLOW_LOG_SYSTEM_FOR_TRACKING
    )
def is_inplace_run():
    return environ_config.environ_enabled(environ_config.ENV_DBND__TRACKING)
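# Hedged sketch: these flag helpers just read boolean environment variables, so a test
# or a launcher script could toggle them through the constants in environ_config
# (using the constants avoids assuming their exact string values).
def _example_enable_tracking_env():
    import os

    from dbnd._core.configuration import environ_config

    os.environ[environ_config.ENV_DBND__TRACKING] = "True"
    assert is_inplace_run()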