def project_path(*path):
    """
    Gets a local path from databand directory and returns its absolute path.

    The path segments are forwarded unchanged to ``dbnd_project_path`` of the
    current dbnd project config.

    Deprecated. Look at dbnd_project_config
    at dbnd._core.configuration.environ_config
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_project_path(*path)
def databand_system_path(*path):
    """
    Forward the path segments to ``dbnd_system_path`` of the project config.

    Deprecated function. Look at dbnd_project_config
    at dbnd._core.configuration.environ_config.
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_system_path(*path)
def set_tracking_context():
    """
    Generator that switches the dbnd project config into tracking for one block.

    Before yielding, the project config is reset and its ``_dbnd_tracking``
    flag is set to True. After the caller is done (or if anything raised),
    tracking is stopped and the project config is reset again.

    NOTE(review): presumably wrapped with ``contextlib.contextmanager`` by a
    decorator that is not visible in this block - confirm at the definition.
    """
    try:
        # start from a clean config so the flag is not applied to stale state
        reset_dbnd_project_config()
        project_config = get_dbnd_project_config()
        project_config._dbnd_tracking = True
        yield
    finally:
        # always undo: stop tracking first, then drop the modified config
        dbnd_tracking_stop()
        reset_dbnd_project_config()
def dbnd_bootstrap():
    """Runs dbnd bootstrapping.

    The sequence runs at most once per process: the ``_dbnd_bootstrap_started``
    module flag is raised before any work is done, so nested or repeated calls
    return immediately. ``_dbnd_bootstrap`` is set only after every step
    (system bootstrap, marshallers, plugins, user pre-init) has finished.
    """
    global _dbnd_bootstrap, _dbnd_bootstrap_started

    if _dbnd_bootstrap_started:
        # bootstrap already ran, or is running further up the call stack
        return
    _dbnd_bootstrap_started = True

    dbnd_system_bootstrap()
    project_cfg = get_dbnd_project_config()

    # imported lazily, inside the function, as in the rest of this sequence
    from targets.marshalling import register_basic_data_marshallers

    register_basic_data_marshallers()

    _surpress_loggers()
    _suppress_warnings()
    enable_osx_forked_request_calls()

    register_dbnd_plugins()

    from dbnd._core.configuration import environ_config
    from dbnd._core.configuration.dbnd_config import config
    from dbnd._core.plugin.dbnd_plugins import pm
    from dbnd._core.utils.basics.load_python_module import run_user_func

    # [core] plugins is a comma separated list of extra plugins to register
    plugins_csv = config.get("core", "plugins", None)
    if plugins_csv:
        register_dbnd_user_plugins(plugins_csv.split(","))

    if is_unit_test_mode():
        pm.hook.dbnd_setup_unittest()
    pm.hook.dbnd_setup_plugin()

    if project_cfg.is_sigquit_handler_on:
        from dbnd._core.utils.basics.signal_utils import (
            register_sigquit_stack_dump_handler,
        )

        register_sigquit_stack_dump_handler()

    # now we can run user code ( at driver/task)
    preinit = environ_config.get_user_preinit()
    if preinit:
        run_user_func(preinit)

    # every step above completed; calls made while the steps were running
    # were already short-circuited by _dbnd_bootstrap_started
    _dbnd_bootstrap = True
def dbnd_system_bootstrap():
    """Load the dbnd system configs once per process.

    The ``_dbnd_system_bootstrap`` module flag is raised before the work
    starts, so repeated calls return immediately. If anything fails the flag
    is lowered again and the exception is re-raised, which lets a later call
    retry.
    """
    global _dbnd_system_bootstrap

    if _dbnd_system_bootstrap:
        return

    _dbnd_system_bootstrap = True
    try:
        # this will also initialize env if it's not initialized
        cfg = get_dbnd_project_config()
        if not cfg.quiet_mode:
            logger.info("Starting Databand %s!\n%s", dbnd.__version__, _env_banner())

        from databand import dbnd_config

        dbnd_config.load_system_configs()
    except Exception:
        # roll the flag back so the bootstrap is not considered done
        _dbnd_system_bootstrap = False
        raise
def dbnd_system_bootstrap():
    """Load the dbnd system configs once per process.

    The ``_dbnd_system_bootstrap`` module flag is raised before the work
    starts, so repeated calls return immediately. If anything fails the flag
    is lowered again and the exception is re-raised, which lets a later call
    retry.

    Unless the project config is in quiet mode, a start-up banner is logged,
    followed by the value of the ``DBND__RUN_INFO__SOURCE_VERSION``
    environment variable when it is set and non-empty.
    """
    global _dbnd_system_bootstrap

    if _dbnd_system_bootstrap:
        return

    _dbnd_system_bootstrap = True
    try:
        # this will also initialize env if it's not initialized
        cfg = get_dbnd_project_config()
        if not cfg.quiet_mode:
            logger.info("Starting Databand %s!\n%s", dbnd.__version__, _env_banner())
            source_version = os.environ.get("DBND__RUN_INFO__SOURCE_VERSION")
            if source_version:
                logger.info("revision: %s", source_version)

        from databand import dbnd_config

        dbnd_config.load_system_configs()
    except Exception:
        # roll the flag back so the bootstrap is not considered done
        _dbnd_system_bootstrap = False
        raise
def _get_env_vars(self, conf_env_vars=None):
    """Build the environment variable mapping for a submitted run.

    :param conf_env_vars: extra variables merged on top of the built-in ones;
        when None, ``self.config.env_vars`` is used instead.
    :return: dict with the current task run attempt uid, the tracking-mode
        flag as a string, the extra variables, and the optional pyspark /
        pluggy switches driven by ``self.config``.
    """
    result = {
        DBND_TASK_RUN_ATTEMPT_UID: str(
            current().current_task_run.task_run_attempt_uid
        ),
        ENV_DBND__TRACKING: str(get_dbnd_project_config().is_tracking_mode()),
    }

    extra_vars = self.config.env_vars if conf_env_vars is None else conf_env_vars
    if extra_vars:
        result.update(extra_vars)

    if self.config.fix_pyspark_imports:
        result[ENV_DBND_FIX_PYSPARK_IMPORTS] = "True"

    if self.config.disable_pluggy_entrypoint_loading:
        # Disable pluggy loading for spark-submitted run
        result[ENV_DBND__DISABLE_PLUGGY_ENTRYPOINT_LOADING] = "True"

        # Attach all loaded plugins to be manually loaded in submitted run;
        # the first item of each entry is the plugin name, with dashes
        # replaced by underscores
        result[ENV_DBND__CORE__PLUGINS] = ",".join(
            entry[0].replace("-", "_") for entry in pm.list_name_plugin()
        )

    return result
def databand_config_path(*path):
    """Forward the path segments to ``dbnd_config_path`` of the project config."""
    project_config = get_dbnd_project_config()
    return project_config.dbnd_config_path(*path)
def _is_verbose():
    """Return what ``is_verbose()`` of the current dbnd project config reports."""
    return get_dbnd_project_config().is_verbose()
track_module_functions, track_modules, ) from dbnd._core.utils.project.project_fs import ( databand_lib_path, databand_system_path, project_path, relative_path, ) from dbnd.tasks import basics from targets import _set_patches from dbnd._core.configuration.environ_config import ( # isort:skip get_dbnd_project_config, ) get_dbnd_project_config().validate_init() # isort:skip dbnd_config = config __all__ = [ "hookimpl", # context management "new_dbnd_context", "current", "dbnd_context", "current_task", "current_task_run", "get_databand_run", "get_databand_context", # inplace implementation "dbnd_run_start", "dbnd_run_stop",
def project_path(*path):
    """Forward the path segments to ``dbnd_project_path`` of the project config."""
    project_config = get_dbnd_project_config()
    return project_config.dbnd_project_path(*path)
def handle_callable_call(self, *call_args, **call_kwargs):
    """Dispatch a call of the decorated function/class to the right mode.

    The checks run in this order, and the first one that matches wins:

    1. dbnd is disabled -> call the wrapped object directly.
    2. tracking mode -> call the wrapped object inside ``tracking_context``
       and pass its result through the callback the context yields.
    3. Airflow DAG build context -> build the task for the DAG.
    4. no current task -> log a one-time warning and call the wrapped
       object directly.
    5. orchestration, BUILD phase -> create the task; return ``t.result``
       for single-output tasks, otherwise the task object itself.
    6. orchestration, RUN phase -> run as a dynamic task when the settings
       and the parent task allow it, otherwise call the wrapped object
       directly.

    :param call_args: positional arguments of the user call.
    :param call_kwargs: keyword arguments of the user call.
    :return: whatever the selected mode produces (see above).
    :raises Exception: when the current phase is neither BUILD nor RUN.
    """
    dbnd_project_config = get_dbnd_project_config()

    if dbnd_project_config.disabled:
        return self.class_or_func(*call_args, **call_kwargs)

    # we are at tracking mode
    if dbnd_project_config.is_tracking_mode():
        with self.tracking_context(call_args, call_kwargs) as track_result_callback:
            fp_result = self.class_or_func(*call_args, **call_kwargs)
            return track_result_callback(fp_result)

    #### DBND ORCHESTRATION MODE
    #
    #     -= Use "Step into My Code"" to get back from dbnd code! =-
    #
    # decorated object call/creation ( my_func(), MyDecoratedTask()
    # we are at orchestration mode

    task_cls = self.get_task_cls()

    if is_in_airflow_dag_build_context():
        # we are in Airflow DAG building mode - AIP-31
        return build_task_at_airflow_dag_context(
            task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
        )

    current = try_get_current_task()
    if not current:
        # no tracking/no orchestration,
        # falling back to "natural call" of the class_or_func
        message = (
            "Can't report tracking info. %s is decorated with @task, but no tracking context was found"
            % (self.class_or_func.__name__,)
        )
        get_one_time_logger().log_once(message, "task_decorator", logging.WARNING)
        return self.class_or_func(*call_args, **call_kwargs)

    ######
    # current is not None, and we are not in tracking/airflow/luigi
    # this is DBND Orchestration mode
    # we can be in the context of task.run() or in task.band()
    # called from user code using user_decorated_func() or UserDecoratedTask()

    if self.is_class:
        # drop the internal marker so it is not forwarded as a task parameter
        call_kwargs.pop("__call_original_cls", False)

    # we should not get here from _TaskFromTaskDecorator.invoke()
    # at that function we should call user code directly
    phase = current_phase()
    if phase is TaskContextPhase.BUILD:
        # we are in the @pipeline.band() context, we are building execution plan
        t = task_cls(*call_args, **call_kwargs)

        # we are in the band, and if user_code() is called we want to remove redundant
        # `user_code().result` usage
        if t.task_definition.single_result_output:
            return t.result

        # we have multiple outputs (more than one "output" parameter)
        # just return task object, user will use it as `user_code().output_1`
        return t
    elif phase is TaskContextPhase.RUN:
        # we are "running" inside some other task execution (orchestration!)
        # (inside user_defined_function() or UserDefinedTask.run()

        # if possible we will run it as "orchestration" task
        # with parameters parsing
        if (
            current.settings.run.task_run_at_execution_time_enabled
            and current.task_supports_dynamic_tasks
        ):
            return self._run_task_from_another_task_execution(
                parent_task=current, call_args=call_args, call_kwargs=call_kwargs
            )

        # we can not call it in "dbnd" way, fallback to normal call
        if self.is_class:
            call_kwargs["__call_original_cls"] = False
        return self.class_or_func(*call_args, **call_kwargs)
    else:
        # same exception type as before, but now it says what went wrong
        raise Exception(
            "Unsupported task context phase %r: expected %r or %r"
            % (phase, TaskContextPhase.BUILD, TaskContextPhase.RUN)
        )
def apply_env_vars_to_pod(self, pod):
    """Set the dbnd/Airflow environment variables on ``pod.envs`` in place.

    ``AIRFLOW__KUBERNETES__DAGS_IN_IMAGE`` is always set to "True". The
    tracking variable is set to "False" only when the current project config
    is not in tracking mode; otherwise it is left untouched.

    :param pod: object exposing a mutable ``envs`` mapping.
    """
    pod.envs["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"

    if get_dbnd_project_config().is_tracking_mode():
        # tracking mode: nothing to override
        return

    pod.envs[ENV_DBND__TRACKING] = "False"
def build_pod(
    self,
    task_run: TaskRun,
    cmds: List[str],
    args: Optional[List[str]] = None,
    labels: Optional[Dict[str, str]] = None,
    try_number: Optional[int] = None,
    include_system_secrets: bool = False,
) -> k8s.V1Pod:
    """Build the kubernetes pod object that runs ``task_run``.

    :param task_run: task run the pod is created for; the source of the pod
        name, the dbnd labels, the tracker annotation and the spawn env.
    :param cmds: command for the pod container. Replaced by ``/bin/bash -c``
        when ``trap_exit_file_flag`` is set, and by ``debug_with_command``
        when that is set.
    :param args: container arguments. Replaced by a generated shell script
        when ``trap_exit_file_flag`` is set.
    :param labels: extra labels, combined with ``self.labels``; the dbnd
        labels are then written on top of the combined mapping.
    :param try_number: forwarded to ``get_pod_name``.
    :param include_system_secrets: forwarded to ``get_secrets``.
    :return: the result of reconciling the base pod with the pod built here.
    :raises DatabandConfigError: when ``self.container_tag`` is empty.
    """
    if not self.container_tag:
        raise DatabandConfigError(
            "Your container tag is None, please check your configuration",
            help_msg="Container tag should be assigned",
        )

    pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

    image = self.full_image
    labels = combine_mappings(labels, self.labels)
    labels["pod_name"] = pod_name

    labels["dbnd_run_uid"] = task_run.run.run_uid
    labels["dbnd_task_run_uid"] = task_run.task_run_uid
    labels["dbnd_task_run_attempt_uid"] = task_run.task_run_attempt_uid
    labels["dbnd_task_family"] = task_run.task.task_definition.full_task_family_short
    labels["dbnd_task_name"] = task_run.task.task_name
    labels["dbnd_task_af_id"] = task_run.task_af_id

    # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)
    if task_run.task.task_is_system:
        labels["dbnd"] = "dbnd_system_task_run"
    else:
        labels["dbnd"] = "task_run"

    # we need to be sure that the values meet the dns label names RFC
    # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
    labels = {
        label_name: clean_label_name_dns1123(str(label_value))
        for label_name, label_value in six.iteritems(labels)
    }
    if is_verbose():
        # lazy logger arguments; the rendered message is unchanged
        logger.info("Build pod with kubernetes labels %s", labels)

    # copy, so the configured annotations are not mutated by this call
    annotations = self.annotations.copy()
    if self.gcp_service_account_keys:
        annotations[
            "iam.cloud.google.com/service-account"
        ] = self.gcp_service_account_keys
    annotations["dbnd_tracker"] = task_run.task_tracker_url

    from dbnd_docker.kubernetes.vendorized_airflow.dbnd_extended_resources import (
        DbndExtendedResources,
    )

    resources = DbndExtendedResources(
        requests=self.requests,
        limits=self.limits,
        request_memory=self.request_memory,
        request_cpu=self.request_cpu,
        limit_memory=self.limit_memory,
        limit_cpu=self.limit_cpu,
    )

    env_vars = {
        ENV_DBND_POD_NAME: pod_name,
        ENV_DBND_POD_NAMESPACE: self.namespace,
        ENV_DBND_USER: task_run.task_run_env.user,
        ENV_DBND__ENV_IMAGE: image,
        ENV_DBND_ENV: task_run.run.env.task_name,
        ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
    }

    if AIRFLOW_VERSION_2:
        env_vars[
            "AIRFLOW__CORE__TASK_RUNNER"
        ] = "dbnd_airflow.compat.dbnd_task_runner.DbndStandardTaskRunner"

    if self.auto_remove:
        env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
    env_vars[self._params.get_param_env_key(self, "in_cluster")] = "True"
    env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
    env_vars[
        "DBND__RUN_INFO__SOURCE_VERSION"
    ] = task_run.run.context.task_run_env.user_code_version

    env_vars["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
    if not get_dbnd_project_config().is_tracking_mode():
        env_vars[ENV_DBND__TRACKING] = "False"

    # we want that all next runs will be able to use the image that we have in our configuration
    # (the order of the updates matters: later mappings override earlier keys)
    env_vars.update(
        self._params.to_env_map(self, "container_repository", "container_tag")
    )
    env_vars.update(self.env_vars)
    env_vars.update(task_run.run.get_context_spawn_env())

    secrets = self.get_secrets(include_system_secrets=include_system_secrets)

    # keep the caller's command: both branches below may replace `cmds`,
    # and the debug warning has to report what was really requested
    original_cmds = cmds

    if self.trap_exit_file_flag:
        # the trap and the command must be on separate lines, otherwise bash
        # would parse the command words as signal names for `trap`
        args = [
            textwrap.dedent(
                """
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                )
            )
        ]
        # we update cmd now
        cmds = ["/bin/bash", "-c"]

    if self.debug_with_command:
        # NOTE(review): `args` is left as is here, so with trap_exit_file_flag
        # also set the debug command receives the trap script as its argument
        # - confirm this combination is intended.
        logger.warning(
            "%s replacing pod %s command with '%s', original command=`%s`",
            task_run,
            pod_name,
            self.debug_with_command,
            subprocess.list2cmdline(original_cmds),
        )
        cmds = shlex.split(self.debug_with_command)

    base_pod = self._build_base_pod()

    pod = self._to_real_pod(
        cmds=cmds,
        args=args,
        namespace=self.namespace,
        name=pod_name,
        envs=env_vars,
        image=image,
        labels=labels,
        secrets=secrets,
        resources=resources,
        annotations=annotations,
    )

    final_pod = reconcile_pods(base_pod, pod)
    return final_pod