def __init__(self, config, task_cls, task_args, task_kwargs):
    # type:(DbndConfig, Type[_BaseTask], Any, Any)->None
    """
    Record the task class, the user's constructor inputs and the build context.

    :param config: config object, wrapped into a ``MultiSectionConfig`` that
        starts with an empty section list.
    :param task_cls: task class being instantiated; its ``task_definition``
        supplies the task family and the parameters.
    :param task_args: positional constructor arguments (stored as a new list).
    :param task_kwargs: keyword constructor arguments (stored as a copy).
    """
    self.task_cls = task_cls
    definition = task_cls.task_definition  # type: TaskDefinition
    self.task_definition = definition

    # snapshot of what the user passed, independent of later mutations
    self.task_kwargs__ctor = task_kwargs.copy()
    self.task_args__ctor = list(task_args)

    # a non-empty result means this constructor runs inside another Databand task
    parent = try_get_current_task()
    self.parent_task = parent

    context = get_databand_context()
    self.dbnd_context = context
    # NOTE: the detector call must stay directly in this method, its argument
    # is presumably a frame depth relative to the caller - TODO confirm
    call_source = [context.user_code_detector.find_user_side_frame(2)]
    self.task_call_source = call_source
    if call_source and parent:
        # append the call chain of the parent task after our own frame
        call_source.extend(parent.task_meta.task_call_source)

    family = definition.task_family
    self.task_family = family
    self.task_name = family

    self.multi_sec_conf = MultiSectionConfig(config, [])
    self._task_params = definition._task_params.clone()

    self.ctor_kwargs = {}
    self._exc_desc = family
    self.task_errors = []
def _get_current_task_run(self):
    """
    Return ``current_task_run`` of the task that is active right now.

    :raises NotImplementedError: when no current task is available.
    """
    active_task = try_get_current_task()
    if active_task is not None:
        return active_task.current_task_run

    # TODO: fake task
    raise NotImplementedError(
        "DatabandStore usage outside of DBND task is not implemented yet."
    )
def stop():
    """
    Report the current task via ``msg`` and kill the active databand run, if any.

    Always returns None.
    """
    msg("stopping!")

    current_task = try_get_current_task()
    msg("Current tasks looks like: %s" % (current_task))

    active_run = try_get_databand_run()
    if not active_run:
        # nothing is running - nothing to kill
        return
    active_run.kill()
def __init__(self, config, task_cls, task_definition, task_args, task_kwargs):
    # type:(DbndConfig, Type[_TaskWithParams],TaskDefinition, Any, Any)->None
    """
    Prepare the factory state needed to build one task object.

    "System" keywords (``task_name``, ``override``, ``task_config_sections``)
    are removed from ``task_kwargs`` in place; ``task_family`` is only read.
    ``self.task_passport`` is not assigned here and is expected to be
    provided by the class.

    :param config: current config (NOTE: it's a singleton).
    :param task_cls: task class to build.
    :param task_definition: definition object of ``task_cls``.
    :param task_args: positional constructor arguments (stored as a new list).
    :param task_kwargs: keyword constructor arguments; mutated as described above.
    """
    self.task_cls = task_cls
    self.task_definition = task_definition

    # untouched snapshot of the user inputs
    self.task_kwargs__ctor = task_kwargs.copy()
    self.task_args__ctor = list(task_args)

    self.task_env_config = None  # type: Optional[EnvConfig]
    self.parent_task = try_get_current_task()

    call_repr = _get_call_repr(
        self.task_passport.task_family,
        self.task_args__ctor,
        self.task_kwargs__ctor,
    )
    self._ctor_as_str = "%s@%s" % (call_repr, str(self.task_cls))

    # task_family in kwargs is supported as a fallback name (old behavior),
    # an explicit task_name always wins
    fallback_name = task_kwargs.get("task_family", self.task_passport.task_family)
    raw_task_name = task_kwargs.pop("task_name", fallback_name)
    config_override = task_kwargs.pop("override", None)
    extra_sections = task_kwargs.pop("task_config_sections", None)

    self.task_config_override = config_override or {}
    self.task_kwargs = task_kwargs
    self.task_name = TASK_ID_INVALID_CHAR_REGEX.sub("_", raw_task_name)

    factory_config = TaskFactoryConfig.from_dbnd_config(config)
    self.task_factory_config = factory_config
    self.verbose_build = factory_config.verbose

    self.config = config
    self.config_sections = []
    self.task_errors = []
    self.build_warnings = []

    # will be used for ConfigValue
    self.config_sections = self._get_task_config_sections(
        config=config,
        task_config_sections_extra=extra_sections,
    )
def _create_task(cls, args, kwargs):
    """
    Build (or fetch from the instance cache) the task object for ``cls``.

    :param cls: task class being instantiated.
    :param args: positional constructor arguments.
    :param kwargs: keyword constructor arguments.
    :return: the task object; it is also added as a child of the current
        task when there is one and the new task is not a CONFIG essence.
    """
    definition = cls.task_definition

    # The context has to be initialized before any logic runs in config() scope.
    # Class defaults are merged only for values that do not exist yet.
    defaults_layer = config(
        config_values=definition.task_defaults_config_store,
        source=definition.task_passport.format_source_name("defaults"),
        merge_settings=ConfigMergeSettings.on_non_exists_only,
    )
    with defaults_layer as task_config:
        is_tracking = TaskEssence.TRACKING.is_included(cls)

        # task meta comes first
        if is_tracking:
            meta_factory_cls = TrackedTaskMetaFactory
        else:
            meta_factory_cls = TaskMetaFactory
        meta_factory = meta_factory_cls(
            config=task_config,
            task_cls=cls,
            task_args=args,
            task_kwargs=kwargs,
        )
        task_meta = meta_factory.create_dbnd_task_meta()

        # A task already instantiated with the same parameters is reused,
        # unless we are tracking or the cached object opts out of caching.
        instance_cache = get_databand_context().task_instance_cache
        task = instance_cache.get_task_obj_by_id(task_meta.obj_key.id)
        needs_build = (
            not task or is_tracking or hasattr(task, "_dbnd_no_cache")
        )
        if needs_build:
            task = cls._build_task_obj(task_meta)
            instance_cache.register_task_obj_instance(task)

            # from here on nested constructors see this task as their parent
            with task_context(task, TaskContextPhase.BUILD):
                task._initialize()
                task._validate()
                task.task_meta.config_layer = config.config_layer

            instance_cache.register_task_instance(task)

        parent = try_get_current_task()
        if (
            parent
            and hasattr(task, "task_id")
            and task.task_essence != TaskEssence.CONFIG
        ):
            parent.descendants.add_child(task.task_id)

        return task
def __call__(cls, *args, **kwargs):
    """
    Custom class instantiation.

    Inside an Airflow DAG build context the call is redirected to
    ``build_task_at_airflow_dag_context``; otherwise the task object is built
    by a ``TaskFactory`` within a fresh config layer and linked to the
    current (parent) task when there is one.
    """
    # use-case of TaskClass() call from airflow context during DAG creation
    airflow_inplace_disabled = kwargs.pop("_dbnd_disable_airflow_inplace", False)
    if (
        is_in_airflow_dag_build_context()
        and TaskEssence.is_task_cls(cls)
        and not (
            airflow_inplace_disabled
            or getattr(cls, "_dbnd_decorated_task", False)
        )
    ):
        # the flag is put back so the redirected call does not end up here again
        airflow_kwargs = dict(kwargs, _dbnd_disable_airflow_inplace=True)
        return build_task_at_airflow_dag_context(
            task_cls=cls, call_args=args, call_kwargs=airflow_kwargs
        )

    definition = cls.task_definition

    # The context has to be initialized before any logic runs in config() scope.
    # A new config layer is opened so that leaving this block restores the
    # previous config value.
    ctor_layer = config(
        config_values={},
        source=definition.task_passport.format_source_name("ctor"),
    )
    with ctor_layer as task_config:
        task_object = TaskFactory(
            config=task_config,
            task_cls=cls,
            task_definition=cls.task_definition,
            task_args=args,
            task_kwargs=kwargs,
        ).build_task_object(cls)

    parent = try_get_current_task()
    if (
        parent
        and hasattr(task_object, "task_id")
        and task_object.task_essence != TaskEssence.CONFIG
    ):
        parent.descendants.add_child(task_object.task_id)

    return task_object
def __init__(self, config, task_cls, task_args, task_kwargs):
    # type:(DbndConfig, Type[_BaseTask], Any, Any)->None
    """
    Record the task class and the user's constructor inputs.

    :param config: config object, wrapped into a ``MultiSectionConfig`` that
        starts with an empty section list.
    :param task_cls: task class being instantiated; its ``task_definition``
        supplies the task family and the parameters.
    :param task_args: positional constructor arguments (stored as a new list).
    :param task_kwargs: keyword constructor arguments (stored as a copy).
    """
    self.task_cls = task_cls
    definition = task_cls.task_definition  # type: TaskDefinition
    self.task_definition = definition

    # snapshot of what the user passed, independent of later mutations
    self.task_kwargs__ctor = task_kwargs.copy()
    self.task_args__ctor = list(task_args)

    self.parent_task = try_get_current_task()

    # without an explicit name the task is named after its family
    family = definition.task_family
    self.task_family = family
    self.task_name = family

    self.multi_sec_conf = MultiSectionConfig(config, [])
    self._task_params = definition.task_params.copy()

    self.ctor_kwargs = {}
    self._exc_desc = family
    self.task_errors = []
def create_dbnd_task(config, new_task_factory, task_cls, task_args, task_kwargs):
    # type:(DbndConfig, Any, Type[_BaseTask], Any, Any)->_BaseTask
    """
    Build (or fetch from the instance cache) the task object for ``task_cls``.

    :param config: config used to build the task meta; its ``config_layer`` is
        stored on the meta of a freshly built task.
    :param new_task_factory: callable invoked with the task meta to create a
        new task object.
    :param task_cls: task class; ``is_tracking_mode`` selects the meta factory.
    :param task_args: positional constructor arguments.
    :param task_kwargs: keyword constructor arguments.
    :return: the task object (new or cached).
    """
    tracking_mode = task_cls.is_tracking_mode

    task_meta_factory = TrackedTaskMetaFactory if tracking_mode else TaskMetaFactory
    factory = task_meta_factory(
        config=config,
        task_cls=task_cls,
        task_args=task_args,
        task_kwargs=task_kwargs,
    )
    task_meta = factory.create_dbnd_task_meta()

    # If a Task has already been instantiated with the same parameters,
    # the previous instance is returned to reduce number of object instances.
    # Tracking mode and objects marked with `_dbnd_no_cache` always rebuild.
    tic = get_databand_context().task_instance_cache
    task = tic.get_task_obj_by_id(task_meta.obj_key.id)
    if not task or tracking_mode or hasattr(task, "_dbnd_no_cache"):
        task = new_task_factory(task_meta)
        tic.register_task_obj_instance(task)

        # now the task is created - all nested constructors will see it as parent
        with task_context(task, TaskContextPhase.BUILD):
            task._initialize()
            task._validate()
            task.task_meta.config_layer = config.config_layer

        tic.register_task_instance(task)

    # link the task to the task that is currently being built/run, if any
    parent_task = try_get_current_task()
    if (
        parent_task
        and hasattr(task, "task_id")
        and isinstance(task, _TaskParamContainer)
    ):
        parent_task.task_meta.add_child(task.task_id)

    return task
def _call_handler(cls, call_user_code, call_args, call_kwargs):
    """
    -= Use "Step into My Code" to get back from Databand code! =-

    Handle a decorated object call/creation ( my_func(), MyDecoratedTask() ).

    The call is dispatched by the first matching rule:

    1. ``__force_invoke`` was passed or Databand is disabled -> the user code
       is called directly.
    2. Airflow DAG build context -> ``build_task_at_airflow_dag_context``.
    3. Airflow task context -> ``track_airflow_dag_run_operator_run``.
    4. No current task (even after trying to start an inplace run) ->
       ``func_call.invoke()``.
    5. BUILD phase -> a task object is created from ``cls``; its ``result`` is
       returned for single-result definitions, the task object otherwise.
    6. RUN phase with dynamic tasks enabled and supported ->
       ``create_and_run_dynamic_task_safe``.
    7. Anything else -> ``func_call.invoke()``.

    :param cls: task class bound to the decorated object.
    :param call_user_code: callable that executes the original user code.
    :param call_args: positional arguments of the call.
    :param call_kwargs: keyword arguments of the call; ``__force_invoke`` is
        removed from it in place.
    """
    # the control flag is popped so it is never forwarded to the user code
    force_invoke = call_kwargs.pop("__force_invoke", False)
    if force_invoke or not is_databand_enabled():
        # 1. Databand is not enabled
        # 2. we have this call coming from Task.run / Task.band direct invocation
        return call_user_code(*call_args, **call_kwargs)
    func_call = FuncCall(
        task_cls=cls,
        call_args=call_args,
        call_kwargs=call_kwargs,
        call_user_code=call_user_code,
    )

    if is_in_airflow_dag_build_context():  # we are in Airflow DAG building mode
        return build_task_at_airflow_dag_context(
            task_cls=cls, call_args=call_args, call_kwargs=call_kwargs
        )

    airflow_task_context = try_get_airflow_context()
    if airflow_task_context:
        return track_airflow_dag_run_operator_run(
            func_call=func_call, airflow_task_context=airflow_task_context
        )

    current = try_get_current_task()
    if not current and is_inplace_run():
        # imported locally, only when an inplace run has to be started
        from dbnd._core.inplace_run.inplace_run_manager import dbnd_run_start

        task_run = dbnd_run_start()
        if task_run:
            current = task_run.task

    if not current:  # direct call to the function
        return func_call.invoke()

    ######
    # DBND HANDLING OF CALL
    # now we can make some decisions what we do with the call
    # it's not coming from _invoke_func
    # but from user code ... some_func() or SomeTask()
    phase = current_phase()
    if phase is TaskContextPhase.BUILD:
        # we are in the @pipeline context, we are building execution plan
        t = cls(*call_args, **call_kwargs)

        # we are in inline debug mode -> we are going to execute the task
        # we are in the band
        # and want to return result of the object
        if t.task_definition.single_result_output:
            return t.result

        # we have multiple outputs ( result, another output.. )
        # -> just return task object
        return t

    if phase is TaskContextPhase.RUN:
        # we are in the run function!
        if (
            current.settings.dynamic_task.enabled
            and current.task_supports_dynamic_tasks
        ):
            # isinstance() check required to prevent infinite recursion when @task is on
            # class and not on func (example: see test_task_decorated_class.py)
            # and the current task supports inline calls
            # that's extra mechanism in addition to __force_invoke
            # on pickle/unpickle isinstance fails to run.
            return create_and_run_dynamic_task_safe(func_call=func_call)

    # we can not call it in "databand" way, fallback to normal execution
    return func_call.invoke()
def __init__(self, dbnd_context, config, new_task_factory, task_cls, task_args, task_kwargs):
    # type:(DatabandContext, DbndConfig, Any, Type[_BaseTask], Any, Any)->None
    """
    Collect everything needed to build a task: user inputs, naming and the
    ordered list of config sections.

    :param dbnd_context: databand context; its ``user_code_detector`` is used
        to find the user-side frame of the call.
    :param config: config object, also used to create ``TaskFactoryConfig``.
    :param new_task_factory: stored as-is on ``self.new_task_factory``.
    :param task_cls: task class being instantiated; supplies ``task_definition``.
    :param task_args: positional constructor arguments (stored as a new list).
    :param task_kwargs: keyword constructor arguments. ``task_family``,
        ``task_name``, ``task_config_sections`` and ``override`` are removed
        from it in place; an untouched copy is kept in ``task_kwargs__ctor``.
    """
    self.task_cls = task_cls
    self.task_definition = task_cls.task_definition  # type: TaskDefinition
    self.new_task_factory = new_task_factory
    # keep copy of user inputs (taken before the system keywords are popped)
    self.task_kwargs__ctor = task_kwargs.copy()
    self.task_args__ctor = list(task_args)

    self.parent_task = try_get_current_task()

    self.config = config
    self.task_factory_config = TaskFactoryConfig.from_dbnd_config(config)
    self.verbose_build = self.task_factory_config.verbose

    # let's find out if we are running this constructor within another Databand Task
    self.dbnd_context = dbnd_context
    # NOTE(review): the argument 2 is presumably a frame depth relative to
    # this method - confirm before moving this call into a helper
    self.task_call_source = [
        self.dbnd_context.user_code_detector.find_user_side_frame(2)
    ]
    if self.task_call_source and self.parent_task:
        # append the call chain of the parent task after our own frame
        self.task_call_source.extend(
            self.parent_task.task_meta.task_call_source)

    self.task_family = task_kwargs.pop("task_family", None)
    # extra params from constructor
    self.task_name = task_kwargs.pop("task_name", None)

    kwargs_task_config_sections = task_kwargs.pop("task_config_sections", None)
    self.task_config_override = task_kwargs.pop("override", None) or {}
    # what is left are the regular task keyword arguments
    self.task_kwargs = task_kwargs

    if not self.task_family:
        self.task_family = self.task_definition.task_family
    if self.task_name:
        # replace characters matched by TASK_ID_INVALID_CHAR_REGEX with "_"
        self.task_name = TASK_ID_INVALID_CHAR_REGEX.sub(
            "_", self.task_name)

    # user gives explicit name, or it is the config section of the definition
    self.task_main_config_section = (
        self.task_name or self.task_definition.task_config_section)

    # only a missing (None) name falls back to the family; an empty-string
    # name passes neither this check nor the truthiness check above
    if self.task_name is None:
        self.task_name = self.task_family

    # there is priority of task name over task family, as name is more specific
    sections = [self.task_name]

    # _from at config files
    sections.extend(self._get_task_from_sections(config, self.task_name))
    sections.extend(
        [self.task_family, self.task_definition.full_task_family])
    if kwargs_task_config_sections:
        sections.extend(kwargs_task_config_sections)

    # adding "default sections" - LOWEST PRIORITY
    if issubclass(self.task_definition.task_class, _TaskParamContainer):
        sections += [CONF_TASK_SECTION]

    # imported locally (NOTE(review): presumably to avoid an import cycle - confirm)
    from dbnd._core.task.config import Config

    if issubclass(self.task_definition.task_class, Config):
        sections += [CONF_CONFIG_SECTION]

    # drop empty entries and duplicates
    sections = list(unique_everseen(filter(None, sections)))

    self.task_config_sections = sections

    self.task_params = list(self.task_definition.task_params.values()
                            )  # type: List[ParameterDefinition]
    self.ctor_kwargs = None
    # utilities section
    self.build_warnings = []
    # description for error messages: family plus the original keyword
    # arguments, each value passed through safe_string(repr(value), 300)
    self._exc_desc = "%s(%s)" % (
        self.task_family,
        ", ".join(("%s=%s" % (p, safe_string(repr(k), 300))
                   for p, k in iteritems(self.task_kwargs__ctor))),
    )
    self.task_errors = []