def __init__(self, databand_context):
    # type: (DatabandContext) -> None
    """Instantiate every configuration section for the given Databand context."""
    super(DatabandSettings, self).__init__()
    self.databand_context = databand_context  # type: DatabandContext

    # core and feature configuration sections
    self.core = CoreConfig()
    self.features = FeaturesConfig()  # type: FeaturesConfig
    self.tracking = TrackingConfig()  # type: TrackingConfig
    self.dynamic_task = DynamicTaskConfig()

    # run/orchestration configuration sections
    self.run = RunConfig()
    self.git = GitConfig()
    self.describe = DescribeConfig()
    self.log = LoggingConfig()
    self.output = OutputConfig()
    self.scheduler = SchedulerConfig()

    self.singleton_configs = {}

    # user-defined config sections declared via [core]user_configs
    self.user_configs = {
        user_config: build_task_from_config(user_config)
        for user_config in self.core.user_configs
    }

    self._web = None
def dummy_nested_config_task(expected, config_name):
    # type: (object, str) -> object
    """Explicitly build the named k8s engine config and report its key fields."""
    engine_config = build_task_from_config(task_name=config_name)
    return (
        engine_config.limits,
        engine_config.cluster_context,
        engine_config.container_tag,
    )
def parse_from_str(self, input):
    """Parse a task_family using the :class:`~dbnd._core.register.Register`."""
    from dbnd._core.settings.env import EnvConfig

    # environments get special resolution through the settings object
    if not isinstance(self.config_cls, EnvConfig):
        return build_task_from_config(input)
    return get_settings().get_env_config(input)
def get_env_config(self, name_or_env):
    # type: (Union[str, EnvConfig]) -> EnvConfig
    """Resolve *name_or_env* to an :class:`EnvConfig`.

    An already-built EnvConfig is returned as-is; a name is validated
    against the enabled `[core]environments` list before being built.
    """
    if isinstance(name_or_env, EnvConfig):
        return name_or_env

    if name_or_env not in self.core.environments:
        raise DatabandConfigError(
            "Unknown env name '%s', available environments are %s, please enable it at '[core]environments' "
            % (name_or_env, self.core.environments)
        )
    return build_task_from_config(name_or_env, EnvConfig)
def test_task_runner_context(self):
    run = dummy_nested_config_task.dbnd_run(
        "test_limits", config_name="gcp_k8s_engine"
    )
    with DatabandRun.context(run):
        task_run = run.task.current_task_run  # type: TaskRun
        with task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD):
            # rebuilding the config inside the task context should pick up
            # the task-level overrides
            engine_config = build_task_from_config(task_name="gcp_k8s_engine")
            assert engine_config.cluster_context == "test"
            assert engine_config.container_tag == "test_f_value"
def test_task_runner_context(self):
    # same as test_task_sub_config_override
    # we check that task_run_context "put" us in the right config layer
    run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
    with DatabandRun.context(run):
        task_run = run.task.current_task_run  # type: TaskRun
        with task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD):
            sub_config = build_task_from_config(task_name="sub_tconfig")
            assert sub_config.config_value_s1 == "override_config_s1"
            assert sub_config.config_value_s2 == "value_sub_from_databand_test_cfg_s2"
def __init__(self, databand_context):
    # type: (DatabandContext) -> None
    """Bind settings to a context and build all user-declared config sections."""
    super(DatabandSettings, self).__init__()
    self.databand_context = databand_context  # type: DatabandContext

    self.singleton_configs = {}

    # user-defined config sections declared via [core]user_configs
    self.user_configs = {
        user_config: build_task_from_config(user_config)
        for user_config in self.core.user_configs
    }

    self._web = None
def test_task_runner_context(self):
    # same as test_task_sub_config_override
    # we check that task_run_context "put" us in the right config layer
    run = dummy_nested_config_task.dbnd_run(config_name="sub_tconfig")
    with DatabandRun.context(run):
        task_run = run.task.current_task_run  # type: TaskRun
        with task_run.task.ctrl.task_context(phase=TaskContextPhase.BUILD):
            sub_config = build_task_from_config(task_name="sub_tconfig")
            assert sub_config.config_value_s1 == "override_config_s1"
            # because we have task_config in dummy_nested_config_task that overrides config
            # tconfig is higher than value for [ sub_tconfig] at config file
            # config layer is down..
            assert sub_config.config_value_s2 == "task_config_regular_s2"
def task_with_extend(name, expected_labels):
    """Assert the engine config built from *name* carries all expected labels."""
    labels = build_task_from_config(task_name=name).labels
    # every expected label must be present with the exact expected value
    for label_name, label_value in expected_labels.items():
        assert label_name in labels
        assert labels[label_name] == label_value
    assert "task_name" in labels
    assert labels["task_name"] == "the one and only"
def dummy_nested_config_task(config_name):
    # type: (str) -> object
    """Build the named config inside the task and return its two values."""
    config = build_task_from_config(task_name=config_name)
    return config.config_value_s1, config.config_value_s2
def build_engine_config(name):
    # type: (Union[str, EngineConfig]) -> EngineConfig
    """Build an :class:`EngineConfig` object for `name`."""
    return build_task_from_config(name, EngineConfig)
def test_config_with_double_from(self):
    # a section that inherits (_from) another section that itself inherits
    config = build_task_from_config("my_ttt_from_tt")
    assert config.get_task_family() == "MyTaskConfig"
    assert config.task_name == "my_ttt_from_tt"
    assert config.p_str == "my_ttt_from_tt_sql"
    assert config.p_basic == "basic_my_t"
def test_simple_config(self):
    config = build_task_from_config("my_t")
    assert config.get_task_family() == "MyTaskConfig"
    assert config.task_name == "my_t"
    assert config.p_str == "my_t_sql"
    assert config.p_basic == "basic_my_t"
def create_hdfs_client():
    # type: () -> FileSystem
    """Build the HDFS filesystem client configured for the current env.

    Note: the original type comment was malformed (`# type ()->` is not
    valid PEP 484 syntax and is ignored by type checkers); fixed here.
    """
    return build_task_from_config(dbnd_context().env.hdfs)
def request_builder(config_name):
    """Build a kube pod request from the named k8s engine config."""
    engine_config = build_task_from_config(task_name=config_name)
    pod = engine_config.build_pod(
        task_run=try_get_current_task_run(), cmds=["dummy"]
    )
    return engine_config.build_kube_pod_req(pod)
def pod_builder(config_name):
    """Explicitly build the k8s config and return the pod it produces."""
    engine_config = build_task_from_config(task_name=config_name)
    return engine_config.build_pod(
        task_run=try_get_current_task_run(), cmds=["dummy"]
    )
def _get_engine_config(self, name):
    # type: (Union[str, EngineConfig]) -> EngineConfig
    """Build the :class:`EngineConfig` referenced by *name*."""
    return build_task_from_config(name, EngineConfig)
def build_task_runs(self, run, root_task, remote_engine, root_task_run_uid=None):
    # type: (DatabandRun, Task, EngineConfig, UUID) -> List[TaskRun]
    """Build a TaskRun for every task in the run's graph.

    Marks each TaskRun as reused/skipped/root according to the run
    configuration (disabled tasks, partial-graph selection via
    `run.task`/`run.id`, and completed-task skipping).
    """
    run_config = run.context.settings.run  # type: RunConfig

    # first, let remove all tasks explicitly marked as disabled by user
    tasks_to_run, tasks_disabled = self.get_tasks_without_disabled(root_task)
    if tasks_disabled:
        logger.info(
            "Tasks were removed from the task graph as they are marked as not to run: %s",
            tasks_summary(tasks_disabled),
        )

    roots = [root_task]
    tasks_skipped = set()

    # in case we need to run only part of the graph we mark all other tasks as skipped
    if run_config.task or run_config.id:
        task_dag = root_task.ctrl.task_dag  # type: _TaskDagNode
        if run_config.task:
            # select sub-graph roots by task name
            roots = task_dag.select_by_task_names(
                run_config.task, tasks=tasks_to_run
            )
        elif run_config.id:
            # select sub-graph roots by task id
            roots = task_dag.select_by_task_ids(run_config.id, tasks=tasks_to_run)

        # everything not reachable from the selected roots is skipped
        tasks_skipped = tasks_to_run.difference(all_subdags(roots))

    enabled_tasks = tasks_to_run.difference(tasks_skipped)

    tasks_completed = set()
    task_skipped_as_not_required = set()
    if run_config.skip_completed:
        tasks_completed, task_skipped_as_not_required = find_tasks_to_skip_complete(
            roots, enabled_tasks
        )

    # # if any of the tasks is spark add policy
    # from dbnd._core.task.spark import _BaseSparkTask
    # for t in tasks_to_run:
    #     if isinstance(t, _BaseSparkTask):
    #         t.spark.apply_spark_cluster_policy(t)
    # bash_op = BashOperator(task_id="echo", bash_command="echo hi")
    # self.root_task.set_upstream(bash_op.task)

    friendly_ids = calculate_friendly_task_ids(tasks_to_run)

    completed_ids = tasks_to_ids_set(tasks_completed)
    task_skipped_as_not_required_ids = tasks_to_ids_set(
        task_skipped_as_not_required
    )
    skipped_ids = tasks_to_ids_set(tasks_skipped)

    task_runs = []
    for task in tasks_to_run:
        with task.ctrl.task_context(phase=TaskContextPhase.BUILD):
            # we want to have configuration with task overrides
            task_engine = build_task_from_config(
                task_name=remote_engine.task_name
            )
            task_engine.require_submit = remote_engine.require_submit

            task_run = TaskRun(
                run=run,
                task=task,
                task_af_id=friendly_ids[task.task_id],
                task_engine=task_engine,
                # only the root task run carries the externally supplied uuid
                _uuid=root_task_run_uid
                if task.task_id == root_task.task_id
                else None,
            )
        if task.task_id in completed_ids:
            task_run.is_reused = True

        if task.task_id in task_skipped_as_not_required_ids:
            task_run.is_reused = True
            task_run.is_skipped_as_not_required = True

        if task.task_id in skipped_ids:
            task_run.is_skipped = True

        if task.task_id == root_task.task_id:
            task_run.is_root = True

        task_runs.append(task_run)

    return task_runs