def test_tracking_after_flush(self, fake_api_request):
    ctx = get_databand_context()
    async_store = TrackingStoreThroughChannel.build_with_async_web_channel(ctx)
    async_store.heartbeat(get_uuid())
    async_store.flush()
    async_store.heartbeat(get_uuid())
    async_store.flush()
def _handle(self, name, data):
    schema = self.get_schema_by_handler_name(name)
    labels = {
        "sender": "TrackingProtoWebChannel",
        "source_version": self.source_version,
        # TODO: add env details
    }
    event = Event(uuid=str(get_uuid()), schema=str_type(schema), labels=labels)
    event.data.update(data)

    post_event_request = PostEventsRequest()
    post_event_request.events.append(event)
    post_event_request.timestamp.GetCurrentTime()
    data = post_event_request.SerializeToString()

    encoded_response_str = self.client.api_request("tracking/proto", data)
    b64encoded_response_bytes = encoded_response_str.encode("utf-8")
    raw_bytes = base64.b64decode(b64encoded_response_bytes)

    post_event_response = PostEventsResponse()
    post_event_response.ParseFromString(raw_bytes)

    if post_event_response.exception:
        raise Exception("Response error: %s" % post_event_response.exception)
    return post_event_response.responses[event.uuid]
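# Illustrative sketch (not part of dbnd): the transport encoding that _handle() above relies on.
# The web server answers with the serialized PostEventsResponse as a base64 string, so the client
# encodes that string back to bytes and base64-decodes it before calling ParseFromString().
# "serialized_response" below is a hypothetical stand-in for real protobuf wire bytes.
import base64

serialized_response = b"\x08\x01"  # pretend protobuf payload produced by the server
encoded_response_str = base64.b64encode(serialized_response).decode("utf-8")  # what the API returns

# client side, mirroring _handle():
raw_bytes = base64.b64decode(encoded_response_str.encode("utf-8"))
assert raw_bytes == serialized_response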
def init_attempt(self):
    self.task_run_attempt_uid = get_uuid()
    self.attempt_folder = self.task._meta_output.folder(
        "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
        extension=None,
    )
    self.meta_files = TaskRunMetaFiles(self.attempt_folder)
    self.log = TaskRunLogManager(task_run=self)
def test_thread_not_started_immediately(self, fake_api_request):
    ctx = get_databand_context()
    async_store = TrackingStoreThroughChannel.build_with_async_web_channel(ctx)
    assert async_store.is_ready()
    assert not async_store.channel._background_worker.is_alive
    async_store.heartbeat(get_uuid())
    assert async_store.channel._background_worker.is_alive
def __init__(self, task_class, classdict):
    super(TaskDefinition, self).__init__()

    self.task_definition_uid = get_uuid()
    self.hidden = False

    self.task_class = task_class  # type: Type[Task]
    self.task_passport = TaskPassport.from_task_cls(task_class)

    # TODO: maybe use properties or another way to delegate those...
    self.full_task_family = self.task_passport.full_task_family
    self.full_task_family_short = self.task_passport.full_task_family_short
    self.task_family = self.task_passport.task_family
    self.task_config_section = self.task_passport.task_config_section

    # all the attributes that point to a Parameter
    self.task_params = dict()  # type: Dict[str, ParameterDefinition]
    # the defaults attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_params, self.defaults = self._calculate_task_class_values(classdict)

    # if we have output params in function arguments, like f(some_p=parameter.output),
    # the new function cannot return its result via `return`
    self.single_result_output = self._is_result_single_output(self.task_params)

    defaults = {
        p.name: p.default for p in self.task_params.values() if is_defined(p.default)
    }
    self.task_defaults_config_store = parse_and_build_config_store(
        source=self.task_passport.format_source_name("defaults"),
        config_values={self.task_config_section: defaults},
        set_if_not_exists_only=True,
    )
    self.task_defaults_config_store.update(
        parse_and_build_config_store(
            source=self.task_passport.format_source_name("defaults_section"),
            config_values=self.defaults,
        )
    )

    # now, if we have overloads in code (calculated in task_definition):
    #   class T(BaseT):
    #       some_base_t_property = new_value

    if self.task_class._conf__track_source_code:
        self.task_source_code = _get_task_source_code(self.task_class)
        self.task_module_code = _get_task_module_source_code(self.task_class)
        self.task_source_file = _get_source_file(self.task_class)
    else:
        self.task_source_code = None
        self.task_module_code = ""
        self.task_source_file = None
def test_no_skip_after_failure(self, fake_api_request):
    with new_dbnd_context(
        conf={
            "core": {"tracker_raise_on_error": False},
            "databand": {"verbose": True},
        }
    ) as ctx:
        with patch.object(
            TrackingAsyncWebChannel, "_background_worker_skip_processing_callback"
        ) as fake_skip:
            async_store = TrackingStoreThroughChannel.build_with_async_web_channel(ctx)
            fake_api_request.side_effect = DatabandWebserverNotReachableError(
                "fake_message"
            )
            async_store.heartbeat(get_uuid())  # fail here
            async_store.heartbeat(get_uuid())  # no skip here
            async_store.flush()
            fake_skip.assert_not_called()
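# Illustrative sketch (not part of dbnd): a hypothetical fake_api_request fixture of the kind the
# tests above receive. The patch target below is an assumption for illustration only; the real
# tests patch whichever client method performs the HTTP call (see self.client.api_request in
# _handle() above).
from unittest.mock import MagicMock, patch

import pytest

API_REQUEST_TARGET = "dbnd.utils.api_client.ApiClient.api_request"  # assumed path, replace with the real one


@pytest.fixture
def fake_api_request():
    with patch(API_REQUEST_TARGET) as fake:
        fake.return_value = MagicMock()
        yield fake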
def set_context(self, ti):
    """
    Airflow's log handler uses this method to set up the context when running a TaskInstance(=ti).
    We use this method to set up the dbnd context and communicate information to the
    `<airflow_operator>_execute` task that we create in `execute_tracking.py`.
    """
    # set up only when we are not in our own orchestration dag
    if ti.dag_id.startswith(AD_HOC_DAG_PREFIX):
        return

    if config.getboolean("mlflow_tracking", "databand_tracking"):
        self.airflow_logger.warning(
            "dbnd can't track mlflow and airflow together, please disable the dbnd config "
            "`databand_tracking` in section `mlflow_tracking`"
        )
        return

    # we are not tracking SubDagOperator
    if ti.operator == SubDagOperator.__name__:
        return

    task_key = calc_task_run_attempt_key_from_af_ti(ti)
    env_attempt_uid = os.environ.get(task_key)
    # this key is already set, which means we are in a --raw run
    if env_attempt_uid:
        # no need for further actions inside a --raw run
        return

    # communicate the task_run_attempt_uid to inner processes;
    # it will be used for the task_run of the `<airflow_operator>_execute` task
    self.task_run_attempt_uid = get_uuid()
    self.task_env_key = task_key
    os.environ[self.task_env_key] = str(self.task_run_attempt_uid)

    # airflow calculation for the relevant log_file
    log_relative_path = self.log_file_name_factory(ti, ti.try_number)
    self.log_file = os.path.join(self.airflow_base_log_dir, log_relative_path)

    # make sure we are not polluting the airflow logs
    get_dbnd_project_config().quiet_mode = True

    # tracking msg
    self.airflow_logger.info(
        "Tracked by Databand {version}".format(version=dbnd.__version__)
    )

    # context with disabled logs
    self.dbnd_context_manage = new_dbnd_context(conf={"log": {"disabled": True}})
    self.dbnd_context = self.dbnd_context_manage.__enter__()
def init_new_task_run_attempt(self):
    # try to find an attempt_uid that has been set by an external process.
    # if so - the attempt_uid is unique to this task_run_attempt, which is why we pop it.
    attempt_id = try_pop_attempt_id_from_env(self.task)
    if attempt_id:
        self.task_run_attempt_uid = UUID(attempt_id)
    else:
        self.task_run_attempt_uid = get_uuid()

    self.attempt_folder = self.task._meta_output.folder(
        "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
        extension=None,
    )
    self.attempt_folder_local = self.local_task_run_root.folder(
        "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
        extension=None,
    )
    self.attemp_folder_local_cache = self.attempt_folder_local.folder("cache")
    self.meta_files = TaskRunMetaFiles(self.attempt_folder)
    self.log = TaskRunLogManager(task_run=self)
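# Illustrative sketch (not part of dbnd): the env-var handoff between set_context() above, which
# publishes the attempt uid for the inner process, and init_new_task_run_attempt(), which consumes
# it. The key name below is a made-up example; the real key comes from
# calc_task_run_attempt_key_from_af_ti(ti) / try_pop_attempt_id_from_env(task).
import os
from uuid import UUID, uuid4

task_key = "DBND__EXAMPLE_ATTEMPT_KEY"  # hypothetical key
os.environ[task_key] = str(uuid4())  # parent process: publish the attempt uid

attempt_id = os.environ.pop(task_key, None)  # child process: pop so it is consumed only once
task_run_attempt_uid = UUID(attempt_id) if attempt_id else uuid4()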
def build_task_run_info(self):
    task_run_env_uid = get_uuid()
    import dbnd

    logging.debug("Created new task run env with uid '%s'", task_run_env_uid)

    machine = environ.get(ENV_DBND__ENV_MACHINE, "")
    if environ.get(ENV_DBND__ENV_IMAGE, None):
        machine += " image=%s" % environ.get(ENV_DBND__ENV_IMAGE)
    return TaskRunEnvInfo(
        uid=task_run_env_uid,
        databand_version=dbnd.__version__,
        user_code_version=self.source_version,
        user_code_committed=True,
        cmd_line=subprocess.list2cmdline(sys.argv),
        user=self.user or dbnd_getuser(),
        machine=machine,
        project_root=project_path(),
        user_data=safe_string(self.user_data, max_value_len=500),
        heartbeat=utcnow(),
    )
def __init__(self, task_class, classdict, namespace_at_class_time):
    super(TaskDefinition, self).__init__()

    self.task_definition_uid = get_uuid()
    self.hidden = False

    self.task_class = task_class  # type: Type[Task]
    self.namespace_at_class_time = namespace_at_class_time

    if self.task_class._conf__decorator_spec:
        cls_name = self.task_class._conf__decorator_spec.name
    else:
        cls_name = self.task_class.__name__

    self.full_task_family = "%s.%s" % (task_class.__module__, cls_name)
    self.full_task_family_short = "%s.%s" % (
        _short_name(task_class.__module__),
        cls_name,
    )

    self.task_family = self._build_user_task_family()
    if not self.task_family:
        self.task_family = cls_name
        self.task_config_section = self.full_task_family
    else:
        self.task_config_section = self.task_family

    # all the attributes that point to a Parameter
    self.task_params = dict()  # type: Dict[str, ParameterDefinition]
    # the defaults attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_params, self.defaults = self._calculate_task_class_values(classdict)

    # if we have output params in function arguments, like f(some_p=parameter.output),
    # the new function cannot return its result via `return`
    self.single_result_output = self._is_result_single_output(self.task_params)

    defaults = {
        p.name: p.default for p in self.task_params.values() if is_defined(p.default)
    }
    self.task_defaults_config_store = parse_and_build_config_store(
        source="%s[defaults]" % self.full_task_family_short,
        config_values={self.task_config_section: defaults},
        set_if_not_exists_only=True,
    )
    self.task_defaults_config_store.update(
        parse_and_build_config_store(
            source="%s[defaults_section]" % self.full_task_family_short,
            config_values=self.defaults,
        )
    )

    # now, if we have overloads in code (calculated in task_definition):
    #   class T(BaseT):
    #       some_base_t_property = new_value

    if self.task_class._conf__track_source_code:
        self.task_source_code = _get_task_source_code(self.task_class)
        self.task_module_code = _get_task_module_source_code(self.task_class)
        self.task_source_file = _get_source_file(self.task_class)
    else:
        self.task_source_code = None
        self.task_module_code = ""
        self.task_source_file = None
def __init__(
    self,
    context,
    task_or_task_name,
    run_uid=None,
    scheduled_run_info=None,
    send_heartbeat=True,
    existing_run=None,
    job_name=None,
    source=UpdateSource.dbnd,
    af_context=None,
):
    # type: (DatabandContext, Union[Task, str], Optional[UUID], Optional[ScheduledRunInfo], Optional[bool], Optional[UpdateSource]) -> None
    self.context = context
    s = self.context.settings  # type: DatabandSettings

    if isinstance(task_or_task_name, six.string_types):
        self.root_task_name = task_or_task_name
        self.root_task = None
    elif isinstance(task_or_task_name, Task):
        self.root_task_name = task_or_task_name.task_name
        self.root_task = task_or_task_name
    else:
        raise

    self.job_name = job_name or self.root_task_name

    self.description = s.run.description
    self.is_archived = s.run.is_archived
    self.source = source

    # this was added to allow the scheduler to create the run, which will be continued
    # by the actual run command instead of having 2 separate runs
    if not run_uid and DBND_RUN_UID in os.environ:
        # we pop, so if this run spawns subprocesses with their own runs they will be
        # associated using the sub-runs mechanism instead of being fused into this run directly
        run_uid = os.environ.pop(DBND_RUN_UID)
    if run_uid:
        self.run_uid = run_uid
        self.existing_run = True
    else:
        self.run_uid = get_uuid()
        self.existing_run = False

    if existing_run is not None:
        self.existing_run = existing_run

    self.name = s.run.name or get_name_for_uid(self.run_uid)
    # this is so the scheduler can create a run with partial information and then have
    # the subprocess running the actual cmd fill in the details
    self.resubmit_run = (
        DBND_RESUBMIT_RUN in os.environ
        and os.environ.pop(DBND_RESUBMIT_RUN) == "true"
    )

    # AIRFLOW, move into executor
    # dag_id, execution_date and run_id are used by airflow
    self.dag_id = AD_HOC_DAG_PREFIX + self.root_task_name
    self.execution_date = unique_execution_date()
    run_id = s.run.id
    if not run_id:
        # we need this name, otherwise Airflow will try to manage our local jobs at scheduler
        # ..zombies cleanup and so on
        run_id = "backfill_{0}_{1}".format(self.name, self.execution_date.isoformat())
    self.run_id = run_id

    self._template_vars = self._build_template_vars()

    self.is_tracked = True

    self.runtime_errors = []
    self._run_state = None
    self.task_runs = []  # type: List[TaskRun]
    self.task_runs_by_id = {}
    self.task_runs_by_af_id = {}

    self.target_origin = TargetIdentitySourceMap()
    self.describe = DescribeRun(self)
    self.tracker = RunTracker(self, tracking_store=self.context.tracking_store)

    # ALL RUN CONTEXT SPECIFIC things
    self.root_run_info = RootRunInfo.from_env(current_run=self)
    self.scheduled_run_info = scheduled_run_info or ScheduledRunInfo.from_env(
        self.run_uid
    )

    # now we can add the driver task
    self.driver_task_run = None  # type: Optional[TaskRun]
    self.root_task_run = None  # type: Optional[TaskRun]

    self.run_folder_prefix = os.path.join(
        "log",
        self.execution_date.strftime("%Y-%m-%d"),
        "%s_%s_%s"
        % (
            self.execution_date.strftime("%Y-%m-%dT%H%M%S.%f"),
            self.root_task_name,
            self.name,
        ),
    )

    self.run_config = self.context.settings.run  # type: RunConfig
    self.env = env = self.context.env

    self.local_engine = self._get_engine_config(env.local_engine)
    self.remote_engine = self._get_engine_config(
        env.remote_engine or env.local_engine
    )

    self.submit_driver = (
        self.run_config.submit_driver
        if self.run_config.submit_driver is not None
        else env.submit_driver
    )
    self.submit_tasks = (
        self.run_config.submit_tasks
        if self.run_config.submit_tasks is not None
        else env.submit_tasks
    )
    self.task_executor_type, self.parallel = calculate_task_executor_type(
        self.submit_tasks, self.remote_engine, self.context.settings
    )

    self.sends_heartbeat = send_heartbeat
    self.dynamic_af_tasks_count = dict()
    self.af_context = af_context

    self.start_time = None
    self.finished_time = None
def run_submitter(self):
    """
    This is the task that represents "submission": it can be just one task, or more,
    as we can have "docker builds" or other preparations.
    That is why we do not run it directly, but do a "full run" with an executor.
    """
    run = self.run
    # we are running the submitter, which will send the driver to the remote engine
    remote_engine = self.remote_engine

    settings = run.context.settings
    settings.git.validate_git_policy()

    # let's prepare for remote execution
    remote_engine.prepare_for_run(run)

    result_map_target = run.run_root.file("{}.json".format(get_uuid()))
    cmd_line_args = (
        ["run"]
        + _get_dbnd_run_relative_cmd()
        + [
            "--run-driver",
            str(run.run_uid),
            "--set",
            "run.run_result_json_path={}".format(result_map_target.path),
            "--set",
            "run.execution_date={}".format(
                run.execution_date.strftime("%Y-%m-%dT%H%M%S.%f")
            ),
        ]
    )

    args = remote_engine.dbnd_executable + cmd_line_args
    submit_to_engine_task = remote_engine.submit_to_engine_task(
        env=run.env,
        args=args,
        task_name="dbnd_driver_run",
        interactive=settings.run.interactive,
    )
    submit_to_engine_task._conf_confirm_on_kill_msg = (
        "Ctrl-C. Do you want to kill your submitted pipeline? "
        "If the selection is 'no', this process will detach from the run."
    )
    run.root_task = submit_to_engine_task

    # we run all tasks on the local engine
    task_runs = self._init_task_runs_for_execution(task_engine=self.host_engine)

    # create an executor without a driver task!
    # We use the local executor to run all tasks (submit_to_engine and the tasks it requires).
    # In most cases it will run only the submit_to_engine task,
    # but there are scenarios where the submit_to_engine task asks for docker builds,
    # so we execute the whole pipeline.
    task_executor = LocalTaskExecutor(
        run,
        task_executor_type=TaskExecutorType.local,
        host_engine=self.host_engine,
        target_engine=self.host_engine,
        task_runs=task_runs,
    )
    task_executor.do_run()
    self.result_location = result_map_target

    logger.info(run.describe.run_banner_for_submitted())
def __init__(
    self,
    task,
    run,
    task_af_id=None,
    try_number=1,
    is_dynamic=None,
    task_engine=None,
):
    # type: (Task, DatabandRun, str, int, bool, EngineConfig) -> None
    # actually this is used as the Task uid

    self.task = task  # type: Task
    self.run = run  # type: DatabandRun
    self.task_engine = task_engine
    self.try_number = try_number
    self.is_dynamic = is_dynamic if is_dynamic is not None else task.task_is_dynamic
    self.is_system = task.task_is_system
    self.task_af_id = task_af_id or self.task.task_id

    if task.ctrl.force_task_run_uid:
        self.task_run_uid = tr_uid = task.ctrl.force_task_run_uid
        if isinstance(tr_uid, TaskRunUidGen):
            self.task_run_uid = tr_uid.generate_task_run_uid(
                run=run, task=task, task_af_id=self.task_af_id
            )
    else:
        self.task_run_uid = get_uuid()

    # used by all kinds of submission controllers
    self.job_name = clean_job_name(self.task_af_id).lower()
    self.job_id = self.job_name + "_" + str(self.task_run_uid)[:8]

    # DNS-1123 subdomain name (k8s)
    self.job_id__dns1123 = clean_job_name_dns1123(
        "dbnd.{task_family}.{task_name}".format(
            task_family=self.task.task_meta.task_family,
            task_name=self.task.task_meta.task_name,
        ),
        postfix=".%s" % str(self.task_run_uid)[:8],
    )

    # custom per-task engine, or just use the one from the global env
    dbnd_local_root = (
        self.task_engine.dbnd_local_root or self.run.env.dbnd_local_root
    )
    self.local_task_run_root = (
        dbnd_local_root.folder(run.run_folder_prefix)
        .folder("tasks")
        .folder(self.task.task_id)
    )

    self._attempt_number = 1
    self.task_run_attempt_uid = get_uuid()
    self.attempt_folder = None
    self.meta_files = None
    self.log = None
    self.init_attempt()

    # TODO: inherit from parent task if disabled
    self.is_tracked = task._conf__tracked

    if self.is_tracked and self.run.is_tracked:
        tracking_store = self.run.context.tracking_store
    else:
        tracking_store = ConsoleStore()

    self.tracking_store = tracking_store
    self.tracker = TaskRunTracker(task_run=self, tracking_store=tracking_store)
    self.runner = TaskRunRunner(task_run=self)
    self.deploy = TaskSyncCtrl(task_run=self)
    self.task_tracker_url = self.tracker.task_run_url()
    self.external_resource_urls = dict()
    self.errors = []

    self.is_root = False
    self.is_reused = False
    self.is_skipped = False
    # task can be skipped as it's not required by any other task scheduled to run
    self.is_skipped_as_not_required = False

    self._airflow_context = None
    self._task_run_state = None

    self.start_time = None
    self.finished_time = None
def generate_task_run_uid(self, run, task, task_af_id):
    return get_uuid()
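# Illustrative sketch (not part of dbnd): a hypothetical TaskRunUidGen subclass. TaskRun.__init__
# above calls generate_task_run_uid(run=..., task=..., task_af_id=...) whenever
# task.ctrl.force_task_run_uid is a TaskRunUidGen; the default implementation above just returns a
# random uuid, while a subclass could derive a stable uid from the run uid and the task_af_id.
import uuid


class DeterministicTaskRunUidGen(TaskRunUidGen):  # TaskRunUidGen as referenced in TaskRun.__init__
    def generate_task_run_uid(self, run, task, task_af_id):
        # uuid5 yields the same uid for the same (run_uid, task_af_id) pair on every retry
        return uuid.uuid5(run.run_uid, task_af_id)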
def __init__(
    self,
    task_passport,  # type: TaskPassport
    classdict=None,  # type: Optional[Dict[str, Any]]
    base_task_definitions=None,  # type: Optional[List[TaskDefinition]]
    defaults=None,  # type: Optional[Dict[ParameterDefinition, Any]]
    task_decorator=None,  # type: Optional[TaskDecorator]
    source_code=None,  # type: Optional[TaskSourceCode]
    external_parameters=None,  # type: Optional[Parameters]
    task_definition_uid=None,  # type: Optional[UUID]
):
    super(TaskDefinition, self).__init__()

    self.hidden = False

    self.task_passport = task_passport
    self.source_code = source_code
    self.task_decorator = task_decorator
    self.base_task_definitions = (
        base_task_definitions or []
    )  # type: List[TaskDefinition]

    # TODO: maybe use properties or another way to delegate those...
    self.full_task_family = self.task_passport.full_task_family
    self.full_task_family_short = self.task_passport.full_task_family_short
    self.task_family = self.task_passport.task_family
    self.task_config_section = self.task_passport.task_config_section

    # all the attributes that point to a Parameter
    self.task_param_defs = dict()  # type: Dict[str, ParameterDefinition]

    # the defaults attribute
    self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

    self.task_param_defs = self._calculate_task_class_values(
        classdict, external_parameters
    )

    # if we have output params in function arguments, like f(some_p=parameter.output),
    # the new function cannot return its result via `return`
    self.single_result_output = self._is_result_single_output(self.task_param_defs)

    self.param_defaults = {
        p.name: p.default
        for p in self.task_param_defs.values()
        if is_defined(p.default)
    }

    # TODO: consider joining with task_config
    # TODO: calculate defaults value as _ConfigStore and merge using standard mechanism
    self.defaults = self._calculate_task_defaults(defaults)
    self.task_defaults_config_store = parse_and_build_config_store(
        source=self.task_passport.format_source_name("task.defaults"),
        config_values=self.defaults,
        priority=ConfigValuePriority.FALLBACK,
    )

    self.task_signature_extra = {}
    if config.getboolean("task_build", "sign_with_full_qualified_name"):
        self.task_signature_extra["full_task_family"] = self.full_task_family
    if config.getboolean("task_build", "sign_with_task_code"):
        self.task_signature_extra["task_code_hash"] = user_friendly_signature(
            self.source_code.task_source_code
        )

    if task_definition_uid:
        self.task_definition_uid = task_definition_uid
    else:
        self.task_definition_uid = get_uuid()
def _generate_unique_tracking_signature():
    return Signature(
        "tracking", user_friendly_signature(str(get_uuid())), "unique tracking call"
    )
def __init__(
    self,
    context,  # type: DatabandContext
    job_name,
    run_uid=None,  # type: Optional[UUID]
    scheduled_run_info=None,  # type: Optional[ScheduledRunInfo]
    existing_run=None,
    source=UpdateSource.dbnd,  # type: Optional[UpdateSource]
    af_context=None,
    is_orchestration=False,
):
    self.context = context
    s = self.context.settings  # type: DatabandSettings

    self.job_name = job_name

    self.description = s.run.description
    self.is_archived = s.run.is_archived
    self.source = source
    self.is_orchestration = is_orchestration

    self.existing_run = existing_run or False
    # this was added to allow the scheduler to create the run, which will be continued
    # by the actual run command instead of having 2 separate runs
    if not run_uid and DBND_RUN_UID in os.environ:
        # we pop, so if this run spawns subprocesses with their own runs they will be
        # associated using the sub-runs mechanism instead of being fused into this run directly
        run_uid = os.environ.pop(DBND_RUN_UID)
    if run_uid:
        self.run_uid = run_uid
        self.existing_run = True
    else:
        self.run_uid = get_uuid()

    # if the user provided a name - use it,
    # otherwise - generate a human friendly name for the run
    self.name = s.run.name or get_random_name(seed=self.run_uid)
    self.execution_date = unique_execution_date()

    self.is_tracked = True

    # tracking/orchestration main task
    self.root_task = None  # type: Optional[Task]

    # task run that wraps execution (tracking or orchestration)
    self._driver_task_run = None

    # ORCHESTRATION: execution of the run
    self.run_executor = None  # type: Optional[RunExecutor]

    # dag_id and execution_date are used by Airflow,
    # should be deprecated (still used by DB tracking)
    self.dag_id = AD_HOC_DAG_PREFIX + self.job_name

    # RUN STATE
    self._run_state = None
    self.task_runs = []  # type: List[TaskRun]
    self.task_runs_by_id = {}
    self.task_runs_by_af_id = {}

    self.target_origin = TargetIdentitySourceMap()
    self.describe = RunBanner(self)
    self.tracker = RunTracker(self, tracking_store=self.context.tracking_store)

    # ALL RUN CONTEXT SPECIFIC things
    self.root_run_info = RootRunInfo.from_env(current_run=self)
    self.scheduled_run_info = scheduled_run_info or ScheduledRunInfo.from_env(
        self.run_uid
    )

    self.env = self.context.env
    self.run_folder_prefix = os.path.join(
        "log",
        self.execution_date.strftime("%Y-%m-%d"),
        "%s_%s_%s"
        % (
            self.execution_date.strftime("%Y-%m-%dT%H%M%S.%f"),
            self.job_name,
            self.name,
        ),
    )
    self.run_root = self.env.dbnd_root.folder(self.run_folder_prefix)
    self.run_local_root = self.env.dbnd_local_root.folder(self.run_folder_prefix)

    self.local_engine = build_engine_config(self.env.local_engine).clone(
        require_submit=False
    )

    self.dynamic_af_tasks_count = dict()
    self.af_context = af_context

    self.start_time = None
    self.finished_time = None