def test_set(self):
    """Check that a set-valued param signs the same for either element order."""
    first = build_signature(
        "t", [("a", {1: {target("a"), target("b")}})]
    ).signature
    second = build_signature(
        "t", [("a", {1: {target("b"), target("a")}})]
    ).signature
    assert first == second
def _calculate_task_meta_key(self):
    """Build the signature object used as this task's meta key.

    The signed params are the signatures of every significant, non-output
    task parameter, followed by a ``"task_override"`` entry holding the
    config overrides. The signature name combines the task name, the full
    task family and ``id()`` of the task definition.

    Returns:
        Whatever ``build_signature`` returns for that name and params.
    """
    signed_params = []
    for param_value in self.task_params:
        # outputs and insignificant params never take part in the key
        if (
            param_value.parameter.is_output()
            or not param_value.parameter.significant
        ):
            continue
        signed_params.append(
            (param_value.name, param_value.parameter.signature(param_value.value))
        )

    overrides = {}
    for override_obj, override_raw in six.iteritems(self.task_config_override):
        if isinstance(override_obj, ParameterDefinition):
            key = "%s.%s" % (
                override_obj.task_cls.get_task_family(),
                override_obj.name,
            )
            # string overrides are used verbatim; anything else is signed
            # by the parameter definition itself
            if isinstance(override_raw, six.string_types):
                value = override_raw
            else:
                value = override_obj.signature(override_raw)
        else:
            # very problematic approach till we fix the override structure
            key = str(override_obj)
            value = str(override_raw)
        overrides[key] = value
    signed_params.append(("task_override", overrides))

    # The task definition id is unique per class definition, so a new
    # implementation of the same task will not collide with an old one
    # when it is rerun.
    name_parts = (
        self.task_name,
        self.task_definition.full_task_family,
        str(id(self.task_definition)),
    )
    return build_signature(
        name="%s@%s(object=%s)" % name_parts, params=signed_params
    )
def initialize_task_id(self, params=None):
    """Compute the task signature and store the derived identifiers on self.

    Two ``task_build`` config flags decide what extra data is signed:
    ``sign_with_full_qualified_name`` adds the full task family and
    ``sign_with_task_code`` adds a hash of the task source code.

    Args:
        params: passed through unchanged to ``build_signature``.

    Sets ``self.task_id``, ``self.task_signature`` and
    ``self.task_signature_source`` from the resulting signature object.
    """
    task_name = self.task_name
    signature_extra = {}

    if config.getboolean("task_build", "sign_with_full_qualified_name"):
        signature_extra["full_task_family"] = self.task_definition.full_task_family

    if config.getboolean("task_build", "sign_with_task_code"):
        source_code = self.task_definition.task_source_code
        signature_extra["task_code_hash"] = user_friendly_signature(source_code)

    signature_obj = build_signature(
        name=task_name, params=params, extra=signature_extra
    )

    # read both values before assigning, as a tuple assignment would
    new_task_id = signature_obj.id
    new_task_signature = signature_obj.signature
    self.task_id = new_task_id
    self.task_signature = new_task_signature
    self.task_signature_source = signature_obj.signature_source
def initialize_relations(self):
    """Resolve task inputs, recompute the task signature/id, then sign outputs.

    Steps, in order:
      0. run the band function;
      1. calculate the task inputs (a failure is logged; it is re-raised
         unless the task is dynamic, in which case inputs fall back to ``{}``);
      2. rebuild the task signature from the serialized significant inputs
         plus the stringified "user" inputs, and derive ``task_id`` from it;
      3. initialize outputs and compute the outputs signature.
    """
    # STEP 0 - run band function
    self.initialize_band()

    # STEP 1 - calculate all inputs and _required
    try:
        self.task_inputs = self.initialize_required()
    except Exception:
        logger.warning(
            "Failed to calculate relationships for %s" % self.task_id,
            exc_info=True,
        )
        self.task_inputs = {}
        if not self.task.task_is_dynamic:
            raise

    # STEP 2 (now we have all inputs, we can calculate the real signature)
    # support for two phase build
    # will be called from MetaClass
    signed_params = self.params.get_params_serialized(
        ParameterFilters.SIGNIFICANT_INPUTS
    )

    if "user" in self.task_inputs:
        # TODO : why do we need to convert all "user side" inputs?
        # what if the input is insignificant?
        system_inputs = self.task_inputs.get("system")
        inputs_to_sign = {"user": self.task_inputs.get("user")}
        if system_inputs and "band" in system_inputs:
            # of the system inputs, only the band is signed
            inputs_to_sign["system"] = {"band": system_inputs["band"]}

        inputs_as_str = traverse(
            inputs_to_sign,
            convert_f=str,
            filter_none=True,
            filter_empty=True,
        )
        signed_params.append(
            ("_task_inputs", "" if inputs_as_str is None else inputs_as_str)
        )

    # IMPORTANT PART: we initialize task_id here again,
    # after all values are calculated (all task_inputs are assigned)
    task = self.task
    task.task_signature_obj = build_signature(
        name=task.task_name,
        params=signed_params,
        extra=task.task_definition.task_signature_extra,
    )
    task.task_id = "{}__{}".format(
        task.task_name, task.task_signature_obj.signature
    )

    # for airflow operator task handling:
    if self.params.get_param("airflow_task_id"):
        self.task.task_id = self.task.airflow_task_id

    # STEP 3 - now let's update outputs
    self.initialize_outputs()

    outputs_to_sign = self._get_outputs_to_sign()
    # with nothing to sign, the outputs signature is the task signature
    task.task_outputs_signature_obj = (
        build_signature_from_values("task_outputs", outputs_to_sign)
        if outputs_to_sign
        else task.task_signature_obj
    )
def build_task_object(self, task_metaclass):
    """Build a task object, or return a cached one with the same signature.

    Flow: validate ctor kwargs, resolve parameter values (optionally
    reloading them from a task band), compute the object-cache signature,
    return the cached instance when one exists and is not marked
    ``_dbnd_no_cache``; otherwise create the task through
    ``task_metaclass._build_task_obj``, register it in the cache,
    initialize and validate it inside a BUILD task context, and register
    the final instance.

    Args:
        task_metaclass: object providing ``_build_task_obj``.

    Returns:
        The cached or newly built task object.
    """
    dbnd_context = get_databand_context()

    # convert args to kwargs, validate values
    self.task_kwargs = self._build_and_validate_task_ctor_kwargs(
        self.task_args__ctor, self.task_kwargs
    )

    self._log_build_step("Resolving task params with %s" % self.config_sections)
    try:
        param_values = self._build_task_param_values()
        task_params = Parameters(
            source=self._ctor_as_str, param_values=param_values
        )
    except Exception:
        # dump the config to help diagnose the failure, then propagate
        self._log_config(force_log=True)
        raise

    # the task is disabled only when a parent exists and should not run
    task_enabled = not (
        self.parent_task and not self.parent_task.ctrl.should_run()
    )

    # load from task_band if exists
    band_param = task_params.get_param_value(TASK_BAND_PARAMETER_NAME)
    if band_param and band_param.value:
        # we are going to load all task parameters from task_band
        task_params = self.load_task_params_from_task_band(
            band_param.value, task_params
        )

    param_signatures = task_params.get_params_signatures(
        ParameterFilters.SIGNIFICANT_INPUTS
    )

    # we add override to Object Cache signature
    override_signature = self._get_override_params_signature()

    # The task definition id is unique per class definition, so a new
    # implementation of the same task will not collide with an old one
    # when it is rerun.
    cache_name_parts = (
        self.task_name,
        self.task_definition.full_task_family,
        str(id(self.task_definition)),
    )
    full_task_name = "%s@%s(object=%s)" % cache_name_parts

    # the real signature is not known yet, so the cache signature is
    # calculated from all currently known params
    cache_signature = build_signature(
        name=full_task_name,
        params=param_signatures,
        extra={"task_override": override_signature},
    )
    self._log_build_step(
        "Task task_signature %s" % str(cache_signature.signature)
    )

    # If a Task has already been instantiated with the same parameters,
    # the previous instance is returned to reduce number of object instances.
    instance_cache = dbnd_context.task_instance_cache
    cached_task = instance_cache.get_cached_task_obj(cache_signature)
    if cached_task and not hasattr(cached_task, "_dbnd_no_cache"):
        return cached_task

    # we want to have task id immediately, so we can initialize outputs/use by user
    # we should switch to SIGNIFICANT_INPUT here
    task_signature_obj = build_signature(
        name=self.task_name,
        params=param_signatures,
        extra=self.task_definition.task_signature_extra,
    )

    children_scope_params = self._calculate_task_children_scope_params(
        task_params=task_params
    )

    task = task_metaclass._build_task_obj(
        task_definition=self.task_definition,
        task_name=self.task_name,
        task_params=task_params,
        task_signature_obj=task_signature_obj,
        task_config_override=self.task_config_override,
        task_config_layer=self.config.config_layer,
        task_enabled=task_enabled,
        task_sections=self.config_sections,
        task_children_scope_params=children_scope_params,
    )
    instance_cache.register_task_obj_cache_instance(
        task, task_obj_cache_signature=cache_signature
    )

    # NOTE(review): the depth argument (2) is presumably relative to this
    # call frame - keep this call inline rather than moving it to a helper.
    task.task_call_source = [
        dbnd_context.user_code_detector.find_user_side_frame(2)
    ]
    if task.task_call_source and self.parent_task:
        task.task_call_source.extend(self.parent_task.task_call_source)

    # now the task is created - all nested constructors will see it as parent
    with task_context(task, TaskContextPhase.BUILD):
        task._initialize()
        task._validate()

        # it might be that config has been changed even more
        task.task_config_layer = self.config.config_layer

    # only now we know "task_id", so we can register in the publicly facing cache
    instance_cache.register_task_instance(task)

    return task
def test_dict(self):
    """Check that a dict-valued param signs the same for either key order."""
    first = build_signature("t", [("a", {1: 2, 2: 3})]).signature
    second = build_signature("t", [("a", {2: 3, 1: 2})]).signature
    assert first == second
def test_simple(self):
    """Check that building a signature for one simple param gives a truthy result."""
    signature = build_signature("t", [("a", "b")])
    assert signature