def test_kubernetes_optional():
    """Check that (de)serialisation keeps working when ``kubernetes`` cannot be imported."""

    def _import_without_kubernetes(name, globals_=None, locals_=None, fromlist=(), level=0):
        # Pretend the top-level ``kubernetes`` distribution is absent; every
        # other import is delegated to the real import machinery.
        top_level_package = name.partition('.')[0]
        if level == 0 and top_level_package == 'kubernetes':
            raise ImportError("No module named 'kubernetes'")
        return importlib.__import__(name, globals=globals_, locals=locals_, fromlist=fromlist, level=level)

    with mock.patch('builtins.__import__', side_effect=_import_without_kubernetes) as patched_import:
        # Execute a fresh copy of the module; any
        # airflow.serialization.serialized_objects already present in
        # sys.modules is left untouched.
        module_spec = importlib.util.find_spec("airflow.serialization.serialized_objects")
        fresh_module = importlib.util.module_from_spec(module_spec)
        module_spec.loader.exec_module(fresh_module)

        # Reaching this point means ``kubernetes`` itself was never imported.
        # Now verify that no ``airflow.kubernetes.*`` module was requested either.
        airflow_subpackages = set()
        for import_call in patched_import.call_args_list:
            requested_name = import_call.args[0]
            if requested_name.startswith("airflow."):
                airflow_subpackages.add(requested_name.split('.', 2)[1])
        assert "kubernetes" not in airflow_subpackages

        # Deserialising a pod must be rejected when kubernetes is unavailable.
        serialized_pod = PodGenerator.serialize_pod(executor_config_pod)
        pod_override = {'__type': 'k8s.V1Pod', '__var': serialized_pod}
        with pytest.raises(RuntimeError):
            fresh_module.BaseSerialization.from_dict(pod_override)

        # Plain DAG serialisation must still succeed.
        simple_dag = make_simple_dag()["simple_dag"]
        fresh_module.SerializedDAG.to_dict(simple_dag)
def _adopt_completed_pods(self, kube_client: kubernetes.client.CoreV1Api):
    """
    Relabel completed pods so that the KubernetesJobWatcher can delete them.

    Every pod in the configured namespace that is in phase ``Succeeded`` and
    carries the ``kubernetes_executor=True`` label gets its ``airflow-worker``
    label set to this scheduler's job id and is patched through the API.
    A failed patch is logged and does not stop the remaining pods from being
    processed.

    :param kube_client: kubernetes client for speaking to kube API
    """
    completed_pods = kube_client.list_namespaced_pod(
        namespace=self.kube_config.kube_namespace,
        field_selector="status.phase=Succeeded",
        label_selector='kubernetes_executor=True',
    )
    for completed_pod in completed_pods.items:
        metadata = completed_pod.metadata
        self.log.info("Attempting to adopt pod %s", metadata.name)
        metadata.labels['airflow-worker'] = str(self.scheduler_job_id)
        try:
            kube_client.patch_namespaced_pod(
                name=metadata.name,
                namespace=metadata.namespace,
                body=PodGenerator.serialize_pod(completed_pod),
            )
        except ApiException as e:
            self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, e)
def adopt_launched_task(
    self, kube_client: client.CoreV1Api, pod: k8s.V1Pod, pod_ids: Dict[TaskInstanceKey, k8s.V1Pod]
) -> None:
    """
    Relabel an existing pod so that the current KubernetesJobWatcher can
    monitor it via label selectors.

    The pod's ``airflow-worker`` label is set to a label-safe form of this
    scheduler's job id. The pod is only patched when the key derived from its
    annotations is present in ``pod_ids``; after a successful patch that key
    is removed from ``pod_ids`` and added to ``self.running``.

    :param kube_client: kubernetes client for speaking to kube API
    :param pod: V1Pod spec that we will patch with new label
    :param pod_ids: pod_ids we expect to patch.
    """
    metadata = pod.metadata
    self.log.info("attempting to adopt pod %s", metadata.name)
    worker_label = pod_generator.make_safe_label_value(str(self.scheduler_job_id))
    metadata.labels['airflow-worker'] = worker_label

    pod_id = annotations_to_key(metadata.annotations)
    if pod_id in pod_ids:
        try:
            kube_client.patch_namespaced_pod(
                name=metadata.name,
                namespace=metadata.namespace,
                body=PodGenerator.serialize_pod(pod),
            )
        except ApiException as e:
            self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, e)
        else:
            # Only book-keep the adoption once the API accepted the patch.
            pod_ids.pop(pod_id)
            self.running.add(pod_id)
    else:
        self.log.error(
            "attempting to adopt taskinstance which was not specified by database: %s", pod_id
        )
def adopt_launched_task(self, kube_client, pod, pod_ids: dict):
    """
    Relabel an existing pod so that the current KubernetesJobWatcher can
    monitor it via label selectors.

    The pod's ``airflow-worker`` label is set to this scheduler's job id.
    The pod is identified by the id built from its ``dag_id`` and ``task_id``
    labels; it is only patched when that id is present in ``pod_ids``, and the
    id is removed from ``pod_ids`` once the patch succeeded.

    :param kube_client: kubernetes client for speaking to kube API
    :param pod: V1Pod spec that we will patch with new label
    :param pod_ids: pod_ids we expect to patch.
    """
    metadata = pod.metadata
    self.log.info("attempting to adopt pod %s", metadata.name)
    labels = metadata.labels
    labels['airflow-worker'] = str(self.scheduler_job_id)
    dag_id = labels['dag_id']
    task_id = labels['task_id']
    pod_id = create_pod_id(dag_id=dag_id, task_id=task_id)

    if pod_id not in pod_ids:
        self.log.error(
            "attempting to adopt task %s in dag %s which was not specified by database",
            task_id,
            dag_id,
        )
        return

    try:
        kube_client.patch_namespaced_pod(
            name=metadata.name,
            namespace=metadata.namespace,
            body=PodGenerator.serialize_pod(pod),
        )
        pod_ids.pop(pod_id)
    except ApiException as e:
        self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, e)
def _default(obj): """Convert dates and numpy objects in a json serializable format.""" if isinstance(obj, datetime): return obj.strftime('%Y-%m-%dT%H:%M:%SZ') elif isinstance(obj, date): return obj.strftime('%Y-%m-%d') elif isinstance( obj, ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): return int(obj) elif isinstance(obj, np.bool_): return bool(obj) elif isinstance( obj, (np.float_, np.float16, np.float32, np.float64, np.complex_, np.complex64, np.complex128) ): return float(obj) elif k8s is not None and isinstance(obj, (k8s.V1Pod, k8s.V1ResourceRequirements)): from airflow.kubernetes.pod_generator import PodGenerator return PodGenerator.serialize_pod(obj) raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
def _serialize(cls, var: Any) -> Any:  # Unfortunately there is no support for recursive types in mypy
    """Serialize ``var`` recursively, depth first.

    The serialization protocol is:

    (1) JSON supported types (primitives, dict, list) are kept as they are;
    (2) every other type is encoded as ``{TYPE: 'foo', VAR: 'bar'}`` so that
        deserialization can decode VAR according to TYPE;
    (3) an Operator carries a special field CLASS that records the original
        class name for displaying in UI.
    """
    if cls._is_primitive(var):
        # enum.IntEnum is an int instance, it causes json dumps error so we use its value.
        return var.value if isinstance(var, enum.Enum) else var

    if isinstance(var, dict):
        serialized_items = {str(key): cls._serialize(value) for key, value in var.items()}
        return cls._encode(serialized_items, type_=DAT.DICT)

    if isinstance(var, list):
        return [cls._serialize(item) for item in var]

    if HAS_KUBERNETES and isinstance(var, k8s.V1Pod):
        return cls._encode(PodGenerator.serialize_pod(var), type_=DAT.POD)

    if isinstance(var, DAG):
        return SerializedDAG.serialize_dag(var)

    if isinstance(var, BaseOperator):
        return SerializedBaseOperator.serialize_operator(var)

    if isinstance(var, cls._datetime_types):
        return cls._encode(var.timestamp(), type_=DAT.DATETIME)

    if isinstance(var, datetime.timedelta):
        return cls._encode(var.total_seconds(), type_=DAT.TIMEDELTA)

    if isinstance(var, Timezone):
        return cls._encode(encode_timezone(var), type_=DAT.TIMEZONE)

    if isinstance(var, relativedelta.relativedelta):
        return cls._encode(encode_relativedelta(var), type_=DAT.RELATIVEDELTA)

    if callable(var):
        return str(get_python_source(var))

    if isinstance(var, set):
        # FIXME: casts set to list in customized serialization in future.
        try:
            # Prefer a sorted member list; members that cannot be ordered
            # raise TypeError and are emitted in iteration order instead.
            return cls._encode(sorted(cls._serialize(member) for member in var), type_=DAT.SET)
        except TypeError:
            return cls._encode([cls._serialize(member) for member in var], type_=DAT.SET)

    if isinstance(var, tuple):
        # FIXME: casts tuple to list in customized serialization in future.
        return cls._encode([cls._serialize(member) for member in var], type_=DAT.TUPLE)

    if isinstance(var, TaskGroup):
        return SerializedTaskGroup.serialize_task_group(var)

    # Anything not handled above is stored as its string representation.
    log.debug('Cast type %s to str in serialization.', type(var))
    return str(var)
def patch_already_checked(self, pod: k8s.V1Pod):
    """
    Mark the pod as already checked and push that change to the cluster.

    Sets the ``already_checked`` label of the pod to ``"True"`` and patches the
    namespaced pod with the serialized result, to ensure we only retry once.

    :param pod: the pod to label and patch
    """
    metadata = pod.metadata
    metadata.labels["already_checked"] = "True"
    patch_body = PodGenerator.serialize_pod(pod)
    self.client.patch_namespaced_pod(metadata.name, metadata.namespace, patch_body)
def _default(obj): """Convert dates and numpy objects in a json serializable format.""" if isinstance(obj, datetime): return obj.strftime('%Y-%m-%dT%H:%M:%SZ') elif isinstance(obj, date): return obj.strftime('%Y-%m-%d') elif isinstance(obj, Decimal): _, _, exponent = obj.as_tuple() if exponent >= 0: # No digits after the decimal point. return int(obj) # Technically lossy due to floating point errors, but the best we # can do without implementing a custom encode function. return float(obj) elif np is not None and isinstance( obj, ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): return int(obj) elif np is not None and isinstance(obj, np.bool_): return bool(obj) elif np is not None and isinstance( obj, (np.float_, np.float16, np.float32, np.float64, np.complex_, np.complex64, np.complex128)): return float(obj) elif k8s is not None and isinstance( obj, (k8s.V1Pod, k8s.V1ResourceRequirements)): from airflow.kubernetes.pod_generator import PodGenerator return PodGenerator.serialize_pod(obj) raise TypeError( f"Object of type '{obj.__class__.__name__}' is not JSON serializable" )
"_outlets": [], "ui_color": "#f0ede4", "ui_fgcolor": "#000", "template_fields": ['bash_command', 'env'], "template_fields_renderers": {'bash_command': 'bash', 'env': 'json'}, "bash_command": "echo {{ task.task_id }}", 'label': 'bash_task', "_task_type": "BashOperator", "_task_module": "airflow.operators.bash", "pool": "default_pool", "executor_config": { '__type': 'dict', '__var': { "pod_override": { '__type': 'k8s.V1Pod', '__var': PodGenerator.serialize_pod(executor_config_pod), } }, }, }, { "task_id": "custom_task", "retries": 1, "retry_delay": 300.0, "sla": 100.0, "_downstream_task_ids": [], "_inlets": [], "_is_dummy": False, "_outlets": [], "_operator_extra_links": [{"tests.test_utils.mock_operators.CustomOpLink": {}}], "ui_color": "#fff",
def patch_already_checked(self, pod: k8s.V1Pod):
    """
    Mark the pod as already checked and push that change to the cluster.

    Sets the pod label named by ``self.POD_CHECKED_KEY`` to ``"True"`` and
    patches the namespaced pod with the serialized result, to ensure we don't
    reattach on retries.

    :param pod: the pod to label and patch
    """
    metadata = pod.metadata
    metadata.labels[self.POD_CHECKED_KEY] = "True"
    patch_body = PodGenerator.serialize_pod(pod)
    self.client.patch_namespaced_pod(metadata.name, metadata.namespace, patch_body)
def _serialize(cls, var: Any) -> Any:  # Unfortunately there is no support for recursive types in mypy
    """Serialize ``var`` recursively, depth first.

    The serialization protocol is:

    (1) JSON supported types (primitives, dict, list) are kept as they are;
    (2) every other type is encoded as ``{TYPE: 'foo', VAR: 'bar'}`` so that
        deserialization can decode VAR according to TYPE;
    (3) an Operator carries a special field CLASS that records the original
        class name for displaying in UI.

    Any exception raised while serializing is logged and ``FAILED`` is
    returned instead.
    """
    try:
        if cls._is_primitive(var):
            # enum.IntEnum is an int instance, it causes json dumps error so we use its value.
            return var.value if isinstance(var, enum.Enum) else var

        if isinstance(var, dict):
            serialized_items = {str(key): cls._serialize(value) for key, value in var.items()}
            return cls._encode(serialized_items, type_=DAT.DICT)

        if isinstance(var, list):
            return [cls._serialize(item) for item in var]

        if isinstance(var, k8s.V1Pod):
            return cls._encode(PodGenerator.serialize_pod(var), type_=DAT.POD)

        if isinstance(var, DAG):
            return SerializedDAG.serialize_dag(var)

        if isinstance(var, BaseOperator):
            return SerializedBaseOperator.serialize_operator(var)

        if isinstance(var, cls._datetime_types):
            return cls._encode(var.timestamp(), type_=DAT.DATETIME)

        if isinstance(var, datetime.timedelta):
            return cls._encode(var.total_seconds(), type_=DAT.TIMEDELTA)

        if isinstance(var, Timezone):
            return cls._encode(str(var.name), type_=DAT.TIMEZONE)

        if isinstance(var, relativedelta.relativedelta):
            # Keep only the public attributes that hold a truthy value.
            encoded = {
                attr: value
                for attr, value in var.__dict__.items()
                if not attr.startswith("_") and value
            }
            weekday = var.weekday
            if weekday and weekday.n:
                # Every n'th Friday for example
                encoded['weekday'] = [weekday.weekday, weekday.n]
            elif weekday:
                encoded['weekday'] = [weekday.weekday]
            return cls._encode(encoded, type_=DAT.RELATIVEDELTA)

        if callable(var):
            return str(get_python_source(var))

        if isinstance(var, set):
            # FIXME: casts set to list in customized serialization in future.
            return cls._encode([cls._serialize(member) for member in var], type_=DAT.SET)

        if isinstance(var, tuple):
            # FIXME: casts tuple to list in customized serialization in future.
            return cls._encode([cls._serialize(member) for member in var], type_=DAT.TUPLE)

        # Anything not handled above is stored as its string representation.
        log.debug('Cast type %s to str in serialization.', type(var))
        return str(var)
    except Exception:  # pylint: disable=broad-except
        log.error('Failed to stringify.', exc_info=True)
        return FAILED
def _default(obj): """Convert dates and numpy objects in a json serializable format.""" if isinstance(obj, datetime): if is_naive(obj): obj = convert_to_utc(obj) return obj.isoformat() elif isinstance(obj, date): return obj.strftime('%Y-%m-%d') elif isinstance(obj, Decimal): _, _, exponent = obj.as_tuple() if exponent >= 0: # No digits after the decimal point. return int(obj) # Technically lossy due to floating point errors, but the best we # can do without implementing a custom encode function. return float(obj) elif np is not None and isinstance( obj, ( np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64, ), ): return int(obj) elif np is not None and isinstance(obj, np.bool_): return bool(obj) elif np is not None and isinstance( obj, (np.float_, np.float16, np.float32, np.float64, np.complex_, np.complex64, np.complex128)): return float(obj) elif k8s is not None and isinstance( obj, (k8s.V1Pod, k8s.V1ResourceRequirements)): from airflow.kubernetes.pod_generator import PodGenerator def safe_get_name(pod): """ We're running this in an except block, so we don't want it to fail under any circumstances, e.g. by accessing an attribute that isn't there """ try: return pod.metadata.name except Exception: return None try: return PodGenerator.serialize_pod(obj) except Exception: log.warning("JSON encoding failed for pod %s", safe_get_name(obj)) log.debug("traceback for pod JSON encode error", exc_info=True) return {} raise TypeError( f"Object of type '{obj.__class__.__name__}' is not JSON serializable" )