Exemplo n.º 1
0
    def serialize_dag(cls, dag: DAG) -> dict:
        """Serializes a DAG into a JSON object.

        :param dag: the DAG to serialize.
        :return: the dict produced by ``serialize_to_json`` extended with
            ``tasks``, ``dag_dependencies``, ``_task_group``, ``edge_info`` and,
            only when they are truthy, the ``has_on_*_callback`` flags.
        :raises SerializationError: if any step fails. A ``SerializationError``
            raised by a nested call propagates unchanged; any other exception is
            wrapped and attached as ``__cause__``.
        """
        try:
            serialized_dag = cls.serialize_to_json(dag, cls._decorated_fields)

            serialized_dag["tasks"] = [cls._serialize(task) for task in dag.task_dict.values()]

            # detect_dependencies may return None for a task; those are skipped.
            detected = (
                SerializedBaseOperator.detect_dependencies(task) for task in dag.task_dict.values()
            )
            serialized_dag["dag_dependencies"] = [vars(dep) for dep in detected if dep is not None]

            serialized_dag['_task_group'] = SerializedTaskGroup.serialize_task_group(dag.task_group)

            # Edge info in the JSON exactly matches our internal structure
            serialized_dag["edge_info"] = dag.edge_info

            # has_on_*_callback are only stored if the value is True, as the default is False
            if dag.has_on_success_callback:
                serialized_dag['has_on_success_callback'] = True
            if dag.has_on_failure_callback:
                serialized_dag['has_on_failure_callback'] = True
            return serialized_dag
        except SerializationError:
            raise
        except Exception as err:
            # Chain explicitly so the traceback shows the real failure as the direct cause.
            raise SerializationError(f'Failed to serialize dag {dag.dag_id!r}') from err
Exemplo n.º 2
0
    def serialize_dag(cls, dag: DAG) -> dict:
        """Serializes a DAG into a JSON object.

        :param dag: the DAG to serialize.
        :return: the dict produced by ``serialize_to_json`` with exactly one of
            ``schedule_interval`` / ``timetable`` removed, extended with ``tasks``,
            ``dag_dependencies``, ``_task_group``, ``edge_info``, ``params`` and,
            only when they are truthy, the ``has_on_*_callback`` flags.
        :raises SerializationError: if any step fails. A ``SerializationError``
            raised by a nested call propagates unchanged; any other exception is
            wrapped (its text is included in the message) and attached as
            ``__cause__``.
        """
        try:
            serialized_dag = cls.serialize_to_json(dag, cls._decorated_fields)

            # If schedule_interval is backed by timetable, serialize only
            # timetable; vice versa for a timetable backed by schedule_interval.
            if dag.timetable.summary == dag.schedule_interval:
                del serialized_dag["schedule_interval"]
            else:
                del serialized_dag["timetable"]

            serialized_dag["tasks"] = [cls._serialize(task) for task in dag.task_dict.values()]

            # detect_dependencies may return None for a task; those are skipped.
            detected = (
                SerializedBaseOperator.detect_dependencies(task) for task in dag.task_dict.values()
            )
            serialized_dag["dag_dependencies"] = [vars(dep) for dep in detected if dep is not None]

            serialized_dag['_task_group'] = SerializedTaskGroup.serialize_task_group(dag.task_group)

            # Edge info in the JSON exactly matches our internal structure
            serialized_dag["edge_info"] = dag.edge_info
            serialized_dag["params"] = cls._serialize_params_dict(dag.params)

            # has_on_*_callback are only stored if the value is True, as the default is False
            if dag.has_on_success_callback:
                serialized_dag['has_on_success_callback'] = True
            if dag.has_on_failure_callback:
                serialized_dag['has_on_failure_callback'] = True
            return serialized_dag
        except SerializationError:
            raise
        except Exception as e:
            # Chain explicitly so the traceback shows the real failure as the direct cause.
            raise SerializationError(f'Failed to serialize DAG {dag.dag_id!r}: {e}') from e
Exemplo n.º 3
0
    def serialize_dag(cls, dag: DAG) -> dict:
        """Serializes a DAG into a JSON object.

        :param dag: the DAG to serialize.
        :return: the dict produced by ``serialize_to_json`` extended with
            ``tasks`` and ``_task_group``.
        :raises SerializationError: if any step fails. A ``SerializationError``
            raised by a nested call propagates unchanged; any other exception is
            wrapped and attached as ``__cause__``.
        """
        try:
            serialized_dag = cls.serialize_to_json(dag, cls._decorated_fields)

            serialized_dag["tasks"] = [cls._serialize(task) for task in dag.task_dict.values()]
            serialized_dag['_task_group'] = SerializedTaskGroup.serialize_task_group(dag.task_group)
            return serialized_dag
        except SerializationError:
            raise
        except Exception as err:
            # Chain explicitly so the traceback shows the real failure as the direct cause.
            raise SerializationError(f'Failed to serialize dag {dag.dag_id!r}') from err
Exemplo n.º 4
0
    def _serialize_node(cls, op: Union[BaseOperator, MappedOperator]) -> Dict[str, Any]:
        """Build the JSON-compatible dict representing a single operator.

        Starts from ``serialize_to_json`` and then adds the operator's class name
        and module, the dummy-operator flag and, when present, extra links,
        custom deps, template fields and params.

        :param op: the operator (plain or mapped) to serialize.
        :return: the populated dict.
        :raises SerializationError: if ``op.deps`` contains a dep whose class is
            defined outside ``airflow.ti_deps.deps``.
        """
        encoded = cls.serialize_to_json(op, cls._decorated_fields)
        encoded['_task_type'] = type(op).__name__
        encoded['_task_module'] = type(op).__module__

        # Lets readers of the serialized form tell whether the operator
        # inherits from DummyOperator.
        encoded['_is_dummy'] = op.inherits_from_dummy_operator

        if op.operator_extra_links:
            encoded['_operator_extra_links'] = cls._serialize_operator_extra_links(
                op.operator_extra_links
            )

        if op.deps is not BaseOperator.deps:
            # The deps differ from BaseOperator's, so record them by class path.
            # For Airflow 2.0 expediency only built-in dep classes are allowed.
            # Fix this for 2.0.x or 2.1
            dep_paths = []
            for dep in op.deps:
                dep_cls = type(dep)
                module_name = dep_cls.__module__
                if module_name.startswith("airflow.ti_deps.deps."):
                    dep_paths.append(f'{module_name}.{dep_cls.__name__}')
                    continue

                assert op.dag  # for type checking
                raise SerializationError(
                    f"Cannot serialize {(op.dag.dag_id + '.' + op.task_id)!r} with `deps` from non-core "
                    f"module {module_name!r}"
                )
            # op.deps is a set, so its traversal order is not stable and the same
            # call may yield different results. Sorting keeps the output (and the
            # dag_hash computed via json.dumps(self.data, sort_keys=True)) stable.
            dep_paths.sort()
            encoded['deps'] = dep_paths

        # Template fields are stored as-is when JSON serializable,
        # otherwise as strings.
        if op.template_fields:
            for field_name in op.template_fields:
                field_value = getattr(op, field_name, None)
                if cls._is_excluded(field_value, field_name, op):
                    continue
                encoded[field_name] = serialize_template_field(field_value)

        if op.params:
            encoded['params'] = cls._serialize_params_dict(op.params)

        return encoded
Exemplo n.º 5
0
    def serialize_dag(cls, dag: DAG) -> dict:
        """Serializes a DAG into a JSON object.

        :param dag: the DAG to serialize.
        :return: the dict produced by ``serialize_to_json`` extended with
            ``tasks``, ``_task_group`` and, only when they are truthy, the
            ``has_on_*_callback`` flags.
        :raises SerializationError: if any step fails. A ``SerializationError``
            raised by a nested call propagates unchanged; any other exception is
            wrapped and attached as ``__cause__``.
        """
        try:
            serialized_dag = cls.serialize_to_json(dag, cls._decorated_fields)

            serialized_dag["tasks"] = [cls._serialize(task) for task in dag.task_dict.values()]
            serialized_dag['_task_group'] = SerializedTaskGroup.serialize_task_group(dag.task_group)

            # has_on_*_callback are only stored if the value is True, as the default is False
            if dag.has_on_success_callback:
                serialized_dag['has_on_success_callback'] = True
            if dag.has_on_failure_callback:
                serialized_dag['has_on_failure_callback'] = True
            return serialized_dag
        except SerializationError:
            raise
        except Exception as err:
            # Chain explicitly so the traceback shows the real failure as the direct cause.
            raise SerializationError(f'Failed to serialize dag {dag.dag_id!r}') from err
Exemplo n.º 6
0
    def serialize_operator(cls, op: BaseOperator) -> Dict[str, Any]:
        """Serializes operator into a JSON object.

        :param op: the operator to serialize.
        :return: the dict produced by ``serialize_to_json`` extended with
            ``_task_type``, ``_task_module``, ``_is_dummy`` and, when applicable,
            ``_operator_extra_links``, ``deps`` (sorted), the non-excluded
            template fields and ``_events_handler``.
        :raises SerializationError: if ``op.deps`` contains a dep whose class is
            defined outside ``airflow.ti_deps.deps``.
        """
        serialize_op = cls.serialize_to_json(op, cls._decorated_fields)
        serialize_op['_task_type'] = op.__class__.__name__
        serialize_op['_task_module'] = op.__class__.__module__

        # Used to determine if an Operator is inherited from DummyOperator
        serialize_op['_is_dummy'] = op.inherits_from_dummy_operator

        if op.operator_extra_links:
            serialize_op['_operator_extra_links'] = cls._serialize_operator_extra_links(
                op.operator_extra_links
            )

        if op.deps is not BaseOperator.deps:
            # Are the deps different to BaseOperator, if so serialize the class names!
            # For Airflow 2.0 expediency we _only_ allow built in Dep classes.
            # Fix this for 2.0.x or 2.1
            deps = []
            for dep in op.deps:
                klass = type(dep)
                module_name = klass.__module__
                if not module_name.startswith("airflow.ti_deps.deps."):
                    raise SerializationError(
                        f"Cannot serialize {(op.dag.dag_id + '.' + op.task_id)!r} with `deps` from non-core "
                        f"module {module_name!r}"
                    )

                deps.append(f'{module_name}.{klass.__name__}')
            # Sort so the output does not depend on the iteration order of op.deps
            # (unordered if it is a set); otherwise the same operator can serialize
            # to different JSON between runs.
            serialize_op['deps'] = sorted(deps)

        # Store all template_fields as they are if there are JSON Serializable
        # If not, store them as strings
        if op.template_fields:
            for template_field in op.template_fields:
                value = getattr(op, template_field, None)
                if not cls._is_excluded(value, template_field, op):
                    serialize_op[template_field] = serialize_template_field(value)

        # Fetch the handler once and serialize that same object, rather than
        # calling get_events_handler() a second time.
        event_handler = op.get_events_handler()
        if event_handler is not None:
            serialize_op['_events_handler'] = EventHandler.serialize(event_handler)

        return serialize_op
Exemplo n.º 7
0
    def _deserialize_deps(cls, deps: List[str]) -> Set["BaseTIDep"]:
        """Instantiate dep classes from their dotted import paths.

        :param deps: dotted class paths; duplicates are collapsed before loading.
        :return: a set with one instance per path that could be imported. Paths
            that raise ``ImportError`` are logged as a warning and left out.
        :raises AirflowException: if the plugin manager's registered dep classes
            are still ``None`` after initialization.
        :raises SerializationError: if a path is neither under
            ``airflow.ti_deps.deps.`` nor registered through plugins.
        """
        from airflow import plugins_manager

        plugins_manager.initialize_ti_deps_plugins()
        if plugins_manager.registered_ti_dep_classes is None:
            raise AirflowException("Can not load plugins")

        loaded = set()
        for qualname in set(deps):
            is_allowed = (
                qualname.startswith("airflow.ti_deps.deps.")
                or qualname in plugins_manager.registered_ti_dep_classes
            )
            if not is_allowed:
                raise SerializationError(
                    f"Custom dep class {qualname} not deserialized, please register it through plugins."
                )

            try:
                dep_class = import_string(qualname)
                loaded.add(dep_class())
            except ImportError:
                # Best effort: an unimportable dep is reported and skipped.
                log.warning("Error importing dep %r", qualname, exc_info=True)
        return loaded
Exemplo n.º 8
0
    def _serialize_deps(cls, op_deps: Iterable["BaseTIDep"]) -> List[str]:
        """Convert dep instances into a sorted list of dotted class paths.

        :param op_deps: the dep instances to serialize.
        :return: ``"<module>.<class name>"`` for every dep, sorted.
        :raises AirflowException: if the plugin manager's registered dep classes
            are still ``None`` after initialization.
        :raises SerializationError: if a dep's class path is neither under
            ``airflow.ti_deps.deps.`` nor registered through plugins.
        """
        from airflow import plugins_manager

        plugins_manager.initialize_ti_deps_plugins()
        if plugins_manager.registered_ti_dep_classes is None:
            raise AirflowException("Can not load plugins")

        class_paths = []
        for dep in op_deps:
            dep_type = type(dep)
            qualname = f'{dep_type.__module__}.{dep_type.__name__}'
            is_allowed = (
                qualname.startswith("airflow.ti_deps.deps.")
                or qualname in plugins_manager.registered_ti_dep_classes
            )
            if not is_allowed:
                raise SerializationError(
                    f"Custom dep class {qualname} not serialized, please register it through plugins."
                )
            class_paths.append(qualname)

        # op_deps is a set, so traversal order is not stable and the same call
        # may produce different results. Sorting keeps the output (and the
        # dag_hash computed via json.dumps(self.data, sort_keys=True)) stable.
        class_paths.sort()
        return class_paths