Exemplo n.º 1
0
    def topological_sort(self, _include_subdag_tasks: bool = False):
        """
        Order this group's children so that every node appears after all of its
        upstream dependencies.

        :param _include_subdag_tasks: when true, each ``SubDagOperator`` child is
            immediately followed by the topologically sorted contents of its
            sub-DAG's task group.
        :return: list of nodes in topological order
        :raises AirflowDagCycleException: if a full pass over the remaining nodes
            resolves none of them, i.e. the dependencies contain a cycle.
        """
        # A variant of Kahn's algorithm that repeatedly sweeps the remaining nodes
        # instead of pre-computing each node's in-degree.
        from airflow.operators.subdag import SubDagOperator  # Avoid circular import

        pending = copy.copy(self.children)
        ordered: List[DAGNode] = []

        # Nothing to sort.
        if not pending:
            return ordered

        def _still_pending(upstream) -> bool:
            """Tell whether ``upstream``, or any task group enclosing it, is still unsorted."""
            if upstream.node_id in pending:
                return True
            # The upstream may live inside a task group (possibly nested) that is
            # itself one of the pending children; walk up the group ancestry.
            group = upstream.task_group
            while group:
                if group.node_id in pending:
                    return True
                group = group.task_group
            return False

        while pending:
            # Set as soon as one node is resolved in this sweep; a sweep that
            # resolves nothing means the remaining nodes can never be ordered.
            progressed = False

            # Iterate over a snapshot so that entries can be removed from
            # ``pending`` while sweeping.
            for node in list(pending.values()):
                if any(_still_pending(upstream) for upstream in node.upstream_list):
                    continue

                progressed = True
                del pending[node.node_id]
                ordered.append(node)

                if _include_subdag_tasks and isinstance(node, SubDagOperator):
                    nested = node.subdag.task_group.topological_sort(_include_subdag_tasks=True)
                    ordered.extend(nested)

            if not progressed:
                raise AirflowDagCycleException(
                    f"A cyclic dependency occurred in dag: {self.dag_id}")

        return ordered
Exemplo n.º 2
0
 def _check_adjacent_tasks(task_id, current_task):
     """
     Find the next adjacent task of ``current_task`` that has not been visited yet.

     Returns the id of the first direct relative whose state in ``visited`` is
     ``CYCLE_NEW``, or ``None`` when no such relative remains. Raises
     ``AirflowDagCycleException`` if a relative marked ``CYCLE_IN_PROGRESS`` is
     encountered first.
     """
     for neighbour_id in current_task.get_direct_relative_ids():
         state = visited[neighbour_id]
         if state == CYCLE_IN_PROGRESS:
             # The neighbour is still being explored, so we have looped back to it.
             raise AirflowDagCycleException(f"Cycle detected in DAG. Faulty task: {task_id}")
         if state == CYCLE_NEW:
             return neighbour_id
     return None
Exemplo n.º 3
0
    def _test_cycle_helper(visit_map: Dict[str, int], task_id: str) -> None:
        """
        Check whether a cycle is reachable from ``task_id`` using a depth-first traversal.

        The traversal is iterative (explicit stack) rather than recursive, so a
        long dependency chain cannot exhaust Python's recursion limit.

        :param visit_map: per-task traversal state, updated in place. A task is
            marked ``CYCLE_IN_PROGRESS`` while it is on the traversal stack and
            ``CYCLE_DONE`` once all of its direct relatives have been explored.
        :param task_id: id of the task to start the traversal from.
        :raises AirflowDagCycleException: if a direct relative of a task being
            explored is itself still in progress.
        """
        if visit_map[task_id] == CYCLE_DONE:
            return

        visit_map[task_id] = CYCLE_IN_PROGRESS

        # Each stack entry pairs a task id with an iterator over its direct
        # relatives, so exploration of a task resumes where it left off after
        # one of its relatives has been fully processed.
        stack = [(task_id, iter(dag.task_dict[task_id].get_direct_relative_ids()))]

        while stack:
            current_id, relatives = stack[-1]
            for descendant_id in relatives:
                state = visit_map[descendant_id]
                if state == CYCLE_IN_PROGRESS:
                    msg = f"Cycle detected in DAG. Faulty task: {current_id} to {descendant_id}"
                    raise AirflowDagCycleException(msg)
                if state == CYCLE_DONE:
                    continue
                # Descend into the unexplored relative before continuing here.
                visit_map[descendant_id] = CYCLE_IN_PROGRESS
                stack.append(
                    (descendant_id, iter(dag.task_dict[descendant_id].get_direct_relative_ids()))
                )
                break
            else:
                # Every relative of current_id has been explored.
                visit_map[current_id] = CYCLE_DONE
                stack.pop()