def test_populate_node_family_full_parentage_complex_dependencies(self) -> None:
        view_1 = BigQueryView(
            dataset_id="dataset_1",
            view_id="table_1",
            description="table_1 description",
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
        )
        view_2 = BigQueryView(
            dataset_id="dataset_2",
            view_id="table_2",
            description="table_2 description",
            view_query_template="SELECT * FROM `{project_id}.dataset_1.table_1`",
        )
        view_3 = BigQueryView(
            dataset_id="dataset_3",
            view_id="table_3",
            description="table_3 description",
            view_query_template="""
                           SELECT * FROM `{project_id}.dataset_1.table_1`
                           JOIN `{project_id}.dataset_2.table_2`
                           USING (col)""",
        )
        view_4 = BigQueryView(
            dataset_id="dataset_4",
            view_id="table_4",
            description="table_4 description",
            view_query_template="""
                           SELECT * FROM `{project_id}.dataset_2.table_2`
                           JOIN `{project_id}.dataset_3.table_3`
                           USING (col)""",
        )

        dag_walker = BigQueryViewDagWalker([view_1, view_2, view_3, view_4])
        start_node = dag_walker.node_for_view(view_4)

        dag_walker.populate_node_family_for_node(
            node=start_node, view_source_table_datasets={"source_dataset"}
        )
        expected_parent_nodes = {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="source_dataset", table_id="source_table"
                )
            ),
            DagKey.for_view(view_1),
            DagKey.for_view(view_2),
            DagKey.for_view(view_3),
        }
        self.assertEqual(expected_parent_nodes, start_node.node_family.full_parentage)
    def test_dag_returns_parent_results(self) -> None:
        walker = BigQueryViewDagWalker(self.all_views)

        def process_check_parents(
            _view: BigQueryView, parent_results: Dict[BigQueryView, int]
        ) -> int:
            if not parent_results:
                return 1
            return max(parent_results.values()) + 1

        result = walker.process_dag(process_check_parents)
        self.assertEqual(len(self.all_views), len(result))

        max_depth = 0
        max_depth_view = None
        for view, depth in result.items():
            if depth > max_depth:
                max_depth = depth
                max_depth_view = view
        if not max_depth_view:
            self.fail("Found no max_depth_view")
        max_depth_node = walker.node_for_view(max_depth_view)
        self.assertEqual(set(), max_depth_node.child_keys)
    def test_populate_node_family_descendants_dfs_tree_str(self) -> None:
        view_1 = BigQueryView(
            dataset_id="dataset_1",
            view_id="table_1",
            description="table_1 description",
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
        )
        view_2 = BigQueryView(
            dataset_id="dataset_2",
            view_id="table_2",
            description="table_2 description",
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
        )
        view_3 = BigQueryView(
            dataset_id="dataset_3",
            view_id="table_3",
            description="table_3 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_1.table_1`
            JOIN `{project_id}.dataset_2.table_2`
            USING (col)""",
        )
        view_4 = BigQueryView(
            dataset_id="dataset_4",
            view_id="table_4",
            description="table_4 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_3.table_3`""",
        )
        view_5 = BigQueryView(
            dataset_id="dataset_5",
            view_id="table_5",
            description="table_5 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_3.table_3`""",
        )
        view_6 = BigQueryView(
            dataset_id="dataset_6",
            view_id="table_6",
            description="table_6 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_5.table_5`""",
        )
        dag_walker = BigQueryViewDagWalker(
            [view_1, view_2, view_3, view_4, view_5, view_6]
        )

        # Top level view
        node = dag_walker.node_for_view(view_2)
        dag_walker.populate_node_family_for_node(node=node)
        expected_tree = """dataset_2.table_2
|--dataset_3.table_3
|----dataset_4.table_4
|----dataset_5.table_5
|------dataset_6.table_6
"""
        self.assertEqual(expected_tree, node.node_family.child_dfs_tree_str)

        # Descendants from middle of tree
        node = dag_walker.node_for_view(view_3)
        dag_walker.populate_node_family_for_node(node=node)
        expected_tree = """dataset_3.table_3
|--dataset_4.table_4
|--dataset_5.table_5
|----dataset_6.table_6
"""
        self.assertEqual(expected_tree, node.node_family.child_dfs_tree_str)
    def test_populate_node_family_parentage_dfs_tree_str(self) -> None:
        view_1 = BigQueryView(
            dataset_id="dataset_1",
            view_id="table_1",
            description="table_1 description",
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table`",
        )
        view_2 = BigQueryView(
            dataset_id="dataset_2",
            view_id="table_2",
            description="table_2 description",
            view_query_template="SELECT * FROM `{project_id}.source_dataset.source_table_2`",
        )
        view_3 = BigQueryView(
            dataset_id="dataset_3",
            view_id="table_3",
            description="table_3 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_1.table_1`
            JOIN `{project_id}.dataset_2.table_2`
            USING (col)""",
        )
        view_4 = BigQueryView(
            dataset_id="dataset_4",
            view_id="table_4",
            description="table_4 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_3.table_3`""",
        )
        view_5 = BigQueryView(
            dataset_id="dataset_5",
            view_id="table_5",
            description="table_5 description",
            view_query_template="""
            SELECT * FROM `{project_id}.dataset_3.table_3`""",
        )
        dag_walker = BigQueryViewDagWalker([view_1, view_2, view_3, view_4, view_5])

        # Top level view
        node = dag_walker.node_for_view(view_5)
        dag_walker.populate_node_family_for_node(node=node)
        expected_tree = """dataset_5.table_5
|--dataset_3.table_3
|----dataset_2.table_2
|------source_dataset.source_table_2
|----dataset_1.table_1
|------source_dataset.source_table
"""
        self.assertEqual(expected_tree, node.node_family.parent_dfs_tree_str)

        # Middle of tree
        node = dag_walker.node_for_view(view_3)
        dag_walker.populate_node_family_for_node(node=node)
        expected_tree = """dataset_3.table_3
|--dataset_2.table_2
|----source_dataset.source_table_2
|--dataset_1.table_1
|----source_dataset.source_table
"""
        self.assertEqual(expected_tree, node.node_family.parent_dfs_tree_str)

        # Skip datasets
        dag_walker.populate_node_family_for_node(
            node=node,
            datasets_to_skip={"dataset_1"},
        )
        expected_tree = """dataset_3.table_3
|--dataset_2.table_2
|----source_dataset.source_table_2
|--source_dataset.source_table
"""
        self.assertEqual(expected_tree, node.node_family.parent_dfs_tree_str)

        # Custom formatted
        def _custom_formatter(dag_key: DagKey) -> str:
            return f"custom_formatted_{dag_key.dataset_id}_{dag_key.table_id}"

        dag_walker.populate_node_family_for_node(
            node=node, custom_node_formatter=_custom_formatter
        )

        expected_tree = """custom_formatted_dataset_3_table_3
|--custom_formatted_dataset_2_table_2
|----custom_formatted_source_dataset_source_table_2
|--custom_formatted_dataset_1_table_1
|----custom_formatted_source_dataset_source_table
"""
        self.assertEqual(expected_tree, node.node_family.parent_dfs_tree_str)
    def test_populate_node_family_full_parentage(self) -> None:
        dag_walker = BigQueryViewDagWalker(self.x_shaped_dag_views_list)

        # root node start
        start_node = dag_walker.node_for_view(self.x_shaped_dag_views_list[0])
        dag_walker.populate_node_family_for_node(
            node=start_node, view_source_table_datasets={"source_dataset"}
        )

        self.assertEqual(
            {
                DagKey(
                    view_address=BigQueryAddress(
                        dataset_id="source_dataset", table_id="source_table"
                    )
                )
            },
            start_node.node_family.full_parentage,
        )

        # start in middle
        start_node = dag_walker.node_for_view(self.x_shaped_dag_views_list[2])
        dag_walker.populate_node_family_for_node(
            node=start_node, view_source_table_datasets={"source_dataset"}
        )
        expected_parent_nodes = {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="source_dataset", table_id="source_table"
                )
            ),
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="source_dataset", table_id="source_table_2"
                )
            ),
            DagKey.for_view(self.x_shaped_dag_views_list[0]),
            DagKey.for_view(self.x_shaped_dag_views_list[1]),
        }
        self.assertEqual(
            expected_parent_nodes,
            start_node.node_family.full_parentage,
        )

        # single start node
        start_node = dag_walker.node_for_view(self.x_shaped_dag_views_list[3])
        dag_walker.populate_node_family_for_node(
            node=start_node, view_source_table_datasets={"source_dataset"}
        )
        expected_parent_nodes = {
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="source_dataset", table_id="source_table"
                )
            ),
            DagKey(
                view_address=BigQueryAddress(
                    dataset_id="source_dataset", table_id="source_table_2"
                )
            ),
            DagKey.for_view(self.x_shaped_dag_views_list[0]),
            DagKey.for_view(self.x_shaped_dag_views_list[1]),
            DagKey.for_view(self.x_shaped_dag_views_list[2]),
        }
        self.assertEqual(
            expected_parent_nodes,
            start_node.node_family.full_parentage,
        )

        # multiple start nodes
        start_nodes = [
            start_node,
            dag_walker.node_for_view(self.x_shaped_dag_views_list[4]),
        ]

        parentage_set: Set[DagKey] = set()
        for node in start_nodes:
            dag_walker.populate_node_family_for_node(
                node=node, view_source_table_datasets={"source_dataset"}
            )
            parentage_set = parentage_set.union(node.node_family.full_parentage)

        self.assertEqual(expected_parent_nodes, parentage_set)