示例#1
0
    def test_transform_operators_single_subdag(self):
        """
        Operator transformation where one operator type maps to a subdag.

        tests:
            with only ``FileToWasbOperator`` mapped (to ``TestTransformer1``),
            the target dag must equal a hand-built dag in which the
            ``TestTransformer1`` tasks ``tp1``..``tp5`` sit between copies of
            the untouched source tasks
        """
        source_dag = self._get_test_dag()

        target = DAG(dag_id='transformed_dag',
                     default_args=DEFAULT_DAG_ARGS,
                     dagrun_timeout=timedelta(hours=2),
                     max_active_runs=1,
                     schedule_interval=None)
        resolver = ClassTransformerResolver(
            {FileToWasbOperator: TestTransformer1})
        transformer = AirflowDagTransformer(
            target, transformer_resolvers=[resolver])

        transformer.transform_operators(source_dag)

        with DAG(dag_id='expected_dag',
                 default_args=DEFAULT_DAG_ARGS) as expected:
            # Copies are created in the same order as the ids are listed.
            op1, op2, op3, op4, op5, op7, op8, op9, op10 = [
                copy_op(source_dag.task_dict[task_id])
                for task_id in ('op1', 'op2', 'op3', 'op4', 'op5',
                                'op7', 'op8', 'op9', 'op10')
            ]
            tp1, tp2, tp3, tp4, tp5 = [
                copy_op(getattr(TestTransformer1, attr))
                for attr in ('tp1', 'tp2', 'tp3', 'tp4', 'tp5')
            ]

            # Expected wiring of the transformed graph.
            op1 >> [op2, op3, op4]
            op2 >> [op5, tp1, tp5]
            tp1 >> [tp2, tp3] >> tp4
            tp4 >> [op7, op8, op9]
            tp5 >> [op7, op8, op9]
            op3 >> [op7, op8]
            op8 >> [op9, op10]

        if self.show_graphs:
            rendering.show_multi_dag_graphviz(
                [source_dag, expected, transformer.target_dag])

        TestUtils.assert_dags_equals(
            self, expected, transformer.target_dag)
示例#2
0
    def test_transform_sub_dags_match_multi(self):
        """
        Sub-dag transformation followed by operator transformation.

        tests:
            finding multiple matching sub-dags and transforming them
            converting a sub-dag to another transformed sub-dag
            (with multiple roots)
            finding a sub-dag which isn't at the root
            returned sub-dag contains tasks which can be transformed
        """
        original = self._get_subdag_test_dag()

        target = DAG(dag_id='transformed_dag',
                     default_args=DEFAULT_DAG_ARGS,
                     dagrun_timeout=timedelta(hours=2),
                     max_active_runs=1,
                     schedule_interval=None)
        resolver = ClassTransformerResolver(
            {SparkSubmitOperator: TestTransformer5})
        transformer = AirflowDagTransformer(
            target,
            subdag_transformers=[TestSubDagTransformer1],
            transformer_resolvers=[resolver])

        # Phase 1: sub-dag transformation is applied to a deep copy, which
        # is then compared directly against the expected dag.
        working = copy.deepcopy(original)
        working.dag_id = 'transformed_dag'
        transformer.transform_sub_dags(working)

        expected = self._get_expected_dag_sub_dags_match_multi(
            original, TestSubDagTransformer1.op1)

        if self.show_graphs:
            rendering.show_multi_dag_graphviz([original, expected, working])

        TestUtils.assert_dags_equals(self, expected, working)

        # Phase 2: transform operators in the transformed subdags and
        # compare the transformer's target dag against a new expectation.
        transformer.transform_operators(working)
        placeholder_sensor = LivyBatchSensor(batch_id="foo",
                                             task_id="foo",
                                             azure_conn_id="foo",
                                             cluster_name="foo",
                                             verify_in="yarn",
                                             dag=working)
        expected = self._get_expected_dag_sub_dags_match_multi(
            original, placeholder_sensor)

        if self.show_graphs:
            rendering.show_multi_dag_graphviz(
                [original, expected, transformer.target_dag])
        TestUtils.assert_dags_equals(
            self, expected, transformer.target_dag)
示例#3
0
    def test_mutiple_sub_dag_transformers(self):
        """
        Two sub-dag transformers registered on the same transformer.

        tests:
            multiple sub-dag transformers matching overlapping subdags
            order of the transformers decides result
        """
        original = self._get_subdag_test_dag()

        target = DAG(dag_id='transformed_dag',
                     default_args=DEFAULT_DAG_ARGS,
                     dagrun_timeout=timedelta(hours=2),
                     max_active_runs=1,
                     schedule_interval=None)
        # TestSubDagTransformer2 is listed ahead of TestSubDagTransformer1.
        ordered_transformers = [TestSubDagTransformer2, TestSubDagTransformer1]
        transformer = AirflowDagTransformer(
            target, subdag_transformers=ordered_transformers)

        working = copy.deepcopy(original)
        working.dag_id = "transformed_dag"
        transformer.transform_sub_dags(working)

        with DAG(dag_id='expected_dag',
                 default_args=DEFAULT_DAG_ARGS) as expected:
            op1 = copy_op(original.task_dict['op1'])
            t2p1 = copy_op(TestSubDagTransformer2.tp1)

            # Copy TestSubDagTransformer1's op1..op5 under the ids tp1..tp5.
            tp1, tp2, tp3, tp4, tp5 = [
                copy_op(getattr(TestSubDagTransformer1, attr), task_id=new_id)
                for attr, new_id in (('op1', 'tp1'),
                                     ('op2', 'tp2'),
                                     ('op3', 'tp3'),
                                     ('op4', 'tp4'),
                                     ('op5', 'tp5'))
            ]

            tp1 >> [tp2, tp3] >> tp4

            op1 >> [tp1, tp5]
            [tp4, tp5] >> t2p1

        if self.show_graphs:
            rendering.show_multi_dag_graphviz([original, expected, working])

        TestUtils.assert_dags_equals(self, expected, working)
示例#4
0
    def mocked_transform(self, emr_dag, transform_call, expected_hdi_dag,
                         figsize, mock_session):
        """
        Run ``transform_call`` against a mocked session and check the result.

        The source dag comes from ``emr_dag.create_dag()``; a deep copy of it
        is handed to ``transform_call``. The returned dag is printed next to
        the source and expected dags, optionally rendered, and finally
        compared with ``expected_hdi_dag.create_dag()``.

        :param emr_dag: object whose ``create_dag()`` yields the input dag
        :param transform_call: callable taking a dag and returning the
            transformed dag
        :param expected_hdi_dag: object whose ``create_dag()`` yields the
            expected dag
        :param figsize: forwarded to the graph renderer when
            ``self.show_graphs`` is truthy
        :param mock_session: mock whose ``return_value`` is replaced with the
            canned session built here
        """
        # mock airflow stuff: each connection id lookup returns its
        # matching connection object
        canned_rows = [
            ([mock.call.query(Connection),
              mock.call.filter(Connection.conn_id == conn_id)],
             [connection])
            for conn_id, connection in ((AWS_CONN_ID, AWS_CONN),
                                        (EMR_CONN_ID, EMR_CONN),
                                        (AZURE_CONN_ID, AZURE_CONN),
                                        (HDI_CONN_ID, HDI_CONN))
        ]
        mock_session.return_value = UnifiedAlchemyMagicMock(data=canned_rows)

        source = emr_dag.create_dag()
        actual = transform_call(copy.deepcopy(source))
        expected = expected_hdi_dag.create_dag()

        for heading, shown_dag in (("Input DAG", source),
                                   ("Transformed DAG:", actual),
                                   ("Expected DAG:", expected)):
            print(heading)
            shown_dag.tree_view()

        if self.show_graphs:
            rendering.show_multi_dag_graphviz(
                [source, expected, actual],
                relabeler=rendering.debug_relabeler,
                colorer=rendering.debug_colorer,
                legender=rendering.debug_legender,
                figsize=figsize)

        TestUtils.assert_dags_equals(self, expected, actual)
示例#5
0
    def test_transform_operators_multi_subdag(self):
        """
        Operator transformation with several operator types mapped at once.

        tests:
            connected task's transformers returning multiple subdags
            finding a task in a parent chain of multiple transformed subdags
            finding a task in upstream chain of multiple transformed subdags

        After the dag comparison, the operators recorded on
        ``TestTransformer3.livy_batch_op`` and
        ``TestTransformer4.livy_sensor_op`` are compared with the expected
        ``tp1`` and ``t2p1`` copies respectively.
        """
        source_dag = self._get_test_dag()

        target = DAG(dag_id='transformed_dag',
                     default_args=DEFAULT_DAG_ARGS,
                     dagrun_timeout=timedelta(hours=2),
                     max_active_runs=1,
                     schedule_interval=None)
        operator_to_transformer = {
            FileToWasbOperator: TestTransformer1,
            S3CopyObjectOperator: TestTransformer2,
            BranchPythonOperator: TestTransformer3,
            PythonOperator: TestTransformer4,
        }
        transformer = AirflowDagTransformer(
            target,
            transformer_resolvers=[
                ClassTransformerResolver(operator_to_transformer)
            ])

        transformer.transform_operators(source_dag)

        with DAG(dag_id='expected_dag',
                 default_args=DEFAULT_DAG_ARGS) as expected:
            # Copies are created in the same order as the ids are listed.
            op1, op2, op3, op4, op5, op7, op9, op10 = [
                copy_op(source_dag.task_dict[task_id])
                for task_id in ('op1', 'op2', 'op3', 'op4', 'op5',
                                'op7', 'op9', 'op10')
            ]
            subdag_attrs = ('tp1', 'tp2', 'tp3', 'tp4', 'tp5')
            tp1, tp2, tp3, tp4, tp5 = [
                copy_op(getattr(TestTransformer1, attr))
                for attr in subdag_attrs
            ]
            t2p1, t2p2, t2p3, t2p4, t2p5 = [
                copy_op(getattr(TestTransformer2, attr))
                for attr in subdag_attrs
            ]

            # Expected wiring of the transformed graph.
            op1 >> [op2, op3, op4]
            op2 >> [op5, tp1, tp5]
            tp1 >> [tp2, tp3] >> tp4
            t2p1 >> [t2p2, t2p3] >> t2p4
            tp4 >> [op7, t2p1, t2p5, op9]
            tp5 >> [op7, t2p1, t2p5, op9]
            op3 >> [op7, t2p1, t2p5]
            [t2p4, t2p5] >> op9
            [t2p4, t2p5] >> op10

        if self.show_graphs:
            rendering.show_multi_dag_graphviz(
                [source_dag, expected, transformer.target_dag])

        TestUtils.assert_dags_equals(
            self, expected, transformer.target_dag)
        self.assertEqual(TestTransformer3.livy_batch_op, tp1)
        self.assertEqual(TestTransformer4.livy_sensor_op, t2p1)