def test_transform_operators_single_subdag(self):
    """
    tests:
        transform_operators with a single resolver entry that maps
        FileToWasbOperator to TestTransformer1

    The expected dag is built from copies of op1-op5 and op7-op10 of the
    test dag plus copies of TestTransformer1's tp1-tp5, and is compared
    with the transformer's target dag.
    """
    source_dag = self._get_test_dag()

    new_dag = DAG(dag_id='transformed_dag',
                  default_args=DEFAULT_DAG_ARGS,
                  dagrun_timeout=timedelta(hours=2),
                  max_active_runs=1,
                  schedule_interval=None)
    resolver = ClassTransformerResolver(
        {FileToWasbOperator: TestTransformer1})
    transformer = AirflowDagTransformer(
        new_dag, transformer_resolvers=[resolver])
    transformer.transform_operators(source_dag)

    with DAG(dag_id='expected_dag',
             default_args=DEFAULT_DAG_ARGS) as exp_dag:
        # Source tasks that reappear in the expected dag; op6 is not
        # among them.
        op1, op2, op3, op4, op5, op7, op8, op9, op10 = [
            copy_op(source_dag.task_dict[task_id])
            for task_id in ('op1', 'op2', 'op3', 'op4', 'op5',
                            'op7', 'op8', 'op9', 'op10')
        ]
        # Tasks exposed by TestTransformer1 as class attributes.
        tp1, tp2, tp3, tp4, tp5 = [
            copy_op(getattr(TestTransformer1, attr))
            for attr in ('tp1', 'tp2', 'tp3', 'tp4', 'tp5')
        ]

        op1 >> [op2, op3, op4]
        op2 >> [op5, tp1, tp5]
        tp1 >> [tp2, tp3] >> tp4
        # Both tp4 and tp5 feed the same three downstream tasks.
        for tail in (tp4, tp5):
            tail >> [op7, op8, op9]
        op3 >> [op7, op8]
        op8 >> [op9, op10]

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [source_dag, exp_dag, transformer.target_dag])

    TestUtils.assert_dags_equals(self, exp_dag, transformer.target_dag)
def test_transform_sub_dags_match_multi(self):
    """
    tests:
        several matching sub-dags are found and transformed
        a sub-dag is converted to another transformed sub-dag
        (with multiple roots)
        a matching sub-dag that isn't at the root is found
        the returned sub-dag contains tasks which can themselves be
        transformed

    Runs in two phases on a deep copy of the test dag: first
    transform_sub_dags, then transform_operators, asserting against a
    freshly built expected dag after each phase.
    """
    original_dag = self._get_subdag_test_dag()

    new_dag = DAG(dag_id='transformed_dag',
                  default_args=DEFAULT_DAG_ARGS,
                  dagrun_timeout=timedelta(hours=2),
                  max_active_runs=1,
                  schedule_interval=None)
    operator_resolver = ClassTransformerResolver(
        {SparkSubmitOperator: TestTransformer5})
    transformer = AirflowDagTransformer(
        new_dag,
        subdag_transformers=[TestSubDagTransformer1],
        transformer_resolvers=[operator_resolver])

    # Work on a copy so the original stays available as the reference
    # for building expected dags.
    working_dag = copy.deepcopy(original_dag)
    working_dag.dag_id = 'transformed_dag'

    # Phase 1: sub-dag transformation, applied to the working copy.
    transformer.transform_sub_dags(working_dag)
    expected_after_sub_dags = self._get_expected_dag_sub_dags_match_multi(
        original_dag, TestSubDagTransformer1.op1)

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [original_dag, expected_after_sub_dags, working_dag])

    TestUtils.assert_dags_equals(self, expected_after_sub_dags, working_dag)

    # Phase 2: transform operators in the transformed subdags.
    transformer.transform_operators(working_dag)
    livy_sensor = LivyBatchSensor(batch_id="foo",
                                  task_id="foo",
                                  azure_conn_id="foo",
                                  cluster_name="foo",
                                  verify_in="yarn",
                                  dag=working_dag)
    expected_after_operators = self._get_expected_dag_sub_dags_match_multi(
        original_dag, livy_sensor)

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [original_dag, expected_after_operators,
             transformer.target_dag])

    TestUtils.assert_dags_equals(
        self, expected_after_operators, transformer.target_dag)
def test_mutiple_sub_dag_transformers(self):
    """
    tests:
        several sub-dag transformers whose matches overlap
        the order in which the transformers are listed decides the
        result

    TestSubDagTransformer2 is listed before TestSubDagTransformer1; the
    transformed copy of the test dag is compared with a hand-built
    expected dag.
    """
    original_dag = self._get_subdag_test_dag()

    new_dag = DAG(dag_id='transformed_dag',
                  default_args=DEFAULT_DAG_ARGS,
                  dagrun_timeout=timedelta(hours=2),
                  max_active_runs=1,
                  schedule_interval=None)
    transformer = AirflowDagTransformer(
        new_dag,
        subdag_transformers=[TestSubDagTransformer2,
                             TestSubDagTransformer1])

    # transform_sub_dags is applied to a copy, leaving the original as
    # the source of expected tasks.
    working_dag = copy.deepcopy(original_dag)
    working_dag.dag_id = "transformed_dag"
    transformer.transform_sub_dags(working_dag)

    with DAG(dag_id='expected_dag',
             default_args=DEFAULT_DAG_ARGS) as exp_dag:
        op1 = copy_op(original_dag.task_dict['op1'])
        t2p1 = copy_op(TestSubDagTransformer2.tp1)
        # TestSubDagTransformer1.op<N> is copied under task id tp<N>.
        tp1, tp2, tp3, tp4, tp5 = [
            copy_op(getattr(TestSubDagTransformer1, 'op{}'.format(n)),
                    task_id='tp{}'.format(n))
            for n in range(1, 6)
        ]

        tp1 >> [tp2, tp3] >> tp4
        op1 >> [tp1, tp5]
        [tp4, tp5] >> t2p1

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [original_dag, exp_dag, working_dag])

    TestUtils.assert_dags_equals(self, exp_dag, working_dag)
def mocked_transform(self, emr_dag, transform_call, expected_hdi_dag,
                     figsize, mock_session):
    """
    Transform a source dag with a mocked session and compare the result
    with an expected dag.

    :param emr_dag: object whose ``create_dag()`` builds the source dag
    :param transform_call: callable that receives a deep copy of the
        source dag and returns the transformed dag
    :param expected_hdi_dag: object whose ``create_dag()`` builds the
        expected dag
    :param figsize: forwarded to the graph renderer when
        ``self.show_graphs`` is set
    :param mock_session: mock whose ``return_value`` is replaced with
        the canned session (presumably a patched session factory;
        confirm against the callers)
    """
    # mock airflow stuff: one canned Connection lookup per connection id
    connection_table = (
        (AWS_CONN_ID, AWS_CONN),
        (EMR_CONN_ID, EMR_CONN),
        (AZURE_CONN_ID, AZURE_CONN),
        (HDI_CONN_ID, HDI_CONN),
    )
    canned_lookups = [
        ([mock.call.query(Connection),
          mock.call.filter(Connection.conn_id == conn_id)],
         [connection])
        for conn_id, connection in connection_table
    ]
    mock_session.return_value = UnifiedAlchemyMagicMock(data=canned_lookups)

    src_dag = emr_dag.create_dag()
    # The transform gets a copy, so src_dag is not the object handed over.
    target_dag = transform_call(copy.deepcopy(src_dag))
    exp_dag = expected_hdi_dag.create_dag()

    for heading, shown_dag in (("Input DAG", src_dag),
                               ("Transformed DAG:", target_dag),
                               ("Expected DAG:", exp_dag)):
        print(heading)
        shown_dag.tree_view()

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [src_dag, exp_dag, target_dag],
            relabeler=rendering.debug_relabeler,
            colorer=rendering.debug_colorer,
            legender=rendering.debug_legender,
            figsize=figsize)

    TestUtils.assert_dags_equals(self, exp_dag, target_dag)
def test_transform_operators_multi_subdag(self):
    """
    tests:
        transformers of connected tasks each returning a sub-dag
        finding a task in a parent chain of multiple transformed
        sub-dags
        finding a task in the upstream chain of multiple transformed
        sub-dags

    Four operator classes are mapped to TestTransformer1-4. The expected
    dag is built from copies of op1-op5, op7, op9 and op10 of the test
    dag plus the tp1-tp5 tasks of TestTransformer1 and TestTransformer2.
    """
    source_dag = self._get_test_dag()

    new_dag = DAG(dag_id='transformed_dag',
                  default_args=DEFAULT_DAG_ARGS,
                  dagrun_timeout=timedelta(hours=2),
                  max_active_runs=1,
                  schedule_interval=None)
    resolver = ClassTransformerResolver({
        FileToWasbOperator: TestTransformer1,
        S3CopyObjectOperator: TestTransformer2,
        BranchPythonOperator: TestTransformer3,
        PythonOperator: TestTransformer4,
    })
    transformer = AirflowDagTransformer(
        new_dag, transformer_resolvers=[resolver])
    transformer.transform_operators(source_dag)

    transformer_task_attrs = ('tp1', 'tp2', 'tp3', 'tp4', 'tp5')

    with DAG(dag_id='expected_dag',
             default_args=DEFAULT_DAG_ARGS) as exp_dag:
        # Source tasks that reappear in the expected dag; op6 and op8
        # are not among them.
        op1, op2, op3, op4, op5, op7, op9, op10 = [
            copy_op(source_dag.task_dict[task_id])
            for task_id in ('op1', 'op2', 'op3', 'op4', 'op5',
                            'op7', 'op9', 'op10')
        ]
        tp1, tp2, tp3, tp4, tp5 = [
            copy_op(getattr(TestTransformer1, attr))
            for attr in transformer_task_attrs
        ]
        t2p1, t2p2, t2p3, t2p4, t2p5 = [
            copy_op(getattr(TestTransformer2, attr))
            for attr in transformer_task_attrs
        ]

        op1 >> [op2, op3, op4]
        op2 >> [op5, tp1, tp5]
        tp1 >> [tp2, tp3] >> tp4
        t2p1 >> [t2p2, t2p3] >> t2p4
        # tp4 and tp5 share the same downstream tasks.
        for tail in (tp4, tp5):
            tail >> [op7, t2p1, t2p5, op9]
        op3 >> [op7, t2p1, t2p5]
        # t2p4 and t2p5 both feed op9 and op10.
        for sink in (op9, op10):
            [t2p4, t2p5] >> sink

    if self.show_graphs:
        rendering.show_multi_dag_graphviz(
            [source_dag, exp_dag, transformer.target_dag])

    TestUtils.assert_dags_equals(self, exp_dag, transformer.target_dag)

    # The operators recorded on TestTransformer3/4 are expected to equal
    # the first task of the respective expected sub-dags.
    self.assertEqual(TestTransformer3.livy_batch_op, tp1)
    self.assertEqual(TestTransformer4.livy_sensor_op, t2p1)