Пример #1
0
    def test_should_keep_node_in_correct_flow(self):
        """A kill node that is a regular downstream of another node must be kept.

        Builds a two-node workflow where the first node lists the kill node in
        ``downstream_names`` and checks that the transformer leaves both the
        node set and the downstream links untouched.
        """
        workflow = Workflow(input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B")

        # Mapper doubles: a generic action followed by a kill action.
        upstream_mapper = mock.Mock(spec=BaseMapper)
        upstream_mapper.name = "first_task"
        kill_mapper = mock.Mock(spec=KillMapper)
        kill_mapper.name = "third_task"

        upstream_node = OozieNode(upstream_mapper)
        kill_node = OozieNode(kill_mapper)
        # The kill node sits on the regular flow, not on the error flow.
        upstream_node.downstream_names = [kill_mapper.name]

        for mapper, node in ((upstream_mapper, upstream_node), (kill_mapper, kill_node)):
            workflow.nodes[mapper.name] = node

        RemoveKillTransformer().process_workflow_after_parse_workflow_xml(workflow)

        expected_names = {upstream_mapper.name, kill_mapper.name}
        self.assertEqual(expected_names, set(workflow.nodes.keys()))
        self.assertEqual([kill_node.name], upstream_node.downstream_names)
        self.assertEqual([], kill_node.downstream_names)
Пример #2
0
    def test_should_remove_node_in_error_flow(self):
        """A kill node referenced only as an error target must be removed.

        Builds a two-node workflow where the first node points at the kill node
        through ``error_downstream_name`` and checks that only the first node
        is left after the transformer has run.
        """
        workflow = Workflow(input_directory_path="", output_directory_path="", dag_name="DAG_NAME_B")

        # Mapper doubles: a generic action and the kill action it fails over to.
        action_mapper = mock.Mock(spec=BaseMapper)
        action_mapper.name = "first_task"
        kill_mapper = mock.Mock(spec=KillMapper)
        kill_mapper.name = "second_task"

        action_node = OozieNode(action_mapper)
        kill_node = OozieNode(kill_mapper)
        action_node.error_downstream_name = kill_mapper.name

        for mapper, node in ((action_mapper, action_node), (kill_mapper, kill_node)):
            workflow.nodes[mapper.name] = node

        RemoveKillTransformer().process_workflow_after_parse_workflow_xml(workflow)

        remaining_names = set(workflow.nodes.keys())
        self.assertEqual({action_mapper.name}, remaining_names)
Пример #3
0
def main():
    """Command-line entry point: convert one Oozie workflow directory.

    Steps, in order:

    1. Parse the options from ``sys.argv``.
    2. Derive the DAG name from the input directory when none was given.
    3. Log a warning when the ``CONFIG`` file is missing from the input directory.
    4. If a validation script is available, run it on the workflow XML and
       terminate with exit status 1 when it reports a failure.
    5. Create the output directory, pick the DOT or Python renderer and run
       ``OozieConverter`` with the fixed list of transformers.

    Raises:
        SystemExit: with code 1 when the validation script exits non-zero.
    """
    args = parse_args(sys.argv[1:])
    input_directory_path = args.input_directory_path
    output_directory_path = args.output_directory_path

    start_days_ago = args.start_days_ago
    schedule_interval = args.schedule_interval

    # os.path.basename() returns "" for a path that ends with a separator
    # (e.g. "examples/demo/"), so normalise the path before taking its last
    # component; otherwise the fallback DAG name would be empty.
    dag_name = args.dag_name or os.path.basename(os.path.normpath(input_directory_path))

    conf_path = os.path.join(input_directory_path, CONFIG)
    if not os.path.isfile(conf_path):
        logging.warning(
            f"""

#################################### WARNING ###########################################

The '{CONFIG}' file was not detected in {input_directory_path}.
It may be necessary to provide input parameters for the workflow.

In case of any conversion errors make sure this configuration file is really not needed.
Otherwise please provide it.

########################################################################################
        """
        )

    validate_workflows_script = get_o2a_validate_workflows_script()
    if validate_workflows_script:
        try:
            check_call([validate_workflows_script, f"{input_directory_path}/{HDFS_FOLDER}/{WORKFLOW_XML}"])
        except CalledProcessError:
            logging.error("Workflow failed schema validation. Please correct the workflow XML and try again.")
            # Use sys.exit(): the bare exit() builtin is injected by the `site`
            # module for interactive use and is missing under `python -S` or in
            # frozen/embedded interpreters.
            sys.exit(1)

    os.makedirs(output_directory_path, exist_ok=True)

    renderer_class = DotRenderer if args.dot else PythonRenderer
    renderer = renderer_class(
        output_directory_path=output_directory_path,
        schedule_interval=schedule_interval,
        start_days_ago=start_days_ago,
    )

    # The transformers are handed to the converter in exactly this order.
    transformers = [
        RemoveInaccessibleNodeTransformer(),
        RemoveEndTransformer(),
        RemoveKillTransformer(),
        RemoveStartTransformer(),
        RemoveJoinTransformer(),
        RemoveForkTransformer(),
        AddWorkflowNotificationTransformer(),
        AddNodeNotificationTransformer(),
    ]

    converter = OozieConverter(
        dag_name=dag_name,
        input_directory_path=input_directory_path,
        output_directory_path=output_directory_path,
        action_mapper=ACTION_MAP,
        renderer=renderer,
        transformers=transformers,
        user=args.user,
    )
    converter.recreate_output_directory()
    converter.convert()
Пример #4
0
    def test_should_convert_demo_workflow(self):
        """Convert the ``demo`` example and inspect the workflow given to the renderer.

        The renderer is a ``MagicMock``; the workflow object is taken from the
        keyword arguments of its ``create_workflow_file`` call and checked for
        paths, DAG name, task-group relations, task groups and dependencies.
        """
        demo_directory = path.join(EXAMPLES_PATH, "demo")
        renderer_mock = mock.MagicMock()

        # NOTE(review): "/tmp/" is a shared directory and it is passed to
        # recreate_output_directory() below — confirm that call is safe there.
        converter = OozieConverter(
            dag_name="demo",
            input_directory_path=demo_directory,
            output_directory_path="/tmp/",
            action_mapper=ACTION_MAP,
            renderer=renderer_mock,
            transformers=[
                RemoveInaccessibleNodeTransformer(),
                RemoveEndTransformer(),
                RemoveKillTransformer(),
                RemoveStartTransformer(),
                RemoveJoinTransformer(),
                RemoveForkTransformer(),
            ],
            user="******",
        )
        converter.recreate_output_directory()
        converter.convert()

        # call_args is an (args, kwargs) pair; the workflow is passed by keyword.
        call_kwargs = renderer_mock.create_workflow_file.call_args[1]
        workflow: Workflow = call_kwargs["workflow"]

        self.assertEqual(demo_directory, workflow.input_directory_path)
        self.assertEqual("/tmp/", workflow.output_directory_path)
        self.assertEqual("demo", workflow.dag_name)

        expected_relations = {
            Relation(from_task_id="decision-node", to_task_id="end", is_error=False),
            Relation(from_task_id="decision-node", to_task_id="hdfs-node", is_error=False),
            Relation(from_task_id="join-node", to_task_id="decision-node", is_error=False),
            Relation(from_task_id="pig-node", to_task_id="join-node", is_error=False),
            Relation(from_task_id="shell-node", to_task_id="join-node", is_error=False),
            Relation(from_task_id="subworkflow-node", to_task_id="join-node", is_error=False),
        }
        self.assertEqual(expected_relations, workflow.task_group_relations)

        # After conversion the raw node mapping is expected to be empty.
        self.assertEqual({}, workflow.nodes)

        expected_task_groups = {
            "pig-node",
            "subworkflow-node",
            "shell-node",
            "join-node",
            "decision-node",
            "hdfs-node",
            "end",
        }
        self.assertEqual(expected_task_groups, workflow.task_groups.keys())

        expected_dependencies = {
            "from airflow import models",
            "from airflow.contrib.operators import dataproc_operator",
            "from airflow.operators import bash_operator",
            "from airflow.operators import dummy_operator",
            "from airflow.operators import python_operator",
            "from airflow.operators.subdag_operator import SubDagOperator",
            "from airflow.operators import bash_operator, dummy_operator",
            "from airflow.utils import dates",
            "from airflow.utils.trigger_rule import TriggerRule",
            "from o2a.o2a_libs.el_basic_functions import *",
            "from o2a.o2a_libs.el_basic_functions import first_not_null",
            "from o2a.o2a_libs.el_wf_functions import *",
            "from o2a.o2a_libs.property_utils import PropertySet",
            "import datetime",
            "import shlex",
            "import subdag_childwf",
        }
        self.assertEqual(expected_dependencies, workflow.dependencies)