def test_parse_invalid_workflows():
    """Loading any entry found in ``test/data/bad-dags`` must raise."""
    bad_dag_dir = 'test/data/bad-dags'
    bad_dag_paths = (
        os.path.join(bad_dag_dir, entry)
        for entry in os.listdir(bad_dag_dir)
    )

    for bad_dag_path in bad_dag_paths:
        # Any exception type is accepted; the test only checks that the
        # load does not succeed.
        with pytest.raises(Exception):
            Workflow.load(bad_dag_path)
示例#2
0
    def load_workflow(self, primary_workflow_name, args):
        """Assemble a ``Workflow`` for the named Oozie workflow.

        The oozie configuration is looked up from ``args`` through
        ``plugins.manager``.  The primary and sub DAGs are parsed and built,
        a cluster resource is inserted into the primary DAG when required,
        and any non-empty optional configuration sections are copied onto
        the primary DAG before the ``Workflow`` is constructed.

        Args:
            primary_workflow_name: Name of the primary workflow; also used
                (prefixed with ``'oozie:'``) as the workflow's filename.
            args: Object providing ``dag_name``, ``prune_nodes`` and
                ``only_nodes``; it is also handed to
                ``plugins.manager.get_oozie_config``.

        Returns:
            The resulting ``Workflow``, pruned when ``args`` requests it and
            rebuilt with a ``'before'`` entry on the primary DAG when the
            configuration supplies upstream operators.
        """
        oozie_config = plugins.manager.get_oozie_config(args)
        cluster_config = oozie_config.cluster_config()

        primary_dag, sub_dags = self._parse_and_build_dags(
            primary_workflow_name, cluster_config, oozie_config)

        needs_cluster = self._requires_cluster_resource(
            primary_dag, sub_dags, cluster_config)
        if needs_cluster:
            primary_dag = self._insert_cluster_resource(
                primary_dag, cluster_config)

        primary_dag['compatibility_version'] = VERSION_STRING

        # Optional sections: each one is copied onto the primary DAG only
        # when its accessor returns something truthy.
        optional_sections = (
            ('plugin_config', oozie_config.plugin_config),
            ('default_task_args', oozie_config.default_task_args),
            ('dag_args', oozie_config.dag_args),
            ('imports', oozie_config.dag_imports),
        )
        for section_key, accessor in optional_sections:
            if accessor():
                primary_dag[section_key] = accessor()

        # A DAG name supplied through args takes precedence over the parsed one.
        name_override = args.dag_name
        primary_dag['name'] = (
            name_override if name_override else primary_dag['name'])

        # Build the workflow
        secondary_dags = list(sub_dags.values())
        workflow_filename = 'oozie:' + primary_workflow_name
        wf = Workflow(
            primary=primary_dag,
            secondary=secondary_dags,
            filename=workflow_filename,
        )

        # Prune away any unwanted nodes
        nodes_to_prune = args.prune_nodes
        nodes_to_keep = args.only_nodes
        if nodes_to_prune or nodes_to_keep:
            wf = wf.prune(prune_nodes=nodes_to_prune, only_nodes=nodes_to_keep)

        # Additional operators are attached only now, after pruning, so that
        # the prune step above cannot remove them.
        if not oozie_config.upstream_operators():
            return wf

        primary_with_upstream = wf.primary.copy()
        primary_with_upstream['before'] = oozie_config.upstream_operators()
        return Workflow(
            primary=primary_with_upstream,
            secondary=wf.secondary,
            filename='oozie:' + primary_workflow_name)
def test_workflow_prune_only_nodes():
    """Restricting to the sub-dag sensor node keeps sub-dag data unchanged.

    Both the primary ``sub_dags`` entry and the secondary workflows of the
    pruned workflow must compare equal to those of the loaded workflow.
    """
    original = Workflow.load('test/data/good-dags/subdag_test.yaml')
    kept_nodes = ['subdag-tester.SubDagSuccessFileSensor']

    restricted = original.prune(only_nodes=kept_nodes)

    assert restricted.primary['sub_dags'] == original.primary['sub_dags']
    assert restricted.secondary == original.secondary
def test_workflow_prune_resulting_in_unreachable():
    """Pruning the ``tester`` node drops the sub-dag data from the result."""
    original = Workflow.load('test/data/good-dags/subdag_test.yaml')
    removed_nodes = ['tester']

    result = original.prune(prune_nodes=removed_nodes)

    # The loaded workflow still carries its sub-dag information ...
    assert 'sub_dags' in original.primary
    assert original.secondary

    # ... while the pruned workflow has none left.
    assert 'sub_dags' not in result.primary
    assert not result.secondary
def test_workflow_prune():
    """Pruning the only sub-dag node removes the sub-dag data entirely."""
    original = Workflow.load('test/data/good-dags/subdag_test.yaml')
    removed_nodes = ['subdag-tester.SubDagSuccessFileSensor']

    result = original.prune(prune_nodes=removed_nodes)

    # Source workflow: sub-dag data is present.
    assert 'sub_dags' in original.primary
    assert original.secondary

    # Pruned workflow: neither the primary entry nor secondaries remain.
    assert 'sub_dags' not in result.primary
    assert not result.secondary
def test_invalid_node_reference_errors():
    """Bad node references raise the matching reference error.

    A reference naming ``invalid-dag`` must raise
    ``InvalidWorkflowReference``; a reference naming ``invalid_node`` under
    ``subdag-tester`` must raise ``InvalidNodeReference``.
    """
    workflow = Workflow.load('test/data/good-dags/subdag_test.yaml')

    unknown_workflow_ref = ['invalid-dag.any_node']
    with pytest.raises(InvalidWorkflowReference):
        workflow._partitioned_node_paths(node_refs=unknown_workflow_ref)

    unknown_node_ref = ['subdag-tester.invalid_node']
    with pytest.raises(InvalidNodeReference):
        # The print is only reached if no error was raised, in which case
        # it shows what was returned instead.
        partitions = workflow._partitioned_node_paths(
            node_refs=unknown_node_ref)
        print(partitions)
def test_node_path_builder():
    """``_all_node_paths`` maps each node key to the path that reaches it."""
    workflow = Workflow.load('test/data/good-dags/subdag_test.yaml')

    actual_paths = workflow._all_node_paths()

    datacopier = (None, 'Datacopier')
    success_sensor = (None, 'SuccessFileSensor')
    tester = (None, 'tester')
    subdag_sensor = ('subdag-tester', 'SubDagSuccessFileSensor')

    expected_paths = {
        datacopier: [datacopier],
        success_sensor: [success_sensor],
        tester: [tester],
        # The sub-dag node's path goes through the primary ``tester`` node.
        subdag_sensor: [tester, subdag_sensor],
    }
    assert actual_paths == expected_paths
def test_only_subdag_operator_includes_sub_workflow():
    """Keeping only ``tester`` still leaves sub-dag data in the result."""
    original = Workflow.load('test/data/good-dags/subdag_test.yaml')
    kept_nodes = ['tester']

    restricted = original.prune(only_nodes=kept_nodes)

    # NOTE: an equality check between the original and pruned ``secondary``
    # values was disabled here; only their presence is asserted below.

    assert 'sub_dags' in original.primary
    assert original.secondary

    assert 'sub_dags' in restricted.primary
    assert restricted.secondary
def test_parse_workflow_with_resources():
    """The simple dataproc DAG yields the expected primary nodes and no secondaries."""
    workflow = Workflow.load('test/data/good-dags/simple_dataproc_dag.yaml')

    primary_graph = workflow.specs.graphs.primary.graph
    primary_node_names = frozenset(
        node.name for node in primary_graph.nodes())
    expected_node_names = frozenset([
        'Datacopier',
        'dataproc_cluster_create',
        'dataproc_cluster_destroy',
        'dataproc_cluster_destroy-sentinel',
    ])
    assert primary_node_names == expected_node_names

    assert not workflow.specs.graphs.secondary

    # Building the DAG must also complete; the result is printed.
    built = workflow.build_dag(
        PrimaryDagBuilder, SubDagBuilder, GeneratorBuilder)
    print(built)
示例#10
0
def test_parse_workflow_with_generator():
    """The generator DAG yields the expected primary nodes and one secondary graph."""
    workflow = Workflow.load('test/data/good-dags/generator_test.yaml')

    primary_graph = workflow.specs.graphs.primary.graph
    primary_node_names = frozenset(
        node.name for node in primary_graph.nodes())
    expected_primary_names = frozenset([
        'file_lister',
        'Datacopier',
        'SuccessFileSensor',
        'dataproc_cluster_create',
        'dataproc_cluster_destroy',
    ])
    assert primary_node_names == expected_primary_names

    assert len(workflow.specs.graphs.secondary) == 1

    secondary_graph = workflow.specs.graphs.secondary[0].graph
    secondary_node_names = frozenset(
        node.name for node in secondary_graph.nodes())
    assert secondary_node_names == frozenset(['SubDagSuccessFileSensor'])

    # Building the DAG must also complete; the result is printed.
    built = workflow.build_dag(
        PrimaryDagBuilder, SubDagBuilder, GeneratorBuilder)
    print(built)
示例#11
0
def test_parse_workflow_with_multiple_generators():
    """The multi-generator DAG yields the expected primary and secondary nodes."""
    workflow = Workflow.load('test/data/good-dags/multi_generators.yaml')

    primary_graph = workflow.specs.graphs.primary.graph
    primary_node_names = frozenset(
        node.name for node in primary_graph.nodes())
    expected_primary_names = frozenset([
        'file_lister',
        'file_lister_1',
        'file_lister_2',
        'file_lister_3',
        'file_lister-6e0a68-fc',
        'file_lister_3-b3341b-fc',
    ])
    assert primary_node_names == expected_primary_names

    assert len(workflow.specs.graphs.secondary) == 1

    secondary_graph = workflow.specs.graphs.secondary[0].graph
    secondary_node_names = frozenset(
        node.name for node in secondary_graph.nodes())
    assert secondary_node_names == frozenset(['SubDagSuccessFileSensor'])

    # Building the DAG must also complete; the result is printed.
    built = workflow.build_dag(
        PrimaryDagBuilder, SubDagBuilder, GeneratorBuilder)
    print(built)
示例#12
0
def test_node_path_partitioner():
    """``_partitioned_node_paths`` splits paths into referenced and complement lists."""
    workflow = Workflow.load('test/data/good-dags/subdag_test.yaml')

    def first_node_name(path):
        # Each path entry is a (workflow, node) pair; order by the node name
        # of the first entry so the comparisons below are deterministic.
        return path[0][1]

    partitions = workflow._partitioned_node_paths(
        node_refs=['subdag-tester.SubDagSuccessFileSensor'])
    (matched_paths, remaining_paths) = partitions

    matched_paths.sort(key=first_node_name)
    expected_matched = [
        [
            (None, 'tester'),
            ('subdag-tester', 'SubDagSuccessFileSensor'),
        ],
    ]
    assert matched_paths == expected_matched

    remaining_paths.sort(key=first_node_name)
    expected_remaining = [
        [(None, 'Datacopier')],
        [(None, 'SuccessFileSensor')],
        [(None, 'tester')],
    ]
    assert remaining_paths == expected_remaining
示例#13
0
def test_build_referrer_map():
    """The referrer map lists ``(None, 'tester')`` under ``subdag-tester``."""
    workflow = Workflow.load('test/data/good-dags/subdag_test.yaml')

    referrer_map = workflow._build_referrer_map()

    expected_map = {'subdag-tester': [(None, 'tester')]}
    assert referrer_map == expected_map