Example No. 1
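A parametrized test that runs the iris_classify pipeline from the legacy data-science docs snippets. It loads run config from YAML when a path is given, redirects the io_manager to a temporary directory, and uses solid_selection to skip the download_file step.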
def test_pipelines_success(file_path, run_config_path):
    with pushd(
        file_relative_path(__file__, "../../../docs_snippets/legacy/data_science/")
    ):
        with instance_for_test() as instance:
            run_config = load_yaml_from_path(run_config_path) if run_config_path else {}
            recon_pipeline = ReconstructablePipeline.for_file(file_path, "iris_classify")

            with tempfile.TemporaryDirectory() as temp_dir:
                # Route io_manager output to a throwaway directory so the
                # test leaves no artifacts behind.
                run_config["resources"] = {"io_manager": {"config": {"base_dir": temp_dir}}}
                pipeline_result = execute_pipeline(
                    recon_pipeline,
                    run_config=run_config,
                    instance=instance,
                    solid_selection=["k_means_iris"],  # skip download_file in tests
                )
                assert pipeline_result.success
Example No. 2
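A generator helper that reconstructs a pipeline from REPO_FILE, optionally subsets it, and executes it on Celery, defaulting to a temp-dir-backed io_manager when no run config is supplied.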
def execute_pipeline_on_celery(
    pipeline_name, instance=None, run_config=None, tempdir=None, tags=None, subset=None
):
    with tempdir_wrapper(tempdir) as tempdir:
        pipeline_def = ReconstructablePipeline.for_file(
            REPO_FILE, pipeline_name
        ).subset_for_execution(subset)
        with _instance_wrapper(instance) as wrapped_instance:
            # Default config: temp-dir-backed io_manager, Celery executor.
            run_config = run_config or {
                "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
                "execution": {"celery": {}},
            }
            result = execute_pipeline(
                pipeline_def,
                run_config=run_config,
                instance=wrapped_instance,
                tags=tags,
            )
            yield result
Example No. 3
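Forces errors to propagate by patching PlanData.raise_on_error, then asserts that running the engine_error pipeline on Celery (in task_always_eager mode) raises DagsterSubprocessError.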
def test_engine_error(instance, tempdir):
    with mock.patch(
        "dagster.core.execution.context.system.PlanData.raise_on_error",
        return_value=True,
    ):
        with pytest.raises(DagsterSubprocessError):
            storage = os.path.join(tempdir, "flakey_storage")
            execute_pipeline(
                ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                run_config={
                    "resources": {"io_manager": {"config": {"base_dir": storage}}},
                    "execution": {
                        "celery": {"config": {"config_source": {"task_always_eager": True}}}
                    },
                    "solids": {"destroy": {"config": storage}},
                },
                instance=instance,
            )
Example No. 4
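Iterates over events from sleepy_dask_pipeline, sends an interrupt from a background thread once the first step starts, and verifies that both a step failure and a pipeline failure are emitted.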
def test_dask_terminate():
    run_config = {
        "solids": {
            "sleepy_dask_solid": {
                "inputs": {
                    "df": {"read": {"csv": {"path": file_relative_path(__file__, "ex*.csv")}}}
                }
            }
        }
    }

    interrupt_thread = None
    result_types = []

    with instance_for_test() as instance:
        for result in execute_pipeline_iterator(
            pipeline=ReconstructablePipeline.for_file(__file__, sleepy_dask_pipeline.name),
            run_config=run_config,
            instance=instance,
        ):
            # Interrupt once the first step starts
            if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
                interrupt_thread = Thread(target=send_interrupt, args=())
                interrupt_thread.start()

            if result.event_type == DagsterEventType.STEP_FAILURE:
                assert ("DagsterExecutionInterruptedError"
                        in result.event_specific_data.error.message)

            result_types.append(result.event_type)

        interrupt_thread.join()

        assert DagsterEventType.STEP_FAILURE in result_types
        assert DagsterEventType.PIPELINE_FAILURE in result_types
Example No. 5
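The same interrupt pattern against a live Celery worker: the in-flight step should finish, at least one queued step should be revoked, and the run as a whole should fail.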
def test_terminate_pipeline_on_celery(dagster_celery_worker, instance, tempdir):
    pipeline_def = ReconstructablePipeline.for_file(REPO_FILE, "interrupt_pipeline")

    run_config = {
        "resources": {"io_manager": {"config": {"base_dir": tempdir}}},
        "execution": {"celery": {}},
    }

    results = []
    result_types = []
    interrupt_thread = None

    for result in execute_pipeline_iterator(
        pipeline=pipeline_def,
        run_config=run_config,
        instance=instance,
    ):
        # Interrupt once the first step starts
        if result.event_type == DagsterEventType.STEP_START and not interrupt_thread:
            interrupt_thread = Thread(target=send_interrupt, args=())
            interrupt_thread.start()

        results.append(result)
        result_types.append(result.event_type)

    interrupt_thread.join()

    # At least one step succeeded (the one that was running when the interrupt fired)
    assert DagsterEventType.STEP_SUCCESS in result_types

    # At least one step was revoked via an engine event
    revoke_steps = [
        result
        for result in results
        if result.event_type == DagsterEventType.ENGINE_EVENT and "was revoked." in result.message
    ]

    assert len(revoke_steps) > 0

    # The overall pipeline failed
    assert DagsterEventType.PIPELINE_FAILURE in result_types
Example No. 6
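Executes dask_pipeline on a local Dask cluster with a 30-second timeout, merging the per-solid CSV input config into the execution config.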
def test_dask():
    run_config = {
        "solids": {
            "dask_solid": {
                "inputs": {
                    "df": {"read": {"csv": {"path": file_relative_path(__file__, "ex*.csv")}}}
                }
            }
        }
    }
    with instance_for_test() as instance:
        result = execute_pipeline(
            ReconstructablePipeline.for_file(__file__, dask_pipeline.name),
            run_config={
                "execution": {"dask": {"config": {"cluster": {"local": {"timeout": 30}}}}},
                **run_config,
            },
            instance=instance,
        )

    assert result.success
Example No. 7
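Materializes a notebook via exec_for_test, writes a temporary pipeline file that wraps that notebook in a dagstermill solid, reconstructs the new pipeline with for_file, and re-executes it.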
def test_hello_world_reexecution():
    with exec_for_test("hello_world_pipeline") as result:
        assert result.success

        output_notebook_path = get_path(
            [x for x in result.step_event_list if x.event_type_value == "ASSET_MATERIALIZATION"][0]
        )

        with tempfile.NamedTemporaryFile("w+", suffix=".py") as reexecution_notebook_file:
            reexecution_notebook_file.write(
                (
                    "from dagster import pipeline\n"
                    "from dagstermill import define_dagstermill_solid\n\n\n"
                    "reexecution_solid = define_dagstermill_solid(\n"
                    "    'hello_world_reexecution', '{output_notebook_path}'\n"
                    ")\n\n"
                    "@pipeline\n"
                    "def reexecution_pipeline():\n"
                    "    reexecution_solid()\n"
                ).format(output_notebook_path=output_notebook_path)
            )
            reexecution_notebook_file.flush()

            reexecution_pipeline = ReconstructablePipeline.for_file(
                reexecution_notebook_file.name, "reexecution_pipeline"
            )

            reexecution_result = None
            with instance_for_test() as instance:
                try:
                    reexecution_result = execute_pipeline(reexecution_pipeline, instance=instance)
                    assert reexecution_result.success
                finally:
                    if reexecution_result:
                        cleanup_result_notebook(reexecution_result)