Пример #1
0
def test_k8s_run_launcher(dagster_instance_for_k8s_run_launcher,
                          helm_namespace_for_k8s_run_launcher):
    """Launch ``demo_pipeline`` on the K8s run launcher instance and expect success.

    The run is built from the re-originated external pipeline, launched on the
    instance fixture, and the value returned by
    ``wait_for_job_and_get_raw_logs`` for the ``dagster-run-<run_id>`` job must
    contain ``PIPELINE_SUCCESS``.
    """
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    env_yaml_path = os.path.join(get_test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml_path)
    pipeline_name = "demo_pipeline"

    with get_test_project_workspace_and_external_pipeline(
        instance, pipeline_name
    ) as (workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        external_origin = reoriginated.get_external_origin()
        python_origin = reoriginated.get_python_origin()

        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=external_origin,
            pipeline_code_origin=python_origin,
        )

        instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)
Пример #2
0
def test_reoriginated_external_pipeline():
    """Check that a re-originated external pipeline yields truthy origins.

    Both ``get_python_origin()`` and ``get_external_origin()`` of the wrapper
    built around ``demo_pipeline_celery`` must return a truthy value.
    """
    with instance_for_test() as instance, \
            get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_celery"
            ) as (_workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)

        python_origin = reoriginated.get_python_origin()
        assert python_origin

        external_origin = reoriginated.get_external_origin()
        assert external_origin
Пример #3
0
def test_launch_docker_image_on_instance_config():
    """Run ``demo_pipeline`` with the Docker image given in the launcher config.

    The ``DockerRunLauncher`` is configured through instance overrides with the
    test project image under its ``image`` key; the launched run must end in
    ``PipelineRunStatus.SUCCESS``.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
        "image": docker_image,
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    run_config = merge_yamls(yaml_paths)

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline")
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline"
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(orig_pipeline)

            pipeline_def = recon_pipeline.get_definition()
            external_origin = reoriginated.get_external_origin()
            python_origin = reoriginated.get_python_origin()
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config=run_config,
                external_pipeline_origin=external_origin,
                pipeline_code_origin=python_origin,
            )

            instance.launch_run(run.run_id, workspace)
            poll_for_finished_run(instance, run.run_id, timeout=60)

            finished_run = instance.get_run_by_id(run.run_id)
            assert finished_run.status == PipelineRunStatus.SUCCESS
Пример #4
0
def test_launch_docker_invalid_image():
    """Launching with a malformed Docker image name must raise.

    The launcher config carries the image ``_invalid_format_image``; calling
    ``instance.launch_run`` is expected to raise an exception whose message
    matches the "not correctly formatted" text.
    """
    docker_image = "_invalid_format_image"
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
        "image": docker_image,
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    run_config = merge_yamls(yaml_paths)

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3"
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(orig_pipeline)

            pipeline_def = recon_pipeline.get_definition()
            external_origin = reoriginated.get_external_origin()
            python_origin = reoriginated.get_python_origin()
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config=run_config,
                external_pipeline_origin=external_origin,
                pipeline_code_origin=python_origin,
            )

            # The message is escaped so it is matched literally, not as a regex.
            expected_message = re.escape(
                "Docker image name _invalid_format_image is not correctly formatted"
            )
            with pytest.raises(Exception, match=expected_message):
                instance.launch_run(run.run_id, workspace)
Пример #5
0
def test_execute_on_celery_k8s_image_from_origin(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Run ``demo_pipeline_celery`` with the image taken from the pipeline origin.

    The executor config is built with ``dagster_docker_image=None``; the image
    is instead passed as ``container_image`` to the workspace helper and to the
    re-originated pipeline. The run must succeed and end up tagged with
    ``dagster_docker_image`` under ``DOCKER_IMAGE_TAG``.
    """
    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    yaml_config = merge_yamls(yaml_paths)
    # Image deliberately left out of the executor config.
    engine_config = get_celery_engine_config(
        dagster_docker_image=None, job_namespace=helm_namespace
    )
    run_config = merge_dicts(yaml_config, engine_config)

    pipeline_name = "demo_pipeline_celery"
    with get_test_project_workspace_and_external_pipeline(
        dagster_instance, pipeline_name, container_image=dagster_docker_image
    ) as (workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(
            external_pipeline, container_image=dagster_docker_image
        )
        external_origin = reoriginated.get_external_origin()
        python_origin = reoriginated.get_python_origin()

        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=external_origin,
            pipeline_code_origin=python_origin,
        )

        dagster_instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)

        refreshed_run = dagster_instance.get_run_by_id(run.run_id)
        assert refreshed_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Пример #6
0
def test_failing_k8s_run_launcher(dagster_instance_for_k8s_run_launcher,
                                  helm_namespace_for_k8s_run_launcher):
    """Launch ``demo_pipeline`` with an invalid run config and expect failure.

    The value returned by ``wait_for_job_and_get_raw_logs`` must NOT contain
    ``PIPELINE_SUCCESS``, and the run's event records must mention both the
    unexpected config entry and the missing ``solids`` entry.
    """
    run_config = {"blah blah this is wrong": {}}
    pipeline_name = "demo_pipeline"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance_for_k8s_run_launcher, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(
            external_pipeline)

        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(
            ),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance_for_k8s_run_launcher.launch_run(run.run_id, workspace)
        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id,
            namespace=helm_namespace_for_k8s_run_launcher)

        # This is a negative check, so the failure message must describe an
        # unexpected success rather than a missing match.
        assert "PIPELINE_SUCCESS" not in result, (
            "unexpected PIPELINE_SUCCESS, result: {}".format(result))

        event_records = dagster_instance_for_k8s_run_launcher.all_logs(
            run.run_id)

        # Stringify each event once; both checks below search the same text.
        event_texts = [str(event) for event in event_records]

        assert any(
            'Received unexpected config entry "blah blah this is wrong"' in text
            for text in event_texts
        )
        assert any(
            'Missing required config entry "solids"' in text
            for text in event_texts
        )
Пример #7
0
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Run only the ``count_letters`` solid of ``demo_pipeline_celery``.

    The run is created with ``solids_to_execute={"count_letters"}`` and the
    value returned by ``wait_for_job_and_get_raw_logs`` must contain
    ``PIPELINE_SUCCESS``.
    """
    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env_subset.yaml", "env_s3.yaml")
    ]
    yaml_config = merge_yamls(yaml_paths)
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
    )
    run_config = merge_dicts(yaml_config, engine_config)

    pipeline_name = "demo_pipeline_celery"
    with get_test_project_workspace_and_external_pipeline(
        dagster_instance, pipeline_name
    ) as (workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        external_origin = reoriginated.get_external_origin()
        python_origin = reoriginated.get_python_origin()

        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            solids_to_execute={"count_letters"},
            external_pipeline_origin=external_origin,
            pipeline_code_origin=python_origin,
        )

        dagster_instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)
Пример #8
0
def _test_launch(docker_image, launcher_config, terminate=False):
    """Launch ``demo_pipeline_s3`` with ``DockerRunLauncher`` and check the outcome.

    Args:
        docker_image: Image name passed to ``find_local_test_image`` when not
            running on Buildkite.
        launcher_config: Config mapping for the run launcher. It is not
            modified; on Buildkite a copy with an added ``registry`` entry is
            used instead.
        terminate: When false, the run must finish with
            ``PipelineRunStatus.SUCCESS``. When true, the helper waits for the
            run to reach ``STARTED``, terminates it through the run launcher
            and expects ``PipelineRunStatus.CANCELED``.

    Raises:
        Exception: If ``terminate`` is true and the run does not reach
            ``STARTED`` within 60 seconds.
    """
    if IS_BUILDKITE:
        # Build a new dict instead of assigning into the caller's mapping, so
        # a config shared between calls is never altered by this helper.
        launcher_config = {
            **launcher_config,
            "registry": get_buildkite_registry_config(),
        }
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(instance, "demo_pipeline_s3") as (
            workspace,
            orig_pipeline,
        ):
            external_pipeline = ReOriginatedExternalPipelineForTest(orig_pipeline)

            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.launch_run(run.run_id, workspace)

            if not terminate:
                poll_for_finished_run(instance, run.run_id, timeout=60)

                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
                return

            start_time = time.time()

            # Matches this run only once it has reached STARTED.
            started_filter = PipelineRunsFilter(
                run_ids=[run.run_id],
                statuses=[
                    PipelineRunStatus.STARTED,
                ],
            )

            # Poll until the run shows up as STARTED, giving up after 60 seconds.
            while not instance.get_runs(started_filter, limit=1):
                time.sleep(0.1)
                if time.time() - start_time > 60:
                    raise Exception("Timed out waiting for run to start")

            launcher = instance.run_launcher
            assert launcher.can_terminate(run.run_id)
            assert launcher.terminate(run.run_id)

            poll_for_finished_run(instance, run.run_id, timeout=60)
            assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.CANCELED
Пример #9
0
def test_launch_docker_no_network():
    """Launch a Docker run with no ``network`` entry in the launcher config.

    After launch the run must be in ``PipelineRunStatus.STARTING`` and tagged
    with the Docker image; the test then polls the container named by
    ``DOCKER_CONTAINER_ID_TAG`` until its status is ``"exited"`` (failing after
    60 seconds) and finally force-removes it.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    run_config = merge_yamls(yaml_paths)

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }
    # Ensure the container will time out and fail quickly
    connection_args = {"params": {"connect_timeout": 2}}

    with docker_postgres_instance(
        overrides=instance_overrides,
        conn_args=connection_args,
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline, container_image=docker_image
            )

            pipeline_def = recon_pipeline.get_definition()
            external_origin = reoriginated.get_external_origin()
            python_origin = reoriginated.get_python_origin()
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config=run_config,
                external_pipeline_origin=external_origin,
                pipeline_code_origin=python_origin,
            )
            instance.launch_run(run.run_id, workspace)

            # Container launches, but run is stuck in STARTING state
            # due to not being able to access the network
            run = instance.get_run_by_id(run.run_id)
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            container_id = run.tags[DOCKER_CONTAINER_ID_TAG]

            run = instance.get_run_by_id(run.run_id)

            assert run.status == PipelineRunStatus.STARTING
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            docker_client = docker.client.from_env()

            # Stays None if the first lookup raises, so cleanup is skipped then.
            container = None
            try:
                poll_started = time.time()
                while True:
                    container = docker_client.containers.get(container_id)
                    if time.time() - poll_started > 60:
                        raise Exception("Timed out waiting for container to exit")

                    if container.status != "exited":
                        time.sleep(3)
                        continue

                    break
            finally:
                if container:
                    container.remove(force=True)
Пример #10
0
def test_terminate_launched_docker_run():
    """Terminate a running ``hanging_pipeline`` launched via ``DockerRunLauncher``.

    Once ``poll_for_step_start`` returns, the run launcher must report the run
    as terminable and terminate it; the run must end in
    ``PipelineRunStatus.CANCELED`` and its logs are checked with
    ``_check_event_log_contains`` for the cancel/failure events listed below.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    s3_yaml_path = os.path.join(get_test_project_environments_path(), "env_s3.yaml")
    run_config = merge_yamls([s3_yaml_path])

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "hanging_pipeline", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline, container_image=docker_image
            )

            pipeline_def = recon_pipeline.get_definition()
            external_origin = reoriginated.get_external_origin()
            python_origin = reoriginated.get_python_origin()
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config=run_config,
                external_pipeline_origin=external_origin,
                pipeline_code_origin=python_origin,
            )
            run_id = run.run_id

            instance.launch_run(run_id, workspace)
            poll_for_step_start(instance, run_id)

            run_launcher = instance.run_launcher
            assert run_launcher.can_terminate(run_id)
            # Fetched again, as the assertions access the launcher separately.
            assert instance.run_launcher.terminate(run_id)

            # The run is re-read from the instance after polling completes.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_run = instance.get_run_by_id(run_id)
            assert terminated_run.status == PipelineRunStatus.CANCELED

            expected_events = [
                ("PIPELINE_CANCELING", "Sending run termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of run for "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Process for run exited"),
            ]
            _check_event_log_contains(instance.all_logs(run_id), expected_events)
Пример #11
0
def test_launch_docker_image_on_pipeline_config():
    """Run ``demo_pipeline_s3`` with the Docker image supplied via the pipeline origin.

    The launcher config has no ``image`` entry; the image is passed as
    ``container_image`` to the recon pipeline, the workspace helper and the
    re-originated pipeline. The network is read from the
    ``DOCKER_LAUNCHER_NETWORK`` environment variable set for the duration of
    the test. The run must succeed and be tagged with the image.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "DOCKER_LAUNCHER_NETWORK",
        ],
        "network": {"env": "DOCKER_LAUNCHER_NETWORK"},
        "container_kwargs": {"auto_remove": True},
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    run_config = merge_yamls(yaml_paths)

    network_env = {"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}
    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with environ(network_env):
        with docker_postgres_instance(overrides=instance_overrides) as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
            with get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_s3", container_image=docker_image
            ) as (workspace, orig_pipeline):
                reoriginated = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image
                )

                pipeline_def = recon_pipeline.get_definition()
                external_origin = reoriginated.get_external_origin()
                python_origin = reoriginated.get_python_origin()
                run = instance.create_run_for_pipeline(
                    pipeline_def=pipeline_def,
                    run_config=run_config,
                    external_pipeline_origin=external_origin,
                    pipeline_code_origin=python_origin,
                )
                instance.launch_run(run.run_id, workspace)

                poll_for_finished_run(instance, run.run_id, timeout=60)

                finished_run = instance.get_run_by_id(run.run_id)
                assert finished_run.status == PipelineRunStatus.SUCCESS
                assert finished_run.tags[DOCKER_IMAGE_TAG] == docker_image
Пример #12
0
def test_docker_monitoring():
    """Stop a run's container mid-run and expect the run to still succeed.

    With ``run_monitoring`` enabled and a daemon started via ``start_daemon``,
    the test launches ``demo_pipeline_docker_slow``, waits up to 60 seconds for
    it to reach ``STARTED``, stops the container obtained from the run
    launcher, and then expects the run to finish with
    ``PipelineRunStatus.SUCCESS``.
    """
    docker_image = get_test_project_docker_image()

    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "networks": ["container:test-postgres-db-docker"],
        "container_kwargs": {
            # NOTE(review): "auto_remove" was left disabled here, presumably so
            # the stopped container is not removed before the run is resumed;
            # confirm.
            "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
        },
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    s3_yaml_path = os.path.join(get_test_project_environments_path(), "env_s3.yaml")
    base_config = load_yaml_from_path(s3_yaml_path)
    slow_solid_config = {
        "solids": {
            "multiply_the_word_slow": {
                "inputs": {"word": "bar"},
                "config": {"factor": 2, "sleep_time": 20},
            }
        },
        "execution": {"docker": {"config": {}}},
    }
    run_config = merge_dicts(base_config, slow_solid_config)

    instance_overrides = {
        "run_monitoring": {"enabled": True},
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        },
    }

    with docker_postgres_instance(instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline_docker_slow", docker_image
        )
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker_slow", container_image=docker_image
        ) as (workspace, orig_pipeline):
            with start_daemon():
                reoriginated = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image
                )

                pipeline_def = recon_pipeline.get_definition()
                external_origin = reoriginated.get_external_origin()
                python_origin = reoriginated.get_python_origin()
                run = instance.create_run_for_pipeline(
                    pipeline_def=pipeline_def,
                    run_config=run_config,
                    external_pipeline_origin=external_origin,
                    pipeline_code_origin=python_origin,
                )

                with log_run_events(instance, run.run_id):
                    instance.launch_run(run.run_id, workspace)

                    # Wait up to 60 seconds for STARTED; until then the run may
                    # only be in STARTING. On timeout the loop simply ends.
                    wait_began = time.time()
                    while time.time() - wait_began < 60:
                        run = instance.get_run_by_id(run.run_id)
                        if run.status == PipelineRunStatus.STARTED:
                            break
                        assert run.status == PipelineRunStatus.STARTING
                        time.sleep(1)

                    time.sleep(3)

                    launcher = instance.run_launcher
                    container = launcher._get_container(  # pylint:disable=protected-access
                        instance.get_run_by_id(run.run_id)
                    )
                    container.stop()

                    # daemon resumes the run
                    poll_for_finished_run(instance, run.run_id, timeout=90)
                    finished_run = instance.get_run_by_id(run.run_id)
                    assert finished_run.status == PipelineRunStatus.SUCCESS
Пример #13
0
def test_k8s_executor_config_override(kubeconfig_file):
    """Check which image a ``CeleryK8sRunLauncher`` uses with and without ``job_image``.

    Two runs are launched against a mocked batch API. Without a ``job_image``
    in the executor config the run is tagged with ``my_image:tag`` (the value
    passed to the workspace helper); with ``job_image`` set the tag is
    ``fake-image-name``. The first two recorded mock calls must carry those
    images in the container spec of their ``body`` argument.
    """
    # Construct a K8s run launcher in a fake k8s environment.
    batch_api_mock = mock.MagicMock()
    launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=batch_api_mock,
    )

    pipeline_name = "demo_pipeline"

    with instance_for_test() as instance:
        with get_test_project_workspace_and_external_pipeline(
            instance, pipeline_name, "my_image:tag"
        ) as (workspace, external_pipeline):

            # Launch the run in a fake Dagster instance.
            launcher.register_instance(instance)

            def launch_and_read_image_tag(run_config):
                """Create and launch a run, then return its DOCKER_IMAGE_TAG tag."""
                run = create_run_for_test(
                    instance,
                    pipeline_name=pipeline_name,
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                launcher.launch_run(LaunchRunContext(run, workspace))
                return instance.get_run_by_id(run.run_id).tags[DOCKER_IMAGE_TAG]

            # Launch without custom job_image
            default_tag = launch_and_read_image_tag({"execution": {"celery-k8s": {}}})
            assert default_tag == "my_image:tag"

            # Launch with custom job_image
            custom_tag = launch_and_read_image_tag(
                {"execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}}
            )
            assert custom_tag == "fake-image-name"

        # Check that user defined k8s config was passed down to the k8s job.
        recorded_calls = batch_api_mock.method_calls
        assert len(recorded_calls) > 0

        for call_index, expected_image in enumerate(("my_image:tag", "fake-image-name")):
            _, _args, kwargs = recorded_calls[call_index]
            containers = kwargs["body"].spec.template.spec.containers
            assert containers[0].image == expected_image
Пример #14
0
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
    """Run ``retry_pipeline`` on Celery-K8s and check the retry events.

    The value returned by ``wait_for_job_and_get_raw_logs`` must contain
    ``PIPELINE_SUCCESS``, the run stats must report exactly one succeeded step,
    and the run's Dagster events must include ``STEP_START``,
    ``STEP_UP_FOR_RETRY``, ``STEP_RESTARTED`` and ``STEP_SUCCESS``.
    """
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "retry_pipeline"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(
            external_pipeline)

        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(
            ),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance.launch_run(run.run_id, workspace)

        result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                               run.run_id,
                                               namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)

        stats = dagster_instance.get_run_stats(run.run_id)
        assert stats.steps_succeeded == 1

        # Fetch and filter the event log once; every check below reads the
        # same list instead of re-querying the instance per assertion.
        observed_event_types = [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]

        for expected_event_type in (
                DagsterEventType.STEP_START,
                DagsterEventType.STEP_UP_FOR_RETRY,
                DagsterEventType.STEP_RESTARTED,
                DagsterEventType.STEP_SUCCESS,
        ):
            assert expected_event_type in observed_event_types, (
                "missing event type {}, observed: {}".format(
                    expected_event_type, observed_event_types))
Пример #15
0
def test_container_context_on_pipeline():
    """Run ``demo_pipeline_docker`` with Docker settings given as container context.

    The launcher config is empty apart from the optional Buildkite registry;
    env vars, networks and container kwargs are passed as ``container_context``
    to the recon pipeline, whose python origin is used as the run's
    ``pipeline_code_origin``. The run must end in ``PipelineRunStatus.SUCCESS``.
    """
    docker_image = get_test_project_docker_image()

    launcher_config = {}

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    executor_config = {"execution": {"docker": {"config": {}}}}

    yaml_paths = [
        os.path.join(get_test_project_environments_path(), yaml_name)
        for yaml_name in ("env.yaml", "env_s3.yaml")
    ]
    run_config = merge_dicts(merge_yamls(yaml_paths), executor_config)

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }
    docker_container_context = {
        "docker": {
            "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
            "networks": ["container:test-postgres-db-docker"],
            "container_kwargs": {
                "auto_remove": True,
                "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
            },
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline_docker",
            docker_image,
            container_context=docker_container_context,
        )
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline, container_image=docker_image
            )

            pipeline_def = recon_pipeline.get_definition()
            external_origin = reoriginated.get_external_origin()
            # The code origin comes from the recon pipeline (which carries the
            # container context), not from the re-originated external pipeline.
            python_origin = recon_pipeline.get_python_origin()
            run = instance.create_run_for_pipeline(
                pipeline_def=pipeline_def,
                run_config=run_config,
                external_pipeline_origin=external_origin,
                pipeline_code_origin=python_origin,
            )

            instance.launch_run(run.run_id, workspace)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            for log_entry in instance.all_logs(run.run_id):
                print(log_entry)  # pylint: disable=print-call

            finished_run = instance.get_run_by_id(run.run_id)
            assert finished_run.status == PipelineRunStatus.SUCCESS
Пример #16
0
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance,
        set_dagster_k8s_pipeline_run_namespace_env):
    """Launch ``hard_failer`` with a forced failure and check it is reported.

    The run config is ``env_s3.yaml`` merged with the celery engine config and
    a solid config that sets ``fail: True`` on ``hard_fail_or_0``. After
    launching, the test polls (5 s interval, 120 s budget per phase) until:

    1. the run status becomes ``PipelineRunStatus.FAILURE``, and
    2. a ``STEP_FAILURE`` event with step key ``hard_fail_or_0`` is logged.

    ``set_dagster_k8s_pipeline_run_namespace_env`` is not referenced in the
    body; presumably the fixture exports ``DAGSTER_K8S_PIPELINE_RUN_NAMESPACE``,
    which ``job_namespace`` reads from the environment -- confirm against the
    fixture definition.
    """
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([
                os.path.join(get_test_project_environments_path(),
                             "env_s3.yaml"),
            ]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {
            "hard_fail_or_0": {
                "config": {
                    "fail": True
                }
            }
        }},
    )

    pipeline_name = "hard_failer"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(
            external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(
            ),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance.launch_run(run.run_id, workspace)
        assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

        # Check that pipeline run is marked as failed
        pipeline_run_status_failure = False
        start_time = datetime.datetime.now()
        timeout = datetime.timedelta(0, 120)

        while datetime.datetime.now() < start_time + timeout:
            pipeline_run = dagster_instance.get_run_by_id(run.run_id)
            if pipeline_run.status == PipelineRunStatus.FAILURE:
                pipeline_run_status_failure = True
                break
            time.sleep(5)
        assert pipeline_run_status_failure

        # Check for step failure for hard_fail_or_0.compute
        start_time = datetime.datetime.now()
        step_failure_found = False
        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance.all_logs(run.run_id)
            step_failure_found = any(
                event_record.dagster_event
                and event_record.dagster_event.event_type
                == DagsterEventType.STEP_FAILURE
                and event_record.dagster_event.step_key == "hard_fail_or_0"
                for event_record in event_records)
            # Stop polling as soon as the event shows up; without this exit
            # the loop would keep sleeping until the whole timeout elapsed
            # even though the outcome is already known.
            if step_failure_found:
                break
            time.sleep(5)
        assert step_failure_found
Пример #17
0
def _test_termination(dagster_instance, run_config):
    """Launch ``resource_pipeline``, terminate it mid-run, and verify cleanup.

    Phases, each polled every 5 seconds within its own 120-second budget:

    1. wait until the run launcher reports the run can be terminated;
    2. wait until a ``STEP_START`` event is logged;
    3. terminate the run and wait for status ``CANCELED``;
    4. confirm a second terminate is refused;
    5. wait until the log holds exactly one each of: step failure,
       ``PIPELINE_CANCELING``, ``PIPELINE_CANCELED``, resource-init message
       and resource-teardown message.

    Finally asserts that the S3 object
    ``resource_termination_test/<run_id>`` can be fetched from the scratch
    bucket.

    Args:
        dagster_instance: instance whose run launcher must be a
            ``CeleryK8sRunLauncher``.
        run_config: run config passed through to ``create_run_for_test``.
    """
    pipeline_name = "resource_pipeline"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        run_id = run.run_id

        dagster_instance.launch_run(run_id, workspace)
        assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

        poll_budget = datetime.timedelta(seconds=120)

        # Phase 1: the launcher must report the run as terminable.
        became_terminable = False
        deadline = datetime.datetime.now() + poll_budget
        while datetime.datetime.now() < deadline:
            if dagster_instance.run_launcher.can_terminate(run_id=run_id):
                became_terminable = True
                break
            time.sleep(5)
        assert became_terminable

        # Phase 2: at least one step must have started.
        saw_step_start = False
        deadline = datetime.datetime.now() + poll_budget
        while datetime.datetime.now() < deadline:
            saw_step_start = any(
                record.dagster_event and
                record.dagster_event.event_type == DagsterEventType.STEP_START
                for record in dagster_instance.all_logs(run_id))
            if saw_step_start:
                break
            time.sleep(5)
        assert saw_step_start

        # Phase 3: request termination and wait for the CANCELED status.
        assert dagster_instance.run_launcher.can_terminate(run_id=run_id)
        assert dagster_instance.run_launcher.terminate(run_id=run_id)

        reached_canceled = False
        deadline = datetime.datetime.now() + poll_budget
        while datetime.datetime.now() < deadline:
            current_status = dagster_instance.get_run_by_id(run_id).status
            if current_status == PipelineRunStatus.CANCELED:
                reached_canceled = True
                break
            time.sleep(5)
        assert reached_canceled

        # Phase 4: a finished run can no longer be terminated.
        assert not dagster_instance.run_launcher.can_terminate(run_id=run_id)
        assert not dagster_instance.run_launcher.terminate(run_id=run_id)

        # Phase 5: the log must contain exactly one of each expected entry.
        saw_expected_events = False
        deadline = datetime.datetime.now() + poll_budget
        while datetime.datetime.now() < deadline:
            failures = canceling = canceled = 0
            inits = teardowns = 0
            for record in dagster_instance.all_logs(run_id):
                event = record.dagster_event
                if event:
                    # Structured events are classified by type only; their
                    # message text is deliberately not inspected.
                    kind = event.event_type
                    if kind == DagsterEventType.STEP_FAILURE:
                        failures += 1
                    elif kind == DagsterEventType.PIPELINE_CANCELING:
                        canceling += 1
                    elif kind == DagsterEventType.PIPELINE_CANCELED:
                        canceled += 1
                    continue

                text = record.message
                if not text:
                    continue
                if "initializing s3_resource_with_context_manager" in text:
                    inits += 1
                if "tearing down s3_resource_with_context_manager" in text:
                    teardowns += 1

            observed = (failures, inits, teardowns, canceling, canceled)
            if observed == (1, 1, 1, 1, 1):
                saw_expected_events = True
                break
            time.sleep(5)
        assert saw_expected_events

        # The object keyed by this run id must be retrievable from S3.
        s3_resource = boto3.resource(
            "s3",
            region_name="us-west-1",
            use_ssl=True,
            endpoint_url=None,
        )
        s3 = s3_resource.meta.client
        bucket = "dagster-scratch-80542c2"
        key = "resource_termination_test/{}".format(run_id)
        assert s3.get_object(Bucket=bucket, Key=key)