Example #1
def _launch_run_and_wait_for_resume(
    dagit_url_for_k8s_run_launcher,
    run_config,
    instance,
    namespace,
    pipeline_name="slow_pipeline",
):

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    try:
        start_time = time.time()
        while True:
            assert time.time() - start_time < 60, "Timed out waiting for run to start"
            run = instance.get_run_by_id(run_id)
            if run.status == PipelineRunStatus.STARTED:
                break
            assert run.status == PipelineRunStatus.STARTING
            time.sleep(1)

        time.sleep(5)
        assert delete_job(get_job_name_from_run_id(run_id), namespace)

        poll_for_finished_run(instance, run_id, timeout=120)
        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
    finally:
        log_run_events(instance, run_id)
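
The log_run_events helper is not part of this excerpt. A minimal sketch of what it might do, assuming it only dumps the run's event log so a failed test leaves a useful trail (the implementation below is an assumption, not the source's code):

def log_run_events(instance, run_id):
    # Hypothetical sketch: print every event record for the run using only the
    # public DagsterInstance.all_logs API; the real helper may differ.
    for event_record in instance.all_logs(run_id):
        print("[{}] {}".format(run_id, event_record.message))  # pylint: disable=print-call
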
Example #2
def test_execute_on_celery_k8s_default(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance,
    helm_namespace,
    dagit_url,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="demo_pipeline_celery")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #3
def test_execute_queued_run_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance_for_daemon,
    helm_namespace_for_daemon,
    dagit_url_for_daemon,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_daemon,
        ),
    )

    run_id = launch_run_over_graphql(dagit_url_for_daemon,
                                     run_config=run_config,
                                     pipeline_name="demo_pipeline_celery")

    wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                  namespace=helm_namespace_for_daemon)

    logs = dagster_instance_for_daemon.all_logs(run_id)
    assert_events_in_order(
        logs,
        ["PIPELINE_ENQUEUED", "PIPELINE_DEQUEUED", "PIPELINE_STARTING", "PIPELINE_SUCCESS"],
    )
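
assert_events_in_order is referenced but not defined in this excerpt. A rough sketch, assuming it only verifies that the given event type names appear in the run's event log in the same relative order (names and logic here are an assumption):

def assert_events_in_order(logs, expected_order):
    # Hypothetical sketch, not the actual test utility.
    # Collect event type names in the order they were logged.
    logged_types = [
        record.dagster_event.event_type_value for record in logs if record.is_dagster_event
    ]
    search_from = 0
    for expected in expected_order:
        assert expected in logged_types[search_from:], "Did not find {} after position {} in {}".format(
            expected, search_from, logged_types
        )
        search_from = logged_types.index(expected, search_from) + 1
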
Example #4
def _launch_executor_run(
    dagit_url,
    run_config,
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    pipeline_name="demo_k8s_executor_pipeline",
    num_steps=2,
    mode="default",
):
    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name,
                                     mode=mode)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()

    events = dagster_instance_for_k8s_run_launcher.all_logs(run_id)
    assert len(_get_step_execution_events(events)) == num_steps

    return run_id
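
The step-count assertion relies on _get_step_execution_events, which is defined elsewhere. A plausible sketch, assuming it keeps one event per executed step by filtering for STEP_START (the filter choice is an assumption):

from dagster import DagsterEventType

def _get_step_execution_events(events):
    # Hypothetical sketch; the real helper may filter on a different event type.
    return [
        record
        for record in events
        if record.is_dagster_event
        and record.dagster_event.event_type == DagsterEventType.STEP_START
    ]
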
Example #5
def test_run_monitoring_fails_on_interrupt(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace, dagit_url
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_job_celery"

    run_id = launch_run_over_graphql(
        dagit_url, run_config=run_config, pipeline_name=pipeline_name
    )

    try:
        start_time = time.time()
        while time.time() - start_time < 60:
            run = dagster_instance.get_run_by_id(run_id)
            if run.status == PipelineRunStatus.STARTED:
                break
            assert run.status == PipelineRunStatus.STARTING
            time.sleep(1)

        assert delete_job(get_job_name_from_run_id(run_id), helm_namespace)
        poll_for_finished_run(dagster_instance, run_id, timeout=120)
        assert dagster_instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    finally:
        log_run_events(dagster_instance, run_id)
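
Both interrupt tests delete the Kubernetes Job by name via get_job_name_from_run_id. Given the "dagster-run-%s" % run_id job names polled in the other examples, a reasonable sketch (an inference, not the confirmed implementation) is:

def get_job_name_from_run_id(run_id):
    # Hypothetical sketch: mirror the "dagster-run-<run_id>" naming used above.
    return "dagster-run-{}".format(run_id)
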
Example #6
def test_k8s_run_launcher_default(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [
        p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example #7
def test_execute_on_celery_k8s_job_api_with_legacy_configmap_set(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    # Originally, jobs needed to include "dagster-pipeline-env" to pick up needed config when
    # using the helm chart - it's no longer needed, but verify that nothing breaks if it's included
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_job_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace,
            include_dagster_pipeline_env=True,
        ),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="demo_job_celery")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #8
def test_failing_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))

    pipeline_name = "always_fail_pipeline"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" not in result, "no match, result: {}".format(
        result)

    event_records = dagster_instance_for_k8s_run_launcher.all_logs(run_id)

    assert any("Op Exception Message" in str(event) for event in event_records)
Example #9
def test_k8s_run_launcher_terminate(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    pipeline_name = "slow_pipeline"

    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    wait_for_job(job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher)
    timeout = datetime.timedelta(0, 30)
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for can_terminate"
        if can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id):
            break
        time.sleep(5)

    terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id=run_id)

    start_time = datetime.datetime.now()
    pipeline_run = None
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for termination"
        pipeline_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.CANCELED:
            break

        time.sleep(5)

    # useful to have logs here, because the worker pods get deleted
    print(dagster_instance_for_k8s_run_launcher.all_logs(run_id))  # pylint: disable=print-call

    assert pipeline_run.status == PipelineRunStatus.CANCELED

    assert not can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id)
Example #10
def test_memoization_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    ephemeral_prefix = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )
    run_config = deep_merge_dicts(
        run_config,
        {"resources": {"io_manager": {"config": {"s3_prefix": ephemeral_prefix}}}},
    )

    try:
        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url,
                run_config=run_config,
                pipeline_name="memoization_pipeline",
                mode="celery",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id, namespace=helm_namespace
            )

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

            run_ids.append(run_id)

        unmemoized_run_id = run_ids[0]
        step_events = _get_step_events(
            dagster_instance.all_logs(unmemoized_run_id))
        assert len(step_events) == 4

        memoized_run_id = run_ids[1]
        step_events = _get_step_events(
            dagster_instance.all_logs(memoized_run_id))
        assert len(step_events) == 0

    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "celery",
                                 dagster_instance, run_config)
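
_get_step_events is likewise defined outside this excerpt. Since the memoization pipeline yields four step events on the first run and zero on the memoized rerun, a sketch that keeps one event per completed step would be consistent with the asserts (again an assumption):

from dagster import DagsterEventType

def _get_step_events(events):
    # Hypothetical sketch; assumes one STEP_SUCCESS event per executed step.
    return [
        record.dagster_event
        for record in events
        if record.is_dagster_event
        and record.dagster_event.event_type == DagsterEventType.STEP_SUCCESS
    ]
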
Example #11
def test_execute_on_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "retry_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id, namespace=helm_namespace_for_k8s_run_launcher
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    stats = dagster_instance_for_k8s_run_launcher.get_run_stats(run_id)
    assert stats.steps_succeeded == 1

    all_logs = dagster_instance_for_k8s_run_launcher.all_logs(run_id)

    assert DagsterEventType.STEP_START in [
        event.dagster_event.event_type for event in all_logs if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_UP_FOR_RETRY in [
        event.dagster_event.event_type for event in all_logs if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_RESTARTED in [
        event.dagster_event.event_type for event in all_logs if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_SUCCESS in [
        event.dagster_event.event_type for event in all_logs if event.is_dagster_event
    ]
Example #12
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance,
        set_dagster_k8s_pipeline_run_namespace_env, dagit_url):
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([
                os.path.join(get_test_project_environments_path(),
                             "env_s3.yaml"),
            ]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {
            "hard_fail_or_0": {
                "config": {
                    "fail": True
                }
            }
        }},
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="hard_failer")

    # Check that pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if (event_record.dagster_event.event_type
                        == DagsterEventType.STEP_FAILURE
                        and event_record.dagster_event.step_key
                        == "hard_fail_or_0"):
                    step_failure_found = True
                    break
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found
Example #13
def test_k8s_executor_resource_requirements(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    # sanity check that we have a K8sRunLauncher
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [
        p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "resources_limit_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example #14
def test_k8s_run_launcher_image_from_origin(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    # Like the previous test, but the executor doesn't supply an image - it's pulled
    # from the origin on the run instead
    pods = DagsterKubernetesClient.production_client().core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher
    )
    celery_pod_names = [
        p.metadata.name for p in pods.items if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env.yaml")),
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "demo_k8s_executor_pipeline"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example #15
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="retry_pipeline")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    stats = dagster_instance.get_run_stats(run_id)
    assert stats.steps_succeeded == 1

    all_logs = dagster_instance.all_logs(run_id)
    event_types = [
        event.dagster_event.event_type for event in all_logs if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_START in event_types
    assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
    assert DagsterEventType.STEP_RESTARTED in event_types
    assert DagsterEventType.STEP_SUCCESS in event_types
Example #16
def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image,
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    timeout = datetime.timedelta(0, 120)

    start_time = datetime.datetime.now()

    while True:
        assert (datetime.datetime.now() <
                start_time + timeout), "Timed out waiting for pipeline failure"
        event_records = dagster_instance_for_k8s_run_launcher.all_logs(run_id)

        found_pipeline_failure = False
        for event_record in event_records:
            if event_record.dagster_event:
                if event_record.dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
                    found_pipeline_failure = True

        if found_pipeline_failure:
            break

        time.sleep(5)

    assert (dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id).status
            == PipelineRunStatus.FAILURE)
Example #17
def test_execute_on_celery_k8s_with_resource_requirements(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="resources_limit_pipeline")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #18
def test_k8s_run_launcher_terminate(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    pipeline_name = "slow_pipeline"

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"))

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    wait_for_job(job_name="dagster-run-%s" % run_id,
                 namespace=helm_namespace_for_k8s_run_launcher)

    timeout = datetime.timedelta(0, 30)
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for can_terminate"
        if can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                          run_id):
            break
        time.sleep(5)

    terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id=run_id)

    start_time = datetime.datetime.now()
    pipeline_run = None
    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for termination"
        pipeline_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.CANCELED:
            break
        time.sleep(5)

    assert pipeline_run.status == PipelineRunStatus.CANCELED

    assert not can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                              run_id)
Example #19
def test_volume_mounts(dagster_docker_image, dagster_instance, helm_namespace,
                       dagit_url):
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="volume_mount_pipeline",
        mode="celery",
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #20
def test_k8s_run_launcher_secret_from_deployment(
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    # This run_config requires that WORD_FACTOR be set on both the user code deployment
    # and the run launcher. It will only work if secrets are propagated from the deployment
    # to the run launcher, since TEST_DEPLOYMENT_SECRET_NAME is only set on the user code
    # deployment but not on the run launcher config.
    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(),
                     "env_config_from_secrets.yaml"))
    pipeline_name = "demo_pipeline"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #21
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, helm_namespace, dagit_url):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(),
                         "env_subset.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="demo_pipeline_celery",
        solid_selection=["count_letters"],
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #22
def test_memoization_k8s_executor(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    ephemeral_path = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    run_config = deep_merge_dicts(
        run_config,
        {
            "resources": {
                "io_manager": {
                    "config": {
                        "s3_prefix": ephemeral_path
                    }
                }
            }
        },
    )

    # wrap in try/finally to ensure that memoized results are always cleaned from the s3 bucket
    try:
        pipeline_name = "memoization_pipeline"

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url_for_k8s_run_launcher,
                run_config=run_config,
                pipeline_name=pipeline_name,
                mode="k8s",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id,
                namespace=helm_namespace_for_k8s_run_launcher,
            )

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
                result)

            run_ids.append(run_id)

        # We expect that first run should have to run the step, since it has not yet been
        # memoized.
        unmemoized_run_id = run_ids[0]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            unmemoized_run_id)
        assert len(_get_step_execution_events(events)) == 1

        # We expect that second run should not have to run the step, since it has been memoized.
        memoized_run_id = run_ids[1]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            memoized_run_id)
        assert len(_get_step_execution_events(events)) == 0
    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "k8s",
                                 dagster_instance_for_k8s_run_launcher,
                                 run_config)
Example #23
def _test_termination(dagit_url, dagster_instance, run_config):
    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="resource_pipeline")

    # Wait for pipeline run to start
    timeout = datetime.timedelta(0, 120)
    start_time = datetime.datetime.now()

    while True:
        assert datetime.datetime.now() < start_time + timeout, "Timed out waiting for can_terminate"
        pipeline_run = dagster_instance.get_run_by_id(run_id)
        if can_terminate_run_over_graphql(dagit_url, run_id):
            break
        time.sleep(5)

    # Wait for step to start
    step_start_found = False
    start_time = datetime.datetime.now()
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run_id)
        for event_record in event_records:
            if (event_record.dagster_event
                    and event_record.dagster_event.event_type
                    == DagsterEventType.STEP_START):
                step_start_found = True
                break

        if step_start_found:
            break

        time.sleep(5)
    assert step_start_found

    # Terminate run
    assert can_terminate_run_over_graphql(dagit_url, run_id=run_id)
    terminate_run_over_graphql(dagit_url, run_id=run_id)

    # Check that pipeline run is marked as canceled
    pipeline_run_status_canceled = False
    start_time = datetime.datetime.now()
    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run_id)
        if pipeline_run.status == PipelineRunStatus.CANCELED:
            pipeline_run_status_canceled = True
            break
        time.sleep(5)
    assert pipeline_run_status_canceled

    # Check that terminate cannot be called again
    assert not can_terminate_run_over_graphql(dagit_url, run_id=run_id)

    # Check for step failure and resource tear down
    expected_events_found = False
    start_time = datetime.datetime.now()
    while datetime.datetime.now() < start_time + timeout:
        step_failures_count = 0
        resource_tear_down_count = 0
        resource_init_count = 0
        termination_request_count = 0
        termination_success_count = 0
        event_records = dagster_instance.all_logs(run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE:
                    step_failures_count += 1
                elif event_record.dagster_event.event_type == DagsterEventType.PIPELINE_CANCELING:
                    termination_request_count += 1
                elif event_record.dagster_event.event_type == DagsterEventType.PIPELINE_CANCELED:
                    termination_success_count += 1
            elif event_record.message:
                if "initializing s3_resource_with_context_manager" in event_record.message:
                    resource_init_count += 1
                if "tearing down s3_resource_with_context_manager" in event_record.message:
                    resource_tear_down_count += 1
        if (step_failures_count == 1 and resource_init_count == 1
                and resource_tear_down_count == 1
                and termination_request_count == 1
                and termination_success_count == 1):
            expected_events_found = True
            break
        time.sleep(5)
    assert expected_events_found

    s3 = boto3.resource("s3",
                        region_name="us-west-1",
                        use_ssl=True,
                        endpoint_url=None).meta.client
    bucket = "dagster-scratch-80542c2"
    key = "resource_termination_test/{}".format(run_id)
    assert s3.get_object(Bucket=bucket, Key=key)