Example #1
def test_k8s_run_launcher_default(dagster_instance_for_k8s_run_launcher,
                                  helm_namespace_for_k8s_run_launcher):
    # sanity check that we have a K8sRunLauncher
    check.inst(dagster_instance_for_k8s_run_launcher.run_launcher,
               K8sRunLauncher)
    pods = DagsterKubernetesClient.production_client(
    ).core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher)
    celery_pod_names = [
        p.metadata.name for p in pods.items
        if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    tags = {"key": "value"}

    with get_test_project_location_and_external_pipeline(pipeline_name) as (
            location,
            external_pipeline,
    ):
        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            tags=tags,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None,
                None).execution_plan_snapshot,
        )
        dagster_instance_for_k8s_run_launcher.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id,
            namespace=helm_namespace_for_k8s_run_launcher)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)

        updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(
            run.run_id)
        assert updated_run.tags[
            DOCKER_IMAGE_TAG] == get_test_project_docker_image()
Example #2
def test_k8s_executor_resource_requirements(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    # sanity check: no celery worker pods should be running in this namespace
    pods = DagsterKubernetesClient.production_client(
    ).core_api.list_namespaced_pod(
        namespace=helm_namespace_for_k8s_run_launcher)
    celery_pod_names = [
        p.metadata.name for p in pods.items
        if "celery-workers" in p.metadata.name
    ]
    check.invariant(not celery_pod_names)

    run_config = merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    pipeline_name = "resources_limit_pipeline"

    run_id = launch_run_over_graphql(
        dagit_url_for_k8s_run_launcher,
        run_config=run_config,
        pipeline_name=pipeline_name,
        mode="k8s",
    )

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == get_test_project_docker_image(
    )
Example #3
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
        dagster_docker_image, helm_namespace, dagit_url):
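    # Launch only count_letters via solid_selection and verify success.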
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(),
                         "env_subset.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="demo_pipeline_celery",
        solid_selection=["count_letters"],
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #4
def test_execute_on_celery_k8s_with_env_var_and_termination(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance,
        set_dagster_k8s_pipeline_run_namespace_env):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
        ),
    )

    _test_termination(dagster_instance, run_config)
Example #5
def test_valid_job_format_with_backcompat_resources(run_launcher):
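    # Construct a run job from the backcompat resource-requirements tag and
    # check the rendered spec against EXPECTED_JOB_SPEC.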
    docker_image = get_test_project_docker_image()

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = PipelineRun(pipeline_name=pipeline_name, run_config=run_config)

    tags = validate_tags({
        K8S_RESOURCE_REQUIREMENTS_KEY: ({
            "requests": {
                "cpu": "250m",
                "memory": "64Mi"
            },
            "limits": {
                "cpu": "500m",
                "memory": "2560Mi"
            },
        })
    })
    user_defined_k8s_config = get_user_defined_k8s_config(tags)
    job_name = "dagster-run-%s" % run.run_id
    pod_name = "dagster-run-%s" % run.run_id
    job = construct_dagster_k8s_job(
        job_config=run_launcher.get_static_job_config(),
        args=["dagster", "api", "execute_run"],
        job_name=job_name,
        user_defined_k8s_config=user_defined_k8s_config,
        pod_name=pod_name,
        component="run_coordinator",
    )

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run.run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy(),
            dagster_version=dagster_version,
            env_from=ENV_FROM,
            resources="""
        resources:
          limits:
            cpu: 500m
            memory: 2560Mi
          requests:
            cpu: 250m
            memory: 64Mi""",
        ).strip())
Example #6
def test_execute_on_celery_k8s_with_termination(  # pylint: disable=redefined-outer-name
    dagster_docker_image,
    dagster_instance,
    helm_namespace,
    dagit_url,
):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    _test_termination(dagit_url, dagster_instance, run_config)
Example #7
def test_k8s_executor_get_config_from_run_launcher(
        dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher, dagster_docker_image):
    # Verify that if no executor config is specified, it is taken from the run launcher
    run_config = merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env.yaml")),
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_image": dagster_docker_image
                    }
                }
            },
        },
    )
    _launch_executor_run(
        run_config,
        dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher,
    )
Example #8
def test_execute_on_celery_k8s_image_from_origin(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    # Like the previous test, but the image is found from the pipeline origin
    # rather than the executor config
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=None,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="demo_pipeline_celery")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    updated_run = dagster_instance.get_run_by_id(run_id)
    assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
Example #9
def test_skip_operator(
    dagster_airflow_docker_operator_pipeline, dagster_docker_image
):  # pylint: disable=redefined-outer-name
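    # Run the optional_outputs pipeline via the Airflow docker operator and
    # validate that skipped steps are handled.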
    pipeline_name = "optional_outputs"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", "define_demo_execution_repo",
        ),
        environment_yaml=[os.path.join(environments_path, "env_filesystem.yaml")],
        op_kwargs={"host_tmp_dir": "/tmp"},
        image=dagster_docker_image,
    )
    validate_skip_pipeline_execution(results)
Example #10
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env_subset.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "demo_pipeline_celery"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
        solids_to_execute={"count_letters"},
    )

    with get_test_project_external_pipeline(pipeline_name) as external_pipeline:

        dagster_instance.launch_run(
            run.run_id,
            ReOriginatedExternalPipelineForTest(external_pipeline),
        )

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #11
def test_fs_storage_no_explicit_base_dir(
    dagster_airflow_python_operator_pipeline,
):  # pylint: disable=redefined-outer-name
    pipeline_name = "demo_pipeline"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_python_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo", pipeline_name
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
        ],
    )
    validate_pipeline_execution(results)
Example #12
def test_s3_storage(dagster_airflow_docker_operator_pipeline,
                    dagster_docker_image):  # pylint: disable=redefined-outer-name
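    # Run demo_pipeline_s3 via the Airflow docker operator with S3 storage.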
    pipeline_name = "demo_pipeline_s3"
    environments_path = get_test_project_environments_path()
    results = dagster_airflow_docker_operator_pipeline(
        pipeline_name=pipeline_name,
        recon_repo=ReconstructableRepository.for_module(
            "dagster_test.test_project.test_pipelines.repo",
            "define_demo_execution_repo",
        ),
        environment_yaml=[
            os.path.join(environments_path, "env.yaml"),
            os.path.join(environments_path, "env_s3.yaml"),
        ],
        image=dagster_docker_image,
    )
    validate_pipeline_execution(results)
Example #13
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
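    # Run retry_pipeline and verify that the failed step is retried and
    # eventually succeeds.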
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="retry_pipeline")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

    stats = dagster_instance.get_run_stats(run_id)
    assert stats.steps_succeeded == 1

    event_types = [
        event.dagster_event.event_type
        for event in dagster_instance.all_logs(run_id)
        if event.is_dagster_event
    ]

    assert DagsterEventType.STEP_START in event_types
    assert DagsterEventType.STEP_UP_FOR_RETRY in event_types
    assert DagsterEventType.STEP_RESTARTED in event_types
    assert DagsterEventType.STEP_SUCCESS in event_types
Example #14
def test_error_dag_python_job():
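    # Build an Airflow DAG for the error job and assert that executing its
    # tasks raises the expected AirflowException.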
    job_name = "demo_error_job"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo", job_name
    )
    environments_path = get_test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_filesystem.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)
    execution_date = timezone.utcnow()

    dag, tasks = make_airflow_dag_for_recon_repo(recon_repo, job_name, run_config)

    with pytest.raises(AirflowException) as exc_info:
        execute_tasks_in_dag(dag, tasks, run_id=make_new_run_id(), execution_date=execution_date)

    assert "Exception: Unusual error" in str(exc_info.value)
Example #15
def test_execute_on_celery_k8s_with_resource_requirements(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace, dagit_url):
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(dagit_url,
                                     run_config=run_config,
                                     pipeline_name="resources_limit_pipeline")

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #16
def test_k8s_run_launcher_terminate(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
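    # Launch slow_pipeline, wait until it can be terminated, terminate it over
    # GraphQL, and verify the run ends up CANCELED.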
    pipeline_name = "slow_pipeline"

    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"))

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    wait_for_job(job_name="dagster-run-%s" % run_id,
                 namespace=helm_namespace_for_k8s_run_launcher)

    timeout = datetime.timedelta(0, 30)
    start_time = datetime.datetime.now()
    while True:
        assert datetime.datetime.now(
        ) < start_time + timeout, "Timed out waiting for can_terminate"
        if can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                          run_id):
            break
        time.sleep(5)

    terminate_run_over_graphql(dagit_url_for_k8s_run_launcher, run_id=run_id)

    start_time = datetime.datetime.now()
    pipeline_run = None
    while True:
        assert datetime.datetime.now(
        ) < start_time + timeout, "Timed out waiting for termination"
        pipeline_run = dagster_instance_for_k8s_run_launcher.get_run_by_id(
            run_id)
        if pipeline_run.status == PipelineRunStatus.CANCELED:
            break
        time.sleep(5)

    assert pipeline_run.status == PipelineRunStatus.CANCELED

    assert not can_terminate_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                              run_id)
Example #17
def test_volume_mounts(dagster_docker_image, dagster_instance, helm_namespace,
                       dagit_url):
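    # Run volume_mount_pipeline on the celery executor and verify it succeeds.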
    run_config = deep_merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    run_id = launch_run_over_graphql(
        dagit_url,
        run_config=run_config,
        pipeline_name="volume_mount_pipeline",
        mode="celery",
    )

    result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" % run_id,
                                           namespace=helm_namespace)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #18
def test_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
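    # Launch demo_pipeline with the K8sRunLauncher and verify the run job
    # reports PIPELINE_SUCCESS.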
    run_config = load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env.yaml"))
    pipeline_name = "demo_pipeline"
    run = create_run_for_test(
        dagster_instance_for_k8s_run_launcher,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    dagster_instance_for_k8s_run_launcher.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(get_test_project_external_pipeline(pipeline_name)),
    )
    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace_for_k8s_run_launcher
    )

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #19
def test_k8s_run_launcher_secret_from_deployment(
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    # This run_config requires that WORD_FACTOR be set on both the user code deployment
    # and the run launcher. It will only work if secrets are propagated from the deployment
    # to the run launcher, since TEST_DEPLOYMENT_SECRET_NAME is only set on the user code
    # deployment but not on the run launcher config.
    run_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(),
                     "env_config_from_secrets.yaml"))
    pipeline_name = "demo_pipeline"

    run_id = launch_run_over_graphql(dagit_url_for_k8s_run_launcher,
                                     run_config=run_config,
                                     pipeline_name=pipeline_name)

    result = wait_for_job_and_get_raw_logs(
        job_name="dagster-run-%s" % run_id,
        namespace=helm_namespace_for_k8s_run_launcher)

    assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)
Example #20
def test_airflow_execution_date_tags_containerized(dagster_docker_image, ):  # pylint: disable=redefined-outer-name, unused-argument
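    # Run the execution-date pipeline in a containerized Airflow DAG and check
    # that the materialized value matches the DAG's execution_date.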
    pipeline_name = "demo_airflow_execution_date_pipeline_s3"
    recon_repo = ReconstructableRepository.for_module(
        "dagster_test.test_project.test_pipelines.repo",
        "define_demo_execution_repo")
    environments_path = get_test_project_environments_path()
    environment_yaml = [
        os.path.join(environments_path, "env_s3.yaml"),
    ]
    run_config = load_yaml_from_glob_list(environment_yaml)

    execution_date = timezone.utcnow()

    with postgres_instance() as instance:
        dag, tasks = make_airflow_dag_containerized_for_recon_repo(
            recon_repo,
            pipeline_name,
            dagster_docker_image,
            run_config,
            instance=instance,
            op_kwargs={"network_mode": "container:test-postgres-db-airflow"},
        )

        results = execute_tasks_in_dag(dag,
                                       tasks,
                                       run_id=make_new_run_id(),
                                       execution_date=execution_date)

        materialized_airflow_execution_date = None
        for result in results.values():
            for event in result:
                if event.event_type_value == "ASSET_MATERIALIZATION":
                    materialization = event.event_specific_data.materialization
                    materialization_entry = materialization.metadata_entries[0]
                    materialized_airflow_execution_date = materialization_entry.entry_data.text

        assert execution_date.isoformat(
        ) == materialized_airflow_execution_date
Example #21
def test_k8s_run_monitoring(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagit_url_for_k8s_run_launcher,
):
    run_config = merge_dicts(
        load_yaml_from_path(os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )
    _launch_run_and_wait_for_resume(
        dagit_url_for_k8s_run_launcher,
        run_config,
        dagster_instance_for_k8s_run_launcher,
        helm_namespace_for_k8s_run_launcher,
    )
Example #22
def test_execute_on_celery_k8s_with_resource_requirements(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance, helm_namespace):
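    # Same as the GraphQL-based resource-requirements test above, but the run
    # is launched directly through the instance and workspace.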
    run_config = merge_dicts(
        merge_yamls([
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]),
        get_celery_engine_config(dagster_docker_image=dagster_docker_image,
                                 job_namespace=helm_namespace),
    )

    pipeline_name = "resources_limit_pipeline"
    with get_test_project_workspace_and_external_pipeline(
            dagster_instance, pipeline_name) as (
                workspace,
                external_pipeline,
            ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(
            external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(
            ),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )

        dagster_instance.launch_run(run.run_id, workspace)

        result = wait_for_job_and_get_raw_logs(job_name="dagster-run-%s" %
                                               run.run_id,
                                               namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
            result)
Example #23
def test_k8s_run_launcher_with_celery_executor_fails(
    dagster_docker_image, dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image,
            job_namespace=helm_namespace_for_k8s_run_launcher,
        ),
    )

    pipeline_name = "demo_pipeline_celery"

    with get_test_project_external_pipeline_hierarchy(
        dagster_instance_for_k8s_run_launcher, pipeline_name
    ) as (
        workspace,
        location,
        _repo,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)

        run = create_run_for_test(
            dagster_instance_for_k8s_run_launcher,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            pipeline_snapshot=external_pipeline.pipeline_snapshot,
            execution_plan_snapshot=location.get_external_execution_plan(
                external_pipeline, run_config, "default", None, None
            ).execution_plan_snapshot,
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )
        dagster_instance_for_k8s_run_launcher.launch_run(run.run_id, workspace)

        timeout = datetime.timedelta(0, 120)

        found_pipeline_failure = False

        start_time = datetime.datetime.now()

        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance_for_k8s_run_launcher.all_logs(run.run_id)

            for event_record in event_records:
                if event_record.dagster_event:
                    if event_record.dagster_event.event_type == DagsterEventType.PIPELINE_FAILURE:
                        found_pipeline_failure = True

            if found_pipeline_failure:
                break

            time.sleep(5)

        assert found_pipeline_failure
        assert (
            dagster_instance_for_k8s_run_launcher.get_run_by_id(run.run_id).status
            == PipelineRunStatus.FAILURE
        )
Example #24
def test_memoization_k8s_executor(
    dagster_instance_for_k8s_run_launcher,
    helm_namespace_for_k8s_run_launcher,
    dagster_docker_image,
    dagit_url_for_k8s_run_launcher,
):
    ephemeral_path = str(uuid.uuid4())
    run_config = deep_merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "execution": {
                "k8s": {
                    "config": {
                        "job_namespace": helm_namespace_for_k8s_run_launcher,
                        "job_image": dagster_docker_image,
                        "image_pull_policy": image_pull_policy(),
                    }
                }
            },
        },
    )

    run_config = deep_merge_dicts(
        run_config,
        {
            "resources": {
                "io_manager": {
                    "config": {
                        "s3_prefix": ephemeral_path
                    }
                }
            }
        },
    )

    # wrap in try/finally to ensure that memoized results are always cleaned from s3 bucket
    try:
        pipeline_name = "memoization_pipeline"

        run_ids = []
        for _ in range(2):
            run_id = launch_run_over_graphql(
                dagit_url_for_k8s_run_launcher,
                run_config=run_config,
                pipeline_name=pipeline_name,
                mode="k8s",
            )

            result = wait_for_job_and_get_raw_logs(
                job_name="dagster-run-%s" % run_id,
                namespace=helm_namespace_for_k8s_run_launcher,
            )

            assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(
                result)

            run_ids.append(run_id)

        # We expect that first run should have to run the step, since it has not yet been
        # memoized.
        unmemoized_run_id = run_ids[0]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            unmemoized_run_id)
        assert len(_get_step_execution_events(events)) == 1

        # We expect that second run should not have to run the step, since it has been memoized.
        memoized_run_id = run_ids[1]
        events = dagster_instance_for_k8s_run_launcher.all_logs(
            memoized_run_id)
        assert len(_get_step_execution_events(events)) == 0
    finally:
        cleanup_memoized_results(define_memoization_pipeline(), "k8s",
                                 dagster_instance_for_k8s_run_launcher,
                                 run_config)
Example #25
def test_launch_docker_image_on_pipeline_config():
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config

    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "DOCKER_LAUNCHER_NETWORK",
        ],
        "network": {"env": "DOCKER_LAUNCHER_NETWORK"},
        "container_kwargs": {
            "auto_remove": True,
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with environ({"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}):
        with docker_postgres_instance(
            overrides={
                "run_launcher": {
                    "class": "DockerRunLauncher",
                    "module": "dagster_docker",
                    "config": launcher_config,
                }
            }
        ) as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
            with get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_s3", container_image=docker_image
            ) as (workspace, orig_pipeline):

                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline,
                    container_image=docker_image,
                )
                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )
                instance.launch_run(run.run_id, workspace)

                poll_for_finished_run(instance, run.run_id, timeout=60)

                run = instance.get_run_by_id(run.run_id)

                assert run.status == PipelineRunStatus.SUCCESS

                assert run.tags[DOCKER_IMAGE_TAG] == docker_image
Example #26
def test_launch_docker_no_network():
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )
    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        },
        # Ensure the container will time out and fail quickly
        conn_args={
            "params": {"connect_timeout": 2},
        },
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3", container_image=docker_image
        ) as (workspace, orig_pipeline):

            external_pipeline = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            # Container launches, but run is stuck in STARTING state
            # due to not being able to access the network
            run = instance.get_run_by_id(run.run_id)
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            container_id = run.tags[DOCKER_CONTAINER_ID_TAG]

            run = instance.get_run_by_id(run.run_id)

            assert run.status == PipelineRunStatus.STARTING
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image
            client = docker.client.from_env()

            container = None

            try:
                start_time = time.time()
                while True:
                    container = client.containers.get(container_id)
                    if time.time() - start_time > 60:
                        raise Exception("Timed out waiting for container to exit")

                    if container.status == "exited":
                        break

                    time.sleep(3)

            finally:
                if container:
                    container.remove(force=True)
Example #27
def _test_launch(docker_image, launcher_config, terminate=False):
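    # Shared helper: launch demo_pipeline_s3 with the DockerRunLauncher, then
    # either poll for success or terminate the run and expect CANCELED.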
    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(instance, "demo_pipeline_s3") as (
            workspace,
            orig_pipeline,
        ):
            external_pipeline = ReOriginatedExternalPipelineForTest(orig_pipeline)

            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )

            instance.launch_run(run.run_id, workspace)

            if not terminate:
                poll_for_finished_run(instance, run.run_id, timeout=60)

                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
            else:
                start_time = time.time()

                filters = PipelineRunsFilter(
                    run_ids=[run.run_id],
                    statuses=[
                        PipelineRunStatus.STARTED,
                    ],
                )

                while True:
                    runs = instance.get_runs(filters, limit=1)
                    if runs:
                        break
                    else:
                        time.sleep(0.1)
                        if time.time() - start_time > 60:
                            raise Exception("Timed out waiting for run to start")

                launcher = instance.run_launcher
                assert launcher.can_terminate(run.run_id)
                assert launcher.terminate(run.run_id)

                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.CANCELED
Example #28
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
        dagster_docker_image, dagster_instance,
        set_dagster_k8s_pipeline_run_namespace_env):
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls([
                os.path.join(get_test_project_environments_path(),
                             "env_s3.yaml"),
            ]),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {
            "hard_fail_or_0": {
                "config": {
                    "fail": True
                }
            }
        }},
    )

    pipeline_name = "hard_failer"
    run = create_run_for_test(
        dagster_instance,
        pipeline_name=pipeline_name,
        run_config=run_config,
        mode="default",
    )

    dagster_instance.launch_run(
        run.run_id,
        ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline(pipeline_name)),
    )
    assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

    # Check that pipeline run is marked as failed
    pipeline_run_status_failure = False
    start_time = datetime.datetime.now()
    timeout = datetime.timedelta(0, 120)

    while datetime.datetime.now() < start_time + timeout:
        pipeline_run = dagster_instance.get_run_by_id(run.run_id)
        if pipeline_run.status == PipelineRunStatus.FAILURE:
            pipeline_run_status_failure = True
            break
        time.sleep(5)
    assert pipeline_run_status_failure

    # Check for step failure for hard_fail_or_0.compute
    start_time = datetime.datetime.now()
    step_failure_found = False
    while datetime.datetime.now() < start_time + timeout:
        event_records = dagster_instance.all_logs(run.run_id)
        for event_record in event_records:
            if event_record.dagster_event:
                if (event_record.dagster_event.event_type
                        == DagsterEventType.STEP_FAILURE
                        and event_record.dagster_event.step_key
                        == "hard_fail_or_0"):
                    step_failure_found = True
                    break
        if step_failure_found:
            break
        time.sleep(5)
    assert step_failure_found
Example #29
def test_terminate_launched_docker_run():
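    # Launch hanging_pipeline in Docker, terminate it once a step has started,
    # and verify the run is CANCELED with the expected termination events.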
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "network": "container:test-postgres-db-docker",
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls([
        os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
    ])

    with docker_postgres_instance(
            overrides={
                "run_launcher": {
                    "class": "DockerRunLauncher",
                    "module": "dagster_docker",
                    "config": launcher_config,
                }
            }) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "hanging_pipeline", docker_image)
        run = instance.create_run_for_pipeline(
            pipeline_def=recon_pipeline.get_definition(),
            run_config=run_config,
        )

        run_id = run.run_id

        external_pipeline = ReOriginatedExternalPipelineForTest(
            get_test_project_external_pipeline("hanging_pipeline",
                                               container_image=docker_image),
            container_image=docker_image,
        )
        instance.launch_run(run_id, external_pipeline)

        poll_for_step_start(instance, run_id)

        assert instance.run_launcher.can_terminate(run_id)
        assert instance.run_launcher.terminate(run_id)

        poll_for_finished_run(instance, run_id, timeout=30)

        terminated_pipeline_run = instance.get_run_by_id(run_id)
        assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

        run_logs = instance.all_logs(run_id)

        _check_event_log_contains(
            run_logs,
            [
                ("PIPELINE_CANCELING", "Sending pipeline termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED",
                 'Execution of pipeline "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Pipeline execution terminated by interrupt"),
                ("ENGINE_EVENT", "Process for pipeline exited"),
            ],
        )
Example #30
def test_docker_monitoring():
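    # Stop the launched run container mid-run and verify that the monitoring
    # daemon resumes the run to SUCCESS.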
    docker_image = get_test_project_docker_image()

    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
        "networks": ["container:test-postgres-db-docker"],
        "container_kwargs": {
            # "auto_remove": True,
            "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
        },
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_dicts(
        load_yaml_from_path(
            os.path.join(get_test_project_environments_path(), "env_s3.yaml")),
        {
            "solids": {
                "multiply_the_word_slow": {
                    "inputs": {
                        "word": "bar"
                    },
                    "config": {
                        "factor": 2,
                        "sleep_time": 20
                    },
                }
            },
            "execution": {
                "docker": {
                    "config": {}
                }
            },
        },
    )

    with docker_postgres_instance({
            "run_monitoring": {
                "enabled": True
            },
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            },
    }) as instance:
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline_docker_slow", docker_image)
        with get_test_project_workspace_and_external_pipeline(
                instance,
                "demo_pipeline_docker_slow",
                container_image=docker_image) as (
                    workspace,
                    orig_pipeline,
                ):
            with start_daemon():
                external_pipeline = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image)

                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=external_pipeline.
                    get_external_origin(),
                    pipeline_code_origin=external_pipeline.get_python_origin(),
                )

                with log_run_events(instance, run.run_id):

                    instance.launch_run(run.run_id, workspace)

                    start_time = time.time()
                    while time.time() - start_time < 60:
                        run = instance.get_run_by_id(run.run_id)
                        if run.status == PipelineRunStatus.STARTED:
                            break
                        assert run.status == PipelineRunStatus.STARTING
                        time.sleep(1)

                    time.sleep(3)

                    instance.run_launcher._get_container(  # pylint:disable=protected-access
                        instance.get_run_by_id(run.run_id)).stop()

                    # daemon resumes the run
                    poll_for_finished_run(instance, run.run_id, timeout=90)
                    assert instance.get_run_by_id(
                        run.run_id).status == PipelineRunStatus.SUCCESS