def test_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch ``demo_pipeline`` via the fixture instance and expect success.

    The run is configured from ``env.yaml``. After launching, the raw logs of
    the ``dagster-run-<run_id>`` job in the fixture namespace must contain
    ``PIPELINE_SUCCESS``.
    """
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    env_yaml = os.path.join(get_test_project_environments_path(), "env.yaml")
    run_config = load_yaml_from_path(env_yaml)
    pipeline_name = "demo_pipeline"

    with get_test_project_workspace_and_external_pipeline(instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)
def test_reoriginated_external_pipeline():
    """Check that a re-originated external pipeline returns truthy origins.

    Both the Python origin and the external origin of the wrapped
    ``demo_pipeline_celery`` pipeline are asserted to be truthy.
    """
    with instance_for_test() as instance, get_test_project_workspace_and_external_pipeline(
        instance, "demo_pipeline_celery"
    ) as (_workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)

        python_origin = reoriginated.get_python_origin()
        assert python_origin

        external_origin = reoriginated.get_external_origin()
        assert external_origin
def test_launch_docker_image_on_instance_config():
    """Launch ``demo_pipeline`` with the image named in the launcher config.

    The ``DockerRunLauncher`` config carries the test project image under
    ``"image"``. The run must reach ``PipelineRunStatus.SUCCESS`` after being
    polled with a 60 second timeout.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
        "image": docker_image,
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline")
        with get_test_project_workspace_and_external_pipeline(instance, "demo_pipeline") as (
            workspace,
            orig_pipeline,
        ):
            reoriginated = ReOriginatedExternalPipelineForTest(orig_pipeline)
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=reoriginated.get_external_origin(),
                pipeline_code_origin=reoriginated.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            final_run = instance.get_run_by_id(run.run_id)
            assert final_run.status == PipelineRunStatus.SUCCESS
def test_launch_docker_invalid_image():
    """Expect ``launch_run`` to raise when the configured image name is malformed.

    The launcher is configured with the image ``_invalid_format_image`` and the
    launch must raise an exception whose message matches the "not correctly
    formatted" text.
    """
    docker_image = "_invalid_format_image"
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
        "image": docker_image,
    }

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3"
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(orig_pipeline)
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=reoriginated.get_external_origin(),
                pipeline_code_origin=reoriginated.get_python_origin(),
            )

            expected_message = re.escape(
                "Docker image name _invalid_format_image is not correctly formatted"
            )
            with pytest.raises(Exception, match=expected_message):
                instance.launch_run(run.run_id, workspace)
def test_execute_on_celery_k8s_image_from_origin(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Run ``demo_pipeline_celery`` with the image supplied via the pipeline origin.

    The celery engine config is built with ``dagster_docker_image=None``; the
    image is instead passed as ``container_image`` when building the workspace
    and the re-originated pipeline. The finished run must be tagged with that
    image under ``DOCKER_IMAGE_TAG``.
    """
    # Like the previous test, but the image is included in the pipeline origin
    # rather than in the executor config
    yaml_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )
    engine_config = get_celery_engine_config(
        dagster_docker_image=None, job_namespace=helm_namespace
    )
    run_config = merge_dicts(yaml_config, engine_config)

    pipeline_name = "demo_pipeline_celery"
    with get_test_project_workspace_and_external_pipeline(
        dagster_instance, pipeline_name, container_image=dagster_docker_image
    ) as (workspace, external_pipeline):
        reoriginated = ReOriginatedExternalPipelineForTest(
            external_pipeline, container_image=dagster_docker_image
        )
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        dagster_instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)

        updated_run = dagster_instance.get_run_by_id(run.run_id)
        assert updated_run.tags[DOCKER_IMAGE_TAG] == dagster_docker_image
def test_failing_k8s_run_launcher(
    dagster_instance_for_k8s_run_launcher, helm_namespace_for_k8s_run_launcher
):
    """Launch ``demo_pipeline`` with an invalid run config and expect failure.

    The job logs must not contain ``PIPELINE_SUCCESS``, and the run's event log
    must mention both the unexpected config entry and the missing ``solids``
    entry.
    """
    instance = dagster_instance_for_k8s_run_launcher
    namespace = helm_namespace_for_k8s_run_launcher

    run_config = {"blah blah this is wrong": {}}
    pipeline_name = "demo_pipeline"

    with get_test_project_workspace_and_external_pipeline(instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=namespace)

        assert "PIPELINE_SUCCESS" not in raw_logs, "no match, result: {}".format(raw_logs)

        event_records = instance.all_logs(run.run_id)

        assert any(
            'Received unexpected config entry "blah blah this is wrong"' in str(record)
            for record in event_records
        )
        assert any(
            'Missing required config entry "solids"' in str(record) for record in event_records
        )
def test_execute_subset_on_celery_k8s(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Run only the ``count_letters`` solid of ``demo_pipeline_celery``.

    The run config merges ``env_subset.yaml`` and ``env_s3.yaml`` with the
    celery engine config; the job logs must contain ``PIPELINE_SUCCESS``.
    """
    yaml_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env_subset.yaml", "env_s3.yaml")
        ]
    )
    engine_config = get_celery_engine_config(
        dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
    )
    run_config = merge_dicts(yaml_config, engine_config)

    pipeline_name = "demo_pipeline_celery"
    with get_test_project_workspace_and_external_pipeline(dagster_instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            solids_to_execute={"count_letters"},
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        dagster_instance.launch_run(run.run_id, workspace)

        job_name = "dagster-run-%s" % run.run_id
        raw_logs = wait_for_job_and_get_raw_logs(job_name=job_name, namespace=helm_namespace)

        assert "PIPELINE_SUCCESS" in raw_logs, "no match, result: {}".format(raw_logs)
def _test_launch(docker_image, launcher_config, terminate=False):
    """Launch ``demo_pipeline_s3`` with a ``DockerRunLauncher`` and check the outcome.

    Args:
        docker_image: Image name passed to ``find_local_test_image`` when not
            running on Buildkite.
        launcher_config: Config dict for the ``DockerRunLauncher``. It is
            copied before use, so the caller's dict is left untouched.
        terminate: When false, wait for the run to finish and expect
            ``SUCCESS``. When true, wait for the run to reach ``STARTED``,
            terminate it through the run launcher and expect ``CANCELED``.

    Raises:
        Exception: If ``terminate`` is true and the run has not reached
            ``STARTED`` within 60 seconds.
    """
    # Shallow-copy so that adding the Buildkite registry entry below does not
    # mutate the dict owned by the caller.
    launcher_config = dict(launcher_config)

    if IS_BUILDKITE:
        launcher_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), "env.yaml"),
            os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
        ]
    )

    with docker_postgres_instance(
        overrides={
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3")
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3"
        ) as (workspace, orig_pipeline):
            external_pipeline = ReOriginatedExternalPipelineForTest(orig_pipeline)
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            if not terminate:
                poll_for_finished_run(instance, run.run_id, timeout=60)
                assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.SUCCESS
                return

            # Termination path: poll until the run shows up as STARTED.
            start_time = time.time()
            started_filter = PipelineRunsFilter(
                run_ids=[run.run_id],
                statuses=[PipelineRunStatus.STARTED],
            )
            while not instance.get_runs(started_filter, limit=1):
                time.sleep(0.1)
                if time.time() - start_time > 60:
                    raise Exception("Timed out waiting for run to start")

            launcher = instance.run_launcher
            assert launcher.can_terminate(run.run_id)
            assert launcher.terminate(run.run_id)

            poll_for_finished_run(instance, run.run_id, timeout=60)
            assert instance.get_run_by_id(run.run_id).status == PipelineRunStatus.CANCELED
def test_launch_docker_no_network():
    """Launch ``demo_pipeline_s3`` with no ``network`` entry in the launcher config.

    The run is expected to be tagged with the image and a container id while
    remaining in ``STARTING``. The test then waits up to 60 seconds for the
    container to report ``exited`` and force-removes it afterwards.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(
        overrides=instance_overrides,
        # Ensure the container will time out and fail quickly
        conn_args={
            "params": {"connect_timeout": 2},
        },
    ) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_s3", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=reoriginated.get_external_origin(),
                pipeline_code_origin=reoriginated.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            # Container launches, but run is stuck in STARTING state
            # due to not being able to access the network
            run = instance.get_run_by_id(run.run_id)
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            container_id = run.tags[DOCKER_CONTAINER_ID_TAG]

            run = instance.get_run_by_id(run.run_id)
            assert run.status == PipelineRunStatus.STARTING
            assert run.tags[DOCKER_IMAGE_TAG] == docker_image

            docker_client = docker.client.from_env()
            container = None
            try:
                wait_started_at = time.time()
                while True:
                    container = docker_client.containers.get(container_id)
                    if time.time() - wait_started_at > 60:
                        raise Exception("Timed out waiting for container to exit")
                    if container.status == "exited":
                        break
                    time.sleep(3)
            finally:
                # Clean up whichever container object was fetched last, if any.
                if container:
                    container.remove(force=True)
def test_terminate_launched_docker_run():
    """Terminate a running ``hanging_pipeline`` launched via ``DockerRunLauncher``.

    After a step has started, the run is terminated through the run launcher.
    It must end as ``CANCELED`` and its event log must contain the canceling,
    step failure, canceled and process-exit entries.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "network": "container:test-postgres-db-docker",
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    run_config = merge_yamls(
        [os.path.join(get_test_project_environments_path(), "env_s3.yaml")]
    )

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("hanging_pipeline", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "hanging_pipeline", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline,
                container_image=docker_image,
            )
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=reoriginated.get_external_origin(),
                pipeline_code_origin=reoriginated.get_python_origin(),
            )
            run_id = run.run_id

            instance.launch_run(run_id, workspace)
            poll_for_step_start(instance, run_id)

            run_launcher = instance.run_launcher
            assert run_launcher.can_terminate(run_id)
            assert instance.run_launcher.terminate(run_id)

            # Wait for the run to finish, then re-read it from the instance.
            poll_for_finished_run(instance, run_id, timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            expected_events = [
                ("PIPELINE_CANCELING", "Sending run termination request"),
                ("STEP_FAILURE", 'Execution of step "hanging_solid" failed.'),
                ("PIPELINE_CANCELED", 'Execution of run for "hanging_pipeline" canceled.'),
                ("ENGINE_EVENT", "Process for run exited"),
            ]
            run_logs = instance.all_logs(run_id)
            _check_event_log_contains(run_logs, expected_events)
def test_launch_docker_image_on_pipeline_config():
    """Launch ``demo_pipeline_s3`` with the image carried on the pipeline origin.

    The launcher's ``network`` is read from the ``DOCKER_LAUNCHER_NETWORK``
    environment variable, which is set for the duration of the test. The run
    must succeed and be tagged with the image under ``DOCKER_IMAGE_TAG``.
    """
    # Docker image name to use for launch specified as part of the pipeline origin
    # rather than in the run launcher instance config
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
            "DOCKER_LAUNCHER_NETWORK",
        ],
        "network": {"env": "DOCKER_LAUNCHER_NETWORK"},
        "container_kwargs": {"auto_remove": True},
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    run_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )

    network_env = {"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}
    with environ(network_env):
        instance_overrides = {
            "run_launcher": {
                "class": "DockerRunLauncher",
                "module": "dagster_docker",
                "config": launcher_config,
            }
        }
        with docker_postgres_instance(overrides=instance_overrides) as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_s3", docker_image)
            with get_test_project_workspace_and_external_pipeline(
                instance, "demo_pipeline_s3", container_image=docker_image
            ) as (workspace, orig_pipeline):
                reoriginated = ReOriginatedExternalPipelineForTest(
                    orig_pipeline,
                    container_image=docker_image,
                )
                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=reoriginated.get_external_origin(),
                    pipeline_code_origin=reoriginated.get_python_origin(),
                )
                instance.launch_run(run.run_id, workspace)

                poll_for_finished_run(instance, run.run_id, timeout=60)

                run = instance.get_run_by_id(run.run_id)
                assert run.status == PipelineRunStatus.SUCCESS
                assert run.tags[DOCKER_IMAGE_TAG] == docker_image
def test_docker_monitoring():
    """Stop a running container and expect the run to still finish successfully.

    Run monitoring is enabled on the instance and a daemon is started. Once
    ``demo_pipeline_docker_slow`` reaches ``STARTED`` (polled for up to 60
    seconds), its container is stopped; the run must then finish with
    ``SUCCESS`` within 90 seconds.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {
        "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
        "networks": ["container:test-postgres-db-docker"],
        "container_kwargs": {
            # "auto_remove": True,
            "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
        },
    }

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    s3_config = load_yaml_from_path(
        os.path.join(get_test_project_environments_path(), "env_s3.yaml")
    )
    slow_solid_config = {
        "solids": {
            "multiply_the_word_slow": {
                "inputs": {"word": "bar"},
                "config": {"factor": 2, "sleep_time": 20},
            }
        },
        "execution": {"docker": {"config": {}}},
    }
    run_config = merge_dicts(s3_config, slow_solid_config)

    instance_overrides = {
        "run_monitoring": {"enabled": True},
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        },
    }

    with docker_postgres_instance(instance_overrides) as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_docker_slow", docker_image)
        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker_slow", container_image=docker_image
        ) as (workspace, orig_pipeline):
            with start_daemon():
                reoriginated = ReOriginatedExternalPipelineForTest(
                    orig_pipeline, container_image=docker_image
                )
                run = instance.create_run_for_pipeline(
                    pipeline_def=recon_pipeline.get_definition(),
                    run_config=run_config,
                    external_pipeline_origin=reoriginated.get_external_origin(),
                    pipeline_code_origin=reoriginated.get_python_origin(),
                )

                with log_run_events(instance, run.run_id):
                    instance.launch_run(run.run_id, workspace)

                    # Poll until STARTED; until then the run must stay in STARTING.
                    poll_started_at = time.time()
                    while time.time() - poll_started_at < 60:
                        run = instance.get_run_by_id(run.run_id)
                        if run.status == PipelineRunStatus.STARTED:
                            break
                        assert run.status == PipelineRunStatus.STARTING
                        time.sleep(1)

                    time.sleep(3)

                    run_container = instance.run_launcher._get_container(  # pylint:disable=protected-access
                        instance.get_run_by_id(run.run_id)
                    )
                    run_container.stop()

                    # daemon resumes the run
                    poll_for_finished_run(instance, run.run_id, timeout=90)

                    final_run = instance.get_run_by_id(run.run_id)
                    assert final_run.status == PipelineRunStatus.SUCCESS
def test_k8s_executor_config_override(kubeconfig_file):
    """Check which image ends up on the run tag and on the submitted K8s job.

    Two runs are launched against a mocked batch API: one with an empty
    ``celery-k8s`` executor config, which must use the origin image
    ``my_image:tag``, and one with ``job_image`` set, which must use
    ``fake-image-name``.
    """
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    celery_k8s_run_launcher = CeleryK8sRunLauncher(
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    pipeline_name = "demo_pipeline"

    with instance_for_test() as instance:
        with get_test_project_workspace_and_external_pipeline(
            instance, pipeline_name, "my_image:tag"
        ) as (workspace, external_pipeline):
            # Launch the run in a fake Dagster instance.
            celery_k8s_run_launcher.register_instance(instance)

            # Launch without custom job_image
            default_image_config = {"execution": {"celery-k8s": {}}}
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=default_image_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "my_image:tag"

            # Launch with custom job_image
            custom_image_config = {
                "execution": {"celery-k8s": {"config": {"job_image": "fake-image-name"}}}
            }
            run = create_run_for_test(
                instance,
                pipeline_name=pipeline_name,
                run_config=custom_image_config,
                external_pipeline_origin=external_pipeline.get_external_origin(),
                pipeline_code_origin=external_pipeline.get_python_origin(),
            )
            celery_k8s_run_launcher.launch_run(LaunchRunContext(run, workspace))

            updated_run = instance.get_run_by_id(run.run_id)
            assert updated_run.tags[DOCKER_IMAGE_TAG] == "fake-image-name"

            # Check that user defined k8s config was passed down to the k8s job.
            mock_method_calls = mock_k8s_client_batch_api.method_calls
            assert len(mock_method_calls) > 0

            _, _args, first_kwargs = mock_method_calls[0]
            first_container = first_kwargs["body"].spec.template.spec.containers[0]
            assert first_container.image == "my_image:tag"

            _, _args, second_kwargs = mock_method_calls[1]
            second_container = second_kwargs["body"].spec.template.spec.containers[0]
            assert second_container.image == "fake-image-name"
def test_execute_on_celery_k8s_retry_pipeline(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, helm_namespace
):
    """Run ``retry_pipeline`` on celery-k8s and check the retry-related events.

    The job logs must contain ``PIPELINE_SUCCESS``, exactly one step must have
    succeeded, and the run's event log must contain ``STEP_START``,
    ``STEP_UP_FOR_RETRY``, ``STEP_RESTARTED`` and ``STEP_SUCCESS`` events.
    """
    run_config = merge_dicts(
        merge_yamls([os.path.join(get_test_project_environments_path(), "env_s3.yaml")]),
        get_celery_engine_config(
            dagster_docker_image=dagster_docker_image, job_namespace=helm_namespace
        ),
    )

    pipeline_name = "retry_pipeline"
    with get_test_project_workspace_and_external_pipeline(dagster_instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )
        dagster_instance.launch_run(run.run_id, workspace)

        result = wait_for_job_and_get_raw_logs(
            job_name="dagster-run-%s" % run.run_id, namespace=helm_namespace
        )

        assert "PIPELINE_SUCCESS" in result, "no match, result: {}".format(result)

        stats = dagster_instance.get_run_stats(run.run_id)
        assert stats.steps_succeeded == 1

        # Read the event log once and reuse it for every membership check,
        # instead of re-fetching and re-filtering it for each expected type.
        seen_event_types = [
            event.dagster_event.event_type
            for event in dagster_instance.all_logs(run.run_id)
            if event.is_dagster_event
        ]

        for expected_type in (
            DagsterEventType.STEP_START,
            DagsterEventType.STEP_UP_FOR_RETRY,
            DagsterEventType.STEP_RESTARTED,
            DagsterEventType.STEP_SUCCESS,
        ):
            assert expected_type in seen_event_types, "missing event type: {}".format(
                expected_type
            )
def test_container_context_on_pipeline():
    """Launch ``demo_pipeline_docker`` with Docker settings in the container context.

    The launcher config starts empty; env vars, networks and container kwargs
    are passed as ``container_context`` when building the recon pipeline. Every
    event log entry is printed and the run must end in ``SUCCESS``.
    """
    docker_image = get_test_project_docker_image()
    launcher_config = {}

    if not IS_BUILDKITE:
        find_local_test_image(docker_image)
    else:
        launcher_config["registry"] = get_buildkite_registry_config()

    executor_config = {
        "execution": {"docker": {"config": {}}},
    }

    yaml_config = merge_yamls(
        [
            os.path.join(get_test_project_environments_path(), yaml_name)
            for yaml_name in ("env.yaml", "env_s3.yaml")
        ]
    )
    run_config = merge_dicts(yaml_config, executor_config)

    instance_overrides = {
        "run_launcher": {
            "class": "DockerRunLauncher",
            "module": "dagster_docker",
            "config": launcher_config,
        }
    }

    with docker_postgres_instance(overrides=instance_overrides) as instance:
        docker_context = {
            "docker": {
                "env_vars": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
                "networks": ["container:test-postgres-db-docker"],
                "container_kwargs": {
                    "auto_remove": True,
                    "volumes": ["/var/run/docker.sock:/var/run/docker.sock"],
                },
            }
        }
        recon_pipeline = get_test_project_recon_pipeline(
            "demo_pipeline_docker",
            docker_image,
            container_context=docker_context,
        )

        with get_test_project_workspace_and_external_pipeline(
            instance, "demo_pipeline_docker", container_image=docker_image
        ) as (workspace, orig_pipeline):
            reoriginated = ReOriginatedExternalPipelineForTest(
                orig_pipeline, container_image=docker_image
            )
            # The code origin is taken from the recon pipeline here, not from
            # the re-originated external pipeline.
            run = instance.create_run_for_pipeline(
                pipeline_def=recon_pipeline.get_definition(),
                run_config=run_config,
                external_pipeline_origin=reoriginated.get_external_origin(),
                pipeline_code_origin=recon_pipeline.get_python_origin(),
            )
            instance.launch_run(run.run_id, workspace)

            poll_for_finished_run(instance, run.run_id, timeout=60)

            for log_entry in instance.all_logs(run.run_id):
                print(log_entry)  # pylint: disable=print-call

            final_run = instance.get_run_by_id(run.run_id)
            assert final_run.status == PipelineRunStatus.SUCCESS
def test_execute_on_celery_k8s_with_hard_failure(  # pylint: disable=redefined-outer-name
    dagster_docker_image, dagster_instance, set_dagster_k8s_pipeline_run_namespace_env
):
    """Run ``hard_failer`` configured to fail and check the failure is recorded.

    The run must reach ``PipelineRunStatus.FAILURE`` and the event log must
    contain a ``STEP_FAILURE`` event for step ``hard_fail_or_0``. Each of the
    two checks polls every 5 seconds for up to 120 seconds and stops as soon
    as its condition is met.
    """
    run_config = merge_dicts(
        merge_dicts(
            merge_yamls(
                [
                    os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
                ]
            ),
            get_celery_engine_config(
                dagster_docker_image=dagster_docker_image,
                job_namespace={"env": "DAGSTER_K8S_PIPELINE_RUN_NAMESPACE"},
            ),
        ),
        {"solids": {"hard_fail_or_0": {"config": {"fail": True}}}},
    )

    pipeline_name = "hard_failer"
    with get_test_project_workspace_and_external_pipeline(dagster_instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated_pipeline = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated_pipeline.get_external_origin(),
            pipeline_code_origin=reoriginated_pipeline.get_python_origin(),
        )
        dagster_instance.launch_run(run.run_id, workspace)
        assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

        # Check that pipeline run is marked as failed
        pipeline_run_status_failure = False
        start_time = datetime.datetime.now()
        timeout = datetime.timedelta(0, 120)

        while datetime.datetime.now() < start_time + timeout:
            pipeline_run = dagster_instance.get_run_by_id(run.run_id)
            if pipeline_run.status == PipelineRunStatus.FAILURE:
                pipeline_run_status_failure = True
                break
            time.sleep(5)
        assert pipeline_run_status_failure

        # Check for step failure for hard_fail_or_0.compute
        start_time = datetime.datetime.now()
        step_failure_found = False
        while datetime.datetime.now() < start_time + timeout:
            event_records = dagster_instance.all_logs(run.run_id)
            step_failure_found = any(
                event_record.dagster_event
                and event_record.dagster_event.event_type == DagsterEventType.STEP_FAILURE
                and event_record.dagster_event.step_key == "hard_fail_or_0"
                for event_record in event_records
            )
            # Leave the polling loop as soon as the event is seen; without
            # this the loop would keep sleeping until the timeout elapsed.
            if step_failure_found:
                break
            time.sleep(5)
        assert step_failure_found
def _test_termination(dagster_instance, run_config):
    """Launch ``resource_pipeline``, terminate it mid-run and verify the cleanup.

    Args:
        dagster_instance: Instance whose run launcher must be a
            ``CeleryK8sRunLauncher``.
        run_config: Run config used to create the run.

    Each polling phase checks every 5 seconds for up to 120 seconds. After
    termination the run must be ``CANCELED``, a second terminate must be
    refused, the event log must show exactly one each of step failure,
    canceling, canceled, resource init and resource tear-down, and the S3
    object keyed by the run id must be retrievable.
    """
    pipeline_name = "resource_pipeline"
    with get_test_project_workspace_and_external_pipeline(dagster_instance, pipeline_name) as (
        workspace,
        external_pipeline,
    ):
        reoriginated = ReOriginatedExternalPipelineForTest(external_pipeline)
        run = create_run_for_test(
            dagster_instance,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode="default",
            external_pipeline_origin=reoriginated.get_external_origin(),
            pipeline_code_origin=reoriginated.get_python_origin(),
        )
        dagster_instance.launch_run(run.run_id, workspace)
        assert isinstance(dagster_instance.run_launcher, CeleryK8sRunLauncher)

        # Wait for pipeline run to start
        timeout = datetime.timedelta(0, 120)
        start_time = datetime.datetime.now()
        can_terminate = False
        while datetime.datetime.now() < start_time + timeout:
            can_terminate = bool(dagster_instance.run_launcher.can_terminate(run_id=run.run_id))
            if can_terminate:
                break
            time.sleep(5)
        assert can_terminate

        # Wait for step to start
        step_start_found = False
        start_time = datetime.datetime.now()
        while datetime.datetime.now() < start_time + timeout:
            step_start_found = any(
                record.dagster_event
                and record.dagster_event.event_type == DagsterEventType.STEP_START
                for record in dagster_instance.all_logs(run.run_id)
            )
            if step_start_found:
                break
            time.sleep(5)
        assert step_start_found

        # Terminate run
        assert dagster_instance.run_launcher.can_terminate(run_id=run.run_id)
        assert dagster_instance.run_launcher.terminate(run_id=run.run_id)

        # Check that pipeline run is marked as canceled
        pipeline_run_status_canceled = False
        start_time = datetime.datetime.now()
        while datetime.datetime.now() < start_time + timeout:
            current_run = dagster_instance.get_run_by_id(run.run_id)
            if current_run.status == PipelineRunStatus.CANCELED:
                pipeline_run_status_canceled = True
                break
            time.sleep(5)
        assert pipeline_run_status_canceled

        # Check that terminate cannot be called again
        assert not dagster_instance.run_launcher.can_terminate(run_id=run.run_id)
        assert not dagster_instance.run_launcher.terminate(run_id=run.run_id)

        # Check for step failure and resource tear down
        expected_events_found = False
        start_time = datetime.datetime.now()
        while datetime.datetime.now() < start_time + timeout:
            # Counters are rebuilt from the full event log on every poll.
            tally = {
                "step_failure": 0,
                "resource_tear_down": 0,
                "resource_init": 0,
                "termination_request": 0,
                "termination_success": 0,
            }
            for record in dagster_instance.all_logs(run.run_id):
                if record.dagster_event:
                    event_type = record.dagster_event.event_type
                    if event_type == DagsterEventType.STEP_FAILURE:
                        tally["step_failure"] += 1
                    elif event_type == DagsterEventType.PIPELINE_CANCELING:
                        tally["termination_request"] += 1
                    elif event_type == DagsterEventType.PIPELINE_CANCELED:
                        tally["termination_success"] += 1
                elif record.message:
                    if "initializing s3_resource_with_context_manager" in record.message:
                        tally["resource_init"] += 1
                    if "tearing down s3_resource_with_context_manager" in record.message:
                        tally["resource_tear_down"] += 1

            if all(count == 1 for count in tally.values()):
                expected_events_found = True
                break
            time.sleep(5)
        assert expected_events_found

        s3_resource = boto3.resource(
            "s3", region_name="us-west-1", use_ssl=True, endpoint_url=None
        )
        s3 = s3_resource.meta.client
        bucket = "dagster-scratch-80542c2"
        key = "resource_termination_test/{}".format(run.run_id)
        assert s3.get_object(Bucket=bucket, Key=key)