def test_backfill_from_failure_for_subselection():
    """A from-failure backfill should launch a run that keeps a two-solid selection.

    A run limited to ``fail_three`` and ``success_four`` is executed first and
    is expected to end in ``FAILURE``. A ``from_failure`` backfill for
    partition ``"one"`` is then processed, and the run returned by
    ``get_runs(limit=1)`` must carry two solids to execute.
    """
    with instance_for_context(default_repo) as (instance, workspace, external_repo):
        partition_set = parallel_failure_partition_set
        target_partition = partition_set.get_partition("one")
        partition_run_config = partition_set.run_config_for_partition(target_partition)
        partition_tags = partition_set.tags_for_partition(target_partition)
        external_partition_set = external_repo.get_external_partition_set(
            "parallel_failure_partition_set"
        )

        # Seed the instance with one run over the two-solid subselection.
        execute_pipeline(
            parallel_failure_pipeline,
            run_config=partition_run_config,
            tags=partition_tags,
            instance=instance,
            solid_selection=["fail_three", "success_four"],
            raise_on_error=False,
        )
        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_finish(instance)
        first_run = instance.get_runs()[0]
        assert first_run.status == PipelineRunStatus.FAILURE

        backfill = PartitionBackfill(
            backfill_id="fromfailure",
            partition_set_origin=external_partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one"],
            from_failure=True,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(backfill)

        # Wrapping in list() drains the iterator returned by the backfill iteration.
        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, workspace, daemon_logger))

        assert instance.get_runs_count() == 2
        second_run = instance.get_runs(limit=1)[0]
        assert second_run.solids_to_execute
        assert second_run.solid_selection
        assert len(second_run.solids_to_execute) == 2
        assert len(second_run.solid_selection) == 2
def test_intermediate_storage_def_to_io_manager_def():
    """Run a pipeline whose ``io_manager`` is adapted from an intermediate storage.

    The storage factory records its invocation in ``called``. The test checks
    that the pipeline succeeds and that the factory was actually invoked, so a
    regression where the adapter skips the wrapped definition is caught.
    """
    called = {}

    @intermediate_storage()
    def no_config_intermediate_storage(init_context):
        called["ran"] = True
        object_store = InMemoryObjectStore()
        return build_intermediate_storage_from_object_store(
            object_store=object_store, init_context=init_context
        )

    @solid
    def return_one(_):
        return 1

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "io_manager": io_manager_from_intermediate_storage(
                        no_config_intermediate_storage
                    )
                }
            )
        ]
    )
    def foo():
        return_one()

    assert execute_pipeline(foo).success
    # The flag was recorded but never checked before; without this assertion
    # the test would pass even if the storage factory was never called.
    assert called.get("ran")
def test_intermediate_storage_reexecution():
    """Full and partial re-execution should succeed with filesystem intermediate storage.

    The pipeline is executed once, then re-executed in full and again with
    only the ``plus_one`` step selected, each time against the first run as
    parent.
    """

    @solid
    def return_one(_):
        return 1

    @solid
    def plus_one(_, one):
        return one + 1

    @pipeline
    def foo():
        plus_one(return_one())

    instance = DagsterInstance.ephemeral()
    storage_config = {"intermediate_storage": {"filesystem": {}}}

    initial = execute_pipeline(foo, run_config=storage_config, instance=instance)
    assert initial.success

    # Re-run every step against the initial run.
    full_rerun = reexecute_pipeline(
        foo,
        run_config=storage_config,
        parent_run_id=initial.run_id,
        instance=instance,
    )
    assert full_rerun.success

    # Re-run only the downstream step.
    partial_rerun = reexecute_pipeline(
        foo,
        run_config=storage_config,
        step_selection=["plus_one"],
        parent_run_id=initial.run_id,
        instance=instance,
    )
    assert partial_rerun.success
def test_dynamic(gcs_bucket):
    """A pipeline mapping over dynamic outputs should succeed with the GCS pickle IO manager.

    Args:
        gcs_bucket: Value passed through as the IO manager's ``gcs_bucket`` config.
    """

    @solid(output_defs=[DynamicOutputDefinition()])
    def numbers(_):
        for value in range(3):
            yield DynamicOutput(value, mapping_key=str(value))

    @solid
    def echo(_, x):
        return x

    gcs_mode = ModeDefinition(
        resource_defs={"io_manager": gcs_pickle_io_manager, "gcs": gcs_resource}
    )

    @pipeline(mode_defs=[gcs_mode])
    def dynamic():
        numbers().map(echo)

    io_manager_config = {"config": {"gcs_bucket": gcs_bucket}}
    result = execute_pipeline(
        dynamic,
        run_config={"resources": {"io_manager": io_manager_config}},
    )
    assert result.success
def _execute_foo_with_health_interval(interval_seconds):
    """Reset ``TestStepHandler`` and run ``foo_pipline`` with the given health-check interval.

    Args:
        interval_seconds: Value for ``check_step_health_interval_seconds`` in
            the ``test_step_delegating_executor`` config.

    Returns:
        The result returned by ``execute_pipeline``.
    """
    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipline),
            instance=instance,
            run_config={
                "execution": {
                    "test_step_delegating_executor": {
                        "config": {"check_step_health_interval_seconds": interval_seconds}
                    }
                }
            },
        )
        TestStepHandler.wait_for_processes()
    return result


def test_execute_intervals():
    """The health-check interval should control how often step health is checked.

    With a 60 second interval no health check is expected; with an interval of
    0 at least three are expected. Both runs must launch three steps and
    terminate none.
    """
    result = _execute_foo_with_health_interval(60)
    assert result.success
    assert TestStepHandler.launch_step_count == 3
    assert TestStepHandler.terminate_step_count == 0
    # pipeline should complete before 60s
    assert TestStepHandler.check_step_health_count == 0

    result = _execute_foo_with_health_interval(0)
    assert result.success
    assert TestStepHandler.launch_step_count == 3
    assert TestStepHandler.terminate_step_count == 0
    # every step should get checked at least once
    assert TestStepHandler.check_step_health_count >= 3
def test_intermediate_storage_deprecation_warning():
    """Configuring intermediate storage should emit the deprecation ``UserWarning``.

    Executing without any run config must produce no warnings; executing with
    an ``intermediate_storage`` section must warn with the expected message.
    """

    @solid
    def return_one(_):
        return 1

    @pipeline
    def foo():
        return_one()

    # Baseline: no intermediate storage configured, so nothing should warn.
    with assert_no_warnings():
        execute_pipeline(foo)

    # Escaped because the message contains regex metacharacters (the dots).
    expected_message = re.escape(
        "Intermediate Storages are deprecated in 0.10.0 and will be removed in 0.11.0."
    )
    storage_config = {"intermediate_storage": {"filesystem": {}}}
    with pytest.warns(UserWarning, match=expected_message):
        execute_pipeline(foo, run_config=storage_config)
def test_skipping():
    """Executing the job built by ``define_skpping_job`` should succeed."""
    from .test_jobs import define_skpping_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        recon_job = reconstructable(define_skpping_job)
        result = execute_pipeline(recon_job, instance=instance)
        TestStepHandler.wait_for_processes()

    assert result.success
def test_dynamic_execute():
    """The job built by ``define_dynamic_job`` should succeed and emit 11 STEP_START events."""
    from .test_jobs import define_dynamic_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        recon_job = reconstructable(define_dynamic_job)
        result = execute_pipeline(recon_job, instance=instance)
        TestStepHandler.wait_for_processes()

    assert result.success

    step_start_value = DagsterEventType.STEP_START.value
    step_starts = [
        event for event in result.event_list if event.event_type_value == step_start_value
    ]
    assert len(step_starts) == 11
def test_docker_executor():
    """Run ``demo_pipeline_docker`` with the docker executor against a postgres-backed instance.

    Note that this test relies on having AWS credentials in the environment.
    """
    postgres_network = "container:test-postgres-db-docker"

    docker_config = {
        "networks": [postgres_network],
        "env_vars": [
            "AWS_ACCESS_KEY_ID",
            "AWS_SECRET_ACCESS_KEY",
        ],
    }
    executor_config = {"execution": {"docker": {"config": docker_config}}}

    docker_image = get_test_project_docker_image()
    if IS_BUILDKITE:
        # On Buildkite the executor is given registry configuration.
        docker_config["registry"] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    environments_path = get_test_project_environments_path
    base_config = merge_yamls(
        [
            os.path.join(environments_path(), "env.yaml"),
            os.path.join(environments_path(), "env_s3.yaml"),
        ]
    )
    run_config = merge_dicts(base_config, executor_config)

    with environ(
        {"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}
    ), docker_postgres_instance() as instance:
        recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_docker", docker_image)
        result = execute_pipeline(recon_pipeline, run_config=run_config, instance=instance)
        assert result.success
def test_intermediate_storage_event_message():
    """Event messages should not mention IO managers when intermediate storage is configured.

    Handled-output events must not contain "output manager" and loaded-input
    events must not contain "input manager".
    """

    @solid
    def return_one(_):
        return 1

    @solid
    def plus_one(_, one):
        return one + 1

    @pipeline
    def foo():
        plus_one(return_one())

    result = execute_pipeline(foo, run_config={"intermediate_storage": {"filesystem": {}}})

    handled_outputs = (event for event in result.event_list if event.is_handled_output)
    for event in handled_outputs:
        assert "output manager" not in event.message

    loaded_inputs = (event for event in result.event_list if event.is_loaded_input)
    for event in loaded_inputs:
        assert "input manager" not in event.message
def test_execute_verify_step():
    """The verify-step executor should announce its step handler and verify three steps."""
    executor_config = {"execution": {"test_step_delegating_executor_verify_step": {"config": {}}}}

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipline_verify_step),
            instance=instance,
            run_config=executor_config,
        )
        TestStepHandler.wait_for_processes()

    start_message = "Starting execution with step handler TestStepHandler"
    # Membership is tested against each event object itself.
    assert any(start_message in event for event in result.event_list)
    assert result.success
    assert TestStepHandler.verify_step_count == 3
def test_launch_once(external_repo_context, capfd):
    """A run-key sensor should launch one run and skip on later evaluations.

    The sensor is evaluated three times at frozen times 30 seconds apart. The
    first evaluation must produce a run and a SUCCESS tick; the second and
    third must produce SKIPPED ticks, even after a run with the same config
    and tags is created manually.

    Args:
        external_repo_context: Passed through to ``instance_with_sensors``.
        capfd: Used to read captured output for the skip message check.
    """
    freeze_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tz="UTC",
        ),
        "US/Central",
    )

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")

            def fetch_ticks():
                # Look the ticks up afresh each time, keyed by the sensor's origin id.
                return instance.get_job_ticks(external_sensor.get_external_origin_id())

            sensor_state = JobState(
                external_sensor.get_external_origin(),
                JobType.SENSOR,
                JobStatus.RUNNING,
            )
            instance.add_job_state(sensor_state)
            assert instance.get_runs_count() == 0
            ticks = fetch_ticks()
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = fetch_ticks()
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

        # run again (after 30 seconds), to ensure that the run key maintains idempotence
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 1
            ticks = fetch_ticks()
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            expected_skip_message = (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
            )
            assert expected_skip_message in captured.out

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

        # Sensor loop still executes
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            ticks = fetch_ticks()
            assert len(ticks) == 3
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
def run_in_prod():
    """Execute ``download_pipeline`` using its ``"prod"`` mode."""
    execute_pipeline(
        download_pipeline,
        mode="prod",
    )
def test_local():
    """``download_pipeline`` should succeed when executed in ``"unit_test"`` mode."""
    outcome = execute_pipeline(
        download_pipeline,
        mode="unit_test",
    )
    assert outcome.success
def test_launch_once(external_repo_context, capfd):
    """A run-key sensor should launch one run and skip on repeated iterations.

    The sensor iteration is executed three times at one frozen time. The
    first must produce a run and a SUCCESS tick; the second and third must
    produce SKIPPED ticks, even after a run with the same config and tags is
    created manually.

    Args:
        external_repo_context: Passed through to ``instance_with_sensors``.
        capfd: Used to read captured output for the skip message check.
    """
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")

            def run_sensor_iteration():
                # Wrapping in list() drains the iterator returned by the sensor iteration.
                list(
                    execute_sensor_iteration(
                        instance, get_default_daemon_logger("SensorDaemon")
                    )
                )

            def fetch_ticks():
                # Look the ticks up afresh each time, keyed by the sensor's origin id.
                return instance.get_job_ticks(external_sensor.get_external_origin_id())

            sensor_state = JobState(
                external_sensor.get_external_origin(),
                JobType.SENSOR,
                JobStatus.RUNNING,
            )
            instance.add_job_state(sensor_state)
            assert instance.get_runs_count() == 0
            ticks = fetch_ticks()
            assert len(ticks) == 0

            run_sensor_iteration()
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = fetch_ticks()
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

            # Run again: the run count must stay at one and the new tick must be SKIPPED.
            run_sensor_iteration()
            assert instance.get_runs_count() == 1
            ticks = fetch_ticks()
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            expected_skip_message = (
                f"Run {run.run_id} already completed with the run key `only_once` for run_key_sensor"
            )
            assert expected_skip_message in captured.out

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

            # Sensor loop still executes
            run_sensor_iteration()
            ticks = fetch_ticks()
            assert len(ticks) == 3
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )