def test_custom_interval_sensor(external_repo_context):
    """Exercise tick creation for ``custom_interval_sensor`` at +0s, +30s and +60s.

    The first iteration must record a single SKIPPED tick at the frozen time,
    an iteration 30 seconds later must not add a tick, and an iteration a
    further 30 seconds later must add a second SKIPPED tick that validates
    against 2019-02-28 00:01.
    """
    start_time = pendulum.datetime(year=2019, month=2, day=28).in_tz("US/Central")
    after_30_seconds = start_time.add(seconds=30)
    after_60_seconds = after_30_seconds.add(seconds=30)

    with instance_with_sensors(external_repo_context) as (instance, external_repo):

        def run_iteration():
            # Consume the iteration fully so every step of it executes.
            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

        def fetch_ticks():
            return instance.get_job_ticks(external_sensor.get_external_origin_id())

        with pendulum.test(start_time):
            external_sensor = external_repo.get_external_sensor("custom_interval_sensor")
            running_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(running_state)
            recorded = fetch_ticks()
            assert len(recorded) == 0

            run_iteration()
            recorded = fetch_ticks()
            assert len(recorded) == 1
            validate_tick(recorded[0], external_sensor, start_time, JobTickStatus.SKIPPED)

        with pendulum.test(after_30_seconds):
            run_iteration()
            recorded = fetch_ticks()
            # no additional tick created after 30 seconds
            assert len(recorded) == 1

        with pendulum.test(after_60_seconds):
            run_iteration()
            recorded = fetch_ticks()
            assert len(recorded) == 2

            second_tick_time = pendulum.datetime(year=2019, month=2, day=28, hour=0, minute=1)
            validate_tick(recorded[0], external_sensor, second_tick_time, JobTickStatus.SKIPPED)
def test_simple_sensor(external_repo_context, capfd):
    """Check ``simple_sensor`` skips just before midnight UTC and launches a run at midnight.

    At 23:59:59 UTC the iteration must create no run and record a SKIPPED
    tick; one second later it must launch exactly one run and record a
    SUCCESS tick carrying that run's id. The captured stdout of each
    iteration is compared exactly against the expected daemon log records,
    one record per line.
    """
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )

            captured = capfd.readouterr()
            # Each log record is terminated by an explicit "\n" so the expected
            # text cannot be silently altered by whitespace reflowing.
            assert captured.out == (
                "2019-02-27 17:59:59 - SensorDaemon - INFO - Checking for new runs for the following sensors: simple_sensor\n"
                "2019-02-27 17:59:59 - SensorDaemon - INFO - Sensor returned false for simple_sensor, skipping\n"
            )

        freeze_datetime = freeze_datetime.add(seconds=1)

        with pendulum.test(freeze_datetime):
            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))
            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            validate_run_started(run)
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            expected_datetime = pendulum.datetime(year=2019, month=2, day=28)
            validate_tick(
                ticks[0],
                external_sensor,
                expected_datetime,
                JobTickStatus.SUCCESS,
                [run.run_id],
            )

            captured = capfd.readouterr()
            assert captured.out == (
                "2019-02-27 18:00:00 - SensorDaemon - INFO - Checking for new runs for the following sensors: simple_sensor\n"
                "2019-02-27 18:00:00 - SensorDaemon - INFO - Launching run for simple_sensor\n"
                "2019-02-27 18:00:00 - SensorDaemon - INFO - Completed launch of run {run_id} for simple_sensor\n"
            ).format(run_id=run.run_id)
def test_launch_once(external_repo_context, capfd):
    """Verify that ``run_key_sensor`` launches a run for its run key only once.

    The first iteration must launch exactly one run and record a SUCCESS tick
    carrying that run's id. A second iteration at the same frozen time must
    not launch another run, must record a SKIPPED tick, and must report in
    the captured output that the run key was already completed.
    """
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING)
            )
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            # NOTE(review): the return value is not iterated here; if
            # execute_sensor_iteration is a generator in this code base it has
            # to be consumed (e.g. list(...)) to do any work — confirm.
            execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon"))
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

            # run again, ensure that no second run is launched for the same run key
            execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon"))
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            # The message has to be searched for in the captured output: a bare
            # non-empty string is always truthy and would assert nothing.
            assert (
                f"Run {run.run_id} already completed with the run key `only_once` for run_key_sensor"
                in captured.out
            )
def test_wrong_config_sensor(external_repo_context, capfd):
    """Check that ``wrong_config_sensor`` yields a FAILURE tick on every iteration.

    Two iterations are run at the same frozen time. Each one must create no
    run, add one more tick in FAILURE state with the config error message,
    and print that message to the captured output.
    """
    config_error = "Error in config for pipeline the_pipeline"
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(frozen_now):
            external_sensor = external_repo.get_external_sensor("wrong_config_sensor")
            running_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(running_state)
            assert instance.get_runs_count() == 0
            recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(recorded) == 0

            # The first pass is the initial failure; the second pass shows that the
            # error repeats on subsequent ticks.
            for expected_tick_count in (1, 2):
                list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))
                assert instance.get_runs_count() == 0
                recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
                assert len(recorded) == expected_tick_count
                validate_tick(
                    recorded[0],
                    external_sensor,
                    frozen_now,
                    JobTickStatus.FAILURE,
                    [],
                    config_error,
                )

                output = capfd.readouterr()
                assert config_error in output.out
def evaluate_sensors(instance, workspace):
    """Run one full sensor iteration for ``instance`` against ``workspace``.

    The iteration is logged through the default "SensorDaemon" logger and is
    consumed to the end; nothing is returned.
    """
    daemon_logger = get_default_daemon_logger("SensorDaemon")
    for _ in execute_sensor_iteration(instance, daemon_logger, workspace):
        pass
def test_large_backfill(external_repo_context):
    """Request a three-partition backfill on ``large_partition_set`` and expect three runs."""
    with instance_for_context(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        partition_set = external_repo.get_external_partition_set("large_partition_set")
        requested_backfill = PartitionBackfill(
            backfill_id="simple",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(requested_backfill)
        assert instance.get_runs_count() == 0

        # Consume the whole backfill iteration before counting runs.
        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, grpc_server_registry, daemon_logger))

        assert instance.get_runs_count() == 3
def test_error_sensor(external_repo_context, capfd):
    """Check that ``error_sensor`` produces a FAILURE tick and logs the evaluation error.

    A single iteration must create no run, record one FAILURE tick carrying
    the evaluation error message, and print both the "Failed to resolve
    sensor" line and the evaluation error to the captured output.
    """
    evaluation_error = (
        "Error occurred during the execution of evaluation_fn for sensor error_sensor"
    )
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")

    with instance_with_sensors(external_repo_context) as (instance, external_repo):
        with pendulum.test(frozen_now):
            external_sensor = external_repo.get_external_sensor("error_sensor")
            running_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(running_state)
            assert instance.get_runs_count() == 0
            recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(recorded) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 0
            recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(recorded) == 1
            validate_tick(
                recorded[0],
                external_sensor,
                frozen_now,
                JobTickStatus.FAILURE,
                [],
                evaluation_error,
            )

            output = capfd.readouterr()
            assert "Failed to resolve sensor for error_sensor : " in output.out
            assert evaluation_error in output.out
def test_unloadable_backfill(external_repo_context):
    """Check that a backfill with an unloadable partition set origin ends up FAILED.

    No run may be created, and the stored backfill must carry a
    ``SerializableErrorInfo`` in its ``error`` field.
    """
    with instance_for_context(external_repo_context) as (
        instance,
        workspace,
        _external_repo,
    ):
        requested_backfill = PartitionBackfill(
            backfill_id="simple",
            partition_set_origin=_unloadable_partition_set_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(requested_backfill)
        assert instance.get_runs_count() == 0

        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, workspace, daemon_logger))

        assert instance.get_runs_count() == 0
        stored_backfill = instance.get_backfill("simple")
        assert stored_backfill.status == BulkActionStatus.FAILED
        assert isinstance(stored_backfill.error, SerializableErrorInfo)
def _create_tick(graphql_context):
    """Run one full sensor iteration for the instance held by ``graphql_context``.

    A test daemon workspace is built from the context's workspace load
    target and stays open for the duration of the iteration.
    """
    load_target = graphql_context.process_context.workspace_load_target
    with create_test_daemon_workspace(load_target) as workspace:
        instance = graphql_context.instance
        daemon_logger = get_default_daemon_logger("SensorDaemon")
        for _ in execute_sensor_iteration(instance, daemon_logger, workspace):
            pass
def test_launch_failure(external_repo_context, capfd):
    """Check sensor behaviour when the configured run launcher is ``ExplodingRunLauncher``.

    With ``always_on_sensor`` running, one iteration must still create a run
    and record a SUCCESS tick carrying that run's id, and the captured
    output must say that the run was created but failed to launch.
    """
    frozen_now = pendulum.datetime(
        year=2019, month=2, day=27, hour=23, minute=59, second=59
    ).in_tz("US/Central")
    launcher_overrides = {
        "run_launcher": {
            "module": "dagster.core.test_utils",
            "class": "ExplodingRunLauncher",
        },
    }

    with instance_with_sensors(external_repo_context, overrides=launcher_overrides) as (
        instance,
        external_repo,
    ):
        with pendulum.test(frozen_now):
            external_sensor = external_repo.get_external_sensor("always_on_sensor")
            running_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(running_state)
            assert instance.get_runs_count() == 0
            recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(recorded) == 0

            list(execute_sensor_iteration(instance, get_default_daemon_logger("SensorDaemon")))

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            recorded = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(recorded) == 1
            validate_tick(
                recorded[0], external_sensor, frozen_now, JobTickStatus.SUCCESS, [run.run_id]
            )

            output = capfd.readouterr()
            failure_notice = "Run {run_id} created successfully but failed to launch.".format(
                run_id=run.run_id
            )
            assert failure_notice in output.out
def test_canceled_backfill():
    """Cancel a backfill after its first iteration step and check no more runs appear.

    The backfill iteration is advanced by a single step, at which point one
    run must exist and the backfill must still be REQUESTED. The backfill is
    then marked CANCELED and the rest of the iteration is consumed; the
    status must stay CANCELED and the run count must stay at one.
    """
    with instance_for_context(default_repo) as (
        instance,
        workspace,
        external_repo,
    ):
        partition_set = external_repo.get_external_partition_set("simple_partition_set")
        requested_backfill = PartitionBackfill(
            backfill_id="simple",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(requested_backfill)
        assert instance.get_runs_count() == 0

        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        steps = execute_backfill_iteration(instance, workspace, daemon_logger)

        # Advance exactly one step so the backfill is caught mid-flight.
        next(steps)
        assert instance.get_runs_count() == 1
        in_flight = instance.get_backfills()[0]
        assert in_flight.status == BulkActionStatus.REQUESTED

        instance.update_backfill(in_flight.with_status(BulkActionStatus.CANCELED))

        # Consume whatever remains of the iteration after the cancellation.
        list(steps)
        reloaded = instance.get_backfill(in_flight.backfill_id)
        assert reloaded.status == BulkActionStatus.CANCELED
        assert instance.get_runs_count() == 1
def _create_tick(instance):
    """Run one full sensor iteration for ``instance`` inside a ``DynamicWorkspace``.

    A ``ProcessGrpcServerRegistry`` and the workspace built on it are both
    kept open as context managers while the iteration is consumed.
    """
    with ProcessGrpcServerRegistry() as registry, DynamicWorkspace(registry) as workspace:
        daemon_logger = get_default_daemon_logger("SensorDaemon")
        for _ in execute_sensor_iteration(instance, daemon_logger, workspace):
            pass
def test_simple_backfill(external_repo_context):
    """Backfill three partitions of ``simple_partition_set`` and check the tags on each run."""
    with instance_for_context(external_repo_context) as (instance, external_repo):
        partition_set = external_repo.get_external_partition_set("simple_partition_set")
        requested_backfill = PartitionBackfill(
            backfill_id="simple",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(requested_backfill)
        assert instance.get_runs_count() == 0

        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, daemon_logger))

        assert instance.get_runs_count() == 3
        # The run list is unpacked with the run for partition "three" first.
        three, two, one = instance.get_runs()
        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "simple"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
def _create_sensor_tick(instance):
    """Run one full sensor iteration for ``instance`` using a location handle manager.

    The gRPC server registry is created with ``wait_for_processes_on_exit=True``
    and, together with the handle manager built on it, stays open while the
    iteration is consumed.
    """
    with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as registry:
        with RepositoryLocationHandleManager(registry) as handle_manager:
            daemon_logger = get_default_daemon_logger("SensorDaemon")
            for _ in execute_sensor_iteration(instance, daemon_logger, handle_manager):
                pass
def _create_sensor_tick(instance):
    """Run one full sensor iteration for ``instance`` using a repository location manager.

    A ``ProcessGrpcServerRegistry`` and the ``RepositoryLocationManager``
    built on it are both kept open while the iteration is consumed.
    """
    with ProcessGrpcServerRegistry() as registry:
        with RepositoryLocationManager(registry) as location_manager:
            daemon_logger = get_default_daemon_logger("SensorDaemon")
            for _ in execute_sensor_iteration(instance, daemon_logger, location_manager):
                pass
def test_custom_interval_sensor_with_offset(external_repo_context, monkeypatch):
    """Drive the sensor iteration loop with a fake ``time.sleep`` that advances test time.

    ``time.sleep`` is replaced by a function that records the requested
    duration and moves pendulum's test clock forward by that amount. After
    one tick has been created, the loop is run until 65 seconds past the
    frozen start; the clock must then read start + 65s, a second tick must
    exist, and the recorded sleeps must add up to 65.
    """
    start_time = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, tz="UTC"), "US/Central"
    )
    loop_deadline = start_time.add(seconds=65)
    recorded_sleeps = []

    def fake_sleep(duration):
        # Instead of blocking, note the duration and advance the frozen clock.
        recorded_sleeps.append(duration)
        pendulum.set_test_now(pendulum.now().add(seconds=duration))

    monkeypatch.setattr(time, "sleep", fake_sleep)

    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):

        def fetch_ticks():
            return instance.get_job_ticks(external_sensor.get_external_origin_id())

        with pendulum.test(start_time):
            # 60 second custom interval
            external_sensor = external_repo.get_external_sensor("custom_interval_sensor")
            running_state = JobState(
                external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
            )
            instance.add_job_state(running_state)

            # create a tick
            evaluate_sensors(instance, grpc_server_registry)
            assert len(fetch_ticks()) == 1

            # calling for another iteration should not generate another tick because time has
            # not advanced
            evaluate_sensors(instance, grpc_server_registry)
            assert len(fetch_ticks()) == 1

            # call the sensor_iteration_loop, which should loop, and call the monkeypatched
            # sleep to advance 30 seconds
            loop = execute_sensor_iteration_loop(
                instance,
                grpc_server_registry,
                get_default_daemon_logger("SensorDaemon"),
                daemon_shutdown_event=None,
                until=loop_deadline.timestamp(),
            )
            list(loop)

            assert pendulum.now() == start_time.add(seconds=65)
            assert len(fetch_ticks()) == 2
            assert sum(recorded_sleeps) == 65
def evaluate_sensors(instance, grpc_server_registry):
    """Run one full sensor iteration for ``instance`` on top of ``grpc_server_registry``.

    A ``RepositoryLocationManager`` is opened on the registry for the
    duration of the iteration, which is consumed to the end.
    """
    with RepositoryLocationManager(grpc_server_registry) as location_manager:
        daemon_logger = get_default_daemon_logger("SensorDaemon")
        for _ in execute_sensor_iteration(instance, daemon_logger, location_manager):
            pass
def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Call the sensor iteration once at a frozen time on an instance rebuilt from a ref.

    The instance is reconstructed from ``instance_ref``, the iteration is
    invoked with ``debug_crash_flags`` while pendulum's clock is frozen at
    ``execution_datetime``, and ``cleanup_test_instance`` always runs
    afterwards, even if the iteration raises.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                daemon_logger = get_default_daemon_logger("SensorDaemon")
                execute_sensor_iteration(
                    instance, daemon_logger, debug_crash_flags=debug_crash_flags
                )
        finally:
            cleanup_test_instance(instance)
def test_backfill_from_failure_for_subselection():
    """Check that a from-failure backfill keeps the solid selection of the failed run.

    A pipeline run restricted to ``fail_three`` and ``success_four`` is
    executed for partition "one" and must end in FAILURE. A from-failure
    backfill for that partition must then add a second run whose
    ``solids_to_execute`` and ``solid_selection`` each contain two entries.
    """
    with instance_for_context(default_repo) as (
        instance,
        workspace,
        external_repo,
    ):
        partition = parallel_failure_partition_set.get_partition("one")
        partition_run_config = parallel_failure_partition_set.run_config_for_partition(partition)
        partition_tags = parallel_failure_partition_set.tags_for_partition(partition)
        partition_set = external_repo.get_external_partition_set("parallel_failure_partition_set")

        execute_pipeline(
            parallel_failure_pipeline,
            run_config=partition_run_config,
            tags=partition_tags,
            instance=instance,
            solid_selection=["fail_three", "success_four"],
            raise_on_error=False,
        )

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_finish(instance)
        failed_run = instance.get_runs()[0]
        assert failed_run.status == PipelineRunStatus.FAILURE

        from_failure_backfill = PartitionBackfill(
            backfill_id="fromfailure",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one"],
            from_failure=True,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(from_failure_backfill)

        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, workspace, daemon_logger))

        assert instance.get_runs_count() == 2
        backfill_run = instance.get_runs(limit=1)[0]
        assert backfill_run.solids_to_execute
        assert backfill_run.solid_selection
        assert len(backfill_run.solids_to_execute) == 2
        assert len(backfill_run.solid_selection) == 2
def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Run one full sensor iteration at a frozen time inside a ``DynamicWorkspace``.

    The instance is reconstructed from ``instance_ref``. With pendulum's
    clock frozen at ``execution_datetime``, a gRPC server registry and a
    workspace on top of it are opened and the iteration is consumed with
    ``debug_crash_flags``. ``cleanup_test_instance`` always runs afterwards.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                with ProcessGrpcServerRegistry() as registry:
                    with DynamicWorkspace(registry) as workspace:
                        daemon_logger = get_default_daemon_logger("SensorDaemon")
                        iteration = execute_sensor_iteration(
                            instance,
                            daemon_logger,
                            workspace,
                            debug_crash_flags=debug_crash_flags,
                        )
                        for _ in iteration:
                            pass
        finally:
            cleanup_test_instance(instance)
def test_sensor_next_ticks(graphql_context):
    """Check the ``nextTick`` field of the sensor query in three sensor states.

    ``nextTick`` must be empty while the sensor has no job state, still empty
    once it is RUNNING but has no ticks, and populated after one sensor
    iteration has recorded a tick.
    """
    repository_location = graphql_context.get_repository_location(main_repo_location_name())
    external_repository = repository_location.get_repository(main_repo_name())
    graphql_context.instance.reconcile_scheduler_state(external_repository)

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    def query_sensor():
        return execute_dagster_graphql(
            graphql_context, GET_SENSOR_QUERY, variables={"sensorSelector": sensor_selector}
        )

    # test default sensor off
    response = query_sensor()
    assert response.data
    assert response.data["sensorOrError"]["__typename"] == "Sensor"
    upcoming = response.data["sensorOrError"]["nextTick"]
    assert not upcoming

    # test default sensor with no tick
    running_state = JobState(
        external_sensor.get_external_origin(), JobType.SENSOR, JobStatus.RUNNING
    )
    graphql_context.instance.add_job_state(running_state)
    response = query_sensor()
    assert response.data
    assert len(response.data["sensorOrError"]["sensorState"]["ticks"]) == 0
    assert response.data["sensorOrError"]["__typename"] == "Sensor"
    upcoming = response.data["sensorOrError"]["nextTick"]
    assert not upcoming

    # test default sensor with last tick
    instance = graphql_context.instance
    daemon_logger = get_default_daemon_logger("SensorDaemon")
    list(execute_sensor_iteration(instance, daemon_logger))

    response = query_sensor()
    assert len(response.data["sensorOrError"]["sensorState"]["ticks"]) == 1
    assert response.data
    assert response.data["sensorOrError"]["__typename"] == "Sensor"
    upcoming = response.data["sensorOrError"]["nextTick"]
    assert upcoming
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    """Run one full backfill iteration at a fixed frozen time on a rebuilt instance.

    The clock is frozen at 2021-02-17 (built with ``pendulum.datetime`` and
    converted to US/Central). The instance is reconstructed from
    ``instance_ref``, the iteration is consumed with ``debug_crash_flags``,
    and ``cleanup_test_instance`` always runs afterwards.
    """
    frozen_now = pendulum.datetime(year=2021, month=2, day=17).in_tz("US/Central")
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(frozen_now):
                daemon_logger = get_default_daemon_logger("BackfillDaemon")
                iteration = execute_backfill_iteration(
                    instance, daemon_logger, debug_crash_flags=debug_crash_flags
                )
                for _ in iteration:
                    pass
        finally:
            cleanup_test_instance(instance)
def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Run one full sensor iteration at a frozen time using a location handle manager.

    The instance is reconstructed from ``instance_ref``. With pendulum's
    clock frozen at ``execution_datetime``, a gRPC server registry (created
    with ``wait_for_processes_on_exit=True``) and a handle manager on top of
    it are opened and the iteration is consumed with ``debug_crash_flags``.
    ``cleanup_test_instance`` always runs afterwards.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as registry:
                    with RepositoryLocationHandleManager(registry) as handle_manager:
                        daemon_logger = get_default_daemon_logger("SensorDaemon")
                        iteration = execute_sensor_iteration(
                            instance,
                            daemon_logger,
                            handle_manager,
                            debug_crash_flags=debug_crash_flags,
                        )
                        for _ in iteration:
                            pass
        finally:
            cleanup_test_instance(instance)
def _test_launch_sensor_runs_in_subprocess(instance_ref, execution_datetime, debug_crash_flags):
    """Run one full sensor iteration at a frozen time inside a test daemon workspace.

    The instance is reconstructed from ``instance_ref``. With pendulum's
    clock frozen at ``execution_datetime``, a workspace is created from
    ``workspace_load_target()`` and the instance, and the iteration is
    consumed with ``debug_crash_flags``. ``cleanup_test_instance`` always
    runs afterwards.
    """
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                with create_test_daemon_workspace(
                    workspace_load_target=workspace_load_target(),
                    instance=instance,
                ) as workspace:
                    daemon_logger = get_default_daemon_logger("SensorDaemon")
                    iteration = execute_sensor_iteration(
                        instance,
                        daemon_logger,
                        workspace,
                        debug_crash_flags=debug_crash_flags,
                    )
                    for _ in iteration:
                        pass
        finally:
            cleanup_test_instance(instance)
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    """Run one full backfill iteration at a fixed frozen time with a gRPC server registry.

    The clock is frozen at 2021-02-17 (built with ``create_pendulum_time``
    and converted to US/Central). The instance is reconstructed from
    ``instance_ref``, a registry created with
    ``wait_for_processes_on_exit=True`` is opened, the iteration is consumed
    with ``debug_crash_flags``, and ``cleanup_test_instance`` always runs
    afterwards.
    """
    frozen_now = to_timezone(create_pendulum_time(year=2021, month=2, day=17), "US/Central")
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(frozen_now):
                with ProcessGrpcServerRegistry(wait_for_processes_on_exit=True) as registry:
                    daemon_logger = get_default_daemon_logger("BackfillDaemon")
                    iteration = execute_backfill_iteration(
                        instance,
                        registry,
                        daemon_logger,
                        debug_crash_flags=debug_crash_flags,
                    )
                    for _ in iteration:
                        pass
        finally:
            cleanup_test_instance(instance)
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    """Run one full backfill iteration at a fixed frozen time in a test daemon workspace.

    The clock is frozen at 2021-02-17 (built with ``create_pendulum_time``
    and converted to US/Central). The instance is reconstructed from
    ``instance_ref``, a workspace from ``create_test_daemon_workspace()`` is
    opened, the iteration is consumed with ``debug_crash_flags``, and
    ``cleanup_test_instance`` always runs afterwards.
    """
    frozen_now = to_timezone(create_pendulum_time(year=2021, month=2, day=17), "US/Central")
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(frozen_now):
                with create_test_daemon_workspace() as workspace:
                    daemon_logger = get_default_daemon_logger("BackfillDaemon")
                    iteration = execute_backfill_iteration(
                        instance,
                        workspace,
                        daemon_logger,
                        debug_crash_flags=debug_crash_flags,
                    )
                    for _ in iteration:
                        pass
        finally:
            cleanup_test_instance(instance)
def test_backfill_from_partitioned_job(external_repo_context):
    """Backfill the first three partitions of ``my_config`` and check each run's tags.

    Runs are walked in reverse of the order returned by ``get_runs`` and
    matched by position against the partition names taken from
    ``my_config.partitions_def``.
    """
    partition_set_name = "comp_always_succeed_default_partition_set"
    backfill_id = "partition_schedule_from_job"
    partition_name_list = [
        definition.name for definition in my_config.partitions_def.get_partitions()
    ]

    with instance_for_context(external_repo_context) as (
        instance,
        workspace,
        external_repo,
    ):
        partition_set = external_repo.get_external_partition_set(
            "comp_always_succeed_default_partition_set"
        )
        requested_backfill = PartitionBackfill(
            backfill_id="partition_schedule_from_job",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=partition_name_list[:3],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(requested_backfill)
        assert instance.get_runs_count() == 0

        daemon_logger = get_default_daemon_logger("BackfillDaemon")
        list(execute_backfill_iteration(instance, workspace, daemon_logger))

        assert instance.get_runs_count() == 3
        for position, run in enumerate(reversed(instance.get_runs())):
            assert run.tags[BACKFILL_ID_TAG] == backfill_id
            assert run.tags[PARTITION_NAME_TAG] == partition_name_list[position]
            assert run.tags[PARTITION_SET_TAG] == partition_set_name
def test_partial_backfill(external_repo_context):
    """Run a full backfill and then a step-restricted one on ``partial_partition_set``.

    The "full" backfill must produce three successful runs in which all
    three steps succeeded. After deleting the run for partition "one", the
    "partial" backfill (``reexecution_steps=["step_one"]``) must add three
    more successful runs in which only ``step_one`` ran.
    """
    with instance_for_context(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):

        def run_backfill_iteration():
            list(
                execute_backfill_iteration(
                    instance,
                    grpc_server_registry,
                    get_default_daemon_logger("BackfillDaemon"),
                )
            )
            wait_for_all_runs_to_start(instance)

        partition_set = external_repo.get_external_partition_set("partial_partition_set")

        # create full runs, where every step is executed
        full_backfill = PartitionBackfill(
            backfill_id="full",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=None,
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(full_backfill)
        assert instance.get_runs_count() == 0
        run_backfill_iteration()
        assert instance.get_runs_count() == 3

        # The run list is unpacked with the run for partition "three" first.
        three, two, one = instance.get_runs()
        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "full"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
            assert run.status == PipelineRunStatus.SUCCESS
            assert step_succeeded(instance, run, "step_one")
            assert step_succeeded(instance, run, "step_two")
            assert step_succeeded(instance, run, "step_three")

        # delete one of the runs, the partial reexecution should still succeed because the steps
        # can be executed independently, require no input/output config
        instance.delete_run(one.run_id)
        assert instance.get_runs_count() == 2

        # create partial runs
        partial_backfill = PartitionBackfill(
            backfill_id="partial",
            partition_set_origin=partition_set.get_external_origin(),
            status=BulkActionStatus.REQUESTED,
            partition_names=["one", "two", "three"],
            from_failure=False,
            reexecution_steps=["step_one"],
            tags=None,
            backfill_timestamp=pendulum.now().timestamp(),
        )
        instance.add_backfill(partial_backfill)
        run_backfill_iteration()

        assert instance.get_runs_count() == 5
        partial_filter = PipelineRunsFilter(tags={BACKFILL_ID_TAG: "partial"})
        assert instance.get_runs_count(filters=partial_filter) == 3

        three, two, one = instance.get_runs(filters=partial_filter)
        for run in (one, two, three):
            assert run.status == PipelineRunStatus.SUCCESS
            assert step_succeeded(instance, run, "step_one")
            assert step_did_not_run(instance, run, "step_two")
            assert step_did_not_run(instance, run, "step_three")
def test_failure_backfill(external_repo_context):
    """Run a failing backfill and then a from-failure backfill on the same partitions.

    While the failure flag file exists, the "shouldfail" backfill on
    ``conditionally_fail_partition_set`` must produce three FAILURE runs in
    which ``always_succeed`` succeeded, ``conditionally_fail`` failed and
    ``after_failure`` did not run. Once the flag file is removed, the
    "fromfailure" backfill (``from_failure=True``) must add three SUCCESS
    runs in which ``always_succeed`` did not run and the other two steps
    succeeded. Every run is checked individually.
    """
    output_file = _failure_flag_file()

    with instance_for_context(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "conditionally_fail_partition_set"
        )
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="shouldfail",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )
        assert instance.get_runs_count() == 0

        try:
            # The flag file exists only for the duration of the first backfill.
            touch_file(output_file)
            list(
                execute_backfill_iteration(
                    instance,
                    grpc_server_registry,
                    get_default_daemon_logger("BackfillDaemon"),
                )
            )
            wait_for_all_runs_to_start(instance)
        finally:
            os.remove(output_file)

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
        # The run list is unpacked with the run for partition "three" first.
        three, two, one = runs
        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "shouldfail"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
            assert run.status == PipelineRunStatus.FAILURE
            assert step_succeeded(instance, run, "always_succeed")
            assert step_failed(instance, run, "conditionally_fail")
            assert step_did_not_run(instance, run, "after_failure")

        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )

        assert not os.path.isfile(_failure_flag_file())
        list(
            execute_backfill_iteration(
                instance,
                grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon"),
            )
        )
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 6
        from_failure_filter = PipelineRunsFilter(tags={BACKFILL_ID_TAG: "fromfailure"})
        assert instance.get_runs_count(filters=from_failure_filter) == 3

        runs = instance.get_runs(filters=from_failure_filter)
        three, two, one = runs
        # Step checks are made against each run in turn, so that runs "two"
        # and "three" are inspected themselves rather than run "one" again.
        for run, partition_name in ((one, "one"), (two, "two"), (three, "three")):
            assert run.tags[BACKFILL_ID_TAG] == "fromfailure"
            assert run.tags[PARTITION_NAME_TAG] == partition_name
            assert run.status == PipelineRunStatus.SUCCESS
            assert step_did_not_run(instance, run, "always_succeed")
            assert step_succeeded(instance, run, "conditionally_fail")
            assert step_succeeded(instance, run, "after_failure")
def logger():
    """Return the default daemon logger obtained for the name "MonitoringDaemon"."""
    monitoring_logger = get_default_daemon_logger("MonitoringDaemon")
    return monitoring_logger