Example #1
def test_custom_interval_sensor(external_repo_context):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, tz="UTC"),
        "US/Central")
    with instance_with_sensors(external_repo_context) as (instance,
                                                          external_repo):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "custom_interval_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(ticks[0], external_sensor, freeze_datetime,
                          JobTickStatus.SKIPPED)

            freeze_datetime = freeze_datetime.add(seconds=30)

        with pendulum.test(freeze_datetime):
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            # no additional tick created after 30 seconds
            assert len(ticks) == 1

            freeze_datetime = freeze_datetime.add(seconds=30)

        with pendulum.test(freeze_datetime):
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            expected_datetime = create_pendulum_time(year=2019,
                                                     month=2,
                                                     day=28,
                                                     hour=0,
                                                     minute=1)
            validate_tick(ticks[0], external_sensor, expected_datetime,
                          JobTickStatus.SKIPPED)
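
For reference, a minimal sketch of how a sensor with a custom evaluation interval might be declared, assuming dagster's @sensor decorator and its minimum_interval_seconds argument; the actual custom_interval_sensor definition lives elsewhere in the test repo.

from dagster import SkipReason, sensor


# Hypothetical stand-in for the repo's custom_interval_sensor: with a
# 60-second minimum interval, a second evaluation 30 seconds later must not
# produce a new tick, which is exactly what the test above asserts.
@sensor(pipeline_name="the_pipeline", minimum_interval_seconds=60)
def custom_interval_sensor(_context):
    return SkipReason("waiting out the custom interval")
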
Example #2
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
            to_timezone(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern")
        ):

            context_without_time = build_schedule_context(instance)

            start_date = datetime(year=2019, month=1, day=1)

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
            )
            def hourly_foo_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_foo_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="UTC"),
                DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
                relativedelta(hours=1),
            )

            execution_data = hourly_foo_schedule.get_execution_data(context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time": create_pendulum_time(
                    year=2019, month=2, day=26, hour=23, tz="UTC"
                ).isoformat()
            }

            valid_time = create_pendulum_time(
                year=2019, month=1, day=27, hour=1, minute=25, tz="UTC"
            )
            context_with_valid_time = build_schedule_context(instance, valid_time)

            execution_data = hourly_foo_schedule.get_execution_data(context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time": create_pendulum_time(
                    year=2019, month=1, day=27, hour=0, tz="UTC"
                ).isoformat()
            }
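
The HOURS_UNTIL_FEBRUARY_27 constant is not defined in this excerpt; presumably it counts the hourly partitions from the start date up to the frozen day, along these lines:

# Hypothetical reconstruction: hourly partitions from 2019-01-01 00:00 up to
# (but not including) 2019-02-27 00:00, i.e. 31 days of January plus 26 full
# days of February.
HOURS_UNTIL_FEBRUARY_27 = 24 * (31 + 26)
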
Example #3
def test_cursor_sensor(external_repo_context):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance,
        workspace,
        external_repo,
    ):
        with pendulum.test(freeze_datetime):
            skip_sensor = external_repo.get_external_sensor("skip_cursor_sensor")
            run_sensor = external_repo.get_external_sensor("run_cursor_sensor")
            instance.start_sensor(skip_sensor)
            instance.start_sensor(run_sensor)
            evaluate_sensors(instance, workspace)

            skip_ticks = instance.get_job_ticks(skip_sensor.get_external_origin_id())
            assert len(skip_ticks) == 1
            validate_tick(
                skip_ticks[0],
                skip_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            assert skip_ticks[0].cursor == "1"

            run_ticks = instance.get_job_ticks(run_sensor.get_external_origin_id())
            assert len(run_ticks) == 1
            validate_tick(
                run_ticks[0],
                run_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
            )
            assert run_ticks[0].cursor == "1"

        freeze_datetime = freeze_datetime.add(seconds=60)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, workspace)

            skip_ticks = instance.get_job_ticks(skip_sensor.get_external_origin_id())
            assert len(skip_ticks) == 2
            validate_tick(
                skip_ticks[0],
                skip_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            assert skip_ticks[0].cursor == "2"

            run_ticks = instance.get_job_ticks(run_sensor.get_external_origin_id())
            assert len(run_ticks) == 2
            validate_tick(
                run_ticks[0],
                run_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
            )
            assert run_ticks[0].cursor == "2"
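
A rough sketch of how run_cursor_sensor might advance its cursor on each tick, assuming the sensor context's cursor / update_cursor API; skip_cursor_sensor would presumably do the same but return a SkipReason instead of a RunRequest.

from dagster import RunRequest, sensor


# Hypothetical: stores a stringified counter as the cursor, matching the
# "1" then "2" cursor values asserted above.
@sensor(pipeline_name="the_pipeline")
def run_cursor_sensor(context):
    count = int(context.cursor) + 1 if context.cursor else 1
    context.update_cursor(str(count))
    return RunRequest(run_key=str(count), run_config={})
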
Example #4
def today_at_midnight(timezone_name="UTC"):
    check.str_param(timezone_name, "timezone_name")
    now = pendulum.now(timezone_name)
    return create_pendulum_time(now.year,
                                now.month,
                                now.day,
                                tz=now.timezone.name)
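
Usage sketch for today_at_midnight: under a frozen clock it pins to the start of the current day in the requested timezone (illustrative assertion only, reusing the create_pendulum_time helper from these tests).

with pendulum.test(create_pendulum_time(2019, 2, 27, 23, 59, tz="US/Central")):
    assert today_at_midnight("US/Central") == create_pendulum_time(
        2019, 2, 27, tz="US/Central")
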
Example #5
def test_failure_recovery_between_multi_runs(external_repo_context,
                                             crash_location, crash_signal):
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        initial_datetime = create_pendulum_time(year=2019,
                                                month=2,
                                                day=28,
                                                hour=0,
                                                minute=0,
                                                second=0)
        frozen_datetime = initial_datetime.add()  # no-arg .add() just copies the datetime
        external_schedule = external_repo.get_external_schedule(
            "multi_run_schedule")
        with pendulum.test(frozen_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            debug_crash_flags = {
                external_schedule.name: {
                    crash_location: crash_signal
                }
            }

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            assert scheduler_process.exitcode != 0

            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime)

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1

        frozen_datetime = frozen_datetime.add(minutes=1)
        with pendulum.test(frozen_datetime):
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0
            assert instance.get_runs_count() == 2
            validate_run_started(instance.get_runs()[0], initial_datetime)
            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                [run.run_id for run in instance.get_runs()],
            )
Example #6
def test_partitions_for_weekly_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):

            start_date = datetime(year=2019, month=1, day=1)

            @weekly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_week=3,
                start_date=start_date,
                execution_time=time(9, 30),
                execution_timezone="US/Central",
            )
            def weekly_foo_schedule(weekly_time):
                return {"weekly_time": weekly_time.isoformat()}

            assert weekly_foo_schedule.execution_timezone == "US/Central"

            valid_weekly_time = create_pendulum_time(year=2019,
                                                     month=1,
                                                     day=30,
                                                     hour=9,
                                                     minute=30,
                                                     tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_weekly_time)

            execution_data = weekly_foo_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "weekly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=22,
                                     tz="US/Central").isoformat()
            }

            _check_partitions(
                weekly_foo_schedule,
                8,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_DATE_FORMAT,
                relativedelta(weeks=1),
            )
Example #7
def test_partitions_outside_schedule_range():
    with instance_for_test() as instance:
        execution_time = create_pendulum_time(year=2021,
                                              month=1,
                                              day=1,
                                              tz="UTC")
        context = ScheduleExecutionContext(instance.get_ref(), execution_time)

        @monthly_schedule(
            pipeline_name="too early",
            start_date=create_pendulum_time(year=2021,
                                            month=1,
                                            day=1,
                                            tz="UTC"),
        )
        def too_early(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        execution_data = too_early.get_execution_data(context)
        assert len(execution_data) == 1
        skip_data = execution_data[0]
        assert isinstance(skip_data, SkipReason)
        assert skip_data.skip_message == (
            "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
            "the partition set (2021-01-01T00:00:00+00:00). "
            "Verify your schedule's start_date is correct.")

        @monthly_schedule(
            pipeline_name="too late",
            start_date=create_pendulum_time(year=2020,
                                            month=1,
                                            day=1,
                                            tz="UTC"),
            end_date=create_pendulum_time(year=2020, month=12, day=1,
                                          tz="UTC"),
        )
        def too_late(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        execution_data = too_late.get_execution_data(context)
        assert len(execution_data) == 1
        skip_data = execution_data[0]
        assert isinstance(skip_data, SkipReason)
        assert skip_data.skip_message == (
            "Your partition (2020-12-01T00:00:00+00:00) is after the end of "
            "the partition set (2020-11-01T00:00:00+00:00). "
            "Verify your schedule's end_date is correct.")
Example #8
def test_custom_interval_sensor_with_offset(external_repo_context,
                                            monkeypatch):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, tz="UTC"),
        "US/Central")

    sleeps = []

    def fake_sleep(s):
        sleeps.append(s)
        pendulum.set_test_now(pendulum.now().add(seconds=s))

    monkeypatch.setattr(time, "sleep", fake_sleep)

    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):

            # 60 second custom interval
            external_sensor = external_repo.get_external_sensor(
                "custom_interval_sensor")

            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))

            # create a tick
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1

            # calling for another iteration should not generate another tick because time has not
            # advanced
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1

            # call the sensor_iteration_loop, which should loop, and call the monkeypatched sleep
            # to advance 30 seconds
            list(
                execute_sensor_iteration_loop(
                    instance,
                    grpc_server_registry,
                    get_default_daemon_logger("SensorDaemon"),
                    daemon_shutdown_event=None,
                    until=freeze_datetime.add(seconds=65).timestamp(),
                ))

            assert pendulum.now() == freeze_datetime.add(seconds=65)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert sum(sleeps) == 65
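
The fake_sleep pattern above generalizes: any sleep-driven loop can run against a frozen pendulum clock by advancing the test "now" instead of blocking. A self-contained sketch, independent of the daemon code and reusing the tests' create_pendulum_time helper:

import time

import pendulum


def demo_fake_sleep():
    sleeps = []

    def fake_sleep(s):
        # Record the requested sleep and advance the frozen clock instead.
        sleeps.append(s)
        pendulum.set_test_now(pendulum.now().add(seconds=s))

    start = create_pendulum_time(2019, 2, 28, tz="UTC")
    pendulum.set_test_now(start)
    real_sleep = time.sleep
    time.sleep = fake_sleep
    try:
        time.sleep(30)
        time.sleep(35)
        assert pendulum.now("UTC") == start.add(seconds=65)
        assert sum(sleeps) == 65
    finally:
        time.sleep = real_sleep
        pendulum.set_test_now()  # clear the frozen clock
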
Example #9
def test_partitions_for_daily_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            @daily_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=9, minute=30),
                execution_timezone="US/Central",
            )
            def daily_central_schedule(daily_time):
                return {"daily_time": daily_time.isoformat()}

            assert daily_central_schedule.execution_timezone == "US/Central"

            _check_partitions(
                daily_central_schedule,
                (31 + 26),
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_DATE_FORMAT,
                relativedelta(days=1),
            )

            valid_daily_time = create_pendulum_time(year=2019,
                                                    month=1,
                                                    day=27,
                                                    hour=9,
                                                    minute=30,
                                                    tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_daily_time)

            execution_data = daily_central_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "daily_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=26,
                                     tz="US/Central").isoformat()
            }
Example #10
def test_sensor_start_stop(external_repo_context):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "always_on_sensor")
            external_origin_id = external_sensor.get_external_origin_id()
            instance.start_sensor(external_sensor)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(external_origin_id)
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(external_origin_id)
            assert len(ticks) == 1
            validate_tick(ticks[0], external_sensor, freeze_datetime,
                          JobTickStatus.SUCCESS, [run.run_id])

            freeze_datetime = freeze_datetime.add(seconds=15)

        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            # no new ticks, no new runs, we are below the 30 second min interval
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(external_origin_id)
            assert len(ticks) == 1

            # stop / start
            instance.stop_sensor(external_origin_id)
            instance.start_sensor(external_sensor)

            evaluate_sensors(instance, grpc_server_registry)
            # no new ticks, no new runs, we are below the 30 second min interval
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(external_origin_id)
            assert len(ticks) == 1

            freeze_datetime = freeze_datetime.add(seconds=16)

        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            # should have new tick, new run, we are after the 30 second min interval
            assert instance.get_runs_count() == 2
            ticks = instance.get_job_ticks(external_origin_id)
            assert len(ticks) == 2
Example #11
def _invalid_partition_selector(_context, _partition_set_def):
    return [
        Partition(
            value=create_pendulum_time(year=2019,
                                       month=1,
                                       day=27,
                                       hour=1,
                                       minute=25),
            name="made_up",
        )
    ]
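
By contrast, a well-behaved selector returns partitions that actually belong to the set. A hedged sketch, assuming the era's PartitionSetDefinition.get_partitions method and a scheduled_execution_time attribute on the context:

# Hypothetical counterpart to the invalid selector above: select the latest
# real partition (returned as a list, mirroring the invalid selector's shape).
def _last_partition_selector(context, partition_set_def):
    partitions = partition_set_def.get_partitions(context.scheduled_execution_time)
    return partitions[-1:]
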
Example #12
def test_bad_load_sensor_repository(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=27,
                             hour=23,
                             minute=59,
                             second=59,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "simple_sensor")

            valid_origin = external_sensor.get_external_origin()

            # Swap out a new repository name
            invalid_repo_origin = ExternalJobOrigin(
                ExternalRepositoryOrigin(
                    valid_origin.external_repository_origin.
                    repository_location_origin,
                    "invalid_repo_name",
                ),
                valid_origin.job_name,
            )

            instance.add_job_state(
                JobState(invalid_repo_origin, JobType.SENSOR,
                         JobStatus.RUNNING))

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()
            assert "Sensor daemon caught an error for sensor simple_sensor" in captured.out
            assert (
                "Could not find repository invalid_repo_name in location test_location to run sensor simple_sensor"
                in captured.out)
Example #13
def test_partitions_for_monthly_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
                execution_timezone="US/Central",
            )
            def monthly_foo_schedule(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            assert monthly_foo_schedule.execution_timezone == "US/Central"

            valid_monthly_time = create_pendulum_time(
                year=2019, month=2, day=3, hour=9, minute=30, tz="US/Central"
            )
            context_with_valid_time = build_schedule_context(instance, valid_monthly_time)

            execution_data = monthly_foo_schedule.get_execution_data(context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time": create_pendulum_time(
                    year=2019, month=1, day=1, tz="US/Central"
                ).isoformat()
            }

            _check_partitions(
                monthly_foo_schedule,
                1,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_MONTHLY_FORMAT,
                relativedelta(months=1),
            )
Example #14
def test_error_sensor(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=27,
                             hour=23,
                             minute=59,
                             second=59,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("error_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.FAILURE,
                [],
                "Error occurred during the execution of evaluation_fn for sensor error_sensor",
            )

            captured = capfd.readouterr()
            assert (
                "Failed to resolve sensor for error_sensor : ") in captured.out

            assert (
                "Error occurred during the execution of evaluation_fn for sensor error_sensor"
            ) in captured.out
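
The error_sensor under test presumably just raises from its evaluation function; a minimal sketch:

from dagster import sensor


# Hypothetical: any exception raised inside the evaluation_fn surfaces as a
# FAILURE tick carrying the error message asserted above.
@sensor(pipeline_name="the_pipeline")
def error_sensor(_context):
    raise Exception("sensor evaluation blew up")
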
Example #15
def test_error_sensor_daemon(external_repo_context, monkeypatch):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=28, tz="UTC"),
        "US/Central")

    sleeps = []

    def fake_sleep(s):
        sleeps.append(s)
        pendulum.set_test_now(pendulum.now().add(seconds=s))

    monkeypatch.setattr(time, "sleep", fake_sleep)

    with instance_with_sensors(
            external_repo_context,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
    ) as (instance, workspace, _external_repo):

        @contextmanager
        def _gen_workspace(_instance):
            yield workspace

        with pendulum.test(freeze_datetime):
            instance.add_job_state(
                JobState(_get_unloadable_sensor_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            sensor_daemon = SensorDaemon.create_from_instance(instance)
            daemon_shutdown_event = threading.Event()
            sensor_daemon.run_loop(
                "my_uuid",
                daemon_shutdown_event,
                _gen_workspace,
                heartbeat_interval_seconds=DEFAULT_HEARTBEAT_INTERVAL_SECONDS,
                error_interval_seconds=DEFAULT_DAEMON_ERROR_INTERVAL_SECONDS,
                until=freeze_datetime.add(seconds=65),
            )

            heartbeats = instance.get_daemon_heartbeats()
            heartbeat = heartbeats["SENSOR"]
            assert heartbeat
            assert heartbeat.errors
            assert len(heartbeat.errors) == DAEMON_HEARTBEAT_ERROR_LIMIT
Example #16
def test_launch_failure(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=27,
                             hour=23,
                             minute=59,
                             second=59,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(
            external_repo_context,
            overrides={
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
    ) as (instance, grpc_server_registry, external_repo):
        with pendulum.test(freeze_datetime):

            external_sensor = external_repo.get_external_sensor(
                "always_on_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(ticks[0], external_sensor, freeze_datetime,
                          JobTickStatus.SUCCESS, [run.run_id])

            captured = capfd.readouterr()
            assert ("Run {run_id} created successfully but failed to launch:".
                    format(run_id=run.run_id)) in captured.out

            assert "The entire purpose of this is to throw on launch" in captured.out
Example #17
def test_get_unloadable_job(graphql_context):
    instance = graphql_context.instance
    initial_datetime = create_pendulum_time(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    )
    with pendulum.test(initial_datetime):
        instance.add_job_state(
            JobState(
                _get_unloadable_schedule_origin("unloadable_running"),
                JobType.SCHEDULE,
                JobStatus.RUNNING,
                ScheduleJobData(
                    "0 0 * * *",
                    pendulum.now("UTC").timestamp(),
                    graphql_context.instance.scheduler.__class__.__name__,
                ),
            )
        )

        instance.add_job_state(
            JobState(
                _get_unloadable_schedule_origin("unloadable_stopped"),
                JobType.SCHEDULE,
                JobStatus.STOPPED,
                ScheduleJobData(
                    "0 0 * * *",
                    pendulum.now("UTC").timestamp(),
                    graphql_context.instance.scheduler.__class__.__name__,
                ),
            )
        )

    result = execute_dagster_graphql(graphql_context, GET_UNLOADABLE_QUERY)
    assert len(result.data["unloadableJobStatesOrError"]["results"]) == 1
    assert result.data["unloadableJobStatesOrError"]["results"][0]["name"] == "unloadable_running"
Example #18
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), ProcessGrpcServerRegistry(
                    wait_for_processes_on_exit=True) as grpc_server_registry:
                list(
                    execute_backfill_iteration(
                        instance,
                        grpc_server_registry,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    ))
        finally:
            cleanup_test_instance(instance)
Example #19
def test_large_sensor(external_repo_context):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance,
        workspace,
        external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("large_sensor")
            instance.start_sensor(external_sensor)
            evaluate_sensors(instance, workspace)
            ticks = instance.get_job_ticks(external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
            )
Example #20
def test_partitions_for_monthly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                to_timezone(
                    create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"),
                    "US/Eastern")):
            context_without_time = ScheduleExecutionContext(
                instance.get_ref(), None)

            start_date = datetime(year=2019, month=1, day=1)

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
            )
            def monthly_foo_schedule(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            valid_monthly_time = create_pendulum_time(year=2019,
                                                      month=2,
                                                      day=3,
                                                      hour=9,
                                                      minute=30,
                                                      tz="UTC")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_monthly_time)

            execution_data = monthly_foo_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time":
                create_pendulum_time(year=2019, month=1, day=1,
                                     tz="UTC").isoformat()
            }

            execution_data = monthly_foo_schedule.get_execution_data(
                context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time":
                create_pendulum_time(year=2019, month=1, day=1,
                                     tz="UTC").isoformat()
            }

            _check_partitions(
                monthly_foo_schedule,
                1,
                pendulum.instance(start_date, tz="UTC"),
                DEFAULT_MONTHLY_FORMAT,
                relativedelta(months=1),
            )

            # test partition_months_offset=0

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
                partition_months_offset=0,
            )
            def monthly_foo_schedule_same_month(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            valid_monthly_time = create_pendulum_time(year=2019,
                                                      month=2,
                                                      day=3,
                                                      hour=9,
                                                      minute=30,
                                                      tz="UTC")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_monthly_time)

            execution_data = monthly_foo_schedule_same_month.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time":
                create_pendulum_time(year=2019, month=2, day=1,
                                     tz="UTC").isoformat()
            }
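
The two decorators differ only in partition_months_offset: with the default offset of 1, a February execution targets the January partition, while offset 0 targets February itself. Illustratively:

# Hypothetical offset arithmetic for the 2019-02-03 09:30 execution time used
# above: offset=1 yields the January partition, offset=0 the February one.
execution = create_pendulum_time(year=2019, month=2, day=3, hour=9, minute=30, tz="UTC")
assert execution.subtract(months=1).start_of("month") == create_pendulum_time(
    year=2019, month=1, day=1, tz="UTC")  # partition_months_offset=1
assert execution.start_of("month") == create_pendulum_time(
    year=2019, month=2, day=1, tz="UTC")  # partition_months_offset=0
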
Example #21
def test_wrong_config_sensor(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
        ),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "wrong_config_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1

            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.FAILURE,
                [],
                "Error in config for pipeline the_pipeline",
            )

            captured = capfd.readouterr()
            assert (
                "Error in config for pipeline the_pipeline") in captured.out

            # Error repeats on subsequent ticks

            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.FAILURE,
                [],
                "Error in config for pipeline the_pipeline",
            )

            captured = capfd.readouterr()
            assert (
                "Error in config for pipeline the_pipeline") in captured.out
Example #22
def test_get_single_schedule_definition(graphql_context):
    context = graphql_context
    instance = context.instance

    schedule_selector = infer_schedule_selector(
        context, "partition_based_multi_mode_decorator")

    # fetch schedule before reconcile
    result = execute_dagster_graphql(
        context,
        GET_SCHEDULE_QUERY,
        variables={"scheduleSelector": schedule_selector})
    assert result.data
    assert result.data["scheduleOrError"]["__typename"] == "Schedule"
    assert result.data["scheduleOrError"]["scheduleState"]

    instance.reconcile_scheduler_state(
        external_repository=context.get_repository_location(
            main_repo_location_name()).get_repository(main_repo_name()))

    result = execute_dagster_graphql(
        context,
        GET_SCHEDULE_QUERY,
        variables={"scheduleSelector": schedule_selector})

    assert result.data

    assert result.data["scheduleOrError"]["__typename"] == "Schedule"
    assert result.data["scheduleOrError"]["partitionSet"]
    assert result.data["scheduleOrError"]["executionTimezone"] == pendulum.now(
    ).timezone.name

    future_ticks = result.data["scheduleOrError"]["futureTicks"]
    assert future_ticks
    assert len(future_ticks["results"]) == 3

    schedule_selector = infer_schedule_selector(context, "timezone_schedule")

    future_ticks_start_time = create_pendulum_time(
        2019, 2, 27, tz="US/Central").timestamp()

    result = execute_dagster_graphql(
        context,
        GET_SCHEDULE_QUERY,
        variables={
            "scheduleSelector": schedule_selector,
            "ticksAfter": future_ticks_start_time
        },
    )

    assert result.data
    assert result.data["scheduleOrError"]["__typename"] == "Schedule"
    assert result.data["scheduleOrError"]["executionTimezone"] == "US/Central"

    future_ticks = result.data["scheduleOrError"]["futureTicks"]
    assert future_ticks
    assert len(future_ticks["results"]) == 3
    timestamps = [
        future_tick["timestamp"] for future_tick in future_ticks["results"]
    ]

    assert timestamps == [
        create_pendulum_time(2019, 2, 27, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 2, 28, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 3, 1, tz="US/Central").timestamp(),
    ]

    cursor = future_ticks["cursor"]

    assert future_ticks["cursor"] == (
        create_pendulum_time(2019, 3, 1, tz="US/Central").timestamp() + 1)

    result = execute_dagster_graphql(
        context,
        GET_SCHEDULE_QUERY,
        variables={
            "scheduleSelector": schedule_selector,
            "ticksAfter": cursor
        },
    )

    future_ticks = result.data["scheduleOrError"]["futureTicks"]

    assert future_ticks
    assert len(future_ticks["results"]) == 3
    timestamps = [
        future_tick["timestamp"] for future_tick in future_ticks["results"]
    ]

    assert timestamps == [
        create_pendulum_time(2019, 3, 2, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 3, 3, tz="US/Central").timestamp(),
        create_pendulum_time(2019, 3, 4, tz="US/Central").timestamp(),
    ]
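
The pagination contract exercised here: each page holds three future ticks, and the cursor points one second past the last returned tick, so successive queries walk forward without overlap. Restated:

# Hypothetical restatement of the cursor math asserted above.
page_one_last = create_pendulum_time(2019, 3, 1, tz="US/Central").timestamp()
cursor = page_one_last + 1
page_two_first = create_pendulum_time(2019, 3, 2, tz="US/Central").timestamp()
assert page_one_last < cursor <= page_two_first
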
Example #23
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                to_timezone(
                    create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"),
                    "US/Eastern")):

            context_without_time = ScheduleExecutionContext(
                instance.get_ref(), None)

            start_date = datetime(year=2019, month=1, day=1)

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
            )
            def hourly_foo_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_foo_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="UTC"),
                DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
                relativedelta(hours=1),
            )

            execution_data = hourly_foo_schedule.get_execution_data(
                context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=2,
                                     day=26,
                                     hour=23,
                                     tz="UTC").isoformat()
            }

            # time that's invalid since it corresponds to a partition before the start date
            # should not execute and should yield a SkipReason if it tries to generate run config
            execution_time_with_invalid_partition = create_pendulum_time(
                year=2018, month=12, day=30, hour=3, minute=25, tz="UTC")
            context_with_invalid_time = ScheduleExecutionContext(
                instance.get_ref(), execution_time_with_invalid_partition)

            execution_data = hourly_foo_schedule.get_execution_data(
                context_with_invalid_time)

            assert len(execution_data) == 1
            skip_data = execution_data[0]
            assert isinstance(skip_data, SkipReason)
            assert (
                "Partition selector did not return a partition. "
                "Make sure that the timezone on your partition set matches your execution timezone."
                in skip_data.skip_message)

            valid_time = create_pendulum_time(year=2019,
                                              month=1,
                                              day=27,
                                              hour=1,
                                              minute=25,
                                              tz="UTC")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_time)

            execution_data = hourly_foo_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=27,
                                     hour=0,
                                     tz="UTC").isoformat()
            }
Example #24
def test_simple_sensor(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=27,
                             hour=23,
                             minute=59,
                             second=59,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor(
                "simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )

            captured = capfd.readouterr()
            assert (
                captured.out ==
                """2019-02-27 17:59:59 - SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor
2019-02-27 17:59:59 - SensorDaemon - INFO - Sensor returned false for simple_sensor, skipping
""")

            freeze_datetime = freeze_datetime.add(seconds=30)

        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)
            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            validate_run_started(run)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2

            expected_datetime = create_pendulum_time(year=2019,
                                                     month=2,
                                                     day=28,
                                                     hour=0,
                                                     minute=0,
                                                     second=29)
            validate_tick(
                ticks[0],
                external_sensor,
                expected_datetime,
                JobTickStatus.SUCCESS,
                [run.run_id],
            )

            captured = capfd.readouterr()
            assert (
                captured.out ==
                """2019-02-27 18:00:29 - SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor
2019-02-27 18:00:29 - SensorDaemon - INFO - Launching run for simple_sensor
2019-02-27 18:00:29 - SensorDaemon - INFO - Completed launch of run {run_id} for simple_sensor
""".format(run_id=run.run_id))
Example #25
def test_failure_before_run_created(external_repo_context, crash_location,
                                    crash_signal, capfd):
    frozen_datetime = to_timezone(
        create_pendulum_time(year=2019,
                             month=2,
                             day=28,
                             hour=0,
                             minute=0,
                             second=1,
                             tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            _grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor(
                "simple_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))

            # create a tick
            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            launch_process.start()
            launch_process.join(timeout=60)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.SKIPPED
            captured = capfd.readouterr()

            # create a starting tick, but crash
            debug_crash_flags = {
                external_sensor.name: {
                    crash_location: crash_signal
                }
            }
            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[
                    instance.get_ref(),
                    frozen_datetime.add(seconds=31), debug_crash_flags
                ],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode != 0

            captured = capfd.readouterr()

            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == JobTickStatus.STARTED
            assert not int(
                ticks[0].timestamp) % 2  # skip condition for simple_sensor
            assert instance.get_runs_count() == 0

            # create another tick, but ensure that the last evaluation time used is from the first,
            # successful tick rather than the failed tick
            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[
                    instance.get_ref(),
                    frozen_datetime.add(seconds=62), None
                ],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            captured = capfd.readouterr()
            assert (
                captured.out.replace("\r\n", "\n") ==
                f"""2019-02-27 18:01:03 - SensorDaemon - INFO - Checking for new runs for sensor: simple_sensor
2019-02-27 18:01:03 - SensorDaemon - INFO - Launching run for simple_sensor
2019-02-27 18:01:03 - SensorDaemon - INFO - Completed launch of run {run.run_id} for simple_sensor
""")

            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 3
            assert ticks[0].status == JobTickStatus.SUCCESS
Example #26
def test_failure_recovery_before_run_created(external_repo_context,
                                             crash_location, crash_signal,
                                             capfd):
    # Verify that if the scheduler crashes or is interrupted before a run is created,
    # it will create exactly one tick/run when it is re-launched
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        initial_datetime = to_timezone(
            create_pendulum_time(year=2019,
                                 month=2,
                                 day=27,
                                 hour=0,
                                 minute=0,
                                 second=0,
                                 tz="UTC"),
            "US/Central",
        )

        frozen_datetime = initial_datetime.add()  # no-arg .add() just copies the datetime

        external_schedule = external_repo.get_external_schedule(
            "simple_schedule")
        with pendulum.test(frozen_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            debug_crash_flags = {
                external_schedule.name: {
                    crash_location: crash_signal
                }
            }

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            assert scheduler_process.exitcode != 0

            captured = capfd.readouterr()
            assert (
                captured.out.replace("\r\n", "\n") ==
                """2019-02-26 18:00:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:00:00 - SchedulerDaemon - INFO - Evaluating schedule `simple_schedule` at 2019-02-27 00:00:00+0000
""")

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.STARTED

            assert instance.get_runs_count() == 0

        frozen_datetime = frozen_datetime.add(minutes=5)
        with pendulum.test(frozen_datetime):
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            wait_for_all_runs_to_start(instance)
            validate_run_started(
                instance.get_runs()[0],
                execution_time=initial_datetime,
                partition_time=create_pendulum_time(2019, 2, 26),
            )

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                [instance.get_runs()[0].run_id],
            )
            captured = capfd.readouterr()
            assert (
                captured.out.replace("\r\n", "\n") ==
                """2019-02-26 18:05:00 - SchedulerDaemon - INFO - Checking for new runs for the following schedules: simple_schedule
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Evaluating schedule `simple_schedule` at 2019-02-27 00:00:00+0000
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Resuming previously interrupted schedule execution
2019-02-26 18:05:00 - SchedulerDaemon - INFO - Completed scheduled launch of run {run_id} for simple_schedule
""".format(run_id=instance.get_runs()[0].run_id))
Example #27
def test_launch_once(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tz="UTC",
        ),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):

            external_sensor = external_repo.get_external_sensor(
                "run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

        # run again (after 30 seconds), to ensure that the run key maintains idempotence
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in captured.out)

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

        # Sensor loop still executes
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())

            assert len(ticks) == 3
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
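The run_key_sensor referenced here is defined in the test repository rather than shown in this snippet. A hedged sketch of what such a sensor looks like — the pipeline name matches the the_pipeline object used later in this example, and the constant run key is what the idempotence assertions depend on:

from dagster import RunRequest, sensor


@sensor(pipeline_name="the_pipeline")
def run_key_sensor(_context):
    # Always emit the same run key: the daemon launches the run once,
    # then skips every later request whose key has already completed.
    return RunRequest(run_key="only_once", run_config={})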
Example #28
def test_partitions_for_hourly_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            # You can specify a start date with no timezone and it will be assumed to be
            # in the execution timezone

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            assert hourly_central_schedule.execution_timezone == "US/Central"

            _check_partitions(
                hourly_central_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            valid_time = create_pendulum_time(year=2019,
                                              month=1,
                                              day=27,
                                              hour=1,
                                              minute=25,
                                              tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_time)

            execution_data = hourly_central_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=27,
                                     hour=0,
                                     tz="US/Central").isoformat()
            }

            # You can specify a start date in a different timezone and it will be transformed into the
            # execution timezone
            start_date_with_different_timezone = create_pendulum_time(
                2019, 1, 1, 0, tz="US/Pacific")

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule_with_timezone_start_time(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_central_schedule_with_timezone_start_time,
                HOURS_UNTIL_FEBRUARY_27 - 2,  # start date is two hours later since it's in PT
                to_timezone(start_date_with_different_timezone, "US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            # test partition_hours_offset=0

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
                partition_hours_offset=0,
            )
            def hourly_schedule_for_current_hour(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            valid_time = create_pendulum_time(year=2019,
                                              month=1,
                                              day=27,
                                              hour=1,
                                              minute=25,
                                              tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_time)

            execution_data = hourly_schedule_for_current_hour.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=27,
                                     hour=1,
                                     tz="US/Central").isoformat()
            }

            # test partition_hours_offset=2

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
                partition_hours_offset=2,
            )
            def hourly_schedule_for_two_hours_ago(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            valid_time = create_pendulum_time(year=2019,
                                              month=1,
                                              day=27,
                                              hour=1,
                                              minute=25,
                                              tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance.get_ref(), valid_time)

            execution_data = hourly_schedule_for_two_hours_ago.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=26,
                                     hour=23,
                                     tz="US/Central").isoformat()
            }
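Taken together, the three blocks above pin down the partition_hours_offset semantics: at an evaluation time of 01:25, offset 0 maps to the current hour (01:00), the default offset of 1 maps to the previous hour (00:00), and offset 2 crosses midnight back to 23:00 on the prior day. A minimal sketch of that arithmetic (the partition_for helper is hypothetical):

import pendulum


def partition_for(evaluation_time, partition_hours_offset):
    # Hourly partitions are aligned to the start of the hour, then shifted
    # back by the configured offset.
    return evaluation_time.start_of("hour").subtract(hours=partition_hours_offset)


valid_time = pendulum.datetime(2019, 1, 27, 1, 25, tz="US/Central")
assert partition_for(valid_time, 0).hour == 1  # current hour
assert partition_for(valid_time, 1).hour == 0  # default: previous hour
assert partition_for(valid_time, 2) == pendulum.datetime(2019, 1, 26, 23, tz="US/Central")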
Example #29
def test_failure_after_run_launched(external_repo_context, crash_location,
                                    crash_signal, capfd):
    frozen_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=28,
            hour=0,
            minute=0,
            second=0,
            tz="UTC",
        ),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            _grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(frozen_datetime):
            external_sensor = external_repo.get_external_sensor(
                "run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))

            # create and launch a run, but crash partway through the tick
            debug_crash_flags = {
                external_sensor.name: {
                    crash_location: crash_signal
                }
            }
            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode != 0

            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())

            assert len(ticks) == 1
            assert ticks[0].status == JobTickStatus.STARTED
            assert instance.get_runs_count() == 1

            run = instance.get_runs()[0]
            wait_for_all_runs_to_start(instance)
            assert run.tags.get(SENSOR_NAME_TAG) == "run_key_sensor"
            assert run.tags.get(RUN_KEY_TAG) == "only_once"
            capfd.readouterr()

            launch_process = multiprocessing.Process(
                target=_test_launch_sensor_runs_in_subprocess,
                args=[
                    instance.get_ref(),
                    frozen_datetime.add(seconds=1), None
                ],
            )
            launch_process.start()
            launch_process.join(timeout=60)

            assert launch_process.exitcode == 0
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            captured = capfd.readouterr()

            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in captured.out)

            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            assert ticks[0].status == JobTickStatus.SKIPPED
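Both failure-recovery examples drive the crash through debug_crash_flags, a mapping from job name to {checkpoint: signal}. A sketch of how such a crash-injection hook can work inside the daemon loop — the helper name and exact checkpoint wiring are assumptions based on how the flags are constructed above:

import os


def check_for_debug_crash(debug_crash_flags, crash_location):
    # Called by the daemon at named checkpoints with the per-job flag dict.
    # If a signal is registered for this checkpoint, send it to the current
    # process to simulate a hard crash mid-tick.
    if not debug_crash_flags:
        return
    crash_signal = debug_crash_flags.get(crash_location)
    if crash_signal is None:
        return
    os.kill(os.getpid(), crash_signal)
    raise Exception("Process didn't terminate after receiving crash signal")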
Example #30
def test_failure_recovery_after_tick_success(external_repo_context,
                                             crash_location, crash_signal):
    # Verify that if the scheduler crashes or is interrupted after launching a run
    # but before its tick has been marked successful, the next iteration marks the
    # tick as successful without launching a duplicate run
    with instance_with_schedules(external_repo_context) as (instance,
                                                            external_repo):
        initial_datetime = create_pendulum_time(year=2019,
                                                month=2,
                                                day=27,
                                                hour=0,
                                                minute=0,
                                                second=0)
        frozen_datetime = initial_datetime.add()  # no-arg add() returns a copy
        external_schedule = external_repo.get_external_schedule(
            "simple_schedule")
        with pendulum.test(frozen_datetime):
            instance.start_schedule_and_update_storage_state(external_schedule)

            debug_crash_flags = {
                external_schedule.name: {
                    crash_location: crash_signal
                }
            }

            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, debug_crash_flags],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)

            assert scheduler_process.exitcode != 0

            # As above, there's a possible race condition here: if the scheduler
            # crashes right after the launch and re-runs before the run actually
            # starts, it could launch the same run twice
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime,
                                 create_pendulum_time(2019, 2, 26))

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1

            if crash_signal == get_terminate_signal():
                run_ids = []
            else:
                run_ids = [run.run_id for run in instance.get_runs()]

            validate_tick(ticks[0], external_schedule, initial_datetime,
                          JobTickStatus.STARTED, run_ids)

        frozen_datetime = frozen_datetime.add(minutes=1)
        with pendulum.test(frozen_datetime):
            # Running again just marks the tick as success since the run has already started
            scheduler_process = multiprocessing.Process(
                target=_test_launch_scheduled_runs_in_subprocess,
                args=[instance.get_ref(), frozen_datetime, None],
            )
            scheduler_process.start()
            scheduler_process.join(timeout=60)
            assert scheduler_process.exitcode == 0

            assert instance.get_runs_count() == 1
            validate_run_started(instance.get_runs()[0], initial_datetime,
                                 create_pendulum_time(2019, 2, 26))

            ticks = instance.get_job_ticks(
                external_schedule.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_schedule,
                initial_datetime,
                JobTickStatus.SUCCESS,
                [instance.get_runs()[0].run_id],
            )
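The validate_tick helper asserted against throughout these examples lives in the shared test utilities; a sketch of what it checks, with field names inferred from the call sites above (treat them as assumptions, not the real implementation):

def validate_tick(tick, external_job, expected_datetime, expected_status,
                  expected_run_ids=None):
    # Compare the stored tick against the job origin, the frozen evaluation
    # time, and the expected status / launched run ids.
    tick_data = tick.job_tick_data
    assert tick_data.job_origin_id == external_job.get_external_origin_id()
    assert tick_data.job_name == external_job.name
    assert tick_data.status == expected_status
    assert tick_data.timestamp == expected_datetime.timestamp()
    if expected_run_ids is not None:
        assert set(tick_data.run_ids) == set(expected_run_ids)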