Exemplo n.º 1
0
def test_scheduled_jobs():
    """Evaluate a ``ScheduleDefinition`` that targets a job with default config.

    Builds ``foo_job`` with ``DEFAULT_FOO_CONFIG``, evaluates one tick of a
    schedule for that job using a context without a scheduled execution time,
    and passes the single resulting run request's config to
    ``validate_run_config``.
    """
    from dagster import Field, String

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    DEFAULT_FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=DEFAULT_FOO_CONFIG)
    def foo_job():
        foo_op()

    my_schedule = ScheduleDefinition(name="my_schedule",
                                     cron_schedule="* * * * *",
                                     job=foo_job)

    # Only the time-less context is exercised here; the previously built
    # context with an explicit execution time was never used and was dropped.
    context_without_time = build_schedule_context()
    execution_data = my_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1

    validate_run_config(foo_job, execution_data.run_requests[0].run_config)
Exemplo n.º 2
0
def test_partitions_for_hourly_schedule_decorators_without_timezone(
        partition_hours_offset: int):
    """Exercise an ``hourly_schedule`` declared without an execution timezone.

    Inside a ``pendulum.test`` block, checks the schedule's partitions and the
    run config produced when a tick is evaluated both without and with an
    explicit scheduled execution time.
    """
    frozen_now = to_timezone(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern")

    with pendulum.test(frozen_now):
        context_without_time = build_schedule_context()
        start_date = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        expected_partition_count = (HOURS_UNTIL_FEBRUARY_27 + 1 -
                                    partition_hours_offset)
        _check_partitions(
            hourly_foo_schedule,
            expected_partition_count,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # Tick evaluated with a context that carries no scheduled time.
        tick = hourly_foo_schedule.evaluate_tick(context_without_time)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        assert tick.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(year=2019, month=2, day=27, tz="UTC")
            .subtract(hours=partition_hours_offset)
            .isoformat()
        }

        # Tick evaluated with an explicit scheduled execution time.
        valid_time = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_time)

        tick = hourly_foo_schedule.evaluate_tick(context_with_valid_time)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        assert tick.run_requests[0].run_config == {
            "hourly_time":
            create_pendulum_time(year=2019, month=1, day=27, hour=1, tz="UTC")
            .subtract(hours=partition_hours_offset)
            .isoformat()
        }
Exemplo n.º 3
0
def test_partitions_for_monthly_schedule_decorators_without_timezone(
        partition_months_offset: int):
    """Exercise a ``monthly_schedule`` declared without an execution timezone.

    Inside a ``pendulum.test`` block, checks the run config produced for a
    tick with an explicit scheduled time and for one without, then checks the
    schedule's partitions.
    """
    frozen_now = to_timezone(
        create_pendulum_time(2019, 3, 27, 0, 1, 1, tz="UTC"), "US/Eastern")

    with pendulum.test(frozen_now):
        context_without_time = build_schedule_context()
        start_date = datetime(year=2019, month=1, day=1)

        @monthly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_month=3,
            start_date=start_date,
            execution_time=time(9, 30),
            partition_months_offset=partition_months_offset,
        )
        def monthly_foo_schedule(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        # Tick evaluated with an explicit scheduled execution time.
        valid_monthly_time = create_pendulum_time(
            year=2019, month=3, day=3, hour=9, minute=30, tz="UTC")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_monthly_time)

        tick = monthly_foo_schedule.evaluate_tick(context_with_valid_time)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        assert tick.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(year=2019, month=3, day=1, tz="UTC")
            .subtract(months=partition_months_offset)
            .isoformat()
        }

        # Tick evaluated with a context that carries no scheduled time.
        tick = monthly_foo_schedule.evaluate_tick(context_without_time)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        assert tick.run_requests[0].run_config == {
            "monthly_time":
            create_pendulum_time(year=2019, month=3, day=1, tz="UTC")
            .subtract(months=partition_months_offset)
            .isoformat()
        }

        expected_partition_count = 3 - partition_months_offset
        _check_partitions(
            monthly_foo_schedule,
            expected_partition_count,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_MONTHLY_FORMAT,
            relativedelta(months=1),
        )
Exemplo n.º 4
0
def test_instance_access():
    """Check ``.instance`` on schedule contexts built with and without an instance.

    Without an instance the attribute access must raise
    ``DagsterInvariantViolationError`` with the expected message; with one it
    must yield a ``DagsterInstance``.
    """
    expected_error = (
        "Attempted to initialize dagster instance, but no instance reference was provided."
    )
    with pytest.raises(DagsterInvariantViolationError, match=expected_error):
        build_schedule_context().instance  # pylint: disable=expression-not-assigned

    with instance_for_test() as instance:
        context_with_instance = build_schedule_context(instance)
        assert isinstance(context_with_instance.instance, DagsterInstance)
Exemplo n.º 5
0
def test_cron_schedule_invocation_all_args():
    """Invoke the factory-built cron schedules directly and expect ``{}``.

    The context-taking schedule is called positionally and by keyword (``_``),
    each time with ``None`` and with a built schedule context; the
    context-free schedule is called with no arguments.
    """
    schedule_with_context = cron_test_schedule_factory_context()

    # Positional invocation.
    positional_none = schedule_with_context(None)
    assert positional_none == {}
    positional_built = schedule_with_context(build_schedule_context())
    assert positional_built == {}

    # Keyword invocation through the ``_`` parameter.
    keyword_none = schedule_with_context(_=None)
    assert keyword_none == {}
    keyword_built = schedule_with_context(_=build_schedule_context())
    assert keyword_built == {}

    schedule_without_context = cron_test_schedule_factory_no_context()
    assert schedule_without_context() == {}
Exemplo n.º 6
0
def test_scheduler():
    """Check ``get_execution_data`` for an echoing schedule and a skipping one.

    ``echo_time_schedule`` echoes the context's scheduled execution time (or an
    empty string when there is none) into its run config;
    ``always_skip_schedule`` has a ``should_execute`` that always returns
    ``False`` and is expected to produce a ``SkipReason``.
    """
    # The unused nested ``define_schedules`` helper was removed: nothing in
    # this test ever called it.

    @schedule(cron_schedule="* * * * *", pipeline_name="foo_pipeline")
    def echo_time_schedule(context):
        return {
            "echo_time": (
                (
                    context.scheduled_execution_time.isoformat()
                    if context.scheduled_execution_time
                    else ""
                )
            )
        }

    @schedule(
        cron_schedule="* * * * *", pipeline_name="foo_pipeline", should_execute=lambda x: False
    )
    def always_skip_schedule(context):
        return {}

    with instance_for_test() as instance:
        context_without_time = build_schedule_context(instance)

        execution_time = datetime(year=2019, month=2, day=27)

        context_with_time = build_schedule_context(instance, execution_time)

        # No scheduled time on the context: the echoed value is empty.
        execution_data = echo_time_schedule.get_execution_data(context_without_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], RunRequest)
        assert execution_data[0].run_config == {"echo_time": ""}

        # Scheduled time present: it is echoed back in ISO format.
        execution_data = echo_time_schedule.get_execution_data(context_with_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], RunRequest)
        assert execution_data[0].run_config == {"echo_time": execution_time.isoformat()}

        # should_execute returns False, so a skip reason is expected.
        execution_data = always_skip_schedule.get_execution_data(context_with_time)
        assert len(execution_data) == 1
        assert isinstance(execution_data[0], SkipReason)
        assert (
            execution_data[0].skip_message
            == "should_execute function for always_skip_schedule returned false."
        )
Exemplo n.º 7
0
def test_scheduler():
    """Check ``evaluate_tick`` for an echoing schedule and a skipping one.

    ``echo_time_schedule`` echoes the context's scheduled execution time (or an
    empty string when there is none) into its run config;
    ``always_skip_schedule`` has a ``should_execute`` that always returns
    ``False`` and is expected to report a skip message.
    """
    # The unused nested ``define_schedules`` helper was removed: nothing in
    # this test ever called it.

    @schedule(cron_schedule="* * * * *", pipeline_name="foo_pipeline")
    def echo_time_schedule(context):
        return {
            "echo_time": (
                (
                    context.scheduled_execution_time.isoformat()
                    if context.scheduled_execution_time
                    else ""
                )
            )
        }

    @schedule(
        cron_schedule="* * * * *", pipeline_name="foo_pipeline", should_execute=lambda x: False
    )
    def always_skip_schedule():
        return {}

    context_without_time = build_schedule_context()

    execution_time = datetime(year=2019, month=2, day=27)

    context_with_time = build_schedule_context(scheduled_execution_time=execution_time)

    # No scheduled time on the context: the echoed value is empty.
    execution_data = echo_time_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == {"echo_time": ""}

    # Scheduled time present: it is echoed back in ISO format.
    execution_data = echo_time_schedule.evaluate_tick(context_with_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == {"echo_time": execution_time.isoformat()}

    # should_execute returns False, so a skip message is expected.
    execution_data = always_skip_schedule.evaluate_tick(context_with_time)
    assert execution_data.skip_message
    assert (
        execution_data.skip_message
        == "should_execute function for always_skip_schedule returned false."
    )
Exemplo n.º 8
0
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    """Exercise an ``hourly_schedule`` without a timezone via ``get_execution_data``.

    Within a test instance and a ``pendulum.test`` block, checks the
    schedule's partitions and the single ``RunRequest`` produced for a context
    without a scheduled time and for one with an explicit scheduled time.
    """
    with instance_for_test() as instance, pendulum.test(
        to_timezone(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern")
    ):
        context_without_time = build_schedule_context(instance)
        start_date = datetime(year=2019, month=1, day=1)

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=start_date,
            execution_time=time(hour=0, minute=25),
        )
        def hourly_foo_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        _check_partitions(
            hourly_foo_schedule,
            HOURS_UNTIL_FEBRUARY_27,
            pendulum.instance(start_date, tz="UTC"),
            DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
            relativedelta(hours=1),
        )

        # Context without a scheduled execution time.
        results = hourly_foo_schedule.get_execution_data(context_without_time)
        assert len(results) == 1
        request = results[0]
        assert isinstance(request, RunRequest)
        assert request.run_config == {
            "hourly_time": create_pendulum_time(
                year=2019, month=2, day=26, hour=23, tz="UTC"
            ).isoformat()
        }

        # Context with an explicit scheduled execution time.
        valid_time = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="UTC"
        )
        context_with_valid_time = build_schedule_context(instance, valid_time)

        results = hourly_foo_schedule.get_execution_data(context_with_valid_time)
        assert len(results) == 1
        request = results[0]
        assert isinstance(request, RunRequest)
        assert request.run_config == {
            "hourly_time": create_pendulum_time(
                year=2019, month=1, day=27, hour=0, tz="UTC"
            ).isoformat()
        }
Exemplo n.º 9
0
def test_daily_schedule_with_offsets():
    """Check a daily partitioned config declared with hour and minute offsets.

    Covers the first two partition keys, the first partition's time window,
    the run config for the first key, the cron string of the schedule built
    from the config, and the run config of an evaluated tick. The schedule is
    finally placed in a repository definition.
    """

    @daily_partitioned_config(start_date="2021-05-05",
                              minute_offset=15,
                              hour_offset=2)
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05"
    assert partition_keys[1] == "2021-05-06"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-05-05T02:15:00",
                                  "2021-05-06T02:15:00")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(
        partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T02:15:00+00:00",
        "end": "2021-05-06T02:15:00+00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  hour_of_day=9,
                                                  minute_of_hour=30)
    assert my_schedule.cron_schedule == "30 9 * * *"

    tick_context = build_schedule_context(
        scheduled_execution_time=datetime(2021, 5, 8, 9, 30))
    tick = my_schedule.evaluate_tick(tick_context)
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T02:15:00+00:00",
        "end": "2021-05-08T02:15:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 10
0
def test_monthly_schedule():
    """Check a monthly partitioned config and the schedule built from it.

    Covers the first two partition keys, the first partition's time window,
    the run config for the first key, the schedule's cron string, and the run
    config of an evaluated tick. The schedule is finally placed in a
    repository definition.
    """

    @monthly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-06-01"
    assert partition_keys[1] == "2021-07-01"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-06-01", "2021-07-01")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(
        partition_keys[0])
    assert first_key_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  hour_of_day=9,
                                                  minute_of_hour=30,
                                                  day_of_month=2)
    assert my_schedule.cron_schedule == "30 9 2 * *"

    scheduled_time = datetime.strptime("2021-07-21", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 11
0
def test_hourly_schedule_with_offsets():
    """Check an hourly partitioned config declared with a minute offset.

    Covers the first two partition keys, the first partition's time window,
    the run config for the first key, the schedule's cron string, and the run
    config of an evaluated tick. The schedule is finally placed in a
    repository definition.
    """

    @hourly_partitioned_config(start_date=datetime(2021, 5, 5),
                               minute_offset=20)
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05-00:20"
    assert partition_keys[1] == "2021-05-05-01:20"

    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_window = time_window("2021-05-05T00:20:00",
                                  "2021-05-05T01:20:00")
    assert all_partitions[0].value == expected_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(
        partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T00:20:00+00:00",
        "end": "2021-05-05T01:20:00+00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  minute_of_hour=30)
    assert my_schedule.cron_schedule == "30 * * * *"

    scheduled_time = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T22:20:00+00:00",
        "end": "2021-05-07T23:20:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 12
0
def test_hourly_schedule():
    """Check an hourly partitioned config and the schedule built from it.

    Covers direct invocation of the config function, the schedule's cron
    string, and the run config of an evaluated tick. The schedule is finally
    placed in a repository definition.
    """

    @hourly_partitioned_config(start_date=datetime(2021, 5, 5))
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation with explicit window bounds.
    direct_config = my_partitioned_config(datetime(2021, 5, 7, 23),
                                          datetime(2021, 5, 8))
    assert direct_config == {
        "start": "2021-05-07 23:00:00",
        "end": "2021-05-08 00:00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  minute_of_hour=30)
    assert my_schedule.cron_schedule == "30 * * * *"

    scheduled_time = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T23:00:00+00:00",
        "end": "2021-05-08T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 13
0
def test_weekly_schedule():
    """Check a weekly partitioned config and the schedule built from it.

    Covers direct invocation of the config function, the schedule's cron
    string, and the run config of an evaluated tick. The schedule is finally
    placed in a repository definition.
    """

    @weekly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation with explicit window bounds.
    direct_config = my_partitioned_config(datetime(2021, 12, 13),
                                          datetime(2021, 12, 19))
    assert direct_config == {
        "start": "2021-12-13 00:00:00",
        "end": "2021-12-19 00:00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  hour_of_day=9,
                                                  minute_of_hour=30,
                                                  day_of_week=2)
    assert my_schedule.cron_schedule == "30 9 * * 2"

    scheduled_time = datetime.strptime("2021-05-21", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-09T00:00:00+00:00",
        "end": "2021-05-16T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 14
0
def test_monthly_schedule():
    """Check a monthly partitioned config and the schedule built from it.

    Covers direct invocation of the config function, the schedule's cron
    string, and the run config of an evaluated tick. The schedule is finally
    placed in a repository definition.
    """

    @monthly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    # Direct invocation with explicit window bounds.
    direct_config = my_partitioned_config(datetime(2021, 11, 1),
                                          datetime(2021, 11, 30))
    assert direct_config == {
        "start": "2021-11-01 00:00:00",
        "end": "2021-11-30 00:00:00",
    }

    my_schedule = schedule_for_partitioned_config(my_partitioned_config,
                                                  hour_of_day=9,
                                                  minute_of_hour=30,
                                                  day_of_month=2)
    assert my_schedule.cron_schedule == "30 9 2 * *"

    scheduled_time = datetime.strptime("2021-07-21", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-06-01T00:00:00+00:00",
        "end": "2021-07-01T00:00:00+00:00",
    }

    @repository
    def _repo():
        return [my_schedule]
Exemplo n.º 15
0
def test_partitions_outside_schedule_range():
    """Check the skip messages for partitions outside a schedule's range.

    One monthly schedule starts at the tick's own execution time and is
    expected to report a partition before the start of the partition set; the
    other ends before the tick and is expected to report a partition after
    the end.
    """
    tick_time = create_pendulum_time(year=2021, month=1, day=1, tz="UTC")
    tick_context = build_schedule_context(scheduled_execution_time=tick_time)

    @monthly_schedule(
        pipeline_name="too early",
        start_date=create_pendulum_time(year=2021, month=1, day=1, tz="UTC"),
    )
    def too_early(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    expected_early_message = (
        "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
        "the partition set (2021-01-01T00:00:00+00:00). "
        "Verify your schedule's start_date is correct.")
    early_tick = too_early.evaluate_tick(tick_context)
    assert early_tick.skip_message == expected_early_message

    @monthly_schedule(
        pipeline_name="too late",
        start_date=create_pendulum_time(year=2020, month=1, day=1, tz="UTC"),
        end_date=create_pendulum_time(year=2020, month=12, day=1, tz="UTC"),
        partition_months_offset=0,
    )
    def too_late(monthly_time):
        return {"monthly_time": monthly_time.isoformat()}

    expected_late_message = (
        "Your partition (2021-01-01T00:00:00+00:00) is after the end of "
        "the partition set (2020-12-01T00:00:00+00:00). "
        "Verify your schedule's end_date is correct.")
    late_tick = too_late.evaluate_tick(tick_context)
    assert late_tick.skip_message == expected_late_message
Exemplo n.º 16
0
def test_my_execution_time_schedule():
    """Validate the run config from ``my_execution_time_schedule`` against a pipeline.

    The schedule is invoked directly with a context whose scheduled execution
    time is 2021-01-01, and the result is checked with ``validate_run_config``.
    """

    @solid(config_schema={"dataset_name": str, "execution_date": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    schedule_context = build_schedule_context(
        scheduled_execution_time=datetime(2021, 1, 1))
    run_config = my_execution_time_schedule(schedule_context)
    assert validate_run_config(pipeline_for_test, run_config)
Exemplo n.º 17
0
def test_job_schedules():
    """Run the target job of every schedule listed in ``job_schedules``.

    For each ``(module, attr_name)`` pair, the schedule's first run request
    config is used to execute its loaded target in process. Any failure is
    re-raised with the schedule and module names attached.
    """
    for module, attr_name in job_schedules:
        schedule = getattr(module, attr_name)
        try:
            assert schedule.has_loadable_target()
            job = schedule.load_target()
            tick = schedule.evaluate_tick(build_schedule_context())
            run_config = tick.run_requests[0].run_config
            result = job.execute_in_process(run_config=run_config)
            assert result.success
        except Exception as err:
            # Wrap so the failing schedule is identifiable in the report.
            raise Exception(
                f"Error while executing schedule '{schedule.name}' from module '{module.__name__}'"
            ) from err
Exemplo n.º 18
0
def test_pipeline_schedules():
    """Execute the pipeline paired with every schedule in ``pipeline_schedules``.

    For each entry, the schedule's first run request config is used to
    execute the named pipeline. Any failure is re-raised with the schedule
    and module names attached.
    """
    for module, (schedule_name, pipeline_name) in pipeline_schedules:
        schedule = getattr(module, schedule_name)
        the_pipeline = getattr(module, pipeline_name)
        try:
            tick = schedule.evaluate_tick(build_schedule_context())
            run_config = tick.run_requests[0].run_config
            result = execute_pipeline(the_pipeline, run_config=run_config)
            assert result.success
        except Exception as err:
            # Wrap so the failing schedule is identifiable in the report.
            raise Exception(
                f"Error while executing schedule '{schedule.name}' from module '{module.__name__}'"
            ) from err
Exemplo n.º 19
0
def test_hourly_schedule_from_partitions():
    """Check the schedule that ``job_for_partitions`` builds from an hourly config.

    Covers the cron string and the run config of an evaluated tick.
    """

    @hourly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    my_schedule = job_for_partitions(my_partitioned_config, minute_of_hour=30)
    assert my_schedule.cron_schedule == "30 * * * *"

    scheduled_time = datetime.strptime("2021-05-08", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-07T23:00:00+00:00",
        "end": "2021-05-08T00:00:00+00:00",
    }
Exemplo n.º 20
0
def test_partitions_for_weekly_schedule_decorators_with_timezone(
        partition_weeks_offset: int):
    """Exercise a ``weekly_schedule`` declared with ``US/Central`` as its timezone.

    Inside a ``pendulum.test`` block, checks the schedule's execution
    timezone, the run config for a tick with an explicit scheduled time, and
    the schedule's partitions.
    """
    frozen_now = create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")

    with pendulum.test(frozen_now):
        start_date = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=start_date,
            execution_time=time(9, 30),
            execution_timezone="US/Central",
            partition_weeks_offset=partition_weeks_offset,
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        assert weekly_foo_schedule.execution_timezone == "US/Central"

        valid_weekly_time = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central")
        context_with_valid_time = build_schedule_context(
            scheduled_execution_time=valid_weekly_time)

        tick = weekly_foo_schedule.evaluate_tick(context_with_valid_time)
        assert tick.run_requests
        assert len(tick.run_requests) == 1
        assert tick.run_requests[0].run_config == {
            "weekly_time":
            create_pendulum_time(year=2019, month=1, day=29, tz="US/Central")
            .subtract(weeks=partition_weeks_offset)
            .isoformat()
        }

        expected_partition_count = 9 - partition_weeks_offset
        _check_partitions(
            weekly_foo_schedule,
            expected_partition_count,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )
Exemplo n.º 21
0
def test_weekly_schedule_from_partitions():
    """Check the schedule that ``job_for_partitions`` builds from a weekly config.

    Covers the cron string and the run config of an evaluated tick.
    """

    @weekly_partitioned_config(start_date="2021-05-05")
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    my_schedule = job_for_partitions(my_partitioned_config,
                                     hour_of_day=9,
                                     minute_of_hour=30,
                                     day_of_week=2)
    assert my_schedule.cron_schedule == "30 9 * * 2"

    scheduled_time = datetime.strptime("2021-05-21", DATE_FORMAT)
    tick = my_schedule.evaluate_tick(
        build_schedule_context(scheduled_execution_time=scheduled_time))
    assert tick.run_requests[0].run_config == {
        "start": "2021-05-09T00:00:00+00:00",
        "end": "2021-05-16T00:00:00+00:00",
    }
Exemplo n.º 22
0
def test_vixie_cronstring_schedule():
    """Evaluate a schedule declared with the ``@daily`` cron string.

    The schedule function yields one ``RunRequest`` tagged ``foo=FOO``; the
    test checks that evaluating a tick returns exactly that one request with
    its tag intact.
    """
    # The previously assigned ``start_date`` local was never read and has
    # been removed.
    context_without_time = build_schedule_context()

    @op
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    @schedule(cron_schedule="@daily", job=foo_job)
    def foo_schedule():
        yield RunRequest(run_key=None, run_config={}, tags={"foo": "FOO"})

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].tags.get("foo") == "FOO"
Exemplo n.º 23
0
def test_request_based_schedule_generator():
    """Check a generator-style schedule via tick evaluation and direct invocation.

    The schedule function yields one ``RunRequest`` carrying ``FOO_CONFIG``
    and a ``foo=FOO`` tag. The test checks that request when evaluating a
    tick, and again when calling the schedule directly, where the result must
    be a generator.
    """
    from dagster import Field, String

    # The previously assigned ``start_date`` local was never read and has
    # been removed.
    context_without_time = build_schedule_context()

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @schedule(
        cron_schedule="* * * * *",
        job=foo_job,
    )
    def foo_schedule(_context):
        yield RunRequest(run_key=None,
                         run_config=FOO_CONFIG,
                         tags={"foo": "FOO"})

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    assert execution_data.run_requests[0].run_config == FOO_CONFIG
    assert execution_data.run_requests[0].tags.get("foo") == "FOO"

    # test direct invocation
    request_generator = foo_schedule(context_without_time)
    assert inspect.isgenerator(request_generator)
    requests = list(request_generator)
    assert len(requests) == 1
    assert requests[0].run_config == FOO_CONFIG
    assert requests[0].tags.get("foo") == "FOO"
Exemplo n.º 24
0
def test_partitions_for_weekly_schedule_decorators_with_timezone():
    """Exercise a ``US/Central`` ``weekly_schedule`` via ``get_execution_data``.

    Within a test instance and a ``pendulum.test`` block, checks the
    schedule's execution timezone, the single ``RunRequest`` produced for an
    explicit scheduled time, and the schedule's partitions.
    """
    with instance_for_test() as instance, pendulum.test(
        create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    ):
        start_date = datetime(year=2019, month=1, day=1)

        @weekly_schedule(
            pipeline_name="foo_pipeline",
            execution_day_of_week=3,
            start_date=start_date,
            execution_time=time(9, 30),
            execution_timezone="US/Central",
        )
        def weekly_foo_schedule(weekly_time):
            return {"weekly_time": weekly_time.isoformat()}

        assert weekly_foo_schedule.execution_timezone == "US/Central"

        valid_weekly_time = create_pendulum_time(
            year=2019, month=1, day=30, hour=9, minute=30, tz="US/Central"
        )
        context_with_valid_time = build_schedule_context(instance, valid_weekly_time)

        results = weekly_foo_schedule.get_execution_data(context_with_valid_time)
        assert len(results) == 1
        request = results[0]
        assert isinstance(request, RunRequest)
        assert request.run_config == {
            "weekly_time": create_pendulum_time(
                year=2019, month=1, day=22, tz="US/Central"
            ).isoformat()
        }

        _check_partitions(
            weekly_foo_schedule,
            8,
            pendulum.instance(start_date, tz="US/Central"),
            DEFAULT_DATE_FORMAT,
            relativedelta(weeks=1),
        )
Exemplo n.º 25
0
def test_partitions_outside_schedule_range():
    """Check the ``SkipReason`` messages for partitions outside a schedule's range.

    One monthly schedule starts at the tick's own execution time and is
    expected to report a partition before the start of the partition set; the
    other ends before the tick and is expected to report a partition after
    the end.
    """
    with instance_for_test() as instance:
        tick_time = create_pendulum_time(year=2021, month=1, day=1, tz="UTC")
        tick_context = build_schedule_context(instance, tick_time)

        @monthly_schedule(
            pipeline_name="too early",
            start_date=create_pendulum_time(year=2021, month=1, day=1, tz="UTC"),
        )
        def too_early(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        early_results = too_early.get_execution_data(tick_context)
        assert len(early_results) == 1
        early_skip = early_results[0]
        assert isinstance(early_skip, SkipReason)
        assert early_skip.skip_message == (
            "Your partition (2020-12-01T00:00:00+00:00) is before the beginning of "
            "the partition set (2021-01-01T00:00:00+00:00). "
            "Verify your schedule's start_date is correct."
        )

        @monthly_schedule(
            pipeline_name="too late",
            start_date=create_pendulum_time(year=2020, month=1, day=1, tz="UTC"),
            end_date=create_pendulum_time(year=2020, month=12, day=1, tz="UTC"),
        )
        def too_late(monthly_time):
            return {"monthly_time": monthly_time.isoformat()}

        late_results = too_late.get_execution_data(tick_context)
        assert len(late_results) == 1
        late_skip = late_results[0]
        assert isinstance(late_skip, SkipReason)
        assert late_skip.skip_message == (
            "Your partition (2020-12-01T00:00:00+00:00) is after the end of "
            "the partition set (2020-11-01T00:00:00+00:00). "
            "Verify your schedule's end_date is correct."
        )
Exemplo n.º 26
0
def test_weekly_schedule_with_offsets():
    """Exercise a weekly partitioned config declared with time offsets.

    Asserts, in order: the first two partition keys, the time window of the
    first partition, the run config generated for the first key, the cron
    string of the schedule built from the config, and the run config of the
    run request produced by a tick at 2021-05-21. Finally the schedule is
    returned from a ``@repository`` definition.
    """

    @weekly_partitioned_config(
        start_date="2021-05-05",
        minute_offset=10,
        hour_offset=13,
        day_offset=3,
    )
    def my_partitioned_config(start, end):
        return {"start": str(start), "end": str(end)}

    partition_keys = my_partitioned_config.get_partition_keys()
    assert partition_keys[0] == "2021-05-05"
    assert partition_keys[1] == "2021-05-12"

    # Partition windows carry the hour/minute offsets even though keys do not.
    all_partitions = my_partitioned_config.partitions_def.get_partitions()
    expected_first_window = time_window("2021-05-05T13:10:00",
                                        "2021-05-12T13:10:00")
    assert all_partitions[0].value == expected_first_window

    first_key_config = my_partitioned_config.get_run_config_for_partition_key(
        partition_keys[0])
    assert first_key_config == {
        "start": "2021-05-05T13:10:00+00:00",
        "end": "2021-05-12T13:10:00+00:00",
    }

    weekly_schedule = schedule_for_partitioned_config(
        my_partitioned_config,
        hour_of_day=9,
        minute_of_hour=30,
        day_of_week=2,
    )
    assert weekly_schedule.cron_schedule == "30 9 * * 2"

    tick_context = build_schedule_context(
        scheduled_execution_time=datetime.strptime("2021-05-21", DATE_FORMAT))
    tick_result = weekly_schedule.evaluate_tick(tick_context)
    assert tick_result.run_requests[0].run_config == {
        "start": "2021-05-12T13:10:00+00:00",
        "end": "2021-05-19T13:10:00+00:00",
    }

    # Defining a repository that returns the schedule is part of the test.
    @repository
    def _repo():
        return [weekly_schedule]
Exemplo n.º 27
0
def test_config_based_schedule_no_context():
    """Check a ``@schedule`` whose decorated function takes no context argument.

    The schedule function returns a fixed run config. The test verifies that
    evaluating a tick yields a single run request carrying that config and the
    schedule's tags, and that calling the schedule directly with no arguments
    returns the same config.
    """
    from dagster import Field, String

    context_without_time = build_schedule_context()

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    @job
    def foo_job():
        foo_op()

    FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @schedule(
        cron_schedule="* * * * *",
        job=foo_job,
        tags={"foo": "FOO"},
    )
    def foo_schedule():
        return FOO_CONFIG

    # evaluate tick
    execution_data = foo_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    run_request = execution_data.run_requests[0]
    assert run_request.run_config == FOO_CONFIG
    assert run_request.tags.get("foo") == "FOO"

    # direct invocation
    run_config = foo_schedule()
    assert run_config == FOO_CONFIG
Exemplo n.º 28
0
def test_partitions_for_hourly_schedule_decorators_with_timezone(
        partition_hours_offset: int):
    """Check hourly schedule partitions when an execution timezone is set.

    With the current time frozen at 2019-02-27 00:01:01 US/Central, the test
    covers two start-date cases: a naive start date (assumed to be in the
    execution timezone) and a start date in a different timezone (transformed
    into the execution timezone). For the first schedule it also evaluates a
    tick and checks the resulting run config.

    Args:
        partition_hours_offset: Offset passed to ``@hourly_schedule``; the
            expected partition counts and the expected tick partition are
            shifted by this many hours.
    """
    frozen_now = create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")
    with pendulum.test(frozen_now):
        naive_start = datetime(year=2019, month=1, day=1)

        # You can specify a start date with no timezone and it will be assumed to be
        # in the execution timezone

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=naive_start,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        assert hourly_central_schedule.execution_timezone == "US/Central"

        central_partition_count = (
            HOURS_UNTIL_FEBRUARY_27 + 1 - partition_hours_offset)
        _check_partitions(
            hourly_central_schedule,
            central_partition_count,
            pendulum.instance(naive_start, tz="US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )

        tick_time = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central")
        tick_context = build_schedule_context(
            scheduled_execution_time=tick_time)

        tick_result = hourly_central_schedule.evaluate_tick(tick_context)
        assert tick_result.run_requests
        assert len(tick_result.run_requests) == 1

        # The partition handed to the schedule function is the top of the
        # tick's hour moved back by the configured offset.
        tick_hour = create_pendulum_time(
            year=2019, month=1, day=27, hour=1, tz="US/Central")
        expected_partition_time = tick_hour.subtract(
            hours=partition_hours_offset)
        assert tick_result.run_requests[0].run_config == {
            "hourly_time": expected_partition_time.isoformat()
        }

        # You can specify a start date in a different timezone and it will be transformed into the
        # execution timezone
        pacific_start = create_pendulum_time(2019, 1, 1, 0, tz="US/Pacific")

        @hourly_schedule(
            pipeline_name="foo_pipeline",
            start_date=pacific_start,
            execution_time=time(hour=0, minute=25),
            execution_timezone="US/Central",
            partition_hours_offset=partition_hours_offset,
        )
        def hourly_central_schedule_with_timezone_start_time(hourly_time):
            return {"hourly_time": hourly_time.isoformat()}

        # start date is two hours later since it's in PT
        pacific_partition_count = (
            HOURS_UNTIL_FEBRUARY_27 - 2 + 1 - partition_hours_offset)
        _check_partitions(
            hourly_central_schedule_with_timezone_start_time,
            pacific_partition_count,
            to_timezone(pacific_start, "US/Central"),
            DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            relativedelta(hours=1),
        )
Exemplo n.º 29
0
def test_my_cron_schedule_with_context():
    """Invoke ``my_schedule_uses_context`` directly and validate its output.

    The schedule is called with a context whose scheduled execution time is
    2020-01-01, and the returned run config is validated against
    ``pipeline_for_test``.
    """
    tick_time = datetime.datetime(2020, 1, 1)
    schedule_context = build_schedule_context(
        scheduled_execution_time=tick_time)
    produced_config = my_schedule_uses_context(schedule_context)
    assert validate_run_config(pipeline_for_test, produced_config)
Exemplo n.º 30
0
def test_configurable_job_schedule():
    """Invoke ``configurable_job_schedule`` directly and validate its request.

    The schedule is called with a context whose scheduled execution time is
    2020-01-01, and the ``run_config`` of the returned run request is
    validated against ``configurable_job``.
    """
    tick_time = datetime.datetime(2020, 1, 1)
    schedule_context = build_schedule_context(
        scheduled_execution_time=tick_time)
    produced_request = configurable_job_schedule(schedule_context)
    assert validate_run_config(configurable_job, produced_request.run_config)