def test_scheduled_jobs():
    """Evaluate a cron ScheduleDefinition tick and validate its run config.

    Builds a one-op job with default config, wraps it in a schedule, then
    asserts that evaluating a tick yields exactly one run request whose
    run config validates against the job.
    """
    from dagster import Field, String

    @op(config_schema={"foo": Field(String)})
    def foo_op(context):
        pass

    DEFAULT_FOO_CONFIG = {"ops": {"foo_op": {"config": {"foo": "bar"}}}}

    @job(config=DEFAULT_FOO_CONFIG)
    def foo_job():
        foo_op()

    my_schedule = ScheduleDefinition(
        name="my_schedule",
        cron_schedule="* * * * *",
        job=foo_job,
    )

    context_without_time = build_schedule_context()

    # NOTE(review): context_with_time is built but never used below — it
    # appears to be illustrative (shows passing a scheduled execution time);
    # kept as-is to preserve behavior.
    execution_time = datetime(year=2019, month=2, day=27)
    context_with_time = build_schedule_context(
        scheduled_execution_time=execution_time
    )

    execution_data = my_schedule.evaluate_tick(context_without_time)
    assert execution_data.run_requests
    assert len(execution_data.run_requests) == 1
    validate_run_config(foo_job, execution_data.run_requests[0].run_config)
def test_partition_based_schedule_example():
    """Both partition-based schedule examples must emit valid run config.

    Defines a minimal solid/pipeline matching the schedules' config shape,
    then validates the config produced by `partition_schedule_example` and
    `my_timezone_schedule` for the same date.
    """
    @solid(config_schema={"date": str})
    def process_data_for_date(context):
        return context.solid_config["date"]

    @pipeline
    def pipeline_for_test():
        process_data_for_date()

    test_date = datetime(2021, 1, 1)

    config = partition_schedule_example(test_date)
    assert validate_run_config(pipeline_for_test, config)

    config = my_timezone_schedule(test_date)
    assert validate_run_config(pipeline_for_test, config)
def test_my_partitioned_config():
    """Check the exact output of my_partitioned_config and that it validates.

    Asserts the decorated function returns the documented config for the
    2020-01-03 → 2020-01-04 partition window, and that the config is valid
    for the do_stuff_partitioned job.
    """
    run_config = my_partitioned_config(datetime(2020, 1, 3), datetime(2020, 1, 4))

    # Exact config expected for the 2020-01-03 partition.
    assert run_config == {
        "ops": {"process_data_for_date": {"config": {"date": "2020-01-03"}}}
    }

    # The generated config must also pass validation against the target job.
    assert validate_run_config(do_stuff_partitioned, run_config)
def test_non_partition_schedule_example():
    """Validate the config emitted by the non-partitioned schedule example."""
    @solid(config_schema={"dataset_name": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    # The example schedule ignores its argument, so None suffices here.
    run_config = non_partition_schedule_example(None)
    assert validate_run_config(pipeline_for_test, run_config)
def test_schedule_examples(schedule_to_test):
    """Parametrized check: the given schedule must emit valid run config.

    `schedule_to_test` is supplied by a pytest fixture/parametrization and
    is called with a fixed date; its output is validated against a minimal
    pipeline with the matching config schema.
    """
    @solid(config_schema={"date": str})
    def process_data_for_date(_):
        pass

    @pipeline(mode_defs=[ModeDefinition("basic")])
    def pipeline_for_test():
        process_data_for_date()

    run_config = schedule_to_test(datetime(2021, 1, 1))
    assert validate_run_config(pipeline_for_test, run_config)
def test_my_execution_time_schedule():
    """Validate config from a schedule that reads its execution time.

    Builds a schedule context carrying a fixed scheduled_execution_time and
    checks the resulting run config against a matching test pipeline.
    """
    @solid(config_schema={"dataset_name": str, "execution_date": str})
    def process_data(_):
        pass

    @pipeline
    def pipeline_for_test():
        process_data()

    context = build_schedule_context(
        scheduled_execution_time=datetime(2021, 1, 1)
    )
    run_config = my_execution_time_schedule(context)
    assert validate_run_config(pipeline_for_test, run_config)
def test_my_offset_partitioned_config():
    """Check partition keys and per-partition config of the offset example."""
    # The first two partition keys should be consecutive daily dates.
    keys = my_offset_partitioned_config.get_partition_keys()
    assert keys[0] == "2020-01-01"
    assert keys[1] == "2020-01-02"

    # The run config for the first partition must be valid for
    # do_more_stuff_partitioned.
    run_config = my_offset_partitioned_config.get_run_config_for_partition_key(keys[0])
    assert validate_run_config(do_more_stuff_partitioned, run_config)

    # The config window is offset by 15 minutes from the partition boundary.
    expected = {
        "ops": {
            "process_data": {
                "config": {
                    "start": "2020-01-01-00:15",
                    "end": "2020-01-02-00:15",
                }
            }
        }
    }
    assert run_config == expected
def test_multiple_runs_for_successful_runs():
    """Sensor fires once per matching run: SUCCESS + prod + download_pipeline.

    Seeds an ephemeral instance with four matching runs and four decoys that
    each differ in exactly one attribute (status, mode, or pipeline name),
    then asserts the sensor yields four run requests with valid config.
    """
    def make_matching_run():
        # A run satisfying every condition the sensor filters on.
        return PipelineRun(
            run_id=str(uuid.uuid4()),
            status=PipelineRunStatus.SUCCESS,
            mode="prod",
            pipeline_name="download_pipeline",
            run_config={"resources": DEFAULT_PARTITION_RESOURCE_CONFIG},
        )

    with tempfile.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        decoys = [
            # Wrong status.
            PipelineRun(
                status=PipelineRunStatus.FAILURE,
                mode="prod",
                pipeline_name="download_pipeline",
            ),
            # Wrong mode and pipeline.
            PipelineRun(
                status=PipelineRunStatus.SUCCESS,
                mode="dev",
                pipeline_name="weird_pipeline",
            ),
            # Wrong mode.
            PipelineRun(
                status=PipelineRunStatus.SUCCESS,
                mode="test",
                pipeline_name="download_pipeline",
            ),
            # Wrong pipeline.
            PipelineRun(
                status=PipelineRunStatus.SUCCESS,
                mode="prod",
                pipeline_name="other",
            ),
        ]
        seeded_runs = (
            [make_matching_run()]
            + decoys
            + [make_matching_run() for _ in range(3)]
        )
        for run in seeded_runs:
            instance.add_run(run)

        run_requests = list(
            dbt_on_hn_download_finished(build_sensor_context(instance=instance))
        )
        assert len(run_requests) == 4
        for run_request in run_requests:
            assert validate_run_config(dbt_pipeline, run_request.run_config)
def test_validate_run_config():
    """Exercise validate_run_config across its three documented behaviors.

    1. A pipeline with no required config validates with no run config.
    2. Valid config is returned in its fully normalized shape.
    3. Missing required config raises DagsterInvalidConfigError.
    """
    @solid
    def basic():
        pass

    @pipeline
    def basic_pipeline():
        basic()

    # No config schema anywhere -> validation succeeds with no run config.
    validate_run_config(basic_pipeline)

    @solid(config_schema={"foo": str})
    def requires_config(_):
        pass

    @pipeline
    def pipeline_requires_config():
        requires_config()

    # The normalized form validate_run_config returns for the config below:
    # defaults are filled in for inputs/outputs, execution, resources, loggers.
    normalized = {
        "solids": {
            "requires_config": {
                "config": {"foo": "bar"},
                "inputs": {},
                "outputs": None,
            }
        },
        "execution": {"in_process": {"retries": {"enabled": {}}}},
        "resources": {"io_manager": {"config": None}},
        "loggers": {},
    }

    result = validate_run_config(
        pipeline_requires_config,
        {"solids": {"requires_config": {"config": {"foo": "bar"}}}},
    )
    assert result == normalized

    result_with_storage = validate_run_config(
        pipeline_requires_config,
        {"solids": {"requires_config": {"config": {"foo": "bar"}}}},
    )
    assert result_with_storage == normalized

    # Omitting the required solid config must raise.
    with pytest.raises(DagsterInvalidConfigError):
        validate_run_config(pipeline_requires_config)
def test_my_cron_schedule_with_context():
    """Validate config from a schedule that consumes its context."""
    ctx = build_schedule_context(
        scheduled_execution_time=datetime.datetime(2020, 1, 1)
    )
    run_config = my_schedule_uses_context(ctx)
    assert validate_run_config(pipeline_for_test, run_config)
def test_sensor():
    """Each run request yielded by trigger_apis must carry valid config."""
    # The sensor ignores its context argument in this example, so pass None.
    for request in trigger_apis(None):
        assert dg.validate_run_config(dg.log_file_pipeline, request.run_config)
def test_sensor():
    """Each run request yielded by sensor_to_test must carry valid config."""
    # NOTE(review): another `test_sensor` is defined earlier in this file;
    # this later definition shadows it at import time, so pytest only runs
    # this one — consider renaming one of them. TODO confirm intent.
    for request in sensor_to_test():
        assert validate_run_config(log_file_pipeline, request.run_config)
def test_my_directory_sensor_cursor():
    """Cursor-driven sensor: every emitted run request must validate."""
    # Start the sensor from cursor "0" so it scans from the beginning.
    ctx = build_sensor_context(cursor="0")
    for request in my_directory_sensor_cursor(ctx):
        assert validate_run_config(log_file_pipeline, request.run_config)
def test_partition_schedule():
    """Evaluate a tick of my_schedule and validate each run request's config."""
    tick_data = my_schedule.evaluate_tick(build_schedule_context())
    for request in tick_data.run_requests:
        validate_run_config(my_data_pipeline, request.run_config)
def test_hourly_schedule():
    """The hourly schedule must emit valid config for a fixed date."""
    config = hourly_schedule_to_test(datetime.datetime(2020, 1, 1))
    assert validate_run_config(pipeline_for_test, config)
def test_my_cron_schedule():
    """A no-argument cron schedule must emit valid config for its pipeline."""
    config = my_cron_schedule()
    assert validate_run_config(my_pipeline_on_cron, config)
def test_configurable_job_schedule():
    """A schedule returning a RunRequest must produce valid job config."""
    ctx = build_schedule_context(
        scheduled_execution_time=datetime.datetime(2020, 1, 1)
    )
    request = configurable_job_schedule(ctx)
    assert validate_run_config(configurable_job, request.run_config)