def stock_data_partitions():
    """Return four partitions, one per month start from January to April 2019.

    Each partition wraps a ``datetime.datetime`` for the first day of the
    month (midnight), in chronological order.
    """
    first_month, last_month = 1, 4
    return [
        Partition(datetime.datetime(2019, month, 1))
        for month in range(first_month, last_month + 1)
    ]
def define_partitions():
    """Build the integer, enum and chained-integer partition set definitions.

    Returns:
        A list with the three ``PartitionSetDefinition`` objects, in the
        order integer, enum, chained-integer.
    """

    def _filesystem_storage(_):
        # Every partition gets the same (freshly built) run config.
        return {"storage": {"filesystem": {}}}

    def _integer_partitions():
        return [Partition(index) for index in range(10)]

    def _enum_partitions():
        return ["one", "two", "three"]

    def _foo_tag(partition):
        return {"foo": partition.name}

    return [
        # Integer partitions restricted to a single solid, tagged by name.
        PartitionSetDefinition(
            name="integer_partition",
            pipeline_name="no_config_pipeline",
            solid_selection=["return_hello"],
            mode="default",
            partition_fn=_integer_partitions,
            run_config_fn_for_partition=_filesystem_storage,
            tags_fn_for_partition=_foo_tag,
        ),
        # Partitions given as plain strings rather than Partition objects.
        PartitionSetDefinition(
            name="enum_partition",
            pipeline_name="noop_pipeline",
            partition_fn=_enum_partitions,
            run_config_fn_for_partition=_filesystem_storage,
        ),
        # Integer partitions targeting the chained failure pipeline.
        PartitionSetDefinition(
            name="chained_integer_partition",
            pipeline_name="chained_failure_pipeline",
            mode="default",
            partition_fn=_integer_partitions,
            run_config_fn_for_partition=_filesystem_storage,
        ),
    ]
def get_stock_ticker_partitions():
    """Return one partition per ticker symbol: AAPL, GOOG, MSFT, TSLA."""
    tickers = ("AAPL", "GOOG", "MSFT", "TSLA")
    return [Partition(symbol) for symbol in tickers]
def define_partitions():
    """Build the integer, enum, chained-integer and alphabet partition sets.

    Returns:
        A list with the four ``PartitionSetDefinition`` objects, in the
        order integer, enum, chained-integer, alphabet.
    """

    def _integer_partitions():
        return [Partition(index) for index in range(10)]

    def _enum_partitions():
        return ["one", "two", "three"]

    def _alphabet_partitions():
        # One single-character string per lowercase ASCII letter.
        return list(string.ascii_lowercase)

    def _foo_tag(partition):
        return {"foo": partition.name}

    return [
        PartitionSetDefinition(
            name="integer_partition",
            pipeline_name="no_config_pipeline",
            solid_selection=["return_hello"],
            mode="default",
            partition_fn=_integer_partitions,
            tags_fn_for_partition=_foo_tag,
        ),
        PartitionSetDefinition(
            name="enum_partition",
            pipeline_name="noop_pipeline",
            partition_fn=_enum_partitions,
        ),
        PartitionSetDefinition(
            name="chained_integer_partition",
            pipeline_name="chained_failure_pipeline",
            mode="default",
            partition_fn=_integer_partitions,
        ),
        PartitionSetDefinition(
            name="alpha_partition",
            pipeline_name="no_config_pipeline",
            partition_fn=_alphabet_partitions,
        ),
    ]
def _invalid_partition_selector(_cotnext, _partition_set_def):
    """Return a one-element list holding a hard-coded partition named ``made_up``.

    Both arguments are ignored; the partition value is the pendulum time
    2019-01-27 01:25.
    """
    made_up_time = create_pendulum_time(year=2019, month=1, day=27, hour=1, minute=25)
    made_up_partition = Partition(value=made_up_time, name="made_up")
    return [made_up_partition]
def get_date_partitions():
    """Return one partition per calendar day of 2020.

    Returns:
        A list of 366 ``Partition`` objects (2020 is a leap year) whose values
        are ``YYYY-MM-DD`` strings, from ``2020-01-01`` through ``2020-12-31``
        in chronological order.
    """
    first_day = datetime.date(2020, 1, 1)
    # Exclusive upper bound. The previous implementation iterated over
    # ``days + 1`` offsets and therefore also emitted 2021-01-01, which is
    # not a day in 2020.
    end_exclusive = datetime.date(2021, 1, 1)
    day_count = (end_exclusive - first_day).days
    return [
        Partition((first_day + datetime.timedelta(days=offset)).strftime("%Y-%m-%d"))
        for offset in range(day_count)
    ]
def get_date_range_partitions():
    """Return one partition per day in the half-open range [start, end).

    ``start`` and ``end`` are read from the enclosing scope. When ``end`` is
    falsy, today's date is used as the (exclusive) upper bound. Each partition
    carries the day as its value and the ``YYYY-MM-DD`` string as its name.
    """
    day = start
    stop = end or date.today()
    partitions = []
    while day < stop:
        label = day.strftime('%Y-%m-%d')
        partitions.append(Partition(value=day, name=label))
        day = day + timedelta(days=1)
    return partitions
def assert_partitioned_schedule_builds(
    job_def: JobDefinition,
    start: datetime,
    end: datetime,
):
    """Check that an execution plan can be created for the ``(start, end)`` partition.

    The run config is produced by the job's partition set for a partition
    whose value is the ``(start, end)`` tuple, then handed to
    ``create_execution_plan``. Any exception raised along the way propagates.
    """
    partition_set_def = job_def.get_partition_set_def()
    window = Partition((start, end))
    config_for_window = partition_set_def.run_config_for_partition(window)
    create_execution_plan(job_def, run_config=config_for_window)
def assert_partitioned_schedule_builds(
    schedule_def: PartitionScheduleDefinition,
    pipeline_def: PipelineDefinition,
    partition: datetime,
):
    """Check that an execution plan can be created for the given partition.

    The run config comes from the schedule's partition set, evaluated for a
    ``Partition`` wrapping ``partition``; the plan is created in the
    schedule's mode. Any exception raised along the way propagates.
    """
    partition_set = schedule_def.get_partition_set()
    config_for_partition = partition_set.run_config_for_partition(Partition(partition))
    create_execution_plan(
        pipeline_def,
        run_config=config_for_partition,
        mode=schedule_def.mode,
    )
def get_day_partition():
    """Return seven partitions keyed by weekday abbreviation, Monday first."""
    weekday_codes = ("M", "Tu", "W", "Th", "F", "Sa", "Su")
    return [Partition(code) for code in weekday_codes]
def get_date_partitions():
    """Return one partition per day from 2021-01-01 up to the current time.

    The previous docstring ("Every day in the month of May, 2020") did not
    describe this code. A day is included when its midnight is strictly
    earlier than ``datetime.now()`` (naive local time), so the result grows by
    one partition per day.

    Returns:
        A list of ``Partition`` objects whose values are ``YYYY-MM-DD``
        strings in chronological order; empty if the clock reads
        2021-01-01 00:00 or earlier.
    """
    current = datetime(2021, 1, 1)
    # Read the clock once so the upper bound cannot move while iterating.
    cutoff = datetime.now()
    partitions = []
    while current < cutoff:
        partitions.append(Partition(current.strftime("%Y-%m-%d")))
        current += timedelta(days=1)
    return partitions
def define_bar_schedules():
    """Build the ``foo_schedule`` and ``partitioned_schedule`` definitions.

    Returns:
        A dict mapping each schedule name to its definition. The partitioned
        schedule is derived from a partition set over the characters of
        ``string.digits`` and always selects the partition ``"7"``.
    """
    digit_partition_set = PartitionSetDefinition(
        name="scheduled_partitions",
        pipeline_name="partitioned_scheduled_pipeline",
        partition_fn=lambda: string.digits,
    )

    def _always_seven(_context, _def):
        return Partition("7")

    schedules = {}
    schedules["foo_schedule"] = ScheduleDefinition(
        "foo_schedule",
        cron_schedule="* * * * *",
        pipeline_name="test_pipeline",
        run_config={},
    )
    schedules["partitioned_schedule"] = digit_partition_set.create_schedule_definition(
        schedule_name="partitioned_schedule",
        cron_schedule="* * * * *",
        partition_selector=_always_seven,
    )
    return schedules
def define_partitions():
    """Build the integer and enum partition set definitions.

    Returns:
        A two-element list: the integer partition set followed by the enum
        partition set. Both use the same filesystem-storage environment dict
        for every partition.
    """

    def _filesystem_storage(_):
        return {"storage": {"filesystem": {}}}

    def _integer_partitions():
        return [Partition(index) for index in range(10)]

    def _enum_partitions():
        return ["one", "two", "three"]

    return [
        PartitionSetDefinition(
            name="integer_partition",
            pipeline_name="no_config_pipeline",
            solid_selection=['return_hello'],
            mode="default",
            partition_fn=_integer_partitions,
            environment_dict_fn_for_partition=_filesystem_storage,
        ),
        PartitionSetDefinition(
            name="enum_partition",
            pipeline_name="noop_pipeline",
            partition_fn=_enum_partitions,
            environment_dict_fn_for_partition=_filesystem_storage,
        ),
    ]
from dagster import Partition, PartitionSetDefinition, repository_partitions


def _filesystem_storage(_):
    # Same environment dict for every partition; a new dict on each call.
    return {"storage": {"filesystem": {}}}


def _integer_partitions():
    return [Partition(index) for index in range(10)]


def _enum_partitions():
    return ["one", "two", "three"]


# Integer partitions 0..9, restricted to the "return_hello" solid.
integer_set = PartitionSetDefinition(
    name="integer_partition",
    pipeline_name="no_config_pipeline",
    solid_subset=['return_hello'],
    mode="default",
    partition_fn=_integer_partitions,
    environment_dict_fn_for_partition=_filesystem_storage,
)

# Partitions given as plain strings.
enum_set = PartitionSetDefinition(
    name="enum_partition",
    pipeline_name="noop_pipeline",
    partition_fn=_enum_partitions,
    environment_dict_fn_for_partition=_filesystem_storage,
)


# Exposes both module-level partition sets through the decorator.
@repository_partitions
def define_partitions():
    partition_sets = [integer_set, enum_set]
    return partition_sets
def define_schedules():
    """Build and return the schedule definitions declared in this function.

    The function creates one integer partition set, several plain
    ``ScheduleDefinition`` objects, one schedule derived from the partition
    set, and a number of decorator-defined schedules, then returns all the
    schedules as a list.

    All ``start_date`` / ``execution_time`` arguments are computed from the
    clock at the moment this function is called.
    """
    # Partitions 1..9, each tagged {"test": "1234"}; only used below to derive
    # the "partition_based" schedule.
    integer_partition_set = PartitionSetDefinition(
        name="scheduled_integer_partitions",
        pipeline_name="no_config_pipeline",
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    # NOTE(review): despite the "hourly" name, the cron string "0 0 * * *" is
    # the standard once-a-day-at-midnight expression.
    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    # NOTE(review): the name mentions a config fn, but none is passed here.
    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    # should_execute always returns False.
    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
    )

    # Schedule derived from the partition set, using last_empty_partition as
    # the selector.
    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    # Decorator-defined schedules. Each starts relative to today's midnight
    # and sets execution_time to the time of day a few hours after "now".
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {}

    # Same as above but with default_status set to RUNNING.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
        default_status=DefaultScheduleStatus.RUNNING,
    )
    def running_in_code_schedule(_date):
        return {}

    # Targets a non-default mode ("foo_mode").
    @daily_schedule(
        pipeline_name="multi_mode_with_loggers",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
        mode="foo_mode",
    )
    def partition_based_multi_mode_decorator(_date):
        return {}

    # The next four schedules restrict execution to the "return_foo" solid,
    # one per schedule cadence decorator (hourly/daily/monthly/weekly).
    @hourly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_hourly_decorator(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=2),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=3)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_daily_decorator(_date):
        return {}

    # start_date is snapped to the first day of the month 100 days ago.
    @monthly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=(today_at_midnight().subtract(days=100)).replace(day=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=4)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_monthly_decorator(_date):
        return {}

    @weekly_schedule(
        pipeline_name="no_config_chain_pipeline",
        start_date=today_at_midnight().subtract(days=50),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=5)).time(),
        solid_selection=["return_foo"],
    )
    def solid_selection_weekly_decorator(_date):
        return {}

    # Schedules for testing the user error boundary
    # `asdf` is deliberately left undefined (hence the pylint suppressions),
    # so evaluating these callables raises NameError.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {}

    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight().subtract(days=1),
    )
    def run_config_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    # Schedule with an explicit execution timezone; its start date is computed
    # from midnight in that same timezone.
    @daily_schedule(
        pipeline_name="no_config_pipeline",
        start_date=today_at_midnight("US/Central") - datetime.timedelta(days=1),
        execution_timezone="US/Central",
    )
    def timezone_schedule(_date):
        return {}

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
    )

    # Same pipeline as above, with an explicit tags dict on the schedule.
    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        tags={"foo": "notbar"},
    )

    # NOTE(review): per the schedule name, "invalid" is presumably not a valid
    # value for the takes_an_enum solid config; confirm against the pipeline.
    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        run_config={"solids": {"takes_an_enum": {"config": "invalid"}}},
    )

    return [
        run_config_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        timezone_schedule,
        invalid_config_schedule,
        running_in_code_schedule,
    ]
] # end_repo_include def _weekday_run_config_for_partition(_partition): pass # start_manual_partition_schedule weekday_partition_set = PartitionSetDefinition( name="weekday_partition_set", pipeline_name="my_data_pipeline", partition_fn=lambda: [ Partition("Monday"), Partition("Tuesday"), Partition("Wednesday"), Partition("Thursday"), Partition("Friday"), Partition("Saturday"), Partition("Sunday"), ], run_config_fn_for_partition=_weekday_run_config_for_partition, ) def weekday_partition_selector( ctx: ScheduleExecutionContext, partition_set: PartitionSetDefinition ) -> Union[Partition, List[Partition]]: """Maps a schedule execution time to the corresponding partition or list of partitions that
def define_schedules():
    """Build and return the schedule definitions declared in this function.

    The function creates one integer partition set, several plain
    ``ScheduleDefinition`` objects, two schedules derived from the partition
    set, and a number of decorator-defined schedules, then returns all the
    schedules as a list.

    All ``start_date`` / ``execution_time`` arguments are computed from
    ``datetime.datetime.now()`` at the moment this function is called.
    """
    # Partitions 1..9, each with filesystem storage config and the tag
    # {"test": "1234"}.
    integer_partition_set = PartitionSetDefinition(
        name='scheduled_integer_partitions',
        pipeline_name='no_config_pipeline',
        partition_fn=lambda: [Partition(x) for x in range(1, 10)],
        environment_dict_fn_for_partition=lambda _partition: {"storage": {"filesystem": {}}},
        tags_fn_for_partition=lambda _partition: {"test": "1234"},
    )

    # NOTE(review): despite the "hourly" name, the cron string "0 0 * * *" is
    # the standard once-a-day-at-midnight expression.
    no_config_pipeline_hourly_schedule = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {"filesystem": {}}},
    )

    # Same config as above, supplied through a function of the context.
    no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
        name="no_config_pipeline_hourly_schedule_with_config_fn",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {"filesystem": {}}},
    )

    # should_execute always returns False.
    no_config_should_execute = ScheduleDefinition(
        name="no_config_should_execute",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict={"storage": {"filesystem": {}}},
        should_execute=lambda _context: False,
    )

    dynamic_config = ScheduleDefinition(
        name="dynamic_config",
        cron_schedule="0 0 * * *",
        pipeline_name="no_config_pipeline",
        environment_dict_fn=lambda _context: {"storage": {"filesystem": {}}},
    )

    # Partition-set schedule with no explicit partition_selector.
    partition_based = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based",
        cron_schedule="0 0 * * *",
    )

    # Partition-set schedule with last_empty_partition passed explicitly.
    partition_based_custom_selector = integer_partition_set.create_schedule_definition(
        schedule_name="partition_based_custom_selector",
        cron_schedule="0 0 * * *",
        partition_selector=last_empty_partition,
    )

    # Decorator-defined schedules. Each starts some days before "now" and sets
    # execution_time to the time of day a few hours after "now".
    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
    )
    def partition_based_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # Targets a non-default mode ('foo_mode').
    @daily_schedule(
        pipeline_name='multi_mode_with_loggers',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
        mode='foo_mode',
    )
    def partition_based_multi_mode_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # The next four schedules restrict execution to the 'return_foo' solid,
    # one per schedule cadence decorator (hourly/daily/monthly/weekly).
    @hourly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=2)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_hourly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=2),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=3)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_daily_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @monthly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=100),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=4)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_monthly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    @weekly_schedule(
        pipeline_name='no_config_chain_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=50),
        execution_time=(datetime.datetime.now() + datetime.timedelta(hours=5)).time(),
        solid_selection=['return_foo'],
    )
    def solid_selection_weekly_decorator(_date):
        return {"storage": {"filesystem": {}}}

    # Schedules for testing the user error boundary
    # `asdf` is deliberately left undefined (hence the pylint suppressions),
    # so evaluating these callables raises NameError.
    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        should_execute=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def should_execute_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
        tags_fn_for_date=lambda _: asdf,  # pylint: disable=undefined-variable
    )
    def tags_error_schedule(_date):
        return {"storage": {"filesystem": {}}}

    @daily_schedule(
        pipeline_name='no_config_pipeline',
        start_date=datetime.datetime.now() - datetime.timedelta(days=1),
    )
    def environment_dict_error_schedule(_date):
        return asdf  # pylint: disable=undefined-variable

    tagged_pipeline_schedule = ScheduleDefinition(
        name="tagged_pipeline_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {"filesystem": {}}},
    )

    # Same pipeline as above, with an explicit tags dict on the schedule.
    tagged_pipeline_override_schedule = ScheduleDefinition(
        name="tagged_pipeline_override_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="tagged_pipeline",
        environment_dict={"storage": {"filesystem": {}}},
        tags={'foo': 'notbar'},
    )

    # NOTE(review): per the schedule name, "invalid" is presumably not a valid
    # value for the takes_an_enum solid config; confirm against the pipeline.
    invalid_config_schedule = ScheduleDefinition(
        name="invalid_config_schedule",
        cron_schedule="0 0 * * *",
        pipeline_name="pipeline_with_enum_config",
        environment_dict={"solids": {"takes_an_enum": {'config': "invalid"}}},
    )

    return [
        environment_dict_error_schedule,
        no_config_pipeline_hourly_schedule,
        no_config_pipeline_hourly_schedule_with_config_fn,
        no_config_should_execute,
        dynamic_config,
        partition_based,
        partition_based_custom_selector,
        partition_based_decorator,
        partition_based_multi_mode_decorator,
        solid_selection_hourly_decorator,
        solid_selection_daily_decorator,
        solid_selection_monthly_decorator,
        solid_selection_weekly_decorator,
        should_execute_error_schedule,
        tagged_pipeline_schedule,
        tagged_pipeline_override_schedule,
        tags_error_schedule,
        invalid_config_schedule,
    ]
import datetime from dagster import Partition, PartitionSetDefinition, ScheduleDefinition, daily_schedule, schedules from dagster.core.definitions.partition import last_empty_partition from dagster.utils.test import FilesytemTestScheduler integer_partition_set = PartitionSetDefinition( name='scheduled_integer_partitions', pipeline_name='no_config_pipeline', partition_fn=lambda: [Partition(x) for x in range(1, 10)], environment_dict_fn_for_partition=lambda _partition: {"storage": { "filesystem": {} }}, tags_fn_for_partition=lambda _partition: {"test": "1234"}, ) @schedules(scheduler=FilesytemTestScheduler) def define_scheduler(): no_config_pipeline_hourly_schedule = ScheduleDefinition( name="no_config_pipeline_hourly_schedule", cron_schedule="0 0 * * *", pipeline_name="no_config_pipeline", environment_dict={"storage": { "filesystem": {} }}, ) no_config_pipeline_hourly_schedule_with_config_fn = ScheduleDefinition(
def get_date_partitions():
    """Return one partition per day of May 2020.

    The partition values are the strings ``2020-05-01`` through
    ``2020-05-31``, in order.
    """
    days_in_may = 31
    partitions = []
    for day in range(1, days_in_may + 1):
        # Zero-pad the day number to two digits.
        partitions.append(Partition(f"2020-05-{day:02d}"))
    return partitions