def inner(fn):
    """Wrap ``fn`` in an hourly partition set and return its schedule.

    ``fn`` is validated as a callable and is later invoked with a
    partition's ``value`` to produce that partition's environment dict.
    The schedule is named after ``name`` from the enclosing scope, falling
    back to ``fn.__name__``.
    """
    check.callable_param(fn, 'fn')

    schedule_name = name or fn.__name__

    # One partition per hour, starting at ``start_date``.
    hourly_partition_fn = date_partition_range(
        start_date,
        delta=datetime.timedelta(hours=1),
        fmt="%Y-%m-%d-%H:%M",
    )

    hourly_partition_set = PartitionSetDefinition(
        name='{}_hourly'.format(pipeline_name),
        pipeline_name=pipeline_name,
        partition_fn=hourly_partition_fn,
        environment_dict_fn_for_partition=lambda partition: fn(partition.value),
        mode=mode,
    )

    return hourly_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
    )
def inner(fn):
    """Build a partition-backed schedule definition around ``fn``.

    ``fn`` is called with a partition's ``value`` and its result is used as
    the run config for that partition. Tags are produced by
    ``tags_fn_for_date`` (from the enclosing scope) when it is truthy;
    otherwise every partition gets an empty dict.
    """
    check.callable_param(fn, "fn")

    schedule_name = name or fn.__name__

    if tags_fn_for_date:

        def _tags_for(partition):
            return tags_fn_for_date(partition.value)

    else:

        def _tags_for(partition):
            return {}

    def _run_config_for(partition):
        return fn(partition.value)

    schedule_partitions = PartitionSetDefinition(
        name="{}_partitions".format(schedule_name),
        pipeline_name=pipeline_name,
        partition_fn=partition_fn,
        run_config_fn_for_partition=_run_config_for,
        solid_selection=solid_selection,
        tags_fn_for_partition=_tags_for,
        mode=mode,
    )

    return schedule_partitions.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
        partition_selector=create_offset_partition_selector(
            execution_time_to_partition_fn=execution_time_to_partition_fn,
        ),
        execution_timezone=execution_timezone,
    )
def inner(fn: Callable[..., Dict[str, Any]]) -> ScheduleDefinition:
    """Turn ``fn`` into a schedule definition backed by a partition set.

    ``fn`` is called with a partition's ``value`` to obtain the run config
    for that partition. When ``tags_fn_for_date`` (enclosing scope) is
    truthy it supplies the tags for each partition value; otherwise an
    empty dict is used.
    """
    check.callable_param(fn, "fn")

    schedule_name = name or fn.__name__

    if tags_fn_for_date:
        # Narrow the optional outer callable for the type checker.
        tags_fn = cast(
            Callable[[datetime.datetime], Optional[Dict[str, str]]],
            tags_fn_for_date,
        )

        def _tags_for(partition: "Partition") -> Optional[Dict[str, str]]:
            return tags_fn(partition.value)

    else:

        def _tags_for(partition: "Partition") -> Optional[Dict[str, str]]:
            return {}

    def _run_config_for(partition):
        return fn(partition.value)

    schedule_partitions = PartitionSetDefinition(
        name="{}_partitions".format(schedule_name),
        pipeline_name=pipeline_name,
        partition_fn=partition_fn,
        run_config_fn_for_partition=_run_config_for,
        solid_selection=solid_selection,
        tags_fn_for_partition=_tags_for,
        mode=mode,
    )

    return schedule_partitions.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
        partition_selector=create_offset_partition_selector(
            execution_time_to_partition_fn=execution_time_to_partition_fn,
        ),
        execution_timezone=execution_timezone,
    )
def inner(fn):
    """Create a schedule definition whose runs are configured by ``fn``.

    ``fn`` receives a partition's ``value`` and returns the run config.
    The partition selector is built with a delta function that steps one
    hour back from the given time, plus the minute difference between
    ``execution_time`` and ``start_date`` (modulo 60).
    """
    check.callable_param(fn, "fn")

    schedule_name = name or fn.__name__

    if tags_fn_for_date:

        def _tags_for(partition):
            return tags_fn_for_date(partition.value)

    else:

        def _tags_for(partition):
            return {}

    def _run_config_for(partition):
        return fn(partition.value)

    def _shift_to_partition_time(d):
        # Computed on every call so the outer values are read lazily,
        # exactly as the closure would.
        minute_gap = (execution_time.minute - start_date.minute) % 60
        return pendulum.instance(d).subtract(hours=1, minutes=minute_gap)

    schedule_partitions = PartitionSetDefinition(
        name="{}_partitions".format(schedule_name),
        pipeline_name=pipeline_name,
        partition_fn=partition_fn,
        run_config_fn_for_partition=_run_config_for,
        solid_selection=solid_selection,
        tags_fn_for_partition=_tags_for,
        mode=mode,
    )

    return schedule_partitions.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
        partition_selector=create_default_partition_selector_fn(
            delta_fn=_shift_to_partition_time,
            fmt=fmt,
        ),
        execution_timezone=execution_timezone,
    )
def inner(fn):
    """Wrap ``fn`` in a partition set and return the schedule built from it.

    ``fn`` is called with a partition's ``value`` to produce the
    environment dict for that partition. Tags come from
    ``tags_fn_for_date`` (enclosing scope) when it is truthy, and are an
    empty dict otherwise.
    """
    check.callable_param(fn, 'fn')

    schedule_name = name or fn.__name__

    if tags_fn_for_date:

        def _tags_for(partition):
            return tags_fn_for_date(partition.value)

    else:

        def _tags_for(partition):
            return {}

    def _environment_dict_for(partition):
        return fn(partition.value)

    schedule_partitions = PartitionSetDefinition(
        name='{}_partitions'.format(schedule_name),
        pipeline_name=pipeline_name,
        partition_fn=partition_fn,
        environment_dict_fn_for_partition=_environment_dict_for,
        solid_selection=solid_selection,
        tags_fn_for_partition=_tags_for,
        mode=mode,
    )

    return schedule_partitions.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
    )
def create_hourly_hn_download_schedule():
    """Return the hourly schedule definition for ``download_pipeline``.

    The partitions are hourly, start on 2021-01-01 and use the UTC
    timezone. Each partition's run config is obtained by passing the
    partition's ``value`` to ``get_hourly_download_def_schedule_config``.
    """
    schedule_name = "hourly_hn_download_schedule"
    pipeline_name = "download_pipeline"

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=datetime.datetime(2021, 1, 1),
        execution_time=datetime.time(0, 0),
        fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
        timezone="UTC",
    )

    def _run_config_for(partition):
        return get_hourly_download_def_schedule_config(partition.value)

    download_partition_set = PartitionSetDefinition(
        name="{}_partitions".format(schedule_name),
        pipeline_name=pipeline_name,  # type: ignore[arg-type]
        run_config_fn_for_partition=_run_config_for,
        mode="prod",
        partitions_def=hourly_partitions,
    )

    return download_partition_set.create_schedule_definition(
        schedule_name,
        hourly_partitions.get_cron_schedule(),
        partition_selector=create_offset_partition_selector(
            execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
        ),
        execution_timezone="UTC",
        decorated_fn=get_hourly_download_def_schedule_config,
        job=download_comments_and_stories_prod,
    )
def inner(fn: Callable[[datetime.datetime], Dict[str, Any]]) -> PartitionScheduleDefinition:
    """Build an hourly partition schedule definition from ``fn``.

    ``fn`` is called with a partition's ``value`` to produce the run config
    for that partition. The partition format depends on whether
    ``execution_timezone`` (enclosing scope) is set. The returned schedule
    definition is passed through ``update_wrapper`` with ``fn`` as the
    wrapped callable.
    """
    check.callable_param(fn, "fn")

    schedule_name = name or fn.__name__

    if tags_fn_for_date:
        # Narrow the optional outer callable for the type checker.
        tags_fn = cast(
            Callable[[datetime.datetime], Optional[Dict[str, str]]],
            tags_fn_for_date,
        )

        def _tags_for(partition: "Partition") -> Optional[Dict[str, str]]:
            return tags_fn(partition.value)

    else:

        def _tags_for(partition: "Partition") -> Optional[Dict[str, str]]:
            return {}

    if execution_timezone:
        fmt = DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE
    else:
        fmt = DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=start_date,
        execution_time=execution_time,
        end=end_date,
        fmt=fmt,
        timezone=execution_timezone,
        offset=partition_hours_offset,
    )

    def _run_config_for(partition):
        return fn(partition.value)

    schedule_partitions = PartitionSetDefinition(
        name="{}_partitions".format(schedule_name),
        pipeline_name=pipeline_name,  # type: ignore[arg-type]
        run_config_fn_for_partition=_run_config_for,
        solid_selection=solid_selection,
        tags_fn_for_partition=_tags_for,
        mode=mode,
        partitions_def=hourly_partitions,
    )

    hourly_schedule = schedule_partitions.create_schedule_definition(
        schedule_name,
        hourly_partitions.get_cron_schedule(),
        should_execute=should_execute,
        environment_vars=environment_vars,
        partition_selector=create_offset_partition_selector(
            execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
        ),
        execution_timezone=execution_timezone,
        description=description,
        decorated_fn=fn,
        default_status=default_status,
    )

    update_wrapper(hourly_schedule, wrapped=fn)
    return hourly_schedule
def offset_partition_selector(
    context: ScheduleEvaluationContext, partition_set_def: PartitionSetDefinition
) -> Union[Partition, SkipReason]:
    """Pick the partition that corresponds to the current schedule tick.

    Args:
        context: Schedule evaluation context; only its
            ``scheduled_execution_time`` attribute is read.
        partition_set_def: Partition set whose ``get_partitions`` is queried,
            once with ``None`` and once with the scheduled execution time.

    Returns:
        The matching ``Partition``, or a ``SkipReason`` when the partition
        set is empty, the computed partition time falls before the first or
        after the last available partition, or no partition matches.
        When the context has no scheduled execution time, the last
        available partition is returned instead.
    """
    no_partitions_skip_reason = SkipReason(
        "Partition selector did not return a partition. Make sure that the timezone "
        "on your partition set matches your execution timezone."
    )

    # The partition set has no partitions at all.
    earliest_possible_partition = next(iter(partition_set_def.get_partitions(None)), None)
    if not earliest_possible_partition:
        return no_partitions_skip_reason

    valid_partitions = partition_set_def.get_partitions(context.scheduled_execution_time)

    if not context.scheduled_execution_time:
        if not valid_partitions:
            return no_partitions_skip_reason
        return valid_partitions[-1]

    partition_time = execution_time_to_partition_fn(context.scheduled_execution_time)

    if partition_time < earliest_possible_partition.value:
        return SkipReason(
            f"Your partition ({partition_time.isoformat()}) is before the beginning of "
            f"the partition set ({earliest_possible_partition.value.isoformat()}). "
            "Verify your schedule's start_date is correct."
        )

    # Nothing is available as of the scheduled time. Without this guard the
    # ``valid_partitions[-1]`` lookup below would raise IndexError instead of
    # skipping the tick. Placed after the start-date check so the more
    # specific message still wins.
    if not valid_partitions:
        return no_partitions_skip_reason

    latest_partition = valid_partitions[-1]
    if partition_time > latest_partition.value:
        return SkipReason(
            f"Your partition ({partition_time.isoformat()}) is after the end of "
            f"the partition set ({latest_partition.value.isoformat()}). "
            "Verify your schedule's end_date is correct."
        )

    # Compare on the ISO representation, computed once for the target time.
    target_iso = partition_time.isoformat()
    for partition in valid_partitions:
        if partition.value.isoformat() == target_iso:
            return partition

    return no_partitions_skip_reason
def inner(fn):
    """Wrap ``fn`` in a daily partition set and return its schedule.

    ``fn`` is validated as a callable and is later invoked with a
    partition's ``value`` to produce that partition's environment dict.
    The partition range is built from ``start_date`` (enclosing scope).
    """
    check.callable_param(fn, 'fn')

    schedule_name = name or fn.__name__

    daily_partition_set = PartitionSetDefinition(
        name='{}_daily'.format(pipeline_name),
        pipeline_name=pipeline_name,
        partition_fn=date_partition_range(start_date),
        environment_dict_fn_for_partition=lambda partition: fn(partition.value),
    )

    return daily_partition_set.create_schedule_definition(
        schedule_name,
        cron_schedule,
        should_execute=should_execute,
        environment_vars=environment_vars,
    )
def get_partition_set_def(
    self, pipeline_name: str
) -> Optional["PartitionSetDefinition"]:
    """Return a partition set for ``pipeline_name``, or ``None``.

    ``None`` is returned when ``self.partitioned_config`` is falsy.
    Otherwise the partition set is named
    ``<pipeline_name>_<self.name>_partition_set``, uses ``self.name`` as its
    mode, and takes its partitions definition and run-config function from
    the partitioned config.
    """
    # Imported locally, as in the original; presumably to avoid a circular
    # import at module load — confirm before moving it to module level.
    from dagster.core.definitions.partition import PartitionSetDefinition

    if self.partitioned_config:
        partition_set_name = pipeline_name + "_" + self.name + "_partition_set"
        return PartitionSetDefinition(
            pipeline_name=pipeline_name,
            name=partition_set_name,
            partitions_def=self.partitioned_config.partitions_def,
            run_config_fn_for_partition=self.partitioned_config.run_config_for_partition_fn,
            mode=self.name,
        )

    return None