Пример #1
0
    def inner(fn):
        """Turn ``fn`` into an hourly-partitioned schedule definition.

        ``fn`` is passed through ``check.callable_param`` and is later invoked
        with ``partition.value`` to produce the environment dict for a
        partition.  The schedule is named ``name`` when that is truthy and
        ``fn.__name__`` otherwise.  ``name``, ``pipeline_name``,
        ``start_date``, ``mode``, ``cron_schedule``, ``should_execute`` and
        ``environment_vars`` are read from the enclosing scope, which is not
        part of this block.

        Returns whatever ``PartitionSetDefinition.create_schedule_definition``
        returns.
        """
        check.callable_param(fn, 'fn')

        resolved_schedule_name = name or fn.__name__

        # One partition per hour, starting at start_date.
        hourly_partition_fn = date_partition_range(
            start_date, delta=datetime.timedelta(hours=1), fmt="%Y-%m-%d-%H:%M"
        )

        hourly_partition_set = PartitionSetDefinition(
            name='{}_hourly'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=hourly_partition_fn,
            # The decorated function only sees the partition's value.
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
            mode=mode,
        )

        return hourly_partition_set.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )
Пример #2
0
    def inner(fn):
        """Wrap ``fn`` in a partition set and return a schedule built from it.

        ``fn`` is passed through ``check.callable_param`` and is called with
        ``partition.value`` to produce run config.  Tags come from
        ``tags_fn_for_date(partition.value)`` when ``tags_fn_for_date`` is
        truthy, otherwise every partition gets an empty dict.  All other
        names (``name``, ``pipeline_name``, ``partition_fn``, ``mode``, ...)
        are read from the enclosing scope, which is not part of this block.

        Returns the result of
        ``PartitionSetDefinition.create_schedule_definition``.
        """
        check.callable_param(fn, "fn")

        resolved_schedule_name = name or fn.__name__

        # Decide once, at decoration time, which tag callback to install.
        if tags_fn_for_date:

            def tags_for_partition(partition):
                return tags_fn_for_date(partition.value)

        else:

            def tags_for_partition(partition):
                return {}

        def run_config_for_partition(partition):
            return fn(partition.value)

        schedule_partitions = PartitionSetDefinition(
            name="{}_partitions".format(resolved_schedule_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            run_config_fn_for_partition=run_config_for_partition,
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_for_partition,
            mode=mode,
        )

        selector = create_offset_partition_selector(
            execution_time_to_partition_fn=execution_time_to_partition_fn,
        )

        return schedule_partitions.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
        )
Пример #3
0
    def inner(fn: Callable[..., Dict[str, Any]]) -> ScheduleDefinition:
        """Wrap ``fn`` in a partition set and return a schedule built from it.

        ``fn`` is passed through ``check.callable_param`` and is called with
        ``partition.value`` to produce run config.  When ``tags_fn_for_date``
        is truthy it is called with ``partition.value`` to produce tags;
        otherwise each partition gets an empty dict.  The remaining names
        (``name``, ``pipeline_name``, ``partition_fn``, ``mode``, ...) are
        read from the enclosing scope, which is not part of this block.

        Returns the result of
        ``PartitionSetDefinition.create_schedule_definition``.
        """
        check.callable_param(fn, "fn")

        resolved_schedule_name = name or fn.__name__

        tags_fn_for_partition_value: Callable[["Partition"], Optional[Dict[str, str]]]
        if tags_fn_for_date:
            # cast() leaves the object untouched; it only narrows the static type.
            date_tags_fn = cast(
                Callable[[datetime.datetime], Optional[Dict[str, str]]],
                tags_fn_for_date,
            )
            tags_fn_for_partition_value = lambda partition: date_tags_fn(partition.value)
        else:
            tags_fn_for_partition_value = lambda partition: {}

        def run_config_for_partition(partition: "Partition") -> Dict[str, Any]:
            return fn(partition.value)

        schedule_partitions = PartitionSetDefinition(
            name="{}_partitions".format(resolved_schedule_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            run_config_fn_for_partition=run_config_for_partition,
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
        )

        selector = create_offset_partition_selector(
            execution_time_to_partition_fn=execution_time_to_partition_fn,
        )

        return schedule_partitions.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
        )
Пример #4
0
    def inner(fn):
        """Wrap ``fn`` in a partition set and return a schedule built from it.

        ``fn`` is passed through ``check.callable_param`` and is called with
        ``partition.value`` to produce run config.  Tags come from
        ``tags_fn_for_date(partition.value)`` when ``tags_fn_for_date`` is
        truthy, otherwise every partition gets an empty dict.  The partition
        selector is built by ``create_default_partition_selector_fn`` from a
        delta function and ``fmt``.  All free names (``name``,
        ``pipeline_name``, ``execution_time``, ``start_date``, ``fmt``, ...)
        are read from the enclosing scope, which is not part of this block.

        Returns the result of
        ``PartitionSetDefinition.create_schedule_definition``.
        """
        check.callable_param(fn, "fn")

        resolved_schedule_name = name or fn.__name__

        # Decide once, at decoration time, which tag callback to install.
        if tags_fn_for_date:

            def tags_for_partition(partition):
                return tags_fn_for_date(partition.value)

        else:

            def tags_for_partition(partition):
                return {}

        def run_config_for_partition(partition):
            return fn(partition.value)

        def shift_to_partition_time(d):
            # Step back one hour plus the minute distance between the
            # execution time and the start date, taken modulo 60.
            moment = pendulum.instance(d)
            return moment.subtract(
                hours=1,
                minutes=(execution_time.minute - start_date.minute) % 60,
            )

        schedule_partitions = PartitionSetDefinition(
            name="{}_partitions".format(resolved_schedule_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            run_config_fn_for_partition=run_config_for_partition,
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_for_partition,
            mode=mode,
        )

        selector = create_default_partition_selector_fn(
            delta_fn=shift_to_partition_time,
            fmt=fmt,
        )

        return schedule_partitions.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
        )
Пример #5
0
    def inner(fn):
        """Wrap ``fn`` in a partition set and return a schedule built from it.

        ``fn`` is passed through ``check.callable_param`` and is called with
        ``partition.value`` to produce the environment dict.  Tags come from
        ``tags_fn_for_date(partition.value)`` when ``tags_fn_for_date`` is
        truthy, otherwise every partition gets an empty dict.  All other
        names (``name``, ``pipeline_name``, ``partition_fn``, ``mode``, ...)
        are read from the enclosing scope, which is not part of this block.

        Returns the result of
        ``PartitionSetDefinition.create_schedule_definition``.
        """
        check.callable_param(fn, 'fn')

        resolved_schedule_name = name or fn.__name__

        # Decide once, at decoration time, which tag callback to install.
        if tags_fn_for_date:

            def tags_for_partition(partition):
                return tags_fn_for_date(partition.value)

        else:

            def tags_for_partition(partition):
                return {}

        def environment_dict_for_partition(partition):
            return fn(partition.value)

        schedule_partitions = PartitionSetDefinition(
            name='{}_partitions'.format(resolved_schedule_name),
            pipeline_name=pipeline_name,
            partition_fn=partition_fn,
            environment_dict_fn_for_partition=environment_dict_for_partition,
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_for_partition,
            mode=mode,
        )

        return schedule_partitions.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )
def create_hourly_hn_download_schedule():
    """Build the schedule definition named ``hourly_hn_download_schedule``.

    Creates an hourly ``ScheduleTimeBasedPartitionsDefinition`` in UTC that
    starts at 2021-01-01 with execution time 00:00, wraps it in a
    ``PartitionSetDefinition`` for ``download_pipeline`` in mode ``prod``, and
    returns the schedule produced by ``create_schedule_definition``.  Run
    config for a partition is
    ``get_hourly_download_def_schedule_config(partition.value)``.
    """
    name_of_schedule = "hourly_hn_download_schedule"

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=datetime.datetime(2021, 1, 1),
        execution_time=datetime.time(0, 0),
        fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
        timezone="UTC",
    )

    def run_config_for_partition(partition):
        return get_hourly_download_def_schedule_config(partition.value)

    download_partition_set = PartitionSetDefinition(
        name="{}_partitions".format(name_of_schedule),
        pipeline_name="download_pipeline",  # type: ignore[arg-type]
        run_config_fn_for_partition=run_config_for_partition,
        mode="prod",
        partitions_def=hourly_partitions,
    )

    # The cron string and the execution-time mapping both come from the
    # partitions definition itself.
    cron_schedule = hourly_partitions.get_cron_schedule()
    selector = create_offset_partition_selector(
        execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
    )

    return download_partition_set.create_schedule_definition(
        name_of_schedule,
        cron_schedule,
        partition_selector=selector,
        execution_timezone="UTC",
        decorated_fn=get_hourly_download_def_schedule_config,
        job=download_comments_and_stories_prod,
    )
Пример #7
0
    def inner(fn: Callable[[datetime.datetime], Dict[str, Any]]) -> PartitionScheduleDefinition:
        """Turn ``fn`` into an hourly partition schedule definition.

        ``fn`` is passed through ``check.callable_param`` and is called with
        ``partition.value`` to produce run config.  When ``tags_fn_for_date``
        is truthy it is called with ``partition.value`` to produce tags;
        otherwise each partition gets an empty dict.  The partition format is
        the with-timezone default when ``execution_timezone`` is truthy and
        the without-timezone default otherwise.  The cron schedule and the
        execution-time-to-partition mapping are taken from the
        ``ScheduleTimeBasedPartitionsDefinition`` built here.  All free names
        (``name``, ``start_date``, ``end_date``, ``mode``, ...) are read from
        the enclosing scope, which is not part of this block.

        Returns the schedule definition after ``update_wrapper`` has been
        applied to it with ``fn`` as the wrapped object.
        """
        check.callable_param(fn, "fn")

        resolved_schedule_name = name or fn.__name__

        tags_fn_for_partition_value: Callable[["Partition"], Optional[Dict[str, str]]]
        if tags_fn_for_date:
            # cast() leaves the object untouched; it only narrows the static type.
            date_tags_fn = cast(
                Callable[[datetime.datetime], Optional[Dict[str, str]]], tags_fn_for_date
            )
            tags_fn_for_partition_value = lambda partition: date_tags_fn(partition.value)
        else:
            tags_fn_for_partition_value = lambda partition: {}

        if execution_timezone:
            fmt = DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE
        else:
            fmt = DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE

        hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.HOURLY,
            start=start_date,
            execution_time=execution_time,
            end=end_date,
            fmt=fmt,
            timezone=execution_timezone,
            offset=partition_hours_offset,
        )

        def run_config_for_partition(partition: "Partition") -> Dict[str, Any]:
            return fn(partition.value)

        schedule_partitions = PartitionSetDefinition(
            name="{}_partitions".format(resolved_schedule_name),
            pipeline_name=pipeline_name,  # type: ignore[arg-type]
            run_config_fn_for_partition=run_config_for_partition,
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_fn_for_partition_value,
            mode=mode,
            partitions_def=hourly_partitions,
        )

        cron_schedule = hourly_partitions.get_cron_schedule()
        selector = create_offset_partition_selector(
            execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
        )

        schedule_def = schedule_partitions.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
            description=description,
            decorated_fn=fn,
            default_status=default_status,
        )

        # Copy fn's metadata (name, docstring, ...) onto the schedule object.
        update_wrapper(schedule_def, wrapped=fn)
        return schedule_def
Пример #8
0
    def offset_partition_selector(
        context: ScheduleEvaluationContext,
        partition_set_def: PartitionSetDefinition
    ) -> Union[Partition, SkipReason]:
        """Pick the partition that corresponds to the scheduled execution time.

        Args:
            context: Evaluation context; only ``scheduled_execution_time`` is
                read from it.
            partition_set_def: Partition set queried through
                ``get_partitions``.

        Returns:
            * a generic ``SkipReason`` when ``get_partitions(None)`` yields no
              partitions at all;
            * the last partition of
              ``get_partitions(context.scheduled_execution_time)`` when no
              scheduled execution time is set (or the generic ``SkipReason``
              if that list is empty);
            * otherwise the partition whose ``value.isoformat()`` equals that
              of ``execution_time_to_partition_fn(scheduled_execution_time)``,
              or a ``SkipReason`` describing why no partition matched (before
              the first partition, after the last one, or no exact match).

        ``execution_time_to_partition_fn`` comes from the enclosing scope,
        which is not part of this block.
        """
        no_partitions_skip_reason = SkipReason(
            "Partition selector did not return a partition. Make sure that the timezone "
            "on your partition set matches your execution timezone.")

        earliest_possible_partition = next(
            iter(partition_set_def.get_partitions(None)), None)
        if not earliest_possible_partition:
            return no_partitions_skip_reason

        valid_partitions = partition_set_def.get_partitions(
            context.scheduled_execution_time)

        if not context.scheduled_execution_time:
            if not valid_partitions:
                return no_partitions_skip_reason
            return valid_partitions[-1]

        partition_time = execution_time_to_partition_fn(
            context.scheduled_execution_time)

        if partition_time < earliest_possible_partition.value:
            return SkipReason(
                f"Your partition ({partition_time.isoformat()}) is before the beginning of "
                f"the partition set ({earliest_possible_partition.value.isoformat()}). "
                "Verify your schedule's start_date is correct.")

        # With no partitions valid at the scheduled time there is no "last"
        # partition to compare against; skip instead of raising IndexError on
        # valid_partitions[-1] below.
        if not valid_partitions:
            return no_partitions_skip_reason

        latest_valid_partition = valid_partitions[-1]
        if partition_time > latest_valid_partition.value:
            return SkipReason(
                f"Your partition ({partition_time.isoformat()}) is after the end of "
                f"the partition set ({latest_valid_partition.value.isoformat()}). "
                "Verify your schedule's end_date is correct.")

        # Match on the ISO string form, exactly as the comparison is spelled
        # for every candidate partition.
        for partition in valid_partitions:
            if partition.value.isoformat() == partition_time.isoformat():
                return partition

        return no_partitions_skip_reason
Пример #9
0
    def inner(fn):
        """Turn ``fn`` into a daily-partitioned schedule definition.

        ``fn`` is passed through ``check.callable_param`` and is later invoked
        with ``partition.value`` to produce the environment dict for a
        partition.  The schedule is named ``name`` when that is truthy and
        ``fn.__name__`` otherwise.  ``name``, ``pipeline_name``,
        ``start_date``, ``cron_schedule``, ``should_execute`` and
        ``environment_vars`` are read from the enclosing scope, which is not
        part of this block.

        Returns whatever ``PartitionSetDefinition.create_schedule_definition``
        returns.
        """
        check.callable_param(fn, 'fn')

        resolved_schedule_name = name or fn.__name__

        daily_partition_set = PartitionSetDefinition(
            name='{}_daily'.format(pipeline_name),
            pipeline_name=pipeline_name,
            partition_fn=date_partition_range(start_date),
            # The decorated function only sees the partition's value.
            environment_dict_fn_for_partition=lambda partition: fn(partition.value),
        )

        return daily_partition_set.create_schedule_definition(
            resolved_schedule_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
        )
Пример #10
0
    def get_partition_set_def(self, pipeline_name: str) -> Optional["PartitionSetDefinition"]:
        """Return a partition set for ``pipeline_name``, or ``None``.

        ``None`` is returned when ``self.partitioned_config`` is falsy.
        Otherwise a ``PartitionSetDefinition`` is built whose name is
        ``<pipeline_name>_<self.name>_partition_set``, whose mode is
        ``self.name``, and whose partitions definition and run-config
        function are taken from ``self.partitioned_config``.
        """
        # Imported inside the method, as in the original -- presumably to
        # avoid a circular import at module load; confirm before hoisting.
        from dagster.core.definitions.partition import PartitionSetDefinition

        partitioned_config = self.partitioned_config
        if not partitioned_config:
            return None

        mode_name = self.name
        partition_set_name = pipeline_name + "_" + mode_name + "_partition_set"

        return PartitionSetDefinition(
            pipeline_name=pipeline_name,
            name=partition_set_name,
            partitions_def=partitioned_config.partitions_def,
            run_config_fn_for_partition=partitioned_config.run_config_for_partition_fn,
            mode=mode_name,
        )