def create_hourly_hn_download_schedule():
    """Build the ``hourly_hn_download_schedule`` schedule definition.

    Creates an hourly, UTC time-based partitions definition starting at
    2021-01-01, wraps it in a partition set that targets ``download_pipeline``
    in ``prod`` mode, and derives the schedule definition from that set.

    Returns:
        The object returned by ``PartitionSetDefinition.create_schedule_definition``
        for this partition set.
    """
    target_pipeline = "download_pipeline"
    name = "hourly_hn_download_schedule"

    hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
        schedule_type=ScheduleType.HOURLY,
        start=datetime.datetime(2021, 1, 1),
        execution_time=datetime.time(0, 0),
        fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
        timezone="UTC",
    )

    # Run config for a partition is computed from that partition's value.
    hn_partition_set = PartitionSetDefinition(
        name=f"{name}_partitions",
        pipeline_name=target_pipeline,  # type: ignore[arg-type]
        run_config_fn_for_partition=lambda partition: get_hourly_download_def_schedule_config(
            partition.value
        ),
        mode="prod",
        partitions_def=hourly_partitions,
    )

    # Both the cron string and the partition selector are derived from the
    # same partitions definition so that they stay in agreement.
    cron_schedule = hourly_partitions.get_cron_schedule()
    selector = create_offset_partition_selector(
        execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
    )

    return hn_partition_set.create_schedule_definition(
        name,
        cron_schedule,
        partition_selector=selector,
        execution_timezone="UTC",
        decorated_fn=get_hourly_download_def_schedule_config,
        job=download_comments_and_stories_prod,
    )
Пример #2
0
    def inner(fn: Callable[[datetime.datetime], Dict[str, Any]]) -> PartitionScheduleDefinition:
        """Wrap ``fn`` in an hourly, partition-based schedule definition.

        ``fn`` is invoked with a partition's value to produce run config. All
        other settings (``name``, ``start_date``, ``execution_timezone``,
        ``mode`` and so on) are read from the enclosing scope.

        Args:
            fn: Callable that receives a partition value and returns run config.

        Returns:
            The schedule definition created from the partition set, after
            ``update_wrapper`` has been applied to it with ``fn`` as the
            wrapped callable.
        """
        check.callable_param(fn, "fn")

        # An explicitly supplied name wins; otherwise use the function's name.
        resolved_name = name or fn.__name__

        tags_for_partition: Callable[["Partition"], Optional[Dict[str, str]]]
        if tags_fn_for_date:
            date_tags_fn = cast(
                Callable[[datetime.datetime], Optional[Dict[str, str]]], tags_fn_for_date
            )
            # Adapt the date-based tag function to the partition-based interface.
            tags_for_partition = lambda partition: date_tags_fn(partition.value)
        else:
            # No tag function supplied: every partition gets an empty tag dict.
            tags_for_partition = lambda partition: {}

        # The partition format depends on whether an execution timezone is set.
        if execution_timezone:
            partition_fmt = DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE
        else:
            partition_fmt = DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE

        hourly_partitions = ScheduleTimeBasedPartitionsDefinition(
            schedule_type=ScheduleType.HOURLY,
            start=start_date,
            execution_time=execution_time,
            end=end_date,
            fmt=partition_fmt,
            timezone=execution_timezone,
            offset=partition_hours_offset,
        )

        hourly_partition_set = PartitionSetDefinition(
            name=f"{resolved_name}_partitions",
            pipeline_name=pipeline_name,  # type: ignore[arg-type]
            run_config_fn_for_partition=lambda partition: fn(partition.value),
            solid_selection=solid_selection,
            tags_fn_for_partition=tags_for_partition,
            mode=mode,
            partitions_def=hourly_partitions,
        )

        # The cron string and the partition selector both come from the same
        # partitions definition.
        cron_schedule = hourly_partitions.get_cron_schedule()
        selector = create_offset_partition_selector(
            execution_time_to_partition_fn=hourly_partitions.get_execution_time_to_partition_fn(),
        )

        result = hourly_partition_set.create_schedule_definition(
            resolved_name,
            cron_schedule,
            should_execute=should_execute,
            environment_vars=environment_vars,
            partition_selector=selector,
            execution_timezone=execution_timezone,
            description=description,
            decorated_fn=fn,
            default_status=default_status,
        )

        update_wrapper(result, wrapped=fn)
        return result