Exemplo n.º 1
0
def test_partitions():
    """Check a job built from a graph with a static ``PartitionedConfig``.

    The job is expected to expose two partitions, and the run config
    generated for the first one must carry that partition's value as the
    ``date`` config of ``my_op``.
    """

    @op(config_schema={"date": str})
    def my_op(_):
        pass

    @graph
    def my_graph():
        my_op()

    def build_run_config(partition: Partition):
        op_config = {"config": {"date": partition.value}}
        return {"ops": {"my_op": op_config}}

    static_partitions = StaticPartitionsDefinition(
        [Partition(day) for day in ("2020-02-25", "2020-02-26")]
    )
    partitioned_config = PartitionedConfig(
        run_config_for_partition_fn=build_run_config,
        partitions_def=static_partitions,
    )
    job = my_graph.to_job(config=partitioned_config)

    # The partition set is looked up on the job's first mode, by graph name.
    partition_set = job.mode_definitions[0].get_partition_set_def("my_graph")
    partitions = partition_set.get_partitions()
    assert len(partitions) == 2

    first = partitions[0]
    assert first.value == "2020-02-25"
    assert first.name == "2020-02-25"

    expected_run_config = {
        "ops": {"my_op": {"config": {"date": "2020-02-25"}}},
    }
    assert partition_set.run_config_for_partition(first) == expected_run_config
Exemplo n.º 2
0
    def get_date_range_partitions(current_time=None):
        """Return the partitions between ``start`` and the effective end time.

        The upper bound is ``end`` when it is truthy, otherwise
        ``current_time`` when it is truthy, otherwise the current time in the
        definition timezone (``timezone``, falling back to ``"UTC"``).

        Args:
            current_time: Optional ``datetime.datetime`` used as the upper
                bound when no ``end`` is configured.

        Returns:
            A list of ``Partition`` objects, one per step of the range. The
            last element is dropped unless ``inclusive`` is truthy.
        """
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone or "UTC"

        if isinstance(start, PendulumDateTime):
            range_start = to_timezone(start, tz)
        else:
            range_start = pendulum.instance(start, tz=tz)

        # Precedence: configured end, then caller-supplied time, then "now".
        range_end = end or current_time or pendulum.now(tz)

        # Coerce the upper bound to the definition timezone.
        if isinstance(range_end, PendulumDateTime):
            range_end = to_timezone(range_end, tz)
        else:
            range_end = pendulum.instance(range_end, tz=tz)

        span = pendulum.period(range_start, range_end)
        partitions = []
        for moment in span.range(delta_range, delta_amount):
            partitions.append(
                Partition(value=moment, name=moment.strftime(fmt)))

        # By default the last element is left out: only fully completed
        # intervals are wanted, and the end time falls in the middle of the
        # interval represented by the last element.
        return partitions if inclusive else partitions[:-1]
Exemplo n.º 3
0
    def get_schedule_range_partitions():
        """Return the partitions derived from the cron schedule's tick times.

        Each execution time yielded by ``schedule_execution_time_iterator``
        is mapped to a partition time via ``execution_time_to_partition_fn``.
        Partition times before ``start`` are skipped, and iteration stops at
        the first partition time after the end bound (``end`` if truthy,
        otherwise the current time).

        Returns:
            A list of ``Partition`` objects with the last collected element
            removed.
        """
        tz = timezone or pendulum.now().timezone.name

        if isinstance(start, pendulum.Pendulum):
            range_start = start.in_tz(tz)
        else:
            range_start = pendulum.instance(start, tz=tz)

        if end:
            range_end = (end.in_tz(tz) if isinstance(end, pendulum.Pendulum)
                         else pendulum.instance(end, tz=tz))
        else:
            range_end = pendulum.now(tz)

        # Both bounds are fixed for the whole loop, so convert them once.
        start_ts = range_start.timestamp()
        end_ts = range_end.timestamp()

        collected = []
        for execution_time in schedule_execution_time_iterator(
                start_ts, cron_schedule, tz):
            partition_time = execution_time_to_partition_fn(execution_time)
            partition_ts = partition_time.timestamp()

            if partition_ts > end_ts:
                break

            # Only keep partition times that are not before the start bound.
            if partition_ts >= start_ts:
                collected.append(
                    Partition(value=partition_time,
                              name=partition_time.strftime(fmt)))

        # NOTE(review): the final element is always dropped; presumably it is
        # the interval still in progress at the end bound -- confirm.
        return collected[:-1]
Exemplo n.º 4
0
    def get_date_range_partitions():
        """Return the partitions between ``start`` and ``end`` (or now).

        Both bounds are expressed in ``timezone`` when it is set, otherwise in
        the timezone reported by ``pendulum.now()``.

        Returns:
            A list of ``Partition`` objects, one per step of the range. The
            last element is dropped unless ``inclusive`` is truthy.
        """
        tz = timezone or pendulum.now().timezone.name

        if isinstance(start, pendulum.Pendulum):
            range_start = start.in_tz(tz)
        else:
            range_start = pendulum.instance(start, tz=tz)

        if end:
            range_end = (end.in_tz(tz) if isinstance(end, pendulum.Pendulum)
                         else pendulum.instance(end, tz=tz))
        else:
            range_end = pendulum.now(tz)

        span = pendulum.period(range_start, range_end)
        partitions = []
        for moment in span.range(delta_range, delta_amount):
            partitions.append(
                Partition(value=moment, name=moment.strftime(fmt)))

        # By default the last element is left out: only fully completed
        # intervals are wanted, and the end time falls in the middle of the
        # interval represented by the last element.
        return partitions if inclusive else partitions[:-1]
Exemplo n.º 5
0
 def test():
     # Returns a one-element list holding a job built from ``bare`` with no
     # resources and a partitioned config that has a single static partition
     # ("abc") and produces an empty run config for every partition.
     single_partition = StaticPartitionsDefinition([Partition("abc")])
     partitioned_config = PartitionedConfig(
         partitions_def=single_partition,
         run_config_for_partition_fn=lambda _: {},
     )
     bare_job = bare.to_job(resource_defs={}, config=partitioned_config)
     return [bare_job]
Exemplo n.º 6
0
    def get_date_range_partitions():
        """Return the partitions from ``start`` up to ``end`` in ``delta`` steps.

        When ``end`` is falsy, ``datetime.datetime.now()`` is used as the
        upper bound.

        Returns:
            A list of ``Partition`` objects named with ``fmt``, without the
            last generated element.
        """
        upper_bound = end or datetime.datetime.now()

        partitions = []
        cursor = start
        while cursor < upper_bound:
            partition = Partition(value=cursor, name=cursor.strftime(fmt))
            partitions.append(partition)
            cursor = cursor + delta

        # The last element is left out: only fully completed intervals are
        # wanted, and the end time falls in the middle of the interval
        # represented by the last element.
        return partitions[:-1]
Exemplo n.º 7
0
    def get_schedule_range_partitions(current_time=None):
        """Return the partitions derived from the cron schedule's tick times.

        The upper bound is ``end`` when it is truthy, otherwise
        ``current_time`` when it is truthy, otherwise the current time in the
        definition timezone (``timezone``, falling back to ``"UTC"``). Each
        execution time yielded by ``schedule_execution_time_iterator`` is
        mapped to a partition time via ``execution_time_to_partition_fn``;
        partition times before ``start`` are skipped and iteration stops at
        the first one after the upper bound.

        Args:
            current_time: Optional ``datetime.datetime`` used as the upper
                bound when no ``end`` is configured.

        Returns:
            A list of ``Partition`` objects. The last element is dropped
            unless ``inclusive`` is truthy.
        """
        check.opt_inst_param(current_time, "current_time", datetime.datetime)
        tz = timezone or "UTC"

        if isinstance(start, PendulumDateTime):
            range_start = to_timezone(start, tz)
        else:
            range_start = pendulum.instance(start, tz=tz)

        # Precedence: configured end, then caller-supplied time, then "now".
        range_end = end or current_time or pendulum.now(tz)

        # Coerce the upper bound to the definition timezone.
        if isinstance(range_end, PendulumDateTime):
            range_end = to_timezone(range_end, tz)
        else:
            range_end = pendulum.instance(range_end, tz=tz)

        # Both bounds are fixed for the whole loop, so convert them once.
        start_ts = range_start.timestamp()
        end_ts = range_end.timestamp()

        collected = []
        for execution_time in schedule_execution_time_iterator(start_ts, cron_schedule, tz):
            partition_time = execution_time_to_partition_fn(execution_time)
            partition_ts = partition_time.timestamp()

            if partition_ts > end_ts:
                break

            # Only keep partition times that are not before the start bound.
            if partition_ts >= start_ts:
                collected.append(
                    Partition(value=partition_time, name=partition_time.strftime(fmt))
                )

        if inclusive:
            return collected
        return collected[:-1]
Exemplo n.º 8
0
            start=start,
            execution_time=time(0, 1),
            end=end,
            timezone=timezone,
            fmt=DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
            offset=partition_hours_offset,
        )

        assert_expected_partitions(partitions.get_partitions(),
                                   expected_partitions)


@pytest.mark.parametrize(
    argnames=["partition_fn"],
    argvalues=[
        (lambda _current_time: [Partition("a_partition")], ),
        (lambda _current_time: [Partition(i) for i in range(10)], ),
    ],
)
def test_dynamic_partitions_partitions(
        partition_fn: Callable[[Optional[datetime]], List[Partition]]):
    """Check that ``DynamicPartitionsDefinition`` mirrors its partition function.

    The definition's partitions must match, by name and value, what
    ``partition_fn(None)`` returns, and its partition keys must be the names
    of those partitions.
    """
    definition = DynamicPartitionsDefinition(partition_fn)

    actual = definition.get_partitions()
    expected = partition_fn(None)

    actual_pairs = [(p.name, p.value) for p in actual]
    expected_pairs = [(p.name, p.value) for p in expected]
    assert actual_pairs == expected_pairs

    expected_keys = [p.name for p in partition_fn(None)]
    assert definition.get_partition_keys() == expected_keys