示例#1
0
 def test_handles_none(self):
     """The validator must reject the cache when either side's inputs are absent."""
     # Cached state built with cached parameters only (no cached_inputs given).
     params_only_state = Cached(cached_parameters=dict(x=5))
     check_x = partial_inputs_only(validate_on=["x"])
     outcome = check_x(params_only_state, dict(x=5), None)
     assert outcome is False

     # Cached state has inputs, but the current run passes None as its inputs.
     inputs_state = Cached(cached_inputs=dict(x=5))
     check_x = partial_inputs_only(validate_on=["x"])
     outcome = check_x(inputs_state, None, None)
     assert outcome is False
示例#2
0
 def test_inputs_validate_with_defaults(self):
     """With ``validate_on`` given as None the cache is accepted either way.

     Both an identical input set and one whose ``s`` value differs from the
     cached inputs are expected to validate as True.
     """
     candidate_inputs = (
         dict(x=1, s="str"),   # same as the cached inputs
         dict(x=1, s="strs"),  # ``s`` differs from the cached inputs
     )
     for current_inputs in candidate_inputs:
         # A fresh Cached state per case, as in a pair of independent checks.
         cached_state = Cached(cached_inputs=dict(x=1, s="str"))
         validator = partial_inputs_only(None)
         assert validator(cached_state, current_inputs, None) is True
示例#3
0
 def test_validate_on_kwarg(self):
     """Only the keys named in ``validate_on`` decide whether the cache is valid."""
     # Identical inputs, validating on both keys: cache is accepted.
     cached_state = Cached(cached_inputs=dict(x=1, s="str"))
     both_keys = partial_inputs_only(validate_on=["x", "s"])
     assert both_keys(cached_state, dict(x=1, s="str"), None) is True

     # ``s`` now differs from the cached value; the verdict depends on
     # whether ``s`` is among the validated keys.
     cached_state = Cached(cached_inputs=dict(x=1, s="str"))
     expectations = (
         (["x", "s"], False),
         (["x"], True),
         (["s"], False),
     )
     for keys, expected in expectations:
         verdict = partial_inputs_only(validate_on=keys)(
             cached_state, dict(x=1, s="strs"), None)
         assert verdict is expected
示例#4
0
    def test_flow_dot_run_handles_mapped_cached_states_with_non_cached(self):
        """Run a scheduled flow three times with a mapped, cached task.

        ``return_x`` is mapped over two upstream lists and caches its result
        for one minute, validating the cache on input ``x`` only. The test
        checks the values collected by ``store_y`` across the three runs: a
        failed child in run one, a mix of fresh and cached children in run
        two, and no cache use in run three.
        """

        # Schedule stub: yields "now" for the first three calls to next(),
        # then an empty list.
        class MockSchedule(prefect.schedules.Schedule):
            call_count = 0

            def next(self, n):
                if self.call_count < 3:
                    self.call_count += 1
                    return [pendulum.now("utc")]
                else:
                    return []

        # Task whose output depends on how many times this instance has run.
        class StatefulTask(Task):
            def __init__(self, maxit=False, **kwargs):
                self.maxit = maxit
                super().__init__(**kwargs)

            # Class-level default; the first `+=` in run() creates a
            # per-instance counter, so t1 and t2 below count independently.
            call_count = 0

            def run(self):
                self.call_count += 1
                if self.maxit:
                    # Calls 1 and 2 both give [2, 2, 2]; call 3 gives [3, 3, 3].
                    return [max(self.call_count, 2)] * 3
                else:
                    # Call k gives [k, k + 1, k + 2].
                    return [self.call_count + i for i in range(3)]

        @task(
            cache_for=datetime.timedelta(minutes=1),
            cache_validator=partial_inputs_only(validate_on=["x"]),
        )
        def return_x(x, y):
            # Raises ZeroDivisionError when y == 1; x is not used in the
            # computation and only serves as the cache-validation key.
            return 1 / (y - 1)

        # Collects the mapped results seen by store_y, one entry per call.
        storage = {"y": []}

        # always_run trigger: presumably lets store_y execute even when an
        # upstream mapped child failed -- confirm against prefect.triggers.
        @task(trigger=prefect.triggers.always_run)
        def store_y(y):
            storage["y"].append(y)

        t1, t2 = StatefulTask(maxit=True), StatefulTask()
        schedule = MockSchedule()
        with Flow(name="test", schedule=schedule) as f:
            res = store_y(return_x.map(x=t1, y=t2))

        f.run()

        first_run = storage["y"][0]
        second_run = storage["y"][1]
        third_run = storage["y"][2]

        ## first run: one child fails, the other two succeed
        ## (y = [1, 2, 3]; y == 1 divides by zero)
        assert isinstance(first_run[0], ZeroDivisionError)
        assert first_run[1:] == [1.0, 0.5]

        ## second run: all tasks succeed, the latter two use cached state
        ## (x is still 2 for every child; the first child has y == 2 now)
        assert second_run[0] == 1.0
        assert second_run[1:] == [1.0, 0.5]

        ## third run: all tasks succeed, no caching used
        ## (x changed to 3, y = [3, 4, 5])
        assert third_run == [1 / 2, 1 / 3, 1 / 4]
示例#5
0
    def test_flow_dot_run_handles_cached_states(self):
        """Run a scheduled flow three times with a cached (non-mapped) task.

        ``return_x`` caches its result for one minute and validates the cache
        on input ``x`` only. Over three runs ``x`` is 2, 2, 3 and ``y`` is
        1, 2, 3, so the values recorded by ``store_y`` are expected to be
        ``[1, 1, 3]``: the second run reuses the first run's result because
        ``x`` is unchanged, and the third run recomputes because ``x`` changed.
        """

        # Schedule stub: yields "now" for the first three calls to next(),
        # then raises SyntaxError as a sentinel to break out of f.run().
        class MockSchedule(prefect.schedules.Schedule):
            call_count = 0

            def next(self, n):
                if self.call_count < 3:
                    self.call_count += 1
                    return [pendulum.now("utc")]
                else:
                    raise SyntaxError("Cease scheduling!")

        # Task whose output depends on how many times this instance has run.
        class StatefulTask(Task):
            def __init__(self, maxit=False, **kwargs):
                self.maxit = maxit
                super().__init__(**kwargs)

            # Class-level default; the first `+=` in run() creates a
            # per-instance counter, so t1 and t2 below count independently.
            call_count = 0

            def run(self):
                self.call_count += 1
                if self.maxit:
                    # Calls 1 and 2 both give 2; call 3 gives 3.
                    return max(self.call_count, 2)
                else:
                    return self.call_count

        @task(
            cache_for=datetime.timedelta(minutes=1),
            cache_validator=partial_inputs_only(validate_on=["x"]),
        )
        def return_x(x, y):
            # x is not used in the result; it only serves as the
            # cache-validation key.
            return y

        # Collects the value seen by store_y, one entry per call.
        storage = {"y": []}

        @task
        def store_y(y):
            storage["y"].append(y)

        t1, t2 = StatefulTask(maxit=True), StatefulTask()
        schedule = MockSchedule()
        with Flow(name="test", schedule=schedule) as f:
            # Called for its side effect of wiring the tasks into the flow;
            # the returned task handle is not needed.
            store_y(return_x(x=t1, y=t2))

        # The schedule stub ends the run loop by raising SyntaxError; the
        # exception object itself is not inspected.
        with pytest.raises(SyntaxError):
            f.run()

        assert storage == dict(y=[1, 1, 3])
示例#6
0
def poll_modified_date(location: str) -> datetime.datetime:
    """Get the last modified date of a location.

    Issues an HTTP HEAD request and converts the ``Last-Modified`` response
    header into a ``datetime``.

    Arguments:
        location {str} -- Web address for the file.

    Returns:
        datetime.datetime -- Last modified date as returned by the HTTP header.
            The value is naive: only the year..second fields of the parsed
            header are used, so no timezone is attached.

    Raises:
        ValueError -- If the response carries no ``Last-Modified`` header or
            the header value cannot be parsed as a date.
    """
    r = requests.head(location)
    modified_string = r.headers.get('Last-Modified')
    # NOTE(review): `eut` is presumably `email.utils`; its parsedate() returns
    # None for input it cannot parse. Guard both the missing-header and the
    # unparseable-header case so the caller gets a descriptive error instead
    # of a TypeError from subscripting None.
    parsed = eut.parsedate(modified_string) if modified_string else None
    if parsed is None:
        raise ValueError(
            "No usable Last-Modified header for {!r}: {!r}".format(
                location, modified_string))
    return datetime.datetime(*parsed[:6])


@task(cache_for=timedelta(days=365),
      cache_validator=partial_inputs_only(['last_modified_date']))
def get_and_extract(location: str,
                    last_modified_date: datetime.datetime) -> Dict[str, Dict]:
    """Download a zip archive and load each extracted file as JSON.

    The task is declared with a 365-day cache validated on
    ``last_modified_date`` only, so the download is presumably repeated only
    when that date changes -- confirm against the cache validator's contract.

    Arguments:
        location {str} -- Web address of the zip archive.
        last_modified_date {datetime.datetime} -- Not used in the body; it is
            a task input so the cache validator can compare it.

    Returns:
        Dict[str, Dict] -- Parsed JSON content keyed by the extracted file's
            name with its final suffix removed.
    """
    response = requests.get(location)
    output_dict = {}
    # Context managers ensure the temporary file and directory are removed
    # even if extraction or JSON parsing fails.
    with tempfile.NamedTemporaryFile(delete=True) as temp_file, \
            tempfile.TemporaryDirectory() as temp_dir:
        temp_file.write(response.content)
        with zipfile.ZipFile(temp_file) as zip_ref:
            zip_ref.extractall(temp_dir)
        # Files must be read before the temporary directory is cleaned up.
        for path in Path(temp_dir).iterdir():
            with open(path) as json_file:
                content = json.load(json_file)
            name = path.with_suffix('').name
            output_dict[name] = content
    return output_dict
示例#7
0
 def test_curried(self):
     """A validator built once with ``validate_on`` can be reused across calls."""
     cached_state = Cached(cached_inputs=dict(x=1, s="str"))
     only_x = partial_inputs_only(validate_on=["x"])

     # Same ``x`` as cached (``s`` omitted from the current inputs): valid.
     same_x = only_x(cached_state, dict(x=1), None)
     assert same_x is True

     # Different ``x`` even though ``s`` matches: invalid.
     changed_x = only_x(cached_state, dict(x=2, s="str"), None)
     assert changed_x is False