def test_handles_none(self):
    """Check that the ``x``-only validator returns False when an input side is absent.

    Two situations are asserted: a cached state built with only
    ``cached_parameters`` (no ``cached_inputs``), and a call where the
    current inputs are ``None``.
    """
    validator = partial_inputs_only(validate_on=["x"])

    # State constructed with cached parameters only; inputs are supplied.
    parameters_only = Cached(cached_parameters={"x": 5})
    assert validator(parameters_only, {"x": 5}, None) is False

    # State constructed with cached inputs; the call passes no inputs.
    inputs_only = Cached(cached_inputs={"x": 5})
    assert validator(inputs_only, None, None) is False
def test_inputs_validate_with_defaults(self):
    """Check that ``partial_inputs_only(None)`` returns True for equal and unequal inputs.

    The second case changes ``s`` from ``"str"`` to ``"strs"`` and the
    validator is still expected to return True.
    """
    candidate_inputs = (
        {"x": 1, "s": "str"},   # identical to the cached inputs
        {"x": 1, "s": "strs"},  # ``s`` differs from the cached inputs
    )
    for provided in candidate_inputs:
        # A fresh cached state per case, as in the hand-written version.
        state = Cached(cached_inputs={"x": 1, "s": "str"})
        assert partial_inputs_only(None)(state, provided, None) is True
def test_validate_on_kwarg(self):
    """Check that only the keys listed in ``validate_on`` influence the result.

    The cached inputs are always ``x=1, s="str"``. The provided inputs
    either match exactly or differ in ``s`` only.
    """
    # Matching inputs, validating on both keys.
    state = Cached(cached_inputs={"x": 1, "s": "str"})
    both_keys = partial_inputs_only(validate_on=["x", "s"])
    assert both_keys(state, {"x": 1, "s": "str"}, None) is True

    # Inputs that differ in ``s``; the expectation depends on whether
    # ``s`` is among the validated keys.
    state = Cached(cached_inputs={"x": 1, "s": "str"})
    expectations = (
        (["x", "s"], False),
        (["x"], True),
        (["s"], False),
    )
    for keys, expected in expectations:
        validator = partial_inputs_only(validate_on=keys)
        assert validator(state, {"x": 1, "s": "strs"}, None) is expected
def test_flow_dot_run_handles_mapped_cached_states_with_non_cached(self):
    """Run a scheduled flow with a mapped, cached task and check each run's output.

    The schedule below yields three run times. ``return_x`` is mapped over
    the outputs of two stateful tasks and is cached for one minute with a
    validator that only looks at ``x``. ``store_y`` records the mapped
    result of every run so the three runs can be compared afterwards.
    """

    class MockSchedule(prefect.schedules.Schedule):
        # Number of run times handed out so far.
        call_count = 0

        def next(self, n):
            # After three run times, report that nothing is left to schedule.
            if self.call_count >= 3:
                return []
            self.call_count += 1
            return [pendulum.now("utc")]

    class StatefulTask(Task):
        # Incremented on every ``run`` call; the output depends on it.
        call_count = 0

        def __init__(self, maxit=False, **kwargs):
            self.maxit = maxit
            super().__init__(**kwargs)

        def run(self):
            self.call_count += 1
            if not self.maxit:
                return [self.call_count + offset for offset in range(3)]
            # With ``maxit`` the value is floored at 2, so the first two
            # runs produce the same list.
            return [max(self.call_count, 2)] * 3

    one_minute = datetime.timedelta(minutes=1)
    validate_x_only = partial_inputs_only(validate_on=["x"])

    @task(cache_for=one_minute, cache_validator=validate_x_only)
    def return_x(x, y):
        # Raises ZeroDivisionError when y == 1.
        return 1 / (y - 1)

    storage = {"y": []}

    @task(trigger=prefect.triggers.always_run)
    def store_y(y):
        storage["y"].append(y)

    x_source = StatefulTask(maxit=True)
    y_source = StatefulTask()
    with Flow(name="test", schedule=MockSchedule()) as flow:
        store_y(return_x.map(x=x_source, y=y_source))

    flow.run()

    recorded = storage["y"]
    first_run = recorded[0]
    second_run = recorded[1]
    third_run = recorded[2]

    # First run: one child fails, the other two succeed.
    assert isinstance(first_run[0], ZeroDivisionError)
    assert first_run[1:] == [1.0, 0.5]

    # Second run: all tasks succeed, the latter two use cached state.
    assert second_run[0] == 1.0
    assert second_run[1:] == [1.0, 0.5]

    # Third run: all tasks succeed, no caching used.
    assert third_run == [1 / 2, 1 / 3, 1 / 4]
def test_flow_dot_run_handles_cached_states(self):
    """Run a scheduled flow with a cached task and check which runs recompute.

    The schedule yields three run times and then raises ``SyntaxError``,
    which is how the test stops ``flow.run()``. ``return_x`` is cached for
    one minute and validated on ``x`` only; ``store_y`` records what each
    run produced.
    """

    class MockSchedule(prefect.schedules.Schedule):
        # Number of run times handed out so far.
        call_count = 0

        def next(self, n):
            # The fourth request aborts the run loop with an exception
            # the test can catch.
            if self.call_count >= 3:
                raise SyntaxError("Cease scheduling!")
            self.call_count += 1
            return [pendulum.now("utc")]

    class StatefulTask(Task):
        # Incremented on every ``run`` call; the output depends on it.
        call_count = 0

        def __init__(self, maxit=False, **kwargs):
            self.maxit = maxit
            super().__init__(**kwargs)

        def run(self):
            self.call_count += 1
            # With ``maxit`` the value is floored at 2, so it is the same
            # on the first and second run.
            return max(self.call_count, 2) if self.maxit else self.call_count

    one_minute = datetime.timedelta(minutes=1)
    validate_x_only = partial_inputs_only(validate_on=["x"])

    @task(cache_for=one_minute, cache_validator=validate_x_only)
    def return_x(x, y):
        return y

    storage = {"y": []}

    @task
    def store_y(y):
        storage["y"].append(y)

    x_source = StatefulTask(maxit=True)
    y_source = StatefulTask()
    with Flow(name="test", schedule=MockSchedule()) as flow:
        store_y(return_x(x=x_source, y=y_source))

    with pytest.raises(SyntaxError):
        flow.run()

    # x is 2, 2, 3 across the runs while y is 1, 2, 3: the repeated second
    # value is consistent with the second run reusing the cached result.
    assert storage == {"y": [1, 1, 3]}
def poll_modified_date(location: str) -> datetime.datetime:
    """Get the last modified date of a location.

    Arguments:
        location {str} -- Web address for the file.

    Returns:
        datetime.datetime -- Last modified date as returned by the HTTP header.
    """
    r = requests.head(location)
    # NOTE(review): ``headers.get`` yields None when the server sends no
    # 'Last-Modified' header; that case is not handled here -- confirm the
    # polled servers always provide it.
    modified_string = r.headers.get('Last-Modified')
    # Only the first six fields (year .. second) are used to build the datetime.
    return datetime.datetime(*eut.parsedate(modified_string)[:6])


@task(cache_for=timedelta(days=365),
      cache_validator=partial_inputs_only(['last_modified_date']))
def get_and_extract(location: str,
                    last_modified_date: datetime.datetime) -> Dict[str, Dict]:
    """Download a zip archive and load each extracted file as JSON.

    Arguments:
        location {str} -- Web address of the zip archive.
        last_modified_date {datetime.datetime} -- Not read by the body; it is
            the only input named in the task's cache validator.

    Returns:
        Dict[str, Dict] -- Parsed JSON content keyed by file name with its
            final suffix removed.
    """
    response = requests.get(location)
    output_dict = {}
    # Context managers ensure the temporary file and directory are removed
    # even when extraction or parsing raises.
    with tempfile.NamedTemporaryFile(delete=True) as temp_file, \
            tempfile.TemporaryDirectory() as temp_dir_name:
        temp_file.write(response.content)
        with zipfile.ZipFile(temp_file) as zip_ref:
            zip_ref.extractall(temp_dir_name)
        # Only the top-level entries of the extracted archive are visited.
        for path in Path(temp_dir_name).iterdir():
            with open(path) as json_file:
                output_dict[path.with_suffix('').name] = json.load(json_file)
    # The result must be returned explicitly to satisfy the declared
    # Dict[str, Dict] return type; without it callers receive None.
    return output_dict
def test_curried(self):
    """Check that a validator built once with ``validate_on`` can be called repeatedly.

    The same validator object is applied to two different input dicts
    against one cached state.
    """
    cached_state = Cached(cached_inputs={"x": 1, "s": "str"})
    x_validator = partial_inputs_only(validate_on=["x"])

    # ``x`` matches the cached value; ``s`` is not supplied at all.
    assert x_validator(cached_state, {"x": 1}, None) is True
    # ``x`` differs from the cached value even though ``s`` matches.
    assert x_validator(cached_state, {"x": 2, "s": "str"}, None) is False