def test_detect_cycle():
    """Adding a back-edge with ``validate=True`` raises ``ValueError``."""
    flow = Flow(name="test")
    first = Task()
    second = Task()
    flow.add_edge(first, second)

    # second -> first would close the loop first -> second -> first
    with pytest.raises(ValueError):
        flow.add_edge(second, first, validate=True)
def test_add_edge_raise_error_for_downstream_parameter():
    """Using a ``Parameter`` as the downstream end of an edge is rejected."""
    flow = Flow(name="test")
    task = Task()
    param = Parameter("p")

    with pytest.raises(ValueError) as exc:
        flow.add_edge(upstream_task=task, downstream_task=param)

    message = str(exc.value)
    assert "can not have upstream dependencies" in message
def test_validate_cycles():
    """``validate`` raises when two tasks depend on each other."""
    flow = Flow(name="test")
    first = Task()
    second = Task()
    flow.add_edge(first, second)
    flow.add_edge(second, first)

    with pytest.raises(ValueError) as exc:
        flow.validate()

    message = str(exc.value).lower()
    assert "cycle found" in message
def test_validate_missing_edge_upstream_tasks():
    """``validate`` raises when an edge's upstream task is not in ``tasks``."""
    flow = Flow(name="test")
    upstream = Task()
    downstream = Task()
    flow.add_edge(upstream, downstream)

    # drop the task directly from the task set; the edge still refers to it
    flow.tasks.remove(upstream)

    with pytest.raises(ValueError) as exc:
        flow.validate()

    message = str(exc.value).lower()
    assert "edges refer to tasks" in message
def test_add_edge_raise_error_for_duplicate_key_if_validate():
    """Re-using an edge key with ``validate=True`` raises ``ValueError``."""
    flow = Flow(name="test")
    source = Task()
    adder = AddTask()
    flow.add_edge(upstream_task=source, downstream_task=adder, key="x")

    with pytest.raises(ValueError) as exc:
        flow.add_edge(
            upstream_task=source,
            downstream_task=adder,
            key="x",
            validate=True,
        )

    message = str(exc.value)
    assert "already been assigned" in message
def test_add_edge():
    """A single edge is visible from both of its endpoints."""
    flow = Flow(name="test")
    upstream = Task()
    downstream = Task()
    flow.add_edge(upstream_task=upstream, downstream_task=downstream)

    assert flow.upstream_tasks(downstream) == {upstream}
    assert flow.upstream_tasks(upstream) == set()
    assert flow.downstream_tasks(downstream) == set()
    assert flow.downstream_tasks(upstream) == {downstream}
    assert flow.edges_to(downstream) == flow.edges_from(upstream)
def test_serialize_validates_invalid_flows(self):
    """``serialize`` raises ``ValueError`` for a flow that contains a cycle.

    The message is read from ``exc.value`` (the raised exception) rather than
    from the ``ExceptionInfo`` wrapper, whose string form is a pytest detail.
    """
    t1, t2 = Task(), Task()
    f = Flow(name="test")
    f.add_edge(t1, t2)
    # default settings should allow this even though it's illegal
    f.add_edge(t2, t1)

    with pytest.raises(ValueError) as exc:
        f.serialize()

    assert "cycle found" in str(exc.value).lower()
def test_serialization(self):
    """``serialize`` returns a dict with one entry per task."""
    param = Parameter("1")
    second = Task("2")
    third = Task("3")
    flow = Flow(name="test", tasks=[param, second, third])
    flow.add_edge(param, second)
    flow.add_edge(param, third)

    payload = flow.serialize()

    assert isinstance(payload, dict)
    assert len(payload["tasks"]) == len(flow.tasks)
def test_eager_cycle_detection_works():
    """With eager edge validation on, ``add_edge`` itself rejects a cycle."""
    overrides = {"flows.eager_edge_validation": True}
    with set_temporary_config(overrides):
        flow = Flow(name="test")
        head = Task()
        tail = Task()
        flow.add_edge(head, tail)
        with pytest.raises(ValueError):
            flow.add_edge(tail, head)

    # the setting is expected to be falsy again outside the context manager
    assert not prefect.config.flows.eager_edge_validation
def test_sorted_tasks():
    """
    Linear chain: t1 -> t2 -> t3 -> t4

    ``sorted_tasks`` must return the tasks in chain order.
    """
    flow = Flow(name="test")
    first = Task("1")
    second = Task("2")
    third = Task("3")
    fourth = Task("4")
    flow.add_edge(first, second)
    flow.add_edge(second, third)
    flow.add_edge(third, fourth)

    expected = (first, second, third, fourth)
    assert flow.sorted_tasks() == expected
def test_cache_all_downstream_edges(self):
    """``all_downstream_edges`` reads from ``_cache`` until an edge is added."""
    flow = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    flow.add_edge(a, b)

    flow.all_downstream_edges()
    cache_key = ("all_downstream_edges", ())

    # plant a sentinel to prove the cached value is what gets returned
    flow._cache[cache_key] = 1
    assert flow.all_downstream_edges() == 1

    # after a new edge the sentinel must no longer be returned
    flow.add_edge(b, c)
    assert flow.all_downstream_edges() != 1
def test_eager_cycle_detection_defaults_false():
    """By default a cyclic edge is accepted; only ``validate`` rejects it."""
    assert not prefect.config.flows.eager_edge_validation

    flow = Flow(name="test")
    head = Task()
    tail = Task()
    flow.add_edge(head, tail)

    # no cycle detected: the back-edge is added and a truthy value comes back
    assert flow.add_edge(tail, head)

    with pytest.raises(ValueError):
        flow.validate()
def test_sorted_tasks_with_invalid_start_task():
    """
    Flow contains only: t1 -> t2

    t3 is created but never added, so passing it as a root task must raise.
    """
    flow = Flow(name="test")
    first = Task("1")
    second = Task("2")
    outsider = Task("3")
    flow.add_edge(first, second)

    with pytest.raises(ValueError) as exc:
        flow.sorted_tasks(root_tasks=[outsider])

    message = str(exc.value)
    assert "not found in Flow" in message
def test_merge():
    """``update`` pulls the other flow's tasks and edges into the receiver."""
    source = Flow(name="test")
    target = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    source.add_edge(a, b)
    target.add_edge(b, c)

    target.update(source)

    assert target.tasks == {a, b, c}
    assert len(target.edges) == 2
def test_equality_based_on_edges(self):
    """Flows with identical edges compare equal; an extra edge breaks that."""
    left = Flow(name="test")
    right = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()

    # give both flows the same two edges
    for flow in (left, right):
        flow.add_edge(a, b)
        flow.add_edge(a, c)
    assert left == right

    right.add_edge(b, c)
    assert left != right
def test_cache_survives_pickling(self):
    """A cached ``sorted_tasks`` value is still served after a pickle round trip."""
    flow = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    flow.add_edge(a, b)

    flow.sorted_tasks()
    cache_key = ("_sorted_tasks", (("root_tasks", ()),))

    # plant a sentinel so a cache hit is distinguishable from a recompute
    flow._cache[cache_key] = 1
    assert flow.sorted_tasks() == 1

    restored = cloudpickle.loads(cloudpickle.dumps(flow))
    assert restored.sorted_tasks() == 1

    # adding an edge to the restored flow must stop the sentinel being served
    restored.add_edge(b, c)
    assert restored.sorted_tasks() != 1
def test_sorted_tasks_with_start_task():
    """
    t1 -> t2 -> t3 -> t4
                t3 -> t5

    An empty ``root_tasks`` yields every task; rooting at t3 yields t3, t4, t5.
    """
    flow = Flow(name="test")
    first = Task("1")
    second = Task("2")
    third = Task("3")
    fourth = Task("4")
    fifth = Task("5")
    flow.add_edge(first, second)
    flow.add_edge(second, third)
    flow.add_edge(third, fourth)
    flow.add_edge(third, fifth)

    everything = set(flow.sorted_tasks(root_tasks=[]))
    assert everything == {first, second, third, fourth, fifth}

    from_third = set(flow.sorted_tasks(root_tasks=[third]))
    assert from_third == {third, fourth, fifth}
def test_cache_sorted_tasks(self):
    """``sorted_tasks`` fills, reads and (after a new edge) bypasses ``_cache``."""
    flow = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    flow.add_edge(a, b)

    flow.sorted_tasks()

    # check that cache holds result
    cache_key = ("_sorted_tasks", (("root_tasks", ()),))
    assert flow._cache[cache_key] == (a, b)

    # check that cache is read
    flow._cache[cache_key] = 1
    assert flow.sorted_tasks() == 1

    # a new edge must produce a freshly computed ordering
    flow.add_edge(b, c)
    assert flow.sorted_tasks() == (a, b, c)
def test_cache_terminal_tasks(self):
    """``terminal_tasks`` fills, reads and (after a new edge) bypasses ``_cache``."""
    flow = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    flow.add_edge(a, b)

    flow.terminal_tasks()

    # check that cache holds result
    cache_key = ("terminal_tasks", ())
    assert flow._cache[cache_key] == {b}

    # check that cache is read
    flow._cache[cache_key] = 1
    assert flow.terminal_tasks() == 1

    # a new edge must produce a freshly computed terminal set
    flow.add_edge(b, c)
    assert flow.terminal_tasks() == {c}
def test_add_edge_returns_edge():
    """``add_edge`` returns an edge equal to ``Edge(upstream, downstream)``."""
    flow = Flow(name="test")
    upstream = Task()
    downstream = Task()
    expected = Edge(upstream, downstream)

    returned = flow.add_edge(upstream_task=upstream, downstream_task=downstream)

    assert expected == returned
    assert returned in flow.edges
    assert expected in flow.edges
def test_cache_task_ids(self):
    """``task_ids`` fills, reads and (after a new edge) bypasses ``_cache``."""
    flow = Flow(name="test")
    a = Task()
    b = Task()
    c = Task()
    flow.add_edge(a, b)

    computed_ids = flow.task_ids

    # check that cache holds result
    cache_key = ("task_ids", ())
    assert flow._cache[cache_key] == computed_ids

    # check that cache is read
    flow._cache[cache_key] = 1
    assert flow.task_ids == 1

    # a new edge must produce freshly computed ids covering all three tasks
    flow.add_edge(b, c)
    assert len(flow.task_ids) == 3
def test_deserialization(self):
    """A serialized flow loads back with its tasks, edges, name and schedule."""
    param = Parameter("1")
    second = Task("2")
    third = Task("3")
    cron = prefect.schedules.CronSchedule("0 0 * * *")
    original = Flow(name="hi", tasks=[param, second, third], schedule=cron)
    original.add_edge(param, second)
    original.add_edge(param, third)

    payload = original.serialize()
    schema = prefect.serialization.flow.FlowSchema()
    loaded = schema.load(payload)

    assert len(loaded.tasks) == 3
    assert len(loaded.edges) == 2
    assert len(loaded.reference_tasks()) == 2
    assert {task.name for task in loaded.reference_tasks()} == {"2", "3"}
    assert loaded.name == original.name
    assert isinstance(loaded.schedule, prefect.schedules.CronSchedule)
def test_sorted_tasks_with_ambiguous_sort():
    """
    t1 -> bottleneck
    t2 -> bottleneck
    t3 -> bottleneck
    bottleneck -> t4
    bottleneck -> t5
    bottleneck -> t6

    Only the position of ``bottleneck`` is fixed; the three tasks on either
    side are compared as sets.
    """
    flow = Flow(name="test")
    first = Task("1")
    second = Task("2")
    third = Task("3")
    fourth = Task("4")
    fifth = Task("5")
    sixth = Task("6")
    bottleneck = Task("bottleneck")

    flow.add_edge(first, bottleneck)
    flow.add_edge(second, bottleneck)
    flow.add_edge(third, bottleneck)
    flow.add_edge(bottleneck, fourth)
    flow.add_edge(bottleneck, fifth)
    flow.add_edge(bottleneck, sixth)

    ordering = flow.sorted_tasks()

    assert set(ordering[:3]) == {first, second, third}
    assert list(ordering)[3] is bottleneck
    assert set(ordering[4:]) == {fourth, fifth, sixth}
def test_skip_validate_edges():
    """With ``validate=False`` neither a keyed edge nor a cycle raises."""
    flow = Flow(name="test")
    # these tasks don't support keyed edges
    first, second = Task(), Task()

    flow.add_edge(first, second, key="x", validate=False)
    # this introduces a cycle
    flow.add_edge(second, first, validate=False)
def test_validate_edges():
    """With eager edge validation on, an unsupported edge key raises ``TypeError``."""
    overrides = {"flows.eager_edge_validation": True}
    with set_temporary_config(overrides):
        flow = Flow(name="test")
        # these tasks don't support keyed edges
        first, second = Task(), Task()
        with pytest.raises(TypeError):
            flow.add_edge(first, second, key="x")
def test_validate_edges_kwarg():
    """Passing ``validate=True`` makes an unsupported edge key raise ``TypeError``."""
    flow = Flow(name="test")
    # these tasks don't support keyed edges
    first, second = Task(), Task()

    with pytest.raises(TypeError):
        flow.add_edge(first, second, key="x", validate=True)
def test_adding_edge_clears_cache(self):
    """An entry placed in ``_cache`` is gone after ``add_edge``."""
    flow = Flow(name="test")
    sentinel_key = 1
    flow._cache[sentinel_key] = 2

    flow.add_edge(Task(), Task())

    assert sentinel_key not in flow._cache