def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
    wf = Workflow("wf")
    j1 = Job("t1", _id="j1").add_outputs(File("f1"))
    j2 = Job("t1", _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
    j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
    wf.add_jobs(j1, j2, j3)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
    assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
def test_add_dependency_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    children = [
        Job("t", _id="child1"),
        Job("t", _id="child2"),
        Job("t", _id="child3"),
    ]
    wf.add_jobs(job, *children)

    wf.add_dependency(job, children=[children[0]])
    assert wf.dependencies[job._id] == _JobDependency(job._id, {children[0]._id})

    wf.add_dependency(job, children=children[1:])
    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def test_add_dependency_parents_and_children(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]
    children = [Job("t", _id="child1"), Job("t", _id="child2")]
    wf.add_jobs(*parents, *children)

    # add nothing
    wf.add_dependency(job)
    assert len(wf.dependencies) == 0

    wf.add_dependency(job, parents=parents, children=children)

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})

    assert wf.dependencies[job._id] == _JobDependency(
        job._id, {child._id for child in children}
    )
def test_infer_dependencies_fork_join_wf(self):
    wf = Workflow("wf")
    f1 = File("f1")
    f2 = File("f2")
    f3 = File("f3")
    f4 = File("f4")
    fork = Job("t1", _id="fork").add_outputs(f1, f2)
    work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
    work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
    join = Job("t1", _id="join").add_inputs(f3, f4)
    wf.add_jobs(fork, work1, work2, join)

    # manually call _infer_dependencies() as it is only called when
    # wf.write() is called
    wf._infer_dependencies()

    assert wf.dependencies["fork"] == _JobDependency("fork", {"work1", "work2"})
    assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
    assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
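# For comparison, a sketch (not an upstream test) of the same fork-join DAG
# declared explicitly with add_dependency() instead of being inferred from
# file usage; it assumes the same Workflow/Job/_JobDependency imports used by
# the tests above. Both routes should yield identical wf.dependencies entries.
def test_add_dependency_fork_join_wf_explicit(self):
    wf = Workflow("wf")
    fork = Job("t1", _id="fork")
    work1 = Job("t1", _id="work1")
    work2 = Job("t1", _id="work2")
    join = Job("t1", _id="join")
    wf.add_jobs(fork, work1, work2, join)

    # wire the edges by hand rather than calling _infer_dependencies()
    wf.add_dependency(fork, children=[work1, work2])
    wf.add_dependency(join, parents=[work1, work2])

    assert wf.dependencies["fork"] == _JobDependency("fork", {"work1", "work2"})
    assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
    assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})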
def test_add_dependency_parents(self):
    wf = Workflow("wf")
    job = Job("t", _id="job")
    parents = [
        Job("t", _id="parent1"),
        Job("t", _id="parent2"),
        Job("t", _id="parent3"),
    ]
    wf.add_jobs(job, *parents)

    wf.add_dependency(job, parents=[parents[0]])
    wf.add_dependency(job, parents=parents[1:])

    for parent in parents:
        assert wf.dependencies[parent._id] == _JobDependency(parent._id, {job._id})
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """
    try:
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])
                # a condorWorkflow has already been planned
                is_planned = j["type"] == "condorWorkflow"

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )
            else:
                raise ValueError

            # add args
            job.args = list(j["arguments"])

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )
            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], set(item["children"])
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
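# Illustrative usage sketch (not part of the upstream module): _to_wf() is the
# deserialization half of the API, so a dict shaped like a serialized workflow
# round-trips into a Workflow object. The keys below follow the schema that
# _to_wf() reads above; "minimal" is a hypothetical example input.
def _to_wf_usage_sketch():
    minimal = {
        "name": "wf",
        "jobs": [
            {
                "type": "job",
                "name": "t1",
                "id": "j1",
                "arguments": ["-i", "f1"],
                "uses": [{"lfn": "f1", "type": "input"}],
            }
        ],
    }

    wf = _to_wf(minimal)
    assert "j1" in wf.jobs
    assert wf.jobs["j1"].args == ["-i", "f1"]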
def test_tojson(self):
    jd = _JobDependency("parent_id", {"child_id1"})
    assert jd.__json__() == {
        "id": "parent_id",
        "children": ["child_id1"],
    }
def test_eq_invalid(self):
    with pytest.raises(ValueError) as e:
        _JobDependency("1", {"2", "3"}) == 123

    assert "_JobDependency cannot be compared with" in str(e)
def test_eq(self):
    assert _JobDependency("1", {"2", "3"}) == _JobDependency("1", {"2", "3"})