Пример #1
0
    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1",
                 _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})
Пример #2
0
    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]

        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id})

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Пример #3
0
    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]

        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})
Пример #4
0
    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"})
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})
Пример #5
0
    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]

        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})
Пример #6
0
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """

    try:
        #
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            if j["type"] == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif j["type"] in {"pegasusWorkflow", "condorWorkflow"}:
                f = File(j["file"])

                is_planned = False if j["type"] == "pegasusWorkflow" else True

                job = SubWorkflow(
                    f, is_planned, _id=j["id"], node_label=j.get("nodeLabel")
                )

            else:
                raise ValueError

            # add args
            args = list()
            for a in j["arguments"]:
                args.append(a)

            job.args = args

            # add uses
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                try:
                    f.metadata = u["metadata"]
                except KeyError:
                    pass

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )

            job.uses = uses

            # set stdin
            if "stdin" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdin"]:
                        job.stdin = u.file
                        break

            # set stdout
            if "stdout" in j:
                for u in job.uses:
                    if u.file.lfn == j["stdout"]:
                        job.stdout = u.file
                        break

            # set stderr
            if "stderr" in j:
                for u in job.uses:
                    if u.file.lfn == j["stderr"]:
                        job.stderr = u.file
                        break

            # add profiles
            if j.get("profiles"):
                job.profiles = defaultdict(dict, j.get("profiles"))

            # add metadata
            if j.get("metadata"):
                job.metadata = j.get("metadata")

            # add hooks
            if j.get("hooks"):
                job.hooks = defaultdict(list, j.get("hooks"))

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        if d.get("jobDependencies"):
            dependencies = defaultdict(_JobDependency)
            for item in d.get("jobDependencies"):
                dependencies[item["id"]] = _JobDependency(
                    item["id"], {child for child in item["children"]}
                )

            wf.dependencies = dependencies

        # add profiles
        if d.get("profiles"):
            wf.profiles = defaultdict(dict, d.get("profiles"))

        # add metadata
        if d.get("metadata"):
            wf.metadata = d.get("metadata")

        # add hooks
        if d.get("hooks"):
            wf.hooks = defaultdict(list, d.get("hooks"))

        return wf
    except (KeyError, ValueError):
        raise PegasusError("error parsing {}".format(d))
Пример #7
0
 def test_tojson(self):
     jd = _JobDependency("parent_id", {"child_id1"})
     assert jd.__json__() == {
         "id": "parent_id",
         "children": ["child_id1"],
     }
Пример #8
0
    def test_eq_invalid(self):
        with pytest.raises(ValueError) as e:
            _JobDependency("1", {"2", "3"}) == 123

        assert "_JobDependency cannot be compared with" in str(e)
Пример #9
0
 def test_eq(self):
     assert _JobDependency("1",
                           {"2", "3"}) == _JobDependency("1", {"2", "3"})