Пример #1
0
    def test_set_duplicate_stdout(self):
        """set_stdout() must raise DuplicateError for a file already added as an output."""
        job = AbstractJob()
        job.add_outputs(File("a"))
        with pytest.raises(DuplicateError) as e:
            job.set_stdout(File("a"))

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "file: {file}".format(file=File("a")) in str(e.value)
Пример #2
0
    def test_add_duplicate_checkpoint(self):
        """add_checkpoint() must raise DuplicateError for a file already added as an input."""
        job = AbstractJob()
        job.add_inputs(File("abc"))
        with pytest.raises(DuplicateError) as e:
            job.add_checkpoint(File("abc"))

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "file: {file}".format(file=File("abc")) in str(e.value)
Пример #3
0
    def test_set_stdout_already_set(self):
        """A second set_stdout() call must raise DuplicateError."""
        job = AbstractJob()
        job.set_stdout(File("a"))
        with pytest.raises(DuplicateError) as e:
            job.set_stdout(File("b"))

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "stdout has already been set" in str(e.value)
Пример #4
0
    def test_add_outputs(self):
        """Files passed to add_outputs() are reported back by get_outputs()."""
        job = AbstractJob()
        first, second = File("a"), File("b")

        job.add_outputs(first, second)

        expected_outputs = {first, second}
        assert job.get_outputs() == expected_outputs
Пример #5
0
    def test_chaining(self):
        """Job builder methods can be chained into a single expression."""
        job = (
            Job("t1")
            .add_args("-n5")
            .add_inputs(File("if"))
            .add_outputs(File("of"))
            .set_stdin(File("stdin"))
            .set_stdout(File("stdout"))
            .set_stderr(File("stderr"))
        )

        expected_inputs = {File("if"), File("stdin")}
        expected_outputs = {File("of"), File("stdout"), File("stderr")}

        assert job.transformation == "t1"
        assert job.args == ["-n5"]
        assert job.get_inputs() == expected_inputs
        assert job.get_outputs() == expected_outputs
Пример #6
0
    def test_add_inputs_and_outputs(self):
        """add_inputs() must raise DuplicateError for a file already added as an output."""
        job = AbstractJob()
        job.add_inputs(File("a"))
        job.add_outputs(File("b"))

        with pytest.raises(DuplicateError) as e:
            job.add_inputs(File("b"))

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "file: {file}".format(file=File("b")) in str(e.value)
Пример #7
0
def wf2():
    """Build a workflow named "test" with two jobs and two sub-workflows.

    Dependency inference is disabled; the single j1 -> j2 dependency is
    added explicitly.
    """
    input_file = File("in")
    stdin_file = File("stdin")
    stdout_file = File("stdout")
    stderr_file = File("stderr")
    intermediate = File("out")
    final_output = File("out2")

    pegasus_wf_file = File("pegasus_workflow")
    condor_wf_file = File("condor_workflow")

    first_job = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", input_file, "-o", intermediate)
        .add_inputs(input_file)
        .add_outputs(intermediate)
        .set_stdin(stdin_file)
        .set_stdout(stdout_file)
        .set_stderr(stderr_file)
    )

    second_job = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", intermediate, "-o", final_output)
        .add_inputs(intermediate)
        .add_outputs(final_output)
    )

    unplanned = SubWorkflow(
        pegasus_wf_file, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    planned = SubWorkflow(
        condor_wf_file, True, _id="planned", node_label="test"
    )

    workflow = Workflow("test", infer_dependencies=False)
    workflow = workflow.add_jobs(first_job, second_job, unplanned, planned)
    return workflow.add_dependency(first_job, children=[second_job])
Пример #8
0
    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        """Files wired through stdin/stdout take part in dependency inference."""
        workflow = Workflow("wf")
        producer = Job("t1", _id="j1").add_outputs(File("f1"))
        middle = (
            Job("t1", _id="j2")
            .set_stdin(*producer.get_outputs())
            .set_stdout(File("f2"))
        )
        consumer = Job("t1", _id="j3").add_inputs(*middle.get_outputs())
        workflow.add_jobs(producer, middle, consumer)

        # inference normally only happens inside wf.write(), so it is
        # triggered by hand here
        workflow._infer_dependencies()

        expected = {
            "j1": _JobDependency("j1", {"j2"}),
            "j2": _JobDependency("j2", {"j3"}),
        }
        for job_id, dependency in expected.items():
            assert workflow.dependencies[job_id] == dependency
Пример #9
0
    def test_get_outputs(self):
        """get_outputs() returns the single file that was added as an output."""
        job = AbstractJob()
        only_output = File("a")

        job.add_outputs(only_output)

        outputs = job.get_outputs()
        assert outputs == {only_output}
Пример #10
0
    def test_add_duplicate_replica(self):
        """Adding the same replica twice must raise DuplicateError."""
        rc = ReplicaCatalog()
        # The first registration stays outside the raises block so that only
        # the second, duplicate registration is allowed to raise.
        rc.add_replica("site", "lfn", "pfn", True)

        with pytest.raises(DuplicateError) as e:
            rc.add_replica("site", File("lfn"), "pfn", True)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert (
            "entry: ReplicaEntry(site=site, lfn=lfn, pfn=pfn, regex=True, checksum_type=None, checksum_value=None)"
            in str(e.value))
Пример #11
0
    def test_tojson_with_metdata(self, convert_yaml_schemas_to_json,
                                 load_schema):
        """A File with size and metadata serializes to the expected, schema-valid dict."""
        lfn_file = File("lfn", size=2048)
        result = lfn_file.add_metadata(key="value").__json__()
        expected = {"lfn": "lfn", "size": 2048, "metadata": {"key": "value"}}

        # validate against the "file" definition inside the rc schema
        rc_schema = load_schema("rc-5.0.json")
        validate(instance=result, schema=rc_schema["$defs"]["file"])

        assert result == expected
Пример #12
0
    def test_infer_dependencies_fork_join_wf(self):
        """Dependencies of a fork/join shaped workflow are inferred from file usage."""
        workflow = Workflow("wf")

        f1, f2, f3, f4 = (File(lfn) for lfn in ("f1", "f2", "f3", "f4"))

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        workflow.add_jobs(fork, work1, work2, join)

        # inference normally only happens inside wf.write(), so it is
        # triggered by hand here
        workflow._infer_dependencies()

        expected = {
            "fork": _JobDependency("fork", {"work1", "work2"}),
            "work1": _JobDependency("work1", {"join"}),
            "work2": _JobDependency("work2", {"join"}),
        }
        for job_id, dependency in expected.items():
            assert workflow.dependencies[job_id] == dependency
Пример #13
0
class TestSubWorkflow:
    """Tests for SubWorkflow construction and JSON serialization."""

    @pytest.mark.parametrize("file, is_planned", [(File("wf-file"), False),
                                                  ("wf-file", True)])
    def test_valid_subworkflow(self, file, is_planned):
        """A SubWorkflow can be built from either a File or a str."""
        assert SubWorkflow(file, is_planned)

    def test_invalid_subworkflow(self):
        """A file argument that is neither File nor str raises TypeError."""
        with pytest.raises(TypeError) as e:
            SubWorkflow(123, False)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "invalid file: 123" in str(e.value)

    @pytest.mark.parametrize(
        "subworkflow, expected",
        [
            (
                SubWorkflow(
                    "file", False, _id="test-subworkflow",
                    node_label="label").add_args("--sites", "condorpool"),
                {
                    "type": "pegasusWorkflow",
                    "file": "file",
                    "id": "test-subworkflow",
                    "nodeLabel": "label",
                    "arguments": ["--sites", "condorpool"],
                    "uses": [{
                        "lfn": "file",
                        "type": "input"
                    }],
                },
            ),
            (
                SubWorkflow(
                    "file", True, _id="test-subworkflow", node_label="label"),
                {
                    "type": "condorWorkflow",
                    "file": "file",
                    "id": "test-subworkflow",
                    "nodeLabel": "label",
                    "arguments": [],
                    "uses": [{
                        "lfn": "file",
                        "type": "input"
                    }],
                },
            ),
        ],
    )
    def test_tojson(self, subworkflow, expected):
        """Round-trip through _CustomEncoder and compare with the expected dict."""
        result = json.loads(json.dumps(subworkflow, cls=_CustomEncoder))
        assert result == expected
Пример #14
0
def wf1():
    """Build a workflow named "test" whose files, jobs and workflow carry extra attributes.

    Two jobs and two sub-workflows are added; dependency inference is
    disabled and the single j1 -> j2 dependency is added explicitly.
    """
    in1 = File("in1", size=2048).add_metadata(createdBy="ryan")
    in2 = File("in2")
    stdin_file = File("stdin").add_metadata(size=1024)
    stdout_file = File("stdout").add_metadata(size=1024)
    stderr_file = File("stderr").add_metadata(size=1024)
    intermediate = File("out").add_metadata(size=1024)
    final_output = File("out2").add_metadata(size=1024)

    pegasus_wf_file = File("pegasus_workflow").add_metadata(size=2048)
    condor_wf_file = File("condor_workflow").add_metadata(size=2048)

    first_job = (
        Job("tr", _id="1", node_label="test")
        .add_args("-i", in1, "-o", intermediate, "-n", 1, 1.1)
        .add_inputs(in1)
        .add_inputs(in2, bypass_staging=True)
        .add_outputs(intermediate)
        .set_stdin(stdin_file)
        .set_stdout(stdout_file)
        .set_stderr(stderr_file)
        .add_shell_hook(EventType.START, "/cmd2")
        .add_env(JAVA_HOME="/usr/bin/java")
        .add_metadata(xtra_info="123")
    )

    second_job = (
        Job("tr2", _id="2", node_label="test")
        .add_args("-i", intermediate, "-o", final_output)
        .add_inputs(intermediate)
        .add_outputs(final_output)
    )

    unplanned = SubWorkflow(
        pegasus_wf_file, False, _id="unplanned", node_label="test"
    ).add_args("-flag", "-flag2")

    planned = SubWorkflow(
        condor_wf_file, True, _id="planned", node_label="test"
    )

    return (
        Workflow("test", infer_dependencies=False)
        .add_shell_hook(EventType.START, "/cmd")
        .add_dagman_profile(retry=1)
        .add_metadata(author="ryan")
        .add_jobs(first_job, second_job, unplanned, planned)
        .add_dependency(first_job, children=[second_job])
    )
Пример #15
0
def wf():
    """Build workflow "wf" with two jobs and two sub-workflows, then infer dependencies.

    An env entry, a shell hook and metadata are attached to the workflow
    after inference has run.
    """
    workflow = Workflow("wf")

    producer = (
        Job("t1", _id="a")
        .add_outputs(File("f1"), File("f2"))
        .add_args(File("do-nothing"), "-n", 1, 1.1)
        .set_stdin("stdin")
        .set_stdout("stdout")
        .set_stderr("stderr")
    )
    consumer = (
        Job("t1", _id="b")
        .add_inputs(File("f1"), File("f2"))
        .add_checkpoint(File("checkpoint"))
    )
    planned = SubWorkflow("subworkflow.dag", True, _id="c").add_args(
        "--sites", "condorpool")
    unplanned = SubWorkflow(File("subworkflow.dax"), False, _id="d")

    workflow.add_jobs(producer, consumer, planned, unplanned)

    workflow._infer_dependencies()

    workflow.add_env(JAVA_HOME="/java/home")
    workflow.add_shell_hook(EventType.START, "/bin/echo hi")
    workflow.add_metadata(key="value")

    return workflow
Пример #16
0
    def test_write(self, _format, loader):
        """Write a replica catalog to a temp file and compare what is read back."""

        def by_site(pfn):
            return pfn["site"]

        rc = ReplicaCatalog()
        f_a = File("f.a", size=1024).add_metadata(creator="ryan")
        rc.add_replica(
            "local",
            f_a,
            "/f.a",
            checksum={"sha256": "123"},
            metadata={"extra": "metadata"},
        )
        rc.add_replica("condorpool", f_a, "/f.a")
        rc.add_replica("local", "f.b", "/f.b")
        rc.add_regex_replica(
            "local", "*.txt", "/path", metadata={"creator": "ryan"})

        replica_f_a = {
            "lfn": "f.a",
            "pfns": [
                {"site": "local", "pfn": "/f.a"},
                {"site": "condorpool", "pfn": "/f.a"},
            ],
            "checksum": {"sha256": "123"},
            "metadata": {"extra": "metadata", "size": 1024, "creator": "ryan"},
        }
        replica_f_b = {
            "lfn": "f.b",
            "pfns": [{"site": "local", "pfn": "/f.b"}],
            "metadata": {"size": 1024, "creator": "ryan"},
        }
        replica_regex = {
            "lfn": "*.txt",
            "pfns": [{"site": "local", "pfn": "/path"}],
            "metadata": {"creator": "ryan"},
            "regex": True,
        }
        # pfn order is normalized on both sides before comparing
        replica_f_a["pfns"] = sorted(replica_f_a["pfns"], key=by_site)
        expected = {
            "pegasus": "5.0",
            "replicas": [replica_f_a, replica_f_b, replica_regex],
        }

        with NamedTemporaryFile(mode="r+") as f:
            rc.write(f, _format=_format)
            f.seek(0)
            result = loader(f)

        first_replica = result["replicas"][0]
        first_replica["pfns"] = sorted(first_replica["pfns"], key=by_site)

        # the x-pegasus section is checked on its own and removed before the
        # exact comparison
        x_pegasus = result.pop("x-pegasus")
        assert "createdOn" in x_pegasus
        assert x_pegasus["createdBy"] == getpass.getuser()
        assert x_pegasus["apiLang"] == "python"
        assert result == expected
Пример #17
0
    def test_invalid_file(self, lfn: str):
        """Constructing a File from an invalid lfn must raise TypeError."""
        with pytest.raises(TypeError) as e:
            File(lfn)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "invalid lfn: {lfn}".format(lfn=lfn) in str(e.value)
Пример #18
0
 def test_valid_file(self, lfn: str, size: int):
     """A File built from the given lfn and size is truthy."""
     created = File(lfn, size)
     assert created
Пример #19
0
    def test_tojson(self):
        """Serialize an AbstractJob using every kind of file and compare with the expected dict."""

        def by_lfn(use):
            return use["lfn"]

        job = AbstractJob(_id="aj", node_label="test")
        job.set_stdin("stdin")
        job.set_stdout("stdout")
        job.set_stderr("stderr")
        job.add_args("-i", File("f1"))
        job.add_inputs(File("if1"), File("if2"))
        job.add_outputs(File("of1"), File("of2"))
        job.add_checkpoint(File("cpf"))

        result = json.loads(json.dumps(job, cls=_CustomEncoder))
        # "uses" ordering is normalized on both sides before comparing
        result["uses"] = sorted(result["uses"], key=by_lfn)

        input_uses = [
            {"lfn": lfn, "type": "input"} for lfn in ("stdin", "if1", "if2")
        ]
        output_uses = [
            {
                "lfn": lfn,
                "type": "output",
                "stageOut": True,
                "registerReplica": False,
            }
            for lfn in ("stdout", "stderr", "of1", "of2")
        ]
        checkpoint_use = {
            "lfn": "cpf",
            "type": "checkpoint",
            "stageOut": True,
            "registerReplica": False,
        }

        expected = {
            "id": "aj",
            "nodeLabel": "test",
            "arguments": ["-i", "f1"],
            "stdin": "stdin",
            "stdout": "stdout",
            "stderr": "stderr",
            "uses": sorted(
                input_uses + output_uses + [checkpoint_use], key=by_lfn),
        }

        assert result == expected
Пример #20
0
def _to_wf(d: dict) -> Workflow:
    """Convert dict to Workflow

    Catalogs, jobs, job dependencies, profiles, metadata and hooks found
    in ``d`` are copied onto a new Workflow created with dependency
    inference disabled.

    :param d: Workflow represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a Workflow object based on d
    :rtype: Workflow
    """

    def _used_file(job, lfn):
        """Return the file of the first entry in job.uses with this lfn, or None."""
        return next((u.file for u in job.uses if u.file.lfn == lfn), None)

    try:
        wf = Workflow(d["name"], infer_dependencies=False)

        # add rc
        if "replicaCatalog" in d:
            wf.replica_catalog = _to_rc(d["replicaCatalog"])

        # add tc
        if "transformationCatalog" in d:
            wf.transformation_catalog = _to_tc(d["transformationCatalog"])

        # add sc
        if "siteCatalog" in d:
            wf.site_catalog = _to_sc(d["siteCatalog"])

        # add jobs
        for j in d["jobs"]:
            # create appropriate job based on type
            job_type = j["type"]
            if job_type == "job":
                job = Job(
                    j["name"],
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                    namespace=j.get("namespace"),
                    version=j.get("version"),
                )
            elif job_type in {"pegasusWorkflow", "condorWorkflow"}:
                # only "pegasusWorkflow" sub-workflows are unplanned
                is_planned = job_type != "pegasusWorkflow"

                job = SubWorkflow(
                    File(j["file"]),
                    is_planned,
                    _id=j["id"],
                    node_label=j.get("nodeLabel"),
                )
            else:
                # converted to PegasusError by the handler at the bottom
                raise ValueError("unknown job type: {}".format(job_type))

            # add args
            job.args = list(j["arguments"])

            # add uses
            # NOTE(review): an unrecognized u["type"] makes getattr raise
            # AttributeError, which the handler below does not convert to
            # PegasusError -- confirm whether that is intended.
            uses = set()
            for u in j["uses"]:
                f = File(u["lfn"], size=u.get("size"))
                if "metadata" in u:
                    f.metadata = u["metadata"]

                uses.add(
                    _Use(
                        f,
                        getattr(_LinkType, u["type"].upper()),
                        stage_out=u.get("stageOut"),
                        register_replica=u.get("registerReplica"),
                        bypass_staging=u.get("bypass"),
                    )
                )

            job.uses = uses

            # set stdin/stdout/stderr to the File object already held in
            # job.uses; the attribute is left untouched when no use matches
            for stream in ("stdin", "stdout", "stderr"):
                if stream in j:
                    stream_file = _used_file(job, j[stream])
                    if stream_file is not None:
                        setattr(job, stream, stream_file)

            # add profiles
            job_profiles = j.get("profiles")
            if job_profiles:
                job.profiles = defaultdict(dict, job_profiles)

            # add metadata
            job_metadata = j.get("metadata")
            if job_metadata:
                job.metadata = job_metadata

            # add hooks
            job_hooks = j.get("hooks")
            if job_hooks:
                job.hooks = defaultdict(list, job_hooks)

            # add job to wf
            wf.add_jobs(job)

        # add dependencies
        job_dependencies = d.get("jobDependencies")
        if job_dependencies:
            dependencies = defaultdict(_JobDependency)
            for item in job_dependencies:
                dependencies[item["id"]] = _JobDependency(
                    item["id"], set(item["children"])
                )

            wf.dependencies = dependencies

        # add profiles
        wf_profiles = d.get("profiles")
        if wf_profiles:
            wf.profiles = defaultdict(dict, wf_profiles)

        # add metadata
        wf_metadata = d.get("metadata")
        if wf_metadata:
            wf.metadata = wf_metadata

        # add hooks
        wf_hooks = d.get("hooks")
        if wf_hooks:
            wf.hooks = defaultdict(list, wf_hooks)

        return wf
    except (KeyError, ValueError) as err:
        # chain the original error so the missing key / bad value is visible
        raise PegasusError("error parsing {}".format(d)) from err
Пример #21
0
 def test_eq(self):
     """Uses of the same file compare equal across link types; different files do not."""
     input_a = _Use(File("a"), _LinkType.INPUT)
     output_a = _Use(File("a"), _LinkType.OUTPUT)
     input_b = _Use(File("b"), _LinkType.INPUT)

     assert input_a == output_a
     assert input_a != input_b
Пример #22
0
 def test_valid_use(self):
     """A _Use built from a File and a link type is truthy."""
     use = _Use(File("a"), _LinkType.INPUT)
     assert use
Пример #23
0
class TestReplicaCatalog:
    """Tests for adding replicas to a ReplicaCatalog and serializing it."""

    @pytest.mark.parametrize("replica", [("lfn", "pfn", "site", True),
                                         (File("lfn"), "pfn", "site", True)])
    def test_add_replica(self, replica: tuple):
        """Adding one replica leaves exactly one entry in the catalog."""
        rc = ReplicaCatalog()
        rc.add_replica(*replica)
        assert len(rc.replicas) == 1

    def test_add_duplicate_replica(self):
        """Adding the same replica twice must raise DuplicateError."""
        rc = ReplicaCatalog()
        # The first registration stays outside the raises block so that only
        # the second, duplicate registration is allowed to raise.
        rc.add_replica("site", "lfn", "pfn", True)

        with pytest.raises(DuplicateError) as e:
            rc.add_replica("site", File("lfn"), "pfn", True)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert (
            "entry: ReplicaEntry(site=site, lfn=lfn, pfn=pfn, regex=True, checksum_type=None, checksum_value=None)"
            in str(e.value))

    def test_add_invalid_replica(self):
        """An lfn that is neither str nor File must raise TypeError."""
        rc = ReplicaCatalog()
        with pytest.raises(TypeError) as e:
            rc.add_replica("site", set(), "pfn")

        assert "invalid lfn: {lfn}".format(lfn=set()) in str(e.value)

    def test_tojson(self, convert_yaml_schemas_to_json, load_schema):
        """The JSON form of the catalog is schema-valid and matches the expected dict."""
        rc = ReplicaCatalog()
        rc.add_replica("site1", "lfn1", "pfn1")
        rc.add_replica("site2", "lfn2", "pfn2", True)
        rc.add_replica("site3",
                       "lfn3",
                       "pfn3",
                       checksum_type="sha256",
                       checksum_value="abc123")

        expected = {
            "pegasus":
            PEGASUS_VERSION,
            "replicas": [
                {
                    "lfn": "lfn1",
                    "pfn": "pfn1",
                    "site": "site1"
                },
                {
                    "lfn": "lfn2",
                    "pfn": "pfn2",
                    "site": "site2",
                    "regex": True
                },
                {
                    "lfn": "lfn3",
                    "pfn": "pfn3",
                    "site": "site3",
                    "checksum": {
                        "type": "sha256",
                        "value": "abc123"
                    },
                },
            ],
        }
        # replica ordering is normalized on both sides before comparing
        expected["replicas"] = sorted(expected["replicas"],
                                      key=lambda d: d["lfn"])

        result = json.loads(json.dumps(rc, cls=_CustomEncoder))
        result["replicas"] = sorted(result["replicas"], key=lambda d: d["lfn"])

        rc_schema = load_schema("rc-5.0.json")
        validate(instance=result, schema=rc_schema)

        assert result == expected

    @pytest.mark.parametrize("_format, loader", [("json", json.load),
                                                 ("yml", yaml.safe_load)])
    def test_write(self, _format, loader):
        """Write the catalog to a temp file in each format and compare what is read back."""
        rc = ReplicaCatalog()
        rc.add_replica("site1", "lfn1", "pfn1",
                       True).add_replica("site2", "lfn2", "pfn2", True)

        expected = {
            "pegasus":
            PEGASUS_VERSION,
            "replicas": [
                {
                    "lfn": "lfn1",
                    "pfn": "pfn1",
                    "site": "site1",
                    "regex": True
                },
                {
                    "lfn": "lfn2",
                    "pfn": "pfn2",
                    "site": "site2",
                    "regex": True
                },
            ],
        }
        expected["replicas"] = sorted(expected["replicas"],
                                      key=lambda d: d["lfn"])

        with NamedTemporaryFile(mode="r+") as f:
            rc.write(f, _format=_format)
            f.seek(0)
            result = loader(f)

        result["replicas"] = sorted(result["replicas"], key=lambda d: d["lfn"])

        assert result == expected

    def test_write_default(self):
        """write() without arguments creates replicas.yml in the working directory."""
        expected_file = Path("replicas.yml")
        ReplicaCatalog().write()

        # unlink doubles as the existence check and as cleanup
        try:
            expected_file.unlink()
        except FileNotFoundError:
            pytest.fail("could not find {}".format(expected_file))

    def test_replica_catalog_ordering_on_yml_write(self):
        """Top-level keys in the written YAML appear as pegasus, then replicas."""
        ReplicaCatalog().write()

        EXPECTED_FILE = Path("replicas.yml")

        try:
            with EXPECTED_FILE.open() as f:
                # reading in as str so ordering of keys is not disrupted
                # when loaded into a dict
                result = f.read()
        finally:
            # remove the generated file even when reading it fails
            EXPECTED_FILE.unlink()

        # Check that rc keys have been ordered as follows:
        # - pegasus
        # - replicas
        # (the dot is escaped so that it only matches a literal ".")
        p = re.compile(r"pegasus: '5\.0'[\w\W]+replicas:[\w\W]+")
        assert p.match(result) is not None
Пример #24
0
class TestWorkflow:
    @pytest.mark.parametrize(
        "job",
        [
            Job("t1", _id="job"),
            SubWorkflow(File("f1"), False, _id="job"),
            SubWorkflow("f1", True, _id="job"),
        ],
    )
    def test_add_job(self, job):
        """A job added to the workflow can be fetched back by its id."""
        workflow = Workflow("wf")
        workflow.add_jobs(job)

        fetched = workflow.get_job("job")
        assert job == fetched

    def test_add_duplicate_job(self):
        """Adding two jobs that share an id must raise DuplicateError."""
        workflow = Workflow("wf")
        first = Job("t1", _id="j1")
        same_id = Job("t2", _id="j1")

        with pytest.raises(DuplicateError):
            workflow.add_jobs(first, same_id)

    def test_get_job(self):
        """get_job() returns the job registered under the given id."""
        workflow = Workflow("wf")
        job = Job("t1", _id="j1")
        workflow.add_jobs(job)

        fetched = workflow.get_job("j1")
        assert job == fetched

    def test_get_invalid_job(self):
        """Looking up an id that was never added must raise NotFoundError."""
        empty_workflow = Workflow("wf")

        with pytest.raises(NotFoundError):
            empty_workflow.get_job("abc123")

    def test_job_id_assignment_by_workflow(self):
        """Jobs added without an id receive sequential ids from the workflow."""
        workflow = Workflow("wf")
        jobs = [
            Job("t1", _id="a"),
            Job("t2"),
            Job("t3", _id="b"),
            Job("t4"),
            Job("t5"),
        ]
        workflow.add_jobs(*jobs)

        # position in `jobs` -> id expected for that job
        expected_ids = {1: "ID0000001", 3: "ID0000002", 4: "ID0000003"}
        for position, expected_id in expected_ids.items():
            assert jobs[position]._id == expected_id

    def test_add_site_catalog(self):
        """Adding a SiteCatalog to a fresh workflow must not raise."""
        sc = SiteCatalog()
        wf = Workflow("wf")

        try:
            wf.add_site_catalog(sc)
        except Exception:
            # a bare except would also intercept KeyboardInterrupt/SystemExit
            pytest.fail("should not have raised exception")

    def test_add_invalid_site_catalog(self):
        """Passing a non-catalog object to add_site_catalog() must raise TypeError."""
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_site_catalog(123)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "invalid catalog: 123" in str(e.value)

    def test_add_duplicate_site_catalog(self):
        """Adding a SiteCatalog a second time must raise DuplicateError."""
        sc = SiteCatalog()
        wf = Workflow("wf")
        wf.add_site_catalog(sc)

        with pytest.raises(DuplicateError) as e:
            wf.add_site_catalog(sc)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "a SiteCatalog has already" in str(e.value)

    def test_add_replica_catalog(self):
        """Adding a ReplicaCatalog to a fresh workflow must not raise."""
        rc = ReplicaCatalog()
        wf = Workflow("wf")

        try:
            wf.add_replica_catalog(rc)
        except Exception:
            # a bare except would also intercept KeyboardInterrupt/SystemExit
            pytest.fail("should not have raised exception")

    def test_add_invalid_replica_catalog(self):
        """Passing a non-catalog object to add_replica_catalog() must raise TypeError."""
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_replica_catalog(123)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "invalid catalog: 123" in str(e.value)

    def test_add_duplicate_replica_catalog(self):
        """Adding a ReplicaCatalog a second time must raise DuplicateError."""
        rc = ReplicaCatalog()
        wf = Workflow("wf")
        wf.add_replica_catalog(rc)

        with pytest.raises(DuplicateError) as e:
            wf.add_replica_catalog(rc)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "a ReplicaCatalog has already" in str(e.value)

    def test_add_transformation_catalog(self):
        """Adding a TransformationCatalog to a fresh workflow must not raise."""
        tc = TransformationCatalog()
        wf = Workflow("wf")

        try:
            wf.add_transformation_catalog(tc)
        except Exception:
            # a bare except would also intercept KeyboardInterrupt/SystemExit
            pytest.fail("should not have raised exception")

    def test_add_invalid_transformation_catalog(self):
        """Passing a non-catalog object to add_transformation_catalog() must raise TypeError."""
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_transformation_catalog(123)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "invalid catalog: 123" in str(e.value)

    def test_add_duplicate_transformation_catalog(self):
        """Adding a TransformationCatalog a second time must raise DuplicateError."""
        tc = TransformationCatalog()
        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)

        with pytest.raises(DuplicateError) as e:
            wf.add_transformation_catalog(tc)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "a TransformationCatalog has already" in str(e.value)

    def test_add_dependency_parents(self):
        """Parents added over several calls each list the job as their child."""
        workflow = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]
        workflow.add_jobs(job, *parents)

        # register the first parent alone, then the rest in one call
        first_parent, *other_parents = parents
        workflow.add_dependency(job, parents=[first_parent])
        workflow.add_dependency(job, parents=other_parents)

        for parent in parents:
            expected = _JobDependency(parent._id, {job._id})
            assert workflow.dependencies[parent._id] == expected

    def test_add_dependency_children(self):
        """Children added over several calls accumulate under the job's dependency."""
        workflow = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]
        workflow.add_jobs(job, *children)

        first_child, *other_children = children

        workflow.add_dependency(job, children=[first_child])
        after_first = _JobDependency(job._id, {first_child._id})
        assert workflow.dependencies[job._id] == after_first

        workflow.add_dependency(job, children=other_children)
        all_child_ids = {child._id for child in children}
        after_all = _JobDependency(job._id, all_child_ids)
        assert workflow.dependencies[job._id] == after_all

    def test_add_dependency_parents_and_children(self):
        """Parents and children passed in one call are both recorded."""
        workflow = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]
        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        workflow.add_jobs(*parents, *children)

        # with neither parents nor children given, nothing is recorded
        workflow.add_dependency(job)
        assert len(workflow.dependencies) == 0

        workflow.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            expected_parent = _JobDependency(parent._id, {job._id})
            assert workflow.dependencies[parent._id] == expected_parent

        child_ids = {child._id for child in children}
        expected_job = _JobDependency(job._id, child_ids)
        assert workflow.dependencies[job._id] == expected_job

    def test_add_duplicate_parent_dependency(self):
        """Listing the same parent twice must raise DuplicateError."""
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")

        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e.value))

    def test_add_duplicate_child_dependency(self):
        """Listing the same child twice must raise DuplicateError."""
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")

        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e.value))

    def test_add_dependency_invalid_job(self):
        """add_dependency() must raise ValueError for a job that has no id."""
        wf = Workflow("wf")
        job = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job)

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "The given job does not have an id" in str(e.value)

    def test_add_dependency_invalid_parent(self):
        """add_dependency() must raise ValueError for a parent that has no id."""
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, parents=[parent])

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "One of the given parents does not have an id" in str(e.value)

    def test_add_dependency_invalid_child(self):
        """add_dependency() must raise ValueError for a child that has no id."""
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, children=[child])

        # e is pytest's ExceptionInfo wrapper; the raised exception (and its
        # message) lives on e.value.
        assert "One of the given children does not have an id" in str(e.value)

    def test_infer_dependencies_fork_join_wf(self):
        """Dependencies of a fork/join shaped workflow are inferred from file usage."""
        workflow = Workflow("wf")

        f1, f2, f3, f4 = (File(lfn) for lfn in ("f1", "f2", "f3", "f4"))

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        workflow.add_jobs(fork, work1, work2, join)

        # inference normally only happens inside wf.write(), so it is
        # triggered by hand here
        workflow._infer_dependencies()

        expected = {
            "fork": _JobDependency("fork", {"work1", "work2"}),
            "work1": _JobDependency("work1", {"join"}),
            "work2": _JobDependency("work2", {"join"}),
        }
        for job_id, dependency in expected.items():
            assert workflow.dependencies[job_id] == dependency

    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        """Files wired through stdin/stdout take part in dependency inference."""
        workflow = Workflow("wf")
        producer = Job("t1", _id="j1").add_outputs(File("f1"))
        middle = (
            Job("t1", _id="j2")
            .set_stdin(*producer.get_outputs())
            .set_stdout(File("f2"))
        )
        consumer = Job("t1", _id="j3").add_inputs(*middle.get_outputs())
        workflow.add_jobs(producer, middle, consumer)

        # inference normally only happens inside wf.write(), so it is
        # triggered by hand here
        workflow._infer_dependencies()

        expected = {
            "j1": _JobDependency("j1", {"j2"}),
            "j2": _JobDependency("j2", {"j3"}),
        }
        for job_id, dependency in expected.items():
            assert workflow.dependencies[job_id] == dependency

    def test_tojson(self, convert_yaml_schemas_to_json, load_schema, wf,
                    expected_json):
        """Serialize the workflow with _CustomEncoder, validate it against the
        wf-5.0 schema and compare it with the expected JSON."""
        serialized = json.dumps(wf, cls=_CustomEncoder)
        result = json.loads(serialized)

        validate(instance=result, schema=load_schema("wf-5.0.json"))

        # Normalize ordering before comparing: jobs by id, and the "uses"
        # entries of the first two jobs by lfn
        result["jobs"].sort(key=lambda job: job["id"])
        for idx in (0, 1):
            result["jobs"][idx]["uses"].sort(key=lambda use: use["lfn"])

        assert result == expected_json

    @pytest.mark.parametrize("_format, loader", [("json", json.load),
                                                 ("yml", yaml.safe_load)])
    def test_write_file_obj(
        self,
        convert_yaml_schemas_to_json,
        load_schema,
        wf,
        expected_json,
        _format,
        loader,
    ):
        """Write the workflow to an open file object and validate the output.

        Runs once per serialization format; the file is read back with the
        loader paired with that format.
        """
        with NamedTemporaryFile("r+") as tmp:
            wf.write(tmp, _format=_format)

            # write() is expected to record the target file's name in _path
            assert wf._path == tmp.name

            # rewind so the loader sees what was just written
            tmp.seek(0)
            result = loader(tmp)

        validate(instance=result, schema=load_schema("wf-5.0.json"))

        # Normalize ordering before comparing: jobs by id, and the "uses"
        # entries of the first two jobs by lfn
        result["jobs"].sort(key=lambda job: job["id"])
        for idx in (0, 1):
            result["jobs"][idx]["uses"].sort(key=lambda use: use["lfn"])

        assert result == expected_json

    def test_write_str_filename(self, wf, load_schema, expected_json):
        """Write the workflow to a path given as a string and validate it.

        The written file is removed in a ``finally`` block so a failing
        assertion does not leave ``wf.yml`` behind for later tests.
        """
        path = "wf.yml"

        try:
            wf.write(path)

            # _path should be set by the call to write
            assert wf._path == path

            with open(path) as f:
                result = yaml.safe_load(f)

            workflow_schema = load_schema("wf-5.0.json")
            validate(instance=result, schema=workflow_schema)

            # Normalize ordering before comparing: jobs by id, and the "uses"
            # entries of the first two jobs by lfn
            result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
            result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                               key=lambda u: u["lfn"])
            result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                               key=lambda u: u["lfn"])

            assert result == expected_json
        finally:
            # Guard the removal so a failure inside write() is not masked by
            # a FileNotFoundError raised during cleanup
            if os.path.exists(path):
                os.remove(path)

    def test_write_default_filename(self, wf, expected_json):
        """write() without arguments is expected to produce ``workflow.yml``.

        The written file is removed in a ``finally`` block so a failing
        assertion does not leave it behind for later tests.
        """
        EXPECTED_FILE = "workflow.yml"

        try:
            wf.write()

            with open(EXPECTED_FILE) as f:
                result = yaml.safe_load(f)

            # Normalize ordering before comparing: jobs by id, and every
            # job's "uses" entries by lfn
            result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
            for job in result["jobs"]:
                job["uses"] = sorted(job["uses"], key=lambda u: u["lfn"])

            assert result == expected_json
        finally:
            # Guard the removal so a failure inside write() is not masked by
            # a FileNotFoundError raised during cleanup
            if os.path.exists(EXPECTED_FILE):
                os.remove(EXPECTED_FILE)

    def test_write_wf_catalogs_included(self):
        """Catalogs added to a workflow are inlined into the written YAML.

        The file is read back with ``yaml.safe_load``: calling ``yaml.load``
        without a Loader is deprecated since PyYAML 5.1 and raises TypeError
        on PyYAML 6. Cleanup happens in ``finally`` so a failing assertion
        does not leave ``workflow.yml`` behind.
        """
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        expected = {
            "pegasus": "5.0",
            "name": "test",
            "siteCatalog": {
                "sites": []
            },
            "replicaCatalog": {
                "replicas": []
            },
            "transformationCatalog": {
                "transformations": []
            },
            "jobs": [{
                "type": "job",
                "name": "ls",
                "id": "ID0000001",
                "arguments": [],
                "uses": [],
            }],
            "jobDependencies": [],
        }

        wf_path = Path("workflow.yml")
        try:
            with wf_path.open("w+") as f:
                wf.write(f)
                # rewind so the loader sees what was just written
                f.seek(0)
                result = yaml.safe_load(f)

            assert expected == result
        finally:
            if wf_path.exists():
                wf_path.unlink()

    def test_write_valid_hierarchical_workflow(self, mocker):
        """Writing a workflow that contains a SubWorkflow and no inlined
        catalogs should not raise PegasusError."""
        # NOTE(review): Workflow.write is replaced by a mock before wf.write()
        # is invoked, so this appears to exercise only the mock's call record
        # rather than the real write(); confirm that is the intent.
        write_mock = mocker.patch("Pegasus.api.workflow.Workflow.write")

        try:
            wf = Workflow("test")
            wf.add_jobs(SubWorkflow("file", False))
            wf.write(file="workflow.yml", _format="yml")
        except PegasusError:
            pytest.fail("shouldn't have thrown PegasusError")

        write_mock.assert_called_once_with(file="workflow.yml", _format="yml")

    @pytest.mark.parametrize(
        "sc, tc",
        [
            (SiteCatalog(), None),
            (None, TransformationCatalog()),
            (SiteCatalog(), TransformationCatalog()),
        ],
    )
    def test_write_hierarchical_workflow_when_catalogs_are_inlined(
            self, sc, tc):
        """A workflow containing a SubWorkflow must refuse to be written when
        a site and/or transformation catalog has been inlined into it."""
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))

        # Compare against None explicitly so the decision does not depend on
        # the truthiness of the catalog objects themselves
        if sc is not None:
            wf.add_site_catalog(sc)

        if tc is not None:
            wf.add_transformation_catalog(tc)

        with pytest.raises(PegasusError) as e:
            wf.write()

        # Check the raised exception's own message; str() of the ExceptionInfo
        # wrapper is its repr on current pytest, not the message.
        assert (
            "Site Catalog and Transformation Catalog must be written as a separate"
            in str(e.value))

    def test_workflow_key_ordering_on_yml_write(self):
        """Check the order of the top-level keys in the written YAML file."""
        wf = Workflow("wf")
        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_replica_catalog(ReplicaCatalog())
        wf.add_site_catalog(SiteCatalog())

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()

        # Read the file as raw text: loading it into a dict would not let us
        # check the order in which the keys were written
        written_file = Path("workflow.yml")
        contents = written_file.read_text()
        written_file.unlink()

        # Top-level keys must appear in this order (nested keys are ignored):
        #   pegasus, name, hooks, profiles, metadata, siteCatalog,
        #   replicaCatalog, transformationCatalog, jobs, jobDependencies
        key_order = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert key_order.match(contents) is not None

    def test_plan_workflow_already_written(self, wf, mocker):
        """plan() on an already written workflow passes the written path and
        the default planning options to Client.plan.

        The written file is removed in a ``finally`` block so a failing
        assertion does not leave ``wf.yml`` behind for later tests.
        """
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        plan_mock = mocker.patch("Pegasus.client._client.Client.plan")

        path = "wf.yml"

        try:
            wf.write(path).plan()

            assert wf._path == path

            plan_mock.assert_called_once_with(
                path,
                cleanup="none",
                conf=None,
                dir=None,
                force=False,
                input_dirs=None,
                output_dir=None,
                output_sites=["local"],
                relative_dir=None,
                sites=None,
                staging_sites=None,
                submit=False,
                verbose=0,
            )
        finally:
            # Guard the removal so a failure inside write() is not masked by
            # a FileNotFoundError raised during cleanup
            if os.path.exists(path):
                os.remove(path)

    def test_plan_workflow_not_written(self, wf, mocker):
        """plan() on a workflow that was never written is expected to set
        _path to ``workflow.yml`` and pass that path to Client.plan.

        The file is removed in a ``finally`` block so a failing assertion
        does not leave it behind for later tests.
        """
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        plan_mock = mocker.patch("Pegasus.client._client.Client.plan")

        DEFAULT_WF_PATH = "workflow.yml"

        try:
            wf.plan()

            assert wf._path == DEFAULT_WF_PATH

            plan_mock.assert_called_once_with(
                DEFAULT_WF_PATH,
                cleanup="none",
                conf=None,
                dir=None,
                force=False,
                input_dirs=None,
                output_dir=None,
                output_sites=["local"],
                relative_dir=None,
                sites=None,
                staging_sites=None,
                submit=False,
                verbose=0,
            )
        finally:
            # Guard the removal so a failure inside plan() is not masked by
            # a FileNotFoundError raised during cleanup
            if os.path.exists(DEFAULT_WF_PATH):
                os.remove(DEFAULT_WF_PATH)

    def test_run(self, wf, mocker):
        """run() should call Client.run exactly once with (None, verbose=0)."""
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        run_mock = mocker.patch("Pegasus.client._client.Client.run")

        wf.run()

        run_mock.assert_called_once_with(None, verbose=0)

    def test_status(self, wf, mocker):
        """status() should pass the submit directory to Client.status with
        long=0 and verbose=0."""
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        status_mock = mocker.patch("Pegasus.client._client.Client.status")

        # status() is called on a workflow whose submit dir is already known
        wf._submit_dir = "submit_dir"
        wf.status()

        status_mock.assert_called_once_with(wf._submit_dir,
                                            long=0,
                                            verbose=0)

    def test_remove(self, wf, mocker):
        """remove() should pass the submit directory to Client.remove with
        verbose=0."""
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        remove_mock = mocker.patch("Pegasus.client._client.Client.remove")

        # remove() is called on a workflow whose submit dir is already known
        wf._submit_dir = "submit_dir"
        wf.remove()

        remove_mock.assert_called_once_with(wf._submit_dir, verbose=0)

    def test_analyze(self, wf, mocker):
        """analyze() should pass the submit directory to Client.analyzer with
        verbose=0."""
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        analyzer_mock = mocker.patch("Pegasus.client._client.Client.analyzer")

        # analyze() is called on a workflow whose submit dir is already known
        wf._submit_dir = "submit_dir"
        wf.analyze()

        analyzer_mock.assert_called_once_with(wf._submit_dir, verbose=0)

    def test_statistics(self, wf, mocker):
        """statistics() should pass the submit directory to Client.statistics
        with verbose=0."""
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        statistics_mock = mocker.patch(
            "Pegasus.client._client.Client.statistics")

        # statistics() is called on a workflow whose submit dir is already known
        wf._submit_dir = "submit_dir"
        wf.statistics()

        statistics_mock.assert_called_once_with(wf._submit_dir, verbose=0)
Пример #25
0
    def test_eq_invalid(self):
        """Comparing a _Use with a non-_Use object must raise ValueError."""
        with pytest.raises(ValueError) as e:
            # the comparison is evaluated only for the exception it raises
            _Use(File("a"), _LinkType.INPUT) == "use"

        # Check the raised exception's own message; str() of the ExceptionInfo
        # wrapper is its repr on current pytest, not the message.
        assert "_Use cannot be compared with" in str(e.value)
Пример #26
0
    def test_tojson_no_mixins(self):
        """Serialize a fully populated Job with _CustomEncoder and compare the
        result with the expected JSON document."""
        job = Job("t1",
                  namespace="ns",
                  node_label="label",
                  _id="id",
                  version="1")
        job.set_stdin("stdin")
        job.set_stdout("stdout")
        job.set_stderr("stderr")
        job.add_args("-i", File("f1"), "-n", 1, 1.1)
        job.add_inputs(File("if1"), File("if2"))
        job.add_outputs(File("of1"), File("of2"))

        def by_lfn(use):
            return use["lfn"]

        result = json.loads(json.dumps(job, cls=_CustomEncoder))
        # the order of "uses" is not part of the contract; sort both sides
        result["uses"] = sorted(result["uses"], key=by_lfn)

        input_uses = [{
            "lfn": lfn,
            "type": "input"
        } for lfn in ("stdin", "if1", "if2")]
        output_uses = [{
            "lfn": lfn,
            "type": "output",
            "stageOut": True,
            "registerReplica": False,
        } for lfn in ("stdout", "stderr", "of1", "of2")]

        expected = {
            "type": "job",
            "name": "t1",
            "namespace": "ns",
            "id": "id",
            "nodeLabel": "label",
            "version": "1",
            "arguments": ["-i", "f1", "-n", 1, 1.1],
            "stdin": "stdin",
            "stdout": "stdout",
            "stderr": "stderr",
            "uses": sorted(input_uses + output_uses, key=by_lfn),
        }

        assert result == expected
Пример #27
0
 def test_tojson_no_metadata(self, lfn, size, expected):
     """File(lfn, size).__json__() must equal the expected mapping."""
     serialized = File(lfn, size).__json__()
     assert serialized == expected
Пример #28
0
class Test_Use:
    """Tests for _Use: construction, equality and JSON serialization."""

    def test_valid_use(self):
        """A _Use built from a File and a _LinkType is truthy."""
        assert _Use(File("a"), _LinkType.INPUT)

    def test_invalid_use_bad_file(self):
        """Passing something other than a File must raise TypeError."""
        with pytest.raises(TypeError) as e:
            _Use(123, _LinkType.INPUT)

        # Check the raised exception's own message; str() of the ExceptionInfo
        # wrapper is its repr on current pytest, not the message.
        assert "invalid file: 123; file must be of type File" in str(e.value)

    def test_invalid_use_bad_link_type(self):
        """Passing something other than a _LinkType must raise TypeError."""
        with pytest.raises(TypeError) as e:
            _Use(File("a"), "link")

        assert "invalid link_type: link;" in str(e.value)

    def test_eq(self):
        """Uses of the same file compare equal even when their link types
        differ; uses of different files do not."""
        assert _Use(File("a"),
                    _LinkType.INPUT) == _Use(File("a"), _LinkType.OUTPUT)
        assert _Use(File("a"), _LinkType.INPUT) != _Use(
            File("b"), _LinkType.INPUT)

    def test_eq_invalid(self):
        """Comparing a _Use with a non-_Use object must raise ValueError."""
        with pytest.raises(ValueError) as e:
            # the comparison is evaluated only for the exception it raises
            _Use(File("a"), _LinkType.INPUT) == "use"

        assert "_Use cannot be compared with" in str(e.value)

    @pytest.mark.parametrize(
        "use, expected",
        [
            (
                _Use(File("a"),
                     _LinkType.INPUT,
                     stage_out=None,
                     register_replica=False),
                {
                    "lfn": "a",
                    "type": "input",
                    "registerReplica": False,
                },
            ),
            (
                _Use(File("a"),
                     _LinkType.INPUT,
                     stage_out=None,
                     register_replica=None),
                {
                    "lfn": "a",
                    "type": "input",
                },
            ),
            (
                _Use(
                    File("a", size=2048).add_metadata(createdBy="ryan"),
                    _LinkType.OUTPUT,
                    stage_out=False,
                    register_replica=True,
                ),
                {
                    "lfn": "a",
                    "size": 2048,
                    "metadata": {
                        "createdBy": "ryan"
                    },
                    "type": "output",
                    "stageOut": False,
                    "registerReplica": True,
                },
            ),
            (
                _Use(
                    File("a", size=1024),
                    _LinkType.CHECKPOINT,
                    stage_out=True,
                    register_replica=True,
                ),
                {
                    "lfn": "a",
                    "size": 1024,
                    "type": "checkpoint",
                    "stageOut": True,
                    "registerReplica": True,
                },
            ),
        ],
    )
    def test_tojson(self, use, expected):
        """Serializing a _Use with _CustomEncoder yields the expected mapping;
        per the cases above, stage_out/register_replica given as None are
        left out of the output."""
        result = json.loads(json.dumps(use, cls=_CustomEncoder))
        assert result == expected
Пример #29
0
 def test_eq(self):
     """Files compare equal only to Files constructed with the same lfn."""
     same_lfn, other_lfn = File("a"), File("b")

     assert File("a") == same_lfn
     assert File("a") != other_lfn
     # a non-File object never compares equal to a File
     assert File("a") != 1
Пример #30
0
    def test_invalid_use_bad_link_type(self):
        """Passing something other than a _LinkType must raise TypeError."""
        with pytest.raises(TypeError) as e:
            _Use(File("a"), "link")

        # Check the raised exception's own message; str() of the ExceptionInfo
        # wrapper is its repr on current pytest, not the message.
        assert "invalid link_type: link;" in str(e.value)