Example #1
def dump(obj: SiteCatalog, fp: TextIO, _format="yml", *args, **kwargs) -> None:
    """
    Serialize ``obj`` as a :py:class:`~Pegasus.api.site_catalog.SiteCatalog` formatted stream to ``fp`` (a ``.write()``-supporting file-like object).

    :param obj: SiteCatalog to serialize
    :type obj: SiteCatalog
    :param fp: file like object to serialize to
    :type fp: TextIO
    :param _format: format to write to if fp does not have an extension; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :rtype: None
    """
    obj.write(fp, _format=_format)
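
A minimal usage sketch (hypothetical, not part of the original API docs; the catalog contents and the ``sites.yml`` path are illustrative only)::

    sc = SiteCatalog()  # empty catalog, purely for illustration
    with open("sites.yml", "w") as f:
        dump(sc, f)  # YAML output, since _format defaults to "yml"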
Example #2
def dumps(obj: SiteCatalog, _format="yml", *args, **kwargs) -> str:
    """
    Serialize ``obj`` to a :py:class:`~Pegasus.api.site_catalog.SiteCatalog` formatted ``str``.

    :param obj: SiteCatalog to serialize
    :type obj: SiteCatalog
    :param _format: format to serialize to; can be one of ["yml" | "yaml" | "json"], defaults to "yml"
    :type _format: str
    :return: SiteCatalog serialized as a string
    :rtype: str
    """
    with StringIO() as s:
        obj.write(s, _format=_format)
        s.seek(0)
        return s.read()
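
A matching sketch for ``dumps`` (hypothetical values, illustrative only)::

    sc = SiteCatalog()
    yml_str = dumps(sc)                   # YAML by default
    json_str = dumps(sc, _format="json")  # or JSON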
Example #3
def sc1():
    return SiteCatalog().add_sites(
        Site(
            "local", arch=Arch.X86_64, os_type=OS.LINUX, os_release="1", os_version="1",
        )
        .add_directories(
            Directory(Directory.LOCAL_SCRATCH, "/path").add_file_servers(
                FileServer("url", Operation.ALL).add_dagman_profile(retry=1)
            )
        )
        .add_dagman_profile(retry=1)
        .add_grids(
            Grid(
                Grid.CONDOR,
                "contact",
                Scheduler.CONDOR,
                job_type=SupportedJobs.REGISTER,
                free_mem=1,
                total_mem=1,
                max_count=1,
                max_cpu_time=1,
                running_jobs=1,
                jobs_in_queue=1,
                idle_nodes=1,
                total_nodes=1,
            )
        )
    )
Example #4
    def test_write_default(self):
        expected_file = Path("sites.yml")
        SiteCatalog().write()

        try:
            expected_file.unlink()
        except FileNotFoundError:
            pytest.fail("could not find {}".format(expected_file))
Example #5
    def test_add_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")

        try:
            wf.add_site_catalog(sc)
        except Exception:
            pytest.fail("should not have raised exception")
Example #6
def wf3():
    wf = Workflow("test")
    wf.add_jobs(Job("ls"))
    wf.add_site_catalog(SiteCatalog())
    wf.add_transformation_catalog(TransformationCatalog())
    wf.add_replica_catalog(ReplicaCatalog())

    return wf
Example #7
    def test_add_duplicate_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")
        wf.add_site_catalog(sc)

        with pytest.raises(DuplicateError) as e:
            wf.add_site_catalog(sc)

        assert "a SiteCatalog has already" in str(e)
Example #8
def sc2():
    return SiteCatalog().add_sites(
        Site("local",)
        .add_directories(
            Directory(Directory.LOCAL_SCRATCH, "/path").add_file_servers(
                FileServer("url", Operation.ALL)
            )
        )
        .add_grids(Grid(Grid.CONDOR, "contact", Scheduler.CONDOR,))
    )
Example #9
    def test_site_catalog_key_ordering_on_yml_write(self):
        SiteCatalog().write()

        EXPECTED_FILE = Path("sites.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that sc keys have been ordered as follows:
        - pegasus
        - sites
        """
        p = re.compile(r"x-pegasus:[\w\W]+pegasus: '5.0'[\w\W]+sites:[\w\W]")
        assert p.match(result) is not None
Example #10
    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that wf keys have been ordered as follows (while ignoring nested keys):
        - pegasus,
        - name,
        - hooks,
        - profiles,
        - metadata,
        - siteCatalog,
        - replicaCatalog,
        - transformationCatalog,
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None
Example #11
def sc1():
    return SiteCatalog().add_sites(
        Site(
            "local", arch=Arch.X86_64, os_type=OS.LINUX, os_release="1", os_version="1",
        )
        .add_directories(
            Directory(Directory.LOCAL_SCRATCH, "/path").add_file_servers(
                FileServer("url", Operation.ALL).add_dagman_profile(retry=1)
            )
        )
        .add_dagman_profile(retry=1)
        .add_grids(
            Grid(
                Grid.CONDOR,
                "contact",
                Scheduler.CONDOR,
                job_type=SupportedJobs.REGISTER,
            )
        )
    )
Example #12
    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.safe_load(f)

        expected = {
            "pegasus": "5.0",
            "name": "test",
            "siteCatalog": {"sites": []},
            "replicaCatalog": {"replicas": []},
            "transformationCatalog": {"transformations": []},
            "jobs": [
                {
                    "type": "job",
                    "name": "ls",
                    "id": "ID0000001",
                    "arguments": [],
                    "uses": [],
                }
            ],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()
Example #13
def _to_sc(d: dict) -> SiteCatalog:
    """Convert dict to SiteCatalog

    :param d: SiteCatalog represented as a dict
    :type d: dict
    :raises PegasusError: encountered error parsing
    :return: a SiteCatalog object based on d
    :rtype: SiteCatalog
    """

    try:
        sc = SiteCatalog()

        for s in d["sites"]:
            site = Site(
                s["name"],
                arch=getattr(Arch, s.get("arch").upper()) if s.get("arch") else None,
                os_type=getattr(OS, s.get("os.type").upper())
                if s.get("os.type")
                else None,
                os_release=s.get("os.release"),
                os_version=s.get("os.version"),
            )

            # add directories
            for _dir in s["directories"]:
                dir_type = None
                for enum_name, enum in _DirectoryType.__members__.items():
                    if _dir["type"] == enum.value:
                        dir_type = enum_name
                        break

                directory = Directory(getattr(Directory, dir_type), _dir["path"])

                # add file servers
                for fs in _dir["fileServers"]:
                    file_server = FileServer(
                        fs["url"], getattr(Operation, fs["operation"].upper())
                    )

                    # add profiles
                    if fs.get("profiles"):
                        file_server.profiles = defaultdict(dict, fs.get("profiles"))

                    # add file server to this directory
                    directory.add_file_servers(file_server)

                # add directory to this site
                site.add_directories(directory)

            # add grids
            if s.get("grids"):
                for gr in s.get("grids"):
                    grid = Grid(
                        getattr(Grid, gr["type"].upper()),
                        gr["contact"],
                        getattr(Scheduler, gr["scheduler"].upper()),
                        job_type=getattr(SupportedJobs, gr.get("jobtype").upper())
                        if gr.get("jobtype")
                        else None,
                    )

                    # add grid to this site
                    site.add_grids(grid)

            # add profiles
            if s.get("profiles"):
                site.profiles = defaultdict(dict, s.get("profiles"))

            # add site to sc
            sc.add_sites(site)

        return sc

    except KeyError:
        raise PegasusError("error parsing {}".format(d))
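
A hedged round-trip sketch combining ``dumps`` (Example #2) with ``_to_sc``; it assumes the serialized dict exposes the ``sites``, ``directories``, and ``fileServers`` keys that ``_to_sc`` reads, and that ``yaml`` is PyYAML::

    import yaml

    sc = sc1()                     # catalog fixture from Example #3
    d = yaml.safe_load(dumps(sc))  # SiteCatalog as a plain dict
    restored = _to_sc(d)           # parsed back into a SiteCatalog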
Example #14
    def test_add_duplicate_site(self):
        sc = SiteCatalog()
        sc.add_sites(Site("local"))
        with pytest.raises(DuplicateError):
            sc.add_sites(Site("local"))
Example #15
    def test_chaining(self):
        sc = SiteCatalog()
        a = sc.add_sites(Site("local"))
        b = sc.add_sites(Site("condor_pool"))

        assert id(a) == id(b)
Example #16
    def test_tojson(self, convert_yaml_schemas_to_json, load_schema, expected_json):
        sc = SiteCatalog().add_sites(
            Site("local", arch=Arch.X86_64, os_type=OS.LINUX).add_directories(
                Directory(
                    Directory.SHARED_SCRATCH, "/tmp/workflows/scratch"
                ).add_file_servers(
                    FileServer("file:///tmp/workflows/scratch", Operation.ALL)
                ),
                Directory(
                    Directory.LOCAL_STORAGE, "/tmp/workflows/outputs"
                ).add_file_servers(
                    FileServer("file:///tmp/workflows/outputs", Operation.ALL)
                ),
            ),
            Site("condor_pool", arch=Arch.X86_64, os_type=OS.LINUX)
            .add_directories(
                Directory(Directory.SHARED_SCRATCH, "/lustre").add_file_servers(
                    FileServer("gsiftp://smarty.isi.edu/lustre", Operation.ALL)
                )
            )
            .add_grids(
                Grid(
                    Grid.GT5,
                    "smarty.isi.edu/jobmanager-pbs",
                    Scheduler.PBS,
                    job_type=SupportedJobs.AUXILLARY,
                ),
                Grid(
                    Grid.GT5,
                    "smarty.isi.edu/jobmanager-pbs",
                    Scheduler.PBS,
                    job_type=SupportedJobs.COMPUTE,
                ),
            )
            .add_env(JAVA_HOME="/usr/bin/java"),
            Site("staging_site", arch=Arch.X86_64, os_type=OS.LINUX).add_directories(
                Directory(Directory.SHARED_SCRATCH, "/data")
                .add_file_servers(
                    FileServer("scp://obelix.isi.edu/data", Operation.PUT)
                )
                .add_file_servers(
                    FileServer("http://obelix.isi.edu/data", Operation.GET)
                )
            ),
        )

        result = json.loads(json.dumps(sc, cls=_CustomEncoder))

        sc_schema = load_schema("sc-5.0.json")
        validate(instance=result, schema=sc_schema)

        result["sites"].sort(key=lambda s: s["name"])
        for i in range(len(result["sites"])):
            result["sites"][i]["directories"].sort(key=lambda d: d["path"])

            for j in range(len(result["sites"][i]["directories"])):
                result["sites"][i]["directories"][j]["fileServers"].sort(
                    key=lambda fs: fs["url"])

            if "grids" in result["sites"][i]:
                result["sites"][i]["grids"].sort(key=lambda g: g["jobtype"])

        assert result == expected_json
Example #17
    def test_write(self, expected_json, _format, loader):
        sc = (
            SiteCatalog()
            .add_sites(
                Site("local", arch=Arch.X86_64, os_type=OS.LINUX).add_directories(
                    Directory(
                        Directory.SHARED_SCRATCH, "/tmp/workflows/scratch"
                    ).add_file_servers(
                        FileServer("file:///tmp/workflows/scratch", Operation.ALL)
                    ),
                    Directory(
                        Directory.LOCAL_STORAGE, "/tmp/workflows/outputs"
                    ).add_file_servers(
                        FileServer("file:///tmp/workflows/outputs", Operation.ALL)
                    ),
                )
            )
            .add_sites(
                Site("condor_pool", arch=Arch.X86_64, os_type=OS.LINUX)
                .add_directories(
                    Directory(Directory.SHARED_SCRATCH, "/lustre").add_file_servers(
                        FileServer("gsiftp://smarty.isi.edu/lustre", Operation.ALL)
                    )
                )
                .add_grids(
                    Grid(
                        Grid.GT5,
                        "smarty.isi.edu/jobmanager-pbs",
                        Scheduler.PBS,
                        job_type=SupportedJobs.AUXILLARY,
                    ),
                    Grid(
                        Grid.GT5,
                        "smarty.isi.edu/jobmanager-pbs",
                        Scheduler.PBS,
                        job_type=SupportedJobs.COMPUTE,
                    ),
                )
                .add_env(JAVA_HOME="/usr/bin/java"),
                Site("staging_site", arch=Arch.X86_64, os_type=OS.LINUX).add_directories(
                    Directory(Directory.SHARED_SCRATCH, "/data").add_file_servers(
                        FileServer("scp://obelix.isi.edu/data", Operation.PUT),
                        FileServer("http://obelix.isi.edu/data", Operation.GET),
                    )
                ),
            )
        )

        with NamedTemporaryFile(mode="r+") as f:
            sc.write(f, _format=_format)
            f.seek(0)
            result = loader(f)

        result["sites"].sort(key=lambda s: s["name"])
        for i in range(len(result["sites"])):
            result["sites"][i]["directories"].sort(key=lambda d: d["path"])

            for j in range(len(result["sites"][i]["directories"])):
                result["sites"][i]["directories"][j]["fileServers"].sort(
                    key=lambda fs: fs["url"])

            if "grids" in result["sites"][i]:
                result["sites"][i]["grids"].sort(key=lambda g: g["jobtype"])

        assert "createdOn" in result["x-pegasus"]
        assert result["x-pegasus"]["createdBy"] == getpass.getuser()
        assert result["x-pegasus"]["apiLang"] == "python"
        del result["x-pegasus"]
        assert result == expected_json
Example #18
    def test_add_invalid_site(self):
        with pytest.raises(TypeError) as e:
            sc = SiteCatalog()
            sc.add_sites("badsite")

        assert "invalid site: badsite" in str(e)
Example #19
class TestWorkflow:
    @pytest.mark.parametrize(
        "job",
        [
            (Job("t1", _id="job")),
            (SubWorkflow(File("f1"), False, _id="job")),
            (SubWorkflow("f1", True, _id="job")),
        ],
    )
    def test_add_job(self, job):
        wf = Workflow("wf")
        wf.add_jobs(job)

        assert job == wf.get_job("job")

    def test_add_duplicate_job(self):
        wf = Workflow("wf")
        with pytest.raises(DuplicateError):
            wf.add_jobs(Job("t1", _id="j1"), Job("t2", _id="j1"))

    def test_get_job(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1")
        wf.add_jobs(j1)

        assert j1 == wf.get_job("j1")

    def test_get_invalid_job(self):
        wf = Workflow("wf")
        with pytest.raises(NotFoundError):
            wf.get_job("abc123")

    def test_job_id_assignment_by_workflow(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="a")
        j2 = Job("t2")
        j3 = Job("t3", _id="b")
        j4 = Job("t4")
        j5 = Job("t5")
        wf.add_jobs(j1, j2, j3, j4, j5)

        assert j2._id == "ID0000001"
        assert j4._id == "ID0000002"
        assert j5._id == "ID0000003"

    def test_add_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")

        try:
            wf.add_site_catalog(sc)
        except Exception:
            pytest.fail("should not have raised exception")

    def test_add_invalid_site_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_site_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_site_catalog(self):
        sc = SiteCatalog()
        wf = Workflow("wf")
        wf.add_site_catalog(sc)

        with pytest.raises(DuplicateError) as e:
            wf.add_site_catalog(sc)

        assert "a SiteCatalog has already" in str(e)

    def test_add_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")

        try:
            wf.add_replica_catalog(rc)
        except Exception:
            pytest.fail("should not have raised exception")

    def test_add_invalid_replica_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_replica_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_replica_catalog(self):
        rc = ReplicaCatalog()
        wf = Workflow("wf")
        wf.add_replica_catalog(rc)

        with pytest.raises(DuplicateError) as e:
            wf.add_replica_catalog(rc)

        assert "a ReplicaCatalog has already" in str(e)

    def test_add_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")

        try:
            wf.add_transformation_catalog(tc)
        except Exception:
            pytest.fail("should not have raised exception")

    def test_add_invalid_transformation_catalog(self):
        wf = Workflow("wf")
        with pytest.raises(TypeError) as e:
            wf.add_transformation_catalog(123)

        assert "invalid catalog: 123" in str(e)

    def test_add_duplicate_transformation_catalog(self):
        tc = TransformationCatalog()
        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)

        with pytest.raises(DuplicateError) as e:
            wf.add_transformation_catalog(tc)

        assert "a TransformationCatalog has already" in str(e)

    def test_add_dependency_parents(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [
            Job("t", _id="parent1"),
            Job("t", _id="parent2"),
            Job("t", _id="parent3"),
        ]

        wf.add_jobs(job, *parents)

        wf.add_dependency(job, parents=[parents[0]])
        wf.add_dependency(job, parents=parents[1:])

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

    def test_add_dependency_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        children = [
            Job("t", _id="child1"),
            Job("t", _id="child2"),
            Job("t", _id="child3"),
        ]

        wf.add_jobs(job, *children)

        wf.add_dependency(job, children=[children[0]])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {children[0]._id})

        wf.add_dependency(job, children=children[1:])
        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})

    def test_add_dependency_parents_and_children(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parents = [Job("t", _id="parent1"), Job("t", _id="parent2")]

        children = [Job("t", _id="child1"), Job("t", _id="child2")]

        wf.add_jobs(*parents, *children)

        # add nothing
        wf.add_dependency(job)
        assert len(wf.dependencies) == 0

        wf.add_dependency(job, parents=parents, children=children)

        for parent in parents:
            assert wf.dependencies[parent._id] == _JobDependency(
                parent._id, {job._id})

        assert wf.dependencies[job._id] == _JobDependency(
            job._id, {child._id
                      for child in children})

    def test_add_duplicate_parent_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t", _id="parent")

        wf.add_jobs(job, parent)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, parents=[parent, parent])

        assert (
            "A dependency already exists between parent id: parent and job id: job"
            in str(e))

    def test_add_duplicate_child_dependency(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t", _id="child")

        wf.add_jobs(job, child)

        with pytest.raises(DuplicateError) as e:
            wf.add_dependency(job, children=[child, child])

        assert (
            "A dependency already exists between job id: job and child id: child"
            in str(e))

    def test_add_dependency_invalid_job(self):
        wf = Workflow("wf")
        job = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job)

        assert "The given job does not have an id" in str(e)

    def test_add_dependency_invalid_parent(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        parent = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, parents=[parent])

        assert "One of the given parents does not have an id" in str(e)

    def test_add_dependency_invalid_child(self):
        wf = Workflow("wf")
        job = Job("t", _id="job")
        child = Job("t")

        with pytest.raises(ValueError) as e:
            wf.add_dependency(job, children=[child])

        assert "One of the given children does not have an id" in str(e)

    def test_infer_dependencies_fork_join_wf(self):
        wf = Workflow("wf")

        f1 = File("f1")
        f2 = File("f2")
        f3 = File("f3")
        f4 = File("f4")

        fork = Job("t1", _id="fork").add_outputs(f1, f2)
        work1 = Job("t1", _id="work1").add_inputs(f1).add_outputs(f3)
        work2 = Job("t1", _id="work2").add_inputs(f2).add_outputs(f4)
        join = Job("t1", _id="join").add_inputs(f3, f4)
        wf.add_jobs(fork, work1, work2, join)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["fork"] == _JobDependency(
            "fork", {"work1", "work2"})
        assert wf.dependencies["work1"] == _JobDependency("work1", {"join"})
        assert wf.dependencies["work2"] == _JobDependency("work2", {"join"})

    def test_infer_dependencies_when_job_uses_stdin_stdout_and_stderr(self):
        wf = Workflow("wf")
        j1 = Job("t1", _id="j1").add_outputs(File("f1"))
        j2 = Job("t1",
                 _id="j2").set_stdin(*j1.get_outputs()).set_stdout(File("f2"))
        j3 = Job("t1", _id="j3").add_inputs(*j2.get_outputs())
        wf.add_jobs(j1, j2, j3)

        # manually call _infer_dependencies() as it is only called when
        # wf.write() is called
        wf._infer_dependencies()

        assert wf.dependencies["j1"] == _JobDependency("j1", {"j2"})
        assert wf.dependencies["j2"] == _JobDependency("j2", {"j3"})

    def test_tojson(self, convert_yaml_schemas_to_json, load_schema, wf,
                    expected_json):
        result = json.loads(json.dumps(wf, cls=_CustomEncoder))

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

    @pytest.mark.parametrize("_format, loader", [("json", json.load),
                                                 ("yml", yaml.safe_load)])
    def test_write_file_obj(
        self,
        convert_yaml_schemas_to_json,
        load_schema,
        wf,
        expected_json,
        _format,
        loader,
    ):
        with NamedTemporaryFile("r+") as f:
            wf.write(f, _format=_format)

            # _path should be set by the call to write
            assert wf._path == f.name

            f.seek(0)
            result = loader(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

    def test_write_str_filename(self, wf, load_schema, expected_json):
        path = "wf.yml"
        wf.write(path)

        # _path should be set by the call to write
        assert wf._path == path

        with open(path) as f:
            result = yaml.safe_load(f)

        workflow_schema = load_schema("wf-5.0.json")
        validate(instance=result, schema=workflow_schema)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])
        result["jobs"][0]["uses"] = sorted(result["jobs"][0]["uses"],
                                           key=lambda u: u["lfn"])
        result["jobs"][1]["uses"] = sorted(result["jobs"][1]["uses"],
                                           key=lambda u: u["lfn"])

        assert result == expected_json

        os.remove(path)

    def test_write_default_filename(self, wf, expected_json):
        wf.write()
        EXPECTED_FILE = "workflow.yml"

        with open(EXPECTED_FILE) as f:
            result = yaml.safe_load(f)

        result["jobs"] = sorted(result["jobs"], key=lambda j: j["id"])

        for i in range(len(result["jobs"])):
            result["jobs"][i]["uses"] = sorted(result["jobs"][i]["uses"],
                                               key=lambda u: u["lfn"])

        assert result == expected_json

        os.remove(EXPECTED_FILE)

    def test_write_wf_catalogs_included(self):
        wf = Workflow("test")
        wf.add_jobs(Job("ls"))

        wf.add_transformation_catalog(TransformationCatalog())
        wf.add_site_catalog(SiteCatalog())
        wf.add_replica_catalog(ReplicaCatalog())

        wf_path = Path("workflow.yml")
        with wf_path.open("w+") as f:
            wf.write(f)
            f.seek(0)
            result = yaml.safe_load(f)

        expected = {
            "pegasus": "5.0",
            "name": "test",
            "siteCatalog": {"sites": []},
            "replicaCatalog": {"replicas": []},
            "transformationCatalog": {"transformations": []},
            "jobs": [
                {
                    "type": "job",
                    "name": "ls",
                    "id": "ID0000001",
                    "arguments": [],
                    "uses": [],
                }
            ],
            "jobDependencies": [],
        }

        assert expected == result

        wf_path.unlink()

    def test_write_valid_hierarchical_workflow(self, mocker):
        mocker.patch("Pegasus.api.workflow.Workflow.write")

        try:
            wf = Workflow("test")
            wf.add_jobs(SubWorkflow("file", False))
            wf.write(file="workflow.yml", _format="yml")
        except PegasusError:
            pytest.fail("shouldn't have thrown PegasusError")

        Pegasus.api.workflow.Workflow.write.assert_called_once_with(
            file="workflow.yml", _format="yml")

    @pytest.mark.parametrize(
        "sc, tc",
        [
            (SiteCatalog(), None),
            (None, TransformationCatalog()),
            (SiteCatalog(), TransformationCatalog()),
        ],
    )
    def test_write_hierarchical_workflow_when_catalogs_are_inlined(
            self, sc, tc):
        wf = Workflow("test")
        wf.add_jobs(SubWorkflow("file", False))

        if sc:
            wf.add_site_catalog(sc)

        if tc:
            wf.add_transformation_catalog(tc)

        with pytest.raises(PegasusError) as e:
            wf.write()

        assert (
            "Site Catalog and Transformation Catalog must be written as a separate"
            in str(e))

    def test_workflow_key_ordering_on_yml_write(self):
        tc = TransformationCatalog()
        rc = ReplicaCatalog()
        sc = SiteCatalog()

        wf = Workflow("wf")
        wf.add_transformation_catalog(tc)
        wf.add_replica_catalog(rc)
        wf.add_site_catalog(sc)

        wf.add_jobs(Job("t1", _id="a"))

        wf.add_env(JAVA_HOME="/java/home")
        wf.add_shell_hook(EventType.START, "/bin/echo hi")
        wf.add_metadata(key="value")

        wf.write()
        EXPECTED_FILE = Path("workflow.yml")

        with EXPECTED_FILE.open() as f:
            # reading in as str so ordering of keys is not disrupted
            # when loaded into a dict
            result = f.read()

        EXPECTED_FILE.unlink()
        """
        Check that wf keys have been ordered as follows (while ignoring nested keys):
        - pegasus,
        - name,
        - hooks,
        - profiles,
        - metadata,
        - siteCatalog,
        - replicaCatalog,
        - transformationCatalog,
        - jobs
        - jobDependencies
        """
        p = re.compile(
            r"pegasus: '5.0'[\w\W]+name:[\w\W]+hooks:[\w\W]+profiles:[\w\W]+metadata:[\w\W]+siteCatalog:[\w\W]+replicaCatalog:[\w\W]+transformationCatalog:[\w\W]+jobs:[\w\W]+jobDependencies:[\w\W]+"
        )
        assert p.match(result) is not None

    def test_plan_workflow_already_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        path = "wf.yml"
        wf.write(path).plan()

        assert wf._path == path

        Pegasus.client._client.Client.plan.assert_called_once_with(
            path,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(path)

    def test_plan_workflow_not_written(self, wf, mocker):
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")
        mocker.patch("Pegasus.client._client.Client.plan")

        DEFAULT_WF_PATH = "workflow.yml"
        wf.plan()

        assert wf._path == DEFAULT_WF_PATH

        Pegasus.client._client.Client.plan.assert_called_once_with(
            DEFAULT_WF_PATH,
            cleanup="none",
            conf=None,
            dir=None,
            force=False,
            input_dirs=None,
            output_dir=None,
            output_sites=["local"],
            relative_dir=None,
            sites=None,
            staging_sites=None,
            submit=False,
            verbose=0,
        )

        os.remove(DEFAULT_WF_PATH)

    def test_run(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.run")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf.run()

        Pegasus.client._client.Client.run.assert_called_once_with(None,
                                                                  verbose=0)

    def test_status(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.status")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.status()

        Pegasus.client._client.Client.status.assert_called_once_with(
            wf._submit_dir, long=0, verbose=0)

    def test_remove(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.remove")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.remove()

        Pegasus.client._client.Client.remove.assert_called_once_with(
            wf._submit_dir, verbose=0)

    def test_analyze(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.analyzer")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.analyze()

        Pegasus.client._client.Client.analyzer.assert_called_once_with(
            wf._submit_dir, verbose=0)

    def test_statistics(self, wf, mocker):
        mocker.patch("Pegasus.client._client.Client.statistics")
        mocker.patch("shutil.which", return_value="/usr/bin/pegasus-version")

        wf._submit_dir = "submit_dir"
        wf.statistics()

        Pegasus.client._client.Client.statistics.assert_called_once_with(
            wf._submit_dir, verbose=0)
Example #20
    def test_add_valid_site(self):
        sc = SiteCatalog()
        assert sc.add_sites(Site("local"))