Example #1
    def fixture():
        # We can't use pytest's tempdir because that is limited to
        # scope=function.
        tmpdir = tempfile.mkdtemp(prefix="reproman-tests-")
        repodir = os.path.realpath(os.path.join(tmpdir, "repo0"))
        os.mkdir(repodir)

        retval = repodir

        with chpwd(repodir):
            runner.run(["git", "init"])
            setup_user()

            if kind != "empty":
                add_and_commit("foo")
                add_and_commit("bar")
                runner.run(["git", "tag", "tag0"])
                add_and_commit("subdir/baz")

            if kind == "pair":
                localdir = os.path.realpath(os.path.join(tmpdir, "repo1"))
                runner.run(["git", "clone", repodir, localdir],
                           expect_stderr=True)
                with chpwd(localdir):
                    setup_user()
                retval = localdir, repodir
        yield retval
        shutil.rmtree(tmpdir)
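
Every example on this page leans on chpwd to temporarily switch the working directory (the fixture above also pulls kind, runner, setup_user, and add_and_commit from its enclosing factory scope). ReproMan's chpwd does more than the basics, but its core behavior can be sketched as a small context manager:

import os
from contextlib import contextmanager

@contextmanager
def chpwd_sketch(path):
    # Enter `path` for the duration of the block and restore the previous
    # working directory even if the body raises.
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)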
Example #2
def test_orc_datalad_abort_if_dirty(job_spec, dataset, ssh):
    subds = dataset.create(path="sub")
    subds.create(path="subsub")
    dataset.save()

    job_spec["inputs"] = []
    job_spec["outputs"] = []

    def get_orc(jspec=None):
        return orcs.DataladPairRunOrchestrator(
            ssh, submission_type="local",
            job_spec=jspec or job_spec)

    def run(**spec_kwds):
        jspec = dict(job_spec, **spec_kwds)
        with chpwd(dataset.path):
            orc = get_orc(jspec)
            # Run one job so that we create the remote repository.
            orc.prepare_remote()
            orc.submit()
            orc.follow()
            orc.fetch()
            return orc

    with chpwd(dataset.path):
        # We abort if the local dataset is dirty.
        create_tree(dataset.path, {"local-dirt": ""})
        with pytest.raises(OrchestratorError) as exc:
            get_orc()
        assert "dirty" in str(exc.value)
        os.unlink("local-dirt")

    # Run one job so that we create the remote repository.
    run(_resolved_command_str="echo one >one")

    with chpwd(dataset.path):
        orc1 = get_orc()
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        assert "dirty" in str(exc.value)
    os.unlink(op.join(orc1.working_directory, "dirty"))

    # We can run if the submodule simply has a different commit checked out.
    run(_resolved_command_str="echo two >two")

    create_tree(op.join(dataset.path, "sub"), {"for-local-commit": ""})
    dataset.add(".", recursive=True)

    run(_resolved_command_str="echo three >three")

    # But we abort if subdataset is actually dirty.
    with chpwd(dataset.path):
        orc2 = get_orc()
        create_tree(orc2.working_directory,
                    {"sub": {"subsub": {"subdirt": ""}}})
        with pytest.raises(OrchestratorError) as exc:
            orc2.prepare_remote()
        assert "dirty" in str(exc.value)
    os.unlink(op.join(orc2.working_directory, "sub", "subsub", "subdirt"))
Example #3
def test_orc_datalad_abort_if_dirty(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc0 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        # Run one job so that we create the remote repository.
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

    with chpwd(dataset.path):
        orc1 = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        create_tree(orc1.working_directory, {"dirty": ""})
        with pytest.raises(OrchestratorError) as exc:
            orc1.prepare_remote()
        assert "dirty" in str(exc)
Example #4
def test_orc_datalad_pair_need_follow_parent(job_spec, dataset, shell):
    # An example of a scenario that fails without DataLad's --follow=parentds
    with chpwd(dataset.path):
        dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo baz >baz'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc0 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc0.prepare_remote()
        orc0.submit()
        orc0.follow()

        job_spec["_resolved_command_str"] = "sh -c 'echo bar >sub/bar'"
        output = op.join("sub", "bar")
        job_spec["outputs"] = [output]
        orc1 = orcs.DataladPairOrchestrator(shell,
                                            submission_type="local",
                                            job_spec=job_spec)
        orc1.prepare_remote()
        orc1.submit()
        orc1.follow()
        orc1.fetch()
        assert op.exists(output)
Example #5
def test_orc_datalad_pair_new_submodule(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        orc.fetch()

        # prepare_remote() doesn't fail when a new subdataset is added after
        # the first run.
        sub = dataset.create("sub")
        dataset.save()

        job_spec["_resolved_command_str"] = "sh -c 'echo a >sub/a'"
        job_spec["inputs"] = []
        job_spec["outputs"] = []

        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        orc.fetch()
        assert sub.repo.is_under_annex("a")
Example #6
def test_orc_datalad_no_remote_get(tmpdir, shell, should_pass):
    import datalad.api as dl

    topdir = str(tmpdir)
    ds_a = dl.create(op.join(topdir, "a"))
    if should_pass:
        (ds_a.pathobj / "foo").write_text("data")
        ds_a.save()

    ds_b = dl.clone(ds_a.path, op.join(topdir, "b"))
    assert not ds_b.repo.file_has_content("foo")
    with chpwd(ds_b.path):
        orc = orcs.DataladNoRemoteOrchestrator(shell,
                                               submission_type="local",
                                               job_spec={
                                                   "root_directory":
                                                   op.join(topdir, "run-root"),
                                                   "inputs": ["foo"],
                                                   "outputs": ["out"],
                                                   "_resolved_command_str":
                                                   'sh -c "cat foo foo >out"'
                                               })
        if should_pass:
            orc.prepare_remote()
            orc.submit()
            orc.follow()

            finish_fn = MagicMock()
            orc.fetch(on_remote_finish=finish_fn)
            finish_fn.assert_called_once_with(orc.resource, [])
            assert (ds_b.pathobj / "out").read_text() == "datadata"
        else:
            with pytest.raises(OrchestratorError):
                orc.prepare_remote()
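
The MagicMock pattern above is this page's standard way of checking callback wiring: fetch() must invoke on_remote_finish exactly once, with the resource and the list of failed subjobs (empty on success). A self-contained toy version of that contract:

from unittest.mock import MagicMock

def fetch(resource, on_remote_finish):
    # Toy stand-in for orc.fetch(): report no failed subjobs.
    failed = []
    on_remote_finish(resource, failed)

finish_fn = MagicMock()
fetch("my-resource", finish_fn)
finish_fn.assert_called_once_with("my-resource", [])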
Example #7
def test_orc_datalad_concurrent(job_spec, dataset, ssh, orc_class, sub_type):
    names = ["paul", "rosa"]

    job_spec["inputs"] = ["{p[name]}.in"]
    job_spec["outputs"] = ["{p[name]}.out"]
    job_spec["_resolved_command_str"] = "sh -c 'cat {inputs} {inputs} >{outputs}'"
    job_spec["_resolved_batch_parameters"] = [{"name": n} for n in names]

    in_files = [n + ".in" for n in names]
    for fname in in_files:
        with open(op.join(dataset.path, fname), "w") as fh:
            fh.write(fname[0])
    dataset.save(path=in_files)

    with chpwd(dataset.path):
        orc = orc_class(ssh, submission_type=sub_type, job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        # Just make sure each fetch() seems to have wired up on_remote_finish.
        # test_run.py tests the actual --follow actions.
        remote_fn = MagicMock()
        orc.fetch(on_remote_finish=remote_fn)
        remote_fn.assert_called_once_with(orc.resource, [])

        out_files = [n + ".out" for n in names]
        for ofile in out_files:
            assert dataset.repo.file_has_content(ofile)
            with open(ofile) as ofh:
                assert ofh.read() == ofile[0] * 2
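
The {p[name]} placeholders in the templates are presumably expanded once per batch-parameter dict, with the dict bound to p. Plain str.format shows the assumed mechanics (the real expansion happens inside reproman):

params = [{"name": "paul"}, {"name": "rosa"}]
inputs = ["{p[name]}.in".format(p=p) for p in params]
assert inputs == ["paul.in", "rosa.in"]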
Example #8
def test_orc_datalad_no_remote_only_local(dataset, job_spec, ssh):
    with chpwd(dataset.path):
        orc = orcs.DataladNoRemoteOrchestrator(ssh,
                                               submission_type="local",
                                               job_spec=job_spec)
        with pytest.raises(OrchestratorError):
            orc.prepare_remote()
Example #9
def test_orc_datalad_pair_run_ontop(job_spec, dataset, ssh):
    # Run one orchestrator and fetch, then run another and fetch:
    #
    #   orc 1, master
    #   |
    #   o orc 0
    #   |
    #   o
    ds = dataset
    create_tree(ds.path, {"in": "content\n"})
    ds.add(".")

    js0 = job_spec
    js1 = dict(job_spec, _resolved_command_str='bash -c "echo other >other"')
    with chpwd(ds.path):
        def do(js):
            orc = orcs.DataladPairRunOrchestrator(
                ssh, submission_type="local", job_spec=js)
            orc.prepare_remote()
            orc.submit()
            orc.follow()
            orc.fetch()
            return orc

        orc0 = do(js0)
        orc1 = do(js1)

        ref0 = "refs/reproman/{}".format(orc0.jobid)
        ref1 = "refs/reproman/{}".format(orc1.jobid)

        assert ds.repo.is_ancestor(ref0, ref1)
        assert ds.repo.get_hexsha(ref0) != ds.repo.get_hexsha(ref1)
        assert ds.repo.get_hexsha(ref1) == ds.repo.get_hexsha("master")
        assert ds.repo.get_active_branch() == "master"
Example #10
    def _expand_globs(self):
        def normalize_hit(h):
            normalized = op.relpath(h) + ("" if op.basename(h) else op.sep)
            if h == op.curdir + op.sep + normalized:
                # Don't let relpath prune "./fname" (gh-3034).
                return h
            return normalized

        expanded = []
        with chpwd(self.pwd):
            for pattern in self._paths["patterns"]:
                hits = glob.glob(pattern)
                if hits:
                    expanded.extend(sorted(map(normalize_hit, hits)))
                else:
                    lgr.debug("No matching files found for '%s'", pattern)
                    # We didn't find a hit for the complete pattern. If we find
                    # a sub-pattern hit, that may mean we have an uninstalled
                    # subdataset.
                    for sub_pattern in self._get_sub_patterns(pattern):
                        sub_hits = glob.glob(sub_pattern)
                        if sub_hits:
                            expanded.extend(
                                sorted(map(normalize_hit, sub_hits)))
                            break
                    # ... but we still want to retain the original pattern
                    # because we don't know for sure at this point, and it
                    # won't bother the "install, reglob" routine.
                    expanded.append(pattern)
        return expanded
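
_get_sub_patterns is not shown in this excerpt. Judging from the surrounding comments, it presumably yields leading-directory prefixes of the pattern, so that a hit on, say, "sub/" can flag an uninstalled subdataset even when "sub/dir/*.dat" itself matches nothing. A hypothetical sketch of that behavior:

import os.path as op

def get_sub_patterns(pattern):
    # Hypothetical stand-in for self._get_sub_patterns(): yield the
    # leading directory levels of `pattern`, deepest first.
    head = op.dirname(pattern)
    while head:
        yield head + op.sep
        head = op.dirname(head)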
Example #11
    def fetch(self):
        """Fetch the results from the remote dataset sibling.
        """
        lgr.info("Fetching results for %s", self.jobid)
        if self.resource.type == "ssh":
            ref = self.job_refname
            self.ds.repo.fetch(self.resource.name, "{0}:{0}".format(ref))
            self.ds.update(sibling=self.resource.name,
                           merge=True,
                           recursive=True)
            with head_at(self.ds, ref):
                outputs = self.job_spec.get("outputs")
                if outputs:
                    self.ds.get(path=outputs)
            if not self.ds.repo.is_ancestor(ref, "HEAD"):
                lgr.info(
                    "Results stored on %s. "
                    "Bring them into this branch with "
                    "'git merge %s'", ref, ref)
        elif self.resource.type == "shell":
            # Below is just for local testing.  It doesn't support actually
            # getting the content.
            with chpwd(self.ds.path):
                self.session.execute_command([
                    "git", "fetch", self.working_directory,
                    "{0}:{0}".format(self.job_refname)
                ])
                self.session.execute_command(["git", "merge", "FETCH_HEAD"])

        def get_metadir(mdir):
            if self.resource.type == "ssh":
                self.ds.get(path=mdir)

        self.log_failed(get_metadir)
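
head_at is used to temporarily check out the job ref while fetching outputs. A simplified sketch of such a context manager, assuming DataLad's repo API for branch lookup and checkout (the real helper presumably also handles a dirty tree and an initially detached HEAD):

from contextlib import contextmanager

@contextmanager
def head_at_sketch(ds, ref):
    # Check out `ref`, run the body, then restore the previously
    # active branch.
    branch = ds.repo.get_active_branch()
    ds.repo.checkout(ref)
    try:
        yield
    finally:
        ds.repo.checkout(branch)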
Example #12
def _resurrect_orc(job):
    resource = get_manager().get_resource(job["resource_id"], "id")
    with chpwd(job["local_directory"]):
        orchestrator_class = ORCHESTRATORS[job["orchestrator"]]
        orc = orchestrator_class(resource, job["submitter"], job,
                                 resurrection=True)
        orc.submitter.submission_id = job.get("submission_id")
    return orc
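
ORCHESTRATORS is not defined in this excerpt; presumably it maps the orchestrator name stored in the job record back to its class. A hypothetical sketch of such a registry (the real mapping lives in reproman's orchestrators module, and the keys may differ):

ORCHESTRATORS = {
    "plain": orcs.PlainOrchestrator,
    "datalad-pair": orcs.DataladPairOrchestrator,
    "datalad-pair-run": orcs.DataladPairRunOrchestrator,
    "datalad-local-run": orcs.DataladLocalRunOrchestrator,
    "datalad-no-remote": orcs.DataladNoRemoteOrchestrator,
}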
Example #13
def test_orc_datalad_abort_if_detached(job_spec, dataset, shell):
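    # "HEAD^{}" peels HEAD to the commit it points at, so checking it out
    # leaves the repository with a detached HEAD.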
    dataset.repo.checkout("HEAD^{}")

    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        with pytest.raises(OrchestratorError):
            orc.prepare_remote()
Example #14
def test_dataset_as_dict(shell, dataset, job_spec):
    with chpwd(dataset.path):
        orc = orcs.DataladLocalRunOrchestrator(shell, submission_type="local",
                                               job_spec=job_spec)
    d = orc.as_dict()
    # Check for keys that DataladOrchestrator should extend
    # Orchestrator.as_dict() with.
    assert "head" in d
    assert "dataset_id" in d
Example #15
    def run(**spec_kwds):
        jspec = dict(job_spec, **spec_kwds)
        with chpwd(dataset.path):
            orc = get_orc(jspec)
            # Run one job so that we create the remote repository.
            orc.prepare_remote()
            orc.submit()
            orc.follow()
            orc.fetch()
            return orc
Example #16
def test_orc_datalad_resurrect(job_spec, dataset, shell):
    for k in ["jobid",
              "working_directory", "root_directory", "local_directory"]:
        job_spec[k] = "doesn't matter"
    job_spec["head"] = "deadbee"
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec,
            resurrection=True)
    assert orc.head == "deadbee"
Example #17
def test_orc_datalad_nonrun(job_spec, dataset, shell, orc_class):
    with chpwd(dataset.path):
        orc = orc_class(shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()

        orc.fetch()
        assert dataset.repo.is_under_annex("out")
        assert (dataset.pathobj / "out").exists()
Example #18
def test_orc_datalad_run(job_spec, dataset, shell, orc_class, sub_type):
    with chpwd(dataset.path):
        orc = orc_class(shell, submission_type=sub_type, job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()

        orc.fetch()
        assert dataset.repo.file_has_content("out")
        assert open("out").read() == "content\nmore\n"
Example #19
    def run_and_check(spec):
        with chpwd(dataset.path):
            orc = orc_class(shell, submission_type=sub_type, job_spec=spec)
            orc.prepare_remote()
            orc.submit()
            orc.follow()

            orc.fetch()
            assert dataset.repo.file_has_content("out")
            assert open("out").read() == "content\nmore\n"
            return orc
Example #20
def test_orc_datalad_run_results_missing(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc = orcs.DataladLocalRunOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        os.unlink(op.join(orc.root_directory, "outputs",
                          "{}.tar.gz".format(orc.jobid)))
        with pytest.raises(OrchestratorError):
            orc.fetch()
Example #21
    def run_fn(*args, **kwargs):
        with contextlib.ExitStack() as stack:
            stack.enter_context(chpwd(path))
            # Patch home to avoid populating testing machine with jobs when
            # using local shell.
            stack.enter_context(patch.dict(os.environ, {"HOME": home}))
            stack.enter_context(patch("reproman.interface.run.get_manager",
                                      return_value=resource_manager))
            stack.enter_context(patch("reproman.interface.run.LocalRegistry",
                                      job_registry))
            return run(*args, **kwargs)
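
contextlib.ExitStack lets a helper like run_fn compose a variable number of context managers and unwind them in reverse order on exit. A toy illustration, independent of reproman:

import contextlib

@contextlib.contextmanager
def tag(name, log):
    # Record enter/exit order to show how ExitStack unwinds.
    log.append("enter " + name)
    yield
    log.append("exit " + name)

log = []
with contextlib.ExitStack() as stack:
    for name in ["a", "b", "c"]:
        stack.enter_context(tag(name, log))
assert log == ["enter a", "enter b", "enter c",
               "exit c", "exit b", "exit a"]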
Example #22
def test_combine_batch_params_glob(tmpdir):
    tmpdir = str(tmpdir)
    create_tree(tmpdir, {"aaa": "a",
                         "subdir": {"b": "b", "c": "c"}})
    with chpwd(tmpdir):
        res = sorted(_combine_batch_params(["foo=a*,subdir/*,other"]),
                     key=lambda d: d["foo"])
        assert list(res) == [
            {"foo": "aaa"},
            {"foo": "other"},
            {"foo": "subdir/b"},
            {"foo": "subdir/c"}]
Example #23
def test_orc_plain_failure(tmpdir, job_spec, shell):
    job_spec["_resolved_command_str"] = "iwillfail"
    job_spec["inputs"] = []
    local_dir = str(tmpdir)
    with chpwd(local_dir):
        orc = orcs.PlainOrchestrator(shell, submission_type="local",
                                     job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
    for fname in "status", "stderr", "stdout":
        assert op.exists(op.join(orc.meta_directory, fname + ".0"))
Example #24
def test_orc_datalad_pair(job_spec, dataset, shell):
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()

        orc.fetch()
        # The local fetch variant doesn't currently get the content, so just
        # check that the file is under annex.
        assert dataset.repo.is_under_annex("out")
Example #25
def test_venv_identify_distributions(venv_test_dir):
    libpaths = {p[-1]: os.path.join("lib", PY_VERSION, *p)
                for p in [("abc.py",),
                          ("importlib", "yaml", "machinery.py"),
                          ("site-packages", "yaml", "parser.py"),
                          ("site-packages", "attr", "filters.py")]}

    with chpwd(venv_test_dir):
        path_args = [
            # Both full ...
            os.path.join(venv_test_dir, "venv0", libpaths["parser.py"]),
            # ... and relative paths work.
            os.path.join("venv1", libpaths["filters.py"]),
            # A virtualenv file that isn't part of any particular package.
            os.path.join("venv1", "bin", "python"),
            # A link to the outside world ...
            os.path.join("venv1", libpaths["abc.py"]),
            # or in a directory that is a link to the outside world.
            os.path.join("venv1", libpaths["machinery.py"])
        ]
        path_args.append(COMMON_SYSTEM_PATH)

        tracer = VenvTracer()

        dists = list(tracer.identify_distributions(path_args))
        assert len(dists) == 1

        distributions, unknown_files = dists[0]
        # Unknown files do not include "venv0/bin/python", which is a link to
        # another path within venv0, but they do include the link to the system
        # abc.py.
        assert unknown_files == {
            COMMON_SYSTEM_PATH,
            op.realpath(os.path.join("venv1", libpaths["abc.py"])),
            op.realpath(os.path.join("venv1", libpaths["machinery.py"])),
            # The editable package was added by VenvTracer as an unknown file.
            os.path.join(venv_test_dir, "minimal_pymodule")}

        assert len(distributions.environments) == 2

        expect = {"environments":
                  [{"packages": [{"files": [libpaths["parser.py"]],
                                  "name": "PyYAML",
                                  "editable": False},
                                 {"files": [], "name": "nmtest",
                                  "editable": True}],
                    "system_site_packages": False},
                   {"packages": [{"files": [libpaths["filters.py"]],
                                  "name": "attrs",
                                  "editable": False}],
                    "system_site_packages": False}]}
        assert_is_subset_recur(expect, attr.asdict(distributions), [dict, list])
Example #26
def test_venv_identify_distributions(venv_test_dir):
    paths = [
        "lib/" + PY_VERSION + "/site-packages/yaml/parser.py",
        "lib/" + PY_VERSION + "/site-packages/attr/filters.py"
    ]

    with chpwd(venv_test_dir):
        path_args = [
            # Both full ...
            os.path.join(venv_test_dir, "venv0", paths[0]),
            # ... and relative paths work.
            os.path.join("venv1", paths[1]),
        ]
        path_args.append("/sbin/iptables")

        tracer = VenvTracer()

        dists = list(tracer.identify_distributions(path_args))
        assert len(dists) == 1

        distributions, unknown_files = dists[0]
        assert unknown_files == {
            "/sbin/iptables",
            # The editable package was added by VenvTracer as an unknown file.
            os.path.join(venv_test_dir, "minimal_pymodule")
        }

        assert len(distributions.environments) == 2

        expect = {
            "environments": [{
                "packages": [{
                    "files": [paths[0]],
                    "name": "PyYAML",
                    "editable": False
                }, {
                    "files": [],
                    "name": "nmtest",
                    "editable": True
                }]
            }, {
                "packages": [{
                    "files": [paths[1]],
                    "name": "attrs",
                    "editable": False
                }]
            }]
        }
        assert_is_subset_recur(expect, attr.asdict(distributions),
                               [dict, list])
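
assert_is_subset_recur is a test utility not shown on this page. Its assumed semantics: every entry of the expected structure must be present in the actual one, recursing through the container types given in the last argument. A rough, hypothetical approximation:

def is_subset_recur(expected, actual, types):
    # Hypothetical approximation; the real helper's exact list semantics
    # may differ.
    if isinstance(expected, dict) and dict in types:
        return all(k in actual and is_subset_recur(v, actual[k], types)
                   for k, v in expected.items())
    if isinstance(expected, list) and list in types:
        return all(any(is_subset_recur(e, a, types) for a in actual)
                   for e in expected)
    return expected == actual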
Example #27
    def fixture(tmpdir_factory):
        skipif.no_network()
        skipif.no_singularity()

        # Change to a temporary directory so that we don't pollute the current
        # directory with image files.
        with chpwd(str(tmpdir_factory.mktemp("singularity-resource"))):
            from reproman.resource.singularity import Singularity
            resource = Singularity(name=name or str(uuid.uuid4().hex)[:11],
                                   image=image)
            resource.connect()
            list(resource.create())
        yield resource
        resource.delete()
Example #28
def test_orc_datalad_run_failed(job_spec, dataset, shell):
    job_spec["command_str"] = "iwillfail"
    job_spec["inputs"] = []

    with chpwd(dataset.path):
        orc = orcs.DataladLocalRunOrchestrator(
            shell, submission_type="local", job_spec=job_spec)
        orc.prepare_remote()
        orc.submit()
        orc.follow()
        with swallow_logs(new_level=logging.INFO) as log:
            orc.fetch()
            assert "Job status" in log.out
            assert "stderr:" in log.out
Example #29
def test_orc_datalad_pair_existing_remote(job_spec, dataset, shell):
    root_directory = job_spec["root_directory"]
    dataset.repo.add_remote("localshell", "i-dont-match")
    with chpwd(dataset.path):
        orc = orcs.DataladPairOrchestrator(shell,
                                           submission_type="local",
                                           job_spec=job_spec)
        # If a remote with the resource name exists, we abort if the
        # URL doesn't match the expected target...
        with pytest.raises(OrchestratorError):
            orc.prepare_remote()
        # ... and continue if it does.
        dataset.repo.set_remote_url("localshell", orc.working_directory)
        orc.prepare_remote()
Example #30
    def fn(resource, jspec):
        create_tree(local_dir, {"d": {"in": "content\n"}})
        with chpwd(local_dir):
            orc = orcs.PlainOrchestrator(resource, submission_type="local",
                                         job_spec=jspec)
            orc.prepare_remote()
            assert orc.session.exists(
                op.join(orc.working_directory, "d", "in"))

            orc.submit()
            orc.follow()
            assert orc.session.exists(op.join(orc.working_directory, "out"))

            orc.fetch()
            assert open("out").read() == "content\nmore\n"