Example #1
def test_inject(path):
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #2
def test_inject(path):
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = last_commit_msg(ds.repo)
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #3
def test_inject(path):
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(
        run_command("nonsense command",
                    dataset=ds,
                    inject=True,
                    extra_info={"custom_key": "custom_field"}))
    # ATTN: Use master explicitly so that this check works when we're on an
    # adjusted branch too (e.g., when this test is executed under Windows).
    msg = ds.repo.format_commit("%B", "master")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #4
def _rerun(dset, results):
    for res in results:
        rerun_action = res.get("rerun_action")
        if not rerun_action:
            yield res
        elif rerun_action == "skip":
            yield res
        elif rerun_action == "checkout":
            if res.get("branch"):
                checkout_options = ["-b", res["branch"]]
            else:
                checkout_options = ["--detach"]
            dset.repo.checkout(res["commit"],
                               options=checkout_options)
        elif rerun_action == "pick":
            dset.repo.cherry_pick(res["commit"])
            yield res
        elif rerun_action == "run":
            hexsha = res["commit"]
            run_info = res["run_info"]

            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(hexsha)
            else:
                run_info["chain"] = [hexsha]

            # Now we have to find out what was modified during the last run
            # and enable re-modification. Ideally, we would bring back the
            # entire state of the tree with #1424, but for now we limit
            # ourselves to file addition/not-in-place-modification.
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            auto_outputs = [p for p in auto_outputs
                            # run records outputs relative to the "pwd" field.
                            if op.relpath(p, outputs_dir) not in outputs]

            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 extra_inputs=run_info.get("extra_inputs", []),
                                 outputs=outputs,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 rerun_info=run_info):
                yield r
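The records consumed by _rerun are status dicts emitted by the rerun planner; judging from the keys accessed above, a "run" record has roughly this shape (illustrative values only, not the output of a real run):

res = {
    "rerun_action": "run",             # or "skip", "checkout", "pick"
    "commit": "abc123...",             # hexsha of the original run commit
    "run_info": {"cmd": "...", "pwd": ".",
                 "inputs": [], "outputs": []},
    "diff": [],                        # per-path results for that commit's diff
    "rerun_message": None,             # message override requested by the user
    "run_message": "original commit message",
}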
Example #5
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
            'type' if on_windows else 'cat'),
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(
        ds.repo.file_has_content(op.join("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ok_(ds.repo.file_has_content("after-dot-run"))

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.call_git(["reset", "--hard", "HEAD~2"])
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), "a.dat appended\n")

    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'"
                        if on_windows else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
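Condensed, the core input/output contract this long test exercises looks as follows (hypothetical file names; assumes in.dat is annexed and its content droppable):

ds.run("cat in.dat > out.dat", inputs=["in.dat"], outputs=["out.dat"])
ds.repo.drop(["in.dat"], options=["--force"])
ds.rerun()  # declared inputs are re-fetched before the command is re-executed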
Example #6
def _rerun(dset, results, assume_ready=None, explicit=False, jobs=None):
    ds_repo = dset.repo
    # Keep a map from an original hexsha to a new hexsha created by the rerun
    # (i.e. a rerun, cherry-picked, or merged commit).
    new_bases = {}  # original hexsha => rerun hexsha
    branch_to_restore = ds_repo.get_active_branch()
    head = onto = ds_repo.get_hexsha()
    for res in results:
        lgr.info(_get_rerun_log_msg(res))
        rerun_action = res.get("rerun_action")
        if not rerun_action:
            yield res
            continue

        res_hexsha = res["commit"]
        if rerun_action == "checkout":
            if res.get("branch"):
                branch = res["branch"]
                checkout_options = ["-b", branch]
                branch_to_restore = branch
            else:
                checkout_options = ["--detach"]
                branch_to_restore = None
            ds_repo.checkout(res_hexsha,
                             options=checkout_options)
            head = onto = res_hexsha
            continue

        # First handle the two cases that don't require additional steps to
        # identify the base: a root commit or a merge commit.

        if not res["parents"]:
            _mark_nonrun_result(res, "skip")
            yield res
            continue

        if rerun_action == "merge":
            old_parents = res["parents"]
            new_parents = [new_bases.get(p, p) for p in old_parents]
            if old_parents == new_parents:
                if not ds_repo.is_ancestor(res_hexsha, head):
                    ds_repo.checkout(res_hexsha)
            elif res_hexsha != head:
                if ds_repo.is_ancestor(res_hexsha, onto):
                    new_parents = [p for p in new_parents
                                   if not ds_repo.is_ancestor(p, onto)]
                if new_parents:
                    if new_parents[0] != head:
                        # Keep the direction of the original merge.
                        ds_repo.checkout(new_parents[0])
                    if len(new_parents) > 1:
                        msg = ds_repo.format_commit("%B", res_hexsha)
                        ds_repo.call_git(
                            ["merge", "-m", msg,
                             "--no-ff", "--allow-unrelated-histories"] +
                            new_parents[1:])
                    head = ds_repo.get_hexsha()
                    new_bases[res_hexsha] = head
            yield res
            continue

        # For all the remaining actions, first make sure we're on the
        # appropriate base.

        parent = res["parents"][0]
        new_base = new_bases.get(parent)
        head_to_restore = None  # ... to find our way back if we skip.

        if new_base:
            if new_base != head:
                ds_repo.checkout(new_base)
                head_to_restore, head = head, new_base
        elif parent != head and ds_repo.is_ancestor(onto, parent):
            if rerun_action == "run":
                ds_repo.checkout(parent)
                head = parent
            else:
                _mark_nonrun_result(res, "skip")
                yield res
                continue
        else:
            if parent != head:
                new_bases[parent] = head

        # We've adjusted base. Now skip, pick, or run the commit.

        if rerun_action == "skip-or-pick":
            if ds_repo.is_ancestor(res_hexsha, head):
                _mark_nonrun_result(res, "skip")
                if head_to_restore:
                    ds_repo.checkout(head_to_restore)
                    head, head_to_restore = head_to_restore, None
                yield res
                continue
            else:
                ds_repo.cherry_pick(res_hexsha)
                _mark_nonrun_result(res, "pick")
                yield res
        elif rerun_action == "run":
            run_info = res["run_info"]
            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(res_hexsha)
            else:
                run_info["chain"] = [res_hexsha]

            # Now we have to find out what was modified during the last run
            # and enable re-modification. Ideally, we would bring back the
            # entire state of the tree with #1424, but for now we limit
            # ourselves to file addition/not-in-place-modification.
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            auto_outputs = [p for p in auto_outputs
                            # run records outputs relative to the "pwd" field.
                            if op.relpath(p, outputs_dir) not in outputs]

            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 extra_inputs=run_info.get("extra_inputs", []),
                                 outputs=outputs,
                                 assume_ready=assume_ready,
                                 explicit=explicit,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 jobs=jobs,
                                 rerun_info=run_info):
                yield r
        new_head = ds_repo.get_hexsha()
        if new_head not in [head, res_hexsha]:
            new_bases[res_hexsha] = new_head
        head = new_head

    if branch_to_restore:
        # The user asked us to replay the sequence onto a branch, but the
        # history had merges, so we're in a detached state.
        ds_repo.update_ref("refs/heads/" + branch_to_restore,
                           "HEAD")
        ds_repo.checkout(branch_to_restore)
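To make the merge replay concrete: the new_bases map rewrites a merge's parents before the merge is redone. A self-contained illustration of the remapping done by new_parents = [new_bases.get(p, p) for p in old_parents] above (hypothetical hexshas):

# Original history M--A--B, where the run commit A was replayed as A'.
new_bases = {"aaa111": "bbb222"}            # A  =>  A'
old_parents = ["fff000", "aaa111"]          # merge B had parents M and A
new_parents = [new_bases.get(p, p) for p in old_parents]
assert new_parents == ["fff000", "bbb222"]  # B is redone onto M and A'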
Example #7
    def __call__(cmd, container_name=None, dataset=None,
                 inputs=None, outputs=None, message=None, expand=None,
                 explicit=False, sidecar=None):
        from unittest.mock import patch  # delayed, since takes long (~600ms for yoh)
        pwd, _ = get_command_pwds(dataset)
        ds = require_dataset(dataset, check_installed=True,
                             purpose='run a containerized command execution')

        container = None
        for res in find_container_(ds, container_name):
            if res.get("action") == "containers":
                container = res
            else:
                yield res
        assert container, "bug: container should always be defined here"

        image_path = op.relpath(container["path"], pwd)
        # The container record would contain the path to the (sub)dataset
        # containing it. If not, take the current dataset, as the record
        # must be coming from it.
        image_dspath = op.relpath(container.get('parentds', ds.path), pwd)

        # Sure, we could check whether the container image is present, but
        # it might live in a subdataset that isn't even installed yet.
        # Let's leave all this business to `get`, which is called by `run`.

        cmd = normalize_command(cmd)
        # expand the command with container execution
        if 'cmdexec' in container:
            callspec = container['cmdexec']

            # Temporary kludge to give a more helpful message
            if callspec.startswith("["):
                import simplejson
                try:
                    simplejson.loads(callspec)
                except simplejson.errors.JSONDecodeError:
                    pass  # Never mind, false positive.
                else:
                    raise ValueError(
                        'cmdexec {!r} is in an old, unsupported format. '
                        'Convert it to a plain string.'.format(callspec))
            try:
                cmd_kwargs = dict(
                    img=image_path,
                    cmd=cmd,
                    img_dspath=image_dspath,
                )
                cmd = callspec.format(**cmd_kwargs)
            except KeyError as exc:
                yield get_status_dict(
                    'run',
                    ds=ds,
                    status='error',
                    message=(
                        'Unrecognized cmdexec placeholder: %s. '
                        'See containers-add for information on known ones: %s',
                        exc,
                        ", ".join(cmd_kwargs)))
                return
        else:
            # just prepend and pray
            cmd = container['path'] + ' ' + cmd

        with patch.dict('os.environ',
                        {CONTAINER_NAME_ENVVAR: container['name']}):
            # fire!
            for r in run_command(
                    cmd=cmd,
                    dataset=dataset or (ds if ds.path == pwd else None),
                    inputs=inputs,
                    extra_inputs=[image_path],
                    outputs=outputs,
                    message=message,
                    expand=expand,
                    explicit=explicit,
                    sidecar=sidecar):
                yield r
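For reference, a container record's cmdexec is a plain str.format() template over the placeholders assembled in cmd_kwargs above. A hypothetical record (the name and image path are made up for illustration):

container = {
    "name": "mycontainer",
    "path": ".datalad/environments/mycontainer/image",
    "cmdexec": "singularity exec {img} {cmd}",  # {img_dspath} is also available
}
# callspec.format(**cmd_kwargs) would then expand this to, e.g.:
#   singularity exec .datalad/environments/mycontainer/image <user command>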