def test_inject(path):
    ds = Dataset(path).rev_create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)

def test_inject(path):
    ds = Dataset(path).create(force=True)
    ok_(ds.repo.is_dirty())
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)

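The test_inject variants above all exercise the same core pattern: recording a command that was executed outside of DataLad by calling run_command() with inject=True, which skips execution and only creates the run-record commit. A minimal sketch of that pattern, assuming an existing, already-dirty dataset whose untracked files are the command's outputs (the path and command string below are hypothetical):

from datalad.api import Dataset
from datalad.interface.run import run_command  # same import path used in the snippets below

# Assumed setup: the work tree at this (hypothetical) path already contains
# the outputs of a command that was run outside of DataLad.
ds = Dataset("/tmp/demo")

# inject=True records the command without executing it; extra_info adds
# arbitrary keys to the run record embedded in the commit message.
for res in run_command("command that was already executed elsewhere",
                       dataset=ds,
                       inject=True,
                       extra_info={"custom_key": "custom_field"}):
    pass  # run_command() is a generator; exhaust it to create the commit
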
def _rerun(dset, results):
    for res in results:
        if res["status"] == "error":
            yield res
            return
        rerun_action = res.get("rerun_action")
        if rerun_action == "skip":
            yield res
        elif rerun_action == "checkout":
            if res.get("branch"):
                checkout_options = ["-b", res["branch"]]
            else:
                checkout_options = ["--detach"]
            dset.repo.checkout(res["commit"], options=checkout_options)
        elif rerun_action == "pick":
            dset.repo._git_custom_command(
                None, ["git", "cherry-pick", res["commit"]],
                check_fake_dates=True)
            yield res
        else:
            hexsha = res["commit"]
            run_info = res["run_info"]

            # Keep a "rerun" trail.
            if "chain" in run_info:
                run_info["chain"].append(hexsha)
            else:
                run_info["chain"] = [hexsha]

            # Now we have to find out what was modified during the last run
            # and enable re-modification. Ideally, we would bring back the
            # entire state of the tree with #1424, but we limit ourselves to
            # file addition/not-in-place-modification for now.
            auto_outputs = (ap["path"] for ap in new_or_modified(res["diff"]))
            outputs = run_info.get("outputs", [])
            outputs_dir = op.join(dset.path, run_info["pwd"])
            auto_outputs = [p for p in auto_outputs
                            # run records outputs relative to the "pwd" field.
                            if op.relpath(p, outputs_dir) not in outputs]

            message = res["rerun_message"] or res["run_message"]
            for r in run_command(run_info['cmd'],
                                 dataset=dset,
                                 inputs=run_info.get("inputs", []),
                                 outputs=outputs,
                                 rerun_outputs=auto_outputs,
                                 message=message,
                                 rerun_info=run_info):
                yield r

def __call__(cmd=None, dataset=None, inputs=None, outputs=None,
             expand=None, explicit=False, message=None, sidecar=None):
    for r in run_command(cmd,
                         dataset=dataset,
                         inputs=inputs,
                         outputs=outputs,
                         expand=expand,
                         explicit=explicit,
                         message=message,
                         sidecar=sidecar,
                         saver=_save_outputs):
        yield r

def fetch(self): """Fetch results tarball and inject run record into the local dataset. """ lgr.info("Fetching results for %s", self.jobid) import tarfile tfile = "{}.tar.gz".format(self.jobid) remote_tfile = op.join(self.root_directory, "outputs", tfile) if not self.session.exists(remote_tfile): raise OrchestratorError( "Expected output file does not exist: {}".format(remote_tfile)) with head_at(self.ds, self.head) as moved: with chpwd(self.ds.path): self.session.get(remote_tfile) with tarfile.open(tfile, mode="r:gz") as tar: tar.extractall(path=".") os.unlink(tfile) # TODO: How to handle output cleanup on the remote? from datalad.interface.run import run_command lgr.info("Creating run commit in %s", self.ds.path) for res in run_command( inputs=self.job_spec.get("inputs_unexpanded"), outputs=self.job_spec.get("outputs_unexpanded"), inject=True, extra_info={"reproman_jobid": self.jobid}, message=self.job_spec.get("message"), cmd=self.job_spec["command_str_unexpanded"]): # Oh, if only I were a datalad extension. pass ref = self.job_refname if moved: lgr.info( "Results stored on %s. " "Bring them into this branch with " "'git merge %s'", ref, ref) self.ds.repo.update_ref(ref, "HEAD") self.log_failed()
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"],
                     extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah", strip=True)

def __call__(cmd, container_name=None, dataset=None,
             inputs=None, outputs=None, message=None, expand=None,
             explicit=False, sidecar=None):
    from mock import patch  # delayed, since takes long (~600ms for yoh)

    pwd, _ = get_command_pwds(dataset)
    ds = require_dataset(dataset, check_installed=True,
                         purpose='run a containerized command execution')

    container = find_container(ds, container_name)
    image_path = op.relpath(container["path"], pwd)
    # container record would contain path to the (sub)dataset containing
    # it. If not - take current dataset, as it must be coming from it
    image_dspath = op.relpath(container.get('parentds', ds.path), pwd)

    # sure we could check whether the container image is present,
    # but it might live in a subdataset that isn't even installed yet
    # let's leave all this business to `get` that is called by `run`

    cmd = normalize_command(cmd)
    # expand the command with container execution
    if 'cmdexec' in container:
        callspec = container['cmdexec']

        # Temporary kludge to give a more helpful message
        if callspec.startswith("["):
            import simplejson
            try:
                simplejson.loads(callspec)
            except simplejson.errors.JSONDecodeError:
                pass  # Never mind, false positive.
            else:
                raise ValueError(
                    'cmdexe {!r} is in an old, unsupported format. '
                    'Convert it to a plain string.'.format(callspec))
        try:
            cmd_kwargs = dict(
                img=image_path,
                cmd=cmd,
                img_dspath=image_dspath,
            )
            cmd = callspec.format(**cmd_kwargs)
        except KeyError as exc:
            yield get_status_dict(
                'run',
                ds=ds,
                status='error',
                message=(
                    'Unrecognized cmdexec placeholder: %s. '
                    'See containers-add for information on known ones: %s',
                    exc,
                    ", ".join(cmd_kwargs)))
            return
    else:
        # just prepend and pray
        cmd = container['path'] + ' ' + cmd

    with patch.dict('os.environ',
                    {CONTAINER_NAME_ENVVAR: container['name']}):
        # fire!
        for r in run_command(
                cmd=cmd,
                dataset=dataset or (ds if ds.path == pwd else None),
                inputs=inputs,
                extra_inputs=[image_path],
                outputs=outputs,
                message=message,
                expand=expand,
                explicit=explicit,
                sidecar=sidecar):
            yield r

def __call__(revision="HEAD", since=None, dataset=None, branch=None, message=None, onto=None): ds = require_dataset(dataset, check_installed=True, purpose='rerunning a command') lgr.debug('rerunning command output underneath %s', ds) from datalad.tests.utils import ok_clean_git try: ok_clean_git(ds.path) except AssertionError: yield get_status_dict('run', ds=ds, status='impossible', message=('unsaved modifications present, ' 'cannot detect changes by command')) return err_info = get_status_dict('run', ds=ds) if not ds.repo.get_hexsha(): yield dict(err_info, status='impossible', message='cannot rerun command, nothing recorded') return if branch and branch in ds.repo.get_branches(): yield get_status_dict( "run", ds=ds, status="error", message="branch '{}' already exists".format(branch)) return if not commit_exists(ds, revision + "^"): # Only a single commit is reachable from `revision`. In # this case, --since has no effect on the range construction. revrange = revision elif since is None: revrange = "{rev}^..{rev}".format(rev=revision) elif since.strip() == "": revrange = revision else: revrange = "{}..{}".format(since, revision) if ds.repo.repo.git.rev_list("--merges", revrange, "--"): yield get_status_dict( "run", ds=ds, status="error", message="cannot rerun history with merge commits") return Revision = namedtuple("Revision", ["id", "message", "info"]) def revision_with_info(rev): msg, info = get_commit_runinfo(ds.repo, rev) return Revision(rev, msg, info) ids = ds.repo.repo.git.rev_list("--reverse", revrange, "--").split() try: revs = list(map(revision_with_info, ids)) except ValueError as exc: yield dict(err_info, status='error', message=exc_str(exc)) return if since is not None and since.strip() == "": # For --since='', drop any leading commits that don't have # a run command. revs = list(dropwhile(lambda r: r.info is None, revs)) if onto is not None and onto.strip() == "": # Special case: --onto='' is the value of --since. # Because we're currently aborting if the revision list # contains merges, we know that, regardless of if and how # --since is specified, the effective value for --since is # the parent of the first revision. onto = revs[0].id + "^" if not commit_exists(ds, onto): # This is unlikely to happen in the wild because it # means that the first commit is a datalad run commit. # Just abort rather than trying to checkout on orphan # branch or something like that. yield get_status_dict( "run", ds=ds, status="error", message="Commit for --onto does not exist.") return if branch or onto: start_point = onto or "HEAD" if branch: checkout_options = ["-b", branch] else: checkout_options = ["--detach"] ds.repo.checkout(start_point, options=checkout_options) for rev in revs: if not rev.info: pick = False try: ds.repo.repo.git.merge_base("--is-ancestor", rev.id, "HEAD") except GitCommandError: # Revision is NOT an ancestor of HEAD. pick = True shortrev = ds.repo.repo.git.rev_parse("--short", rev.id) err_msg = "no command for {} found; {}".format( shortrev, "cherry picking" if pick else "skipping") yield dict(err_info, status='ok', message=err_msg) if pick: ds.repo.repo.git.cherry_pick(rev.id) continue # Keep a "rerun" trail. 
if "chain" in rev.info: rev.info["chain"].append(rev.id) else: rev.info["chain"] = [rev.id] # now we have to find out what was modified during the # last run, and enable re-modification ideally, we would # bring back the entire state of the tree with #1424, but # we limit ourself to file addition/not-in-place-modification # for now for r in ds.unlock(new_or_modified(ds, rev.id), return_type='generator', result_xfm=None): yield r for r in run_command(rev.info['cmd'], ds, message or rev.message, rerun_info=rev.info): yield r
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"],
                     extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah", strip=True)

def fetch(self, on_remote_finish=None):
    """Fetch results tarball and inject run record into the local dataset.

    Parameters
    ----------
    on_remote_finish : callable, optional
        Function to be called when work with the resource is finished. It
        will be passed two arguments, the resource and the failed subjobs
        (list of ints).
    """
    lgr.info("Fetching results for %s", self.jobid)
    import tarfile
    tfile = "{}.tar.gz".format(self.jobid)
    remote_tfile = op.join(self.root_directory, "outputs", tfile)

    if not self.session.exists(remote_tfile):
        raise OrchestratorError(
            "Expected output file does not exist: {}".format(remote_tfile))

    failed = self.get_failed_subjobs()
    with head_at(self.ds, self.head) as moved:
        with chpwd(self.ds.path):
            resource_name = self.resource.name
            lgr.info("Fetching output tarball from '%s'", resource_name)
            self.session.get(remote_tfile)
            # This log_failed() may mention files that won't be around
            # until the tarball extraction below, but we do call
            # log_failed() now because it might need the remote resource
            # and we want to finish up with remote operations.
            self.log_failed(failed)

            lgr.info("Finished with remote resource '%s'", resource_name)
            if on_remote_finish:
                on_remote_finish(self.resource, failed)
            lgr.info("Extracting output tarball into local dataset '%s'",
                     self.ds.path)
            with tarfile.open(tfile, mode="r:gz") as tar:
                tar.extractall(path=".")
            os.unlink(tfile)
            # TODO: How to handle output cleanup on the remote?

            from datalad.interface.run import run_command
            lgr.info("Creating run commit in %s", self.ds.path)

            cmds = self.job_spec["_command_array"]
            if len(cmds) == 1:
                cmd = cmds[0]
            else:
                # FIXME: Can't use unexpanded command because of unknown
                # placeholders.
                cmd = self.jobid

            for res in run_command(
                    # FIXME: How to represent inputs and outputs given that
                    # they are formatted per subjob and then expanded by
                    # glob?
                    inputs=self.job_spec.get("inputs"),
                    extra_inputs=self.job_spec.get("_extra_inputs"),
                    outputs=self.job_spec.get("outputs"),
                    inject=True,
                    extra_info={"reproman_jobid": self.jobid},
                    message=self.job_spec.get("message"),
                    cmd=cmd):
                # Oh, if only I were a datalad extension.
                if res["status"] in ["impossible", "error"]:
                    raise OrchestratorError(
                        "Making datalad-run commit failed: {}".format(
                            res["message"]))

            ref = self.job_refname
            if moved:
                lgr.info(
                    "Results stored on %s. "
                    "Bring them into this branch with "
                    "'git merge %s'",
                    ref, ref)
            self.ds.repo.update_ref(ref, "HEAD")

def __call__(revision="HEAD", since=None, dataset=None, branch=None, message=None, onto=None, script=None): ds = require_dataset(dataset, check_installed=True, purpose='rerunning a command') lgr.debug('rerunning command output underneath %s', ds) if script is None and ds.repo.dirty: yield get_status_dict('run', ds=ds, status='impossible', message=('unsaved modifications present, ' 'cannot detect changes by command')) return err_info = get_status_dict('run', ds=ds) if not ds.repo.get_hexsha(): yield dict(err_info, status='impossible', message='cannot rerun command, nothing recorded') return if branch and branch in ds.repo.get_branches(): yield get_status_dict( "run", ds=ds, status="error", message="branch '{}' already exists".format(branch)) return if not commit_exists(ds, revision + "^"): # Only a single commit is reachable from `revision`. In # this case, --since has no effect on the range construction. revrange = revision elif since is None: revrange = "{rev}^..{rev}".format(rev=revision) elif since.strip() == "": revrange = revision else: revrange = "{}..{}".format(since, revision) if ds.repo.repo.git.rev_list("--merges", revrange, "--"): yield get_status_dict( "run", ds=ds, status="error", message="cannot rerun history with merge commits") return revs = [{ "hexsha": hexsha, "message": ds.repo.repo.git.show(hexsha, "--format=%B", "--no-patch") } for hexsha in ds.repo.repo.git.rev_list( "--reverse", revrange, "--").split()] for rev in revs: try: msg, info = get_run_info(rev["message"]) except ValueError as exc: yield dict(err_info, status='error', message="Error on {}'s message: {}".format( rev["hexsha"], exc_str(exc))) return if info is not None: rev["run_info"] = info rev["run_message"] = msg if since is not None and since.strip() == "": # For --since='', drop any leading commits that don't have # a run command. revs = list(dropwhile(lambda r: "run_info" not in r, revs)) if script: ofh = sys.stdout if script.strip() == "-" else open(script, "w") header = """\ #!/bin/sh # # This file was generated by running (the equivalent of) # # datalad rerun --script={script}{since} {revision} # # in {ds}{path}\n""" ofh.write( header.format( script=script, since="" if since is None else " --since=" + since, revision=ds.repo.repo.git.rev_parse(revision), ds='dataset {} at '.format(ds.id) if ds.id else '', path=ds.path)) for rev in revs: if "run_info" not in rev: continue cmd = rev["run_info"]["cmd"] msg = rev["run_message"] if msg == _format_cmd_shorty(cmd): msg = '' ofh.write("\n" + "".join("# " + ln for ln in msg.splitlines(True)) + "\n") commit_descr = ds.repo.describe(rev['hexsha']) ofh.write('# (record: {})\n'.format( commit_descr if commit_descr else rev['hexsha'])) if isinstance(cmd, list): cmd = " ".join(cmd) ofh.write(cmd + "\n") if ofh is not sys.stdout: ofh.close() else: if onto is not None and onto.strip() == "": # Special case: --onto='' is the value of --since. # Because we're currently aborting if the revision list # contains merges, we know that, regardless of if and how # --since is specified, the effective value for --since is # the parent of the first revision. onto = revs[0]["hexsha"] + "^" if not commit_exists(ds, onto): # This is unlikely to happen in the wild because it # means that the first commit is a datalad run commit. # Just abort rather than trying to checkout on orphan # branch or something like that. 
yield get_status_dict( "run", ds=ds, status="error", message="Commit for --onto does not exist.") return if branch or onto: start_point = onto or "HEAD" if branch: checkout_options = ["-b", branch] else: checkout_options = ["--detach"] ds.repo.checkout(start_point, options=checkout_options) for rev in revs: hexsha = rev["hexsha"] if "run_info" not in rev: pick = False try: ds.repo.repo.git.merge_base("--is-ancestor", hexsha, "HEAD") except GitCommandError: # Revision is NOT an ancestor of HEAD. pick = True shortrev = ds.repo.repo.git.rev_parse("--short", hexsha) err_msg = "no command for {} found; {}".format( shortrev, "cherry picking" if pick else "skipping") yield dict(err_info, status='ok', message=err_msg) if pick: ds.repo._git_custom_command( None, ["git", "cherry-pick", hexsha], check_fake_dates=True) continue run_info = rev["run_info"] # Keep a "rerun" trail. if "chain" in run_info: run_info["chain"].append(hexsha) else: run_info["chain"] = [hexsha] # now we have to find out what was modified during the # last run, and enable re-modification ideally, we would # bring back the entire state of the tree with #1424, but # we limit ourself to file addition/not-in-place-modification # for now for r in ds.unlock(new_or_modified(ds, hexsha), return_type='generator', result_xfm=None): yield r for r in run_command(run_info['cmd'], ds, message or rev["run_message"], rerun_info=run_info): yield r
def _apply_output(ds, jdir, sdir):
    common = dict(
        action='htc_result_merge',
        refds=text_type(ds.pathobj),
        path=text_type(jdir),
        logger=lgr,
    )
    args_path = sdir / 'runargs.json'
    try:
        # anything below PY3.6 needs stringification
        runargs = json_py.load(str(args_path))
    except Exception as e:
        yield dict(
            common,
            status='error',
            message=("could not load submission arguments from '%s': %s",
                     args_path, exc_str(e)))
        return
    # TODO check recursive status to have dataset clean
    # TODO have query limited to outputs if explicit was given
    # prep outputs (unlock or remove)
    # COPY: this is a copy of the code from run_command
    outputs = GlobbedPaths(runargs['outputs'],
                           pwd=runargs['pwd'],
                           expand=runargs['expand'] in ["outputs", "both"])
    if outputs:
        for res in _install_and_reglob(ds, outputs):
            yield res
        for res in _unlock_or_remove(ds, outputs.expand(full=True)):
            yield res
    # END COPY

    # TODO need to imitate PWD change, if needed
    # -> extract tarball
    try:
        stdout, stderr = Runner().run(
            ['tar', '-xf', '{}'.format(jdir / 'output')],
            cwd=ds.path)
    except CommandError as e:
        yield dict(
            common,
            status='error',
            message=("could not un-tar job results from '%s' at '%s': %s",
                     str(jdir / 'output'), ds.path, exc_str(e)))
        return

    # fake a run record, as if we would have executed locally
    for res in run_command(
            runargs['cmd'],
            dataset=ds,
            inputs=runargs['inputs'],
            outputs=runargs['outputs'],
            expand=runargs['expand'],
            explicit=runargs['explicit'],
            message=runargs['message'],
            sidecar=runargs['sidecar'],
            # TODO pwd, exit code
            extra_info=None,
            inject=True):
        yield res

    res = list(_remove_dir(ds, jdir))[0]
    res['action'] = 'htc_results_merge'
    res['status'] = 'ok'
    res.pop('message', None)
    # not removing the submission files (for now), even if the last job output
    # might be removed now. Those submissions are tiny and could be resubmitted
    yield res