def test_format_command_strip_leading_dashes(path):
    ds = Dataset(path).create()
    eq_(format_command(ds, ["--", "cmd", "--opt"]), "cmd --opt")
    eq_(format_command(ds, ["--"]), "")
    # Can repeat to escape.
    eq_(format_command(ds, ["--", "--", "ok"]), "-- ok")
    # String stays as is.
    eq_(format_command(ds, "--"), "--")
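# A minimal usage sketch (an assumption for illustration, not part of the
# original test module): besides stripping leading "--" tokens, format_command
# fills {inputs}, {outputs}, and other placeholders via str.format-style
# substitution. The path "/tmp/fmt-demo" is hypothetical; the import path
# matches the snippets in this section (newer DataLad releases moved
# format_command to datalad.core.local.run).
from datalad.api import Dataset
from datalad.interface.run import format_command

demo_ds = Dataset("/tmp/fmt-demo").create(force=True)
# Prints something like: cp a.dat b.dat
# (paths may be shell-quoted depending on their content)
print(format_command(demo_ds, "cp {inputs} {outputs}",
                     inputs=["a.dat"], outputs=["b.dat"]))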
def _datalad_format_command(ds, spec):
    """Adjust `spec` to use `datalad run`-style formatting.

    Create "*_array" keys and format commands with DataLad's
    `format_command`.
    """
    from datalad.interface.run import format_command
    # DataLad's GlobbedPaths _should_ be the same as ours, but let's use
    # DataLad's to avoid potential discrepancies with datalad-run's behavior.
    from datalad.interface.run import GlobbedPaths

    batch_parameters = spec.get("_resolved_batch_parameters") or [{}]
    spec["_command_array"] = []
    spec["_inputs_array"] = []
    spec["_outputs_array"] = []
    for cp in batch_parameters:
        fmt_kwds = {}
        for key in ["inputs", "outputs"]:
            if key in spec:
                parametrized = [io.format(p=cp) for io in spec[key]]
                gp = GlobbedPaths(parametrized)
                spec["_{}_array".format(key)].append(gp.expand(dot=False))
                fmt_kwds[key] = gp
        fmt_kwds["p"] = cp
        cmd_str = spec.get("_container_command_str",
                           spec["_resolved_command_str"])
        spec["_command_array"].append(format_command(ds, cmd_str, **fmt_kwds))

    exinputs = spec.get("_extra_inputs", [])
    spec["_extra_inputs_array"] = [exinputs] * len(batch_parameters)
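# Hypothetical illustration (parameter names made up, not from the original
# module): with two resolved batch parameter sets, the helper above appends
# one formatted command per set, so downstream code can run them as a batch.
#
#   spec["_resolved_batch_parameters"] = [{"subj": "01"}, {"subj": "02"}]
#   spec["_resolved_command_str"] = "process sub-{p[subj]}"
#   spec["inputs"] = ["sub-{p[subj]}/*"]
#   _datalad_format_command(ds, spec)
#   # spec["_command_array"] -> ["process sub-01", "process sub-02"]
#   # spec["_inputs_array"]  -> one expanded glob list per parameter set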
def fn(dset, results):
    header = """\
#!/bin/sh
#
# This file was generated by running (the equivalent of)
#
# datalad rerun --script={script}{since} {revision}
#
# in {ds}{path}\n"""
    ofh.write(
        header.format(
            script=script,
            since="" if since is None else " --since=" + since,
            revision=dset.repo.get_hexsha(revision),
            ds='dataset {} at '.format(dset.id) if dset.id else '',
            path=dset.path))

    for res in results:
        if res["status"] != "ok":
            yield res
            return

        if "run_info" not in res:
            continue

        run_info = res["run_info"]
        cmd = run_info["cmd"]

        expanded_cmd = format_command(
            dset, cmd,
            **dict(run_info,
                   dspath=dset.path,
                   pwd=op.join(dset.path, run_info["pwd"])))

        msg = res["run_message"]
        if msg == _format_cmd_shorty(expanded_cmd):
            msg = ''

        ofh.write(
            "\n" + "".join("# " + ln for ln in msg.splitlines(True)) + "\n")
        commit_descr = dset.repo.describe(res["commit"])
        ofh.write('# (record: {})\n'.format(
            commit_descr if commit_descr else res["commit"]))
        ofh.write(expanded_cmd + "\n")

    if ofh is not sys.stdout:
        ofh.close()

    if ofh is sys.stdout:
        yield None
    else:
        yield get_status_dict("run", ds=dset, status="ok",
                              path=script,
                              message=("Script written to %s", script))
def _datalad_format_command(ds, spec):
    """Adjust `spec` to use `datalad run`-style formatting.

    The "inputs", "outputs", and "command_str" keys in `spec` are replaced,
    and the originals are moved under the corresponding `*_unexpanded` keys.
    """
    from datalad.interface.run import format_command
    from datalad.interface.run import GlobbedPaths

    fmt_kwds = {}
    for key in ["inputs", "outputs"]:
        if key in spec:
            spec["{}_unexpanded".format(key)] = spec[key]
            gp = GlobbedPaths(spec[key])
            spec[key] = gp.expand(dot=False)
            fmt_kwds[key] = gp

    cmd_expanded = format_command(ds, spec["command_str"], **fmt_kwds)
    spec["command_str_unexpanded"] = spec["command_str"]
    spec["command_str"] = cmd_expanded
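# Hypothetical illustration (keys and paths made up, not from the original
# module): the effect of the helper above on a spec. Exact quoting of the
# expanded command depends on the DataLad version.
#
#   spec = {"inputs": ["data/*.csv"],
#           "outputs": ["result.dat"],
#           "command_str": "analyze {inputs} > {outputs}"}
#   _datalad_format_command(ds, spec)
#   # afterwards:
#   #   spec["inputs_unexpanded"]      == ["data/*.csv"]
#   #   spec["inputs"]                 == the expanded glob matches, e.g.
#   #                                     ["data/a.csv", "data/b.csv"]
#   #   spec["command_str_unexpanded"] == "analyze {inputs} > {outputs}"
#   #   spec["command_str"]            == something like
#   #                                     "analyze data/a.csv data/b.csv > result.dat"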
def __call__(cmd=None,
             dataset=None,
             inputs=None,
             outputs=None,
             expand=None,
             explicit=False,
             message=None,
             sidecar=None,
             jobcfg='default',
             submit=False):
    # TODO make sure a different rel_pwd is handled properly on the remote end
    pwd, rel_pwd = get_command_pwds(dataset)

    ds = require_dataset(dataset, check_installed=True,
                         purpose='preparing a remote command execution')

    try:
        cmd_expanded = format_command(ds, cmd,
                                      pwd=pwd,
                                      dspath=ds.path,
                                      inputs=inputs,
                                      outputs=outputs)
    except KeyError as exc:
        yield get_status_dict(
            'htcprepare',
            ds=ds,
            status='impossible',
            message=('command has an unrecognized placeholder: %s', exc))
        return

    transfer_files_list = ['pre.sh', 'post.sh']

    # where all the submission packs live
    subroot_dir = get_submissions_dir(ds)
    subroot_dir.mkdir(parents=True, exist_ok=True)

    # location of to-be-created submission
    submission_dir = ut.Path(
        tempfile.mkdtemp(prefix='submit_', dir=text_type(subroot_dir)))
    submission = submission_dir.name[7:]

    split_cmd = shlex.split(cmd_expanded)
    # is this a singularity job?
    singularity_job = get_singularity_jobspec(split_cmd)
    if not singularity_job:
        with (submission_dir / 'runner.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/runner_direct.sh'))
        job_args = split_cmd
    else:
        # link the container into the submission dir
        (submission_dir / 'singularity.simg').symlink_to(
            ut.Path(singularity_job[0]).resolve())
        transfer_files_list.append('singularity.simg')
        # arguments of the job
        job_args = singularity_job[1]
        job_args.insert(0, 'singularity.simg')

        # TODO conditional on run_as_user=false
        with (submission_dir / 'runner.sh').open('wb') as f:
            f.write(
                resource_string(
                    'datalad_htcondor',
                    'resources/scripts/runner_singularity_anon.sh'))
    make_executable(submission_dir / 'runner.sh')

    # htcondor wants the log dir to exist at submit time
    # TODO ATM we only support a single job per cluster submission
    (submission_dir / 'job_0' / 'logs').mkdir(parents=True)

    # TODO make job pre/post script selection configurable
    with (submission_dir / 'pre.sh').open('wb') as f:
        f.write(
            resource_string('datalad_htcondor',
                            'resources/scripts/pre_posix_chirp.sh'))
    make_executable(submission_dir / 'pre.sh')

    with (submission_dir / 'post.sh').open('wb') as f:
        f.write(
            resource_string('datalad_htcondor',
                            'resources/scripts/post_posix.sh'))
    make_executable(submission_dir / 'post.sh')

    # API support selection (bound dataset methods and such)
    # internal import to avoid circularities
    from datalad.api import (
        rev_status as status,
    )

    inputs = GlobbedPaths(inputs, pwd=pwd)
    prepare_inputs(ds, inputs)

    # it could be that an input expression does not expand,
    # because it doesn't match anything. In such a case
    # we need to filter out such globs to not confuse
    # the status() call below that only takes real paths
    inputs = [p for p in inputs.expand(full=True) if op.lexists(p)]
    # now figure out what matches the remaining paths in the
    # entire repo and dump a list of files to transfer
    if inputs:
        with (submission_dir / 'input_files').open('w') as f:
            # TODO disable output renderer
            for p in ds.rev_status(
                    path=inputs,
                    # TODO do we really want that True? I doubt it
                    # this might pull in the world
                    recursive=False,
                    # we would have otherwise no idea
                    untracked='no',
                    result_renderer=None):
                f.write(text_type(p['path']))
                f.write(u'\0')
        transfer_files_list.append('input_files')

    if outputs:
        # write the output globs to a file for eval on the execute
        # side
        # XXX we may not want to eval them on the remote side
        # at all, however. This would make things different
        # than with local execute, where we also just write to
        # a dataset and do not have an additional filter
        (submission_dir / 'output_globs').write_text(
            # we need a final trailing delimiter as a terminator
            u'\0'.join(outputs) + u'\0')
        transfer_files_list.append('output_globs')

    (submission_dir / 'source_dataset_location').write_text(
        text_type(ds.pathobj) + op.sep)
    transfer_files_list.append('source_dataset_location')

    with (submission_dir / 'cluster.submit').open('w') as f:
        f.write(
            submission_template.format(
                executable='runner.sh',
                # TODO if singularity_job else 'job.sh',
                transfer_files_list=','.join(
                    op.join(op.pardir, f) for f in transfer_files_list),
                **submission_defaults))
        f.write(
            u'\narguments = "{}"\nqueue\n'.format(
                # TODO deal with single quotes in the args
                ' '.join("'{}'".format(a) for a in job_args)))

    # dump the run command args into a file for re-use
    # when the result is merged
    # include even args that are already evaluated and
    # acted upon, to be able to convince `run` to create
    # a full run record that maybe could be re-run
    # locally
    json_py.dump(
        dict(
            cmd=cmd,
            inputs=inputs,
            outputs=outputs,
            expand=expand,
            explicit=explicit,
            message=message,
            sidecar=sidecar,
            # report the PWD too, to give `run` a chance
            # to be correct after the fact
            pwd=pwd,
        ),
        text_type(submission_dir / 'runargs.json'))

    # we use this file to inspect what state this submission is in
    (submission_dir / 'status').write_text(u'prepared')
    yield get_status_dict(action='htc_prepare',
                          status='ok',
                          refds=text_type(ds.pathobj),
                          submission=submission,
                          path=text_type(submission_dir),
                          logger=lgr)

    if submit:
        try:
            Runner(cwd=text_type(submission_dir)).run(
                ['condor_submit', 'cluster.submit'],
                log_stdout=False,
                log_stderr=False,
                expect_stderr=True,
                expect_fail=True,
            )
            (submission_dir / 'status').write_text(u'submitted')
            yield get_status_dict(action='htc_submit',
                                  status='ok',
                                  submission=submission,
                                  refds=text_type(ds.pathobj),
                                  path=text_type(submission_dir),
                                  logger=lgr)
        except CommandError as e:
            yield get_status_dict(action='htc_submit',
                                  status='error',
                                  submission=submission,
                                  message=('condor_submit failed: %s',
                                           exc_str(e)),
                                  refds=text_type(ds.pathobj),
                                  path=text_type(submission_dir),
                                  logger=lgr)
# checks on unrelated subtrees.
subject_dir = op.join(dataset.path, "sub-{}".format(subject))
participants = op.join(dataset.path, "participants.tsv")

from datalad.interface.run import format_command
# TODO: This pattern is likely incomplete. Also: run prob. needs to break
# down format_command into smaller pieces (needs mere substitutions).
# TODO: Post run issue. Globs in outputs need to be evaluated AFTER execution
# (again). May not yet exist.
outputs = [subject_dir, participants]
task = dataset.config.get("datalad.run.substitutions.bids-task")
if task and task != "None":
    outputs.append(
        op.join(
            dataset.path,
            format_command(dataset,
                           "task-{bids-task}_{bids-modality}.json")))

# we expect location to be a directory (with DICOMS somewhere beneath)
if not op.isdir(location):
    raise ValueError("%s is not a directory" % location)

from datalad.utils import with_pathsep
# append location with /* to specify inputs for containers-run;
# we need to get those files, but nothing from within a possible .datalad,
# for example
inputs = [with_pathsep(location) + "*", rel_spec_path]

run_results = list()
with patch.dict('os.environ',
                {'HIRNI_STUDY_SPEC': rel_spec_path,
                 'HIRNI_SPEC2BIDS_SUBJECT': subject}):