Example No. 1
def _datalad_format_command(ds, spec):
    """Adjust `spec` to use `datalad run`-style formatting.

    The "inputs", "outputs", and "command_str" keys in `spec` are replaced and
    the original are moved under the `*_unexpanded` key.
    """
    from datalad.interface.run import format_command
    from datalad.interface.run import GlobbedPaths

    fmt_kwds = {}
    for key in ["inputs", "outputs"]:
        if key in spec:
            spec["{}_unexpanded".format(key)] = spec[key]
            gp = GlobbedPaths(spec[key])
            spec[key] = gp.expand(dot=False)
            fmt_kwds[key] = gp

    cmd_expanded = format_command(ds, spec["command_str"], **fmt_kwds)
    spec["command_str_unexpanded"] = spec["command_str"]
    spec["command_str"] = cmd_expanded
Example No. 2
def test_globbedpaths_get_sub_patterns():
    gp = GlobbedPaths([], "doesn't matter")
    for pat, expected in [
            # If there are no patterns in the directory component, we get no
            # sub-patterns.
            ("", []),
            ("nodir", []),
            (op.join("nomagic", "path"), []),
            (op.join("nomagic", "path*"), []),
            # Create sub-patterns from leading path, successively dropping the
            # right-most component.
            (op.join("s*", "path"), ["s*" + op.sep]),
            (op.join("s", "ss*", "path"), [op.join("s", "ss*") + op.sep]),
            (op.join("s", "ss*", "path*"), [op.join("s", "ss*") + op.sep]),
            (op.join("s", "ss*" + op.sep), []),
            (op.join("s*", "ss", "path*"),
             [op.join("s*", "ss") + op.sep, "s*" + op.sep]),
            (op.join("s?", "ss", "sss*", "path*"),
             [op.join("s?", "ss", "sss*") + op.sep,
              op.join("s?", "ss") + op.sep,
              "s?" + op.sep]),
    ]:
        eq_(gp._get_sub_patterns(pat), expected)
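
The rule these cases encode can be stated compactly. Below is an illustrative sketch of that rule, not the datalad implementation; it merely reproduces the expectations listed above:

import glob
import os.path as op


def sub_patterns_sketch(pattern):
    """Sketch of the sub-pattern rule exercised above (not datalad's code)."""
    head, tail = op.split(pattern)
    if not tail:
        # The pattern ended with a separator, so it is already a directory
        # pattern; start from its parent instead.
        head, _ = op.split(head)
    subs = []
    # Successively drop the right-most directory component, keeping each
    # prefix that still contains glob magic.
    while head and glob.has_magic(head):
        subs.append(head + op.sep)
        head, _ = op.split(head)
    return subs


assert sub_patterns_sketch(op.join("s*", "ss", "path*")) == \
    [op.join("s*", "ss") + op.sep, "s*" + op.sep]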
Example No. 3
def test_globbedpaths(path):
    for patterns, expected in [
            (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
            (["*.txt", "*.dat"], {"1.txt", "2.dat", "3.txt"}),
            (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)), {opj(path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([opj(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # Glob results are sorted within each pattern, with pattern order preserved.
    glob_results = {"z": "z", "a": ["x", "d", "b"]}
    with patch('datalad.interface.run.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat"]), (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.WARN) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
        GlobbedPaths(["also not"], pwd=path, warn=False).expand()
        assert_not_in("No matching files found for 'also not'", cml.out)
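
Distilled from the assertions above, a minimal usage sketch; the import mirrors Example No. 1, and the working directory is hypothetical:

from datalad.interface.run import GlobbedPaths

gp = GlobbedPaths(["*.txt"], pwd="/tmp/dataset")   # hypothetical directory
print(gp.expand())             # matches relative to pwd, sorted per pattern
print(gp.expand(full=True))    # absolute paths under pwd
print(GlobbedPaths(["*.txt"], pwd="/tmp/dataset", expand=False).paths)  # raw patterns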
Example No. 4
    def __call__(cmd=None,
                 dataset=None,
                 inputs=None,
                 outputs=None,
                 expand=None,
                 explicit=False,
                 message=None,
                 sidecar=None,
                 jobcfg='default',
                 submit=False):

        # TODO make sure a different rel_pwd is handled properly on the remote end
        pwd, rel_pwd = get_command_pwds(dataset)

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='preparing a remote command execution')

        try:
            cmd_expanded = format_command(ds,
                                          cmd,
                                          pwd=pwd,
                                          dspath=ds.path,
                                          inputs=inputs,
                                          outputs=outputs)
        except KeyError as exc:
            yield get_status_dict(
                'htcprepare',
                ds=ds,
                status='impossible',
                message=('command has an unrecognized placeholder: %s', exc))
            return

        transfer_files_list = ['pre.sh', 'post.sh']

        # where all the submission packs live
        subroot_dir = get_submissions_dir(ds)
        subroot_dir.mkdir(parents=True, exist_ok=True)

        # location of to-be-created submission
        submission_dir = ut.Path(
            tempfile.mkdtemp(prefix='submit_', dir=text_type(subroot_dir)))
        # the submission ID is the directory name without the 'submit_' prefix
        submission = submission_dir.name[7:]

        split_cmd = shlex.split(cmd_expanded)
        # is this a singularity job?
        singularity_job = get_singularity_jobspec(split_cmd)
        if not singularity_job:
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string('datalad_htcondor',
                                    'resources/scripts/runner_direct.sh'))
            job_args = split_cmd
        else:
            # link the container into the submission dir
            (submission_dir / 'singularity.simg').symlink_to(
                ut.Path(singularity_job[0]).resolve())
            transfer_files_list.append('singularity.simg')
            # arguments of the job
            job_args = singularity_job[1]
            job_args.insert(0, 'singularity.simg')

            # TODO conditional on run_as_user=false
            with (submission_dir / 'runner.sh').open('wb') as f:
                f.write(
                    resource_string(
                        'datalad_htcondor',
                        'resources/scripts/runner_singularity_anon.sh'))
        make_executable(submission_dir / 'runner.sh')

        # htcondor wants the log dir to exist at submit time
        # TODO ATM we only support a single job per cluster submission
        (submission_dir / 'job_0' / 'logs').mkdir(parents=True)

        # TODO make job pre/post script selection configurable
        with (submission_dir / 'pre.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/pre_posix_chirp.sh'))
        make_executable(submission_dir / 'pre.sh')

        with (submission_dir / 'post.sh').open('wb') as f:
            f.write(
                resource_string('datalad_htcondor',
                                'resources/scripts/post_posix.sh'))
        make_executable(submission_dir / 'post.sh')

        # API support selection (bound dataset methods and such)
        # internal import to avoid circularities
        from datalad.api import (
            rev_status as status, )

        inputs = GlobbedPaths(inputs, pwd=pwd)
        prepare_inputs(ds, inputs)

        # it could be that an input expression does not expand,
        # because it doesn't match anything. In such a case
        # we need to filter out such globs to not confuse
        # the status() call below that only takes real paths
        inputs = [p for p in inputs.expand(full=True) if op.lexists(p)]
        # now figure out what matches the remaining paths in the
        # entire repo and dump a list of files to transfer
        if inputs:
            with (submission_dir / 'input_files').open('w') as f:
                # TODO disable output renderer
                for p in ds.rev_status(
                        path=inputs,
                        # TODO do we really want that True? I doubt it
                        # this might pull in the world
                        recursive=False,
                        # we would have otherwise no idea
                        untracked='no',
                        result_renderer=None):
                    f.write(text_type(p['path']))
                    f.write(u'\0')
                transfer_files_list.append('input_files')

        if outputs:
            # write the output globs to a file for eval on the execute
            # side
            # XXX we may not want to eval them on the remote side
            # at all, however. This would make things different
            # than with local execute, where we also just write to
            # a dataset and do not have an additional filter
            (submission_dir / 'output_globs').write_text(
                # we need a final trailing delimiter as a terminator
                u'\0'.join(outputs) + u'\0')
            transfer_files_list.append('output_globs')

        (submission_dir /
         'source_dataset_location').write_text(text_type(ds.pathobj) + op.sep)
        transfer_files_list.append('source_dataset_location')

        with (submission_dir / 'cluster.submit').open('w') as f:
            f.write(
                submission_template.format(
                    executable='runner.sh',
                    # TODO if singularity_job else 'job.sh',
                    transfer_files_list=','.join(
                        op.join(op.pardir, f) for f in transfer_files_list),
                    **submission_defaults))

            f.write(u'\narguments = "{}"\nqueue\n'.format(
                # TODO deal with single quotes in the args
                ' '.join("'{}'".format(a) for a in job_args)))

        # dump the run command args into a file for re-use
        # when the result is merged
        # include even args that are already evaluated and
        # acted upon, to be able to convince `run` to create
        # a full run record that maybe could be re-run
        # locally
        json_py.dump(
            dict(
                cmd=cmd,
                inputs=inputs,
                outputs=outputs,
                expand=expand,
                explicit=explicit,
                message=message,
                sidecar=sidecar,
                # report the PWD too, to give `run` a chance
                # to be correct after the fact
                pwd=pwd,
            ),
            text_type(submission_dir / 'runargs.json'))

        # we use this file to inspect what state this submission is in
        (submission_dir / 'status').write_text(u'prepared')

        yield get_status_dict(action='htc_prepare',
                              status='ok',
                              refds=text_type(ds.pathobj),
                              submission=submission,
                              path=text_type(submission_dir),
                              logger=lgr)

        if submit:
            try:
                Runner(cwd=text_type(submission_dir)).run(
                    ['condor_submit', 'cluster.submit'],
                    log_stdout=False,
                    log_stderr=False,
                    expect_stderr=True,
                    expect_fail=True,
                )
                (submission_dir / 'status').write_text(u'submitted')
                yield get_status_dict(action='htc_submit',
                                      status='ok',
                                      submission=submission,
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
            except CommandError as e:
                yield get_status_dict(action='htc_submit',
                                      status='error',
                                      submission=submission,
                                      message=('condor_submit failed: %s',
                                               exc_str(e)),
                                      refds=text_type(ds.pathobj),
                                      path=text_type(submission_dir),
                                      logger=lgr)
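
The remote-side consumers of 'input_files' and 'output_globs' are not shown in this example; the format written above is simply a NUL-delimited list with a trailing terminator, so reading it back could look like this sketch:

def read_nul_list(path):
    """Sketch: read back a NUL-delimited file list as written above."""
    with open(path) as f:
        content = f.read()
    # the trailing NUL acts as a terminator, so drop the empty last field
    return [p for p in content.split(u'\0') if p]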
Example No. 5
def _apply_output(ds, jdir, sdir):
    common = dict(
        action='htc_result_merge',
        refds=text_type(ds.pathobj),
        path=text_type(jdir),
        logger=lgr,
    )
    args_path = sdir / 'runargs.json'
    try:
        # anything below PY3.6 needs stringification
        runargs = json_py.load(str(args_path))
    except Exception as e:
        yield dict(
            common,
            status='error',
            message=("could not load submission arguments from '%s': %s",
                     args_path, exc_str(e)))
        return
    # TODO check recursive status to have dataset clean
    # TODO have query limited to outputs if explicit was given
    # prep outputs (unlock or remove)
    # COPY: this is a copy of the code from run_command
    outputs = GlobbedPaths(runargs['outputs'],
                           pwd=runargs['pwd'],
                           expand=runargs['expand'] in ["outputs", "both"])
    if outputs:
        for res in _install_and_reglob(ds, outputs):
            yield res
        for res in _unlock_or_remove(ds, outputs.expand(full=True)):
            yield res
    # END COPY

    # TODO need to imitate PWD change, if needed
    # -> extract tarball
    try:
        stdout, stderr = Runner().run(
            ['tar', '-xf', '{}'.format(jdir / 'output')], cwd=ds.path)
    except CommandError as e:
        yield dict(
            common,
            status='error',
            message=("could not un-tar job results from '%s' at '%s': %s",
                     str(jdir / 'output'), ds.path, exc_str(e)))
        return

    # fake a run record, as if we would have executed locally
    for res in run_command(
            runargs['cmd'],
            dataset=ds,
            inputs=runargs['inputs'],
            outputs=runargs['outputs'],
            expand=runargs['expand'],
            explicit=runargs['explicit'],
            message=runargs['message'],
            sidecar=runargs['sidecar'],
            # TODO pwd, exit code
            extra_info=None,
            inject=True):
        yield res

    res = list(_remove_dir(ds, jdir))[0]
    res['action'] = 'htc_results_merge'
    res['status'] = 'ok'
    res.pop('message', None)
    # not removing the submission files (for now), even if the last job output
    # might be removed now. Those submissions are tiny and could be resubmitted
    yield res
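
The runargs.json consumed here is the file written in Example No. 4; both sides use datalad's JSON helpers. A minimal round-trip sketch, assuming `json_py` refers to `datalad.support.json_py`:

from datalad.support import json_py

json_py.dump({"cmd": "echo hi", "outputs": ["out.txt"]}, "runargs.json")
runargs = json_py.load("runargs.json")  # plain str path, cf. the PY3.6 note above
assert runargs["cmd"] == "echo hi"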