Example #1
File: test_cmd.py Project: jgors/datalad
def test_runner_log_stdout():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
Example #2
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection yet of this resulting in getting stuck.

    runner = Runner()
    cmd = '%s -c "import datalad.tests.heavyoutput;"' % sys.executable
    with swallow_outputs() as cm:
        ret = runner.run(cmd,
                         log_stderr=False,
                         log_stdout=False,
                         expect_stderr=True)
        eq_(cm.err, cm.out)  # they are identical in that script
        eq_(cm.out[:10], "[0, 1, 2, ")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    # do it again with capturing:
    ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)

    # and now original problematic command with a massive single line
    if not log_online:
        # We know it would get stuck in online mode
        cmd = '%s -c "import sys; x=str(list(range(1000))); [(sys.stdout.write(x), sys.stderr.write(x)) for i in range(100)];"' % sys.executable
        ret = runner.run(cmd,
                         log_stderr=True,
                         log_stdout=True,
                         expect_stderr=True)
Example #3
def compress_files(files, archive, path=None, overwrite=True):
    """Compress `files` into an `archive` file

    Parameters
    ----------
    files : list of str
    archive : str
    path : str
      Alternative directory under which compressor will be invoked, to e.g.
      take into account relative paths of files and/or archive
    overwrite : bool
      Whether to allow overwriting the target archive file if one already exists
    """
    runner = Runner(cwd=path)
    apath = Path(archive)
    if apath.exists():
        if overwrite:
            apath.unlink()
        else:
            raise ValueError(
                'Target archive {} already exists and overwrite is forbidden'
                .format(apath))
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        cmd = '7z u .tar -so -- {} | 7z u -si -- {}'.format(
            ' '.join(quote_cmdlinearg(f) for f in files),
            quote_cmdlinearg(str(apath)),
        )
    else:
        cmd = ['7z', 'u', str(apath), '--'] + files
    runner.run(cmd)
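A minimal usage sketch of the function above; it assumes `compress_files` (and datalad's `Runner`) are in scope and a `7z` binary is on PATH, and the file paths are hypothetical:

from pathlib import Path

# hypothetical inputs; a '.tar.*' suffix triggers the two-stage 7z pipe above
files = ['data/a.txt', 'data/b.txt']
compress_files(files, 'backup.tar.gz', path='.', overwrite=True)
assert Path('backup.tar.gz').exists()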
Example #4
def test_runner_log_stdout():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(
            runner.commands, [],
            "Run of: %s resulted in non-empty buffer: %s" %
            (cmd, runner.commands.__str__()))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))
Example #5
def get_singularity_jobspec(cmd):
    """Extract the runscript of a singularity container used as an executable

    Parameters
    ----------
    cmd : list
      A command as an argument list.

    Returns
    -------
    None or str, None or list
      If singularity is not available, or the executable in the command is not
      a singularity image given by its path, None is returned. Otherwise the
      runscript of the container is returned as a string. The second value is
      None if the first is None, or a list of arguments to the runscript.
    """
    # get the path to the command's executable
    exec_path = cmd[0]

    runner = Runner()
    if not op.exists(exec_path):
        # probably a command from PATH
        return

    # this is a real file, not just a command on the path
    try:
        stdout, stderr = runner.run(
            ['singularity', '--version'],
            log_stdout=True,
            log_stderr=True,
            expect_stderr=True,
            expect_fail=True,
        )
        # TODO could be used to tailor handling to particular versions
    except CommandError as e:  # pragma: no cover
        # we do not have a singularity installation that we can handle
        # log debug, because there is no guarantee that the executable
        # actually was a singularity container
        lgr.debug('No suitable singularity version installed: %s', exc_str(e))
        return
    # we have singularity
    try:
        stdout, stderr = runner.run(
            # stringification only needed for pythons older than 3.6
            ['singularity', 'exec', exec_path, 'cat', '/singularity'],
            log_stdout=True,
            log_stderr=True,
            expect_stderr=True,
            expect_fail=True,
        )
        # TODO could be used to tailor handling to particular versions
    except CommandError as e:
        # we do not have a singularity installation that we can handle
        # log debug, because there is no guarantee that the executable
        # actually was a singularity container
        lgr.debug('%s is not a singularity image: %s', exec_path, exc_str(e))
        return
    # all but the container itself are the arguments
    return exec_path, cmd[1:]
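A hedged usage sketch of the function above; it assumes the function is in scope, singularity is installed, and uses a hypothetical image path:

# './analysis.simg' is a hypothetical container image path
spec = get_singularity_jobspec(['./analysis.simg', '--threshold', '0.5'])
if spec is None:
    print('not a usable singularity image')
else:
    exec_path, args = spec
    print('image: %s, runscript arguments: %s' % (exec_path, args))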
Example #6
    def create_info_file(self):
        runner = Runner()
        annex_version = runner.run("git annex version")[0].split()[2]
        git_version = runner.run("git --version")[0].split()[2]
        self.create_file('INFO.txt',
                         "git: %s\n"
                         "annex: %s\n"
                         "datalad: %s\n"
                         % (git_version, annex_version, __version__),
                         annex=False)
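The version parsing above simply splits the tools' stdout; a standard-library-only sketch of the same idea (it assumes `git` is on PATH):

import subprocess

out = subprocess.run(['git', '--version'], capture_output=True, text=True).stdout
git_version = out.split()[2]  # 'git version 2.39.2' -> '2.39.2'
print(git_version)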
Example #7
File: test_cmd.py Project: jgors/datalad
def test_runner_failure(dir):

    runner = Runner()
    failing_cmd = ['git', 'annex', 'add', 'notexistent.dat']
    assert_raises(CommandError, runner.run, failing_cmd, cwd=dir)

    try:
        runner.run(failing_cmd, cwd=dir)
    except CommandError as e:
        assert_equal(1, e.code)
        assert_in('notexistent.dat not found', e.stderr)
Example #8
def test_runner_failure(dir):

    runner = Runner()
    failing_cmd = ['git', 'annex', 'add', 'notexistent.dat']
    assert_raises(CommandError, runner.run, failing_cmd, cwd=dir)

    try:
        runner.run(failing_cmd, cwd=dir)
    except CommandError as e:
        assert_equal(1, e.code)
        assert_in('notexistent.dat not found', e.stderr)
Example #9
File: test_cmd.py Project: jgors/datalad
def test_runner_log_stderr():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
Example #10
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" % (e.code, (e.stdout, e.stderr)))
Example #11
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" %
                             (e.code, (e.stdout, e.stderr)))
Example #12
class runner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """
    def setup(self):
        from datalad.cmd import Runner
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")
Example #13
def test_cfg_passthrough(path):
    runner = Runner()
    _ = runner.run([
        'datalad', '-c', 'annex.tune.objecthash1=true', '-c',
        'annex.tune.objecthashlower=true', 'create', path
    ])
    ds = Dataset(path)
    eq_(ds.config.get('annex.tune.objecthash1', None), 'true')
    eq_(ds.config.get('annex.tune.objecthashlower', None), 'true')
Example #14
def test_quoting(path):
    ds = Dataset(op.join(path, OBSCURE_FILENAME)).create(force=True)
    # Our custom procedure fails if it receives anything other than two
    # procedure arguments (so the script itself receives 3). Check a few cases
    # from the Python API and CLI.
    ds.config.add("datalad.locations.dataset-procedures",
                  "code",
                  where="dataset")
    with swallow_outputs():
        ds.run_procedure(spec=["just2args", "with ' sing", 'with " doub'])
        with assert_raises(CommandError):
            ds.run_procedure(spec=["just2args", "still-one arg"])

        runner = Runner(cwd=ds.path)
        runner.run(
            "datalad run-procedure just2args \"with ' sing\" 'with \" doub'")
        with assert_raises(CommandError):
            runner.run("datalad run-procedure just2args 'still-one arg'")
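For reference, this is how the quoted command line above splits into arguments; a standard-library-only sketch:

import shlex

print(shlex.split("datalad run-procedure just2args \"with ' sing\" 'with \" doub'"))
# -> ['datalad', 'run-procedure', 'just2args', "with ' sing", 'with " doub']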
Example #15
def test_runner_log_stderr():
    # TODO: no idea how to check correct logging via any kind of assertion yet.

    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))
Example #16
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        # suppress log output in case it was set to high values
        with patch.dict('os.environ', {'DATALAD_LOG_LEVEL': 'WARN'}):
            output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" % (e.code, (e.stdout, e.stderr)))
    return output
Example #17
def test_cfg_passthrough(path):
    runner = Runner()
    _ = runner.run(
        ['datalad',
         '-c', 'annex.tune.objecthash1=true',
         '-c', 'annex.tune.objecthashlower=true',
         'create', path])
    ds = Dataset(path)
    eq_(ds.config.get('annex.tune.objecthash1', None), 'true')
    eq_(ds.config.get('annex.tune.objecthashlower', None), 'true')
Example #18
File: test_cmd.py Project: jgors/datalad
def check_runner_heavy_output(log_online):
    # TODO: again, no automatic detection yet of this resulting in getting stuck.

    runner = Runner()
    cmd = '%s -c "import datalad.tests.heavyoutput;"' % sys.executable
    with swallow_outputs() as cm:
        ret = runner.run(cmd, log_stderr=False, log_stdout=False, expect_stderr=True)
        eq_(cm.err, cm.out) # they are identical in that script
        eq_(cm.out[:10], "[0, 1, 2, ")
        eq_(cm.out[-15:], "997, 998, 999]\n")

    # do it again with capturing:
    ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)

    # and now original problematic command with a massive single line
    if not log_online:
        # We know it would get stuck in online mode
        cmd = '%s -c "import sys; x=str(list(range(1000))); [(sys.stdout.write(x), sys.stderr.write(x)) for i in range(100)];"' % sys.executable
        ret = runner.run(cmd, log_stderr=True, log_stdout=True, expect_stderr=True)
Example #19
class RunnerSuite(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """

    def setup(self):
        from datalad.cmd import Runner
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")
Example #20
def check_run_and_get_output(cmd):
    runner = Runner()
    try:
        # suppress log output in case it was set to high values
        with patch.dict('os.environ', {'DATALAD_LOGLEVEL': 'WARN'}):
            output = runner.run(["datalad", "--help"])
    except CommandError as e:
        raise AssertionError("'datalad --help' failed to start normally. "
                             "Exited with %d and output %s" %
                             (e.code, (e.stdout, e.stderr)))
    return output
Example #21
def decompress_file(archive, dir_):
    """Decompress `archive` into a directory `dir_`

    This is an alternative implementation without patool, but directly calling 7z.

    Parameters
    ----------
    archive: str
    dir_: str
    """
    apath = Path(archive)
    runner = Runner(cwd=dir_)
    if len(apath.suffixes) > 1 and apath.suffixes[-2] == '.tar':
        # we have a compressed tar file that needs to be fed through the
        # decompressor first
        # hangs somehow, do via single string arg
        #cmd = ['7z', 'x', archive, '-so', '|', '7z', 'x', '-si', '-ttar']
        cmd = '7z x {} -so | 7z x -si -ttar'.format(quote_cmdlinearg(archive))
    else:
        # fire and forget
        cmd = ['7z', 'x', archive]
    runner.run(cmd)
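A minimal usage sketch, assuming the function above is in scope, `7z` is on PATH, and the archive path is hypothetical:

import tempfile

outdir = tempfile.mkdtemp()
decompress_file('archive.tar.gz', outdir)  # hypothetical archive path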
Example #22
File: run.py Project: hanke/datalad
def _execute_command(command, pwd, expected_exit=None):
    from datalad.cmd import Runner

    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            command,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the same way
        exc = e
        cmd_exitcode = e.code

        if expected_exit is not None and expected_exit != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")
    return cmd_exitcode or 0, exc
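A minimal sketch of calling the helper above; it assumes the helper is in scope (together with its `lgr` logger and the datalad imports) and a POSIX shell. Note that it only re-raises when `expected_exit` is given and the actual exit code differs:

exitcode, exc = _execute_command('echo hello', pwd='.')
assert exitcode == 0 and exc is None

# a failing command is reported, not raised, since expected_exit matches
exitcode, exc = _execute_command('false', pwd='.', expected_exit=1)
assert exitcode == 1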
Example #23
File: run.py Project: nicholsn/datalad
def _execute_command(command, pwd, expected_exit=None):
    from datalad.cmd import Runner

    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            command,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the same way
        exc = e
        cmd_exitcode = e.code

        if expected_exit is not None and expected_exit != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")
    return cmd_exitcode or 0, exc
Example #24
    def populate(self):

        super(SubmoduleDataset, self).populate()
        # add submodules
        annex = BasicAnnexTestRepo()
        annex.create()
        from datalad.cmd import Runner
        runner = Runner()
        kw = dict(cwd=self.path, expect_stderr=True)
        runner.run(['git', 'submodule', 'add', annex.url, 'sub1'], **kw)
        runner.run(['git', 'submodule', 'add', annex.url, 'sub2'], **kw)
        runner.run(['git', 'commit', '-m', 'Added sub1 and sub2.'], **kw)
        runner.run(['git', 'submodule', 'update', '--init', '--recursive'], **kw)
        # init annex in subdatasets
        for s in ('sub1', 'sub2'):
            runner.run(['git', 'annex', 'init'],
                       cwd=opj(self.path, s), expect_stderr=True)
Example #25
File: test_cmd.py Project: jgors/datalad
def test_runner(tempfile):

    # test non-dry command call
    runner = Runner(dry=False)
    cmd = 'echo Testing real run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, runner.commands.__str__()))
    assert_true(os.path.exists(tempfile), "Run of: %s resulted in non-existing file %s" % (cmd, tempfile))

    # test non-dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_equal(os.path.join('foo', 'bar'), output,
                 "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_equal(runner.commands.__str__().find('os.path.join'), -1,
                   "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" % runner.commands.__str__())
Example #26
File: test_cmd.py Project: jgors/datalad
def test_runner_dry(tempfile):

    runner = Runner(dry=True)

    # test dry command call
    cmd = 'echo Testing dry run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(("DRY", "DRY"), ret, "Dry run of: %s resulted in output %s" % (cmd, ret))
    assert_greater(runner.commands.__str__().find('echo Testing dry run'), -1,
                 "Dry run of: %s resulted in buffer: %s" % (cmd, runner.commands.__str__()))
    assert_false(os.path.exists(tempfile))

    # test dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_is(None, output, "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_greater(runner.commands.__str__().find('join'), -1,
                   "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" % runner.commands.__str__())
Example #27
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)
Example #28
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
Example #29
def test_create(path):
    ds = Dataset(path)
    ds.create(description="funny",
              native_metadata_type=['bim', 'bam', 'bum'],
              shared_access='world')
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git-annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'), ds.id)
    assert_equal(ds.config.get_value('datalad.metadata', 'nativetype'),
                 ('bim', 'bam', 'bum'))
Example #30
def test_runner(tempfile):

    # test non-dry command call
    runner = Runner(dry=False)
    cmd = 'echo Testing real run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(
        runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" %
        (cmd, runner.commands.__str__()))
    assert_true(
        os.path.exists(tempfile),
        "Run of: %s resulted with non-existing file %s" % (cmd, tempfile))

    # test non-dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_equal(os.path.join('foo', 'bar'), output,
                 "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_equal(
        runner.commands.__str__().find('os.path.join'), -1,
        "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" %
        runner.commands.__str__())
Example #31
def test_runner_dry(tempfile):

    runner = Runner(dry=True)

    # test dry command call
    cmd = 'echo Testing dry run > %s' % tempfile
    ret = runner.run(cmd)
    assert_equal(("DRY", "DRY"), ret,
                 "Dry run of: %s resulted in output %s" % (cmd, ret))
    assert_greater(
        runner.commands.__str__().find('echo Testing dry run'), -1,
        "Dry run of: %s resulted in buffer: %s" %
        (cmd, runner.commands.__str__()))
    assert_false(os.path.exists(tempfile))

    # test dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_is(None, output,
              "Drycall of: os.path.join, 'foo', 'bar' returned %s" % output)
    assert_greater(
        runner.commands.__str__().find('join'), -1,
        "Drycall of: os.path.join, 'foo', 'bar' resulted in buffer: %s" %
        runner.commands.__str__())
Example #32
def test_create(probe, path):
    # only as a probe whether this FS is a crippled one
    ar = AnnexRepo(probe, create=True)

    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world') if not ar.is_managed_branch() else None)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    if not ar.is_managed_branch():
        eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
Example #33
File: run.py Project: adhvaithrp/datalad
    def __call__(
            # it is optional, because `rerun` can get a recorded one
            cmd=None,
            dataset=None,
            message=None,
            rerun=False):
        if rerun and cmd:
            lgr.warning('Ignoring provided command in --rerun mode')
            cmd = None
        if not dataset:
            # act on the whole dataset if nothing else was specified
            dataset = get_dataset_root(curdir)
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='tracking outcomes of a command')
        # not needed ATM
        #refds_path = ds.path

        # delayed imports
        from datalad.cmd import Runner
        from datalad.tests.utils import ok_clean_git

        lgr.debug('tracking command output underneath %s', ds)
        try:
            # base assumption is that the animal smells superb
            ok_clean_git(ds.path)
        except AssertionError:
            yield get_status_dict(
                'run',
                ds=ds,
                status='impossible',
                message=
                'unsaved modifications present, cannot detect changes by command'
            )
            return

        if not cmd and not rerun:
            # TODO here we would need to recover a cmd when a rerun is attempted
            return

        if rerun:
            # pull run info out of the last commit message
            err_info = get_status_dict('run', ds=ds)
            if not ds.repo.get_hexsha():
                yield dict(err_info,
                           status='impossible',
                           message='cannot re-run command, nothing recorded')
                return
            last_commit_msg = ds.repo.repo.head.commit.message
            cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
            runinfo = re.match(cmdrun_regex, last_commit_msg,
                               re.MULTILINE | re.DOTALL)
            if not runinfo:
                yield dict(
                    err_info,
                    status='impossible',
                    message=
                    'cannot re-run command, last saved state does not look like a recorded command run'
                )
                return
            rec_msg, runinfo = runinfo.groups()
            if message is None:
                # re-use commit message, if nothing new was given
                message = rec_msg
            try:
                runinfo = json.loads(runinfo)
            except Exception as e:
                yield dict(
                    err_info,
                    status='error',
                    message=
                    ('cannot re-run command, command specification is not valid JSON: %s',
                     str(e)))
                return
            if 'cmd' not in runinfo:
                yield dict(
                    err_info,
                    status='error',
                    message=
                    'cannot re-run command, command specification missing in recorded state'
                )
                return
            cmd = runinfo['cmd']
            rec_exitcode = runinfo.get('exit', 0)
            rel_pwd = runinfo.get('pwd', None)
            if rel_pwd:
                # recording is relative to the dataset
                pwd = normpath(opj(ds.path, rel_pwd))
            else:
                rel_pwd = None  # normalize, just in case
                pwd = None

            # now we have to find out what was modified during the last run,
            # and enable re-modification. Ideally, we would bring back the entire
            # state of the tree with #1424, but we limit ourselves to
            # file addition/not-in-place-modification for now
            to_unlock = []
            for r in ds.diff(recursive=True,
                             revision='HEAD~1...HEAD',
                             return_type='generator',
                             result_renderer=None):
                if r.get('type', None) == 'file' and \
                        r.get('state', None) in ('added', 'modified'):
                    r.pop('status', None)
                    to_unlock.append(r)
            if to_unlock:
                for r in ds.unlock(to_unlock,
                                   return_type='generator',
                                   result_xfm=None):
                    yield r
        else:
            # not a rerun, figure out where we are running
            pwd = ds.path
            rel_pwd = curdir

        # anticipate quoted compound shell commands
        cmd = cmd[0] if isinstance(cmd, list) and len(cmd) == 1 else cmd

        # TODO do our best to guess which files to unlock based on the command string
        #      in many cases this will be impossible (but see --rerun). however,
        #      generating new data (common case) will be just fine already

        # we have a clean dataset, let's run things
        cmd_exitcode = None
        runner = Runner(cwd=pwd)
        try:
            lgr.info("== Command start (output follows) =====")
            runner.run(
                cmd,
                # immediate output
                log_online=True,
                # not yet sure what we should do with the command output
                # IMHO `run` itself should be very silent and let the command talk
                log_stdout=False,
                log_stderr=False,
                expect_stderr=True,
                expect_fail=True,
                # TODO stdin
            )
        except CommandError as e:
            # strip our own info from the exception. The original command output
            # went to stdout/err -- we just have to pass on the exit code in the same way
            cmd_exitcode = e.code
            if not rerun or rec_exitcode != cmd_exitcode:
                # we failed during a fresh run, or in a different way during a rerun
                # the latter can easily happen if we try to alter a locked file
                #
                # let's fail here, the command could have had a typo or some
                # other undesirable condition. If we would `add` nevertheless,
                # we would need to rerun and aggregate annex content that we
                # likely don't want
                # TODO add switch to ignore failure (some commands are stupid)
                # TODO add the ability to `git reset --hard` the dataset tree on failure
                # we know that we started clean, so we could easily go back, needs gh-1424
                # to be able to do it recursively
                raise CommandError(code=cmd_exitcode)

        lgr.info("== Command exit (modification check follows) =====")

        # amend commit message with `run` info:
        # - pwd if inside the dataset
        # - the command itself
        # - exit code of the command
        run_info = {
            'cmd': cmd,
            'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        }
        if rel_pwd is not None:
            # only when inside the dataset to not leak information
            run_info['pwd'] = rel_pwd

        # compose commit message
        cmd_shorty = (' '.join(cmd) if isinstance(cmd, list) else cmd)
        cmd_shorty = '{}{}'.format(cmd_shorty[:40],
                                   '...' if len(cmd_shorty) > 40 else '')
        msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
            message if message is not None else cmd_shorty,
            # these keyword arguments belong to json.dumps, not str.format
            # (encoding= was py2-only and is dropped here)
            json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False))

        for r in ds.add('.', recursive=True, message=msg):
            yield r
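A hedged round-trip sketch of the commit-message convention used above; the message format and regex are copied from the code, the payload is hypothetical:

import json, re

run_info = {'cmd': 'echo hello', 'exit': 0}
msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
    'echo hello', json.dumps(run_info, indent=1))
cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
rec_msg, runinfo = re.match(cmdrun_regex, msg, re.MULTILINE | re.DOTALL).groups()
assert json.loads(runinfo)['cmd'] == 'echo hello'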
Example #34
def test_clone(src, tempdir):
    # Verify that all our repos are clonable
    r = Runner()
    output = r.run(["git" , "clone", src, tempdir], log_online=True)
    #status, output = getstatusoutput("git clone %(src)s %(tempdir)s" % locals())
    ok_(os.path.exists(os.path.join(tempdir, ".git")))
Example #35
File: core.py Project: leej3/datalad
class runner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """
    def setup(self):
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")

    # The following "track" benchmarks measure compute overhead compared to the
    # simplest os.system call on the same command, without caring about in/out

    unit = "% overhead"

    def _get_overhead(self, cmd, nrepeats=3, **run_kwargs):
        """Estimate overhead over running command via the simplest os.system
        and to not care about any output
        """
        # asv does not repeat tracking ones I think, so nrepeats
        overheads = []
        for _ in range(nrepeats):
            t0 = time()
            os.system(cmd + " >/dev/null 2>&1")
            t1 = time()
            self.runner.run(cmd, **run_kwargs)
            t2 = time()
            overhead = 100 * ((t2 - t1) / (t1 - t0) - 1.0)
            # print("O :", t1 - t0, t2 - t0, overhead)
            overheads.append(overhead)
        overhead = round(sum(overheads) / len(overheads), 2)
        #overhead = round(min(overheads), 2)
        return overhead

    def track_overhead_echo(self):
        return self._get_overhead("echo")

    # 100ms chosen below as providing some sensible stability for me.
    # at 10ms -- too much variability
    def track_overhead_100ms(self):
        return self._get_overhead("sleep 0.1")

    def track_overhead_heavyout(self):
        # run busyloop for 100ms outputting as much as it could
        return self._get_overhead(heavyout_cmd)

    def track_overhead_heavyout_online_through(self):
        return self._get_overhead(
            heavyout_cmd,
            log_stderr='offline',  # needed, or it would get stuck
            log_online=True)

    def track_overhead_heavyout_online_process(self):
        return self._get_overhead(
            heavyout_cmd,
            log_stdout=lambda s: '',
            log_stderr='offline',  # needed, or it would get stuck
            log_online=True)
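The overhead computation above can be reproduced with just the standard library; a sketch assuming a POSIX shell, with subprocess.run standing in for datalad's Runner:

import os
import subprocess
from time import time

def overhead_pct(cmd, nrepeats=3):
    # % overhead of subprocess.run over a bare os.system call
    pcts = []
    for _ in range(nrepeats):
        t0 = time()
        os.system(cmd + ' >/dev/null 2>&1')
        t1 = time()
        subprocess.run(cmd, shell=True, capture_output=True)
        t2 = time()
        pcts.append(100 * ((t2 - t1) / (t1 - t0) - 1.0))
    return round(sum(pcts) / len(pcts), 2)

print(overhead_pct('echo'))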
Example #36
File: run.py Project: raamana/datalad
def run_command(cmd, dataset=None, message=None, rerun_info=None):
    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    elif dataset:
        pwd = dataset.path
        rel_pwd = curdir
    else:
        # act on the whole dataset if nothing else was specified
        dataset = get_dataset_root(curdir)
        # Follow our generic semantic that if dataset is specified,
        # paths are relative to it, if not -- relative to pwd
        pwd = getpwd()
        if dataset:
            rel_pwd = relpath(pwd, dataset)
        else:
            rel_pwd = pwd  # and leave handling on deciding either we
            # deal with it or crash to checks below

    ds = require_dataset(dataset,
                         check_installed=True,
                         purpose='tracking outcomes of a command')
    # not needed ATM
    #refds_path = ds.path

    # delayed imports
    from datalad.cmd import Runner

    lgr.debug('tracking command output underneath %s', ds)
    if not rerun_info and ds.repo.dirty:  # Rerun already takes care of this.
        yield get_status_dict('run',
                              ds=ds,
                              status='impossible',
                              message=('unsaved modifications present, '
                                       'cannot detect changes by command'))
        return

    # anticipate quoted compound shell commands
    cmd = cmd[0] if isinstance(cmd, list) and len(cmd) == 1 else cmd

    # TODO do our best to guess which files to unlock based on the command string
    #      in many cases this will be impossible (but see rerun). however,
    #      generating new data (common case) will be just fine already

    # we have a clean dataset, let's run things
    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            cmd,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the same way
        exc = e
        cmd_exitcode = e.code

        if rerun_info and rerun_info.get("exit", 0) != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")

    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        'chain': rerun_info["chain"] if rerun_info else [],
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd

    # compose commit message
    msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
        message if message is not None else _format_cmd_shorty(cmd),
        # these keyword arguments belong to json.dumps, not str.format
        # (encoding= was py2-only and is dropped here)
        json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False))

    if not rerun_info and cmd_exitcode:
        msg_path = opj(relpath(ds.repo.repo.git_dir), "COMMIT_EDITMSG")
        with open(msg_path, "w") as ofh:
            ofh.write(msg)
        lgr.info(
            "The command had a non-zero exit code. "
            "If this is expected, you can save the changes with "
            "'datalad save -r -F%s .'", msg_path)
        raise exc
    else:
        for r in ds.add('.', recursive=True, message=msg):
            yield r
Example #37
    def __call__(sshurl, target=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None, recursive=False,
                 existing='raise', shared=False):

        if sshurl is None:
            raise ValueError("""insufficient information for target creation
            (needs at least a dataset and an SSH URL).""")

        if target is None and (target_url is not None
                               or target_pushurl is not None):
            raise ValueError("""insufficient information for adding the target
            as a sibling (needs at least a name)""")

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError("""No dataset found
                                 at or above {0}.""".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))
        assert(ds is not None and sshurl is not None)

        if not ds.is_installed():
            raise ValueError("""Dataset {0} is not installed yet.""".format(ds))
        assert(ds.repo is not None)

        # determine target parameters:
        parsed_target = urlparse(sshurl)
        host_name = parsed_target.netloc

        # TODO: Sufficient to fail on this condition?
        if not parsed_target.netloc:
            raise ValueError("Malformed URL: {0}".format(sshurl))

        if target_dir is None:
            if parsed_target.path:
                target_dir = parsed_target.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = False
        if "%NAME" not in target_dir:
            replicate_local_structure = True

        # collect datasets to use:
        datasets = dict()
        datasets[basename(ds.path)] = ds
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                # TODO: when enhancing Dataset/*Repo classes and therefore
                # adapt to moved code, make proper distinction between name and
                # path of a submodule, which are technically different. This
                # probably will become important on windows as well as whenever
                # we want to allow for moved worktrees.
                datasets[basename(ds.path) + '/' + subds] = \
                    Dataset(sub_path)

        # setup SSH Connection:
        # TODO: Make the entire setup a helper to use it when pushing via
        # publish?

        # - build control master:
        from datalad.utils import assure_dir
        not_supported_on_windows("TODO")
        from os import geteuid  # Linux specific import
        var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
        assure_dir(var_run_user_datalad)
        control_path = "%s/%s" % (var_run_user_datalad, host_name)
        control_path += ":%s" % parsed_target.port if parsed_target.port else ""

        # - start control master:
        cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
              "-o ControlPersist=yes %s exit" % (control_path, host_name)
        lgr.debug("Try starting control master by calling:\n%s" % cmd)
        import subprocess
        proc = subprocess.Popen(cmd, shell=True)
        proc.communicate(input="\n")  # why the f.. this is necessary?

        runner = Runner()
        ssh_cmd = ["ssh", "-S", control_path, host_name]

        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(opj(target_dir,
                                    relpath(datasets[current_dataset].path,
                                            start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    out, err = runner.run(cmd, expect_fail=True,
                                          expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                                    path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        pass
                    else:
                        raise ValueError("Do not know how to handle existing=%s" % repr(existing))

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
                git_version = out.lstrip("git version").strip()
                lgr.debug("Detected git version on server: %s" % git_version)
                if git_version < "2.4":
                    lgr.error("Git version >= 2.4 needed to configure remote."
                              " Version detected on server: %s\nSkipping ..."
                              % git_version)
                    continue

            except CommandError as e:
                lgr.warning(
                    "Failed to determine git version on remote.\n"
                    "Error: {0}\nTrying to configure anyway "
                    "...".format(e.message))

            # allow for pushing to checked out branch
            cmd = ssh_cmd + ["git", "-C", path, "config",
                             "receive.denyCurrentBranch",
                             "updateInstead"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + ["mv", opj(path, ".git/hooks/post-update.sample"),
                             opj(path, ".git/hooks/post-update")]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % e.message)

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % e.message)

        # stop controlmaster (close ssh connection):
        cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
        out, err = runner.run(cmd, expect_stderr=True)

        if target:
            # add the sibling(s):
            if target_url is None:
                target_url = sshurl
            if target_pushurl is None:
                target_pushurl = sshurl
            result_adding = AddSibling()(dataset=ds,
                                         name=target,
                                         url=target_url,
                                         pushurl=target_pushurl,
                                         recursive=recursive,
                                         force=existing in {'replace'})
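A small standard-library sketch of the ControlMaster socket path construction used above (POSIX only; host and port are hypothetical):

import os

host_name, port = 'example.com', 2222
var_run_user_datalad = '/var/run/user/%s/datalad' % os.geteuid()
control_path = '%s/%s' % (var_run_user_datalad, host_name)
control_path += ':%s' % port if port else ''
print(control_path)  # /var/run/user/<uid>/datalad/example.com:2222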
Example #38
File: core.py Project: datalad/datalad
class runner(SuprocBenchmarks):
    """Some rudimentary tests to see if there is no major slowdowns from Runner
    """

    def setup(self):
        self.runner = Runner()
        # older versions might not have it
        try:
            from datalad.cmd import GitRunner
            self.git_runner = GitRunner()
        except ImportError:
            pass

    def time_echo(self):
        self.runner.run("echo")

    def time_echo_gitrunner(self):
        self.git_runner.run("echo")

    # The following "track" benchmarks measure compute overhead compared to the
    # simplest os.system call on the same command, without caring about in/out

    unit = "% overhead"

    def _get_overhead(self, cmd, nrepeats=3, **run_kwargs):
        """Estimate overhead over running command via the simplest os.system
        and to not care about any output
        """
        # asv does not repeat tracking ones I think, so nrepeats
        overheads = []
        for _ in range(nrepeats):
            t0 = time()
            os.system(cmd + " >/dev/null 2>&1")
            t1 = time()
            self.runner.run(cmd, **run_kwargs)
            t2 = time()
            overhead = 100 * ((t2 - t1) / (t1 - t0) - 1.0)
            # print("O :", t1 - t0, t2 - t0, overhead)
            overheads.append(overhead)
        overhead = round(sum(overheads) / len(overheads), 2)
        #overhead = round(min(overheads), 2)
        return overhead

    def track_overhead_echo(self):
        return self._get_overhead("echo")

    # 100ms chosen below as providing some sensible stability for me.
    # at 10ms -- too much variability
    def track_overhead_100ms(self):
        return self._get_overhead("sleep 0.1")

    def track_overhead_heavyout(self):
        # run busyloop for 100ms outputting as much as it could
        return self._get_overhead(heavyout_cmd)

    def track_overhead_heavyout_online_through(self):
        return self._get_overhead(heavyout_cmd,
                                  log_stderr='offline',  # needed, or it would get stuck
                                  log_online=True)

    def track_overhead_heavyout_online_process(self):
        return self._get_overhead(heavyout_cmd,
                                  log_stdout=lambda s: '',
                                  log_stderr='offline',  # needed, or it would get stuck
                                  log_online=True)

    # # Probably not really interesting, and good lord wobbles around 0
    # def track_overhead_heavyout_offline(self):
    #     return self._get_overhead(heavyout_cmd,
    #                               log_stdout='offline',
    #                               log_stderr='offline')

    # TODO: track the one with in/out, i.e. for those BatchedProcesses
Example #39
    def populate(self):
        super(NestedDataset, self).populate()
        ds = SubmoduleDataset()
        ds.create()
        from datalad.cmd import Runner
        runner = Runner()
        kw = dict(expect_stderr=True)
        runner.run(['git', 'submodule', 'add', ds.url, 'subdataset'],
                   cwd=self.path, **kw)
        runner.run(['git', 'submodule', 'add', ds.url, 'subsubdataset'],
                   cwd=opj(self.path, 'subdataset'), **kw)
        runner.run(['git', 'commit', '-m', 'Added subdataset.'],
                   cwd=opj(self.path, 'subdataset'), **kw)
        runner.run(['git', 'commit', '-a', '-m', 'Added subdatasets.'],
                   cwd=self.path, **kw)
        runner.run(['git', 'submodule', 'update', '--init', '--recursive'],
                   cwd=self.path, **kw)
        # init all annexes
        for s in ('', 'subdataset', opj('subdataset', 'subsubdataset')):
            runner.run(['git', 'annex', 'init'],
                       cwd=opj(self.path, s), expect_stderr=True)
Example #40
    def __call__(name,
                 url=None,
                 dataset=None,
                 call_fmt=None,
                 image=None,
                 update=False):
        if not name:
            raise InsufficientArgumentsError("`name` argument is required")

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='add container')
        runner = Runner()

        # prevent madness in the config file
        if not re.match(r'^[0-9a-zA-Z-]+$', name):
            raise ValueError(
                "Container names can only contain alphanumeric characters "
                "and '-', got: '{}'".format(name))

        cfgbasevar = "datalad.containers.{}".format(name)
        if cfgbasevar + ".image" in ds.config:
            if not update:
                yield get_status_dict(
                    action="containers_add",
                    ds=ds,
                    logger=lgr,
                    status="impossible",
                    message=("Container named %r already exists. "
                             "Use --update to reconfigure.", name))
                return

            if not (url or image or call_fmt):
                # No updated values were provided. See if an update url is
                # configured (currently relevant only for Singularity Hub).
                url = ds.config.get(cfgbasevar + ".updateurl")
                if not url:
                    yield get_status_dict(
                        action="containers_add",
                        ds=ds,
                        logger=lgr,
                        status="impossible",
                        message="No values to update specified")
                    return

            call_fmt = call_fmt or ds.config.get(cfgbasevar + ".cmdexec")
            image = image or ds.config.get(cfgbasevar + ".image")

        if not image:
            loc_cfg_var = "datalad.containers.location"
            # TODO: We should provide an entry point (or sth similar) for extensions
            # to get config definitions into the ConfigManager. In other words an
            # easy way to extend definitions in datalad's common_cfgs.py.
            container_loc = \
                ds.config.obtain(
                    loc_cfg_var,
                    where=definitions[loc_cfg_var]['destination'],
                    # if not False it would actually modify the
                    # dataset config file -- undesirable
                    store=False,
                    default=definitions[loc_cfg_var]['default'],
                    dialog_type=definitions[loc_cfg_var]['ui'][0],
                    valtype=definitions[loc_cfg_var]['type'],
                    **definitions[loc_cfg_var]['ui'][1]
                )
            image = op.join(ds.path, container_loc, name, 'image')
        else:
            image = op.join(ds.path, image)

        result = get_status_dict(
            action="containers_add",
            path=image,
            type="file",
            logger=lgr,
        )

        if call_fmt is None:
            # maybe built in knowledge can help
            call_fmt = _guess_call_fmt(ds, name, url)

        # collect bits for a final and single save() call
        to_save = []
        imgurl = url
        was_updated = False
        if url:
            if update and op.lexists(image):
                was_updated = True
                # XXX: check=False is used to avoid dropping the image. It
                # should use drop=False if remove() gets such an option (see
                # DataLad's gh-2673).
                for r in ds.remove(image,
                                   save=False,
                                   check=False,
                                   return_type="generator"):
                    yield r

            imgurl = _resolve_img_url(url)
            lgr.debug('Attempt to obtain container image from: %s', imgurl)
            if url.startswith("dhub://"):
                from .adapters import docker

                docker_image = url[len("dhub://"):]

                lgr.debug("Running 'docker pull %s and saving image to %s",
                          docker_image, image)
                runner.run(["docker", "pull", docker_image])
                docker.save(docker_image, image)
            elif url.startswith("docker://"):
                image_dir, image_basename = op.split(image)
                if not image_basename:
                    raise ValueError("No basename in path {}".format(image))
                if image_dir and not op.exists(image_dir):
                    os.makedirs(image_dir)

                lgr.info(
                    "Building Singularity image for %s "
                    "(this may take some time)", url)
                runner.run(["singularity", "build", image_basename, url],
                           cwd=image_dir or None)
            elif op.exists(url):
                lgr.info("Copying local file %s to %s", url, image)
                image_dir = op.dirname(image)
                if image_dir and not op.exists(image_dir):
                    os.makedirs(image_dir)
                copyfile(url, image)
            else:
                if _HAS_SHUB_DOWNLOADER and url.startswith('shub://'):
                    _ensure_datalad_remote(ds.repo)

                try:
                    ds.repo.add_url_to_file(image, imgurl)
                except Exception as e:
                    result["status"] = "error"
                    result["message"] = str(e)
                    yield result
            # TODO do we have to take care of making the image executable
            # if --call_fmt is not provided?
            to_save.append(image)
        # continue despite a remote access failure, the following config
        # setting will enable running the command again with just the name
        # given to ease a re-run
        if not op.lexists(image):
            result["status"] = "error"
            result["message"] = ('no image at %s', image)
            yield result
            return

        # store configs
        if imgurl != url:
            # store originally given URL, as it resolves to something
            # different and maybe can be used to update the container
            # at a later point in time
            ds.config.set("{}.updateurl".format(cfgbasevar), url)
        # force store the image, and prevent multiple entries
        ds.config.set("{}.image".format(cfgbasevar),
                      op.relpath(image, start=ds.path),
                      force=True)
        if call_fmt:
            ds.config.set("{}.cmdexec".format(cfgbasevar),
                          call_fmt,
                          force=True)
        # store changes
        to_save.append(op.join(".datalad", "config"))
        for r in ds.save(
                path=to_save,
                message="[DATALAD] {do} containerized environment '{name}'".
                format(do="Update" if was_updated else "Configure",
                       name=name)):
            yield r
        result["status"] = "ok"
        yield result
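
The container name check above is easy to exercise in isolation; a minimal sketch reusing the same regular expression (function name is illustrative):

import re

def is_valid_container_name(name):
    # same pattern the example enforces: alphanumerics and '-' only, so the
    # name can safely serve as a component of the git config key
    return bool(re.match(r'^[0-9a-zA-Z-]+$', name))

assert is_valid_container_name('ubuntu-18')
assert not is_valid_container_name('bad.name')  # '.' would split the config key
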
Example #41
def test_clone(src, tempdir):
    # Verify that all our repos are clonable
    r = Runner()
    output = r.run(["git", "clone", src, tempdir], log_online=True)
    #status, output = getstatusoutput("git clone %(src)s %(tempdir)s" % locals())
    ok_(os.path.exists(os.path.join(tempdir, ".git")))
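
A self-contained variant of the same clonability check, assuming only that git is on PATH (the source repository is a throwaway created on the fly):

import os
import subprocess
import tempfile

from datalad.cmd import Runner

src = tempfile.mkdtemp()
subprocess.check_call(['git', 'init', src])  # throwaway source repo
dest = os.path.join(tempfile.mkdtemp(), 'clone')
r = Runner()
# log_online streams subprocess output while the command runs
r.run(['git', 'clone', src, dest], log_online=True, expect_stderr=True)
assert os.path.exists(os.path.join(dest, '.git'))
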
Example #42
    def __call__(dataset=None,
                 name=None,
                 url=None,
                 pushurl=None,
                 recursive=False,
                 force=False):

        # TODO: Detect malformed URL and fail?

        if name is None or (url is None and pushurl is None):
            raise ValueError("""insufficient information to add a sibling
                (needs at least a dataset, a name and a URL).""")
        if url is None:
            url = pushurl

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError("No dataset found at or above {0}.".format(
                    getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))

        assert (ds is not None and name is not None and url is not None)

        if not ds.is_installed():
            raise ValueError("Dataset {0} is not installed yet.".format(ds))
        assert (ds.repo is not None)

        ds_basename = basename(ds.path)
        repos = {ds_basename: {'repo': ds.repo}}
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                repos[ds_basename + '/' + subds] = {
                    #                repos[subds] = {
                    'repo': GitRepo(sub_path, create=False)
                }

        # Note: This is copied from create_publication_target_sshwebserver
        # as it is the same logic as for its target_dir.
        # TODO: centralize and generalize template symbol handling
        # TODO: Check pushurl for template symbols too. Probably raise if only
        #       one of them uses such symbols

        replicate_local_structure = False
        if "%NAME" not in url:
            replicate_local_structure = True

        for repo in repos:
            if not replicate_local_structure:
                repos[repo]['url'] = url.replace("%NAME",
                                                 repo.replace("/", "-"))
                if pushurl:
                    repos[repo]['pushurl'] = pushurl.replace(
                        "%NAME", repo.replace("/", "-"))
            else:
                repos[repo]['url'] = url
                if pushurl:
                    repos[repo]['pushurl'] = pushurl

                if repo != ds_basename:
                    repos[repo]['url'] = _urljoin(repos[repo]['url'],
                                                  repo[len(ds_basename) + 1:])
                    if pushurl:
                        repos[repo]['pushurl'] = _urljoin(
                            repos[repo]['pushurl'],
                            repo[len(ds_basename) + 1:])

        # collect existing remotes:
        already_existing = list()
        conflicting = list()
        for repo in repos:
            if name in repos[repo]['repo'].git_get_remotes():
                already_existing.append(repo)
                lgr.debug("""Remote '{0}' already exists
                          in '{1}'.""".format(name, repo))

                existing_url = repos[repo]['repo'].git_get_remote_url(name)
                existing_pushurl = \
                    repos[repo]['repo'].git_get_remote_url(name, push=True)

                if repos[repo]['url'].rstrip('/') != existing_url.rstrip('/') \
                        or (pushurl and existing_pushurl and
                            repos[repo]['pushurl'].rstrip('/') !=
                                    existing_pushurl.rstrip('/')) \
                        or (pushurl and not existing_pushurl):
                    conflicting.append(repo)

        if not force and conflicting:
            raise RuntimeError("Sibling '{0}' already exists with conflicting"
                               " URL for {1} dataset(s). {2}".format(
                                   name, len(conflicting), conflicting))

        runner = Runner()
        successfully_added = list()
        for repo in repos:
            if repo in already_existing:
                if repo not in conflicting:
                    lgr.debug("Skipping {0}. Nothing to do.".format(repo))
                    continue
                # rewrite url
                cmd = ["git", "remote", "set-url", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            else:
                # add the remote
                cmd = ["git", "remote", "add", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            if pushurl:
                cmd = [
                    "git", "remote", "set-url", "--push", name,
                    repos[repo]['pushurl']
                ]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            successfully_added.append(repo)

        return successfully_added
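
The `%NAME` handling above switches between one flat remote name per dataset and replicating the local layout on the remote; a minimal sketch of the same substitution, with illustrative inputs:

def sibling_url(url, repo_name):
    # mirrors the example: a '%NAME' placeholder yields one remote per
    # dataset, with '/' flattened to '-'; without it, the caller instead
    # appends each subdataset's relative path to the common base URL
    if '%NAME' in url:
        return url.replace('%NAME', repo_name.replace('/', '-'))
    return url

assert sibling_url('ssh://host/repos/%NAME', 'ds/subds') == 'ssh://host/repos/ds-subds'
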
Example #43
    def __call__(sshurl,
                 target=None,
                 target_dir=None,
                 target_url=None,
                 target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 existing='raise',
                 shared=False):

        if sshurl is None:
            raise ValueError("""insufficient information for target creation
            (needs at least a dataset and a SSH URL).""")

        if target is None and (target_url is not None
                               or target_pushurl is not None):
            raise ValueError("""insufficient information for adding the target
            as a sibling (needs at least a name)""")

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError("""No dataset found
                                 at or above {0}.""".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))
        assert (ds is not None and sshurl is not None)

        if not ds.is_installed():
            raise ValueError(
                """Dataset {0} is not installed yet.""".format(ds))
        assert (ds.repo is not None)

        # determine target parameters:
        parsed_target = urlparse(sshurl)
        host_name = parsed_target.netloc

        # TODO: Sufficient to fail on this condition?
        if not parsed_target.netloc:
            raise ValueError("Malformed URL: {0}".format(sshurl))

        if target_dir is None:
            if parsed_target.path:
                target_dir = parsed_target.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = False
        if "%NAME" not in target_dir:
            replicate_local_structure = True

        # collect datasets to use:
        datasets = dict()
        datasets[basename(ds.path)] = ds
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                # TODO: when enhancing Dataset/*Repo classes and therefore
                # adapt to moved code, make proper distinction between name and
                # path of a submodule, which are technically different. This
                # probably will become important on windows as well as whenever
                # we want to allow for moved worktrees.
                datasets[basename(ds.path) + '/' + subds] = \
                    Dataset(sub_path)

        # setup SSH Connection:
        # TODO: Make the entire setup a helper to use it when pushing via
        # publish?

        # - build control master:
        from datalad.utils import assure_dir
        not_supported_on_windows("TODO")
        from os import geteuid  # Linux specific import
        var_run_user_datalad = "/var/run/user/%s/datalad" % geteuid()
        assure_dir(var_run_user_datalad)
        control_path = "%s/%s" % (var_run_user_datalad, host_name)
        control_path += ":%s" % parsed_target.port if parsed_target.port else ""

        # - start control master:
        cmd = "ssh -o ControlMaster=yes -o \"ControlPath=%s\" " \
              "-o ControlPersist=yes %s exit" % (control_path, host_name)
        lgr.debug("Try starting control master by calling:\n%s" % cmd)
        import subprocess
        proc = subprocess.Popen(cmd, shell=True)
        proc.communicate(input="\n")  # unclear why this input is necessary

        runner = Runner()
        ssh_cmd = ["ssh", "-S", control_path, host_name]

        lgr.info("Creating target datasets ...")
        for current_dataset in datasets:
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dataset.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(
                    opj(target_dir,
                        relpath(datasets[current_dataset].path,
                                start=ds.path)))

            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                cmd = ssh_cmd + ["ls", path]
                try:
                    out, err = runner.run(cmd,
                                          expect_fail=True,
                                          expect_stderr=True)
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                                    path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'raise':
                        raise RuntimeError(
                            "Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        pass
                    else:
                        raise ValueError(
                            "Do not know how to hand existing=%s" %
                            repr(existing))

                cmd = ssh_cmd + ["mkdir", "-p", path]
                try:
                    runner.run(cmd)
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, str(e)))
                    continue

            # init git repo
            cmd = ssh_cmd + ["git", "-C", path, "init"]
            if shared:
                cmd.append("--shared=%s" % shared)
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Remotely initializing git repository failed at %s."
                          "\nError: %s\nSkipping ..." % (path, str(e)))
                continue

            # check git version on remote end:
            cmd = ssh_cmd + ["git", "version"]
            try:
                out, err = runner.run(cmd)
                git_version = out.lstrip("git version").strip()
                lgr.debug("Detected git version on server: %s" % git_version)
                if git_version < "2.4":
                    lgr.error("Git version >= 2.4 needed to configure remote."
                              " Version detected on server: %s\nSkipping ..." %
                              git_version)
                    continue

            except CommandError as e:
                lgr.warning("Failed to determine git version on remote.\n"
                            "Error: {0}\nTrying to configure anyway "
                            "...".format(e.message))

            # allow for pushing to checked out branch
            cmd = ssh_cmd + [
                "git", "-C", path, "config", "receive.denyCurrentBranch",
                "updateInstead"
            ]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.warning("git config failed at remote location %s.\n"
                            "You will not be able to push to checked out "
                            "branch." % path)

            # enable post-update hook:
            cmd = ssh_cmd + [
                "mv",
                opj(path, ".git/hooks/post-update.sample"),
                opj(path, ".git/hooks/post-update")
            ]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to enable post update hook.\n"
                          "Error: %s" % e.message)

            # initially update server info "manually":
            cmd = ssh_cmd + ["git", "-C", path, "update-server-info"]
            try:
                runner.run(cmd)
            except CommandError as e:
                lgr.error("Failed to update server info.\n"
                          "Error: %s" % e.message)

        # stop controlmaster (close ssh connection):
        cmd = ["ssh", "-O", "stop", "-S", control_path, host_name]
        out, err = runner.run(cmd, expect_stderr=True)

        if target:
            # add the sibling(s):
            if target_url is None:
                target_url = sshurl
            if target_pushurl is None:
                target_pushurl = sshurl
            result_adding = AddSibling()(dataset=ds,
                                         name=target,
                                         url=target_url,
                                         pushurl=target_pushurl,
                                         recursive=recursive,
                                         force=existing in {'replace'})
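
One caveat in the remote git version check above: `str.lstrip` strips a set of characters rather than a prefix, and plain string comparison would rank "2.10" below "2.4". A more robust sketch using a numeric tuple (function name is illustrative):

def parse_git_version(out):
    # "git version 2.10.1" -> (2, 10, 1)
    prefix = 'git version '
    version = out[len(prefix):] if out.startswith(prefix) else out
    version = version.strip().split()[0]  # drop any platform suffix
    return tuple(int(x) for x in version.split('.')[:3])

assert parse_git_version('git version 2.10.1') >= (2, 4)
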
Example #44
File: config.py Project: silky/datalad
class ConfigManager(object):
    """Thin wrapper around `git-config` with support for a dataset configuration.

    The general idea is to have an object that is primarily used to read/query
    configuration options.  Upon creation, the current configuration is read
    via one call (or at most two, if dataset-specific configuration is
    present) to `git config`.  If this class is initialized with a Dataset
    instance, it supports reading and writing configuration from
    ``.datalad/config`` inside a dataset too. This file is committed to Git and
    hence useful to ship certain configuration items with a dataset.

    The API aims to provide the most significant read-access API of a
    dictionary, the Python ConfigParser, and GitPython's config parser
    implementations.

    This class is presently not capable of efficiently writing multiple
    configuration items at once.  Instead, each modification results in a
    dedicated call to `git config`. This author thinks this is OK, as he
    cannot think of a situation where a large number of items need to be
    written during normal operation. If such need arises, various solutions are
    possible (via GitPython, or an independent writer).

    Any DATALAD_* environment variable is also presented as a configuration
    item. Settings read from environment variables are not stored in any of the
    configuration files, but are read dynamically from the environment at each
    `reload()` call. Their values take precedence over any specification in
    configuration files.

    Parameters
    ----------
    dataset : Dataset, optional
      If provided, all `git config` calls are executed in this dataset's
      directory. Moreover, any modifications are, by default, directed to
      this dataset's configuration file (which will be created on demand)
    dataset_only : bool
      If True, configuration items are only read from a dataset's persistent
      configuration file, if present (the one in ``.datalad/config``, not
      ``.git/config``).
    """
    def __init__(self, dataset=None, dataset_only=False):
        # store in a simple dict
        # no subclassing, because we want to be largely read-only, and implement
        # config writing separately
        self._store = {}
        self._dataset = dataset
        self._dataset_only = dataset_only
        # Since configs could contain sensitive information, to prevent
        # any "facilitated" leakage -- just disable loging of outputs for
        # this runner
        run_kwargs = dict(log_outputs=False)
        if dataset is not None:
            # make sure we run the git config calls in the dataset
            # to pick up the right config files
            run_kwargs['cwd'] = dataset.path
        self._runner = Runner(**run_kwargs)
        self.reload()

    def reload(self):
        """Reload all configuration items from the configured sources"""
        self._store = {}
        # 2-step strategy:
        #   - load datalad dataset config from dataset
        #   - load git config from all sources supported by git
        # in doing so we always stay compatible with where Git gets its
        # config from, but also allow to override persistent information
        # from dataset locally or globally
        if self._dataset:
            # now any dataset config
            dscfg_fname = opj(self._dataset.path, '.datalad', 'config')
            if exists(dscfg_fname):
                stdout, stderr = self._run(['-z', '-l', '--file', dscfg_fname],
                                           log_stderr=True)
                # overwrite existing value, do not amend to get multi-line
                # values
                self._store = _parse_gitconfig_dump(
                    stdout, self._store, replace=False)

        if not self._dataset_only:
            stdout, stderr = self._run(['-z', '-l'], log_stderr=True)
            self._store = _parse_gitconfig_dump(
                stdout, self._store, replace=True)

            # override with environment variables
            self._store = _parse_env(self._store)

    @_where_reload
    def obtain(self, var, default=None, dialog_type=None, valtype=None,
               store=False, where=None, reload=True, **kwargs):
        """
        Convenience method to obtain settings interactively, if needed

        A UI will be used to ask for user input in interactive sessions.
        Questions to ask, and additional explanations can be passed directly
        as arguments, or retrieved from a list of pre-configured items.

        Additionally, this method allows for type conversion and storage
        of obtained settings. Both aspects can also be pre-configured.

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them,
          e.g. 'core.editor'
        default : any type
          In interactive sessions and if `store` is True, this default value
          will be presented to the user for confirmation (or modification).
          In all other cases, this value will be silently assigned unless
          there is an existing configuration setting.
        dialog_type : {'question', 'yesno', None}
          Which dialog type to use in interactive sessions. If `None`,
          pre-configured UI options are used.
        store : bool
          Whether to store the obtained value (or default)
        %s
        `**kwargs`
          Additional arguments for the UI function call, such as a question
          `text`.
        """
        # do a local import, as this module is imported prominently and the
        # definitions could theoretically import all kinds of weird things
        # for type conversion
        from datalad.interface.common_cfg import definitions as cfg_defs
        # fetch what we know about this variable
        cdef = cfg_defs.get(var, {})
        # type conversion setup
        if valtype is None and 'type' in cdef:
            valtype = cdef['type']
        if valtype is None:
            valtype = lambda x: x

        # any default?
        if default is None and 'default' in cdef:
            default = cdef['default']

        _value = None
        if var in self:
            # nothing needs to be obtained, it is all here already
            _value = self[var]
        elif store is False and default is not None:
            # nothing will be stored, and we have a default -> no user confirmation
        # we cannot use logging, because we want to use the config to configure
            # the logging
            #lgr.debug('using default {} for config setting {}'.format(default, var))
            _value = default

        if _value is not None:
            # we got everything we need and can exit early
            try:
                return valtype(_value)
            except Exception as e:
                raise ValueError(
                    "value '{}' of existing configuration for '{}' cannot be "
                    "converted to the desired type '{}' ({})".format(
                        _value, var, valtype, exc_str(e)))

        # now we need to try to obtain something from the user
        from datalad.ui import ui

        # configure UI
        dialog_opts = kwargs
        if dialog_type is None:  # no override
            # check for common knowledge on how to obtain a value
            if 'ui' in cdef:
                dialog_type = cdef['ui'][0]
                # pull standard dialog settings
                dialog_opts = cdef['ui'][1]
                # update with input
                dialog_opts.update(kwargs)

        if (not ui.is_interactive or dialog_type is None) and default is None:
            raise RuntimeError(
                "cannot obtain value for configuration item '{}', "
                "not preconfigured, no default, no UI available".format(var))

        if not hasattr(ui, dialog_type):
            raise ValueError("UI '{}' does not support dialog type '{}'".format(
                ui, dialog_type))

        # configure storage destination, if needed
        if store:
            if where is None and 'destination' in cdef:
                where = cdef['destination']
            if where is None:
                raise ValueError(
                    "request to store configuration item '{}', but no "
                    "storage destination specified".format(var))

        # obtain via UI
        dialog = getattr(ui, dialog_type)
        _value = dialog(default=default, **dialog_opts)

        if _value is None:
            # we got nothing
            if default is None:
                raise RuntimeError(
                    "could not obtain value for configuration item '{}', "
                    "not preconfigured, no default".format(var))
            # XXX maybe we should return default here, even if it was returned
            # from the UI -- if that is even possible

        # execute type conversion before storing to check that we got
        # something that looks like what we want
        try:
            value = valtype(_value)
        except Exception as e:
            raise ValueError(
                "cannot convert user input `{}` to desired type ({})".format(
                    _value, exc_str(e)))
            # XXX we could consider "looping" until we have a value of proper
            # type in case of a user typo...

        if store:
            # store value as it was before any conversion, needs to be str
            # anyway
            # needs string conversion nevertheless, because default could come
            # in as something else
            self.add(var, '{}'.format(_value), where=where, reload=reload)
        return value

    #
    # Compatibility with dict API
    #
    def __len__(self):
        return len(self._store)

    def __getitem__(self, key):
        return self._store.__getitem__(key)

    def __contains__(self, key):
        return self._store.__contains__(key)

    def keys(self):
        """Returns list of configuration item names"""
        return self._store.keys()

    def get(self, key, default=None):
        """D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None."""
        return self._store.get(key, default)

    #
    # Compatibility with ConfigParser API
    #
    def sections(self):
        """Returns a list of the sections available"""
        return list(set([cfg_section_regex.match(k).group(1) for k in self._store]))

    def options(self, section):
        """Returns a list of options available in the specified section."""
        opts = []
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section:
                opts.append(opt)
        return opts

    def has_section(self, section):
        """Indicates whether a section is present in the configuration"""
        for k in self._store:
            if k.startswith(section):
                return True
        return False

    def has_option(self, section, option):
        """If the given section exists, and contains the given option"""
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section and opt == option:
                return True
        return False

    def getint(self, section, option):
        """A convenience method which coerces the option value to an integer"""
        return int(self.get_value(section, option))

    def getbool(self, section, option, default=None):
        """A convenience method which coerces the option value to a bool

        Values "on", "yes", "true" and any int!=0 are considered True
        Values which evaluate to bool False, "off", "no", "false" are considered
        False
        TypeError is raised for other values.
        """
        val = self.get_value(section, option, default=default)
        return anything2bool(val)

    def getfloat(self, section, option):
        """A convenience method which coerces the option value to a float"""
        return float(self.get_value(section, option))

    # this is a hybrid of ConfigParser and dict API
    def items(self, section=None):
        """Return a list of (name, value) pairs for each option

        Optionally limited to a given section.
        """
        if section is None:
            return self._store.items()
        return [(k, v) for k, v in self._store.items()
                if cfg_section_regex.match(k).group(1) == section]

    #
    # Compatibility with GitPython's ConfigParser
    #
    def get_value(self, section, option, default=None):
        """Like `get()`, but with an optional default value

        If the default is not None, the given default value will be returned in
        case the option did not exist. This behavior imitates GitPython's
        config parser.
        """
        try:
            return self['.'.join((section, option))]
        except KeyError as e:
            # this strange dance is needed because gitpython does it this way
            if default is not None:
                return default
            else:
                raise e

    #
    # Modify configuration (proxy respective git-config call)
    #
    @_where_reload
    def _run(self, args, where=None, reload=False, **kwargs):
        """Centralized helper to run "git config" calls

        Parameters
        ----------
        args : list
          Arguments to pass for git config
        %s
        **kwargs
          Keywords arguments for Runner's call
        """
        if where:
            args = self._get_location_args(where) + args
        out = self._runner.run(['git', 'config'] + args, **kwargs)
        if reload:
            self.reload()
        return out

    def _get_location_args(self, where, args=None):
        if args is None:
            args = []
        cfg_labels = ('dataset', 'local', 'global')
        if where not in cfg_labels:
            raise ValueError(
                "unknown configuration label '{}' (not in {})".format(
                    where, cfg_labels))
        if where == 'dataset':
            if not self._dataset:
                raise ValueError(
                    'ConfigManager cannot store configuration to a dataset, none specified')
            # create an empty config file if none exists, `git config` will
            # fail otherwise
            dscfg_dirname = opj(self._dataset.path, '.datalad')
            dscfg_fname = opj(dscfg_dirname, 'config')
            if not exists(dscfg_dirname):
                os.makedirs(dscfg_dirname)
            if not exists(dscfg_fname):
                open(dscfg_fname, 'w').close()
            args.extend(['--file', opj(self._dataset.path, '.datalad', 'config')])
        elif where == 'global':
            args.append('--global')
        elif where == 'local':
            args.append('--local')
        return args

    @_where_reload
    def add(self, var, value, where='dataset', reload=True):
        """Add a configuration variable and value

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them, e.g.
          'core.editor'
        value : str
          Variable value
        %s"""
        self._run(['--add', var, value], where=where, reload=reload, log_stderr=True)

    @_where_reload
    def rename_section(self, old, new, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        old : str
          Name of the section to rename.
        new : str
          Name of the section to rename to.
        %s"""
        self._run(['--rename-section', old, new], where=where, reload=reload)

    @_where_reload
    def remove_section(self, sec, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        sec : str
          Name of the section to remove.
        %s"""
        self._run(['--remove-section', sec], where=where, reload=reload)

    @_where_reload
    def unset(self, var, where='dataset', reload=True):
        """Remove all occurrences of a variable

        Parameters
        ----------
        var : str
          Name of the variable to remove
        %s"""
        # use unset all as it is simpler for now
        self._run(['--unset-all', var], where=where, reload=reload)
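
Putting the read APIs above together; a minimal usage sketch, assuming the class is importable as `datalad.config.ConfigManager` (as in the project referenced above) and that git is configured in the environment:

from datalad.config import ConfigManager

cfg = ConfigManager()                    # no dataset: git scopes + environment only
if 'user.email' in cfg:                  # dict-style membership test
    print(cfg['user.email'])
print(cfg.get('user.name', 'n/a'))       # dict-style get with a default
print(cfg.get_value('user', 'name', default='n/a'))  # GitPython-style accessor
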
Example #45
class ConfigManager(object):
    """Thin wrapper around `git-config` with support for a dataset configuration.

    The general idea is to have an object that is primarily used to read/query
    configuration options.  Upon creation, the current configuration is read
    via one call (or at most two, if dataset-specific configuration is
    present) to `git config`.  If this class is initialized with a Dataset
    instance, it supports reading and writing configuration from
    ``.datalad/config`` inside a dataset too. This file is committed to Git and
    hence useful to ship certain configuration items with a dataset.

    The API aims to provide the most significant read-access API of a
    dictionary, the Python ConfigParser, and GitPython's config parser
    implementations.

    This class is presently not capable of efficiently writing multiple
    configuration items at once.  Instead, each modification results in a
    dedicated call to `git config`. This author thinks this is OK, as he
    cannot think of a situation where a large number of items need to be
    written during normal operation. If such need arises, various solutions are
    possible (via GitPython, or an independent writer).

    Any DATALAD_* environment variable is also presented as a configuration
    item. Settings read from environment variables are not stored in any of the
    configuration files, but are read dynamically from the environment at each
    `reload()` call. Their values take precedence over any specification in
    configuration files.

    Parameters
    ----------
    dataset : Dataset, optional
      If provided, all `git config` calls are executed in this dataset's
      directory. Moreover, any modifications are, by default, directed to
      this dataset's configuration file (which will be created on demand)
    dataset_only : bool
      If True, configuration items are only read from a dataset's persistent
      configuration file, if present (the one in ``.datalad/config``, not
      ``.git/config``).
    """
    def __init__(self, dataset=None, dataset_only=False):
        # store in a simple dict
        # no subclassing, because we want to be largely read-only, and implement
        # config writing separately
        self._store = {}
        self._dataset_path = dataset.path if dataset else None
        self._dataset_only = dataset_only
        # Since configs could contain sensitive information, to prevent
        # any "facilitated" leakage -- just disable logging of outputs for
        # this runner
        run_kwargs = dict(log_outputs=False)
        if dataset is not None:
            # make sure we run the git config calls in the dataset
            # to pick up the right config files
            run_kwargs['cwd'] = dataset.path
        self._runner = Runner(**run_kwargs)
        self.reload()

    def reload(self):
        """Reload all configuration items from the configured sources"""
        self._store = {}
        # 2-step strategy:
        #   - load datalad dataset config from dataset
        #   - load git config from all sources supported by git
        # in doing so we always stay compatible with where Git gets its
        # config from, but also allow to override persistent information
        # from dataset locally or globally
        if self._dataset_path:
            # now any dataset config
            dscfg_fname = opj(self._dataset_path, '.datalad', 'config')
            if exists(dscfg_fname):
                stdout, stderr = self._run(['-z', '-l', '--file', dscfg_fname],
                                           log_stderr=True)
                # overwrite existing value, do not amend to get multi-line
                # values
                self._store = _parse_gitconfig_dump(
                    stdout, self._store, replace=False)

        if not self._dataset_only:
            stdout, stderr = self._run(['-z', '-l'], log_stderr=True)
            self._store = _parse_gitconfig_dump(
                stdout, self._store, replace=True)

            # override with environment variables
            self._store = _parse_env(self._store)

    @_where_reload
    def obtain(self, var, default=None, dialog_type=None, valtype=None,
               store=False, where=None, reload=True, **kwargs):
        """
        Convenience method to obtain settings interactively, if needed

        A UI will be used to ask for user input in interactive sessions.
        Questions to ask, and additional explanations can be passed directly
        as arguments, or retrieved from a list of pre-configured items.

        Additionally, this method allows for type conversion and storage
        of obtained settings. Both aspects can also be pre-configured.

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them,
          e.g. 'core.editor'
        default : any type
          In interactive sessions and if `store` is True, this default value
          will be presented to the user for confirmation (or modification).
          In all other cases, this value will be silently assigned unless
          there is an existing configuration setting.
        dialog_type : {'question', 'yesno', None}
          Which dialog type to use in interactive sessions. If `None`,
          pre-configured UI options are used.
        store : bool
          Whether to store the obtained value (or default)
        %s
        `**kwargs`
          Additional arguments for the UI function call, such as a question
          `text`.
        """
        # do a local import, as this module is imported prominently and the
        # definitions could theoretically import all kinds of weird things
        # for type conversion
        from datalad.interface.common_cfg import definitions as cfg_defs
        # fetch what we know about this variable
        cdef = cfg_defs.get(var, {})
        # type conversion setup
        if valtype is None and 'type' in cdef:
            valtype = cdef['type']
        if valtype is None:
            valtype = lambda x: x

        # any default?
        if default is None and 'default' in cdef:
            default = cdef['default']

        _value = None
        if var in self:
            # nothing needs to be obtained, it is all here already
            _value = self[var]
        elif store is False and default is not None:
            # nothing will be stored, and we have a default -> no user confirmation
        # we cannot use logging, because we want to use the config to configure
            # the logging
            #lgr.debug('using default {} for config setting {}'.format(default, var))
            _value = default

        if _value is not None:
            # we got everything we need and can exit early
            try:
                return valtype(_value)
            except Exception as e:
                raise ValueError(
                    "value '{}' of existing configuration for '{}' cannot be "
                    "converted to the desired type '{}' ({})".format(
                        _value, var, valtype, exc_str(e)))

        # now we need to try to obtain something from the user
        from datalad.ui import ui

        # configure UI
        dialog_opts = kwargs
        if dialog_type is None:  # no override
            # check for common knowledge on how to obtain a value
            if 'ui' in cdef:
                dialog_type = cdef['ui'][0]
                # pull standard dialog settings
                dialog_opts = cdef['ui'][1]
                # update with input
                dialog_opts.update(kwargs)

        if (not ui.is_interactive or dialog_type is None) and default is None:
            raise RuntimeError(
                "cannot obtain value for configuration item '{}', "
                "not preconfigured, no default, no UI available".format(var))

        if not hasattr(ui, dialog_type):
            raise ValueError("UI '{}' does not support dialog type '{}'".format(
                ui, dialog_type))

        # configure storage destination, if needed
        if store:
            if where is None and 'destination' in cdef:
                where = cdef['destination']
            if where is None:
                raise ValueError(
                    "request to store configuration item '{}', but no "
                    "storage destination specified".format(var))

        # obtain via UI
        dialog = getattr(ui, dialog_type)
        _value = dialog(default=default, **dialog_opts)

        if _value is None:
            # we got nothing
            if default is None:
                raise RuntimeError(
                    "could not obtain value for configuration item '{}', "
                    "not preconfigured, no default".format(var))
            # XXX maybe we should return default here, even if it was returned
            # from the UI -- if that is even possible

        # execute type conversion before storing to check that we got
        # something that looks like what we want
        try:
            value = valtype(_value)
        except Exception as e:
            raise ValueError(
                "cannot convert user input `{}` to desired type ({})".format(
                    _value, exc_str(e)))
            # XXX we could consider "looping" until we have a value of proper
            # type in case of a user typo...

        if store:
            # store value as it was before any conversion, needs to be str
            # anyway
            # needs string conversion nevertheless, because default could come
            # in as something else
            self.add(var, '{}'.format(_value), where=where, reload=reload)
        return value

    #
    # Compatibility with dict API
    #
    def __len__(self):
        return len(self._store)

    def __getitem__(self, key):
        return self._store.__getitem__(key)

    def __contains__(self, key):
        return self._store.__contains__(key)

    def keys(self):
        """Returns list of configuration item names"""
        return self._store.keys()

    def get(self, key, default=None):
        """D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None."""
        return self._store.get(key, default)

    #
    # Compatibility with ConfigParser API
    #
    def sections(self):
        """Returns a list of the sections available"""
        return list(set([cfg_section_regex.match(k).group(1) for k in self._store]))

    def options(self, section):
        """Returns a list of options available in the specified section."""
        opts = []
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section:
                opts.append(opt)
        return opts

    def has_section(self, section):
        """Indicates whether a section is present in the configuration"""
        for k in self._store:
            if k.startswith(section):
                return True
        return False

    def has_option(self, section, option):
        """If the given section exists, and contains the given option"""
        for k in self._store:
            sec, opt = cfg_sectionoption_regex.match(k).groups()
            if sec == section and opt == option:
                return True
        return False

    def getint(self, section, option):
        """A convenience method which coerces the option value to an integer"""
        return int(self.get_value(section, option))

    def getbool(self, section, option, default=None):
        """A convenience method which coerces the option value to a bool

        Values "on", "yes", "true" and any int!=0 are considered True
        Values which evaluate to bool False, "off", "no", "false" are considered
        False
        TypeError is raised for other values.
        """
        val = self.get_value(section, option, default=default)
        return anything2bool(val)

    def getfloat(self, section, option):
        """A convenience method which coerces the option value to a float"""
        return float(self.get_value(section, option))

    # this is a hybrid of ConfigParser and dict API
    def items(self, section=None):
        """Return a list of (name, value) pairs for each option

        Optionally limited to a given section.
        """
        if section is None:
            return self._store.items()
        return [(k, v) for k, v in self._store.items()
                if cfg_section_regex.match(k).group(1) == section]

    #
    # Compatibility with GitPython's ConfigParser
    #
    def get_value(self, section, option, default=None):
        """Like `get()`, but with an optional default value

        If the default is not None, the given default value will be returned in
        case the option did not exist. This behavior imitates GitPython's
        config parser.
        """
        try:
            return self['.'.join((section, option))]
        except KeyError as e:
            # this strange dance is needed because gitpython does it this way
            if default is not None:
                return default
            else:
                raise e

    #
    # Modify configuration (proxy respective git-config call)
    #
    @_where_reload
    def _run(self, args, where=None, reload=False, **kwargs):
        """Centralized helper to run "git config" calls

        Parameters
        ----------
        args : list
          Arguments to pass for git config
        %s
        **kwargs
          Keywords arguments for Runner's call
        """
        if where:
            args = self._get_location_args(where) + args
        out = self._runner.run(['git', 'config'] + args, **kwargs)
        if reload:
            self.reload()
        return out

    def _get_location_args(self, where, args=None):
        if args is None:
            args = []
        cfg_labels = ('dataset', 'local', 'global')
        if where not in cfg_labels:
            raise ValueError(
                "unknown configuration label '{}' (not in {})".format(
                    where, cfg_labels))
        if where == 'dataset':
            if not self._dataset_path:
                raise ValueError(
                    'ConfigManager cannot store configuration to a dataset, none specified')
            # create an empty config file if none exists, `git config` will
            # fail otherwise
            dscfg_dirname = opj(self._dataset_path, '.datalad')
            dscfg_fname = opj(dscfg_dirname, 'config')
            if not exists(dscfg_dirname):
                os.makedirs(dscfg_dirname)
            if not exists(dscfg_fname):
                open(dscfg_fname, 'w').close()
            args.extend(['--file', opj(self._dataset_path, '.datalad', 'config')])
        elif where == 'global':
            args.append('--global')
        elif where == 'local':
            args.append('--local')
        return args

    @_where_reload
    def add(self, var, value, where='dataset', reload=True):
        """Add a configuration variable and value

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them, e.g.
          'core.editor'
        value : str
          Variable value
        %s"""
        self._run(['--add', var, value], where=where, reload=reload, log_stderr=True)

    @_where_reload
    def set(self, var, value, where='dataset', reload=True, force=False):
        """Set a variable to a value.

        In opposition to `add`, this replaces the value of `var` if there is
        one already.

        Parameters
        ----------
        var : str
          Variable name including any section like `git config` expects them, e.g.
          'core.editor'
        value : str
          Variable value
        force: bool
          if set, replaces all occurrences of `var` by a single one with the
          given `value`. Otherwise raise if multiple entries for `var` exist
          already
        %s"""
        from datalad.support.gitrepo import to_options

        self._run(to_options(replace_all=force) + [var, value],
                  where=where, reload=reload, log_stderr=True)

    @_where_reload
    def rename_section(self, old, new, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        old : str
          Name of the section to rename.
        new : str
          Name of the section to rename to.
        %s"""
        self._run(['--rename-section', old, new], where=where, reload=reload)

    @_where_reload
    def remove_section(self, sec, where='dataset', reload=True):
        """Rename a configuration section

        Parameters
        ----------
        sec : str
          Name of the section to remove.
        %s"""
        self._run(['--remove-section', sec], where=where, reload=reload)

    @_where_reload
    def unset(self, var, where='dataset', reload=True):
        """Remove all occurrences of a variable

        Parameters
        ----------
        var : str
          Name of the variable to remove
        %s"""
        # use unset all as it is simpler for now
        self._run(['--unset-all', var], where=where, reload=reload)
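
The `add()`/`set()` split above maps directly onto `git config` flags (`--add` vs `--replace-all`); a runnable sketch of the underlying behavior, independent of ConfigManager:

import subprocess
import tempfile

repo = tempfile.mkdtemp()
subprocess.check_call(['git', 'init', repo])

def git_config(*args):
    subprocess.check_call(['git', '-C', repo, 'config'] + list(args))

git_config('--add', 'x.y', 'one')
git_config('--add', 'x.y', 'two')            # add(): two entries now coexist
git_config('--replace-all', 'x.y', 'three')  # set(force=True): collapse to one
out = subprocess.check_output(['git', '-C', repo, 'config', '--get-all', 'x.y'])
assert out.strip() == b'three'
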
Example #46
    def __call__(dataset=None, name=None, url=None,
                 pushurl=None, recursive=False, force=False):

        # TODO: Detect malformed URL and fail?

        if name is None or (url is None and pushurl is None):
            raise ValueError("""insufficient information to add a sibling
                (needs at least a dataset, a name and a URL).""")
        if url is None:
            url = pushurl

        # shortcut
        ds = dataset

        if ds is not None and not isinstance(ds, Dataset):
            ds = Dataset(ds)
        if ds is None:
            # try to find a dataset at or above CWD
            dspath = GitRepo.get_toppath(abspath(getpwd()))
            if dspath is None:
                raise ValueError(
                        "No dataset found at or above {0}.".format(getpwd()))
            ds = Dataset(dspath)
            lgr.debug("Resolved dataset for target creation: {0}".format(ds))

        assert(ds is not None and name is not None and url is not None)

        if not ds.is_installed():
            raise ValueError("Dataset {0} is not installed yet.".format(ds))
        assert(ds.repo is not None)

        ds_basename = basename(ds.path)
        repos = {
            ds_basename: {'repo': ds.repo}
        }
        if recursive:
            for subds in ds.get_dataset_handles(recursive=True):
                sub_path = opj(ds.path, subds)
                repos[ds_basename + '/' + subds] = {
#                repos[subds] = {
                    'repo': GitRepo(sub_path, create=False)
                }

        # Note: This is copied from create_publication_target_sshwebserver
        # as it is the same logic as for its target_dir.
        # TODO: centralize and generalize template symbol handling
        # TODO: Check pushurl for template symbols too. Probably raise if only
        #       one of them uses such symbols

        replicate_local_structure = False
        if "%NAME" not in url:
            replicate_local_structure = True

        for repo in repos:
            if not replicate_local_structure:
                repos[repo]['url'] = url.replace("%NAME",
                                                 repo.replace("/", "-"))
                if pushurl:
                    repos[repo]['pushurl'] = pushurl.replace("%NAME",
                                                             repo.replace("/",
                                                                          "-"))
            else:
                repos[repo]['url'] = url
                if pushurl:
                    repos[repo]['pushurl'] = pushurl

                if repo != ds_basename:
                    repos[repo]['url'] = _urljoin(repos[repo]['url'], repo[len(ds_basename)+1:])
                    if pushurl:
                        repos[repo]['pushurl'] = _urljoin(repos[repo]['pushurl'], repo[len(ds_basename)+1:])

        # collect existing remotes:
        already_existing = list()
        conflicting = list()
        for repo in repos:
            if name in repos[repo]['repo'].git_get_remotes():
                already_existing.append(repo)
                lgr.debug("""Remote '{0}' already exists
                          in '{1}'.""".format(name, repo))

                existing_url = repos[repo]['repo'].git_get_remote_url(name)
                existing_pushurl = \
                    repos[repo]['repo'].git_get_remote_url(name, push=True)

                if repos[repo]['url'].rstrip('/') != existing_url.rstrip('/') \
                        or (pushurl and existing_pushurl and
                            repos[repo]['pushurl'].rstrip('/') !=
                                    existing_pushurl.rstrip('/')) \
                        or (pushurl and not existing_pushurl):
                    conflicting.append(repo)

        if not force and conflicting:
            raise RuntimeError("Sibling '{0}' already exists with conflicting"
                               " URL for {1} dataset(s). {2}".format(
                                   name, len(conflicting), conflicting))

        runner = Runner()
        successfully_added = list()
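        # For each repo: skip siblings that already exist with matching URLs,
        # rewrite the URL where it exists but conflicts (only reachable with
        # force), or add a new remote; optionally set a dedicated push URL.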
        for repo in repos:
            if repo in already_existing:
                if repo not in conflicting:
                    lgr.debug("Skipping {0}. Nothing to do.".format(repo))
                    continue
                # rewrite url
                cmd = ["git", "remote", "set-url", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            else:
                # add the remote
                cmd = ["git", "remote", "add", name, repos[repo]['url']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            if pushurl:
                cmd = ["git", "remote", "set-url", "--push", name,
                       repos[repo]['pushurl']]
                runner.run(cmd, cwd=repos[repo]['repo'].path)
            successfully_added.append(repo)

        return successfully_added
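
For illustration, a minimal self-contained sketch of the "%NAME" URL templating above. The helper name `expand_sibling_url` is hypothetical, and plain string joining stands in for `_urljoin`:

def expand_sibling_url(url, repo_name, base_name):
    """Hypothetical helper mirroring the inline templating logic above."""
    if "%NAME" in url:
        # template mode: substitute the slash-mangled repo name
        return url.replace("%NAME", repo_name.replace("/", "-"))
    if repo_name == base_name:
        # replication mode: the top-level dataset maps to the URL itself
        return url
    # replication mode: subdatasets are appended under the base URL
    return url.rstrip('/') + '/' + repo_name[len(base_name) + 1:]

assert expand_sibling_url("ssh://srv/%NAME", "ds/sub", "ds") == "ssh://srv/ds-sub"
assert expand_sibling_url("ssh://srv/ds", "ds/sub", "ds") == "ssh://srv/ds/sub"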
Example #47
0
File: run.py Project: mprati/datalad
def run_command(cmd, dataset=None, inputs=None, outputs=None, expand=None,
                message=None, rerun_info=None, rerun_outputs=None, sidecar=None):
    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    else:
        pwd, rel_pwd = get_command_pwds(dataset)

    ds = require_dataset(
        dataset, check_installed=True,
        purpose='tracking outcomes of a command')

    # not needed ATM
    #refds_path = ds.path

    # delayed imports
    from datalad.cmd import Runner

    lgr.debug('tracking command output underneath %s', ds)
    if not rerun_info and ds.repo.dirty:  # Rerun already takes care of this.
        yield get_status_dict(
            'run',
            ds=ds,
            status='impossible',
            message=('unsaved modifications present, '
                     'cannot detect changes by command'))
        return

    cmd = normalize_command(cmd)

    inputs = GlobbedPaths(inputs, pwd=pwd,
                          expand=expand in ["inputs", "both"])
    if inputs:
        for res in ds.get(inputs.expand(full=True), on_failure="ignore"):
            yield res

    outputs = GlobbedPaths(outputs, pwd=pwd,
                           expand=expand in ["outputs", "both"],
                           warn=not rerun_info)
    if outputs:
        for res in _unlock_or_remove(ds, outputs.expand(full=True)):
            yield res

    if rerun_outputs is not None:
        # These are files we need to unlock/remove for a rerun that aren't
        # included in the explicit outputs. Unlike inputs/outputs, these are
        # full paths, so we can pass them directly to unlock.
        for res in _unlock_or_remove(ds, rerun_outputs):
            yield res

    sfmt = SequenceFormatter()
    cmd_expanded = sfmt.format(cmd,
                               pwd=pwd,
                               dspath=ds.path,
                               inputs=inputs.expand(dot=False),
                               outputs=outputs.expand(dot=False))

    # we have a clean dataset, let's run things
    exc = None
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            cmd_expanded,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the
        # same way
        exc = e
        cmd_exitcode = e.code

        if rerun_info and rerun_info.get("exit", 0) != cmd_exitcode:
            # we failed in a different way during a rerun.  This can easily
            # happen if we try to alter a locked file
            #
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise exc

    lgr.info("== Command exit (modification check follows) =====")

    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        'chain': rerun_info["chain"] if rerun_info else [],
        'inputs': inputs.paths,
        'outputs': outputs.paths,
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd
    if ds.id:
        run_info["dsid"] = ds.id

    record = json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False)

    use_sidecar = sidecar or (
        sidecar is None and
        ds.config.get('datalad.run.record-sidecar', default=False))
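    # A sidecar file is used when explicitly requested, or when enabled via
    # the datalad.run.record-sidecar config option; otherwise the full record
    # is embedded in the commit message below.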

    if use_sidecar:
        # record ID is hash of record itself
        from hashlib import md5
        record_id = md5(record.encode('utf-8')).hexdigest()
        record_dir = ds.config.get('datalad.run.record-directory',
                                   default=op.join('.datalad', 'runinfo'))
        record_path = op.join(ds.path, record_dir, record_id)
        if not op.lexists(record_path):
            # go for compression; even for minimal records the size difference
            # is small, despite the offset cost
            # wrap in list -- there is just one record
            dump2stream([run_info], record_path, compressed=True)

    # compose commit message
    msg = u"""\
[DATALAD RUNCMD] {}

=== Do not change lines below ===
{}
^^^ Do not change lines above ^^^
"""
    msg = msg.format(
        message if message is not None else _format_cmd_shorty(cmd),
        '"{}"'.format(record_id) if use_sidecar else record)
    msg = assure_bytes(msg)

    if not rerun_info and cmd_exitcode:
        msg_path = opj(relpath(ds.repo.repo.git_dir), "COMMIT_EDITMSG")
        with open(msg_path, "wb") as ofh:
            ofh.write(msg)
        lgr.info("The command had a non-zero exit code. "
                 "If this is expected, you can save the changes with "
                 "'datalad save -r -F%s .'",
                 msg_path)
        raise exc
    else:
        for r in ds.add('.', recursive=True, message=msg):
            yield r
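
As an aside, the sidecar record ID above is simply the MD5 hex digest of the JSON-serialized record, so identical run records always map to the same sidecar file. A minimal sketch of recomputing it outside DataLad (the sample run_info values are made up):

import json
from hashlib import md5

run_info = {
    'cmd': 'echo hello',
    'exit': 0,
    'chain': [],
    'inputs': [],
    'outputs': [],
}
# serialize exactly as above: indent=1, sorted keys, no ASCII escaping
record = json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False)
record_id = md5(record.encode('utf-8')).hexdigest()
print(record_id)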
Example #48
0
File: run.py Project: xlecours/datalad
def run_command(cmd, dataset=None, message=None, rerun_info=None):
    rel_pwd = rerun_info.get('pwd') if rerun_info else None
    if rel_pwd and dataset:
        # recording is relative to the dataset
        pwd = normpath(opj(dataset.path, rel_pwd))
        rel_pwd = relpath(pwd, dataset.path)
    elif dataset:
        pwd = dataset.path
        rel_pwd = curdir
    else:
        # act on the whole dataset if nothing else was specified
        dataset = get_dataset_root(curdir)
        # Follow our generic semantic that if dataset is specified,
        # paths are relative to it, if not -- relative to pwd
        pwd = getpwd()
        if dataset:
            rel_pwd = relpath(pwd, dataset)
        else:
            rel_pwd = pwd  # leave it to the checks below to either handle
            # this case or fail

    ds = require_dataset(dataset,
                         check_installed=True,
                         purpose='tracking outcomes of a command')
    # not needed ATM
    #refds_path = ds.path

    # delayed imports
    from datalad.cmd import Runner
    from datalad.tests.utils import ok_clean_git

    lgr.debug('tracking command output underneath %s', ds)
    if not rerun_info:  # Rerun already takes care of this.
        try:
            # base assumption is that the animal smells superb
            ok_clean_git(ds.path)
        except AssertionError:
            yield get_status_dict('run',
                                  ds=ds,
                                  status='impossible',
                                  message=('unsaved modifications present, '
                                           'cannot detect changes by command'))
            return

    # anticipate quoted compound shell commands
    cmd = cmd[0] if isinstance(cmd, list) and len(cmd) == 1 else cmd

    # TODO do our best to guess which files to unlock based on the command string
    #      in many cases this will be impossible (but see rerun). however,
    #      generating new data (common case) will be just fine already

    # we have a clean dataset, let's run things
    cmd_exitcode = None
    runner = Runner(cwd=pwd)
    try:
        lgr.info("== Command start (output follows) =====")
        runner.run(
            cmd,
            # immediate output
            log_online=True,
            # not yet sure what we should do with the command output
            # IMHO `run` itself should be very silent and let the command talk
            log_stdout=False,
            log_stderr=False,
            expect_stderr=True,
            expect_fail=True,
            # TODO stdin
        )
    except CommandError as e:
        # strip our own info from the exception. The original command output
        # went to stdout/err -- we just have to pass on the exit code in the
        # same way
        cmd_exitcode = e.code

        if not rerun_info or rerun_info.get("exit", 0) != cmd_exitcode:
            # we failed during a fresh run, or in a different way during a rerun
            # the latter can easily happen if we try to alter a locked file
            #
            # let's fail here, the command could have had a typo or some
            # other undesirable condition. If we would `add` nevertheless,
            # we would need to rerun and aggregate annex content that we
            # likely don't want
            # TODO add switch to ignore failure (some commands are stupid)
            # TODO add the ability to `git reset --hard` the dataset tree on failure
            # we know that we started clean, so we could easily go back, needs gh-1424
            # to be able to do it recursively
            raise CommandError(code=cmd_exitcode)

    lgr.info("== Command exit (modification check follows) =====")

    # amend commit message with `run` info:
    # - pwd if inside the dataset
    # - the command itself
    # - exit code of the command
    run_info = {
        'cmd': cmd,
        'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        'chain': rerun_info["chain"] if rerun_info else [],
    }
    if rel_pwd is not None:
        # only when inside the dataset to not leak information
        run_info['pwd'] = rel_pwd

    # compose commit message
    cmd_shorty = (' '.join(cmd) if isinstance(cmd, list) else cmd)
    cmd_shorty = '{}{}'.format(cmd_shorty[:40],
                               '...' if len(cmd_shorty) > 40 else '')
    msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
        message if message is not None else cmd_shorty,
        json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False))

    for r in ds.add('.', recursive=True, message=msg):
        yield r
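
Both variants frame the machine-readable record between sentinel lines so it can later be recovered from the commit message (e.g. by `datalad rerun`). A minimal sketch of such extraction, assuming exactly the format written above; the parser actually used by DataLad may differ:

import json
import re

def extract_run_info(commit_msg):
    """Pull the embedded run record out of a [DATALAD RUNCMD] message.

    Returns the run_info dict, or (in sidecar mode) the quoted record ID
    string, or None if the sentinel lines are absent.
    """
    m = re.search(
        r"=== Do not change lines below ===\n"
        r"(.*)\n"
        r"\^\^\^ Do not change lines above \^\^\^",
        commit_msg, flags=re.DOTALL)
    return json.loads(m.group(1)) if m else None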