示例#1
0
文件: core.py 项目: leej3/datalad
 def setup(self):
     """Instantiate the command runners stored on this object.

     ``self.runner`` is always set.  ``self.git_runner`` is only set when
     ``GitRunner`` can be imported from ``datalad.cmd``.
     """
     self.runner = Runner()
     try:
         # imported lazily: older versions might not have it
         from datalad.cmd import GitRunner
         git_runner = GitRunner()
     except ImportError:
         # leave `git_runner` undefined on such versions
         return
     self.git_runner = git_runner
示例#2
0
def test_magic_number():
    """Check the hard-coded pre-init SHA1 against the installed Git."""
    # PRE_INIT_COMMIT_SHA is the magic SHA1 that represents the state of a
    # Git repo prior to the first commit -- it is used to diff from scratch
    # to a specific commit.
    # Given the level of dark magic, we better test whether this stays
    # constant across Git versions (it should!)
    hash_cmd = 'git hash-object -t tree /dev/null'
    stdout, _ = GitRunner().run(hash_cmd)
    reported_sha = stdout.strip()
    eq_(reported_sha, PRE_INIT_COMMIT_SHA)
示例#3
0
def _get_untracked_content(dspath, report_untracked, paths=None):
    """Yield a record for every untracked item Git reports in a dataset.

    Runs ``git status --porcelain -z`` with `dspath` as working directory
    and turns each entry flagged as untracked (``??``) into a dict.

    Parameters
    ----------
    dspath : str
      Dataset path; used as working directory for Git and as the prefix
      of all reported paths.
    report_untracked : str
      Value inserted verbatim into the ``--untracked=`` option.
    paths : list of dict, optional
      Records with a ``'path'`` key.  When given (and not merely the
      dataset path itself), only untracked items that equal one of these
      paths, or for which `path_startswith` holds, are reported.

    Yields
    ------
    dict
      With keys ``path``, ``parentds``, ``state`` (always ``'untracked'``)
      and ``type`` (``'directory'`` or ``'file'``).
    """
    status_cmd = ['git', '--work-tree=.', 'status', '--porcelain']
    # file names NULL terminated
    status_cmd.append('-z')
    # we never want to touch submodules, they cannot be untracked
    status_cmd.append('--ignore-submodules=all')
    # fully untracked dirs as such, the rest as files
    status_cmd.append('--untracked={}'.format(report_untracked))

    git = GitRunner(cwd=dspath)
    try:
        stdout, stderr = git.run(
            status_cmd,
            log_stderr=True,
            log_stdout=True,
            log_online=False,
            expect_stderr=False,
            shell=False,
            expect_fail=True)
    except CommandError as e:
        # TODO should we catch any and handle them in here?
        raise e

    whitelist = None
    if paths:
        whitelist = [record['path'] for record in paths]
        if len(whitelist) == 1 and whitelist[0] == dspath:
            # the dataset itself was requested: nothing to filter
            whitelist = None

    from datalad.utils import assure_unicode

    for entry in stdout.split('\0'):
        if not entry:
            continue
        entry = assure_unicode(entry)
        if not entry.startswith('?? '):
            # not an untracked item, ignore, task of `diff`
            continue
        # drop the three-character state marker to get the relative path
        apath = opj(dspath, entry[3:])
        norm_apath = normpath(apath)
        if whitelist:
            # we got a whitelist for paths, don't report any other
            wanted = False
            for allowed in whitelist:
                if norm_apath == allowed or path_startswith(apath, allowed):
                    wanted = True
                    break
            if not wanted:
                continue
        item_type = 'directory' if isdir(apath) else 'file'
        yield {
            'path': norm_apath,
            'parentds': dspath,
            'state': 'untracked',
            'type': item_type,
        }
示例#4
0
def test_publish_no_fetch_refspec_configured(path):
    """Publish to a remote that has no fetch refspec configured."""
    from datalad.cmd import GitRunner

    root = Path(path)
    # bare repository next to the dataset serves as the publication target
    init_cmd = ["git", "init", "--bare", "empty-remote"]
    GitRunner(cwd=str(root)).run(init_cmd)

    ds = Dataset(root / "ds").create()
    remote_location = ds.pathobj.parent / "empty-remote"
    ds.repo.add_remote("origin", str(remote_location))
    # Mimic a situation that can happen with an LFS remote. See gh-4199.
    ds.repo.config.unset("remote.origin.fetch", where="local")

    new_file = ds.repo.pathobj / "foo"
    new_file.write_text("a")
    ds.save()
    ds.publish(to="origin")
示例#5
0
    def __init__(self, dataset=None, dataset_only=False, overrides=None):
        """Set up the configuration store and load the configuration.

        Parameters
        ----------
        dataset : object, optional
          Object with a ``path`` attribute.  If given, the dataset's
          ``.datalad/config`` (and, unless `dataset_only`, its
          ``.git/config``) locations are recorded, and Git is executed
          with the dataset path as working directory.
        dataset_only : bool, optional
          If true, the location of the repository's ``.git/config`` is
          not recorded.
        overrides : dict, optional
          Settings merged on top of the overrides inherited from
          ``datalad.cfg`` (if that already exists).
        """
        # store in a simple dict
        # no subclassing, because we want to be largely read-only, and implement
        # config writing separately
        self._store = {}
        self._cfgfiles = set()
        self._cfgmtimes = None
        # public dict to store variables that always override any setting
        # read from a file
        # `hasattr()` is needed because `datalad.cfg` is generated upon first module
        # import, hence when this code runs first, there cannot be any config manager
        # to inherit from
        self.overrides = datalad.cfg.overrides.copy() if hasattr(
            datalad, 'cfg') else {}
        if overrides is not None:
            self.overrides.update(overrides)
        # Define all location attributes up front, so that each of them
        # exists for every combination of arguments (previously
        # `_repo_cfgfname` was left undefined for a dataset with
        # `dataset_only=True`)
        self._dataset_path = None
        self._dataset_cfgfname = None
        self._repo_cfgfname = None
        if dataset is not None:
            self._dataset_path = dataset.path
            self._dataset_cfgfname = opj(self._dataset_path, '.datalad',
                                         'config')
            if not dataset_only:
                self._repo_cfgfname = opj(self._dataset_path, '.git', 'config')
        self._dataset_only = dataset_only
        # Since configs could contain sensitive information, to prevent
        # any "facilitated" leakage -- just disable logging of outputs for
        # this runner
        run_kwargs = dict(log_outputs=False)
        if dataset is not None:
            # make sure we run the git config calls in the dataset
            # to pick up the right config files
            run_kwargs['cwd'] = dataset.path
        self._runner = GitRunner(**run_kwargs)
        try:
            self._gitconfig_has_showorgin = \
                LooseVersion(get_git_version(self._runner)) >= '2.8.0'
        except Exception:
            # no git something else broken, assume git is present anyway
            # to not delay this, but assume it is old.
            # Only `Exception` is caught, so that KeyboardInterrupt and
            # SystemExit are not swallowed here
            self._gitconfig_has_showorgin = False

        self.reload(force=True)
示例#6
0
def _parse_git_submodules(dspath, recursive):
    """Yield a property dict for each line of ``git submodule status``.

    Parameters
    ----------
    dspath : str
      Path of the dataset to query; Git runs with it as working directory.
    recursive : bool
      If true, ``--recursive`` is added to the Git call.

    Yields
    ------
    dict
      With keys ``state``, ``revision`` and ``path``, plus
      ``revision_descr`` when the line matches `submodule_full_props`.

    Raises
    ------
    InvalidGitRepositoryError
      If the Git call fails with a CommandError.
    """
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    # this will not work in direct mode, need better way #1422
    cmd = ['git', '--work-tree=.', 'submodule', 'status'] + (
        ['--recursive'] if recursive else [])

    # need to go rogue  and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    git = GitRunner(cwd=dspath)
    try:
        stdout, stderr = git.run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))

    for line in stdout.split('\n'):
        if not line:
            continue
        # first character encodes the state, the rest holds the properties
        flag, remainder = line[0], line[1:]
        sm = {'state': status_map[flag]}
        full = submodule_full_props.match(remainder)
        # fall back on the pattern without a revision description
        props = full if full else submodule_nodescribe_props.match(remainder)
        sm['revision'] = props.group(1)
        sm['path'] = opj(dspath, props.group(2))
        if full:
            sm['revision_descr'] = props.group(3)
        yield sm
示例#7
0
文件: wtf.py 项目: vsoch/datalad
def _describe_annex():
    """Return a dict built from the output of ``git annex version``.

    Each output line is split at its first ``:`` into a key and a value;
    a leading ``git-annex `` is removed from keys, and values of keys
    ending in ``s`` are split into a list of words.

    If the command fails with a CommandError, a dict with
    ``version='not available'`` and the error ``message`` is returned
    instead.
    """
    from datalad.cmd import GitRunner

    git = GitRunner()
    try:
        out, err = git.run(['git', 'annex', 'version'])
    except CommandError as e:
        return {
            'version': 'not available',
            'message': exc_str(e),
        }

    info = {}
    for line in out.split(os.linesep):
        label = line.partition(':')[0]
        if not label:
            continue
        # skip the label plus the two separator characters that follow it
        content = line[len(label) + 2:].strip()
        label = label.replace('git-annex ', '')
        # plural labels carry a whitespace-separated list of items
        info[label] = content.split() if label.endswith('s') else content
    return info
示例#8
0
def _parse_git_submodules(dspath):
    """Yield a property dict for each mode-160000 entry of ``git ls-files``.

    Parameters
    ----------
    dspath : str
      Path of the dataset to query; Git runs with it as working directory.

    Yields
    ------
    dict
      With keys ``revision`` and ``path``; ``state`` is set to
      ``'absent'`` when the path does not exist or is not a valid
      repository according to `GitRepo.is_valid_repo`.

    Raises
    ------
    InvalidGitRepositoryError
      If the Git call fails with a CommandError.
    """
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    # this will not work in direct mode, need better way #1422
    ls_cmd = ['git', 'ls-files', '--stage', '-z']

    # need to go rogue  and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    git = GitRunner(cwd=dspath)
    try:
        stdout, stderr = git.run(
            ls_cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))

    for entry in stdout.split('\0'):
        # only entries with mode 160000 are of interest
        # (an empty entry never passes this check either)
        if not entry.startswith('160000'):
            continue
        props = submodule_full_props.match(entry)
        revision = props.group(2)
        subpath = _path_(dspath, props.group(4))
        sm = {'revision': revision, 'path': subpath}
        if not (exists(subpath) and GitRepo.is_valid_repo(subpath)):
            sm['state'] = 'absent'
        yield sm
示例#9
0
        if other is self:
            return 0
        raise TypeError("UNKNOWN version is not comparable")


#
# Custom handlers
#
from datalad.cmd import Runner
from datalad.cmd import GitRunner
from datalad.support.exceptions import (
    MissingExternalDependency,
    OutdatedExternalDependency,
)
# Module-level runner instances, created once at import time and reused by
# the version helpers below (`_runner` for git-annex, `_git_runner` for git)
_runner = Runner()
_git_runner = GitRunner()


def _get_annex_version():
    """Return version of available git-annex

    ``git annex version --raw`` is tried first and its stdout is returned
    as is.  If that call fails with a CommandError, the version is taken
    from the first line of plain ``git annex version`` output (the text
    following the first colon).
    """
    try:
        return _runner.run(['git', 'annex', 'version', '--raw'])[0]
    except CommandError:
        # fall back on method that could work with older installations
        out, err = _runner.run(['git', 'annex', 'version'])
        first_line = out.split('\n')[0]
        return first_line.split(':')[1].strip()


def _get_git_version():
    """Return version of git we use (might be bundled)

    Delegates to `__get_git_version`, handing it the module-level
    `_git_runner` instance.
    """
    return __get_git_version(_git_runner)
示例#10
0
def _parse_git_diff(dspath,
                    diff_thingie=None,
                    paths=None,
                    ignore_submodules='none',
                    staged=False):
    """Yield one record per path reported by ``git diff --raw -z``.

    Parameters
    ----------
    dspath : str
      Dataset path; Git runs with it as working directory and it is the
      prefix of all reported paths.
    diff_thingie : str, optional
      Extra argument appended to the Git call (e.g. a revision spec).
    paths : list of dict, optional
      Records with a ``'path'`` key.  Only those with a true
      ``'raw_input'`` value are passed to Git as path constraints.
    ignore_submodules : str, optional
      Value inserted into the ``--ignore-submodules=`` option.
    staged : bool, optional
      If true, ``--staged`` is added to the Git call.

    Yields
    ------
    dict
      Normally a record with ``mode_src``, ``mode``, ``revision_src``,
      ``revision``, ``parentds``, ``path`` (and ``path_src`` when Git
      reports two paths for an entry), plus whatever `_translate_status`
      and `_translate_type` add.  If Git fails with 'bad revision' in its
      stderr, a single record with ``status='impossible'`` is yielded.

    Raises
    ------
    CommandError
      For any other failure of the Git call.
    """
    # use '--work-tree=.' to get direct mode to cooperate
    cmd = ['git', '--work-tree=.', 'diff', '--raw']
    # file names NULL terminated
    cmd.append('-z')
    # how to treat submodules (see git diff docs)
    cmd.append('--ignore-submodules={}'.format(ignore_submodules))
    # never abbreviate sha sums
    cmd.append('--abbrev=40')
    if staged:
        cmd.append('--staged')
    if diff_thingie:
        cmd.append(diff_thingie)
    if paths:
        cmd.append('--')
        for ap in paths:
            if ap.get('raw_input', False):
                cmd.append(ap['path'])

    git = GitRunner(cwd=dspath)
    try:
        stdout, stderr = git.run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            log_online=False,
            expect_stderr=False,
            shell=False,
            expect_fail=True)
    except CommandError as e:
        if 'bad revision' not in e.stderr:
            raise e
        yield {
            'path': dspath,
            'type': 'dataset',
            'status': 'impossible',
            'message': e.stderr.strip(),
        }
        return

    # all-zero sha as used in the raw output; mapped to None below
    null_sha = '0' * 40
    record = None
    for token in stdout.split('\0'):
        if not token:
            continue
        if not token.startswith(':'):
            # a filename belonging to the current record; a second name
            # for the same record demotes the first one to `path_src`
            if 'path' in record:
                record['path_src'] = record['path']
            record['path'] = opj(dspath, token)
            continue
        # a new path starts here: flush any record that is in progress
        if record:
            yield record
            record = None
        m_src, m_dst, sha_src, sha_dst, status = token[1:].split()
        record = {
            'mode_src': int(m_src, base=8),
            'mode': int(m_dst, base=8),
            'revision_src': None if sha_src == null_sha else sha_src,
            'revision': None if sha_dst == null_sha else sha_dst,
            'parentds': dspath,
        }
        _translate_status(status, record)
        _translate_type(record['mode'], record, 'type')
        _translate_type(record['mode_src'], record, 'type_src')
    if record:
        yield record
示例#11
0
def get_git_version(runner=None):
    """Return version of available git

    Parameters
    ----------
    runner : optional
      Object whose ``run(cmd)`` returns a sequence with the command's
      stdout as first item.  A new ``GitRunner`` is created if none (or a
      false value) is given.

    Returns
    -------
    str
      The third whitespace-separated field of the ``git version`` output.
    """
    if not runner:
        runner = GitRunner()
    stdout = runner.run(['git', 'version'])[0]
    # output looks like "git version X.Y.Z": the version is the third field
    fields = stdout.split()
    return fields[2]
示例#12
0
    def __init__(self,
                 dataset=None,
                 dataset_only=False,
                 overrides=None,
                 source='any'):
        """Set up the configuration store and load the configuration.

        Parameters
        ----------
        dataset : object, optional
          Object with a ``path`` attribute.  If given, Git is executed
          with the dataset path as working directory.  If not given,
          ``git config`` is called with an empty ``--git-dir=``.
        dataset_only : bool, optional
          Legacy flag, equivalent to ``source='dataset'``; must not be
          combined with any `source` other than ``'any'``.
        overrides : dict, optional
          Settings merged on top of the overrides inherited from
          ``datalad.cfg`` (if that already exists).
        source : {'any', 'local', 'dataset', 'dataset-local'}, optional
          With a dataset, the dataset config file location is recorded
          unless this is ``'local'``, and the repository's ``.git/config``
          location is recorded unless this is ``'dataset'``.

        Raises
        ------
        ValueError
          For an unknown `source`, for `dataset_only` combined with a
          non-default `source`, or for a dataset-bound `source` without a
          dataset.
        """
        known_sources = ('any', 'local', 'dataset', 'dataset-local')
        if source not in known_sources:
            raise ValueError(
                'Unkown ConfigManager(source=) setting: {}'.format(source))
        # legacy compat
        if dataset_only:
            if source != 'any':
                raise ValueError(
                    'Refuse to combine legacy dataset_only flag, with '
                    'source setting')
            source = 'dataset'

        # store in a simple dict
        # no subclassing, because we want to be largely read-only, and implement
        # config writing separately
        self._store = {}
        self._cfgfiles = set()
        self._cfgmtimes = None
        # defaults for the no-dataset case; adjusted below as needed
        self._dataset_path = None
        self._dataset_cfgfname = None
        self._repo_cfgfname = None
        self._config_cmd = ['git', 'config']

        # public dict to store variables that always override any setting
        # read from a file
        # `hasattr()` is needed because `datalad.cfg` is generated upon first module
        # import, hence when this code runs first, there cannot be any config manager
        # to inherit from
        if hasattr(datalad, 'cfg'):
            self.overrides = datalad.cfg.overrides.copy()
        else:
            self.overrides = {}
        if overrides is not None:
            self.overrides.update(overrides)

        if dataset is not None:
            self._dataset_path = dataset.path
            if source != 'local':
                self._dataset_cfgfname = opj(self._dataset_path,
                                             DATASET_CONFIG_FILE)
            if source != 'dataset':
                self._repo_cfgfname = opj(self._dataset_path, '.git', 'config')
        elif source in ('dataset', 'dataset-local'):
            raise ValueError(
                'ConfigManager configured to read dataset only, '
                'but no dataset given')
        else:
            # The caller didn't specify a repository. Unset the git directory
            # when calling 'git config' to prevent a repository in the current
            # working directory from leaking configuration into the output.
            self._config_cmd = ['git', '--git-dir=', 'config']
        self._src_mode = source

        # Since configs could contain sensitive information, to prevent
        # any "facilitated" leakage -- just disable logging of outputs for
        # this runner
        run_kwargs = {'log_outputs': False}
        if dataset is not None:
            # make sure we run the git config calls in the dataset
            # to pick up the right config files
            run_kwargs['cwd'] = dataset.path
        self._runner = GitRunner(**run_kwargs)

        try:
            has_show_origin = LooseVersion(get_git_version()) >= '2.8.0'
        except Exception:
            # no git something else broken, assume git is present anyway
            # to not delay this, but assume it is old
            has_show_origin = False
        self._gitconfig_has_showorgin = has_show_origin

        self.reload(force=True)