def setup(self):
    """Benchmark setup: instantiate the runners exercised by the tests."""
    self.runner = Runner()
    # GitRunner only exists in more recent datalad versions; silently
    # skip it when running against an older installation
    try:
        from datalad.cmd import GitRunner
    except ImportError:
        return
    self.git_runner = GitRunner()
def test_magic_number():
    # PRE_INIT_COMMIT_SHA is a hard-coded SHA1 representing the state of a
    # Git repo before the first commit -- it lets us diff "from scratch" to
    # any given commit. Given the level of dark magic involved, verify that
    # this value stays constant across Git versions (it should!)
    stdout, stderr = GitRunner().run('git hash-object -t tree /dev/null')
    eq_(stdout.strip(), PRE_INIT_COMMIT_SHA)
def _get_untracked_content(dspath, report_untracked, paths=None):
    """Yield a result record for each piece of untracked content in a repo

    Parses ``git status --porcelain -z`` output and yields one dict per
    entry carrying the ``?? `` (untracked) marker.

    Parameters
    ----------
    dspath : str
      Path to the dataset/repository to query (used as git work tree).
    report_untracked : str
      Value for git's ``--untracked`` option, controlling whether fully
      untracked directories are reported as such, or expanded to files.
    paths : list of dict or None
      Optional annotated-path records (with a 'path' key) acting as a
      whitelist; only matching untracked content is reported.

    Yields
    ------
    dict
      With keys 'path', 'parentds', 'state' (always 'untracked'), 'type'.
    """
    cmd = [
        'git', '--work-tree=.', 'status', '--porcelain',
        # file names NULL terminated
        '-z',
        # we never want to touch submodules, they cannot be untracked
        '--ignore-submodules=all',
        # fully untracked dirs as such, the rest as files
        '--untracked={}'.format(report_untracked)
    ]
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            log_online=False,
            expect_stderr=False,
            shell=False,
            expect_fail=True)
    except CommandError as e:
        # TODO should we catch any and handle them in here?
        raise e
    if paths:
        # reduce the records to plain path strings
        paths = [r['path'] for r in paths]
        if len(paths) == 1 and paths[0] == dspath:
            # nothing to filter
            paths = None
    from datalad.utils import assure_unicode
    # output is NULL-delimited (see '-z' above)
    for line in stdout.split('\0'):
        if not line:
            continue
        line = assure_unicode(line)
        if not line.startswith('?? '):
            # nothing untracked, ignore, task of `diff`
            continue
        apath = opj(
            dspath,
            # strip state marker
            line[3:])
        norm_apath = normpath(apath)
        if paths and not any(norm_apath == p or path_startswith(apath, p)
                             for p in paths):
            # we got a whitelist for paths, don't report any other
            continue
        ap = dict(
            path=norm_apath,
            parentds=dspath,
            state='untracked',
            type='directory' if isdir(apath) else 'file')
        yield ap
def test_publish_no_fetch_refspec_configured(path):
    from datalad.cmd import GitRunner
    base = Path(path)
    # set up a bare repo to serve as the publication target
    GitRunner(cwd=str(base)).run(["git", "init", "--bare", "empty-remote"])
    ds = Dataset(base / "ds").create()
    ds.repo.add_remote("origin", str(ds.pathobj.parent / "empty-remote"))
    # Mimic a situation that can happen with an LFS remote. See gh-4199.
    ds.repo.config.unset("remote.origin.fetch", where="local")
    (ds.repo.pathobj / "foo").write_text("a")
    ds.save()
    # must not blow up despite the missing fetch refspec
    ds.publish(to="origin")
def __init__(self, dataset=None, dataset_only=False, overrides=None):
    """Create a configuration manager, optionally bound to a dataset.

    Parameters
    ----------
    dataset : Dataset or None
      If given, the dataset's own configuration files are consulted in
      addition to the regular git config locations.
    dataset_only : bool
      If True, only the dataset's ``.datalad/config`` is considered,
      not the repository's ``.git/config``.
    overrides : dict or None
      Variables that always take precedence over any setting read from
      a file.
    """
    # store in a simple dict
    # no subclassing, because we want to be largely read-only, and implement
    # config writing separately
    self._store = {}
    self._cfgfiles = set()
    self._cfgmtimes = None
    # initialize all path attributes upfront, so they exist regardless of
    # the dataset/dataset_only combination below (previously
    # `_repo_cfgfname` could remain unset for dataset_only=True)
    self._dataset_path = None
    self._dataset_cfgfname = None
    self._repo_cfgfname = None
    # public dict to store variables that always override any setting
    # read from a file
    # `hasattr()` is needed because `datalad.cfg` is generated upon first module
    # import, hence when this code runs first, there cannot be any config manager
    # to inherit from
    self.overrides = datalad.cfg.overrides.copy() if hasattr(
        datalad, 'cfg') else {}
    if overrides is not None:
        self.overrides.update(overrides)
    if dataset is not None:
        self._dataset_path = dataset.path
        self._dataset_cfgfname = opj(self._dataset_path, '.datalad', 'config')
        if not dataset_only:
            self._repo_cfgfname = opj(self._dataset_path, '.git', 'config')
    self._dataset_only = dataset_only
    # Since configs could contain sensitive information, to prevent
    # any "facilitated" leakage -- just disable logging of outputs for
    # this runner
    run_kwargs = dict(log_outputs=False)
    if dataset is not None:
        # make sure we run the git config calls in the dataset
        # to pick up the right config files
        run_kwargs['cwd'] = dataset.path
    self._runner = GitRunner(**run_kwargs)
    try:
        self._gitconfig_has_showorgin = \
            LooseVersion(get_git_version(self._runner)) >= '2.8.0'
    # narrowed from a bare `except:`, which would also have swallowed
    # KeyboardInterrupt/SystemExit
    except Exception:
        # no git something else broken, assume git is present anyway
        # to not delay this, but assume it is old
        self._gitconfig_has_showorgin = False
    self.reload(force=True)
def _parse_git_submodules(dspath, recursive):
    """All known ones with some properties

    Runs ``git submodule status`` (optionally ``--recursive``) in `dspath`
    and yields one dict per reported submodule with keys 'state',
    'revision', 'path', and -- when git reported a describe string --
    'revision_descr'.
    """
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return
    # this will not work in direct mode, need better way #1422
    cmd = ['git', '--work-tree=.', 'submodule', 'status']
    if recursive:
        cmd.append('--recursive')
    # need to go rogue and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))
    for line in stdout.split('\n'):
        if not line:
            continue
        sm = {}
        # first character is the submodule state marker
        sm['state'] = status_map[line[0]]
        props = submodule_full_props.match(line[1:])
        if props:
            sm['revision'] = props.group(1)
            sm['path'] = opj(dspath, props.group(2))
            sm['revision_descr'] = props.group(3)
        else:
            # NOTE(review): assumes this second pattern always matches when
            # the first did not; a non-matching line would crash with
            # AttributeError on `props.group` -- TODO confirm
            props = submodule_nodescribe_props.match(line[1:])
            sm['revision'] = props.group(1)
            sm['path'] = opj(dspath, props.group(2))
        yield sm
def _describe_annex():
    """Return a dict describing the locally available git-annex

    Parses ``git annex version`` output into a key/value mapping, with the
    leading ``git-annex `` prefix stripped from keys, and plural keys
    (those ending in 's') split into lists of words.

    Returns
    -------
    dict
      Parsed version info, or ``{'version': 'not available', 'message': ...}``
      when the command fails.
    """
    from datalad.cmd import GitRunner
    runner = GitRunner()
    try:
        out, err = runner.run(['git', 'annex', 'version'])
    except CommandError as e:
        return dict(
            version='not available',
            message=exc_str(e),
        )
    info = {}
    # BUGFIX: was `out.split(os.linesep)`; git-annex terminates its lines
    # with '\n' regardless of platform, so splitting on os.linesep ('\r\n'
    # on Windows) would fail to split there -- splitlines() handles both
    for line in out.splitlines():
        key = line.split(':')[0]
        if not key:
            continue
        # skip the key, the ': ' separator, and take the rest
        value = line[len(key) + 2:].strip()
        key = key.replace('git-annex ', '')
        if key.endswith('s'):
            # plural key -> list of values
            value = value.split()
        info[key] = value
    return info
def _parse_git_submodules(dspath):
    """All known ones with some properties

    Parses ``git ls-files --stage -z`` output and yields one dict per
    gitlink entry (mode 160000) with the recorded 'revision' and absolute
    'path'; entries without a valid repository checkout additionally get
    ``state='absent'``.
    """
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return
    # this will not work in direct mode, need better way #1422
    cmd = ['git', 'ls-files', '--stage', '-z']
    # need to go rogue and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))
    # output is NULL-delimited (see '-z' above)
    for line in stdout.split('\0'):
        if not line or not line.startswith('160000'):
            # only gitlink (submodule) entries are of interest
            continue
        sm = {}
        # NOTE(review): assumes the pattern matches every gitlink line;
        # a non-match would crash with AttributeError -- TODO confirm regex
        props = submodule_full_props.match(line)
        sm['revision'] = props.group(2)
        subpath = _path_(dspath, props.group(4))
        sm['path'] = subpath
        if not exists(subpath) or not GitRepo.is_valid_repo(subpath):
            # registered in the index, but no functional repo on disk
            sm['state'] = 'absent'
        yield sm
# NOTE(review): the following three lines are the tail of a comparison
# method whose definition starts outside this view
if other is self:
    return 0
raise TypeError("UNKNOWN version is not comparable")

#
# Custom handlers
#
from datalad.cmd import Runner
from datalad.cmd import GitRunner
from datalad.support.exceptions import (
    MissingExternalDependency,
    OutdatedExternalDependency,
)

# module-level runners shared by the version probes below
_runner = Runner()
_git_runner = GitRunner()


def _get_annex_version():
    """Return version of available git-annex"""
    try:
        # --raw emits just the bare version string
        return _runner.run('git annex version --raw'.split())[0]
    except CommandError:
        # fall back on method that could work with older installations
        out, err = _runner.run(['git', 'annex', 'version'])
        return out.split('\n')[0].split(':')[1].strip()


def _get_git_version():
    """Return version of git we use (might be bundled)"""
    return __get_git_version(_git_runner)
def _parse_git_diff(dspath, diff_thingie=None, paths=None,
                    ignore_submodules='none', staged=False):
    """Yield result records parsed from ``git diff --raw -z`` output

    Parameters
    ----------
    dspath : str
      Path to the repository to diff in (used as git work tree).
    diff_thingie : str or None
      Optional revision (range) argument passed straight to ``git diff``.
    paths : list of dict or None
      Annotated-path records; those flagged with ``raw_input`` constrain
      the diff.
    ignore_submodules : str
      Value for git's ``--ignore-submodules`` option.
    staged : bool
      If True, diff the index against the given revision (``--staged``).

    Yields
    ------
    dict
      One record per changed path with mode/revision/type/status info, or
      a single 'impossible' record when the revision is unknown.
    """
    # use '--work-tree=.' to get direct mode to cooperate
    cmd = [
        'git', '--work-tree=.', 'diff', '--raw',
        # file names NULL terminated
        '-z',
        # how to treat submodules (see git diff docs)
        '--ignore-submodules={}'.format(ignore_submodules),
        # never abbreviate sha sums
        '--abbrev=40']
    if staged:
        cmd.append('--staged')
    if diff_thingie:
        cmd.append(diff_thingie)
    if paths:
        cmd.append('--')
        cmd.extend(ap['path'] for ap in paths if ap.get('raw_input', False))
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            log_online=False,
            expect_stderr=False,
            shell=False,
            expect_fail=True)
    except CommandError as e:
        if 'bad revision' in e.stderr:
            yield dict(
                path=dspath,
                type='dataset',
                status='impossible',
                message=e.stderr.strip())
            return
        raise e
    ap = None
    # a raw record is a ':'-prefixed metadata chunk followed by one (or for
    # renames, more) NULL-separated filename chunk(s)
    for line in stdout.split('\0'):
        if not line:
            continue
        if line.startswith(':'):
            # a new path
            # yield any existing one
            if ap:
                yield ap
                ap = None
            m_src, m_dst, sha_src, sha_dst, status = \
                line[1:].split()
            ap = dict(
                mode_src=int(m_src, base=8),
                mode=int(m_dst, base=8),
                # all-zero sha means "does not exist on this side"
                revision_src=sha_src if sha_src != '0' * 40 else None,
                revision=sha_dst if sha_dst != '0' * 40 else None,
                parentds=dspath)
            _translate_status(status, ap)
            _translate_type(ap['mode'], ap, 'type')
            _translate_type(ap['mode_src'], ap, 'type_src')
        else:
            # a filename
            if 'path' in ap:
                # second filename chunk for this record: first one was the
                # source of a rename/copy
                ap['path_src'] = ap['path']
            ap['path'] = opj(dspath, line)
    # flush the trailing record, if any
    if ap:
        yield ap
def get_git_version(runner=None):
    """Return version of available git

    A `runner` object with a ``run`` method may be supplied; by default a
    fresh GitRunner is used.
    """
    if runner is None:
        runner = GitRunner()
    stdout = runner.run('git version'.split())[0]
    # output looks like 'git version X.Y.Z ...'; the version is token #3
    return stdout.split()[2]
def __init__(self, dataset=None, dataset_only=False, overrides=None,
             source='any'):
    """Create a configuration manager, optionally bound to a dataset.

    Parameters
    ----------
    dataset : Dataset or None
      If given, the dataset's own configuration files may be consulted in
      addition to the regular git config locations.
    dataset_only : bool
      Legacy flag, equivalent to ``source='dataset'``; must not be
      combined with a non-default `source`.
    overrides : dict or None
      Variables that always take precedence over any setting read from
      a file.
    source : {'any', 'local', 'dataset', 'dataset-local'}
      Which configuration scopes to read.

    Raises
    ------
    ValueError
      For an unrecognized `source`, for combining `dataset_only` with a
      non-default `source`, or for a dataset-only source without a dataset.
    """
    if source not in ('any', 'local', 'dataset', 'dataset-local'):
        raise ValueError(
            # typo fix: was 'Unkown'
            'Unknown ConfigManager(source=) setting: {}'.format(source))
    # legacy compat
    if dataset_only:
        if source != 'any':
            raise ValueError(
                'Refuse to combine legacy dataset_only flag, with '
                'source setting')
        source = 'dataset'
    # store in a simple dict
    # no subclassing, because we want to be largely read-only, and implement
    # config writing separately
    self._store = {}
    self._cfgfiles = set()
    self._cfgmtimes = None
    self._dataset_path = None
    self._dataset_cfgfname = None
    self._repo_cfgfname = None
    self._config_cmd = ['git', 'config']
    # public dict to store variables that always override any setting
    # read from a file
    # `hasattr()` is needed because `datalad.cfg` is generated upon first module
    # import, hence when this code runs first, there cannot be any config manager
    # to inherit from
    self.overrides = datalad.cfg.overrides.copy() if hasattr(
        datalad, 'cfg') else {}
    if overrides is not None:
        self.overrides.update(overrides)
    if dataset is None:
        if source in ('dataset', 'dataset-local'):
            raise ValueError(
                'ConfigManager configured to read dataset only, '
                'but no dataset given')
        # The caller didn't specify a repository. Unset the git directory
        # when calling 'git config' to prevent a repository in the current
        # working directory from leaking configuration into the output.
        self._config_cmd = ['git', '--git-dir=', 'config']
    else:
        self._dataset_path = dataset.path
        if source != 'local':
            self._dataset_cfgfname = opj(self._dataset_path,
                                         DATASET_CONFIG_FILE)
        if source != 'dataset':
            self._repo_cfgfname = opj(self._dataset_path, '.git', 'config')
    self._src_mode = source
    # Since configs could contain sensitive information, to prevent
    # any "facilitated" leakage -- just disable logging of outputs for
    # this runner
    run_kwargs = dict(log_outputs=False)
    if dataset is not None:
        # make sure we run the git config calls in the dataset
        # to pick up the right config files
        run_kwargs['cwd'] = dataset.path
    self._runner = GitRunner(**run_kwargs)
    try:
        self._gitconfig_has_showorgin = \
            LooseVersion(get_git_version()) >= '2.8.0'
    except Exception:
        # no git something else broken, assume git is present anyway
        # to not delay this, but assume it is old
        self._gitconfig_has_showorgin = False
    self.reload(force=True)