def test_GitRepo_pull(test_path, orig_path, clone_path):
    """Pulling from origin makes a file committed there appear in the clone.

    Also exercises the private ``_get_remotes_having_commit`` helper:
    a commit made only in ``orig_path`` is known to 'origin' alone, while
    its parent is reachable from both remotes.
    """
    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")
    clone.pull()
    ok_(op.exists(op.join(clone_path, filename)))

    # While at it, let's test _get_remotes_having_commit a bit
    clone.add_remote("very_origin", test_path)
    clone.fetch("very_origin")
    # HEAD commit exists only in 'origin' (it was made after test_path clone)
    eq_(
        clone._get_remotes_having_commit(clone.get_hexsha()),
        ['origin']
    )
    prev_commit = clone.get_hexsha('HEAD^')
    # the parent commit predates the new file, so both remotes have it
    eq_(
        set(clone._get_remotes_having_commit(prev_commit)),
        {'origin', 'very_origin'}
    )
def test_GitRepo_fetch(test_path, orig_path, clone_path):
    """Fetch updates remote branches without touching the working tree.

    Also checks that fetching from a remote that has no URL configured
    is a silent no-op returning an empty list.
    """
    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without an URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)
def test_knows_annex(here, there):
    """knows_annex is False for plain git, True for annex and annex clones."""
    from datalad.support.gitrepo import GitRepo
    from datalad.support.annexrepo import AnnexRepo
    GitRepo(path=here, create=True)
    assert_false(knows_annex(here))
    AnnexRepo(path=here, create=True)
    assert_true(knows_annex(here))
    # a clone of an annex repo carries a remote git-annex branch
    GitRepo.clone(path=there, url=here, create=True)
    assert_true(knows_annex(there))
def test_GitRepo_add(src, path):
    """Legacy variant of the add test using the old ``git_add`` API.

    NOTE(review): a newer test with the same name exists later in this file
    (using ``gr.add``); if both live in one module the later definition
    shadows this one — confirm this duplication is intentional.
    """
    gr = GitRepo(path, src)
    filename = "test_git_add.dat"
    with open(os.path.join(path, filename), 'w') as f:
        f.write("File to add to git")
    gr.git_add(filename)

    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
def _clone_from_any_source(sources, dest):
    """Try cloning `dest` from each URL in `sources` until one succeeds.

    Returns the successful source URL, ``None`` when `dest` already holds
    something (installed dataset or other content), and re-raises the last
    ``GitCommandError`` when every candidate failed.
    """
    # should not be the case, but we need to distinguish between failure
    # of git-clone, due to existing target and an unsuccessful clone
    # attempt. See below.
    existed = dest and exists(dest)
    for source_ in sources:
        try:
            lgr.debug("Retrieving a dataset from URL: "
                      "{0}".format(source_))
            with swallow_logs():
                GitRepo.clone(path=dest, url=source_, create=True)
            return source_  # do not bother with other sources if succeeded
        except GitCommandError as e:
            lgr.debug("Failed to retrieve from URL: "
                      "{0}".format(source_))
            # wipe out the partial clone only if we created the target dir
            if not existed and dest \
                    and exists(dest):
                lgr.debug("Wiping out unsuccessful clone attempt at "
                          "{}".format(dest))
                rmtree(dest)
            # NOTE(review): this equality check triggers early if `sources`
            # contains duplicate entries — confirm callers never pass dups
            if source_ == sources[-1]:
                # Note: The following block is evaluated whenever we
                # fail even with the last try. Not nice, but currently
                # necessary until we get a more precise exception:
                ####################################
                # TODO: We may want to introduce a --force option to
                # overwrite the target.
                # TODO: Currently assuming if `existed` and there is a
                # GitCommandError means that these both things are connected.
                # Need newer GitPython to get stderr from GitCommandError
                # (already fixed within GitPython.)
                if existed:
                    # rudimentary check for an installed dataset at target:
                    # (TODO: eventually check for being the one, that this
                    # is about)
                    dest_ds = Dataset(dest)
                    if dest_ds.is_installed():
                        lgr.info("{0} appears to be installed already."
                                 "".format(dest_ds))
                        break
                    else:
                        lgr.warning("Target {0} already exists and is not "
                                    "an installed dataset. Skipped."
                                    "".format(dest))
                        # Keep original in debug output:
                        lgr.debug("Original failure:{0}"
                                  "{1}".format(linesep, exc_str(e)))
                        return None
                ##################
                # Re-raise if failed even with the last candidate
                lgr.debug("Unable to establish repository instance at "
                          "{0} from {1}"
                          "".format(dest, sources))
                raise
def test_GitRepo_commit(path):
    """Committing a staged file leaves a clean working tree (legacy API)."""
    gr = GitRepo(path)
    filename = "test_git_add.dat"
    with open(os.path.join(path, filename), 'w') as f:
        f.write("File to add to git")
    gr.git_add(filename)
    gr.git_commit("Testing GitRepo.git_commit().")
    ok_clean_git(path, annex=False, untracked=[])
def test_GitRepo_dirty(path):
    """`dirty` tracks untracked, staged, and modified states correctly."""
    repo = GitRepo(path, create=True)
    ok_(not repo.dirty)

    # untracked file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(repo.dirty)
    # staged file
    repo.add('file1.txt')
    ok_(repo.dirty)
    # clean again
    repo.commit("file1.txt added")
    ok_(not repo.dirty)
    # modify to be the same — identical content must NOT register as dirty
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('whatever')
    ok_(not repo.dirty)
    # modified file
    with open(op.join(path, 'file1.txt'), 'w') as f:
        f.write('something else')
    ok_(repo.dirty)
    # clean again
    repo.add('file1.txt')
    repo.commit("file1.txt modified")
    ok_(not repo.dirty)
def test_GitRepo_get_indexed_files(src, path):
    """get_indexed_files agrees with `git ls-files` in both directions.

    NOTE(review): a newer variant of this test (splitting ls-files output on
    newlines) exists later in this file; if both are in one module the later
    one shadows this — confirm the duplication is intentional.
    """
    gr = GitRepo(path, src)
    idx_list = gr.get_indexed_files()

    runner = Runner()
    out = runner(['git', 'ls-files'], cwd=path)
    # NOTE(review): whitespace split breaks on filenames containing spaces
    out_list = out[0].split()

    for item in idx_list:
        assert_in(item, out_list,
                  "%s not found in output of git ls-files in %s"
                  % (item, path))
    for item in out_list:
        assert_in(item, idx_list,
                  "%s not found in output of get_indexed_files in %s"
                  % (item, path))
def test_clone_dataladri(src, topurl, path):
    """clone() resolves a ///-style datalad RI against DATASETS_TOPURL."""
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    # make the repo fetchable over dumb http
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets',
                   return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_GitRepo_get_toppath(repo, tempdir, repo2):
    """get_toppath finds the repo root, honoring follow_up for symlinks."""
    reporeal = op.realpath(repo)
    eq_(GitRepo.get_toppath(repo, follow_up=False), reporeal)
    eq_(GitRepo.get_toppath(repo), repo)
    # Generate some nested directory
    GitRepo(repo2, create=True)
    repo2real = op.realpath(repo2)
    nested = op.join(repo2, "d1", "d2")
    os.makedirs(nested)
    eq_(GitRepo.get_toppath(nested, follow_up=False), repo2real)
    eq_(GitRepo.get_toppath(nested), repo2)
    # and if not under git, should return None
    eq_(GitRepo.get_toppath(tempdir), None)
def test_hierarchy(topdir):
    """create_test_dataset('1/1') yields 3 clean nested datasets (GH-1178)."""
    # GH 1178
    from datalad.api import create_test_dataset
    with swallow_logs(), swallow_outputs():
        dss = create_test_dataset(topdir, spec='1/1')
    eq_(len(dss), 3)
    eq_(dss[0], topdir)
    for ids, ds in enumerate(dss):
        ok_clean_git(ds, annex=False)
        # each one should have 2 commits (but the last one)-- one for file and
        # another one for sub-dataset
        repo = GitRepo(ds)
        eq_(len(list(repo.get_branch_commits())), 1 + int(ids < 2))
def test_GitRepo_push_n_checkout(orig_path, clone_path):
    """A commit pushed to a new branch appears after checkout on origin."""
    origin = GitRepo(orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    with open(op.join(clone_path, filename), 'w') as f:
        f.write("New file.")
    clone.add(filename)
    clone.commit("new file added.")
    # TODO: need checkout first:
    # push to a separate branch since origin has master checked out
    clone.push('origin', '+master:new-branch')
    origin.checkout('new-branch')
    ok_(op.exists(op.join(orig_path, filename)))
def knows_annex(path):
    """Returns whether at a given path there is information about an annex

    This includes actually present annexes, but also uninitialized ones, or
    even the presence of a remote annex branch.

    NOTE(review): a newer variant of this helper (delegating to
    ``GitRepo.is_with_annex()``) exists later in this file — confirm which
    one the module is meant to export.
    """
    from os.path import exists
    if not exists(path):
        lgr.debug("No annex: test path {0} doesn't exist".format(path))
        return False
    from datalad.support.gitrepo import GitRepo
    repo = GitRepo(path, create=False)
    # either a local git-annex branch or one fetched from origin counts
    return "origin/git-annex" in repo.git_get_remote_branches() \
        or "git-annex" in repo.git_get_branches()
def test_install_dataladri(src, topurl, path):
    """install() resolves a ///-style datalad RI against DATASETS_TOPURL."""
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    # make the repo fetchable over dumb http
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_install_simple_local(src, path): origin = Dataset(path) # now install it somewhere else ds = install(path, source=src, description='mydummy') eq_(ds.path, path) ok_(ds.is_installed()) if not isinstance(origin.repo, AnnexRepo): # this means it is a GitRepo ok_(isinstance(origin.repo, GitRepo)) # stays plain Git repo ok_(isinstance(ds.repo, GitRepo)) ok_(not isinstance(ds.repo, AnnexRepo)) ok_(GitRepo.is_valid_repo(ds.path)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'}) ok_clean_git(path, annex=False) else: # must be an annex ok_(isinstance(ds.repo, AnnexRepo)) ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt', 'test-annex.dat'}) ok_clean_git(path, annex=True) # no content was installed: ok_(not ds.repo.file_has_content('test-annex.dat')) uuid_before = ds.repo.uuid eq_(ds.repo.get_description(), 'mydummy') # installing it again, shouldn't matter: res = install(path, source=src, result_xfm=None, return_type='list') assert_status('notneeded', res) ok_(ds.is_installed()) if isinstance(origin.repo, AnnexRepo): eq_(uuid_before, ds.repo.uuid)
def _discover_subdatasets_recursively(
        discovered, top, trace, recursion_limit):
    """Walk the tree under `top` collecting valid git repos into `discovered`.

    `discovered` maps absolute repo path -> properties dict and is mutated
    in place; `trace` is the chain of dataset ancestors leading here.
    """
    # this beast walks the directory tree from a give `top` directory
    # and discovers valid repos that are scattered around, regardless
    # of whether they are already subdatasets or not
    # `trace` must be a list that has at least one element (the base
    # dataset)
    if recursion_limit is not None and len(trace) > recursion_limit:
        return
    if not isdir(top):
        return
    if not op.islink(top) and GitRepo.is_valid_repo(top):
        if top in discovered:
            # this was found already, assume everything beneath it too
            return
        discovered[top] = dict(
            path=top,
            # and its content
            process_content=True,
            type='dataset',
            parentds=trace[-1])
        # new node in the trace down
        trace = trace + [top]
    for path in listdir(top):
        path = opj(top, path)
        if not isdir(path):
            continue
        # next level down
        _discover_subdatasets_recursively(
            discovered, path, trace, recursion_limit)
def test_GitRepo_get_remote_url(orig_path, path):
    """get_remote_url reports the configured URL for each remote."""
    repo = GitRepo.clone(orig_path, path)
    github_url = 'git://github.com/datalad/testrepo--basic--r1'
    repo.add_remote('github', github_url)
    # 'origin' was configured implicitly by the clone above
    eq_(repo.get_remote_url('origin'), orig_path)
    eq_(repo.get_remote_url('github'), github_url)
def test_submodule_deinit(path):
    """deinit refuses on a dirty submodule unless force=True is given."""
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'},
        {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    # deinit must refuse while the submodule has local modifications
    assert_raises(CommandError, top_repo.deinit_submodule, 'sub1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
def _parse_git_submodules(ds, paths):
    """All known ones with some properties

    Generator of property dicts (path, state, revision, ...) for each
    submodule recorded in `ds`, optionally constrained to `paths`.
    """
    if not (ds.pathobj / ".gitmodules").exists():
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    if paths:
        # keep only constraints that lie inside (or are) this dataset
        paths = [
            p.relative_to(ds.pathobj)
            for p in paths
            if ds.pathobj == p or ds.pathobj in p.parents]
        if not paths:
            # we had path contraints, but none matched this dataset
            return
    for path, props in iteritems(ds.repo.get_content_info(
            paths=paths,
            ref=None,
            untracked='no',
            eval_file_type=False)):
        if props.get('type', None) != 'dataset':
            # only submodule records are of interest here
            continue
        if ds.pathobj != ds.repo.pathobj:
            # translate repo-anchored path into dataset-anchored path
            props['path'] = ds.pathobj / path.relative_to(ds.repo.pathobj)
        else:
            props['path'] = path
        if not path.exists() or not GitRepo.is_valid_repo(text_type(path)):
            props['state'] = 'absent'
        # TODO kill this after some time. We used to do custom things here
        # and gitshasum was called revision. Be nice and duplicate for a bit
        # wipe out when patience is gone
        props['revision'] = props['gitshasum']
        yield props
def test_GitRepo_add(src, path):
    """GitRepo.add stages files and reports success; commit cleans the tree."""
    gr = GitRepo.clone(src, path)
    filename = get_most_obscure_supported_name()

    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")
    added = gr.add(filename)

    eq_(added, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    # uncommitted:
    ok_(gr.dirty)

    filename = "another.txt"
    with open(op.join(path, filename), 'w') as f:
        f.write("Another file to add to git")

    # include committing:
    added2 = gr.add(filename)
    gr.commit(msg="Add two files.")
    eq_(added2, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))

    ok_clean_git(path)
def test_publish_simple(origin, src_path, dst_path):
    """publish() to a plain-git sibling syncs branches; repeat is notneeded."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park target's HEAD on a scratch branch so 'master' can be pushed to
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
def _install_necessary_subdatasets( ds, path, reckless, refds_path, description=None): """Installs subdatasets of `ds`, that are necessary to obtain in order to have access to `path`. Gets the subdataset containing `path` regardless of whether or not it was already installed. While doing so, installs everything necessary in between the uppermost installed one and `path`. Note: `ds` itself has to be installed. Parameters ---------- ds: Dataset path: str reckless: bool """ # figuring out what dataset to start with, --contains limits --recursive # to visit only subdataset on the trajectory to the target path subds_trail = ds.subdatasets(contains=path, recursive=True) if not subds_trail: # there is not a single known subdataset (installed or not) # for this path -- job done return # otherwise we start with the one deepest down cur_subds = subds_trail[-1] while not GitRepo.is_valid_repo(cur_subds['path']): # install using helper that give some flexibility regarding where to # get the module from try: sd = _install_subds_from_flexible_source( Dataset(cur_subds['parentds']), relpath(cur_subds['path'], start=cur_subds['parentds']), cur_subds['gitmodule_url'], reckless, description=description) except Exception as e: # skip all of downstairs, if we didn't manage to install subdataset yield get_status_dict( 'install', path=cur_subds['path'], type='dataset', status='error', logger=lgr, refds=refds_path, message=("Installation of subdatasets %s failed with exception: %s", cur_subds['path'], exc_str(e))) return # report installation, whether it helped or not yield get_status_dict( 'install', ds=sd, status='ok', logger=lgr, refds=refds_path, message=("Installed subdataset in order to get %s", path)) # now check whether the just installed subds brought us any closer to # the target path subds_trail = sd.subdatasets(contains=path, recursive=False) if not subds_trail: # no (newly available) subdataset get's us any closer return # next round cur_subds = subds_trail[-1]
def test_GitRepo_remote_remove(orig_path, path):
    """After removing a remote, only the remaining ones are listed."""
    repo = GitRepo.clone(orig_path, path)
    repo.add_remote('github', 'git://github.com/datalad/testrepo--basic--r1')
    repo.remove_remote('github')
    remotes = repo.get_remotes()
    # the clone-provided 'origin' is the sole survivor
    eq_(len(remotes), 1)
    assert_in('origin', remotes)
def test_GitRepo_remove(path):
    """remove() returns the list of deleted paths for files, dirs and globs.

    Relies on the fixture pre-populating `path` with 'file', 'file2',
    'd/f1', 'd/f2', 'd2/f1', 'd2/f2' — TODO confirm against the decorator
    (not visible here).
    """
    gr = GitRepo(path, create=True)
    gr.add('*')
    gr.commit("committing all the files")

    eq_(gr.remove('file'), ['file'])
    eq_(set(gr.remove('d', r=True, f=True)), {'d/f1', 'd/f2'})

    eq_(set(gr.remove('*', r=True, f=True)), {'file2', 'd2/f1', 'd2/f2'})
def test_install_dataset_from_just_source(url, path):
    """install() with only a source URL clones into the current directory."""
    with chpwd(path, mkdir=True):
        ds = install(source=url)

        ok_startswith(ds.path, path)
        ok_(ds.is_installed())
        ok_(GitRepo.is_valid_repo(ds.path))
        ok_clean_git(ds.path, annex=None)
        assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_clone_dataset_from_just_source(url, path):
    """clone() with only a source URL clones into the current directory."""
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

        # the clone landed under cwd and is a functional dataset
        ok_(ds.is_installed())
        ok_startswith(ds.path, path)
        ok_(GitRepo.is_valid_repo(ds.path))
        ok_clean_git(ds.path, annex=None)
        assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_get_git_dir(path):
    """get_git_dir resolves both symlinked .git and 'gitdir:' file pointers."""
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:  # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)

    # a plain-file .git containing a 'gitdir:' pointer (as used by worktrees)
    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
def test_publish_simple(origin, src_path, dst_path):
    """Legacy publish test using the old git_* API and `dest=` keyword.

    NOTE(review): another, newer test with the same name exists earlier in
    this file — if both live in one module the later definition wins;
    confirm the duplication is intentional.
    """
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
def test_install_dataset_from_instance(src, dst):
    """install() accepts a Dataset instance (not just a path) as source."""
    origin = Dataset(src)
    clone = install(source=origin, path=dst)

    assert_is_instance(clone, Dataset)
    ok_startswith(clone.path, dst)
    ok_(clone.is_installed())
    ok_(GitRepo.is_valid_repo(clone.path))
    ok_clean_git(clone.path, annex=None)
    assert_in('INFO.txt', clone.repo.get_indexed_files())
def test_install_plain_git(src, path):
    """Installing from a plain git repo never silently upgrades to annex."""
    # make plain git repo
    gr = GitRepo(src, create=True)
    gr.git_add('test.txt')
    gr.git_commit('demo')
    # now install it somewhere else
    ds = install(path=path, source=src)
    # stays plain Git repo
    ok_(isinstance(ds.repo, GitRepo))
    # now go back to original
    ds = Dataset(src)
    ok_(isinstance(ds.repo, GitRepo))
    # installing a file must fail, as we decided not to perform magical upgrades
    # GitRepo -> AnnexRepo
    assert_raises(RuntimeError, ds.install, path='test2.txt',
                  source=opj(src, 'test2.txt'))
    # but works when forced
    ifiles = ds.install(path='test2.txt', source=opj(src, 'test2.txt'),
                        add_data_to_git=True)
    ok_startswith(ifiles, ds.path)
    ok_(ifiles.endswith('test2.txt'))
    ok_('test2.txt' in ds.repo.get_indexed_files())
def test_target_ssh_recursive(origin, src_path, target_path):
    """create_sibling --recursive builds git repos for root and submodules.

    Iterates flat/non-flat layouts; the flat case is skipped early via
    SkipTest (publish currently breaks for flat datasets).
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does fetch -- the related problem
            # so skipping this early
            raise SkipTest(
                'TODO: Make publish work for flat datasets, it currently '
                'breaks'
            )

        remote_name = 'remote-' + str(flat)
        # TODO: there is f.ckup with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(
                target=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(
                target_path_,
                basename(src_path) if flat else "").rstrip(
                    os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)
def func(arg, top, names):
    """os.path.walk-style visitor: collect dirs, stopping at nested repos.

    `arg` unpacks to (refpath, ignore, dirs); `names` is pruned in place so
    the walk does not descend into ignored paths or foreign git repos.
    """
    refpath, ignore, dirs = arg
    legit_names = []
    for n in names:
        path = opj(top, n)
        if not isdir(path) or path in ignore:
            # non-directories and explicitly ignored paths are dropped
            pass
        elif path != refpath and GitRepo.is_valid_repo(path):
            # mount point, keep but don't dive into
            dirs.append(path)
        else:
            legit_names.append(n)
            dirs.append(path)
    # prune in place so the caller's walk skips dropped entries
    names[:] = legit_names
def test_ok_file_under_git_symlinks(path=None):
    """ok_file_under_git works when the repo is reached via a symlinked path."""
    # Test that works correctly under symlinked path
    orepo = GitRepo(path)
    orepo.add('ingit')
    orepo.commit('msg')
    orepo.add('staged')
    lpath = path + "-symlink"  # will also be removed AFAIK by our tempfile handling
    Path(lpath).symlink_to(Path(path))
    ok_symlink(lpath)
    ok_file_under_git(op.join(path, 'ingit'))
    ok_file_under_git(op.join(lpath, 'ingit'))
    ok_file_under_git(op.join(lpath, 'staged'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'notingit'))
    with assert_raises(AssertionError):
        ok_file_under_git(op.join(lpath, 'nonexisting'))
def __call__(dataset=None, what=None, recursive=False, recursion_limit=None):
    """Remove temporary/cache directories from a dataset (clean command).

    Generator of result dicts; `what` optionally restricts cleaning to a
    subset of the known flags ('cached-archives', 'annex-tmp',
    'search-index').
    """
    ds = require_dataset(dataset, purpose='clean-up')
    res_kwargs = dict(action='clean', logger=lgr, refds=ds.path)
    for ap in AnnotatePaths.__call__(
            dataset=ds.path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='clean',
            unavailable_path_status='impossible',
            nondataset_path_status='impossible',
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is a result record already, pass it along
            yield ap
            continue
        if ap.get('type', None) != 'dataset':
            ap.update(status='impossible',
                      message='only datasets can be cleaned')
            yield ap
            continue
        d = ap['path']
        gitdir = GitRepo.get_git_dir(d)
        # (relative dir, selection flag, human label, (singular, plural))
        for dirpath, flag, msg, sing_pl in [
            (ARCHIVES_TEMP_DIR, "cached-archives",
             "temporary archive", ("directory", "directories")),
            (ANNEX_TEMP_DIR, "annex-tmp",
             "temporary annex", ("file", "files")),
            (opj(gitdir, SEARCH_INDEX_DOTGITDIR), 'search-index',
             "metadata search index", ("file", "files")),
        ]:
            topdir = opj(d, dirpath)
            lgr.debug("Considering to clean %s:%s", d, dirpath)
            if not ((what is None) or (flag in what)):
                # not requested by `what` selection
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            paths = glob(opj(topdir, '*'))
            if not paths:
                # nothing to clean here
                yield get_status_dict(
                    path=topdir, status='notneeded', type='directory',
                    **res_kwargs)
                continue
            pl = len(paths) > 1
            # result message as (fmt, *args) tuple, expanded by the renderer
            message = ("Removed %d %s %s: %s",
                       len(paths), msg, sing_pl[int(pl)],
                       ", ".join(sorted([x[len(topdir) + 1:]
                                         for x in paths])))
            rmtree(topdir)
            # NOTE(review): type='dir' here vs type='directory' above —
            # confirm whether this inconsistency is intentional
            yield get_status_dict(
                path=topdir, status='ok', type='dir', message=message,
                **res_kwargs)
def test_GitRepo_instance_from_clone(src, dst):
    """clone() yields a GitRepo wrapping a GitPython Repo; re-clone fails."""
    gr = GitRepo.clone(src, dst)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    assert_is_instance(gr.repo, gitpy.Repo,
                       "Failed to instantiate GitPython Repo object.")
    ok_(op.exists(op.join(dst, '.git')))

    # do it again should raise GitCommandError since git will notice there's
    # already a git-repo at that path and therefore can't clone to `dst`
    # Note: Since GitRepo is now a WeakSingletonRepo, this is prevented from
    # happening atm. Disabling for now:
    # raise SkipTest("Disabled for RF: WeakSingletonRepo")
    with swallow_logs() as logs:
        assert_raises(GitCommandError, GitRepo.clone, src, dst)
def test_multiway_merge(path=None):
    """update(merge=True) with two siblings and no tracking branch is refused."""
    # prepare ds with two siblings, but no tracking branch
    ds = Dataset(op.join(path, 'ds_orig')).create()
    r1 = AnnexRepo(path=op.join(path, 'ds_r1'), git_opts={'bare': True})
    r2 = GitRepo(path=op.join(path, 'ds_r2'), git_opts={'bare': True})
    ds.siblings(action='add', name='r1', url=r1.path)
    ds.siblings(action='add', name='r2', url=r2.path)
    assert_status('ok', ds.push(to='r1'))
    # push unlike publish reports on r2 not being an annex remote with a
    # 'notneeded'
    assert_status(('ok', 'notneeded'), ds.push(to='r2'))
    # just a fetch should be no issue
    assert_status('ok', ds.update())
    # ATM we do not support multi-way merges
    assert_status('impossible',
                  ds.update(merge=True, on_failure='ignore'))
def test_bare(path):
    """A bare repo's local config lives in <repo>/config and is writable."""
    # can we handle a bare repo?
    gr = GitRepo(path, create=True, bare=True)
    # do we read the correct local config?
    assert_in(gr.pathobj / 'config', gr.config._stores['git']['files'])
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # not set something that wasn't there
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # to the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', where='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1],
              (gr.pathobj / 'config').read_text())
def test_crazy_cfg(path):
    """Dataset-committed config is seen only in 'dataset'/'any' source modes.

    Assumes the fixture placed a committed config with key 'crazy.padry'
    in the 'ds' repo — TODO confirm against the test decorator (not
    visible here).
    """
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True),
                        source='dataset')
    assert_in('crazy.padry', cfg)
    # make sure crazy config is not read when in local mode
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='local')
    assert_not_in('crazy.padry', cfg)
    # it will make it in in 'any' mode though
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='any')
    assert_in('crazy.padry', cfg)

    # typos in the source mode arg will not have silent side-effects
    assert_raises(ValueError, ConfigManager, Dataset(opj(path, 'ds')),
                  source='locale')
def knows_annex(path):
    """Returns whether at a given path there is information about an annex

    It is just a thin wrapper around GitRepo.is_with_annex() classmethod
    which also checks for `path` to exist first.

    This includes actually present annexes, but also uninitialized ones, or
    even the presence of a remote annex branch.
    """
    from os.path import exists
    if not exists(path):
        lgr.debug("No annex: test path {0} doesn't exist".format(path))
        return False
    from datalad.support.gitrepo import GitRepo
    return GitRepo(path, init=False, create=False).is_with_annex()
def test_normalize_path(git_path):
    """_normalize_path maps various path spellings to repo-relative paths.

    Relative paths are repo-relative while cwd is outside the repo, and
    cwd-relative once inside; absolute paths must stay within the repo.
    """
    cwd = os.getcwd()
    gr = GitRepo(git_path)

    # cwd is currently outside the repo, so any relative path
    # should be interpreted as relative to `annex_path`
    assert_raises(FileNotInRepositoryError, _normalize_path,
                  gr.path, os.getcwd())

    result = _normalize_path(gr.path, "testfile")
    assert_equal(result, "testfile",
                 "_normalize_path() returned %s" % result)

    # result = _normalize_path(gr.path, os.path.join('.', 'testfile'))
    # assert_equal(result, "testfile",
    #              "_normalize_path() returned %s" % result)
    #
    # result = _normalize_path(gr.path,
    #                          os.path.join('testdir', '..', 'testfile'))
    # assert_equal(result, "testfile",
    #              "_normalize_path() returned %s" % result)
    # Note: By now, normpath within normalize_paths() is disabled, therefore
    # disable these tests.

    result = _normalize_path(gr.path, os.path.join('testdir', 'testfile'))
    assert_equal(result, os.path.join("testdir", "testfile"),
                 "_normalize_path() returned %s" % result)

    result = _normalize_path(gr.path, os.path.join(git_path, "testfile"))
    assert_equal(result, "testfile",
                 "_normalize_path() returned %s" % result)

    # now we are inside, so relative paths are relative to cwd and have
    # to be converted to be relative to annex_path:
    os.chdir(os.path.join(git_path, 'd1', 'd2'))

    result = _normalize_path(gr.path, "testfile")
    assert_equal(result, os.path.join('d1', 'd2', 'testfile'),
                 "_normalize_path() returned %s" % result)

    result = _normalize_path(gr.path, os.path.join('..', 'testfile'))
    assert_equal(result, os.path.join('d1', 'testfile'),
                 "_normalize_path() returned %s" % result)

    assert_raises(FileNotInRepositoryError, _normalize_path, gr.path,
                  os.path.join(git_path, '..', 'outside'))

    result = _normalize_path(gr.path, os.path.join(git_path, 'd1', 'testfile'))
    assert_equal(result, os.path.join('d1', 'testfile'),
                 "_normalize_path() returned %s" % result)

    os.chdir(cwd)
def _get_new_vcs(ds, source, vcs):
    """Create the repo backing a new dataset and return the repo instance.

    From scratch (no `source`) an annex repo is created; a clone starts as
    plain git and is upgraded to an annex only when annex traces are found.
    """
    if source is None:
        # always come with annex when created from scratch
        lgr.info("Creating a new annex repo at %s", ds.path)
        vcs = AnnexRepo(ds.path, url=source, create=True)
    else:
        # when obtained from remote, try with plain Git
        lgr.info("Creating a new git repo at %s", ds.path)
        vcs = GitRepo(ds.path, url=source, create=True)
        if knows_annex(ds.path):
            # init annex when traces of a remote annex can be detected
            lgr.info("Initializing annex repo at %s", ds.path)
            vcs = AnnexRepo(ds.path, init=True)
        else:
            lgr.debug("New repository clone has no traces of an annex")
    return vcs
def test_get_default_title(path):
    """Default title grows from dirname to dirname#id to dirname#id@tag."""
    repo = GitRepo(path)
    ds = Dataset(path)
    # There is no dataset initialized yet, so only path will be the title
    dirname = op.basename(path)
    eq_(_get_default_title(ds), dirname)

    # Initialize and get UUID
    ds.create(force=True)
    eq_(_get_default_title(ds), '{dirname}#{ds.id}'.format(**locals()))

    # Tag and get @version
    # cannot use ds.save since our tags are not annotated,
    # see https://github.com/datalad/datalad/issues/4139
    ds.repo.tag("0.1", message="important version")
    eq_(_get_default_title(ds), '{dirname}#{ds.id}@0.1'.format(**locals()))
def test_repo_cache(path):
    """Dataset.repo caches its instance and upgrades from git to annex."""
    ds = Dataset(path)
    # nothing on disk yet -> no repo instance
    eq_(ds.repo, None)
    # create a plain Git repo behind the dataset's back
    GitRepo(path=path, create=True)
    cached = ds.repo
    # now there is one ...
    assert_false(cached is None)
    # ... and repeated access yields the identical instance
    assert_true(ds.repo is cached)
    # turning the repo into an annex must invalidate the cache ...
    AnnexRepo(path=path, create=True)
    assert_false(ds.repo is cached)
    # ... and surface an AnnexRepo instance instead
    assert_true(isinstance(ds.repo, AnnexRepo))
def test_hashable(path):
    """Dataset and *Repo instances must be usable as set members."""
    base = ut.Path(path)
    seen = set()
    # hashable at all?
    seen.add(Dataset(base / 'one'))
    eq_(len(seen), 1)
    # same class, different path -> a distinct member
    seen.add(Dataset(base / 'two'))
    eq_(len(seen), 2)
    # two different repo flavors pointing to the very same location
    # on disk are still considered different
    Dataset(base).create()
    seen.add(GitRepo(base))
    eq_(len(seen), 3)
    seen.add(AnnexRepo(base))
    eq_(len(seen), 4)
def test_ls_repos(toppath):
    """Smoke test `ls` over a mix of git/annex repos plus a bogus path."""
    # one plain git and one annex repo to be listed
    GitRepo(toppath + '1', create=True)
    AnnexRepo(toppath + '2', create=True)
    repos = glob(toppath + '*')
    for recursive in (False, True):
        for args in (repos, repos + ["/some/bogus/file"]):
            # must not fail in either mode
            with swallow_outputs() as cmo:
                ls(args, recursive=recursive)
                # one report line per queried path
                assert_equal(len(cmo.out.rstrip().split('\n')), len(args))
                assert_in('[annex]', cmo.out)
                assert_in('[git]', cmo.out)
                assert_in('master', cmo.out)
                if "bogus" in args:
                    assert_in('unknown', cmo.out)
def test_GitRepo_get_indexed_files(src, path):
    """get_indexed_files() must agree with plain `git ls-files`."""
    repo = GitRepo.clone(src, path)
    indexed = repo.get_indexed_files()
    # ground truth straight from git
    stdout = Runner()(['git', 'ls-files'], cwd=path)[0]
    listed = [line for line in stdout.split('\n') if line]
    # both directions: nothing extra, nothing missing
    for entry in indexed:
        assert_in(
            entry, listed,
            "%s not found in output of git ls-files in %s" % (entry, path))
    for entry in listed:
        assert_in(
            entry, indexed,
            "%s not found in output of get_indexed_files in %s"
            % (entry, path))
def _adj2subtrees(base, adj, subs): # given a set of parent-child mapping, compute a mapping of each parent # to all its (grand)children of any depth level subtrees = dict(adj) subs = set(subs) # from bottom up for ds in sorted(adj, reverse=True): subtree = [] for sub in subtrees[ds]: subtree.append(sub) subtree.extend(subtrees.get(sub, [])) subtrees[ds] = subtree # give each leaf dataset an entry too for sub in subs: if sub not in subtrees and GitRepo.is_valid_repo(sub): subtrees[sub] = [] return subtrees
def test_get_tracking_branch(o_path, c_path):
    """Tracking-branch reporting for a fresh clone and a new local branch."""
    clone = GitRepo.clone(o_path, c_path)
    # Note, that the default branch might differ even if it is always
    # 'master'. For direct mode annex repositories it would then be
    # "annex/direct/master" for example. Therefore use whatever branch is
    # checked out by default:
    default_branch = clone.get_active_branch()
    ok_(default_branch)
    expected = ('origin', 'refs/heads/%s' % default_branch)
    eq_(expected, clone.get_tracking_branch())
    # a brand-new local branch tracks nothing ...
    clone.checkout('new_branch', ['-b'])
    eq_((None, None), clone.get_tracking_branch())
    # ... but asking for the default branch explicitly still works
    eq_(expected, clone.get_tracking_branch(default_branch))
def test_GitRepo_count_objects(repo_path):
    """count_objects of a fresh repo is a dict of all-zero counters."""
    repo = GitRepo(repo_path, create=True)
    stats = repo.count_objects
    # test if dictionary returned
    eq_(isinstance(stats, dict), True)
    # test if dictionary contains keys and values we expect:
    # every counter is zero for an empty repository
    zeroed = dict.fromkeys(
        ['count', 'garbage', 'in-pack', 'packs', 'prune-packable',
         'size', 'size-garbage', 'size-pack'],
        0)
    eq_(zeroed, stats)
def test_clone_dataladri(src, topurl, path):
    """Install a dataset addressed by a ///-style datalad RI."""
    # prepare a plain git repo to be served as '///ds'
    source_repo = GitRepo(Path(src) / 'ds', create=True)
    source_repo.add('test.txt')
    source_repo.commit('demo')
    # make it fetchable over dumb HTTP
    Runner(cwd=source_repo.path).run(['git', 'update-server-info'])
    # now install it somewhere else, resolving '///' against topurl
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path,
                   result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')
def test_GitRepo_get_files(url, path):
    """get_files() must match the on-disk files, per branch.

    Bug fixed here: the expected set after committing to the new branch was
    computed as ``os_files.union((unknown))`` with an undefined name
    ``unknown`` (a NameError at runtime); it must be the newly added
    ``filename``, as confirmed by the later difference-assertions.
    """
    gr = GitRepo.clone(url, path)

    # get the expected files via os for comparison:
    os_files = set()
    for (dirpath, dirnames, filenames) in os.walk(path):
        rel_dir = os.path.relpath(dirpath, start=path)
        if rel_dir.startswith(".git"):
            # never include git's own bookkeeping
            continue
        for file_ in filenames:
            file_path = os.path.normpath(op.join(rel_dir, file_))
            os_files.add(file_path)

    # get the files via GitRepo:
    local_files = set(gr.get_files())
    remote_files = set(gr.get_files(branch="origin/master"))

    eq_(local_files, set(gr.get_indexed_files()))
    eq_(local_files, remote_files)
    eq_(local_files, os_files)

    # create a different branch:
    gr.checkout('new_branch', ['-b'])
    filename = 'another_file.dat'
    with open(op.join(path, filename), 'w') as f:
        f.write("something")
    gr.add(filename)
    gr.commit("Added.")

    # now get the files again: the new branch carries the original files
    # plus the newly committed one (was: undefined name `unknown`)
    local_files = set(gr.get_files())
    eq_(local_files, os_files.union({filename}))
    # retrieve remote branch again, which should not have changed:
    remote_files = set(gr.get_files(branch="origin/master"))
    eq_(remote_files, os_files)
    eq_(set([filename]), local_files.difference(remote_files))

    # switch back and query non-active branch:
    gr.checkout('master')
    local_files = set(gr.get_files())
    branch_files = set(gr.get_files(branch="new_branch"))
    eq_(set([filename]), branch_files.difference(local_files))
def test_subdataset_add_file_end_to_end(file_name):
    """End-to-end: add file-level metadata and read it back through the tree.

    Bug fixed here: the metadata file was written via
    ``json.dump(..., open(file_name, "tw"))`` which leaks the file handle
    and relies on interpreter finalization to flush the content before
    ``meta_add`` reads the file; a ``with`` block closes it deterministically.
    """
    test_path = "d_1/d_1.0/f_1.0.0"
    with open(file_name, "tw") as f:
        json.dump({
            **metadata_template,
            **additional_keys_template,
            "type": "file",
            "path": test_path
        }, f)

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify dataset level metadata was added
        root_dataset_id = UUID(additional_keys_template["root_dataset_id"])
        root_dataset_version = additional_keys_template["root_dataset_version"]
        dataset_tree_path = MetadataPath(
            additional_keys_template["dataset_path"])

        tree_version_list, uuid_set, mrr = _get_top_nodes(
            git_repo, root_dataset_id, root_dataset_version)

        _, dataset_tree = tree_version_list.get_dataset_tree(
            root_dataset_version)

        mrr = dataset_tree.get_metadata_root_record(dataset_tree_path)
        eq_(mrr.dataset_identifier, UUID(metadata_template["dataset_id"]))

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
def test_get_refcommit(path):
    # get_refcommit(ds) must return the last commit that changed
    # metadata-relevant content (None if there is none); anything under
    # .datalad/ must be ignored throughout.

    # dataset without a single commit
    ds = Dataset(GitRepo(path, create=True).path)
    eq_(get_refcommit(ds), None)
    # we get a commit via create
    ds.create(force=True)
    # still not metadata-relevant changes
    eq_(get_refcommit(ds), None)
    # place irrelevant file and commit
    create_tree(ds.path, {'.datalad': {'ignored': 'content'}})
    ds.save()
    # no change to the previous run, irrelevant changes are ignored
    eq_(get_refcommit(ds), None)
    # a real change
    create_tree(ds.path, {'real': 'othercontent'})
    ds.save()
    real_change = get_refcommit(ds)
    eq_(real_change, ds.repo.get_hexsha('HEAD'))
    # another irrelevant change, no change in refcommit
    create_tree(ds.path, {'.datalad': {'ignored2': 'morecontent'}})
    ds.save()
    eq_(get_refcommit(ds), real_change)
    # we can pick up deletions
    os.unlink(text_type(ds.pathobj / 'real'))
    ds.save()
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
    # subdataset addition is metadata-relevant
    subds = ds.create('sub')
    subds_addition = get_refcommit(ds)
    eq_(subds_addition, ds.repo.get_hexsha('HEAD'))
    # another irrelevant change, no change in refcommit, despite subds presence
    create_tree(ds.path, {'.datalad': {'ignored3': 'evenmorecontent'}})
    ds.save()
    eq_(get_refcommit(ds), subds_addition)
    # subdataset modification is a relevant change
    create_tree(subds.path, {'real': 'real'})
    ds.save(recursive=True)
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
    # and subdataset removal
    ds.remove('sub', check=False)
    assert_repo_status(ds.path)
    eq_(get_refcommit(ds), ds.repo.get_hexsha('HEAD'))
def require_dataset(dataset, check_installed=True, purpose=None):
    """Helper function to resolve a dataset.

    This function tries to resolve a dataset given an input argument, or
    based on the process' working directory, if `None` is given.

    Parameters
    ----------
    dataset : None or path or Dataset
      Some value identifying a dataset or `None`. In the latter case
      a dataset will be searched based on the process working directory.
    check_installed : bool, optional
      If True, an optional check whether the resolved dataset is properly
      installed will be performed.
    purpose : str, optional
      This string will be inserted in error messages to make them more
      informative. The pattern is "... dataset for <STRING>".

    Returns
    -------
    Dataset
      Or raises an exception (InsufficientArgumentsError).
    """
    if dataset is None:
        # possible scenario of cmdline calls: discover from CWD
        dspath = GitRepo.get_toppath(getpwd())
        if not dspath:
            raise NoDatasetArgumentFound("No dataset found")
        dataset = Dataset(dspath)
    elif not isinstance(dataset, Dataset):
        # coerce any path-like identifier into a Dataset instance
        dataset = Dataset(dataset)
    assert dataset is not None

    lgr.debug("Resolved dataset{0}: {1}".format(
        ' for {}'.format(purpose) if purpose else '',
        dataset))

    if check_installed and not dataset.is_installed():
        raise ValueError("No installed dataset found at "
                         "{0}.".format(dataset.path))
    return dataset
def download_dataset(repo=None, remote_path=None, local_folder=None,
                     update_if_exists=False, unlock=False):
    """Fetch `remote_path` from a datalad dataset into `local_folder`.

    Installs (or reuses) the dataset clone, gets the requested content and
    returns the local path to it; returns None when `remote_path` is missing.
    """
    assert HAVE_DATALAD, 'You need to install datalad'

    # default to the NeuralEnsemble testing-data repository on gin
    if repo is None:
        repo = 'https://gin.g-node.org/NeuralEnsemble/ephy_testing_data'

    if local_folder is None:
        base_local_folder = get_global_dataset_folder()
        base_local_folder.mkdir(exist_ok=True)
        # the dataset folder is named after the repository
        local_folder = base_local_folder / repo.split('/')[-1]

    if local_folder.exists() and GitRepo.is_valid_repo(local_folder):
        # reuse the existing clone
        dataset = datalad.api.Dataset(path=local_folder)
        if update_if_exists:
            # make sure git repo is in clean state before updating
            dataset.repo.call_git(['checkout', '--force', 'master'])
            dataset.update(merge=True)
    else:
        dataset = datalad.api.install(path=local_folder, source=repo)

    if remote_path is None:
        print('Bad boy: you have to provide "remote_path"')
        return

    local_path = local_folder / remote_path
    dataset.get(remote_path)
    # unlocking is necessary for binding volume to containers
    if unlock:
        dataset.unlock(remote_path, recursive=True)
    return local_path
def test_clone_dataladri(src, topurl, path):
    """Install a dataset addressed by a ///-style datalad RI (legacy API)."""
    # prepare a plain git repo that will be served as '///ds'
    source_repo = GitRepo(opj(src, 'ds'), create=True)
    source_repo.add('test.txt')
    source_repo.commit('demo')
    # make it fetchable over dumb HTTP
    Runner(cwd=source_repo.path)(['git', 'update-server-info'])
    # now install it somewhere else, resolving '///' against topurl
    with patch('datalad.support.network.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path,
                   result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_clone_simple_local(src, path):
    """Clone a local dataset (git or annex flavor); re-cloning is a no-op."""
    origin = Dataset(path)
    # now install it somewhere else
    ds = clone(src, path, description='mydummy',
               result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    # which branch of checks applies depends on the flavor of the fixture
    is_annex = isinstance(origin.repo, AnnexRepo)
    if is_annex:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')
    else:
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)

    # installing it again, shouldn't matter:
    res = clone(src, path)
    assert_result_values_equal(res, 'source_url', [src])
    assert_status('notneeded', res)
    assert_message("dataset %s was already cloned from '%s'", res)
    ok_(ds.is_installed())
    if is_annex:
        eq_(uuid_before, ds.repo.uuid)