def test_install_into_dataset(source=None, top_path=None):
    src_ds = Dataset(source).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)

    ds = create(top_path)
    assert_repo_status(ds.path)

    subds = ds.install("sub", source=source)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is too:
    assert_repo_status(ds.path, annex=None)
    ds.save(message='addsub')
    # now it is:
    assert_repo_status(ds.path, annex=None)

    # but we could also save while installing, and there should be no
    # side effect of saving any other changes if we choose not to
    # auto-save changes
    # create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    assert_repo_status(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.save('sub3')
    assert_repo_status(ds.path, untracked=['dummy.txt'])

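# The `create_tree` helper appears throughout this suite; here is a minimal
# sketch of its tree specification as inferred from its usage in these tests
# (not a definitive spec): string values become file contents, dict values
# become subdirectories, and archive-like names such as '1.tar' appear to
# produce actual archives (see the archive tests further down).
def _demo_create_tree_layout(root):
    # hypothetical demo helper, not part of the original suite
    create_tree(root, {
        'file.txt': 'content',        # plain file with this content
        'subdir': {
            'nested.dat': 'data',     # file inside a subdirectory
        },
    })
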
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)
    assert_false(ds_cloned.repo.file_has_content("f0"))

    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(
        res, status='error',
        message='require `name` of sibling to enable')

    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(
        res, status='impossible',
        message=("cannot enable sibling '%s', not known", 'wrong'))

    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))

def test_add_recursive(path=None):
    # make a simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])
    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True, jobs=5)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)

def test_symlinked_relpath(path=None):
    # initially ran into on OSX: https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later, from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it with a subdirectory as CWD, and check that relative
    # paths given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)

def test_subdataset_save(path=None):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {"new": "wanted"},
    })
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

def test_invalid_call(path=None):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible',
                      run('doesntmatter', on_failure='ignore'))

def test_newthings_coming_down(originpath=None, destpath=None):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in(DEFAULT_REMOTE, ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should;
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to do a graceful downgrade
    before_branches = ds.repo.get_branches()
    ok_(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # the remote git-annex branch got pruned
    assert_false(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')

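# A rough sketch of what the `knows_annex` check used above amounts to -- an
# assumption based on how this test exercises it, not the actual
# implementation: the clone "knows" an annex once a git-annex branch is
# visible among its local or remote branches.
def _knows_annex_sketch(path):
    # hypothetical illustration, not part of the original suite
    repo = GitRepo(path)
    return any('git-annex' in b
               for b in repo.get_branches() + repo.get_remote_branches())
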
def test_no_interaction_with_untracked_content(path=None):
    # extracted from what was a metadata test originally
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)

    subds.remove(op.join('.datalad', 'config'))
    nok_((subds.pathobj / '.datalad' / 'config').exists())

    # this will only work if `remove` didn't do anything stupid that
    # caused all content to be saved
    subds.create('subsub', force=True)

def test_windows_incompatible_names(path=None):
    ds = Dataset(path).create()
    create_tree(path, {
        'imgood': 'Look what a nice name I have',
        'illegal:character.txt': 'strange choice of name',
        'spaceending ': 'who does these things?',
        'lookmumadot.': 'why would you do this?',
        'COM1.txt': 'I am a serial port',
        'dirs with spaces': {
            'seriously?': 'you are stupid',
            'why somuch?wrongstuff.': "I gave up",
        },
    })
    ds.repo.config.set('datalad.save.windows-compat-warning', 'error')
    ds.save('.datalad/config')
    res = ds.save(on_failure='ignore')
    # check that none of the 6 problematic files was saved, but the good
    # one was
    assert_result_count(res, 6, status='impossible', action='save')
    assert_result_count(res, 1, status='ok', action='save')

    # check that the warning is emitted
    ds.repo.config.set('datalad.save.windows-compat-warning', 'warning')
    ds.save('.datalad/config')
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.save()
        cml.assert_logged(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:")
        assert_in("Elements using a reserved filename:", cml.out)
        assert_in("Elements with illegal characters:", cml.out)
        assert_in("Elements ending with a dot:", cml.out)
        assert_in("Elements ending with a space:", cml.out)

    # check that a setting of 'none' really does nothing
    ds.repo.config.set('datalad.save.windows-compat-warning', 'none')
    ds.save('.datalad/config')
    create_tree(path, {
        'more illegal:characters?.py':
            'My arch nemesis uses Windows and I will destroy them! Muahahaha'
    })
    with swallow_logs(new_level=logging.WARN) as cml:
        res = ds.save()
        # we shouldn't see warnings
        assert_not_in(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:", cml.out)
        # make sure the file is saved successfully
        assert_result_count(res, 1, status='ok', action='save')

def test_external_versions_rogue_module(topd=None):
    ev = ExternalVersions()
    # if a module throws some non-ImportError exception upon import,
    # we must not crash, but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
            swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)

def test_save_message_file(path=None):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    # ATTN: Consider corresponding branch so that this check works when
    # we're on an adjusted branch too (e.g., when this test is executed
    # under Windows).
    eq_(ds.repo.format_commit("%s", DEFAULT_BRANCH), "add foo")

def test_preserve_attrs(src=None, dest=None):
    create_tree(src, {"src": {"foo": {"bar": "This is test text."}}})
    os.utime(opj(src, "src", "foo", "bar"), (1234567890, 1234567890))
    _RunnerAdapter().put(opj(src, "src"), dest, recursive=True,
                         preserve_attrs=True)
    s = os.stat(opj(dest, "src", "foo", "bar"))
    assert s.st_atime == 1234567890
    assert s.st_mtime == 1234567890
    with open(opj(dest, "src", "foo", "bar")) as fp:
        assert fp.read() == "This is test text."

def setup(self):
    repo_path = tempfile.mkdtemp(**get_tempfile_kwargs(prefix="tree"))
    create_tree(repo_path, {
        '1.tar': {
            'file.txt': 'load',
            '1.dat': 'load2',
        }
    })
    self.ds = ds = Dataset(repo_path)
    ds.create(force=True)
    self.annex = ds.repo

    # let's add the first archive to the annex so we can test against it
    ds.save('1.tar', message="added 1.tar")

def test_save_partial_commit_shrinking_annex(path=None):
    # This is a variation on the test above. The main difference is that
    # there are other staged changes in addition to the unlocked file.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would
    # fail with git-annex's partial index error, but save (or more
    # specifically GitRepo.save_) drops the pathspec if there are no
    # staged changes.
    ds.repo.add("staged", git=True)
    ds.save(path="foo")
    assert_repo_status(ds.path, added=["staged"])

def test_update_git_smoke(src_path=None, dst_path=None):
    # Apparently was just failing on git repos for basic lack of coverage,
    # hence this quick test
    ds = Dataset(src_path).create(annex=False)
    target = install(dst_path, source=src_path,
                     result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(target.update(recursive=True, merge=True),
                        1, action='update', status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')

def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"), ds.repo.format_commit("%B"))
    # add-archive-content should yield an 'impossible' result when there is
    # untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"], archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
                'unsaved changes')

def check_renamed_file(recursive, annex, path):
    ds = Dataset(path).create(annex=annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo.call_git(["mv"], files=["old", "new"])
    ds.save(recursive=recursive)
    assert_repo_status(path)

    # https://github.com/datalad/datalad/issues/6558
    new = (ds.pathobj / "new")
    new.unlink()
    new.mkdir()
    (new / "file").touch()
    ds.repo.call_git(["add"], files=[str(new / "file")])
    ds.save(recursive=recursive)
    assert_repo_status(path)

def test_override_existing_under_git(self):
    create_tree(self.ds.path, {'1.dat': 'load2'})
    self.ds.save('1.dat', to_git=True, message='added to git')
    self.ds.add_archive_content(
        '1.tar',
        strip_leading_dirs=True,
    )
    # and we did not bother adding it to annex (for now) -- just skipped
    # since we have it and it is the same
    ok_file_under_git(self.ds.path, '1.dat', annexed=False)

    # but if we say 'overwrite' -- we would remove and replace
    self.ds.add_archive_content('1.tar',
                                strip_leading_dirs=True,
                                delete=True,
                                existing='overwrite')
    ok_file_under_git(self.ds.path, '1.dat', annexed=True)

def test_add_delete_after_and_drop_subdir(self):
    os.mkdir(opj(self.annex.path, 'subdir'))
    mv_out = self.annex.call_git(['mv', '1.tar', 'subdir'])
    self.annex.commit("moved into subdir")
    with chpwd(self.annex.path):
        # was failing since it deleted without considering whether the
        # tarball was extracted into that tarball directory
        commits_prior_master = list(self.annex.get_branch_commits_())
        commits_prior = list(self.annex.get_branch_commits_('git-annex'))
        add_out = self.ds.add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True)
        assert_repo_status(self.annex.path)
        if not self.annex.is_managed_branch():
            # the whole counting logic here is ignorant of adjusted branches
            commits_after_master = list(self.annex.get_branch_commits_())
            commits_after = list(
                self.annex.get_branch_commits_('git-annex'))
            # There should be a single commit for all additions, +1 to
            # initiate datalad-archives (gh-1258). If faking dates, there
            # should be another +1 because annex.alwayscommit isn't set to
            # false.
            assert_equal(
                len(commits_after),
                len(commits_prior) + 2 + self.annex.fake_dates_enabled)
            assert_equal(len(commits_after_master),
                         len(commits_prior_master))
        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()

        # and if we add some untracked file and redo, there should be no
        # changes to master, and the file should remain uncommitted
        create_tree(self.annex.path, {'dummy.txt': '123'})
        assert_true(self.annex.dirty)  # untracked file
        add_out = add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True,
            allow_dirty=True)
        assert_repo_status(self.annex.path, untracked=['dummy.txt'])
        assert_equal(len(list(self.annex.get_branch_commits_())),
                     len(commits_prior_master))

        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()

def test_get_invalid_call(path=None, file_outside=None):
    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_raises(ValueError, get, None, dataset=path, on_failure='ignore')

    # have a plain git:
    ds = Dataset(path)
    ds.create(annex=False)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove the indicator file that create has placed
    # in the dataset to make this possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh: but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as cme:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", cme.value.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # a path outside the repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore',
                 result_renderer='default')
    assert_in_results(
        res, status='error',
        message=('path not associated with dataset %s', ds))

def test_no_annex(path=None):
    ds = create(path)
    assert_repo_status(ds.path)
    create_tree(
        ds.path,
        {
            'code': {
                'inannex': 'content',
                'notinannex': 'othercontent',
            },
            'README': 'please',
        })
    # add inannex pre configuration
    ds.save(opj('code', 'inannex'))
    no_annex(pattern=['code/**', 'README'], dataset=ds.path)

    inannex = (ds.pathobj / 'code' / 'inannex')

    # add notinannex and README post configuration
    ds.save([opj('code', 'notinannex'), 'README'])

    repo = ds.repo
    try:
        assert_repo_status(ds.path)
    except AssertionError:
        # If on an adjusted branch and notinannex's mtime is as recent or
        # newer than .git/index's, the clean filter runs on it when save()
        # is called. This leads to a racy failure until after git-annex's
        # 424bef6b6 (smudge: check for known annexed inodes before checking
        # annex.largefiles, 2021-05-03).
        #
        # https://git-annex.branchable.com/forum/one-off_unlocked_annex_files_that_go_against_large/
        if repo.is_managed_branch() and repo.git_annex_version <= "8.20210428":
            assert_repo_status(ds.path, modified=[inannex])
            raise SkipTest("Known bug fixed in git-annex")
        raise

    # one is annexed, the other is not, despite no change in the add call;
    # importantly, .gitattributes is also not annexed
    eq_([opj('code', 'inannex')],
        [str(Path(p)) for p in repo.get_annexed_files()])

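# A hedged sketch of what `no_annex` presumably leaves behind -- the
# assumption here (not verified against its implementation) is that it
# records the given patterns as `annex.largefiles=nothing` entries in
# .gitattributes, which is how git-annex is told to keep matching files
# in git rather than the annex.
def _demo_no_annex_effect(ds):
    # hypothetical check, not part of the original test
    attrs = (ds.pathobj / '.gitattributes').read_text()
    assert 'annex.largefiles=nothing' in attrs
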
def setup_class(cls):
    mktmp_kws = get_tempfile_kwargs()
    path = tempfile.mkdtemp(**mktmp_kws)
    http_root = op.join(path, "srv")
    create_tree(
        http_root,
        {
            "udir": {
                x + ".dat" + ver: x + " content"
                for x in "abcd"
                for ver in ["", ".v1"]
            }
        })

    cls._hpath = HTTPPath(http_root)
    cls._hpath.start()
    cls.url = cls._hpath.url

    cls.data = [
        {
            "url": cls.url + "udir/a.dat",
            "name": "a",
            "subdir": "foo",
            "md5sum": "3fb7c40c70b0ed19da713bd69ee12014",
            "size": "9",
        },
        {
            "url": cls.url + "udir/b.dat",
            "name": "b",
            "subdir": "bar",
            "md5sum": "",
            "size": "",
        },
        {
            "url": cls.url + "udir/c.dat",
            "name": "c",
            "subdir": "foo",
            "md5sum": "9b72648021b70b8c522642e4490d7ac3",
            "size": "9",
        },
    ]
    cls.json_file = op.join(path, "test_addurls.json")
    with open(cls.json_file, "w") as jfh:
        json.dump(cls.data, jfh)

    cls.temp_dir = path

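# For context, a minimal sketch of how records like `cls.data` are consumed:
# `addurls` fills a URL template and a filename template from the fields of
# each JSON record. The exact format strings below are illustrative
# assumptions, not taken from this suite.
def _demo_addurls(ds, json_file):
    # hypothetical demo, not part of the original setup
    ds.addurls(json_file, '{url}', '{subdir}/{name}.dat')
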
def check_exists_interactive(use_ssh, path):
    origin = Dataset(opj(path, "origin")).create()
    sibling_path = opj(path, "sibling")
    # initiate sibling directory with "stuff"
    create_tree(sibling_path, {'stuff': ''})
    if use_ssh:
        sshurl = 'datalad-test:' + sibling_path
    else:
        sshurl = sibling_path

    # should fail
    with assert_raises(RuntimeError):
        origin.create_sibling(sshurl)
    # since the first response is "no" -- we should fail here again:
    with assert_raises(RuntimeError):
        origin.create_sibling(sshurl, existing='replace')
    # and there should be no initiated repository
    assert not Dataset(sibling_path).is_installed()

    # but we would succeed on the 2nd try, since the answer will be yes
    origin.create_sibling(sshurl, existing='replace')
    assert Dataset(sibling_path).is_installed()

def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))

def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

      public  - datasets to be published
      private - datasets never to be published

    The 'public' directory itself is a superdataset; the 'private'
    directory is just a directory that contains standalone datasets in
    subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'),
              reckless='auto')
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless='auto')
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'),
                  reckless='auto')
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'),
                  reckless='auto')
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2',
                           description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'),
                 reckless='auto')
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless='auto')
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'),
              reckless='auto')
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'),
                  reckless='auto')
    # media files that are never to be published
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'),
                reckless='auto')
    # a few typical analysis datasets
    # (just doing 2, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'),
                  reckless='auto')
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless='auto')
        ana.clone(source=tnt.path, path=opj('src', 'tnt'),
                  reckless='auto')
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless='auto')
        # simulate a change in an input (but not raw) dataset
        create_tree(aligned.path,
                    {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless='auto')

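# Usage sketch for the mockup above (hypothetical caller, assuming a scratch
# directory): build the structure and work with the 'public' umbrella
# superdataset.
def _demo_studyforrest_mockup(tmp_path):
    make_studyforrest_mockup(str(tmp_path))
    public = Dataset(opj(str(tmp_path), 'public'))
    assert public.is_installed()
    # the umbrella carries phase1, structural, tnt, aligned, etc.
    assert 'phase1' in public.subdatasets(result_xfm='relpaths')
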
def populate_dataset(ds):
    # create 2 commits
    for pl in [example_payload, example_payload2]:
        create_tree(ds.path, pl)
        ds.save()

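# `example_payload` and `example_payload2` are defined elsewhere in the
# suite; a sketch of the shape `populate_dataset` expects (hypothetical
# values -- only the nested-dict structure handed to `create_tree` matters):
example_payload_sketch = {
    'file.txt': 'some content',
    'subdir': {'other.txt': 'more content'},
}
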
def test_update_simple(origin=None, src_path=None, dst_path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(origin).create(**ca)
    # naming is weird, but a legacy artifact
    _ = origds.create('subm 1', **ca)
    _ = origds.create('2', **ca)

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing the remote, which should lead to
    # setting the tracking branch to the target:
    source.repo.remove_remote(DEFAULT_REMOTE)
    # also forget the declared absolute location of the submodules, and
    # turn them relative to this/a clone
    for sub in source.subdatasets(result_xfm=lambda x: x['gitmodule_name']):
        source.subdatasets(path=sub,
                           set_property=[('url', './{}'.format(sub))])

    # a dataset without a sibling will not need updates
    assert_status('notneeded', source.update())
    # a deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but it is an error if an unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify remote:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to the active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch <default remote>/<default branch>
    assert_in("update.txt",
              dest.repo.get_files(DEFAULT_REMOTE + "/" + DEFAULT_BRANCH))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to the active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = dest.repo.get_file_annexinfo("update.txt",
                                              eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3, status='ok', type='dataset')
    # no recursion and an invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # an invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # a valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # a valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok',
                        path=str(dest.pathobj / 'subm 1'))
    # an additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path
    # (submod) to operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since the "pwd" dataset is
        # not considered, and the path would be relative (as specified).
        # In 0.12.0 it includes the implicit pwd dataset, and paths are
        # absolute.
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update, path=p, action='update',
                              status='ok', type='dataset')

        # and with merge we would also try to save (but there would be no
        # changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # 2 of "updates" really
        assert_in_results(res_merge, action='update', status='ok',
                          type='dataset')
        assert_in_results(res_merge, action='save', status='notneeded',
                          type='dataset')

    # smoke-test that recursive update doesn't fail if a submodule is
    # removed, and that we can run it from within a dataset without
    # providing it explicitly
    assert_result_count(dest.remove('subm 1'),
                        1, status='ok', action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2, status='ok', type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2, action='update', status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2", recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2, action='update', status='ok', type='dataset')
    # and now we can get the new file
    dest.get(opj('2', 'load.dat'))
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')

def test_update_volatile_subds(originpath=None, otherpath=None, destpath=None):
    origin = Dataset(originpath).create()
    repo = origin.repo
    if repo.is_managed_branch() and repo.git_annex_version <= "8.20201129":
        # Fails before git-annex's fd161da2c (adjustTree: Consider submodule
        # deletions, 2021-01-06).
        raise SkipTest(
            "On adjusted branch, test requires fix in more recent git-annex")
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, action='update', status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname, reckless='availability')
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # grab new content of the uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2, action='update', status='ok', type='dataset')
    assert_repo_status(ds.path)

    # now remove the just-installed subdataset from origin again
    origin.remove(sname, reckless='kill')
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the
    # actual ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, action='update', type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now-disconnected subdataset for further tests
    remove(dataset=op.join(ds.path, sname), reckless='kill')
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install the separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)

def _test_target_ssh_inherit(standardgroup, ui, use_ssh, src_path, target_path):
    ds = Dataset(src_path).create()
    if use_ssh:
        target_url = 'datalad-test:%s' % target_path
    else:
        target_url = target_path
    remote = "magical"
    # for the test of setting a group, we will just smoke test while using
    # the current user's group
    ds.create_sibling(target_url, name=remote, shared='group',
                      group=os.getgid(), ui=ui)  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset... a few of the nested
    # ones. A known hiccup happened when there is also a subsub ds added --
    # we might incorrectly traverse and not prepare sub first for subsub
    # to inherit etc
    parent_ds = ds
    subdss = []
    nlevels = 2  # gets slow: 1 - 43 sec, 2 - 49 sec, 3 - 69 sec
    for levels in range(nlevels):
        subds = parent_ds.create('sub')
        create_tree(subds.path, {'sub.dat': 'lots of data'})
        parent_ds.save('sub', recursive=True)
        ok_file_under_git(subds.path, 'sub.dat', annexed=True)
        parent_ds = subds
        subdss.append(subds)

    target_subdss = [
        Dataset(opj(*([target_path] + ['sub'] * (i + 1))))
        for i in range(nlevels)
    ]
    # since we do not yet have/thus have not used an option to record to
    # publish to that sibling by default (e.g. --set-upstream), running
    # just ds.publish should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, '
                'please specify via --to')
    # should be ok, non recursive; BUT it (git or us?) would create an
    # empty sub/ directory
    ds.publish(to=remote)
    assert_postupdate_hooks(target_path, installed=ui)
    for target_sub in target_subdss:
        ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 1 + len(subdss))
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, len(subdss),
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))

    # finally, publishing with inheritance
    ds.publish(to=remote, recursive=True, missing='inherit')
    assert_postupdate_hooks(target_path, installed=ui)

    def check_dss():
        # we added the remote and set all the preferred content settings
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote),
                'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote),
                standardgroup or '')
        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then should content be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))

    check_dss()

    # and it should be ok to reconfigure the full hierarchy of datasets
    # while "inheriting". No URL must be specified, and we must not blow
    # up, but just issue a warning for the top-level dataset, which has no
    # super and so cannot inherit anything -- the use case is to
    # fixup/establish the full hierarchy on the remote site
    ds.save(recursive=True)  # so we have a committed hierarchy for create_sibling
    with swallow_logs(logging.WARNING) as cml:
        out = ds.create_sibling(
            None, name=remote, existing="reconfigure", inherit=True,
            ui=ui, recursive=True)
        eq_(len(out), 1 + len(subdss))
        assert_in("Cannot determine super dataset", cml.out)

    check_dss()

def test_run_explicit(origpath=None, path=None):
    origds = Dataset(origpath).create()
    (origds.pathobj / "test-annex.dat").write_text('content')
    origds.save()
    ds = clone(origpath, path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified",
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True,
           result_renderer='disabled')
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with assert_raises(IncompleteResultsError):
        ds.run("ls", inputs=["not-there"], explicit=True,
               on_failure="stop", result_renderer='disabled')

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True, result_renderer='disabled')
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"],
            result_renderer='disabled')
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))