def test_install_into_dataset(source, top_path):
    ds = rev_create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.rev_save(message='addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we ask not to auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    rev_create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.rev_save('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
def test_remove_recreation(path):
    # test that recreation is possible and doesn't conflict with in-memory
    # remnants of the old instances
    # see issue #1311
    ds = rev_create(path)
    ds.remove()
    ds = rev_create(path)
    ok_clean_git(ds.path)
    ok_(ds.is_installed())
def test_require_dataset(path):
    with chpwd(path):
        assert_raises(
            InsufficientArgumentsError,
            require_dataset,
            None)
        rev_create('.')
        # in this folder by default
        assert_equal(require_dataset(None).path, path)
        assert_equal(
            require_dataset('some', check_installed=False).path,
            abspath('some'))
        assert_raises(ValueError, require_dataset, 'some',
                      check_installed=True)
def test_uninstall_without_super(path):
    # a parent dataset with a proper subdataset, and another dataset that
    # is just placed underneath the parent, but not an actual subdataset
    parent = Dataset(path).rev_create()
    sub = parent.rev_create('sub')
    ok_clean_git(parent.path)
    nosub = rev_create(opj(parent.path, 'nosub'))
    ok_clean_git(nosub.path)
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1, path=sub.path)
    assert_result_count(subreport, 0, path=nosub.path)
    # it should be possible to uninstall the proper subdataset, even without
    # explicitly calling the uninstall methods of the parent -- things should
    # be figured out by datalad
    uninstall(sub.path)
    assert not sub.is_installed()
    # no present subdatasets anymore
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1)
    assert_result_count(subreport, 1, path=sub.path, state='absent')
    assert_result_count(subreport, 0, path=nosub.path)
    # but we should fail on an attempt to uninstall the non-subdataset
    res = uninstall(nosub.path, on_failure='ignore')
    assert_result_count(
        res, 1, path=nosub.path, status='error',
        message="will not uninstall top-level dataset "
                "(consider `remove` command)")
def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    ds = rev_create(t)
    clone = install(t2, source=t,
                    result_xfm='datasets', return_type='item-or-list')

    # first one could just know about itself or explicit url provided
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    # Expansion with '/.git' no longer done in this helper
    #sm_httpurls = [httpurl, httpurl + '/.git']
    sm_httpurls = [httpurl]
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise we really have no clue where to get it from

    # but if we work on dsclone then it should also add urls deduced from its
    # own location's default remote for the current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise we really have no clue where to get it from
def test_get_subdatasets_types(path):
    from datalad.api import rev_create
    ds = rev_create(path)
    ds.rev_create('1')
    ds.rev_create('true')
    # no type casting should happen
    eq_(ds.subdatasets(result_xfm='relpaths'), ['1', 'true'])
def test_implicit_install(src, dst):

    origin_top = rev_create(src)
    origin_sub = origin_top.rev_create("sub")
    origin_subsub = origin_sub.rev_create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.rev_save("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.rev_save("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.rev_save("file3.txt")
    origin_top.rev_save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False,
                     result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
def test_remove_nowhining(path):
    # removing a dataset that lives under another dataset (but is not a
    # subdataset of it) should not produce a meaningless message that
    # something was not right
    ds = rev_create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine
def test_invalid_args(path):
    assert_raises(IncompleteResultsError, install, 'Zoidberg',
                  source='Zoidberg')
    # install to an invalid URL
    assert_raises(ValueError, install, 'ssh://mars:Zoidberg',
                  source='Zoidberg')
    # install to a remote location
    assert_raises(ValueError, install, 'ssh://mars/Zoidberg',
                  source='Zoidberg')
    # make fake dataset
    ds = rev_create(path)
    assert_raises(IncompleteResultsError, install, '/higherup.', 'Zoidberg',
                  dataset=ds)
def test_install_from_tilda(opath, tpath):
    ds = rev_create(opath)
    ds.rev_create('sub ds')
    orelpath = os.path.join(
        '~',
        os.path.relpath(opath, os.path.expanduser('~'))
    )
    assert orelpath.startswith('~')  # just to make sure no normalization
    install(tpath, source=orelpath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()
def test_wtf(path):
    # smoke test for now
    with swallow_outputs() as cmo:
        wtf(dataset=path)
        assert_not_in('## dataset', cmo.out)
        assert_in('## configuration', cmo.out)
        # Those sections get censored out by default now
        assert_not_in('user.name: ', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            wtf()
            assert_not_in('## dataset', cmo.out)
            assert_in('## configuration', cmo.out)
    # now with a dataset
    ds = rev_create(path)
    with swallow_outputs() as cmo:
        wtf(dataset=ds.path)
        assert_in('## configuration', cmo.out)
        assert_in('## dataset', cmo.out)
        assert_in('path: {}'.format(ds.path), cmo.out)

    # and if we run with all sensitive
    for sensitive in ('some', True):
        with swallow_outputs() as cmo:
            wtf(dataset=ds.path, sensitive=sensitive)
            # we fake those for tests anyway, but in this mode the cfg keys
            # are shown while their values are explicitly hidden
            assert_in('user.name: %s' % _HIDDEN, cmo.out)

    with swallow_outputs() as cmo:
        wtf(dataset=ds.path, sensitive='all')
        assert_not_in(_HIDDEN, cmo.out)  # all is shown
        assert_in('user.name: ', cmo.out)

    skip_if_no_module('pyperclip')

    # verify that it works correctly in the env/platform
    import pyperclip
    with swallow_outputs() as cmo:
        try:
            pyperclip.copy("xxx")
            pyperclip_works = pyperclip.paste().strip() == "xxx"
            wtf(dataset=ds.path, clipboard=True)
        except (AttributeError, pyperclip.PyperclipException) as exc:
            # AttributeError could come from pyperclip if no DISPLAY
            raise SkipTest(exc_str(exc))
        assert_in("WTF information of length", cmo.out)
        assert_not_in('user.name', cmo.out)
        if not pyperclip_works:
            # Sometimes it does not throw but just fails to work
            raise SkipTest(
                "Pyperclip seems to be not functioning here correctly")
        assert_not_in('user.name', pyperclip.paste())
        assert_in(_HIDDEN, pyperclip.paste())  # by default no sensitive info
        assert_in("cmd:annex:", pyperclip.paste())  # but the content is there
def test_bf1886(path):
    parent = Dataset(path).rev_create()
    sub = parent.rev_create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = rev_create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above but be in subdir of symlink
    # with no reference dataset
    sub3 = rev_create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # `save`, in contrast to `add`, only operates on a single top-level
    # dataset; although it is not specified, it gets discovered based on
    # the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
def test_failed_install_multiple(top_path):
    ds = rev_create(top_path)

    rev_create(_path_(top_path, 'ds1'))
    rev_create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'],
                   on_failure='continue')

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    ds.rev_save(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=None)
    # those which succeeded should be saved now
    eq_(ds.subdatasets(result_xfm='relpaths'), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(r.get('source_url', r['path']) for r in cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})
def test_install_consistent_state(src, dest, dest2, dest3):
    # if we install a dataset, where sub-dataset "went ahead" in that branch,
    # while super-dataset was not yet updated (e.g. we installed super before)
    # then it is desired to get that default installed branch to get to the
    # position where previous location was pointing to.
    # It is indeed a mere heuristic, whose assumption might not hold in some
    # cases, but it would work for most simple and thus mostly used ones
    ds1 = rev_create(src)
    sub1 = ds1.rev_create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(Dataset, ds.subdatasets(recursive=True, fulfilled=True,
                                        result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a
            # "version" as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.rev_create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.rev_save(message="added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version
    # pointed by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
def test_install_subds_with_space(opath, tpath):
    ds = rev_create(opath)
    ds.rev_create('sub ds')
    # works even now, boring
    # install(tpath, source=opath, recursive=True)
    if on_windows:
        # on windows we cannot simply prepend localhost: to a path
        # and get a working sshurl...
        install(tpath, source=opath, recursive=True)
    else:
        # do via ssh!
        install(tpath, source="localhost:" + opath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()
def test_no_annex(path):
    ds = rev_create(path)
    ok_clean_git(ds.path)
    create_tree(
        ds.path,
        {'code': {
            'inannex': 'content',
            'notinannex': 'othercontent'},
         'README': 'please'})
    # add inannex pre configuration
    ds.rev_save(opj('code', 'inannex'))
    no_annex(pattern=['code/**', 'README'], dataset=ds)
    # add notinannex and README post configuration
    ds.rev_save([opj('code', 'notinannex'), 'README'])
    ok_clean_git(ds.path)
    # one is annex'ed, the other is not, despite no change in add call
    # importantly, also .gitattributes is not annexed
    eq_([opj('code', 'inannex')],
        ds.repo.get_annexed_files())
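# Hedged illustration, not asserted by the test above: the no_annex helper
# achieves this effect by writing git-annex "largefiles" rules into the
# dataset's .gitattributes, roughly of the form
#
#   code/** annex.largefiles=nothing
#   README annex.largefiles=nothing
#
# so matching files committed afterwards go straight to git instead of the
# annex. A follow-up check along these lines could inspect the file directly:
#
#   with open(opj(ds.path, '.gitattributes')) as f:
#       assert 'annex.largefiles=nothing' in f.read()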
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = rev_create(origin_path)
    target = AnnexRepo(target_path, create=True)
    target.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    origin.siblings('add', name='target', url=target_path)
    origin.publish(to='target')
    ok_clean_git(origin.path)
    ok_clean_git(target.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.rev_create('sub')
    ok_clean_git(origin.path)
    assert_not_equal(origin.repo.get_hexsha(), target.get_hexsha())
    # now push
    res = origin.publish(to='target')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=origin.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())
def test_install_subds_from_another_remote(topdir):
    # https://github.com/datalad/datalad/issues/1905
    from datalad.support.network import PathRI
    with chpwd(topdir):
        origin_ = 'origin'
        clone1_ = 'clone1'
        clone2_ = 'clone2'

        origin = rev_create(origin_, no_annex=True)
        clone1 = install(source=origin, path=clone1_)
        # print("Initial clone")
        clone1.create_sibling(
            'ssh://localhost%s/%s' % (PathRI(getpwd()).posixpath, clone2_),
            name=clone2_)
        # print("Creating clone2")
        clone1.publish(to=clone2_)
        clone2 = Dataset(clone2_)
        # print("Initiating subdataset")
        clone2.rev_create('subds1')
        # print("Updating")
        clone1.update(merge=True, sibling=clone2_)
        # print("Installing within updated dataset -- should be able to
        # install from clone2")
        clone1.install('subds1')
def time_rev_createadd_to_dataset(self):
    subds = rev_create(opj(self.ds.path, 'newsubds'))
    self.ds.rev_save(subds.path)
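# Hedged sketch (class name and setup body are assumptions, not from the
# source) of the asv-style benchmark suite a `time_*` method like the one
# above would belong to: asv runs `setup` before timing each method, so
# `self.ds` must already exist when the timed body runs.
import tempfile

class CreateAddSuiteSketch:
    def setup(self):
        # hypothetical fixture: the superdataset the timing method saves into
        self.ds = rev_create(tempfile.mkdtemp())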
def test_failed_install(dspath):
    ds = rev_create(dspath)
    assert_raises(IncompleteResultsError,
                  ds.install,
                  "sub",
                  source="http://nonexistingreallyanything.datalad.org/bla")
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = rev_create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.rev_create('phase1',
                               description='old-style, no connection to RAW')
    structural = public.rev_create('structural', description='anatomy')
    tnt = public.rev_create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless=True)
    aligned = public.rev_create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'),
                  reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = rev_create(opj(path, 'private', 'labet'),
                       description="raw data ET")
    phase2_dicoms = rev_create(opj(path, 'private', 'p2dicoms'),
                               description="raw data P2MRI")
    phase2 = public.rev_create('phase2',
                               description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'),
                  reckless=True)
    # never to be published media files
    media = rev_create(opj(path, 'private', 'media'),
                       description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.rev_create('annotations',
                              description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.rev_create('metaanalysis',
                                     description="analysis of analyses")
    for i in range(1, 3):
        ana = public.rev_create('analysis{}'.format(i),
                                description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(aligned.path,
                    {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.rev_save()
    # finally aggregate data
    aggregate = public.rev_create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless=True)
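# A minimal usage sketch for the mockup above; the test name, the
# with_tempfile decorator, and the checked paths are assumptions following
# the conventions of the surrounding tests, not part of the helper itself.
@with_tempfile(mkdir=True)
def test_studyforrest_mockup_smoke(path):
    make_studyforrest_mockup(path)
    # 'public' is the umbrella superdataset holding the derived datasets
    ok_(Dataset(opj(path, 'public')).is_installed())
    # 'private' is a plain directory holding standalone raw datasets
    ok_(Dataset(opj(path, 'private', 'media')).is_installed())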
def test_install_source_relpath(src, dest):
    ds1 = rev_create(src)
    src_ = basename(src)
    with chpwd(dirname(src)):
        ds2 = install(dest, source=src_)
def time_rev_createadd_to_dataset(self, tarfile_path):
    subds = rev_create(opj(self.ds.path, 'newsubds'))
    self.ds.rev_save(subds.path)