def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because its content is still untracked) the subrepo
    # cannot be added (it has no commit)
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch: #datalad/3178 (that would have a commit)
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())

def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed)
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())

def test_install_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])

def test_invalid_args(path, otherpath, alienpath):
    # source == path
    assert_raises(ValueError, clone, 'Zoidberg', path='Zoidberg')
    assert_raises(ValueError, clone, 'ssh://mars/Zoidberg',
                  path='ssh://mars/Zoidberg')

    # "invalid URL" is a valid filepath... and since no clone to remote
    # is possible - we can just assume that it is the (legit) file path
    # which is provided, not a URL.  So both below should fail as any
    # other clone from a non-existing source and not for the reason of
    # "invalid something".  Behavior is similar to how Git performs - can
    # clone into a URL-like path.

    # install to an "invalid URL" path
    res = clone('Zoidberg', path='ssh://mars:Zoidberg', on_failure='ignore')
    assert_status('error', res)
    # install to a "remote location" path
    res = clone('Zoidberg', path='ssh://mars/Zoidberg', on_failure='ignore')
    assert_status('error', res)

    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, ds.clone, '/higherup.', 'Zoidberg')
    # make real dataset, try to install outside
    ds_target = create(opj(otherpath, 'target'))
    assert_raises(ValueError, ds_target.clone, ds.path, path=ds.path)
    assert_status('error',
                  ds_target.clone(ds.path, path=alienpath, on_failure='ignore'))

def test_failed_install_multiple(top_path):
    ds = create(top_path)
    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])
    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')

def test_unlock_raises(path, path2, path3):
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    with swallow_logs(new_level=logging.WARNING) as cml:
        unlock(dataset=None, path=path2)
        assert_in("ignored paths that do not belong to any dataset: ['{0}'".format(path2),
                  cml.out)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.unlock(path="notexistent.txt")
        assert_in("ignored non-existing paths", cml.out)

    chpwd(_cwd)

def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed)
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(other_clone.is_installed)
    ds.get('other')
    ok_(other_clone.is_installed)

def test_unlock_raises(path, path2, path3):
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)

def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless=True)
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless=True)

def time_make_studyforrest_mockup(self):
    path = self.path
    # Carries a copy of the
    # datalad.tests.utils_testdatasets.py:make_studyforrest_mockup
    # as of 0.12.0rc2-76-g6ba6d53b
    # A copy is made so we do not reflect in the benchmark results changes
    # to that helper's code.  This copy only tests on 2 not 3 analyses
    # subds
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    tnt.clone(source=structural.path, path=opj('src', 'structural'),
              reckless=True)
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path, path=opj('src', 'phase1'), reckless=True)
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless=True)
    phase2.clone(source=phase2_dicoms.path, path=opj('src', 'dicoms'),
                 reckless=True)
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    aligned.clone(source=phase2.path, path=opj('src', 'phase2'), reckless=True)
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless=True)
    # a few typical analysis datasets
    # (just doing 2, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 2):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless=True)
        ana.clone(source=aligned.path, path=opj('src', 'aligned'),
                  reckless=True)
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless=True)
        # link to metaanalysis
        metaanalysis.clone(source=ana.path, path=opj('src', 'ana{}'.format(i)),
                           reckless=True)
        # simulate change in an input (but not raw) dataset
        create_tree(
            aligned.path,
            {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.add('.')
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path, path=opj('src', 'aligned'),
                    reckless=True)

def test_remove_recreation(path):
    # test recreation is possible and doesn't conflict with in-memory
    # remainings of the old instances
    # see issue #1311
    ds = create(path)
    ds.remove()
    ds = create(path)
    ok_clean_git(ds.path)
    ok_(ds.is_installed())

def test_create_curdir(path, path2):
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    ok_(op.exists(op.join(ds.path, '.noannex')))

def test_create_withprocedure(path):
    # first without
    ds = create(path)
    assert(not op.lexists(op.join(ds.path, 'README.rst')))
    ds.remove()
    assert(not op.lexists(ds.path))
    # now for reals...
    ds = create(
        # needs to identify the dataset, otherwise post-proc
        # procedure doesn't know what to run on
        dataset=path,
        proc_post=[['cfg_metadatatypes', 'xmp', 'datacite']])
    assert_repo_status(path)
    ds.config.reload()
    eq_(ds.config['datalad.metadata.nativetype'], ('xmp', 'datacite'))

def test_add_subdataset(path):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.get_subdatasets())
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.get_subdatasets())
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.get_subdatasets())

def test_get_metadata(path1, path2):
    for p in (path1, path2):
        print('PATH')
        ds = create(p, force=True)
        ds.save()
        meta = MetadataExtractor(
            ds, _get_metadatarelevant_paths(ds, []))._get_dataset_metadata()
        assert_equal(
            dumps(meta, sort_keys=True, indent=2),
            """\
{
  "author": [
    "Last1, First1",
    "Last2, First2"
  ],
  "citation": [
    "10.1016/j.cub.2011.08.031"
  ],
  "description": "Some long description.",
  "formats": [
    "application/matlab",
    "NIFTY"
  ],
  "name": "CRCNS.org xxx-1",
  "sameas": "10.6080/K0QN64NG",
  "shortdescription": "Main title",
  "tag": [
    "Neuroscience",
    "fMRI"
  ],
  "version": "1.0"
}""")

def test_saving_prior(topdir):
    # the problem is that we might be saving what actually still needs to be
    # "created"

    # we would like to place this structure into a hierarchy of two datasets
    # so we create the top one first
    ds1 = create(topdir, force=True)
    # and everything is ok, stuff is not added BUT ds1 will be considered dirty
    assert_repo_status(ds1.path, untracked=['ds2'])
    # And then we would like to initiate a sub1 subdataset
    ds2 = create('ds2', dataset=ds1, force=True)
    # But what will happen is file1.txt under ds2 would get committed first into
    # ds1, and then the whole procedure actually crashes because ds2/file1.txt
    # is committed -- ds2 is already known to git and it just pukes with a bit
    # confusing 'ds2' already exists in the index
    assert_in('ds2', ds1.subdatasets(result_xfm='relpaths'))

def test_get_subdatasets_types(path):
    from datalad.api import create
    ds = create(path)
    ds.create('1')
    ds.create('true')
    # no types casting should happen
    eq_(ds.subdatasets(result_xfm='relpaths'), ['1', 'true'])

def test_get_containing_subdataset(path):
    ds = create(path, force=True)
    ds.add(path='test.txt')
    ds.save("Initial commit")
    subds = ds.create("sub")
    subsubds = subds.create("subsub")

    eq_(ds.get_containing_subdataset(opj("sub", "subsub", "some")).path,
        subsubds.path)
    # the top of a subdataset belongs to the subdataset
    eq_(ds.get_containing_subdataset(opj("sub", "subsub")).path,
        subsubds.path)
    eq_(get_dataset_root(opj(ds.path, "sub", "subsub")),
        subsubds.path)
    eq_(ds.get_containing_subdataset(opj("sub", "some")).path, subds.path)
    eq_(ds.get_containing_subdataset("sub").path, subds.path)
    eq_(ds.get_containing_subdataset("some").path, ds.path)

    # make sure the subds is found, even when it is not present, but still
    # known
    shutil.rmtree(subds.path)
    eq_(ds.get_containing_subdataset(opj("sub", "some")).path, subds.path)
    eq_(ds.get_containing_subdataset("sub").path, subds.path)
    # but now GitRepo disagrees...
    eq_(get_dataset_root(opj(ds.path, "sub")), ds.path)
    # and this stays, even if we give the mount point directory back
    os.makedirs(subds.path)
    eq_(get_dataset_root(opj(ds.path, "sub")), ds.path)

    outside_path = opj(os.pardir, "somewhere", "else")
    assert_raises(PathOutsideRepositoryError,
                  ds.get_containing_subdataset, outside_path)
    assert_raises(PathOutsideRepositoryError,
                  ds.get_containing_subdataset, opj(os.curdir, outside_path))
    assert_raises(PathOutsideRepositoryError,
                  ds.get_containing_subdataset, abspath(outside_path))

def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as the log level atm while
            # swallowing, let's check whether an exit code was reported or not
            # I will test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))

def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    from datalad.api import create
    from datalad.api import install
    ds = create(t)
    clone = install(t2, source=t)

    # first one could just know about itself or explicit url provided
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    sm_httpurls = [httpurl, httpurl + '/.git']
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise really we have no clue where to get from

    # but if we work on dsclone then it should also add urls deduced from its
    # own location default remote for current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise really we have no clue where to get from

    # TODO: check that http:// urls for the dataset itself get resolved
    # TODO: many more!!

def test_uninstall_without_super(path):
    # a parent dataset with a proper subdataset, and another dataset that
    # is just placed underneath the parent, but not an actual subdataset
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    nosub = create(opj(parent.path, 'nosub'))
    ok_clean_git(nosub.path)
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1, path=sub.path)
    assert_result_count(subreport, 0, path=nosub.path)
    # it should be possible to uninstall the proper subdataset, even without
    # explicitly calling the uninstall methods of the parent -- things should
    # be figured out by datalad
    uninstall(sub.path)
    assert not sub.is_installed()
    # no present subdatasets anymore
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1)
    assert_result_count(subreport, 1, path=sub.path, state='absent')
    assert_result_count(subreport, 0, path=nosub.path)
    # but we should fail on an attempt to uninstall the non-subdataset
    res = uninstall(nosub.path, on_failure='ignore')
    assert_result_count(
        res, 1, path=nosub.path, status='error',
        message="will not uninstall top-level dataset (consider `remove` command)")

def test_here(path):
    # few smoke tests regarding the 'here' sibling
    ds = create(path)
    res = ds.siblings(
        'query',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_(ds.repo.uuid, here['annex-uuid'])
    assert_in('annex-description', here)
    assert_in('annex-bare', here)
    assert_in('available_local_disk_space', here)

    # set a description
    res = ds.siblings(
        'configure',
        name='here',
        description='very special',
        on_failure='ignore',
        result_renderer=None)
    assert_status('ok', res)
    assert_result_count(res, 1)
    assert_result_count(res, 1, name='here')
    here = res[0]
    eq_('very special', here['annex-description'])

def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    ds = create(t)
    clone = install(
        t2, source=t,
        result_xfm='datasets', return_type='item-or-list')

    # first one could just know about itself or explicit url provided
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    # Expansion with '/.git' no longer done in this helper
    #sm_httpurls = [httpurl, httpurl + '/.git']
    sm_httpurls = [httpurl]
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    eq_(f(ds, 'sub', None), [])  # otherwise really we have no clue where to get from

    # but if we work on dsclone then it should also add urls deduced from its
    # own location default remote for current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    eq_(f(clone, 'sub'), [t + '/sub'])  # otherwise really we have no clue where to get from

def test_implicit_install(src, dst):
    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False,
                     result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])

def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    ok_clean_git(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = add('.', recursive=True)
    ok_clean_git(ds.path)

def test_failed_clone(dspath):
    ds = create(dspath)
    res = ds.clone("http://nonexistingreallyanything.datalad.org/bla", "sub",
                   on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s', res)

def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = save(recursive=True)
    assert_repo_status(ds.path)

def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset)
    # should not provide a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine

def test_invalid_args(path):
    assert_raises(IncompleteResultsError, install, 'Zoidberg', source='Zoidberg')
    # install to an invalid URL
    assert_raises(ValueError, install, 'ssh://mars:Zoidberg', source='Zoidberg')
    # install to a remote location
    assert_raises(ValueError, install, 'ssh://mars/Zoidberg', source='Zoidberg')
    # make fake dataset
    ds = create(path)
    assert_raises(IncompleteResultsError, install, '/higherup.', 'Zoidberg',
                  dataset=ds)

def get_baseline(p):
    ds = Dataset(p).create()
    with chpwd(ds.path):
        subds = create('sub')
    ds.add('sub', save=False)
    create_tree(subds.path, {"staged": ""})
    subds.add("staged", save=False)
    # subdataset has staged changes.
    ok_clean_git(ds.path, index_modified=['sub'])
    return ds

def test_install_consistent_state(src, dest, dest2, dest3):
    # if we install a dataset, where sub-dataset "went ahead" in that branch,
    # while super-dataset was not yet updated (e.g. we installed super before)
    # then it is desired to get that default installed branch to get to the
    # position where previous location was pointing to.
    # It is indeed a mere heuristic which might not hold the assumption in some
    # cases, but it would work for most simple and thus mostly used ones
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(Dataset,
                ds.subdatasets(recursive=True, fulfilled=True,
                               result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a
            # "version" as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)

    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version
    # pointed by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)

def test_clone_crcns(tdir, ds_path):
    with chpwd(tdir):
        res = clone('///', path="all-nonrecursive", on_failure='ignore')
        assert_status('ok', res)

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.clone("///crcns", result_xfm='datasets',
                     return_type='item-or-list')
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.subdatasets(result_xfm='paths'))

def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but from within a subdir of the
    # symlink and with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([opj(parent.path, 'sub3'),
                  opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # in contrast to `add`, this only operates on a single top-level dataset
    # although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)

def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    res = ds.subdatasets()
    assert_result_count(res, 1)
    assert_result_count(
        res, 1,
        # essentials
        path=op.join(ds.path, 'dir'),
        gitmodule_url='./dir',
        gitmodule_name='dir',
        # but also the branch, by default
        gitmodule_branch='master',
    )
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed)
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())

def test_failed_install_multiple(top_path):
    ds = create(top_path)
    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'],
                   on_failure='continue')
    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=None)
    # those which succeeded should be saved now
    eq_(ds.subdatasets(result_xfm='relpaths'), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(r.get('source_url', r['path']) for r in cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})

def test_crawl_autoaddtext(ind, topurl, outd):
    ds = create(outd, text_no_annex=True)
    with chpwd(outd):  # TODO -- dataset argument
        crawl_init(
            {'url': topurl, 'a_href_match_': '.*'},
            save=True,
            template='simple_with_archives')
        crawl()
    ok_clean_git(outd)
    ok_file_under_git(outd, "anothertext", annexed=False)
    ok_file_under_git(outd, "d/textfile", annexed=False)
    ok_file_under_git(outd, "d/tooshort", annexed=True)

def test_create_withcfg(path):
    ds = create(
        dataset=path,
        cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'README.md').exists()

    # If we are creating a dataset within a reference dataset, we save _after_
    # the procedure runs.
    ds.create('subds', cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'subds' / 'README.md').exists()

def test_backup_archive(path, objtree, archivremote):
    """Similar to test_archive_layout(), but not focused on
    compatibility with the directory-type special remote. Instead, it
    tests building a second RIA remote from an existing one, e.g. for
    backup purposes.
    """
    ds = create(path)
    setup_archive_remote(ds.repo, objtree)
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # copy files into the RIA archive
    ds.repo.copy_to('.', 'archive')

    targetpath = Path(archivremote) / ds.id[:3] / ds.id[3:] / 'archives'
    targetpath.mkdir(parents=True)
    subprocess.run(
        ['7z', 'u', str(targetpath / 'archive.7z'), '.'],
        cwd=str(Path(objtree) / ds.id[:3] / ds.id[3:] / 'annex' / 'objects'),
    )
    initexternalremote(ds.repo, '7z', 'ria',
                       config={'base-path': archivremote})
    # wipe out the initial RIA remote (just for testing if the upcoming
    # one can fully take over)
    shutil.rmtree(objtree)
    # fsck to make git-annex aware of the loss
    assert_status('error', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='archive', fast=True)
    ])
    # now only available "here"
    eq_(len(ds.repo.whereis('one.txt')), 1)

    # make the backup archive known
    initexternalremote(ds.repo, 'backup', 'ria',
                       config={'base-path': archivremote})
    # now fsck the new remote to get the new special remote indexed
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='backup', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

def test_gitannex_remoteio_url(path, objtree):
    ds = create(path)
    initexternalremote(
        ds.repo, 'ria-remote', 'ria',
        config={'url': "ria+ssh://datalad-test:{}".format(objtree)})
    ds.repo._run_annex_command(
        'testremote',
        annex_options=['ria-remote'],
        log_stdout=False,
    )

def test_bids(dspath, records):
    # make fake UKB datarecord downloads
    make_datarecord_zips('12345', records)

    # init dataset
    ds = create(dspath)
    ds.ukb_init(
        '12345',
        ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'],
        bids=True)
    # dummy key file, needed to bypass tests
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')
    bin_dir = make_ukbfetch(ds, records)

    # put fake ukbfetch in the path and run
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True)
    bids_files = ds.repo.get_files('incoming-bids')
    master_files = ds.repo.get_files()
    for i in [
            'ses-2/func/sub-12345_ses-2_task-rest_bold.nii.gz',
            'ses-2/non-bids/fMRI/sub-12345_ses-2_task-hariri_eprime.txt',
            'ses-3/non-bids/fMRI/sub-12345_ses-3_task-hariri_eprime.txt']:
        assert_in(i, bids_files)
        assert_in(i, master_files)

    # run again, nothing bad happens
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)
    bids_files = ds.repo.get_files('incoming-bids')
    master_files = ds.repo.get_files()
    for i in [
            'ses-2/func/sub-12345_ses-2_task-rest_bold.nii.gz',
            'ses-2/non-bids/fMRI/sub-12345_ses-2_task-hariri_eprime.txt',
            'ses-3/non-bids/fMRI/sub-12345_ses-3_task-hariri_eprime.txt']:
        assert_in(i, bids_files)
        assert_in(i, master_files)

    # now re-init with a different record subset and rerun
    ds.ukb_init('12345', ['25747_2_0', '25748_2_0', '25748_3_0'], bids=True,
                force=True)
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)

def _test_drop(path, drop_immediately):
    s3url = 's3://datalad-test0-nonversioned'
    providers = get_test_providers(s3url)  # to verify having s3 credentials
    # vcr tape is getting bound to the session object, so we need to
    # force re-establishing the session for the bucket.
    # TODO (in datalad): make a dedicated API for that, now too obscure
    _ = providers.get_status(s3url, allow_old_session=False)
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True,  # so test goes faster
                drop_immediately=drop_immediately,
            ),
            save=True)
    if drop_immediately:
        # cannot figure out but taping that interaction results in
        # git annex addurl error.  No time to figure it out
        # so we just crawl without vcr for now. TODO: figure out WTF
        with chpwd(path):
            crawl()
    else:
        with externals_use_cassette(
                'test_simple_s3_test0_nonversioned_crawl_ext'
                + ('_immediately' if drop_immediately else '')), \
                chpwd(path):
            crawl()

    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))

def test_bf1886(path):
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the above, but from within a subdir of the
    # symlink and with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([op.join(parent.path, 'sub3'),
              op.join(parent.path, 'subdir', 'subsubdir', 'upup3')])
    assert_repo_status(parent.path)

def client(tmpdir):
    ds = create(tmpdir.strpath)
    res = webapp(
        #'example_metadata',
        dataset=ds.path,
        mode='dry-run',
        return_type='item-or-list',
    )
    app = res['app']
    client = app.test_client()
    yield client

def test_arg_missing(path, path2):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings,
        'add',
        url=path2,
    )
    assert_status(
        'ok',
        ds.siblings(
            'add', url=path2, name='somename'))

def test_gitannex_remoteio(path, objtree):
    ds = create(path)
    initexternalremote(
        ds.repo, 'ria-remote', 'ria',
        config={
            'base-path': objtree,
            'ssh-host': 'datalad-test',
        })
    ds.repo._run_annex_command(
        'testremote',
        annex_options=['ria-remote'],
        log_stdout=False,
    )

def test_create_fake_dates(path):
    ds = create(path, fake_dates=True)

    ok_(ds.config.getbool("datalad", "fake-dates"))
    ok_(ds.repo.fake_dates_enabled)

    # Another instance detects the fake date configuration.
    ok_(Dataset(path).repo.fake_dates_enabled)

    first_commit = ds.repo.get_revisions(options=["--reverse", "--all"])[0]

    eq_(ds.config.obtain("datalad.fake-dates-start") + 1,
        int(ds.repo.format_commit("%ct", first_commit)))

def test_install_subds_with_space(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    # works even now, boring
    # install(tpath, source=opath, recursive=True)
    if on_windows:
        # on windows we cannot simply prepend localhost: to a path
        # and get a working sshurl...
        install(tpath, source=opath, recursive=True)
    else:
        # do via ssh!
        install(tpath, source="localhost:" + opath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()

def test_dataset_local_mode(path=None):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    # now check that dataset-local mode doesn't have the global piece
    cfg = ConfigManager(ds, source='branch-local')
    assert_not_in('user.name', cfg)
    assert_in('datalad.dataset.id', cfg)
    assert_in('annex.version', cfg)

def test_cfg_override(path):
    with chpwd(path):
        # control
        out, err = Runner()('datalad wtf -s some', shell=True)
        assert_not_in('datalad.dummy: this', out)
        # ensure that this is not a dataset's cfg manager
        assert_not_in('datalad.dataset.id', out)

        # env var
        if on_windows:
            cmd_str = 'set DATALAD_DUMMY=this&& datalad wtf -s some'
        else:
            cmd_str = 'DATALAD_DUMMY=this datalad wtf -s some'
        out, err = Runner()(cmd_str, shell=True)
        assert_in('datalad.dummy: this', out)

        # cmdline arg
        out, err = Runner()('datalad -c datalad.dummy=this wtf -s some',
                            shell=True)
        assert_in('datalad.dummy: this', out)

        # now create a dataset in the path. the wtf plugin will switch to
        # using the dataset's config manager, which must inherit the overrides
        create(dataset=path)
        # control
        out, err = Runner()('datalad wtf -s some', shell=True)
        assert_not_in('datalad.dummy: this', out)
        # ensure that this is a dataset's cfg manager
        assert_in('datalad.dataset.id', out)

        # env var
        if on_windows:
            cmd_str = 'set DATALAD_DUMMY=this&& datalad wtf -s some'
        else:
            cmd_str = 'DATALAD_DUMMY=this datalad wtf -s some'
        out, err = Runner()(cmd_str, shell=True)
        assert_in('datalad.dummy: this', out)

        # cmdline arg
        out, err = Runner()('datalad -c datalad.dummy=this wtf -s some',
                            shell=True)
        assert_in('datalad.dummy: this', out)

def test_save_partial_commit_shrinking_annex(path):
    # This is a variation on the test above.  The main difference is that
    # there are other staged changes in addition to the unlocked file.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would fail
    # with git-annex's partial index error, but save (or more specifically
    # GitRepo.save_) drops the pathspec if there are no staged changes.
    ds.repo.add("staged", git=True)
    ds.save(path="foo")
    assert_repo_status(ds.path, added=["staged"])

def test_create_fake_dates(path):
    ds = create(path, fake_dates=True)

    ok_(ds.config.getbool("datalad", "fake-dates"))
    ok_(ds.repo.fake_dates_enabled)

    # Another instance detects the fake date configuration.
    ok_(Dataset(path).repo.fake_dates_enabled)

    first_commit = ds.repo.repo.commit(
        ds.repo.repo.git.rev_list("--reverse", "--all").split()[0])

    eq_(ds.config.obtain("datalad.fake-dates-start") + 1,
        first_commit.committed_date)

def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin, path,
                   result_xfm='datasets', return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one
        # we do not want automatic subdatasetification without being given a
        # dataset explicitly
        clonedsub = clone(origin, 'testsub',
                          result_xfm='datasets', return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])

def test_drop(path):
    get_test_providers('s3://datalad-test0-nonversioned')  # to verify having s3 credentials
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with externals_use_cassette('test_simple_s3_test0_nonversioned_crawl_ext'), \
            chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True  # so test goes faster
            ),
            save=True
        )
        crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))

def test_dataset_systemglobal_mode(path=None):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    with chpwd(path):
        # now check that no config from a random dataset at PWD is picked up
        # if no dataset instance was provided
        cfg = ConfigManager(dataset=None, source='any')
        assert_in('user.name', cfg)
        assert_not_in('datalad.dataset.id', cfg)
        assert_not_in('annex.version', cfg)

def test_invalid_args(path):
    assert_raises(ValueError, install, 'Zoidberg', source='Zoidberg')
    # install to an invalid URL
    assert_raises(ValueError, install, 'ssh://mars:Zoidberg', source='Zoidberg')
    # install to a remote location
    assert_raises(ValueError, install, 'ssh://mars/Zoidberg', source='Zoidberg')
    # make fake dataset
    ds = create(path)
    assert_raises(ValueError, install, '/higherup.', 'Zoidberg', dataset=ds)

def test_failed_install_multiple(top_path):
    ds = create(top_path)
    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')

def test_crawl(tempd):
    if not _get_github_cred().is_known:
        raise SkipTest("no github credential")
    ds = create(tempd)
    with chpwd(tempd):
        crawl_init(
            template='gh',
            save=True,
            args={
                'org': 'datalad-collection-1',
                'include': 'kaggle'
            })
        crawl()
    subdss = ds.subdatasets(fulfilled=True, result_xfm='datasets')
    assert all('kaggle' in d.path for d in subdss)
    assert_greater(len(subdss), 1)
    assert_false(ds.repo.dirty)

def test_bf3733(path=None):
    ds = create(path)
    # call siblings configure for an unknown sibling without a URL
    # doesn't work, but also doesn't crash
    assert_result_count(
        ds.siblings(
            'configure',
            name='imaginary',
            publish_depends='doesntmatter',
            url=None,
            on_failure='ignore'),
        1,
        status='error',
        action="configure-sibling",
        name="imaginary",
        path=ds.path,
    )

def test_no_annex(path):
    ds = create(path)
    ok_clean_git(ds.path)
    create_tree(
        ds.path,
        {'code': {
            'inannex': 'content',
            'notinannex': 'othercontent'}})
    # add two files, pre and post configuration
    ds.add(opj('code', 'inannex'))
    plugin(['no_annex', 'pattern=code/**'], dataset=ds)
    ds.add(opj('code', 'notinannex'))
    ok_clean_git(ds.path)
    # one is annex'ed, the other is not, despite no change in add call
    # importantly, also .gitattributes is not annexed
    eq_([opj('code', 'inannex')],
        ds.repo.get_annexed_files())

def test_remove_subds(path):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1,
        path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1,
        path=op.join(ds.path, 'sub'),
        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)