def test_install_skip_failed_recursive(src=None, path=None):
    """Recursive get must report a failed subdataset install without aborting."""
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")
    # obtain the superdataset only
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage the recursive installation of 'subm 1' by making its clone
    # target non-empty
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")
    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True,
                        on_failure='ignore', result_xfm=None)
        # the toplevel dataset was installed before, so it must not show up
        # as a dataset result
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # the sabotaged subdataset yields an error result ...
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        # ... while the unaffected one installs fine
        assert_in_results(result, status='ok', path=sub2.path)
def test_install_known_subdataset(src=None, path=None):
    """Install a registered (but absent) subdataset by name, then get its data."""
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(state='present', result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not a
    # new repository initiated.
    # (fix: removed a stray trailing comma that wrapped this statement in a
    # pointless one-element tuple expression)
    assert_in("test-annex.dat", subds.repo.get_indexed_files())
    assert_not_in('subm 1',
                  ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(state='present', result_xfm='relpaths'))
    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def test_install_list(path=None, top_path=None):
    """Install several known subdatasets in one call, with no `source` allowed."""
    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")
    # start from the toplevel dataset alone; its subdatasets stay absent
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())
    # passing `source` together with multiple paths must be rejected
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'], source='something')
    # without it, both known subdatasets are installed
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # a repeated request via get has nothing left to do
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
def test_get_install_missing_subdataset(src=None, path=None):
    """get() only installs subdatasets that a requested path points into."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')
    ds = install(path=path, source=src, result_xfm='datasets',
                 return_type='item-or-list')
    ds.create(force=True)  # force, to cause dataset initialization
    subs = ds.subdatasets(result_xfm='datasets')
    ok_(all(not sub.is_installed() for sub in subs))
    # nothing gets installed when no explicitly given path points into a
    # not yet installed subdataset:
    ds.get(curdir)
    ok_(all(not sub.is_installed() for sub in subs))
    # but a path contained in such a subdataset triggers its installation:
    file_ = opj(subs[0].path, 'test-annex.dat')
    ds.get(file_)
    ok_(subs[0].is_installed())
    ok_(subs[0].repo.file_has_content('test-annex.dat') is True)
    # a recursive get fulfills all dataset handles as well
    ds.get(curdir, recursive=True)
    ok_(all(sub.is_installed() for sub in subs))
def test_get_greedy_recurse_subdatasets(src=None, path=None):
    """Paths naming subdatasets make get() fetch all of their file content."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')
    ds = install(path, source=src, result_xfm='datasets',
                 return_type='item-or-list')
    # request both subdatasets by path -- content included
    ds.get(['subm 1', '2'])
    subds1, subds2 = ds.subdatasets(result_xfm='datasets')
    # the superdataset's own file was never requested ...
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    # ... but all content within the subdatasets was obtained
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)
def test_install_skip_list_arguments(src=None, path=None, path_outside=None):
    """Mixed valid/invalid install targets yield per-path results, not a crash."""
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")
    ds = install(path, source=src)
    ok_(ds.is_installed())
    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible/error' status for both invalid args,
    # but all the other tasks have been accomplished
    assert_result_count(
        result, 1, status='impossible', message="path does not exist",
        path=opj(ds.path, 'not_existing'))
    assert_result_count(
        result, 1, status='error',
        message=("path not associated with dataset %s", ds),
        path=path_outside)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())
    # with the default on_failure, any invalid item makes the whole call
    # raise, even though 'subm 1' was already obtained above.
    # (fix: dropped the two unused `as cme` bindings -- the second silently
    # rebound the first and neither exception instance was ever inspected)
    with assert_raises(IncompleteResultsError):
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError):
        ds.get(path=['subm 1', 'not_existing'])
def test_reckless(path=None, top_path=None):
    """reckless=True installs an untrusted clone with hardlinked annex objects."""
    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")
    ds = install(top_path, source=path, reckless=True)
    # reckless mode turns on annex hardlinking ...
    eq_(ds.config.get('annex.hardlink', None), 'true')
    # ... and marks the local repository as untrusted
    eq_(ds.repo.repo_info()['untrusted repositories'][0]['here'], True)
def check_target_ssh_since(use_ssh, origin, src_path, target_path):
    """Check that create-sibling with `since` only processes new subdatasets."""
    _mk_submodule_annex(origin, 'test-annex.dat', 'whatever')
    sshurl = ("ssh://datalad-test" + target_path) if use_ssh else target_path
    # prepare the source hierarchy
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # add a fresh subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    assert_repo_status(source.path)
    # create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera', dataset=source,
                               sshurl=sshurl, recursive=True, since='HEAD~1')
    # the target directory holds exactly one thing: the remote repo of the
    # newly added subdataset
    target = Dataset(target_path)
    ok_(not target.is_installed())  # not created, due to since
    eq_(['brandnew'], os.listdir(target_path))
    # now exercise a subdataset that itself carries nested subdatasets
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    assert_create_sshwebserver(name='dominique_carrera', dataset=source,
                               sshurl=sshurl, recursive=True, existing='skip')
    # the immediate subdataset got a sibling ...
    ok_(Dataset(_path_(target_path, 'brandnew2')).is_installed())
    # ... but not its subs: they were not saved yet, so even push would not
    # operate on them -- no reason to create them until saved
    ok_(not Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    source.save(recursive=True)
    # once saved, a repeated run creates the nested siblings too
    assert_create_sshwebserver(name='dominique_carrera', dataset=source,
                               sshurl=sshurl, recursive=True, existing='skip')
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
    # with --since, an even deeper nested dataset must still be created
    # -- see https://github.com/datalad/datalad/issues/6596
    brandnewsubsub.create('sub')
    source.save(recursive=True)
    assert_create_sshwebserver(name='dominique_carrera', dataset=source,
                               sshurl=sshurl, recursive=True, existing='skip',
                               since=f'{DEFAULT_REMOTE}/{DEFAULT_BRANCH}')
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub/sub')).is_installed())
    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
def check_target_ssh_recursive(use_ssh, origin, src_path, target_path):
    """Check recursive create-sibling in nested and flat target layouts.

    Runs the whole scenario twice: once with a nested target directory tree
    and once with a flat layout using a '%RELNAME' template.
    """
    _mk_submodule_annex(origin, 'test-annex.dat', 'whatever')
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))
    for flat in False, True:
        # distinct target per layout; the template may diverge from the
        # actual target path below
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)
        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep
        if use_ssh:
            sshurl = "ssh://datalad-test" + target_path_
        else:
            sshurl = target_path_
        remote_name = 'remote-' + str(flat)
        # NOTE(review): this call passes no dataset=, so it relies on the cwd
        # set by chpwd -- keep it inside the context manager
        with chpwd(source.path):
            assert_create_sshwebserver(name=remote_name,
                                       sshurl=sshurl,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       ui=have_webui())
        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_,
                             'prefix' if flat else "").rstrip(
                                 os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)
            if have_webui():
                from datalad_deprecated.tests.test_create_sibling_webui import (
                    assert_publish_with_ui,
                )
                assert_publish_with_ui(target_dir, rootds=not suffix, flat=flat)
        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())
        # now, push should work:
        push(dataset=source, to=remote_name)
        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we
        # published already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since='^'
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #push(to=remote_name)  # no recursion
            out1 = assert_create_sshwebserver(name=remote_name,
                                              sshurl=sshurl,
                                              target_dir=target_dir_tpl,
                                              recursive=True,
                                              existing='skip',
                                              ui=have_webui(),
                                              since='^')
            assert_postupdate_hooks(target_path_, installed=have_webui(),
                                    flat=flat)
            assert_result_count(out1, 1, status='ok', sibling_name=remote_name)
            # ensure that nothing is created since since is used.
            # Also cover deprecation for since='' support. Takes just 60ms or so.
            # TODO: change or remove when removing since='' deprecation support
            out2 = assert_create_sshwebserver(name=remote_name, sshurl=sshurl,
                                              target_dir=target_dir_tpl,
                                              recursive=True, existing='skip',
                                              ui=have_webui(), since='')
            assert_result_count(out2, 1, status='notneeded',
                                sibling_name=remote_name)
        # so it was created on remote correctly and wasn't just skipped
        assert (Dataset(
            _path_(target_path_,
                   ('prefix-' if flat else '') + sub3_name)).is_installed())
        # just a smoke test
        push(dataset=source, to=remote_name, recursive=True, since='^')
def test_get_recurse_subdatasets(src=None, path=None):
    """Exercise get() recursion semantics across a two-subdataset hierarchy."""
    _mk_submodule_annex(src, 'test-annex.dat', 'irrelevant')
    ds = clone(src, path, result_xfm='datasets', return_type='item-or-list')
    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'], get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')
    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2}
    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
    assert_repo_status(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    # imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # the subdataset was already present
    assert_in_results(result, type='dataset', path=subds1.path,
                      status='notneeded')
    # we got the file
    # (fix: this assertion was accidentally duplicated verbatim)
    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True,
                    result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)
    eq_(
        {item.get('path')[len(ds.path) + 1:]
         for item in result if item['type'] == 'file'},
        annexed_files)
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)
    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # no duplicate reporting on subdataset install and annex-get of its
    # directory
    eq_(len(result), 1)
    assert_result_count(result, 1, path=opj(ds.path, 'test-annex.dat'),
                        status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)