def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)


def fetch_data(tmpdir, dicoms):
    """Fetches some test DICOMs using datalad"""
    data = os.path.join(tmpdir, 'data')
    api.install(path=data, source=DICOM_DIR)
    data = os.path.join(data, dicoms)
    api.get(path=data)
    return data


def test_recurse_existing(src, path):
    origin_ds = _make_dataset_hierarchy(src)

    # make sure recursion_limit works as expected across a range of depths
    for depth in range(len(origin_ds)):
        datasets = assure_list(
            install(path, source=src, recursive=True, recursion_limit=depth))
        # we expect one dataset per level
        eq_(len(datasets), depth + 1)
        rmtree(path)

    # now install all but the last two levels, no data
    root, sub1, sub2 = install(
        path, source=src, recursive=True, recursion_limit=2)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is False)
    sub3 = Dataset(opj(sub2.path, 'sub3'))
    ok_(not sub3.is_installed())
    # now get all content in all existing datasets, no new datasets installed
    # in the process
    files = root.get(curdir, recursive=True, recursion_limit='existing')
    eq_(len(files), 1)
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not sub3.is_installed())
    # now pull down all remaining datasets, no data
    sub3, sub4 = root.get(curdir, recursive=True, get_data=False)
    ok_(sub4.is_installed())
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is False)
    # aaannd all data
    files = root.get(curdir, recursive=True)
    eq_(len(files), 1)
    ok_(sub3.repo.file_has_content('file_in_annex.txt') is True)


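# Several tests here rely on a `_make_dataset_hierarchy` fixture helper whose
# definition is not part of this section.  The following is a minimal sketch
# of what it plausibly does, inferred from its call sites (a chain of nested
# datasets with one annexed file partway down); the exact names and depth are
# assumptions, not the canonical implementation.
def _make_dataset_hierarchy_sketch(path):
    origin = Dataset(path).create()
    origin_sub1 = origin.create('sub1')
    origin_sub2 = origin_sub1.create('sub2')
    # one annexed file at the sub2 level, as the assertions above expect
    with open(opj(origin_sub2.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin_sub3 = origin_sub2.create('sub3')
    origin_sub4 = origin_sub3.create('sub4')
    origin.save(recursive=True)
    # one Dataset instance per hierarchy level, top first
    return origin, origin_sub1, origin_sub2, origin_sub3, origin_sub4

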
def fetch_data(tmpdir, subject):
    """Fetches some test dicoms using datalad"""
    from datalad import api
    targetdir = op.join(tmpdir, 'QA')
    api.install(path=targetdir,
                source='http://datasets-tests.datalad.org/dbic/QA')
    api.get('{}/sourcedata/{}'.format(targetdir, subject))
    return targetdir


def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # log-level decorations are not captured while swallowing, so
            # check for both the error marker and the exit-code message
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))


def test_implicit_install(src, dst):
    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False,
                     result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])


def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset)
    # should not provide a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine


def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install('ds1', source=src)
        ds2 = install('ds2', source=src)
        results = get([opj('ds1', 'test-annex.dat')] +
                      glob(opj('ds2', '*.dat')))
        # each ds has one file
        eq_(len(results), 2)
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)


def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')


def test_install_from_tilda(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    orelpath = os.path.join(
        '~',
        os.path.relpath(opath, os.path.expanduser('~'))
    )
    assert orelpath.startswith('~')  # just to make sure no normalization
    install(tpath, source=orelpath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()


def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # get a clone to update later on:
    dest = install(path=dst_path, source=src_path, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in dest.get_dataset_handles(recursive=True):
        AnnexRepo(opj(dst_path, subds), init=True,
                  create=True).git_checkout("master")
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.install(path="update.txt")
    source.remember_state("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.git_get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))


def test_install_consistent_state(src, dest, dest2, dest3):
    # if we install a dataset whose sub-dataset "went ahead" in its branch
    # while the super-dataset was not yet updated (e.g. we installed the super
    # before), then it is desired to get the default installed branch to the
    # position the previous location was pointing to.
    # It is a mere heuristic whose assumption might not hold in some cases,
    # but it works for the most simple and thus most common ones.
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(Dataset, ds.subdatasets(
                recursive=True, fulfilled=True, result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a
            # "version" as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version
    # pointed to by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have the source super-dataset "dirty" with sub1 progressed
    # forward.  Our install should try to "retain" consistency of the
    # installation whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)


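# `put_file_under_git` above is a test utility whose definition is not part of
# this section.  Given the call site and the "does not commit" remark, a
# minimal sketch of its behavior could look like this (the parameter names
# mirror the call above, but are assumptions, not the canonical signature):
def put_file_under_git_sketch(path, filename, content=''):
    import subprocess
    fpath = opj(path, filename)
    with open(fpath, 'w') as f:
        f.write(content)
    # stage the file with plain git; note: no commit, matching the remark
    # that the caller has to save/commit afterwards
    subprocess.check_call(['git', 'add', filename], cwd=path)

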
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install(
            'ds1', source=src, result_xfm='datasets',
            return_type='item-or-list')
        ds2 = install(
            'ds2', source=src, result_xfm='datasets',
            return_type='item-or-list')
        results = get([opj('ds1', 'test-annex.dat')] +
                      glob(opj('ds2', '*.dat')))
        # each ds has one file
        assert_result_count(results, 2, type='file', action='get', status='ok')
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)


def test_install_subds_with_space(opath, tpath):
    ds = create(opath)
    ds.create('sub ds')
    # works even now, boring
    # install(tpath, source=opath, recursive=True)
    if on_windows:
        # on windows we cannot simply prepend localhost: to a path
        # and get a working sshurl...
        install(tpath, source=opath, recursive=True)
    else:
        # do via ssh!
        install(tpath, source="localhost:" + opath, recursive=True)
    assert Dataset(opj(tpath, 'sub ds')).is_installed()


def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible' status for both invalid args,
    # but all the other tasks have been accomplished
    for skipped, msg in [(opj(ds.path, 'not_existing'),
                          "path does not exist"),
                         (path_outside,
                          "path not associated with any dataset")]:
        assert_result_count(
            result, 1, status='impossible', message=msg, path=skipped)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # return of get is always a list, by default, even if just one thing was
    # gotten.  In this case 'subm 1' was already obtained above, so this will
    # get the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])


def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())


def test_install_list(path, top_path):
    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)


def test_get_recurse_dirs(o_path, c_path):
    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.add('.')

    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = ['file1.txt',
                 opj('subdir', 'file2.txt'),
                 opj('subdir', 'subsubdir', 'file3.txt'),
                 opj('subdir', 'subsubdir', 'file4.txt')]
    files_in_sub = [f for f in file_list
                    if f.startswith(with_pathsep('subdir'))]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')

    # check result:
    assert_status('ok', result)
    eq_(set([item.get('path')[len(ds.path) + 1:] for item in result
             if item['type'] == 'file']),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)


def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1',
              ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1',
                  ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1',
              ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())


def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        for skipped in [opj(ds.path, 'not_existing'), path_outside]:
            cml.assert_logged(
                msg="ignored non-existing paths: {}\n".format(
                    [opj(ds.path, 'not_existing'), path_outside]),
                regex=False, level='WARNING')
        ok_(isinstance(result, list))
        eq_(len(result), 2)
        for sub in [Dataset(opj(path, 'subm 1')),
                    Dataset(opj(path, 'subm 2'))]:
            assert_in(sub, result)
            ok_(sub.is_installed())

    # return of get is always a list, even if just one thing was gotten.
    # In this case 'subm 1' was already obtained above, so this will get the
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')


def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    from datalad.api import create
    from datalad.api import install
    ds = create(t)
    clone = install(t2, source=t)

    # first one could just know about itself or explicit url provided
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    sm_httpurls = [httpurl, httpurl + '/.git']
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    # otherwise we really have no clue where to get it from
    eq_(f(ds, 'sub', None), [])

    # but if we work on dsclone then it should also add urls deduced from its
    # own location default remote for current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    # otherwise we really have no clue where to get it from
    eq_(f(clone, 'sub'), [t + '/sub'])
    # TODO: check that http:// urls for the dataset itself get resolved
    # TODO: many more!!


def test_get_flexible_source_candidates_for_submodule(t, t2):
    f = _get_flexible_source_candidates_for_submodule
    # for now without mocking -- let's just really build a dataset
    ds = create(t)
    clone = install(
        t2, source=t,
        result_xfm='datasets', return_type='item-or-list')

    # first one could just know about itself or explicit url provided
    sshurl = 'ssh://e.c'
    httpurl = 'http://e.c'
    # Expansion with '/.git' no longer done in this helper
    #sm_httpurls = [httpurl, httpurl + '/.git']
    sm_httpurls = [httpurl]
    eq_(f(ds, 'sub'), [])
    eq_(f(ds, 'sub', sshurl), [sshurl])
    eq_(f(ds, 'sub', httpurl), sm_httpurls)
    # otherwise we really have no clue where to get it from
    eq_(f(ds, 'sub', None), [])

    # but if we work on dsclone then it should also add urls deduced from its
    # own location default remote for current branch
    eq_(f(clone, 'sub'), [t + '/sub'])
    eq_(f(clone, 'sub', sshurl), [t + '/sub', sshurl])
    eq_(f(clone, 'sub', httpurl), [t + '/sub'] + sm_httpurls)
    # otherwise we really have no clue where to get it from
    eq_(f(clone, 'sub'), [t + '/sub'])


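# A rough sketch of the candidate logic the two tests above exercise, in its
# later (no '/.git' expansion) form.  This is an illustration of the observed
# behavior, not the real helper (which lives elsewhere in datalad and handles
# many more cases); the config lookup used here is an assumption.
def _flexible_source_candidates_sketch(ds, sm_path, sm_url=None):
    candidates = []
    # a clone knows where it came from, so the same parent location is the
    # first guess for where its submodule lives
    remote_url = ds.config.get('remote.origin.url')
    if remote_url:
        candidates.append(remote_url.rstrip('/') + '/' + sm_path)
    # an explicitly recorded submodule URL is appended as-is
    if sm_url:
        candidates.append(sm_url)
    return candidates

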
def test_install_subdataset(src, path):
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')

    ok_(subds.is_installed())
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious install of an annex file within a not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert(subds2.is_installed())
    assert(subds2.repo.file_has_content('test-annex.dat'))
    # we shouldn't be able to silently ignore an attempt to provide a source
    # while "installing" a file under git
    assert_raises(FileInGitError, ds.install, opj('sub2', 'INFO.txt'),
                  source="http://bogusbogus")


def test_publish_gh1691(origin, src_path, dst_path):
    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset',
                        path=source.path)

    # if, however, a non-installed subdataset is requested explicitly,
    # it'll fail
    results = source.publish(path='subm 1', to='target',
                             on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset',
                        action='publish')


def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())


def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')
        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i
                 for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds.path, result_xfm='datasets')
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))


def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))


def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save(all_changes=True)

    # now, install that thing:
    ds, subds = install(path, source=src, recursive=True)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        result = ds.get(curdir, recursive=True)
        assert_re_in('.*Found no annex at {0}. Skipped.'.format(ds),
                     cml.out, flags=re.DOTALL)
        eq_(len(result), 1)
        eq_(result[0]['file'], opj("subds", "file_in_annex.txt"))
        ok_(result[0]['success'] is True)
        ok_(subds.repo.file_has_content("file_in_annex.txt") is True)


def test_get_mixed_hierarchy(src, path):
    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('file_in_git.txt', to_git=True)
    origin_sub.add('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list',
        result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)


def test_our_metadataset_search(tdir):
    # smoke test for basic search operations on our super-megadataset
    # expensive operation but ok
    ds = install(path=tdir, source="///")
    assert list(ds.search('.', report='*', regex=True))
    assert list(ds.search('.', report='*'))
    assert list(ds.search('.', report_matched=True))

    # and we can also provide output in different formats
    import simplejson
    from datalad.utils import swallow_outputs
    from datalad.api import search_
    with swallow_outputs() as cmo:
        assert list(search_('.', report='*', regex=True, format='json',
                            dataset=ds))
        out = cmo.out
    # since this one just absorbs all output first, we can't go one by one
    assert simplejson.loads(out)

    try:
        import yaml
    except ImportError:
        raise SkipTest("no yaml module")
    with swallow_outputs() as cmo:
        assert list(search_('.', report='*', regex=True, format='yaml',
                            dataset=ds))
        out = cmo.out
    assert yaml.safe_load(out)


def test_get_multiple_files(path, url, ds_dir):
    from os import listdir
    from datalad.support.network import RI

    file_list = [f for f in listdir(path) if not f.startswith('.')]

    # prepare urls:
    [RI(url + f) for f in file_list]

    # prepare origin
    origin = Dataset(path).create(force=True)
    origin.save(file_list, message="initial")

    ds = install(
        ds_dir, source=path,
        result_xfm='datasets', return_type='item-or-list')

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    # get two plus an invalid one:
    result = ds.get(['file1.txt', 'file2.txt', 'not_existing.txt'],
                    on_failure='ignore')
    assert_status('impossible', [result[0]])
    assert_status(['ok', 'notneeded'], result[1:])
    # explicitly given not existing file was skipped:
    # (see test_get_invalid_call)
    eq_(set([basename(item.get('path')) for item in result[1:]]),
        {'file1.txt', 'file2.txt'})
    ok_(all(ds.repo.file_has_content(['file1.txt', 'file2.txt'])))

    # get all of them:
    result = ds.get(curdir)
    # there were two files left to get:
    eq_(
        set([
            basename(item.get('path')) for item in result
            if item['type'] == 'file'
        ]),
        {'file3.txt', 'file4.txt'})
    ok_(all(ds.repo.file_has_content(file_list)))


def test_get_recurse_dirs(o_path, c_path):
    # prepare source:
    origin = Dataset(o_path).create(force=True)
    origin.save()
    ds = install(
        c_path, source=o_path,
        result_xfm='datasets', return_type='item-or-list')

    file_list = [
        'file1.txt',
        opj('subdir', 'file2.txt'),
        opj('subdir', 'subsubdir', 'file3.txt'),
        opj('subdir', 'subsubdir', 'file4.txt')
    ]
    files_in_sub = [
        f for f in file_list if f.startswith(with_pathsep('subdir'))
    ]

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    result = ds.get('subdir')
    # check result:
    assert_status('ok', result)
    eq_(
        set([
            item.get('path')[len(ds.path) + 1:] for item in result
            if item['type'] == 'file'
        ]),
        set(files_in_sub))
    # we also get one report on the subdir
    eq_(len(result) - 1, len(files_in_sub))

    # got all files beneath subdir:
    ok_(all(ds.repo.file_has_content(files_in_sub)))

    # additionally got file1.txt silently, since it has the same content as
    # subdir/subsubdir/file4.txt:
    ok_(ds.repo.file_has_content('file1.txt') is True)


def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    res = ds.subdatasets()
    assert_result_count(res, 1)
    assert_result_count(
        res, 1,
        # essentials
        path=op.join(ds.path, 'dir'),
        gitmodule_url='./dir',
        gitmodule_name='dir',
        # but also the branch, by default
        gitmodule_branch='master',
    )
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())


def test_install_recursive_with_data(src, path):
    # now again; with data:
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # installed a dataset and two subdatasets, and one file with content in
    # each
    assert_result_count(res, 3, type='dataset', action='install')
    assert_result_count(res, 3, type='file', action='get')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path)
    top_ds = YieldDatasets()(res[0])
    ok_(top_ds.is_installed())

    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all(top_ds.repo.file_has_content(
            top_ds.repo.get_annexed_files())))
    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds,))
        if isinstance(subds.repo, AnnexRepo):
            ok_(all(subds.repo.file_has_content(
                subds.repo.get_annexed_files())))


def check_datasets_datalad_org(suffix, tdir):
    # Test that git annex / datalad install, get work correctly on our
    # datasets.datalad.org.  Apparently things can break, especially with
    # introduction of the smart HTTP backend for apache2 etc
    ds = install(
        tdir,
        source='///dicoms/dartmouth-phantoms/bids_test6-PD+T2w' + suffix)
    eq_(ds.config.get('remote.origin.annex-ignore', None), None)
    # assert_result_count and not just assert_status since for some reason on
    # Windows we get two records due to a duplicate attempt (as res[1]) to
    # get it again, which is reported as "notneeded".  For the purpose of
    # this test it doesn't make a difference.
    # git-annex version is not "real" - but that is about when fix was
    # introduced
    from datalad import cfg
    if on_windows \
            and cfg.obtain("datalad.repo.version") < 6 \
            and external_versions['cmd:annex'] <= '7.20181203':
        raise SkipTest("Known to fail, needs fixed git-annex")
    assert_result_count(
        ds.get(op.join('001-anat-scout_ses-{date}', '000001.dcm')),
        1,
        status='ok')
    assert_status('ok', ds.remove())


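# A check_* body like the one above is conventionally driven by a small
# generator test in this (nose-style) test suite, one yield per parameter
# value, with `tdir` supplied by a @with_tempfile decorator on the check
# function itself.  A plausible driver sketch -- the suffix values are
# assumptions for illustration, not the canonical test:
def test_datasets_datalad_org():
    # exercise the clone URL both with and without an explicit /.git suffix
    yield check_datasets_datalad_org, ''
    yield check_datasets_datalad_org, '/.git'

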
def test_update_unborn_master(path):
    ds_a = Dataset(op.join(path, "ds-a")).create()
    ds_a.repo.call_git(["branch", "-m", DEFAULT_BRANCH, "other"])
    ds_a.repo.checkout(DEFAULT_BRANCH, options=["--orphan"])
    ds_b = install(source=ds_a.path, path=op.join(path, "ds-b"))

    ds_a.repo.checkout("other")
    (ds_a.pathobj / "foo").write_text("content")
    ds_a.save()

    # clone() will try to switch away from an unborn branch if there
    # is another ref available.  Reverse these efforts so that we can
    # test that update() fails reasonably here because we should still
    # be able to update from remotes that datalad didn't clone.
    ds_b.repo.update_ref("HEAD", "refs/heads/" + DEFAULT_BRANCH,
                         symbolic=True)
    assert_false(ds_b.repo.commit_exists("HEAD"))
    assert_status("impossible",
                  ds_b.update(merge=True, on_failure="ignore"))

    ds_b.repo.checkout("other")
    assert_status("ok",
                  ds_b.update(merge=True, on_failure="ignore"))
    eq_(ds_a.repo.get_hexsha(), ds_b.repo.get_hexsha())


def test_get_in_unavailable_subdataset(src, path):
    _make_dataset_hierarchy(src)
    root = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    targetpath = opj('sub1', 'sub2')
    targetabspath = opj(root.path, targetpath)
    with chpwd(path):
        res = get(targetabspath)
    assert_result_count(res, 2, status='ok', action='install',
                        type='dataset')
    # dry-fit result filter that only returns the result that matched the
    # requested path
    filtered = [r for r in res if only_matching_paths(r, path=targetabspath)]
    assert_result_count(
        filtered, 1, status='ok', action='install', type='dataset',
        path=targetabspath)
    # we got the dataset, and its immediate content, but nothing below
    sub2 = Dataset(targetabspath)
    ok_(sub2.is_installed())
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not Dataset(opj(targetabspath, 'sub3')).is_installed())


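# `only_matching_paths` is used above as a plain predicate over result
# records.  Its definition is not part of this section; a minimal sketch of
# the behavior the call site implies (the keyword handling is an assumption):
def only_matching_paths_sketch(res, path=None):
    # keep only result records whose 'path' equals the requested path
    return res.get('path') == path

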
def test_install_recursive_with_data(src, path):
    # now again; with data:
    ds_list = install(path, source=src, recursive=True, get_data=True)
    # installed a dataset and two subdatasets, and two files:
    eq_(len(ds_list), 5)
    eq_(sum([isinstance(i, Dataset) for i in ds_list]), 3)
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(ds_list[0].path, path)
    top_ds = ds_list[0]
    ok_(top_ds.is_installed())
    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all(top_ds.repo.file_has_content(
            top_ds.repo.get_annexed_files())))
    for sub in top_ds.get_subdatasets(recursive=True):
        subds = Dataset(opj(path, sub))
        ok_(subds.is_installed(), "Not installed: %s" % opj(path, sub))
        if isinstance(subds.repo, AnnexRepo):
            ok_(
                all(subds.repo.file_has_content(
                    subds.repo.get_annexed_files())))


def check_get_subdataset_inherit_reckless(override, path):
    src = Dataset(opj(path, "a")).create()
    src_subds = src.create("sub")
    src_subds.create("subsub")
    src.save(recursive=True)

    clone = install(opj(path, "b"), source=src, reckless="auto",
                    result_xfm="datasets", return_type="item-or-list")
    clone_sub = Dataset(clone.pathobj / "sub")
    assert_false(clone_sub.is_installed())
    clone_subsub = Dataset(clone.pathobj / "sub" / "subsub")
    clone.get(opj("sub", "subsub"), reckless=False if override else None)
    ok_(clone_sub.is_installed())
    ok_(clone_subsub.is_installed())
    for sub in [clone_sub, clone_subsub]:
        eq_(sub.config.get("datalad.clone.reckless", None),
            None if override else "auto")
        eq_(sub.config.get("annex.hardlink", None),
            None if override else "true")


def test_get_install_missing_subdataset(src, path):
    ds = install(path=path, source=src)
    ds.create(force=True)  # force, to cause dataset initialization
    subs = [Dataset(s_path) for s_path in ds.get_subdatasets(absolute=True)]
    ok_(all([not sub.is_installed() for sub in subs]))

    # we don't install anything, if no explicitly given path points into a
    # not yet installed subdataset:
    ds.get(curdir)
    ok_(all([not sub.is_installed() for sub in subs]))

    # but we do, whenever a given path is contained in such a subdataset:
    file_ = opj(subs[0].path, 'test-annex.dat')
    ds.get(file_)
    ok_(subs[0].is_installed())
    ok_(subs[0].repo.file_has_content('test-annex.dat') is True)

    # but we fulfill any handles, and dataset handles too
    ds.get(curdir, recursive=True)
    ok_(all([sub.is_installed() for sub in subs]))


def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset
    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)


def test_merge_conflict_in_subdataset_only(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    if ds_src.repo.is_managed_branch():
        # `git annex sync REMOTE` is used on an adjusted branch, but this
        # error depends on `git merge TARGET` being used.
        raise SkipTest("Test depends on non-adjusted branch")
    ds_src_sub_conflict = ds_src.create("sub_conflict")
    ds_src_sub_noconflict = ds_src.create("sub_noconflict")
    ds_src.save()

    # Set up a scenario where one subdataset has a conflict between the
    # remote and local version, but the parent dataset does not have a
    # conflict because it hasn't recorded the subdataset state.
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    ds_clone_sub_conflict = Dataset(path / "clone" / "sub_conflict")
    ds_clone_sub_noconflict = Dataset(path / "clone" / "sub_noconflict")

    (ds_src_sub_conflict.pathobj / "foo").write_text("src content")
    ds_src_sub_conflict.save(to_git=True)

    (ds_clone_sub_conflict.pathobj / "foo").write_text("clone content")
    ds_clone_sub_conflict.save(to_git=True)

    (ds_src_sub_noconflict.pathobj / "foo").write_text("src content")
    ds_src_sub_noconflict.save()

    res = ds_clone.update(merge=True, recursive=True, on_failure="ignore")
    assert_in_results(res, action="merge", status="error",
                      path=ds_clone_sub_conflict.path)
    assert_in_results(res, action="merge", status="ok",
                      path=ds_clone_sub_noconflict.path)
    assert_in_results(res, action="save", status="ok", path=ds_clone.path)
    # We saved the subdataset without a conflict...
    assert_repo_status(ds_clone_sub_noconflict.path)
    # ... but the one with the conflict leaves it for the caller to handle.
    ok_(ds_clone_sub_conflict.repo.call_git(
        ["ls-files", "--unmerged", "--", "foo"]).strip())


def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible/error' status for both invalid args,
    # but all the other tasks have been accomplished
    assert_result_count(result, 1, status='impossible',
                        message="path does not exist",
                        path=opj(ds.path, 'not_existing'))
    assert_result_count(result, 1, status='error',
                        message=("path not associated with dataset %s", ds),
                        path=path_outside)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # return of get is always a list, by default, even if just one thing was
    # gotten.  In this case 'subm 1' was already obtained above, so this will
    # get the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])


def test_get_autoresolve_recurse_subdatasets(src, path):
    origin = Dataset(src).create()
    origin_sub = origin.create('sub')
    origin_subsub = origin_sub.create('subsub')
    with open(opj(origin_subsub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.add('.', recursive=True)

    ds = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    eq_(len(ds.subdatasets(fulfilled=True)), 0)

    results = get(opj(ds.path, 'sub'), recursive=True,
                  result_xfm='datasets')
    eq_(len(ds.subdatasets(fulfilled=True, recursive=True)), 2)
    subsub = Dataset(opj(ds.path, 'sub', 'subsub'))
    ok_(subsub.is_installed())
    assert_in(subsub, results)
    # all file handles are fulfilled by default
    ok_(Dataset(opj(ds.path, 'sub', 'subsub')).repo.file_has_content(
        "file_in_annex.txt") is True)


def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True)
        # toplevel dataset was in the house already
        assert_not_in(ds, result)
        assert_in(sub2, result)
        assert_not_in(sub1, result)
        cml.assert_logged(
            msg="Target {} already exists and is not an installed dataset. "
                "Skipped.".format(sub1.path),
            regex=False, level='WARNING')


def test_update_fetch_failure(path):
    path = Path(path)
    ds_a = Dataset(path / "ds_a").create()
    s1 = ds_a.create("s1")
    ds_a.create("s2")

    ds_b = install(source=ds_a.path, path=str(path / "ds-b"),
                   recursive=True)

    # Rename s1 to make fetch fail.
    s1.pathobj.rename(s1.pathobj.parent / "s3")

    res = ds_b.update(recursive=True, on_failure="ignore")
    assert_in_results(res, status="error",
                      path=str(ds_b.pathobj / "s1"),
                      action="update")
    assert_in_results(res, status="ok",
                      path=str(ds_b.pathobj / "s2"),
                      action="update")
    assert_in_results(res, status="ok",
                      path=ds_b.path,
                      action="update")


def test_update_follow_parentds_lazy_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_src_sub = ds_src.create("sub")
    ds_src_sub.repo.checkout(DEFAULT_BRANCH, options=["-bother"])
    (ds_src_sub.pathobj / "foo").write_text("on other branch")
    ds_src_sub.save()
    ds_src_sub.repo.checkout(DEFAULT_BRANCH)
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")

    ds_src_sub.repo.checkout("other")
    ds_src.save(recursive=True)

    with patch("datalad.support.gitrepo.GitRepo.fetch") as fetch_cmd:
        ds_clone.update(follow="parentds", merge="ff-only",
                        recursive=True, on_failure="ignore")
        eq_(fetch_cmd.call_count, 2)

    # With parentds-lazy, an unneeded fetch call in the subdataset is
    # dropped.
    with patch("datalad.support.gitrepo.GitRepo.fetch") as fetch_cmd:
        ds_clone.update(follow="parentds-lazy", merge="ff-only",
                        recursive=True, on_failure="ignore")
        eq_(fetch_cmd.call_count, 1)

    if not ds_clone.repo.is_managed_branch():
        # Now the real thing.
        ds_clone.update(follow="parentds-lazy", merge="ff-only",
                        recursive=True)
        ok_(op.lexists(str(ds_clone.pathobj / "sub" / "foo")))


def test_get_subdataset_direct_fetch(path):
    path = Path(path)
    origin = Dataset(path / "origin").create()
    if origin.repo.is_managed_branch():
        # The setup here probably breaks down with adjusted branches.
        raise SkipTest("Test assumes non-adjusted branches")
    for sub in ["s0", "s1"]:
        sds = origin.create(origin.pathobj / sub)
        sds.repo.commit(msg="another commit", options=["--allow-empty"])
    origin.save()
    s0 = Dataset(origin.pathobj / "s0")
    s1 = Dataset(origin.pathobj / "s1")
    # Abandon the recorded commit so that it needs to be brought down by a
    # direct fetch.
    s0.repo.call_git(["reset", "--hard", "HEAD~"])
    s1.repo.call_git(["reset", "--hard", "HEAD~"])

    # Tweak the configuration of s0 to make the direct fetch fail.
    # Disallow direct oid fetch (default).
    s0.repo.config.set("uploadpack.allowAnySHA1InWant", "false",
                       where="local")
    # Configure the fetcher to avoid v2, which allows fetching unadvertised
    # objects regardless of the value of uploadpack.allowAnySHA1InWant.
    s0.repo.config.set("protocol.version", "0", where="local")

    # Configure s1 to succeed with direct fetch.
    s1.repo.config.set("uploadpack.allowAnySHA1InWant", "true",
                       where="local")

    clone = install(str(path / "clone"),
                    source="ssh://datalad-test:" +
                           origin.repo.pathobj.as_posix())

    res = clone.get(["s0", "s1"], on_failure="ignore")
    assert_result_count(res, 1,
                        action="install", type="dataset", status="error")
    assert_result_count(res, 1,
                        action="install", type="dataset", status="ok")


def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')
        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i
                 for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))


def test_install_subds_from_another_remote(topdir):
    # https://github.com/datalad/datalad/issues/1905
    with chpwd(topdir):
        origin_ = 'origin'
        clone1_ = 'clone1'
        clone2_ = 'clone2'

        origin = create(origin_, no_annex=True)
        clone1 = install(source=origin, path=clone1_)
        # print("Initial clone")
        clone1.create_sibling('ssh://localhost%s/%s' % (getpwd(), clone2_),
                              name=clone2_)
        # print("Creating clone2")
        clone1.publish(to=clone2_)
        clone2 = Dataset(clone2_)
        # print("Initiating subdataset")
        clone2.create('subds1')
        # print("Updating")
        clone1.update(merge=True, sibling=clone2_)
        # print("Installing within updated dataset -- should be able to
        # install from clone2")
        clone1.install('subds1')


def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.add(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.add('.')
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.add('.')
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset',
                        action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))


def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)[0]
    ok_(ds.is_installed())
    for subds_path in ds.get_subdatasets():
        subds = Dataset(opj(ds.path, subds_path))
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # uninstall data of subds:
        assert_raises(ValueError, ds.uninstall, path=subds_path)
        res = ds.uninstall(path=subds_path, recursive=True)
        ok_(all([f in res for f in annexed_files]))
        ok_(all([not i
                 for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds_path, ds.get_subdatasets())

    for subds_path in ds.get_subdatasets():
        # uninstall subds itself:
        assert_raises(ValueError, ds.uninstall, path=subds_path,
                      remove_handles=True, remove_history=True)
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds_path, remove_handles=True,
                           remove_history=True, recursive=True)
        subds = Dataset(opj(ds.path, subds_path))
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        ok_(not exists(subds.path))


def test_install_branch(path=None):
    path = Path(path)
    ds_a = create(path / "ds_a")
    ds_a.create("sub")
    repo_a = ds_a.repo
    repo_a.commit(msg="c1", options=["--allow-empty"])
    repo_a.checkout(DEFAULT_BRANCH + "-other", ["-b"])
    repo_a.commit(msg="c2", options=["--allow-empty"])
    repo_a.checkout(DEFAULT_BRANCH)

    ds_b = install(source=ds_a.path, path=str(path / "ds_b"),
                   branch=DEFAULT_BRANCH + "-other", recursive=True)
    repo_b = ds_b.repo
    eq_(repo_b.get_corresponding_branch() or repo_b.get_active_branch(),
        DEFAULT_BRANCH + "-other")

    repo_sub = Dataset(ds_b.pathobj / "sub").repo
    eq_(repo_sub.get_corresponding_branch() or repo_sub.get_active_branch(),
        DEFAULT_BRANCH)


def test_install_skip_failed_recursive(src, path):
    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(
            result, 0, path=ds.path, type='dataset')
        # subm 1 should fail to install. [1] since it comes after the '2'
        # submodule
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)


def test_get_multiple_files(path, url, ds_dir):
    from os import listdir
    from datalad.support.network import RI

    file_list = [f for f in listdir(path) if not f.startswith('.')]

    # prepare urls:
    urls = [RI(url + f) for f in file_list]

    # prepare origin
    origin = Dataset(path).create(force=True)
    origin.add(file_list)
    origin.save("initial")

    ds = install(ds_dir, source=path)

    # no content present:
    ok_(not any(ds.repo.file_has_content(file_list)))

    # get two plus an invalid one:
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(['file1.txt', 'file2.txt', 'not_existing.txt'])
    result = cme.exception.results
    # explicitly given not existing file was skipped:
    # (see test_get_invalid_call)
    eq_(set([item.get('file') for item in result]),
        {'file1.txt', 'file2.txt'})
    ok_(all([x['success'] is True
             for x in result if x['file'] in ['file1.txt', 'file2.txt']]))
    ok_(all(ds.repo.file_has_content(['file1.txt', 'file2.txt'])))

    # get all of them:
    result = ds.get(curdir)
    # there were two files left to get:
    eq_(set([item.get('file') for item in result]),
        {'file3.txt', 'file4.txt'})
    ok_(all(ds.repo.file_has_content(file_list)))


def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)[0]
    ok_(ds.is_installed())
    known_subdss = ds.get_subdatasets()
    for subds_path in ds.get_subdatasets():
        subds = Dataset(opj(ds.path, subds_path))
        ok_(subds.is_installed())

        annexed_files = subds.repo.get_annexed_files()
        subds.repo.get(annexed_files)

        # drop data of subds:
        res = ds.drop(path=subds_path)
        ok_(all([opj(subds.path, f) in res for f in annexed_files]))
        ok_(all([not i
                 for i in subds.repo.file_has_content(annexed_files)]))
        # subdataset is still known
        assert_in(subds_path, ds.get_subdatasets())

    eq_(ds.get_subdatasets(), known_subdss)

    for subds_path in ds.get_subdatasets():
        # uninstall subds itself:
        if os.environ.get('DATALAD_TESTS_DATALADREMOTE') \
                and external_versions['git'] < '2.0.9':
            raise SkipTest(
                "Known problem with GitPython. See "
                "https://github.com/gitpython-developers/GitPython/pull/521")
        res = ds.uninstall(path=subds_path)
        subds = Dataset(opj(ds.path, subds_path))
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.get_subdatasets(), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))


def test_merge_follow_parentds_subdataset_other_branch(path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    on_adjusted = ds_src.repo.is_managed_branch()
    ds_src_subds = ds_src.create("subds")
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")

    ds_src_subds.repo.call_git(["checkout", "-b", "other"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True, follow="parentds", recursive=True,
                          on_failure="ignore")
    if on_adjusted:
        # Our git-annex-sync-based approach on adjusted branches is
        # incompatible with follow='parentds'.
        assert_in_results(res, action="update", status="impossible")
        return
    else:
        assert_in_results(res, action="update", status="ok")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
    ok_(ds_clone_subds.repo.is_under_annex("foo"))

    (ds_src_subds.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)

    ds_clone_subds.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    ds_clone.update(merge=True, follow="parentds", recursive=True)
    if not on_adjusted:
        eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())


def test_gh3356(src, path):
    # create toy version of gh-3356 scenario
    origin = Dataset(src).create()
    origin_sub = origin.create(origin.pathobj / 'subdir' / 'subds')
    for p in (
            (origin_sub.pathobj / 'data' / 'file_in_annex.txt'),
            (origin_sub.pathobj / 'data' / 'file_in_annex2.txt')):
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(p.name)
    origin.save(recursive=True)
    clone = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    targetpaths = [
        opj('subdir', 'subds', 'data', 'file_in_annex.txt'),
        opj('subdir', 'subds', 'data', 'file_in_annex2.txt'),
    ]
    with chpwd(path):
        res = get(targetpaths)
    # get() must report success on two files
    assert_result_count(res, 2, action='get', type='file', status='ok')
    # status must report content for two files
    assert_result_count(
        clone.status(recursive=True, annex='all', report_filetype='eval'),
        2,
        action='status', has_content=True)


def test_report(path, orig):
    origds, subds = make_ds_hierarchy_with_metadata(orig)
    # now clone to a new place to ensure no content is present
    ds = install(source=origds.path, path=path)

    # only dataset-global metadata
    res = meta_extract(dataset=ds, process_type='dataset')
    assert_result_count(res, 1)
    core_dsmeta = _get_dsmeta_from_core_metadata(
        res[0]['metadata']['metalad_core'])
    assert_in(
        {
            '@type': 'Dataset',
            '@id': 'datalad:{}'.format(subds.repo.get_hexsha()),
            'identifier': 'datalad:{}'.format(subds.id),
            'name': 'sub'
        },
        core_dsmeta['hasPart'])
    # has not seen the content
    assert_not_in('contentbytesize', core_dsmeta)

    res = meta_extract(dataset=ds, process_type='content')
    assert (any(
        dict(tag=['one', 'two']) == r['metadata'].get('metalad_annex', None)
        for r in res))
    # we have a report on file(s)
    assert (len(res) > 0)
    # but no subdataset reports
    assert_result_count(res, 0, type='dataset')
    content_size = sum(
        (_get_dsmeta_from_core_metadata(r['metadata']['metalad_core'])
         if r['type'] == 'dataset'
         else r['metadata']['metalad_core'])['contentbytesize']
        for r in res)

    # and now all together
    res = meta_extract(dataset=ds, process_type='all')
    # got a content size report that sums up all individual sizes
    eq_((_get_dsmeta_from_core_metadata(res[0]['metadata']['metalad_core'])
         if res[0]['type'] == 'dataset'
         else res[0]['metadata']['metalad_core'])['contentbytesize'],
        content_size)