def test_put(client):
    client, ds = client
    with client as c:
        assert c.get('/api/v1/auth').status_code == 200
        ok_clean_git(ds.path)
        testpath = 'subdir/dummy'
        file_content = '{"three": 3}'
        assert testpath not in c.get('/api/v1/file').get_json()['files']
        count = 0
        for kw, content in (
                ({}, file_content),
                ({'json': 'stream'}, file_content),
                ({'json': 'yes'}, file_content),
        ):
            targetpath = '{}_{}'.format(testpath, count)
            rq = c.put(
                '/api/v1/file/{}'.format(targetpath),
                data=json.dumps(dict(content=file_content)),
                content_type='application/json',
            )
            assert rq.status_code == 200
            assert targetpath in c.get('/api/v1/file').get_json()['files']
            ok_file_has_content(op.join(ds.path, targetpath), content=content)
            count += 1
        ok_clean_git(ds.path)

def _test_correct_publish(target_path, rootds=False, flat=True):
    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
    # ATM we run post-update hook also upon create since it might
    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))
    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True, flags=re.DOTALL)

def check_compress_file(ext, annex, path, name):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive, path=path)
    assert_true(op.exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest(exc_str(exc))
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')

def test_basics(path):
    ds = Dataset(path).create(force=True)
    # TODO: this procedure would leave a clean dataset, but `run` cannot handle dirty
    # input yet, so manual for now
    # V6FACT: this leaves the file staged, but not committed
    ds.add('code', to_git=True)
    # V6FACT: even this leaves it staged
    ds.add('.')
    # V6FACT: but this finally commits it
    ds.save()
    # TODO remove above two lines
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures', 'code', where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre', 'datalad_test_proc', where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

def check_crawl_autoaddtext(gz, ind, topurl, outd):
    ds = create(outd)
    ds.run_procedure("cfg_text2git")
    with chpwd(outd):  # TODO -- dataset argument
        template_kwargs = {
            'url': topurl,
            'a_href_match_': '.*',
        }
        if gz:
            template_kwargs['archives_re'] = r"\.gz$"
        crawl_init(template_kwargs, save=True, template='simple_with_archives')
        try:
            crawl()
        except MissingExternalDependency as exc:
            raise SkipTest(exc_str(exc))
    ok_clean_git(outd)
    ok_file_under_git(outd, "anothertext", annexed=False)
    ok_file_under_git(outd, "d/textfile", annexed=False)
    ok_file_under_git(outd, "d/tooshort", annexed=True)

    if 'compressed.dat.gz' in TEST_TREE2:
        if gz:
            ok_file_under_git(outd, "compressed.dat", annexed=False)
            ok_file_has_content(op.join(outd, "compressed.dat"), u"мама мыла раму")
        else:
            ok_file_under_git(outd, "compressed.dat.gz", annexed=True)
    else:
        raise SkipTest("Need datalad >= 0.11.2 to test .gz files decompression")

def test_basics(path, super_path):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    assert_false(ds.repo.is_under_annex("README.md"))

    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures', 'code', where='dataset')
    # commit this procedure config for later use in a clone:
    ds.add(op.join('.datalad', 'config'))
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre', 'datalad_test_proc', where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

    # make a fresh dataset:
    super = Dataset(super_path).create()
    # configure dataset to run the demo procedure prior to the clean command
    super.config.add('datalad.clean.proc-pre', 'datalad_test_proc', where='dataset')
    # 'super' doesn't know any procedures but should get to know one by
    # installing the above as a subdataset
    super.install('sub', source=ds.path)
    # run command that should trigger the demo procedure
    super.clean()
    # look for traces
    ok_file_has_content(op.join(super.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(super.path, index_modified=[op.join('.datalad', 'config')])

def test_run_no_explicit_dataset(path):
    raise SkipTest('SingularityHub is gone for now')
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.containers_add("deb", url=testimg_url,
                      call_fmt="singularity exec {img} {cmd}")

    # When no explicit dataset is given, paths are interpreted as relative to
    # the current working directory.

    # From top-level directory.
    with chpwd(path):
        containers_run("cat {inputs[0]} {inputs[0]} >doubled",
                       inputs=[op.join("subdir", "in")],
                       outputs=["doubled"])
        ok_file_has_content(op.join(path, "doubled"), "innardsinnards")

    # From under a subdirectory.
    subdir = op.join(ds.path, "subdir")
    with chpwd(subdir):
        containers_run("cat {inputs[0]} {inputs[0]} >doubled",
                       inputs=["in"], outputs=["doubled"])
    ok_file_has_content(op.join(subdir, "doubled"), "innardsinnards")

def test_addurls_url_on_collision_choose(self, path):
    ds = Dataset(path).create(force=True)
    data = deepcopy(self.data)
    for row in data:
        row["name"] = "a"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls", status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different", on_failure="ignore"),
            action="addurls", status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
    ok_file_has_content(op.join(ds.path, "a-first"), "a content", strip=True)

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
    ok_file_has_content(op.join(ds.path, "a-last"), "c content", strip=True)

def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    # for the test of setting a group, will just smoke test while using current
    # user's group
    ds.create_sibling(target_url, name=remote, shared='group', group=os.getgid())  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not have yet/thus have not used an option to record to publish
    # to that sibling by default (e.g. --set-upstream), if we run just ds.publish
    # -- should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message='No target sibling configured for default publication, please specific via --to')
    ds.publish(to=remote)  # should be ok, non recursive; BUT it (git or us?) would
    # create an empty sub/ directory
    ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 2)
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res, 1,
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'))
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the
    eq_(subds.repo.get_preferred_content('wanted', remote),
        'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))

def test_copy_file_recursion(srcdir, destdir):
    src_ds = Dataset(srcdir).create(force=True)
    src_ds.save()
    dest_ds = Dataset(destdir).create()
    copy_file([src_ds.pathobj / 'subdir', dest_ds.pathobj], recursive=True)
    # structure is mirrored
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file1', '123')
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file2', 'abc')

def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/' + DEFAULT_BRANCH], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')

def test_runner(tempfile):
    runner = Runner()
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)

def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.save('1')
    ds.create_sibling('ssh://datalad-test:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')

def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.add('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.publish(to='target', transfer_data='all')
    assert results
    ok_file_has_content(_path_(desttop, 'subdir/1'), '123')

def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # since now we base incoming on master -- and there were nothing custom
    # in master after incoming-processed, both branches should be the same
    eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # but that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    get_branch_commits = repo.get_branch_commits_ \
        if hasattr(repo, 'get_branch_commits_') else repo.get_branch_commits
    commits = {b: list(get_branch_commits(b)) for b in branches}
    # all commits out there -- init ds + init crawler + 1*(incoming, processed)
    # The number of commits in master differs based on the create variant used
    # (the one DataLad's master makes only one commit).
    ncommits_master = len(commits["master"])
    assert_in(ncommits_master, [4, 5])
    # incoming branches from master but lacks one merge commit.
    eq_(len(commits['incoming']), ncommits_master - 1)
    # incoming-processed is on master.
    eq_(len(commits['incoming-processed']), ncommits_master)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii', './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)

def test_update_git_smoke(src_path, dst_path):
    # Apparently was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(dst_path, source=src_path,
                     result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True),
        1, status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')

def test_update_git_smoke(src_path, dst_path):
    # Apparently was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(no_annex=True)
    target = install(dst_path, source=src_path,
                     result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True),
        1, action='update', status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')

def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    commits = {b: list(repo.get_branch_commits(b)) for b in branches}
    eq_(len(commits['incoming']), 1)
    eq_(len(commits['incoming-processed']), 2)
    # all commits out there -- init ds + init crawler + 1*(incoming, processed, merge)
    eq_(len(commits['master']), 6)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii', './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)

def test_newthings_coming_down(originpath, destpath):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in('origin', ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), ['master'])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin._git_custom_command([], ['git', 'config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin._git_custom_command([], ['git', 'branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    eq_(['origin/HEAD', 'origin/master'], ds.repo.get_remote_branches())
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')

def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    if ds.repo.is_managed_branch():
        raise SkipTest(
            'Skipped due to https://github.com/datalad/datalad/issues/4075')
    ds.save('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')

def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = Path(src) / 'ds'
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(ds.pathobj / 'test.txt', 'some')

def test_spaces(path):
    """
    Test whether args with spaces are correctly parsed.
    """
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures', 'code', where='dataset')
    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'with spaces', 'unrelated'])
    # check whether file has name with spaces
    ok_file_has_content(op.join(ds.path, 'with spaces'), 'hello\n')

def test_update_volatile_subds(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.add(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # grab new content of uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True), 1, type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

def test_gin_cloning(path):
    # can we clone a public ds anonymously from gin and retrieve content
    ds = clone('https://gin.g-node.org/datalad/datalad-ci-target', path)
    ok_(ds.is_installed())
    annex_path = op.join('annex', 'two')
    git_path = op.join('git', 'one')
    eq_(ds.repo.file_has_content(annex_path), False)
    eq_(ds.repo.is_under_annex(git_path), False)
    result = ds.get(annex_path)
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], op.join(ds.path, annex_path))
    ok_file_has_content(op.join(ds.path, annex_path), 'two\n')
    ok_file_has_content(op.join(ds.path, git_path), 'one\n')

def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_swallow_logs(logfile):
    lgr = logging.getLogger('datalad')
    with swallow_logs(new_level=9) as cm:
        eq_(cm.out, '')
        lgr.log(8, "very heavy debug")
        eq_(cm.out, '')  # not even visible at level 9
        lgr.log(9, "debug1")
        eq_(cm.out, '[Level 9] debug1\n')  # but this one is visible
        lgr.info("info")
        eq_(cm.out, '[Level 9] debug1\n[INFO] info\n')  # and the INFO one as well
    with swallow_logs(new_level=9, file_=logfile) as cm:
        eq_(cm.out, '')
        lgr.info("next info")
    from datalad.tests.utils import ok_file_has_content
    ok_file_has_content(logfile, "[INFO] next info", strip=True)

def check_dss():
    # we added the remote and set all the
    for subds in subdss:
        eq_(subds.repo.get_preferred_content('wanted', remote),
            'standard' if standardgroup else '')
        eq_(subds.repo.get_preferred_content('group', remote),
            standardgroup or '')
    for target_sub in target_subdss:
        ok_(target_sub.is_installed())  # it is there now
        eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
        # and we have transferred the content
        if standardgroup and standardgroup == 'backup':
            # only then content should be copied
            ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
        else:
            # otherwise nothing is copied by default
            assert_false(target_sub.repo.file_has_content('sub.dat'))

def test_clone_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.support.network.DATASETS_TOPURL', topurl):
        ds = clone('///ds', path, result_xfm='datasets', return_type='item-or-list')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')

def test_balsa_pipeline2(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        with swallow_logs(new_level=logging.WARN) as cml:
            pipeline = ofpipeline('WG33', url=topurl)
            out = run_pipeline(pipeline)
            assert_true(
                'The following files do not exist in the canonical tarball, '
                'but are individually listed files and will not be kept:'
                in cml.out)
            assert_true(
                './file1.nii varies in content from the individually downloaded '
                'file with the same name, it is removed and file from canonical '
                'tarball is kept' in cml.out)
    eq_(len(out), 1)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/config',
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './file1.nii', './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)

def test_copy_file_into_nonannex(workdir):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', check=False)

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))

def test_inputs_quotes_needed(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    cmd = "import sys; open(sys.argv[-1], 'w').write('!'.join(sys.argv[1:]))"
    # The string form of a command works fine when the inputs/outputs have
    # spaces ...
    cmd_str = "{} -c \"{}\" {{inputs}} {{outputs[0]}}".format(
        sys.executable, cmd)
    ds.run(cmd_str, inputs=["*.t*"], outputs=["out0"], expand="inputs")
    expected = u"!".join(
        list(sorted([OBSCURE_FILENAME + u".t", "bar.txt", "foo blah.txt"])) +
        ["out0"])
    with open(op.join(path, "out0")) as ifh:
        eq_(assure_unicode(ifh.read()), expected)
    # ... but the list form of a command does not. (Don't test this failure
    # with the obscure file name because we'd need to know its composition to
    # predict the failure.)
    cmd_list = [sys.executable, "-c", cmd, "{inputs}", "{outputs[0]}"]
    ds.run(cmd_list, inputs=["*.txt"], outputs=["out0"])
    ok_file_has_content(op.join(path, "out0"), "bar.txt foo!blah.txt!out0")

def _test_correct_publish(target_path, rootds=False, flat=True):
    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
    # ATM we run post-update hook also upon create since it might
    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))
    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    hook_path = _path_(target_path, '.git/hooks/post-update')
    # No longer the case -- we are no longer using absolute path in the
    # script
    # ok_file_has_content(hook_path,
    #                     '.*\ndsdir="%s"\n.*' % target_path,
    #                     re_=True, flags=re.DOTALL)
    # No absolute path (so dataset could be moved) in the hook
    with open(hook_path) as f:
        assert_not_in(target_path, f.read())
    # correct ls_json command in hook content (path wrapped in "quotes)
    ok_file_has_content(hook_path,
                        '.*datalad ls -a --json file \..*',
                        re_=True, flags=re.DOTALL)

def test_create_tree(path):
    content = u"мама мыла раму"
    create_tree(path, OrderedDict([
        ('1', content),
        ('sd', OrderedDict([
            # right away an obscure case where we have both 1 and 1.gz
            ('1', content * 2),
            ('1.gz', content * 3),
        ])),
    ]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content * 2)
    ok_file_has_content(op.join(path, 'sd', '1.gz'), content * 3, decompress=True)

def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True), 1)
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))

def test_configs(path):
    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures', 'code', where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" {{mysub}} {args}',
        where='dataset')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        where='dataset')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    # (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        'python "{script}" "{ds}" local {args}',
        where='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        where='dataset')
    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')

def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # grab new content of uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    ok_clean_git(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True), 2,
                        status='ok', type='dataset')
    ok_clean_git(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True), 1, type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now disconnected subdataset for further tests
    # not using a bound method, not giving a parentds, should
    # not be needed to get a clean dataset
    remove(op.join(ds.path, sname), check=False)
    ok_clean_git(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    ok_clean_git(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    ok_clean_git(ds.path)

def test_something(path, new_home):
    # read nothing, has nothing
    cfg = ConfigManager(dataset_only=True)
    assert_false(len(cfg))
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), dataset_only=True)
    assert_equal(len(cfg), 3)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'], ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')), ['myint', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'), ['findme'])
    assert_equal(
        sorted(cfg.items()),
        [(u'onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(cfg.get('something.user'),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'), cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ',
                    {'HOME': new_home, 'DATALAD_SNEAKY_ADDITION': 'ignore'}):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert(not exists(global_gitconfig))
        globalcfg = ConfigManager(dataset_only=False)
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert(exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset, 'datalad.unittest.youcan', where='local')
        assert(globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer listed
        assert(not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        if external_versions['cmd:git'] < '2.18':
            # older versions leave empty section behind in the file
            ok_file_has_content(global_gitconfig, '[datalad "unittest"]', strip=True)
            # remove_section to clean it up entirely
            globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')),
        dataset_only=True,
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)

def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"], "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
    script_out = cmout.getvalue()
    assert_in("echo a.in b.in >c.out", script_out)
    assert_in("echo {} >expanded-pwd".format(subdir_path), script_out)
    assert_in("echo {} >expanded-dspath".format(ds.path), script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
    assert_in("gpl3", cmout.getvalue())

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    ok_clean_git(src_path)

    # fail when asked to update a non-dataset
    assert_status('impossible',
                  source.update("update.txt", on_failure='ignore'))
    # fail when asked to update something non-existent
    assert_status('impossible',
                  source.update("nothere", on_failure='ignore'))

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2, status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')

def test_publish_depends(
        origin, src_path, target1_path, target2_path, target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)

    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))
    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')

def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"], expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"), "blah", strip=True)