def test_symlinked_relpath(path):
    """Relative paths must work when the dataset root is a symlink.

    Regression test; initially ran into on OSX
    https://github.com/datalad/datalad/issues/2406
    """
    os.makedirs(op.join(path, "origin"))
    # dataset lives behind a symlink to 'origin'
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do in subdirectory as CWD, check that relative path
    # given to a plain command (not dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")

        # within-subdir reference to a file one level up
        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
def test_add_subdataset(path, other):
    """Saving a subdataset path registers it in the superdataset.

    Also verifies that an installed-but-unregistered clone picks up its
    source URL on save, so it can be re-obtained after uninstall.
    """
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    # FIX: was `ok_(other_clone.is_installed)` -- the bound method itself
    # is always truthy, so the assertion could never fail; call it instead
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
def test_create_subdataset_hierarchy_from_top(path):
    """Overlay a three-level subdataset hierarchy onto an existing dir tree."""
    # how it would look like to overlay a subdataset hierarchy onto
    # an existing directory tree
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    # we got a dataset ....
    ok_(ds.is_installed())
    # ... but it has untracked content
    ok_(ds.repo.dirty)
    subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
    ok_(subds.is_installed())
    ok_(subds.repo.dirty)
    subsubds = subds.create('subsub', force=True)
    ok_(subsubds.is_installed())
    ok_(subsubds.repo.dirty)
    # each level must have its own dataset identity
    ok_(ds.id != subds.id != subsubds.id)
    ds.save(updated=True, recursive=True)
    # 'file*' in each repo was untracked before and should remain as such
    # (we don't want a #1419 resurrection
    ok_(ds.repo.dirty)
    ok_(subds.repo.dirty)
    ok_(subsubds.repo.dirty)
    # if we add these three, we should get clean
    ds.save([
        'file1',
        op.join(subds.path, 'file2'),
        op.join(subsubds.path, 'file3')])
    assert_repo_status(ds.path)
    ok_(ds.id != subds.id != subsubds.id)
def test_add_mimetypes(path):
    """Files route to git vs. annex per .gitattributes mimetype rules."""
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding file to annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    # FIX: loop variable renamed from `path` -- it was shadowing the
    # function parameter `path` used above
    for fname, in_annex in (
            # Empty one considered to be application/octet-stream
            # i.e. non-text
            ('empty', True),
            ('file.txt', False),
            ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / fname
        assert_in(p, annexinfo)
        if in_annex:
            # annexed content is identified by a 'key' record
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)
def check_renamed_file(recursive, no_annex, path):
    """Helper: `git mv` a tracked file and verify save() commits the rename.

    Parametrized over `recursive` save and annex vs. plain-git repos.
    """
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    # rename via plain git, bypassing the datalad API
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds.save(recursive=recursive)
    assert_repo_status(path)
def test_run_from_subds(path):
    """Running a command from within a subdataset leaves it clean."""
    # known to crash the appveyor worker; reason never identified
    if os.environ.get('APPVEYOR') is not None:
        raise SkipTest('test causes appveyor (only) to crash, reason unknown')
    superds = Dataset(path).create()
    sub = superds.create("sub")
    sub.run("cd .> foo")
    assert_repo_status(sub.path)
def test_encoding(path):
    """save(updated=True) commits staged obscure-named content and
    leaves untracked files untouched."""
    fname_staged = u'{}_staged'.format(OBSCURE_FILENAME)
    fname_untracked = u'{}_untracked'.format(OBSCURE_FILENAME)
    ds = Dataset(path).create(force=True)
    # stage one of the two obscure-named files
    ds.repo.add(fname_staged)
    assert_repo_status(
        ds.path, added=[fname_staged], untracked=[fname_untracked])
    ds.save(updated=True)
    # only the staged file was committed
    assert_repo_status(ds.path, untracked=[fname_untracked])
def test_bf2541(path):
    """A symlink to a subdataset must not break a recursive save (bf2541)."""
    ds = create(path)
    ds.create('sub')
    assert_repo_status(ds.path)
    # plant a symlink pointing at the subdataset
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)
def test_relpath_add(path):
    """Relative paths given from a subdirectory CWD resolve against CWD."""
    ds = Dataset(path).create(force=True)
    with chpwd(op.join(path, 'dir')):
        eq_(save('testindir')[0]['path'],
            op.join(ds.path, 'dir', 'testindir'))
        # and now add all
        save('..')
    # auto-save enabled
    assert_repo_status(ds.path)
def test_bf2043p2(path):
    """save -u must commit staged content but never untracked content."""
    ds = Dataset(path).create(force=True)
    ds.repo.add('staged')
    assert_repo_status(ds.path, added=['staged'], untracked=['untracked'])
    # save -u does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save(updated=True)
    assert_repo_status(ds.path, untracked=['untracked'])
def test_bf3285(path): ds = Dataset(path).create(force=True) # Note: Using repo.pathobj matters in the "TMPDIR=/var/tmp/sym\ link" case # because assert_repo_status is based off of {Annex,Git}Repo.path, which is # the realpath'd path (from the processing in _flyweight_id_from_args). subds = create(ds.repo.pathobj.joinpath("subds")) # Explicitly saving a path does not save an untracked, unspecified # subdataset. ds.save("foo") assert_repo_status(ds.path, untracked=[subds.path])
def test_inject(path):
    """run_command(inject=True) records the given extra_info in the commit."""
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    # inject=True records the command without executing it
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
def test_create_curdir(path, path2):
    """create() without a path acts on CWD; no_annex yields plain git + .noannex."""
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    # marker file documents the no-annex choice
    ok_(op.exists(op.join(ds.path, '.noannex')))
def test_gh2043p1(path):
    """Document the interim agreement for gh-2043: save -u recommits an
    unlocked file without touching untracked content; a path save takes all."""
    # this tests documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.save('1')
    assert_repo_status(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    assert_repo_status(
        ds.path,
        # on windows we are in an unlocked branch by default, hence
        # we would see no change
        modified=[] if on_windows else ['1'],
        untracked=['2', '3'])
    # save(.) should recommit unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits
        save(path='.', updated=True)
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path)
def test_compare_content_info(path):
    """HEAD and worktree content queries agree on a clean repo (modulo bytesize)."""
    # TODO remove when `create` is RF to return the new Dataset
    ds = Dataset(path).create()
    assert_repo_status(path)

    # for a clean repo HEAD and worktree query should yield identical results
    # minus a 'bytesize' report that is readily available for HEAD, but would
    # not a stat call per file for the worktree, and is not done ATM
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(
        wt,
        {f: {k: v for k, v in iteritems(p) if k != 'bytesize'}
         for f, p in iteritems(ds.repo.get_content_info(ref='HEAD'))}
    )
def test_run_explicit(path):
    """run(explicit=True) tolerates a dirty repo and saves only declared outputs."""
    ds = Dataset(path)
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    # dirt stays dirty -- only the declared output was saved
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_basics(path, nodspath):
    """Basic run() behavior: failure handling, commit record, noop, no-command."""
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
            # let's not speculate that the exit code is always 127
        ok_(cme.code > 0)
        # failed run must leave the dataset state untouched
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('cd .> empty', message='TEST')
        assert_repo_status(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=op.join(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        noop_cmd = ':'
        res = ds.run(noop_cmd, message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run([noop_cmd], message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run('cd . > empty2', message='TEST')
        assert_result_count(res, 1, action='add',
                            path=op.join(ds.path, 'empty2'),
                            type='file', status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
def test_create_withprocedure(path):
    """create(proc_post=...) runs the given procedure on the new dataset."""
    # first without
    ds = create(path)
    assert(not op.lexists(op.join(ds.path, 'README.rst')))
    ds.remove()
    assert(not op.lexists(ds.path))
    # now for reals...
    ds = create(
        # needs to identify the dataset, otherwise post-proc
        # procedure doesn't know what to run on
        dataset=path,
        proc_post=[['cfg_metadatatypes', 'xmp', 'datacite']])
    assert_repo_status(path)
    ds.config.reload()
    # procedure result is visible in the dataset config
    eq_(ds.config['datalad.metadata.nativetype'], ('xmp', 'datacite'))
def test_update_known_submodule(path):
    """Recursive save registers an untracked subdataset, via CWD or via method."""
    def get_baseline(p):
        # fresh super with an unregistered sub inside
        ds = Dataset(p).create()
        sub = create(text_type(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)

    # attempt two, same as above but call add via reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
def test_saving_prior(topdir):
    """create(dataset=...) on dirty content must not pre-commit it into the parent."""
    # the problem is that we might be saving what is actually needed to be
    # "created"
    # we would like to place this structure into a hierarchy of two datasets
    # so we create first top one
    ds1 = create(topdir, force=True)
    # and everything is ok, stuff is not added BUT ds1 will be considered dirty
    assert_repo_status(ds1.path, untracked=['ds2'])
    # And then we would like to initiate a sub1 subdataset
    ds2 = create('ds2', dataset=ds1, force=True)
    # But what will happen is file1.txt under ds2 would get committed first into
    # ds1, and then the whole procedure actually crashes since because ds2/file1.txt
    # is committed -- ds2 is already known to git and it just pukes with a bit
    # confusing 'ds2' already exists in the index
    assert_in('ds2', ds1.subdatasets(result_xfm='relpaths'))
def test_remove_subds(path):
    """save() fixes up the state after a subdataset is wiped out externally."""
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1,
        path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1,
        path=op.join(ds.path, 'sub'),
        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)
def test_subds_path(path):
    """status() on a path inside a subdataset reports the clean subds record."""
    # a dataset with a subdataset with a file, all neatly tracked
    ds = Dataset(path).create()
    subds = ds.create('sub')
    assert_repo_status(path)
    with (subds.pathobj / 'some.txt').open('w') as f:
        f.write(u'test')
    ds.save(recursive=True)
    assert_repo_status(path)

    # querying the toplevel dataset repo for a subdspath should
    # report the subdataset record in the dataset
    # (unlike `git status`, which is silent for subdataset paths),
    # but definitely not report the subdataset as deleted
    # https://github.com/datalad/datalad-revolution/issues/17
    stat = ds.repo.status(paths=[op.join('sub', 'some.txt')])
    assert_equal(list(stat.keys()), [subds.repo.pathobj])
    assert_equal(stat[subds.repo.pathobj]['state'], 'clean')
def test_save_to_git(path):
    """repo.save(git=True) commits to git; default path goes to annex."""
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        # only annexed files carry a 'key' record
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)
def test_create(path):
    """create() honors description and custom git-init options."""
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    # shared='world' maps to core.sharedrepository '2'
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check datset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
def test_nested_create(path):
    """Subdataset creation in fresh/empty dirs works; over existing
    annexed content it is refused until that content is unannexed."""
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=(
            'collision with content in parent dataset at %s: %s',
            ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as to do this properly complicated surgery would need to
    # take place
    # MIH disable shaky test till proper dedicated upfront check is in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command(
        'unannex', annex_options=[op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
def test_gh1597(path):
    """.gitmodules edits must be saved to git, never annexed (gh-1597)."""
    if 'APPVEYOR' in os.environ:
        # issue only happens on appveyor, Python itself implodes
        # cannot be reproduced on a real windows box
        raise SkipTest(
            'this test causes appveyor to crash, reason unknown')
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex mangement
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])
def test_gh1597_simpler(path):
    """Simpler gh-1597 scenario: staged-only .gitmodules stays out of annex.

    FIX: renamed from ``test_gh1597`` -- this module already defines a
    ``test_gh1597``, and a duplicate name means the earlier definition is
    shadowed and only one of the two tests would ever be collected/run.
    """
    ds = Dataset(path).create()
    with chpwd(ds.path):
        sub = create('sub')
    ds.add('sub', save=False)
    # only staged at this point, but known, and not annexed
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    assert_repo_status(ds.path, added=[sub.path])
    # now modify low-level
    with open(opj(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path,
                       modified=[ds.pathobj / ".gitmodules"],
                       added=[sub.path])
    ds.add('.gitmodules')
    # must not come under annex mangement
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
def test_surprise_subds(path):
    """Document gh-3139: a commit-less nested repo leaks its files into the
    superdataset, while a proper subdataset does not."""
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because it content is still untracked) the subrepo
    # cannot be added (it has no commit)
    # worse: its untracked file add been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch: #datalad/3178 (that would have a commit)
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
    assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
              ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
def test_py2_unicode_command(path):
    """run() handles unicode commands/filenames, incl. py2-style byte-strings."""
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable,
                                        touch_cmd,
                                        u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    if not on_windows:  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([s.encode("utf-8")
                for s in [sys.executable, "-c", touch_cmd, u" β1 "]])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    # a bogus unicode command must fail loudly, not crash on encoding
    with assert_raises(CommandError), swallow_outputs():
        ds.run(u"bβ2.dat")
def test_save_partial_commit_shrinking_annex(path):
    """Partial save of an unlocked file alongside other staged changes.

    Behavior depends on whether the annex supports unlocked pointers.
    """
    # This is a variation on the test above. The main difference is that there
    # are other staged changes in addition to the unlocked filed.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would fail
    # with git-annex's partial index error, but rev-save (or more specifically
    # GitRepo.save_) drops the pathspec if there are no staged changes.
    ds.repo.add("staged", git=True)
    if ds.repo.supports_unlocked_pointers:
        ds.save(path="foo")
        assert_repo_status(ds.path, added=["staged"])
    else:
        # Unlike the obsolete interface.save, save doesn't handle a partial
        # commit if there were other staged changes.
        with assert_raises(CommandError) as cm:
            ds.save(path="foo")
        assert_in("partial commit", str(cm.exception))
def test_update_volatile_subds(originpath, otherpath, destpath):
    """update(merge=True) tracks subdatasets appearing/disappearing at origin."""
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # grab new content of uninstall subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True), 2,
                        action='update', status='ok', type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now disconnected subdataset for further tests
    # not using a bound method, not giving a parentds, should
    # not be needed to get a clean dataset
    remove(op.join(ds.path, sname), check=False)
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(
        res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
def test_add_recursive(path):
    """Recursive save touches only dirty datasets and saves all the way up."""
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # next one make the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(
        res, 1, path=op.join(subsub.path, 'new'), action='add', status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
def test_bf1886(path):
    """Saving symlinks that point down/up/2x-up at (sub)datasets (bf1886)."""
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simulatenously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub2'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the above but be in subdir of symlink
    # with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(
        op.join(op.pardir, op.pardir, 'sub3'),
        op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([
            op.join(parent.path, 'sub3'),
            op.join(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    assert_repo_status(parent.path)
def test_force_checkdatapresent(srcpath, dstpath):
    """push(force='checkdatapresent') re-transfers annex data regardless of
    remote availability records; data='nothing' transfers none."""
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', data='nothing')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha(DEFAULT_BRANCH),
        target.get_hexsha(DEFAULT_BRANCH))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without forced no-transfer
    # we do not give since, so the non-transfered file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # but availability update
    assert_in_results(res, action='publish', status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(len(whereis_prior) < len(
        src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # do it yet again will do nothing, because all is uptodate
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded',
                  src.push(to='target', force=None, since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='checkdatapresent')
    # no branch change, done before
    assert_in_results(res, action='publish', status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # no availability update
    assert_in_results(res, action='publish', status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res, status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target', path='.', force='checkdatapresent',
                   on_failure='ignore')
    assert_in_results(res, status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')
def check_push(annex, src_path, dst_path):
    """Core push() behavior check, parameterized over annex availability."""
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/' + DEFAULT_BRANCH, orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res, status='impossible',
        message='No push target given, and none could be auto-detected, '
                'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())

    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(res, action='publish', status='ok', target='target',
                      refspec=DEFAULT_REFSPEC, operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # configure a default merge/upstream target
    src.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                   'target', where='local')
    src.config.set('branch.{}.merge'.format(DEFAULT_BRANCH),
                   DEFAULT_BRANCH, where='local')

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    # we could say since='HEAD~2' to make things fast, or we are lazy
    # and say since='^' to indicate the state of the tracking remote,
    # which is the same, because we made two commits since the last push.
    res = src.push(to='target', since="^", jobs=2)
    assert_in_results(
        res, action='publish', status='ok', target='target',
        refspec=DEFAULT_REFSPEC,
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(res, action='copy', status='ok',
                          path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_in_results(res, action='publish', status='error',
                          target='target', refspec=DEFAULT_REFSPEC,
                          operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(res, action='publish', status='ok',
                          target='target', refspec=DEFAULT_REFSPEC,
                          operations=['forced-update'])
        eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
            list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/<default branch>'
    eq_(orig_branches, src_repo.get_branches())
def test_uninstall_recursive(path):
    """Drop/remove content in a subdataset through the superdataset."""
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    # we add one file, but we get a response for the requested
    # directory too
    res = subds.save()
    assert_result_count(res, 1, action='add', status='ok', type='file')
    assert_result_count(res, 1, action='save', status='ok', type='dataset')
    # save all -> all clean
    ds.save(recursive=True)
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)

    # now uninstall in subdataset through superdataset
    target_fname = opj('deep', 'dir', 'test')
    # sane starting point
    ok_(exists(opj(ds.path, target_fname)))
    # doesn't have the minimum number of copies for a safe drop
    res = ds.drop(target_fname, recursive=True, on_failure='ignore')
    assert_status('error', res)
    assert_result_values_cond(
        res, 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary cop" in x)

    # this should do it
    ds.drop(target_fname, check=False, recursive=True)
    # link is dead
    lname = opj(ds.path, target_fname)
    ok_(not exists(lname))
    # entire hierarchy saved
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)

    # now same with actual handle removal;
    # content is dropped already, so no checks in place anyway
    ds.remove(target_fname, check=True, recursive=True)
    ok_(not exists(lname) and not lexists(lname))
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)
def test_repo_diff(path, norepo):
    """Low-level GitRepo.diff() reporting for added/modified/untracked."""
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    assert_raises(ValueError, ds.repo.diff, fr='WTF', to='MIKE')
    # no diff
    eq_(ds.repo.diff('HEAD', None), {})
    # bogus path makes no difference
    eq_(ds.repo.diff('HEAD', None, paths=['THIS']), {})
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    eq_(ds.repo.diff(fr='HEAD~1', to='HEAD'),
        {ut.Path(ds.repo.pathobj / 'new'): {
            'state': 'added',
            'type': 'file',
            'bytesize': 5,
            'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'}})
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    eq_(ds.repo.diff(fr='HEAD', to=None),
        {ut.Path(ds.repo.pathobj / 'new'): {
            'state': 'modified',
            'type': 'file',
            # the beast is modified, but no change in shasum -> not staged
            'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6',
            'prev_gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'}})
    # per path query gives the same result
    eq_(ds.repo.diff(fr='HEAD', to=None),
        ds.repo.diff(fr='HEAD', to=None, paths=['new']))
    # also given a directory as a constraint does the same
    eq_(ds.repo.diff(fr='HEAD', to=None),
        ds.repo.diff(fr='HEAD', to=None, paths=['.']))
    # but if we give another path, it doesn't show up
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
    # make clean
    ds.save()
    assert_repo_status(ds.path)
    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # default is to report all files
    eq_(ds.repo.diff(fr='HEAD', to=None),
        {ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
            'state': 'untracked',
            'type': 'file'},
         ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
            'state': 'untracked',
            'type': 'file'}})
    # but can be made more compact
    eq_(ds.repo.diff(fr='HEAD', to=None, untracked='normal'),
        {ut.Path(ds.repo.pathobj / 'deep'): {
            'state': 'untracked',
            'type': 'directory'}})
    # again an unmatching path constraint will give an empty report
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
    # perfect match and anything underneath will do
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['deep']),
        {ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
            'state': 'untracked',
            'type': 'file'},
         ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
            'state': 'untracked',
            'type': 'file'}})
def test_diff(path, norepo):
    """High-level diff() command reporting (fr/to API)."""
    with chpwd(norepo):
        assert_raises(NoDatasetArgumentFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(
        ds.diff(fr='WTF', on_failure='ignore'), 1,
        status='impossible',
        message="Git reference 'WTF' invalid")
    # no diff
    assert_result_count(_dirty_results(ds.diff()), 0)
    assert_result_count(_dirty_results(ds.diff(fr='HEAD')), 0)
    # bogus path makes no difference
    assert_result_count(_dirty_results(ds.diff(path='THIS', fr='HEAD')), 0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    res = _dirty_results(ds.diff(fr='HEAD~1'))
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff',
                        path=op.join(ds.path, 'new'), state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(
            _dirty_results(diff(fr='HEAD~1')), 1,
            action='diff', path=op.join(ds.path, 'new'), state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(ds.diff()), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(res, 1, action='diff',
                        path=op.join(ds.path, 'new'), state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff(path='otherpath'), 0)
    # giving the right path must work though
    assert_result_count(ds.diff(path='new'), 1,
                        action='diff', path=op.join(ds.path, 'new'),
                        state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(ds.diff()), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(ds.diff()), 0)
    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(res, 1, state='untracked', type='directory',
                        path=op.join(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(ds.diff(untracked='all'), 2,
                        state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(ds.diff(path='deep'), 1, state='untracked',
                        path=op.join(ds.path, 'deep'), type='directory')
    assert_result_count(ds.diff(path='deep'), 1, state='untracked',
                        path=op.join(ds.path, 'deep'))
    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(ds.diff(), 1, state='untracked',
                        path=op.join(ds.path, 'deep', 'down'), type='file')
def test_rerun(path, nodspath):
    """Exercise rerun(): basic rerun, skipping, ranges, and reports.

    Fix: the check of the last report entry's commit was a bare comparison
    expression (``report[-1]["commit"] == ds.repo.get_hexsha()``) whose
    result was discarded — it could never fail. It is now an actual
    assertion via ``eq_``.
    """
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, last_commit_msg(sub.repo))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision=DEFAULT_BRANCH + "~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(last_commit_msg(ds.repo).splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since=DEFAULT_BRANCH + "~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files
    # that would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other
            # config file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    # was a no-op bare comparison before; assert it for real now
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
def _test_create_store(host, base_path, ds_path, clone_path):
    """Create a RIA sibling (plain and recursive) and clone back from it."""
    ds = Dataset(ds_path).create(force=True)
    subds = ds.create('sub', force=True)
    subds2 = ds.create('sub2', force=True, annex=False)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # don't specify special remote. By default should be
    # git-remote + "-storage"
    res = ds.create_sibling_ria("ria+ssh://test-store:", "datastore")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    # remotes exist, but only in super
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub_siblings})
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub2_siblings})

    # TODO: post-update hook was enabled

    # check bare repo:
    git_config = Path(base_path) / ds.id[:3] / ds.id[3:] / 'config'
    assert git_config.exists()
    content = git_config.read_text()
    assert_in("[datalad \"ora-remote\"]", content)
    super_uuid = ds.config.get(
        "remote.{}.annex-uuid".format('datastore-storage'))
    assert_in("uuid = {}".format(super_uuid), content)

    # implicit test of success by ria-installing from store:
    ds.push(to="datastore")
    with chpwd(clone_path):
        if host:
            # note, we are not using the "test-store"-label here
            clone('ria+ssh://{}{}#{}'.format(host, base_path, ds.id),
                  path='test_install')
        else:
            # TODO: Whenever ria+file supports special remote config
            # (label), change here:
            clone('ria+file://{}#{}'.format(base_path, ds.id),
                  path='test_install')
        installed_ds = Dataset(op.join(clone_path, 'test_install'))
        assert installed_ds.is_installed()
        assert_repo_status(installed_ds.repo)
        eq_(installed_ds.id, ds.id)
        # Note: get_annexed_files() always reports POSIX paths.
        assert_in('ds/file1.txt',
                  installed_ds.repo.get_annexed_files())
        assert_result_count(
            installed_ds.get(op.join('ds', 'file1.txt')),
            1, status='ok', action='get',
            path=op.join(installed_ds.path, 'ds', 'file1.txt'))

    # now, again but recursive.
    res = ds.create_sibling_ria("ria+ssh://test-store:", "datastore",
                                recursive=True, existing='reconfigure')
    eq_(len(res), 3)
    assert_result_count(res, 1, path=str(ds.pathobj), status='ok',
                        action="create-sibling-ria")
    assert_result_count(res, 1, path=str(subds.pathobj), status='ok',
                        action="create-sibling-ria")
    assert_result_count(res, 1, path=str(subds2.pathobj), status='ok',
                        action="create-sibling-ria")

    # remotes now exist in super and sub
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name'] for s in sub_siblings})
    # but no special remote in plain git subdataset:
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'datastore', 'here'}, {s['name'] for s in sub2_siblings})

    # for testing trust_level parameter, redo for each label:
    for trust in ['trust', 'semitrust', 'untrust']:
        ds.create_sibling_ria("ria+ssh://test-store:", "datastore",
                              existing='reconfigure', trust_level=trust)
        res = ds.repo.repo_info()
        assert_in(
            '[datastore-storage]',
            [r['description']
             for r in res['{}ed repositories'.format(trust)]])
def test_annotate_paths(dspath, nodspath):
    """Exercise annotate_paths() across CWD, refds, recursion and files."""
    # this test doesn't use API`remove` to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost
        # the same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(without_path, 1, type='dataset', path=dspath)
        assert_result_count(pwd_res, 1, type='dataset', path=dspath,
                            orig_request='.', raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input',
                           'orig_request', 'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input',
                           'orig_request', 'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
        # when we point to a list of directories, there should be no
        # multiple rediscoveries of the subdatasets
        with swallow_logs(new_level=logging.DEBUG) as cml:
            annotate_paths(path=['a', 'b'])
            eq_(cml.out.count(
                'Resolved dataset for subdataset reporting/modification'),
                1)

    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0] if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(base_res, 1, type='dataset',
                        parentds=parentds.path,
                        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1] if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(path=opj(dspath, 'a'), recursive=True,
                                  force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(res, 1, type='dataset', status='',
                        parentds=dspath, path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
            path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')],
            recursive=True, result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'),
        status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for same file again, use 'notneeded' for unavailable to try
    # trigger any difference
    droppedres = ds.annotate_paths(fpath,
                                   unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath, recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state
    # now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k] for k in before_res[0]
           if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled
    # which will make the uninstalled subdataset like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])

    # feed annotated paths into annotate_paths, it shouldn't change things
    # upon second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to the sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
def test_get_modified_subpaths(path):
    """Exercise get_modified_subpaths() against various revision ranges."""
    ds = Dataset(path).create(force=True)
    suba = ds.create('ba', force=True)
    subb = ds.create('bb', force=True)
    subsub = ds.create(opj('bb', 'bba', 'bbaa'), force=True)
    ds.save(recursive=True)
    assert_repo_status(path)

    orig_base_commit = ds.repo.get_hexsha()

    # nothing was modified compared to the status quo, output must be empty
    eq_([], list(get_modified_subpaths(
        [dict(path=ds.path)], ds, orig_base_commit)))

    # modify one subdataset
    create_tree(subsub.path, {'added': 'test'})
    subsub.save('added')

    # it will replace the requested path with the path of the closest
    # submodule that is modified
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, orig_base_commit),
        1, type='dataset', path=subb.path)

    # make another one dirty
    create_tree(suba.path, {'added': 'test'})
    # now a single query path will result in the two modified subdatasets
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, orig_base_commit),
        2, type='dataset')

    # now save uptop, this will the new state of subb, but keep suba dirty
    ds.save(subb.path, recursive=True)
    # now if we ask for what was last saved, we only get the new state of
    # subb
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD~1..HEAD'),
        1, type='dataset', path=subb.path)
    # comparing the working tree to head will the dirty suba instead
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD'),
        1, type='dataset', path=suba.path)

    # add/save everything, become clean
    ds.save(recursive=True)
    assert_repo_status(path)
    # nothing is reported as modified
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD'), 0)
    # but looking all the way back, we find all changes
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, orig_base_commit),
        2, type='dataset')

    # now we ask specifically for the file we added to subsub above
    query = [dict(path=opj(subsub.path, 'added'))]
    res = list(get_modified_subpaths(query, ds, orig_base_commit))
    # we only get this one result back, and not all the submodule state
    # changes that were also saved in the superdatasets
    assert_result_count(res, 1)
    assert_result_count(res, 1, type='file',
                        path=opj(subsub.path, 'added'), state='added')
    # but if we are only looking at the last saved change (suba), we will
    # not find our query return something
    res = get_modified_subpaths(query, ds, 'HEAD^')
    assert_result_count(res, 0)

    # deal with removal (force insufficient copies error)
    ds.remove(suba.path, check=False)
    assert_repo_status(path)
    res = list(get_modified_subpaths(
        [dict(path=ds.path)], ds, 'HEAD~1..HEAD'))
    # removed submodule + .gitmodules update
    assert_result_count(res, 2)
    assert_result_count(res, 1, type_src='dataset', path=suba.path)
def test_diff(path, norepo):
    """High-level _diff() reporting (revision/staged API).

    Fix: the "no diff" check ``assert_result_count(ds._diff(), 0)`` was
    literally duplicated back to back; one redundant copy removed.
    """
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(
        ds._diff(revision='WTF', on_failure='ignore'),
        1, status='impossible', message="fatal: bad revision 'WTF'")
    # no diff
    assert_result_count(ds._diff(), 0)
    assert_result_count(ds._diff(revision='HEAD'), 0)
    # bogus path makes no difference
    assert_result_count(ds._diff(path='THIS', revision='HEAD'), 0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    res = ds._diff(revision='HEAD~1')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=opj(ds.path, 'new'), state='added')
    # no diff against HEAD
    assert_result_count(ds._diff(), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    for diffy in (None, 'HEAD'):
        res = ds._diff(revision=diffy)
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, action='diff', path=opj(ds.path, 'new'),
            state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds._diff('otherpath'), 0)
    # giving the right path must work though
    assert_result_count(
        ds._diff('new'), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no diff, because we staged the modification
    assert_result_count(ds._diff(), 0)
    # but we can get at it
    assert_result_count(
        ds._diff(staged=True), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    # OR
    assert_result_count(
        ds._diff(revision='HEAD'), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    ds.save()
    assert_repo_status(ds.path)
    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = ds._diff()
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory',
        path=opj(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(
        ds._diff(report_untracked='all'), 2,
        state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds._diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        ds._diff(path='deep'), 1, state='untracked',
        path=opj(ds.path, 'deep'), type='directory')
    assert_result_count(
        ds._diff(path='deep'), 1, state='untracked',
        path=opj(ds.path, 'deep'))
    # now we stage one of the two files in deep
    ds.repo.add(opj('deep', 'down2'), git=True)
    # without any reference it will ignore the staged stuff and report the
    # remaining untracked file
    assert_result_count(
        ds._diff(), 1, state='untracked',
        path=opj(ds.path, 'deep', 'down'), type='file')
    res = ds._diff(staged=True)
    assert_result_count(
        res, 1, state='untracked',
        path=opj(ds.path, 'deep', 'down'), type='file')
    assert_result_count(
        res, 1, state='added',
        path=opj(ds.path, 'deep', 'down2'), type='file')
def _test_version_check(host, dspath, store):
    """Verify RIA store layout-version files and version-mismatch handling."""
    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding
    # versioning, since it's the "correct" version. Note that "fsck" is an
    # arbitrary choice. We need just something to talk to the special
    # remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get
    #       an actual error result
    assert_raises(CommandError, ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true",
                  where='local')
    ds.repo.copy_to('new_file', 'store')
def test_diff_recursive(path):
    """Exercise recursive diff reporting across a dataset/subdataset pair.

    Verifies that a recursive diff reports subdataset additions, untracked
    and modified content at both levels, and that after saving, the diff
    between adjacent commits matches the non-recursive result.
    NOTE(review): a second ``test_diff_recursive`` appears later in this file
    and, if both are in the same module, shadows this one — confirm intent.
    """
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(fr=DEFAULT_BRANCH + '~1', to=DEFAULT_BRANCH,
                  result_renderer=None)
    assert_result_count(
        res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH, result_renderer=None)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(
        ds.path,
        {'onefile': 'tobeadded',
         'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, untracked='all', result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=op.join(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path,
        type='dataset')
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=op.join(sub.path, 'twofile'), type='file')
    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)

    # on an adjusted branch HEAD does not track the corresponding branch,
    # hence the explicit choice of ref here
    head_ref = DEFAULT_BRANCH if ds.repo.is_managed_branch() else 'HEAD'

    # look at the last change, only one file was added
    res = ds.diff(fr=head_ref + '~1', to=head_ref, result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True, fr=head_ref + '~1', to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')

    if ds.repo.is_managed_branch():
        raise SkipTest(
            "Test assumption broken: https://github.com/datalad/datalad/issues/3818"
        )
    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True, fr=head_ref + '~2', to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(sub.path, 'twofile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path,
        type='dataset')
def test_update_simple(origin, src_path, dst_path):
    """End-to-end check of ``update()`` for fetch, merge and path handling.

    Covers: 'notneeded' without siblings, 'impossible' for unknown siblings,
    fetch-only vs. merge semantics, subdataset path constraints (valid,
    invalid, and mixed path arguments, with and without recursion), running
    from within the dataset via CWD, surviving subdataset removal, and
    recursive merge bringing in new subdataset content.
    Fixture-provided paths: ``origin`` (source), ``src_path``, ``dst_path``.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3, status='ok', type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok',
                        path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod) to
    # operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since "pwd" dataset is not
        # considered, and would be relative path (as specified).
        # In 0.12.0 - it would include implicit pwd dataset, and paths would be absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update, path=p, action='update',
                              status='ok', type='dataset')

        # and with merge we would also try to save (but there would be no changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # 2 of "updates" really.
        assert_in_results(res_merge, action='update', status='ok',
                          type='dataset')
        assert_in_results(res_merge, action='save', status='notneeded',
                          type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2,
            status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        action='update', status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        action='update', status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
def test_diff_recursive(path):
    """Exercise recursive diff reporting (HEAD-relative variant).

    NOTE(review): this function has the same name as an earlier
    ``test_diff_recursive`` in this file. If both live in one module, this
    later definition shadows the earlier one and only this one runs —
    likely two revisions of the same test were merged; confirm and drop one.
    This variant uses plain 'HEAD~N' refs and no ``result_renderer``
    argument, and lacks the managed-branch handling of the other variant.
    """
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(
        res, 1, action='diff', state='added', path=sub.path, type='dataset')
    # now recursive
    res = ds.diff(recursive=True, fr='HEAD~1', to='HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(
        ds.path,
        {'onefile': 'tobeadded',
         'sub': {'twofile': 'tobeadded'}})
    res = ds.diff(recursive=True, untracked='all')
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=op.join(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path,
        type='dataset')
    assert_result_count(
        res, 1, action='diff', state='untracked',
        path=op.join(sub.path, 'twofile'), type='file')
    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)

    # look at the last change, only one file was added
    res = ds.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(res), 1)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True, fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')

    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True, fr='HEAD~2', to='HEAD')
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(ds.path, 'onefile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='added',
        path=op.join(sub.path, 'twofile'), type='file')
    assert_result_count(
        res, 1, action='diff', state='modified', path=sub.path,
        type='dataset')
def check_merge_follow_parentds_subdataset_detached(on_adjusted, path):
    """Check ``update(follow="parentds")`` with a detached subdataset HEAD.

    Builds source/s0/s1 nesting, detaches s1's HEAD, records new revisions,
    and verifies that a recursive follow-parentds update either merges the
    recorded revision (regular branches) or fails as 'impossible' (adjusted
    branches, or when the recorded revision is not advertised/fetchable).
    ``on_adjusted`` toggles the adjusted-branch variant; ``path`` is a
    fixture-provided temporary directory.
    """
    # Note: For the adjusted case, this is not much more than a smoke test that
    # on an adjusted branch we fail sensibly. The resulting state is not easy
    # to reason about nor desirable.
    path = Path(path)
    # $path/source/s0/s1
    # The additional dataset level is to gain some confidence that this works
    # for nested datasets.
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        if not on_adjusted:
            raise SkipTest("System only supports adjusted branches. "
                           "Skipping non-adjusted test")
    ds_src_s0 = ds_src.create("s0")
    ds_src_s1 = ds_src_s0.create("s1")
    ds_src.save(recursive=True)
    if on_adjusted:
        # Note: We adjust after creating all the datasets above to avoid a bug
        # fixed in git-annex 7.20191024, specifically bbdeb1a1a (sync: Fix
        # crash when there are submodules and an adjusted branch is checked
        # out, 2019-10-23).
        for ds in [ds_src, ds_src_s0, ds_src_s1]:
            _adjust(ds.repo)
        ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_s1 = Dataset(ds_clone.pathobj / "s0" / "s1")

    # detach s1's HEAD and record a new revision in the parent chain
    ds_src_s1.repo.checkout("master^0")
    (ds_src_s1.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True, recursive=True, follow="parentds",
                          on_failure="ignore")
    if on_adjusted:
        # The top-level update is okay because there is no parent revision to
        # update to.
        assert_in_results(
            res, status="ok", path=ds_clone.path, action="update")
        # The subdataset, on the other hand, is impossible.
        assert_in_results(
            res, status="impossible", path=ds_clone_s1.path, action="update")
        return
    assert_repo_status(ds_clone.path)
    # We brought in the revision and got to the same state of the remote.
    # Blind saving here without bringing in the current subdataset revision
    # would have resulted in a new commit in ds_clone that reverting the
    # last subdataset ID recorded in ds_src.
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())

    # Record a revision in the parent and then move HEAD away from it so that
    # the explicit revision fetch fails.
    (ds_src_s1.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)
    ds_src_s1.repo.checkout(
        ds_src_s1.repo.get_corresponding_branch("master"))
    # This is the default, but just in case:
    ds_src_s1.repo.config.set("uploadpack.allowAnySHA1InWant", "false",
                              where="local")
    res = ds_clone.update(merge=True, recursive=True, follow="parentds",
                          on_failure="ignore")
    # The fetch with the explicit ref fails because it isn't advertised.
    assert_in_results(
        res, status="impossible", path=ds_clone_s1.path, action="update")
    # Back to the detached head.
    ds_src_s1.repo.checkout("HEAD@{1}")

    # Set up a case where update() will not resolve the sibling.
    ds_clone_s1.repo.call_git(["branch", "--unset-upstream"])
    ds_clone_s1.config.reload(force=True)
    ds_clone_s1.repo.call_git(["remote", "add", "other", ds_src_s1.path])
    res = ds_clone.update(recursive=True, follow="parentds",
                          on_failure="ignore")
    # In this case, update() won't abort if we call with merge=False, but
    # it does if the revision wasn't brought down in the `fetch(all_=True)`
    # call.
    assert_in_results(
        res, status="impossible", path=ds_clone_s1.path, action="update")
def test_save(path):
    """Core ``save()`` behavior: new/modified files, CWD use, subdatasets, tags.

    Walks through saving a new file, a modification, saving from the PWD
    without an explicit dataset, saving staged content, multi-file saves,
    'notneeded' on a no-op save, committing subdataset state changes, and
    version-tagging (including the error on re-tagging an existing tag).
    ``path`` is a fixture-provided existing dataset directory.
    """
    ds = Dataset(path)

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)

    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(
        res, 1,
        action='save', type='dataset', path=ds.path,
        message=('cannot tag this version: %s',
                 "fatal: tag 'new_sub' already exists"))
def test_publish_simple(origin, src_path, dst_path): # prepare src source = install(src_path, source=origin, recursive=True) # forget we cloned it (provide no 'origin' anymore), which should lead to # setting tracking branch to target: source.repo.remove_remote("origin") # create plain git at target: target = GitRepo(dst_path, create=True) target.checkout("TMP", ["-b"]) source.repo.add_remote("target", dst_path) res = publish(dataset=source, to="target", result_xfm='datasets') eq_(res, [source]) assert_repo_status(source.repo, annex=None) assert_repo_status(target, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) # don't fail when doing it again res = publish(dataset=source, to="target") # and nothing is pushed assert_result_count(res, 1, status='notneeded') assert_repo_status(source.repo, annex=None) assert_repo_status(target, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) assert_git_annex_branch_published(source.repo, target) # 'target/<default branch>' should be tracking branch at this point, so # try publishing without `to`: # MIH: Nope, we don't automatically add this anymore # some modification: with open(opj(src_path, 'test_mod_file'), "w") as f: f.write("Some additional stuff.") source.save(opj(src_path, 'test_mod_file'), to_git=True, message="Modified.") assert_repo_status(source.repo, annex=None) res = publish(dataset=source, to='target', result_xfm='datasets') eq_(res, [source]) assert_repo_status(dst_path, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) assert_git_annex_branch_published(source.repo, target) eq_(filter_fsck_error_msg(source.repo.fsck()), filter_fsck_error_msg(source.repo.fsck(remote='target')))
def test_aggregation(path):
    """Aggregate metadata across a three-level dataset hierarchy and query it.

    Configures frictionless_datapackage native metadata on super/sub/subsub
    datasets, aggregates recursively into all levels, verifies save/report
    counts, then clones the superdataset and checks that the aggregated
    metadata (and search over it) is identical in the clone, before and
    after installing a subdataset.
    """
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    assert_status('ok', ds.save(recursive=True))
    # while we are at it: do it again, nothing should happen
    assert_status('notneeded', ds.save(recursive=True))

    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.meta_aggregate(recursive=True, into='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='meta_aggregate')
    # the respective super datasets see two saves, one to record the change
    # in the subdataset after its own aggregation, and one after the super
    # updated with aggregated metadata
    assert_result_count(res, 5, status='ok', action='save', type='dataset')
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.meta_dump(reporton='aggregates', recursive=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also report layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.meta_dump(recursive=True)
    # basic sanity check
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(
        [r for r in origres if r['path'].endswith('.json')],
        3, type='file')
    # Now that we have annex.key
    # three different IDs
    eq_(
        3,
        len(set([
            _get_dsid_from_core_metadata(s['metadata']['metalad_core'])
            for s in origres
            if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name']
                 == assure_unicode(name)
                 for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(
        op.join(path, 'clone'), source=ds.path,
        result_xfm='datasets', return_type='item-or-list')
    # ID mechanism works
    eq_(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.meta_dump()
    # basic sanity check
    assert_result_count(cloneres, 1, type='dataset')
    # payload file
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok', clone.install('sub', result_xfm=None,
                                      return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(
                r['query_matched']['frictionless_datapackage.name'],
                r['metadata']['frictionless_datapackage']['name'])
def test_publish_plain_git(origin, src_path, dst_path): # TODO: Since it's mostly the same, melt with test_publish_simple # prepare src source = install(src_path, source=origin, recursive=True) # forget we cloned it (provide no 'origin' anymore), which should lead to # setting tracking branch to target: source.repo.remove_remote("origin") # create plain git at target: target = GitRepo(dst_path, create=True) target.checkout("TMP", ["-b"]) source.repo.add_remote("target", dst_path) res = publish(dataset=source, to="target", result_xfm='datasets') eq_(res, [source]) assert_repo_status(source.repo, annex=None) assert_repo_status(target, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) # don't fail when doing it again res = publish(dataset=source, to="target") # and nothing is pushed assert_result_count(res, 1, status='notneeded') assert_repo_status(source.repo, annex=None) assert_repo_status(target, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) # some modification: with open(opj(src_path, 'test_mod_file'), "w") as f: f.write("Some additional stuff.") source.save(opj(src_path, 'test_mod_file'), to_git=True, message="Modified.") assert_repo_status(source.repo, annex=None) res = publish(dataset=source, to='target', result_xfm='datasets') eq_(res, [source]) assert_repo_status(dst_path, annex=None) eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)), list(source.repo.get_branch_commits_(DEFAULT_BRANCH))) # amend and change commit msg in order to test for force push: source.repo.commit("amended", options=['--amend']) # push should be rejected (non-fast-forward): assert_raises(IncompleteResultsError, publish, dataset=source, to='target', result_xfm='datasets') # push with force=True works: res = publish(dataset=source, to='target', result_xfm='datasets', force=True) eq_(res, [source])
def test_push_recursive(
        origin_path, src_path, dst_top, dst_sub, dst_subnoannex, dst_subsub):
    """Recursive ``push()`` over a nested hierarchy with mixed annex use.

    Builds a superdataset with two subdatasets (one without annex) and one
    sub-subdataset, then verifies: errors for missing sibling targets,
    successful recursive push once targets exist, branch equality between
    sources and targets, 'notneeded' on rerun, selective push via ``since``,
    annexed-content copying, and pushing to a no-annex target that later
    gains an annex.
    """
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checkout out recursively
    # running on a clone should make the test scenario more different than
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    subs = top.get('.', recursive=True, get_data=False, result_xfm='datasets')
    # order for '.' should not be relied upon, so sort by path
    sub, subsub, subnoannex = sorted(subs, key=lambda ds: ds.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(
        res, path=top.path, type='dataset',
        refspec=DEFAULT_REFSPEC,
        operations=['new-branch'], action='publish', status='ok',
        target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='error', type='dataset', path=d.path,
            message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(
        subnoannex, 'target', dst_subnoannex, annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(
        res, path=top.path, type='dataset',
        action='publish', status='notneeded', target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)
    # all corresponding branches match across all datasets
    for s, d in zip((top, sub, subnoannex, subsub),
                    (target_top, target_sub, target_subnoannex,
                     target_subsub)):
        eq_(list(s.repo.get_branch_commits_(DEFAULT_BRANCH)),
            list(d.get_branch_commits_(DEFAULT_BRANCH)))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of the default branch
    res = top.push(to="target", recursive=True)
    assert_not_in_results(res, status='ok', refspec=DEFAULT_REFSPEC)
    assert_in_results(res, status='notneeded', refspec=DEFAULT_REFSPEC)

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(
        ('ok', 'notneeded'),
        top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)
    # file content copied too
    assert_in_results(
        res, action='copy', status='ok', path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modification
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)

    # now only publish the latter one
    res = top.push(to="target", since=DEFAULT_BRANCH + '~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(
        res, action='copy', status='ok',
        path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(
        res, status='ok', type='dataset', path=sub.path,
        refspec=DEFAULT_REFSPEC)
    for d in (top, subnoannex, subsub):
        assert_in_results(
            res, status='notneeded', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)

    # if noannex target gets some annex, we still should not fail to push
    target_subnoannex.call_git(['annex', 'init'])
    # just to ensure that we do need something to push
    (subnoannex.pathobj / "newfile").write_text("content")
    subnoannex.save()
    res = subnoannex.push(to="target")
    assert_in_results(res, status='ok', type='dataset')
def test_publish_depends(
        origin, src_path, target1_path, target2_path, target3_path):
    """Publication dependencies: validation, reconfiguration, and cascading.

    Creates three siblings, checks that an unknown sibling named as a
    ``publish_depends`` yields an error, that reconfiguring replaces a
    polluted dependency config, and that publishing to a sibling with a
    dependency pushes content to the dependency first.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote sibling on two "different" hosts
    source.create_sibling(
        'ssh://datalad-test' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        path=source.path,
        status='error',
        message=(
            'unknown sibling(s) specified as publication dependency: %s',
            set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    assert_repo_status(src_path)

    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.save('probe1')
    assert_repo_status(src_path)

    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))
    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
def test_subdataset_save(path): parent = Dataset(path).create() sub = parent.create('sub') assert_repo_status(parent.path) create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}}) sub.save('new') # defined state: one untracked, modified (but clean in itself) subdataset assert_repo_status(sub.path) assert_repo_status(parent.path, untracked=['untracked'], modified=['sub']) # `save sub` does not save the parent!! with chpwd(parent.path): assert_status('notneeded', save(dataset=sub.path)) assert_repo_status(parent.path, untracked=['untracked'], modified=['sub']) # `save -u .` saves the state change in the subdataset, # but leaves any untracked content alone with chpwd(parent.path): assert_status('ok', parent.save(updated=True)) assert_repo_status(parent.path, untracked=['untracked']) # get back to the original modified state and check that -S behaves in # exactly the same way create_tree(parent.path, {'sub': {"new2": "wanted2"}}) sub.save('new2') assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
def test_ria_http(lcl, storepath, url):
    """Test cloning datasets from a RIA store over HTTP.

    Covers: cloning super- and subdataset by dataset ID, automatic
    subdataset source-candidate configuration, version-pinned clones via
    tags, failure on a non-existing version, URL rewriting via
    ``url.<base>.insteadof``, and alias-based clones (``#~name``).
    """
    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)
    for d in (ds, subds):
        _move2store(storepath, d)
    # location of superds in store (RIA layout: first 3 chars of id / rest)
    storeds_loc = str(storepath / ds.id[:3] / ds.id[3:])
    # now we should be able to clone from a ria+http url
    # the super
    riaclone = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone',
    )

    # due to default configuration, clone() should automatically look for the
    # subdataset in the store, too -- if not the following would fail, because
    # we never configured a proper submodule URL
    riaclonesub = riaclone.get(
        op.join('subdir', 'subds'), get_data=False,
        result_xfm='datasets', return_type='item-or-list')

    # both datasets came from the store and must be set up in an identical
    # fashion
    for origds, cloneds in ((ds, riaclone), (subds, riaclonesub)):
        eq_(origds.id, cloneds.id)
        if not ds.repo.is_managed_branch():
            # test logic cannot handle adjusted branches
            eq_(origds.repo.get_hexsha(), cloneds.repo.get_hexsha())
        ok_(cloneds.config.get('remote.origin.url').startswith(url))
        eq_(cloneds.config.get('remote.origin.annex-ignore'), 'true')
        eq_(cloneds.config.get('datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)

    # now advance the source dataset
    (ds.pathobj / 'newfile.txt').write_text('new')
    ds.save()
    ds.publish(to='store')
    # refresh dumb-HTTP server metadata so the new state is clonable
    Runner(cwd=storeds_loc).run(['git', 'update-server-info'])
    # re-clone as before
    riaclone2 = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone2',
    )
    # and now clone a specific version, here given be the tag name
    riaclone_orig = clone(
        'ria+{}#{}@{}'.format(url, ds.id, 'original'),
        lcl / 'clone_orig',
    )
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        # we got the precise version we wanted
        eq_(riaclone.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())
        # and not the latest
        eq_(riaclone2.repo.get_hexsha(), ds.repo.get_hexsha())
        neq_(riaclone2.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())

    # attempt to clone a version that doesn't exist
    with swallow_logs():
        with assert_raises(IncompleteResultsError) as cme:
            clone('ria+{}#{}@impossible'.format(url, ds.id),
                  lcl / 'clone_failed')
        assert_in("not found in upstream", str(cme.exception))

    # lastly test if URL rewriting is in effect
    # on the surface we clone from an SSH source identified by some custom
    # label, no full URL, but URL rewriting setup maps it back to the
    # HTTP URL used above
    with patch_config({
            'url.ria+{}#.insteadof'.format(url): 'ria+ssh://somelabel#'}):
        # NOTE(review): `origds` here is the loop variable leaked from the
        # earlier for-loop (last value: subds) — looks intentional since only
        # its `.id` is compared, but confirm against upstream history
        cloned_by_label = clone(
            'ria+ssh://somelabel#{}'.format(origds.id),
            lcl / 'cloned_by_label',
        )
    # so we get the same setup as above, but....
    eq_(origds.id, cloned_by_label.id)
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origds.repo.get_hexsha(), cloned_by_label.repo.get_hexsha())
    ok_(cloned_by_label.config.get('remote.origin.url').startswith(url))
    eq_(cloned_by_label.config.get('remote.origin.annex-ignore'), 'true')
    # ... the clone candidates go with the label-based URL such that
    # future get() requests acknowlege a (system-wide) configuration
    # update
    eq_(cloned_by_label.config.get('datalad.get.subdataset-source-candidate-200origin'),
        'ria+ssh://somelabel#{id}')

    if not has_symlink_capability():
        return
    # place a symlink in the store to serve as a dataset alias
    (storepath / 'alias').mkdir()
    (storepath / 'alias' / 'myname').symlink_to(storeds_loc)
    with chpwd(lcl):
        cloned_by_alias = clone('ria+{}#~{}'.format(url, 'myname'))
    # still get the same data
    eq_(cloned_by_alias.id, ds.id)
    # more sensible default install path
    eq_(cloned_by_alias.pathobj.name, 'myname')
def get_baseline(p):
    """Create a superdataset with one unregistered dataset inside it.

    Parameters
    ----------
    p : str
      Path at which to create the superdataset.

    Returns
    -------
    Dataset
      The superdataset; its 'sub' directory holds an independent, freshly
      created dataset that is deliberately NOT saved/registered, so the
      superdataset reports it as untracked.
    """
    ds = Dataset(p).create()
    # create the nested dataset for its side effect only; the original bound
    # it to an unused local (`sub`), which is dropped here
    create(text_type(ds.pathobj / 'sub'))
    assert_repo_status(ds.path, untracked=['sub'])
    return ds
def _postclonetest_prepare(lcl, storepath, link):
    """Prepare a RIA store whose ORA remote autoenable must fail on clone.

    Builds a dataset hierarchy (annex super, annex sub, plain-git sub),
    publishes all of them into a RIA store at *storepath* using an upload
    URL based on the *link* symlink, then deletes the symlink so the
    recorded remote URL becomes inaccessible. Returns the superdataset id
    so callers can clone it from the store and test URL reconfiguration.

    Parameters
    ----------
    lcl : str
      Directory for the local dataset hierarchy.
    storepath : str
      Directory to become the RIA store.
    link : str
      Path for the temporary symlink to the store (removed at the end).

    Returns
    -------
    str
      The superdataset's id.
    """
    from datalad.customremotes.ria_utils import (
        create_store,
        create_ds_in_store,
        get_layout_locations
    )
    from datalad.distributed.ora_remote import (
        LocalIO,
    )

    create_tree(lcl,
                tree={
                    'ds': {
                        'test.txt': 'some',
                        'subdir': {
                            'subds': {'testsub.txt': 'somemore'},
                            'subgit': {'testgit.txt': 'even more'}
                        },
                    },
                })

    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    link = Path(link)
    link.symlink_to(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    # add a plain git dataset as well
    subgit = Dataset(lcl / 'ds' / 'subdir' / 'subgit').create(force=True,
                                                              no_annex=True)
    subgit.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)

    io = LocalIO()
    create_store(io, storepath, '1')
    # URL to use for upload. Point is, that this should be invalid for the clone
    # so that autoenable would fail. Therefore let it be based on a to be
    # deleted symlink
    upl_url = "ria+{}".format(link.as_uri())

    for d in (ds, subds, subgit):

        # TODO: create-sibling-ria required for config! => adapt to RF'd
        # creation (missed on rebase?)
        create_ds_in_store(io, storepath, d.id, '2', '1')
        d.create_sibling_ria(upl_url, "store")

        if d is not subgit:
            # Now, simulate the problem by reconfiguring the special remote to
            # not be autoenabled.
            # Note, however, that the actual intention is a URL, that isn't
            # valid from the point of view of the clone (doesn't resolve, no
            # credentials, etc.) and therefore autoenabling on git-annex-init
            # when datalad-cloning would fail to succeed.
            Runner(cwd=d.path).run(['git', 'annex', 'enableremote',
                                    'store-storage',
                                    'autoenable=false'])
        d.push('.', to='store')
        store_loc, _, _ = get_layout_locations(1, storepath, d.id)
        # make the stored repo servable via dumb HTTP
        Runner(cwd=str(store_loc)).run(['git', 'update-server-info'])

    link.unlink()
    # We should now have a store with datasets that have an autoenabled ORA
    # remote relying on an inaccessible URL.
    # datalad-clone is supposed to reconfigure based on the URL we cloned from.
    # Test this feature for cloning via HTTP, SSH and FILE URLs.

    return ds.id
def _test_initremote_basic(host, ds_path, store, link):
    """Test basic initialization of an ORA (RIA) special remote.

    Verifies that `git annex initremote` fails (without leaving a partial
    remote behind) until both the store and the dataset layout exist, that
    a successful init records url/options/archive-id in
    ``git-annex:remote.log``, that enableremote rejects an invalid URL but
    accepts a valid one, and that ``--sameas`` remotes are tolerated.

    Parameters
    ----------
    host : str or None
      SSH host for a ria+ssh URL; None for a local ria+file store.
    ds_path, store, link : str
      Paths for the dataset, the RIA store, and a symlink used to probe
      URL reconfiguration.
    """
    ds_path = Path(ds_path)
    store = Path(store)
    link = Path(link)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fails on non-existing storage location
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name']
         for uuid, cfg in ds.repo.get_special_remotes().items()])
    # fails on non-RIA URL
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=common_init_opts +
                  ['url={}'.format(store.as_uri())])
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name']
         for uuid, cfg in ds.repo.get_special_remotes().items()])

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # still fails, since ds isn't setup in the store
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name']
         for uuid, cfg in ds.repo.get_special_remotes().items()])
    # set up the dataset as well
    create_ds_in_store(io, store, ds.id, '2', '1')
    # now should work
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in(
        'ria-remote',
        [cfg['name']
         for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    # - url
    # - common_init_opts
    # - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob',
                                   'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(url), remote_log)
    # plain loop instead of the original side-effect-only list comprehension
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

    # re-configure with invalid URL should fail:
    assert_raises(
        CommandError,
        ds.repo.call_annex,
        ['enableremote', 'ria-remote'] + common_init_opts +
        ['url=ria+file:///non-existing'])
    # but re-configure with valid URL should work
    if has_symlink_capability():
        link.symlink_to(store)
        new_url = 'ria+{}'.format(link.as_uri())
        ds.repo.call_annex(
            ['enableremote', 'ria-remote'] + common_init_opts +
            ['url={}'.format(new_url)])
        # git-annex:remote.log should have:
        # - url
        # - common_init_opts
        # - archive_id (which equals ds id)
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'],
            read_only=True)
        assert_in("url={}".format(new_url), remote_log)
        # plain loop instead of the original side-effect-only comprehension
        for c in common_init_opts:
            assert_in(c, remote_log)
        assert_in("archive-id={}".format(ds.id), remote_log)

    # we can deal with --sameas, which leads to a special remote not having a
    # 'name' property, but only a 'sameas-name'. See gh-4259
    try:
        ds.repo.init_remote('ora2',
                            options=init_opts + ['--sameas', 'ria-remote'])
    except CommandError as e:
        if 'Invalid option `--sameas' in e.stderr:
            # annex too old - doesn't know --sameas
            pass
        else:
            raise
def test_run_inputs_outputs(src, path):
    """Test `run`'s --input/--output handling.

    Covers: retrieval of declared inputs (and extra inputs) before the run,
    rerun re-fetching, glob and subdirectory input specs, relative inputs
    from a subdirectory CWD, ``--input=.``, output unlock/remove semantics,
    expanded glob storage, and glob resolution across uninstalled
    subdatasets.
    """
    # build a nested subdataset hierarchy in the origin, deepest first
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
                         'type' if on_windows else 'cat'),
                     dataset=ds,
                     inputs=["input.dat"], extra_inputs=["extra-input.dat"]))

    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        # only declared {inputs} (not extra inputs) are substituted into cmd
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    # a missing input only warns, it does not abort the run
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))

        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], last_commit_msg(ds.repo))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(op.join("subdir", f))
            for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun(DEFAULT_BRANCH + "^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        # on Windows `echo` keeps the quotes, hence the different expectation
        eq_(fh.read(), "a.dat' appended' \n" if on_windows
            else "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n appended\n")

    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", last_commit_msg(ds.repo))
    assert_in("b.dat", last_commit_msg(ds.repo))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(
        op.join(ds.path, "globbed-subds"),
        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'" if on_windows
        else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
        strip=True)

    # an output inside a not-installed subdataset triggers its install too
    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah", strip=True)