def test_run_failure(path):
    ds = Dataset(path).create()
    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat grows) > grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(ds.repo.repo.git_dir, "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", save=False)
    ds.save(message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())
    outfile = opj(ds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;) above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    # that we can reconstruct it EXACTLY on our examples
    (eq_ if exact_str else neq_)(ri, str(ri_))
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))

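# Illustrative only (not part of the original suite): a minimal _check_ri
# invocation, assuming URL/RI from datalad.support.network are in scope --
# parse the string via RI(), expect a URL instance, and verify that the named
# fields round-trip exactly:
#
#   _check_ri("http://example.com", URL, scheme='http', hostname='example.com')
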
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = Dataset(origin_path).create()
    target = mk_push_target(
        origin, 'target', target_path, annex=True, bare=False)
    origin.push(to='target')
    assert_repo_status(origin.path)
    assert_repo_status(target.path)
    eq_(origin.repo.get_hexsha('master'),
        target.get_hexsha('master'))

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    assert_repo_status(origin.path)
    neq_(origin.repo.get_hexsha('master'),
         target.get_hexsha('master'))
    # now push
    res = origin.push(to='target')
    assert_result_count(
        res, 1, status='ok', type='dataset', path=origin.path,
        action='publish', target='target', operations=['fast-forward'])
    eq_(origin.repo.get_hexsha('master'),
        target.get_hexsha('master'))

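# Legend for the ASCII commit graphs in the rerun tests below (an apparent
# convention of these tests, spelled out here for readability): lowercase
# suffixes mark original commits -- _n: plain (non-run) commit, _r: commit
# produced by `datalad run`; uppercase suffixes mark commits created by
# `rerun` -- _R: re-executed run commit, _C: copied non-run commit,
# _M: re-created merge commit.
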
def test_rerun_octopus(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "topic-1"])
    ds.run("echo bar >bar")
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "topic-2"])
    ds.run("echo baz >baz")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.call_git(
        ["merge", "-m", "Merge octopus", "topic-1", "topic-2"])
    # o-.   f_M
    # |\ \
    # | | o e_r
    # | o | d_r
    # | |/
    # o |   c_n
    # |/
    # o b_r
    # o a_n
    ds.rerun(since="", onto="")
    neq_(ds.repo.get_hexsha("HEAD^3"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^3"))
    eq_(ds.repo.get_hexsha("HEAD~3"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~3"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "~"))

def test_rerun_exclude_side(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.run("echo bar >bar")
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o   d_n
    # |\
    # o | c_r
    # | o b_r
    # |/
    # o a_n
    ds.rerun("HEAD", since=DEFAULT_BRANCH + "^2", onto="")
    # o   d_M
    # |\
    # o | c_R
    # | o b_r
    # |/
    # o a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH),
         ds.repo.get_hexsha())
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
         ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds.repo.get_hexsha("HEAD^2"))

def test_drop(dspath, records):
    make_datarecord_zips('12345', records)
    ds = create(dspath)
    ds.ukb_init(
        '12345',
        ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'])
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')
    bin_dir = make_ukbfetch(ds, records)

    # baseline
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)
    zips_in_ds = list(ds.pathobj.glob('**/*.zip'))
    neq_(zips_in_ds, [])

    # drop archives
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True, drop='archives')
    # no ZIPs can be found, also not in the annex
    eq_(list(ds.pathobj.glob('**/*.zip')), [])
    # we can get all we want (or rather still have it)
    assert_status('notneeded', ds.get('.'))

    # now drop extracted content instead
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True, drop='extracted')
    eq_(list(ds.pathobj.glob('**/*.zip')), zips_in_ds)
    # we can get all
    assert_status('ok', ds.get('.'))
    # a non-zip content file is still around
    eq_((ds.pathobj / '25747_2_0.adv').read_text(), '25747_2_0.adv')

def test_diff_rsync_syntax(path):
    # three nested datasets
    ds = Dataset(path).create()
    subds = ds.create('sub')
    subsubds = subds.create(Path('subdir', 'deep'))
    justtop = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    # we only get a single result, the subdataset in question
    assert_result_count(justtop, 1)
    assert_result_count(justtop, 1, type='dataset', path=subds.path)
    # now with "peek inside the dataset" syntax
    inside = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub' + os.sep)
    # we get both subdatasets, but nothing else inside the nested one
    assert_result_count(inside, 2, type='dataset')
    assert_result_count(inside, 1, type='dataset', path=subds.path)
    assert_result_count(inside, 1, type='dataset', path=subsubds.path)
    assert_result_count(inside, 0, type='file', parentds=subsubds.path)
    # if we point to the subdir in 'sub' the reporting wrt the subsubds
    # doesn't change. It is merely a path constraint within the queried
    # subds, but because the subsubds is still underneath it, nothing changes
    inside_subdir = ds.diff(
        fr=PRE_INIT_COMMIT_SHA, path=op.join('sub', 'subdir'))
    assert_result_count(inside_subdir, 2, type='dataset')
    assert_result_count(inside_subdir, 1, type='dataset', path=subds.path)
    assert_result_count(inside_subdir, 1, type='dataset', path=subsubds.path)
    assert_result_count(inside_subdir, 0, type='file', parentds=subsubds.path)
    # but the rest is different (e.g. all the stuff in .datalad is gone)
    neq_(inside, inside_subdir)

    # just for completeness, we get more when going full recursive
    rec = ds.diff(fr=PRE_INIT_COMMIT_SHA, recursive=True, path='sub' + os.sep)
    assert(len(inside) < len(rec))

def test_rerun_fastforwardable_mutator(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >>foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side", "--no-ff"])
    # o   c_n
    # |\
    # | o b_r
    # |/
    # o a_n
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o b_R
    # o b_r
    # o a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha())

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o b_R
    # o   c_n
    # |\
    # | o b_r
    # |/
    # o a_n
    eq_(ds.repo.get_active_branch(), DEFAULT_BRANCH)
    assert_false(ds.repo.commit_exists(DEFAULT_BRANCH + "^2"))
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "^"))

def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        # but we do maintain original string
        eq_(str(purl), 'http://example.com/;param')
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')

def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(
        add={'distribution-restrictions': 'verysecure'},
        files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(
        res, action='copy',
        path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted', 'false', where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')

def test_GitRepo_equals(path1, path2):
    repo1 = GitRepo(path1)
    repo2 = GitRepo(path1)
    ok_(repo1 == repo2)
    eq_(repo1, repo2)
    repo2 = GitRepo(path2)
    neq_(repo1, repo2)
    ok_(repo1 != repo2)

def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
    assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))

def test_rerun_mutator_stem_nonrun_merges(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("more blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o   e_n
    # |\
    # | o d_n
    # o | c_n
    # |/
    # o b_r
    # o a_n
    ds.rerun(since="", onto="")
    # o   e_M
    # |\
    # | o d_C
    # o | c_C
    # |/
    # o b_R
    # o a_n
    ok_(ds.repo.commit_exists("HEAD^2"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH),
         ds.repo.get_hexsha())

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o c_C
    # o b_R
    # o d_n
    # o b_r
    # o a_n
    assert_false(ds.repo.commit_exists("HEAD^2"))
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o b_R
    # o   e_n
    # |\
    # | o d_n
    # o | c_n
    # |/
    # o b_r
    # o a_n
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "^"))
    assert_false(ds.repo.commit_exists("HEAD^2"))

def test_rerun_nonrun_left_run_right(path):
    ds = Dataset(path).create()
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o   d_n
    # |\
    # | o c_r
    # o | b_n
    # |/
    # o a_n
    ds.rerun(since="", onto="")
    # o   d_M
    # |\
    # | o c_R
    # o | b_n
    # |/
    # o a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
        ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o   d_n
    # |\
    # | o c_r
    # o | b_n
    # |/
    # o a_n
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH),
        ds.repo.get_hexsha())
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
        ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o   d_n
    # |\
    # | o c_r
    # o | b_n
    # |/
    # o a_n
    eq_(hexsha_before, ds.repo.get_hexsha())

def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        with swallow_outputs():
            if on_windows:
                # this does not do exactly the same as the cmd on other systems
                # but is close enough to make running the test worthwhile
                ds.run("echo x>{} & false".format(op.join("sub", "grows")))
            else:
                ds.run("echo x$(cat {0}) > {0} && false".format(
                    op.join("sub", "grows")))
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = op.join(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.save(recursive=True, message_file=msgfile)
    assert_repo_status(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())
    outfile = op.join(subds.path, "grows")
    eq_('x \n' if on_windows else 'x\n', open(outfile).read())

    if on_windows:
        # FIXME: Make the remaining code compatible with Windows.
        return

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    assert_repo_status(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))

def test_url_eq():
    eq_(URL(), URL())
    # doesn't make sense to ask what kind of a url an empty URL is
    #eq_(RI(), RI())
    neq_(URL(), URL(hostname='x'))
    # Different types aren't equal even if they have the same field values
    neq_(URL(path='x'), PathRI(path='x'))
    neq_(URL(hostname='x'), SSHRI(hostname='x'))
    neq_(str(URL(hostname='x')), str(SSHRI(hostname='x')))

def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.add("dirt_modified", to_git=True)
    with open(opj(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("ls", inputs=["not-there"], explicit=True)
    assert_in("Input does not exist: ", cml.out)

    remove(opj(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    ok_(ds.repo.is_dirty(path="dirt_modified"))
    neq_(hexsha_initial, ds.repo.get_hexsha())

def test_rerun_unrelated_nonrun_left_run_right(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["--orphan", "side"])
    ds.save(message="squashed")
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side",
                                   "--allow-unrelated-histories"])
    # o   d_n
    # |\
    # | o c_r
    # | o b_n
    # o a_n
    ds.rerun(since="", onto="")
    # o   d_M
    # |\
    # | o c_R
    # | o b_n
    # o a_n
    ok_(ds.repo.commit_exists("HEAD^2"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH),
         ds.repo.get_hexsha())
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
        ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^"),
        ds.repo.get_hexsha("HEAD^2^"))
    assert_false(ds.repo.commit_exists("HEAD^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o   d_n
    # |\
    # | o c_r
    # | o b_n
    # o a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH),
        ds.repo.get_hexsha())
    assert_false(ds.repo.commit_exists("HEAD^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o   d_n
    # |\
    # | o c_r
    # | o b_n
    # o a_n
    eq_(hexsha_before, ds.repo.get_hexsha())

def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(
        add={'distribution-restrictions': 'verysecure'},
        files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(
        res, action='copy',
        path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')

def test_get_commit_date(path):
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)

    eq_(date, gr.get_commit_date('master'))
    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))

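# A quick cross-check of the DATE/DATE_EPOCH pair used above -- a standalone
# sketch (standard library only, not part of the original module):
#
#   from datetime import datetime, timezone
#   dt = datetime(2018, 3, 14, 3, 47, 30, tzinfo=timezone.utc)
#   assert int(dt.timestamp()) == 1520999250
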
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with swallow_outputs():
        with assert_raises(CommandError):
            ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())
    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))

def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())
    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))

def check_update_how_subds_different(follow, action, path):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_src_sub = ds_src.create("sub")
    ds_src.save()

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    (ds_clone.pathobj / "foo").write_text("foo")
    ds_clone.save()
    ds_clone_sub = Dataset(ds_clone.pathobj / "sub")

    (ds_src_sub.pathobj / "bar").write_text("bar")
    ds_src.save(recursive=True)
    # Add unrecorded state to make --follow=sibling/parentds differ.
    (ds_src_sub.pathobj / "baz").write_text("baz")
    ds_src_sub.save()

    ds_clone_repo = ds_clone.repo
    ds_clone_hexsha_pre = ds_clone_repo.get_hexsha()

    ds_clone_sub_repo = ds_clone_sub.repo
    ds_clone_sub_branch_pre = ds_clone_sub_repo.get_active_branch()

    res = ds_clone.update(follow=follow, how="merge", how_subds=action,
                          recursive=True)

    assert_result_count(res, 1, action="merge", status="ok",
                        path=ds_clone.path)
    assert_result_count(res, 1, action=f"update.{action}", status="ok",
                        path=ds_clone_sub.path)

    ds_clone_hexsha_post = ds_clone_repo.get_hexsha()
    neq_(ds_clone_hexsha_pre, ds_clone_hexsha_post)
    neq_(ds_src.repo.get_hexsha(), ds_clone_hexsha_post)
    ok_(ds_clone_repo.is_ancestor(ds_clone_hexsha_pre,
                                  ds_clone_hexsha_post))

    eq_(ds_clone_sub.repo.get_hexsha(),
        ds_src_sub.repo.get_hexsha(
            None if follow == "sibling" else "HEAD~"))
    ds_clone_sub_branch_post = ds_clone_sub_repo.get_active_branch()

    if action == "checkout":
        neq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)
        assert_false(ds_clone_sub_branch_post)
    else:
        eq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)

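# check_update_how_subds_different() is written as a parametrized check; a
# plausible driver would iterate over follow/action combinations roughly like
# the sketch below (the exact pairings used by the real suite are an
# assumption):
#
#   def test_update_how_subds_different():
#       for follow in ("sibling", "parentds"):
#           for action in ("merge", "checkout"):
#               yield check_update_how_subds_different, follow, action
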
def test_rerun_left_right_runs(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.run("echo bar >bar")
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o   d_n
    # |\
    # o | c_r
    # | o b_r
    # |/
    # o a_n
    ds.rerun(since="", onto="")
    # o   d_M
    # |\
    # o | c_R
    # | o b_R
    # |/
    # o a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
         ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^")
    # o   d_M
    # |\
    # | o b_R
    # |/
    # o c_r
    # o a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"),
        ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o   d_n
    # |\
    # o | c_r
    # | o b_r
    # |/
    # o a_n
    eq_(hexsha_before, ds.repo.get_hexsha())

def test_rerun_onto(path):
    ds = Dataset(path).create()
    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")

def test_rerun_multifork(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout("side", options=["-b", "side-nonrun"])
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah 0")
    ds.save()
    ds.repo.checkout("side")
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("blah 1")
    ds.save()
    ds.run("echo bar >bar")
    ds.repo.checkout("side~1", options=["-b", "side-side"])
    with open(op.join(path, "nonrun-file2"), "w") as f:
        f.write("blah 2")
    ds.save()
    ds.run("echo onside0 >onside0")
    ds.repo.checkout("side")
    ds.repo.merge("side-side")
    ds.run("echo after-side-side >after-side-side")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["--no-ff"])
    ds.repo.merge("side-nonrun")
    # o     k_n
    # |\
    # | o   j_n
    # o |   i_n
    # |\ \
    # | o | h_r
    # | o | g_n
    # | |\ \
    # | | o | f_r
    # | | o | e_n
    # | o | | d_r
    # | |/ /
    # | o | c_n
    # | |/
    # | o b_r
    # |/
    # o a_n
    ds.rerun(since="", onto="")
    # o     k_M
    # |\
    # | o   j_n
    # o |   i_M
    # |\ \
    # | o | h_R
    # | o | g_M
    # | |\ \
    # | | o | f_R
    # | | o | e_n
    # | o | | d_R
    # | |/ /
    # | o | c_n
    # | |/
    # | o b_R
    # |/
    # o a_n
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~2"))
    neq_(ds.repo.get_hexsha("HEAD^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    neq_(ds.repo.get_hexsha("HEAD^^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^^2"))
    assert_false(ds.repo.commit_exists("HEAD^^2^2"))
    eq_(ds.repo.get_hexsha("HEAD^2^^"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha())

def test_rerun_onto(path):
    ds = Dataset(path).create()
    # Make sure we have more than one commit. The one commit case is checked
    # elsewhere.
    ds.repo.commit(msg="noop commit", options=["--allow-empty"])

    grow_file = op.join(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    ok_(all(r["state"] == "clean" for r in ds.diff(fr="HEAD", to="static")))
    for revrange in ["..static", "static.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    ok_(all(r["state"] == "clean"
            for r in ds.diff(fr="master", to="from-base")))
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")

def test_ria_http(lcl, storepath, url):
    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)
    for d in (ds, subds):
        _move2store(storepath, d)
    # location of superds in store
    storeds_loc = str(storepath / ds.id[:3] / ds.id[3:])
    # now we should be able to clone from a ria+http url
    # the super
    riaclone = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone',
    )
    # due to default configuration, clone() should automatically look for the
    # subdataset in the store, too -- if not the following would fail, because
    # we never configured a proper submodule URL
    riaclonesub = riaclone.get(
        op.join('subdir', 'subds'), get_data=False,
        result_xfm='datasets', return_type='item-or-list')

    # both datasets came from the store and must be set up in an identical
    # fashion
    for origds, cloneds in ((ds, riaclone), (subds, riaclonesub)):
        eq_(origds.id, cloneds.id)
        if not ds.repo.is_managed_branch():
            # test logic cannot handle adjusted branches
            eq_(origds.repo.get_hexsha(), cloneds.repo.get_hexsha())
        ok_(cloneds.config.get('remote.origin.url').startswith(url))
        eq_(cloneds.config.get('remote.origin.annex-ignore'), 'true')
        eq_(cloneds.config.get(
            'datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)

    # now advance the source dataset
    (ds.pathobj / 'newfile.txt').write_text('new')
    ds.save()
    ds.publish(to='store')
    Runner(cwd=storeds_loc).run(['git', 'update-server-info'])
    # re-clone as before
    riaclone2 = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone2',
    )
    # and now clone a specific version, here given by the tag name
    riaclone_orig = clone(
        'ria+{}#{}@{}'.format(url, ds.id, 'original'),
        lcl / 'clone_orig',
    )
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        # we got the precise version we wanted
        eq_(riaclone.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())
        # and not the latest
        eq_(riaclone2.repo.get_hexsha(), ds.repo.get_hexsha())
        neq_(riaclone2.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())

    # attempt to clone a version that doesn't exist
    with swallow_logs():
        with assert_raises(IncompleteResultsError) as cme:
            clone('ria+{}#{}@impossible'.format(url, ds.id),
                  lcl / 'clone_failed')
        assert_in("not found in upstream", str(cme.exception))

    # lastly test if URL rewriting is in effect
    # on the surface we clone from an SSH source identified by some custom
    # label, no full URL, but URL rewriting setup maps it back to the
    # HTTP URL used above
    with patch_config({
            'url.ria+{}#.insteadof'.format(url): 'ria+ssh://somelabel#'}):
        cloned_by_label = clone(
            'ria+ssh://somelabel#{}'.format(origds.id),
            lcl / 'cloned_by_label',
        )
    # so we get the same setup as above, but....
    eq_(origds.id, cloned_by_label.id)
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origds.repo.get_hexsha(), cloned_by_label.repo.get_hexsha())
    ok_(cloned_by_label.config.get('remote.origin.url').startswith(url))
    eq_(cloned_by_label.config.get('remote.origin.annex-ignore'), 'true')
    # ... the clone candidates go with the label-based URL such that
    # future get() requests acknowledge a (system-wide) configuration
    # update
    eq_(cloned_by_label.config.get(
        'datalad.get.subdataset-source-candidate-200origin'),
        'ria+ssh://somelabel#{id}')

    if not has_symlink_capability():
        return
    # place a symlink in the store to serve as a dataset alias
    (storepath / 'alias').mkdir()
    (storepath / 'alias' / 'myname').symlink_to(storeds_loc)
    with chpwd(lcl):
        cloned_by_alias = clone('ria+{}#~{}'.format(url, 'myname'))
    # still get the same data
    eq_(cloned_by_alias.id, ds.id)
    # more sensible default install path
    eq_(cloned_by_alias.pathobj.name, 'myname')

def test_publish_recursive(pristine_origin, origin_path, src_path,
                           dst_path, sub1_pub, sub2_pub):
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin_path, recursive=True)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("Unknown target sibling 'target'", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]),
        set(opj(*([source.path] + x))
            for x in ([], ['subm 1'], ['subm 2'])))
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since. By default, since there are no
    # changes, nothing is pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]), set())

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), set([]))

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        eq_(set(r.path for r in res_[0]), set([]))

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_published, res_skipped = publish(dataset=source, recursive=True)
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    eq_(res_published, [Dataset(sub2.path)])
    eq_(res_skipped, [])

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_published, res_skipped = publish(dataset=source, to='target',
                                         recursive=True)
    eq_(res_published, [Dataset(sub2.path)])
    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was not transferred but now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo publish recursively, explicitly asking to copy '.'
    res_published, res_skipped = publish(
        '.',
        dataset=source, to='target',
        recursive=True
    )
    ok_(sub2_target.file_has_content('file.dat'))
    eq_(res_published, ['file.dat'])
    # note that this report makes little sense without path to the repository

def test_publish_recursive(pristine_origin, origin_path, src_path,
                           dst_path, sub1_pub, sub2_pub):
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since. By default, since there are no
    # changes, nothing is pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was not transferred, although its link now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested
    ## data transfer
    res_ = publish(dataset=source, to='target', recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within
    # "subm 1", since if `--since=''` worked correctly, nothing has changed
    # there and it should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')

def test_url_samples():
    _check_ri("http://example.com", URL, scheme='http', hostname="example.com")
    # "complete" one for classical http
    _check_ri("http://*****:*****@example.com:8080/p/sp?p1=v1&p2=v2#frag",
              URL, scheme='http', hostname="example.com", port=8080,
              username='******', password='******', path='/p/sp',
              query='p1=v1&p2=v2', fragment='frag')

    # sample one for ssh with specifying the scheme
    # XXX? might be useful? https://github.com/FriendCode/giturlparse.py
    _check_ri("ssh://host/path/sp1", URL, scheme='ssh', hostname='host',
              path='/path/sp1')
    _check_ri("user@host:path/sp1", SSHRI,
              hostname='host', path='path/sp1', username='******')
    _check_ri("host:path/sp1", SSHRI, hostname='host', path='path/sp1')
    _check_ri("host:path", SSHRI, hostname='host', path='path')
    _check_ri("host:/path", SSHRI, hostname='host', path='/path')
    _check_ri("user@host", SSHRI, hostname='host', username='******')
    # TODO!!! should this be a legit URL like this?
    # _check_ri("host", SSHRI, hostname='host'))
    eq_(repr(RI("host:path")), "SSHRI(hostname='host', path='path')")

    # And now perspective 'datalad', implicit=True urls pointing to the
    # canonical center location
    _check_ri("///", DataLadRI)
    _check_ri("///p/s1", DataLadRI, path='p/s1')
    # could be considered by someone as "URI reference" relative to scheme
    _check_ri("//a/", DataLadRI, remote='a')
    _check_ri("//a/data", DataLadRI, path='data', remote='a')

    # here we will do custom magic allowing only schemes with + in them,
    # such as dl+archive, or not so custom as
    _check_ri("hg+https://host/user/proj", URL,
              scheme="hg+https", hostname='host', path='/user/proj')
    # "old" style
    _check_ri("dl+archive:KEY/path/sp1#size=123", URL,
              scheme='dl+archive', path='KEY/path/sp1', fragment='size=123')
    # "new" style
    _check_ri("dl+archive:KEY#path=path/sp1&size=123", URL,
              scheme='dl+archive', path='KEY',
              fragment='path=path/sp1&size=123')
    # actually above one is probably wrong since we need to encode the path
    _check_ri("dl+archive:KEY#path=path%2Fbsp1&size=123", URL,
              scheme='dl+archive', path='KEY',
              fragment='path=path%2Fbsp1&size=123')

    #https://en.wikipedia.org/wiki/File_URI_scheme
    _check_ri("file://host", URL, scheme='file', hostname='host')
    _check_ri("file://host/path/sp1", URL,
              scheme='file', hostname='host', path='/path/sp1')
    # stock libraries of Python aren't quite ready for ipv6
    ipv6address = '2001:db8:85a3::8a2e:370:7334'
    _check_ri("file://%s/path/sp1" % ipv6address, URL,
              scheme='file', hostname=ipv6address, path='/path/sp1')
    for lh in ('localhost', '::1', '', '127.3.4.155'):
        _check_ri("file://%s/path/sp1" % lh, URL, localpath='/path/sp1',
                  scheme='file', hostname=lh, path='/path/sp1')
    _check_ri('http://[1fff:0:a88:85a3::ac1f]:8001/index.html', URL,
              scheme='http', hostname='1fff:0:a88:85a3::ac1f',
              port=8001, path='/index.html')
    _check_ri("file:///path/sp1", URL, localpath='/path/sp1',
              scheme='file', path='/path/sp1')
    # we don't do any magical comprehension for home paths/drives for windows
    # of file:// urls, thus leaving /~ and /c: for now:
    _check_ri("file:///~/path/sp1", URL, localpath='/~/path/sp1',
              scheme='file', path='/~/path/sp1')
    _check_ri("file:///%7E/path/sp1", URL, localpath='/~/path/sp1',
              scheme='file', path='/~/path/sp1', exact_str=False)
    # not sure but let's check
    _check_ri("file:///c:/path/sp1", URL, localpath='/c:/path/sp1',
              scheme='file', path='/c:/path/sp1', exact_str=False)

    # and now implicit paths or actually they are also "URI references"
    _check_ri("f", PathRI, localpath='f', path='f')
    _check_ri("f/s1", PathRI, localpath='f/s1', path='f/s1')
    _check_ri("/f", PathRI, localpath='/f', path='/f')
    _check_ri("/f/s1", PathRI, localpath='/f/s1', path='/f/s1')

    # some github ones, just to make sure
    _check_ri("git://host/user/proj", URL,
              scheme="git", hostname="host", path="/user/proj")
    _check_ri("git@host:user/proj", SSHRI,
              hostname="host", path="user/proj", username='******')

    _check_ri('weired:/', SSHRI, hostname='weired', path='/')
    # since the scheme does not allow some symbols, we need an additional check
    _check_ri('weired_url:/', SSHRI, hostname='weired_url', path='/')
    _check_ri('example.com:/', SSHRI, hostname='example.com', path='/')
    _check_ri('example.com:path/sp1', SSHRI,
              hostname='example.com', path='path/sp1')
    _check_ri('example.com/path/sp1\:fname', PathRI,
              localpath='example.com/path/sp1\:fname',
              path='example.com/path/sp1\:fname')
    # ssh is as stupid as us, so we will stay "Consistently" dumb
    """
    $> ssh example.com/path/sp1:fname
    ssh: Could not resolve hostname example.com/path/sp1:fname: Name or service not known
    """
    _check_ri('example.com/path/sp1:fname', SSHRI,
              hostname='example.com/path/sp1', path='fname')

    # SSHRIs have .port, but it is empty
    eq_(SSHRI(hostname='example.com').port, '')

    # check that we are getting a warning logged when url can't be
    # reconstructed precisely
    # actually failed to come up with one -- becomes late here
    #_check_ri("http://host///..//p", scheme='http', path='/..//p')

    # actually this one is good enough to trigger a warning and I still don't
    # know what it should exactly be!?
    with swallow_logs(new_level=logging.DEBUG) as cml:
        weired_str = 'weired://'
        weired_url = RI(weired_str)
        repr(weired_url)
        cml.assert_logged(
            'Parsed version of SSHRI .weired:/. '
            'differs from original .weired://.'
        )
        # but we store original str
        eq_(str(weired_url), weired_str)
        neq_(weired_url.as_str(), weired_str)

    raise SkipTest("TODO: file://::1/some does complain about parsed version "
                   "dropping ::1")

def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    # we will be publishing back to origin, so to not alter the testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on -- need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target",
                  recursive=True, on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's allow pushing
    # into its checked-out branch
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin, but origin has a different git-annex branch:
    # since we cloned from it, it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update all
    # sub-datasets, since the git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now it should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test publishing with --since.  By default, since there are no new
    # changes, nothing gets pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset -- does not alter the super dataset!
    # MIH: use `to_git` because the original test author used an explicit
    # `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update that subm again
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here, since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only the updated subdataset was published; the super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since it published to origin -- the destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')
    # Note: with updateInstead only in target2, and without saving the change
    # in the super-dataset, we would have left the remote dataset somewhat
    # inconsistent, had we had the entire hierarchy.  But here, since the
    # target datasets are independent -- it is ok.
    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(
        dataset=source, to='target',
        recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(
        res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test whether it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within
    # "subm 1", since if `--since=''` worked correctly, nothing has changed
    # there and it should not have been touched at all
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
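
# A hedged sketch (hypothetical helper, not part of the original suite)
# distilling the `since` behavior exercised by test_publish_recursive above:
# an up-to-date sibling yields 'notneeded' results, even when `since` points
# below the last pushed state.  Only calls already used above are assumed;
# the helper name and the probe file are illustrative.
def _demo_publish_since(src_path, dst_path):
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'probe.dat': 'content'})
    ds.add('probe.dat')                 # a second commit, so HEAD^ exists
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])      # park target HEAD off master
    ds.repo.add_remote("target", dst_path)
    # initial publication brings the sibling up to date
    res = publish(dataset=ds, to="target")
    assert_result_count(res, 1, status='ok', path=ds.path, type='dataset')
    # the sibling already has everything since HEAD^ -> 'notneeded'
    res = publish(dataset=ds, to="target", since='HEAD^')
    assert_result_count(res, 1, status='notneeded', path=ds.path, type='dataset')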