def test_invalid_call(origin, tdir):
    """Verify publish error/impossible results for unusable targets.

    Covers: a path that does not exist at all, a known-but-uninstalled
    subdataset, `since=` without an identifiable dataset, and an explicit
    publish of an unavailable subdataset.
    """
    ds = Dataset(origin)
    # make 'subm 1' known but not present on disk
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible',
                  publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
def test_publish_simple(origin, src_path, dst_path):
    """Publish a single annex dataset into a plain-git target twice,
    then again after a modification; verify branch states each time."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park target's checkout on a temp branch so 'master' can be pushed into
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
def test_publish_plain_git(origin, src_path, dst_path):
    """Publish into a plain-git target, including forced push after amend."""
    # TODO: Since it's mostly the same, melt with test_publish_simple
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park target's checkout on a temp branch so 'master' can be pushed into
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets', force=True)
    eq_(res, [source])
def test_publish_simple(origin, src_path, dst_path):
    """Older-API variant (``dest=``, git_* repo methods) of the simple
    publish test; NOTE: duplicates the name of the newer test above and
    would shadow/be shadowed depending on definition order."""
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Older-API recursive publish: fails without sibling remotes on
    subdatasets, then succeeds once 'target' remotes are configured."""
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, dest="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.git_checkout("TMP", "-b")
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.git_checkout("TMP", "-b")
    sub1 = GitRepo(opj(src_path, 'sub1'), create=False)
    sub2 = GitRepo(opj(src_path, 'sub2'), create=False)
    sub1.git_remote_add("target", sub1_pub)
    sub2.git_remote_add("target", sub2_pub)

    # publish recursively
    res = publish(dataset=source, dest="target", recursive=True)

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, list)
    for item in res:
        assert_is_instance(item, Dataset)
    eq_(res[0].path, src_path)
    eq_(res[1].path, sub1.path)
    eq_(res[2].path, sub2.path)

    # super and both subs must be in sync with their targets
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
    eq_(list(sub1_target.git_get_branch_commits("master")),
        list(sub1.git_get_branch_commits("master")))
    eq_(list(sub1_target.git_get_branch_commits("git-annex")),
        list(sub1.git_get_branch_commits("git-annex")))
    eq_(list(sub2_target.git_get_branch_commits("master")),
        list(sub2.git_get_branch_commits("master")))
    eq_(list(sub2_target.git_get_branch_commits("git-annex")),
        list(sub2.git_get_branch_commits("git-annex")))
def test_publish_simple(origin, src_path, dst_path):
    """Tuple-returning-API variant of the simple publish test
    (publish returns ``([published], [skipped])`` here); NOTE: yet
    another duplicate of the test_publish_simple name."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.add(opj(src_path, 'test_mod_file'), git=True,
                    commit=True, msg="Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, ([source], []))

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
def test_publish_file_handle(origin, src_path, dst_path):
    """Publish a single file handle (not the dataset); verify content is
    transferred to the target and retrievable back from it."""
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # make sure the annexed content is locally present to be published
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    # drop locally and re-fetch from target to prove the copy is usable
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get',
                                   annex_options=['test-annex.dat',
                                                  '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])
def test_publish_with_data(origin, src_path, dst_path):
    """Publish a dataset together with annexed content via ``with_data``."""
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target", with_data=['test-annex.dat'])
    eq_(res, source)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(target.git_get_branch_commits("git-annex"))[1:],
        list(source.repo.git_get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.git_checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])
def test_target_ssh_recursive(origin, src_path, target_path):
    """Create ssh siblings recursively (nested and flat layout) and
    publish to them; the flat case is currently skipped early."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            # %NAME placeholder flattens the hierarchy into sibling dirs
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does fetch -- the related problem
            # so skipping this early
            raise SkipTest('TODO: Make publish work for flat datasets, it currently breaks')

        remote_name = 'remote-' + str(flat)
        # TODO: there is f.ckup with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(
                target=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(target_path_,
                             basename(src_path) if flat else "").rstrip(
                os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)
def test_smth_about_not_supported(p1, p2):
    """Publishing with an unresolvable ``since`` revision must yield an
    'impossible' result; a resolvable one must succeed."""
    source = Dataset(p1).create()
    from datalad.support.network import PathRI
    source.create_sibling(
        'ssh://localhost' + PathRI(p2).posixpath,
        name='target1')
    # source.publish(to='target1')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(
            publish(to='target1', since='HEAD^^', on_failure='ignore'),
            1,
            status='impossible',
            message="fatal: bad revision 'HEAD^^'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
def test_publish_submodule(origin, src_path, target_1, target_2):
    """Publish a subdataset both via the superdataset's ``path`` argument
    and directly from within the subdataset itself."""
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")

    # first, try publishing from super dataset using `path`
    source_super = source
    source_sub = Dataset(opj(src_path, 'sub1'))
    target = GitRepo(target_1, create=True)
    target.git_checkout("TMP", "-b")
    source_sub.repo.git_remote_add("target", target_1)

    res = publish(dataset=source_super, dest="target", path="sub1")
    assert_is_instance(res, Dataset)
    eq_(res.path, source_sub.path)

    eq_(list(GitRepo(target_1, create=False).git_get_branch_commits("master")),
        list(source_sub.repo.git_get_branch_commits("master")))
    eq_(list(GitRepo(target_1, create=False).git_get_branch_commits("git-annex")),
        list(source_sub.repo.git_get_branch_commits("git-annex")))

    # now, publish directly from within submodule:
    target = GitRepo(target_2, create=True)
    target.git_checkout("TMP", "-b")
    source_sub.repo.git_remote_add("target2", target_2)

    res = publish(dataset=source_sub, dest="target2")
    eq_(res, source_sub)

    eq_(list(GitRepo(target_2, create=False).git_get_branch_commits("master")),
        list(source_sub.repo.git_get_branch_commits("master")))
    eq_(list(GitRepo(target_2, create=False).git_get_branch_commits("git-annex")),
        list(source_sub.repo.git_get_branch_commits("git-annex")))
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """Recursive publish with result records: missing sibling errors,
    full recursion, ``since`` behavior, and data transfer control."""
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(
        res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    # super and both subs must now match their respective targets
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes,
    # nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(
            res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested
    ## data transfer
    res_ = publish(
        dataset=source, to='target',
        recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(
        res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within
    # "subm 1" since if `--since=''` worked correctly, nothing has changed
    # there and it should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
def test_target_ssh_recursive(origin, src_path, target_path):
    """Create ssh siblings recursively (nested and flat layouts), publish,
    then add a later subdataset and re-create siblings with ``since=''``."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            # %RELNAME placeholder flattens the hierarchy into sibling dirs
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_,
                             'prefix' if flat else "").rstrip(
                os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we
        # published already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       existing='skip',
                                       ui=True,
                                       since='')
        # so it was created on remote correctly and wasn't just skipped
        assert (Dataset(_path_(target_path_,
                               ('prefix-' if flat else '') + sub3_name)
                        ).is_installed())
        # just a smoke test
        publish(dataset=source, to=remote_name, recursive=True, since='')
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """Exercise create_sibling with a bad URL, existing='replace',
    explicit URL overrides, publishing, and existing='reconfigure'.

    Fix vs. original: the marker file used for the ``existing='replace'``
    check was written via an unclosed ``open(...).write(...)`` chain
    (resource leak; flush not guaranteed outside CPython) -- now uses a
    ``with`` block. Also rejoins the "Target directory ... exists."
    message literal that had been split across lines.
    """
    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO: assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)

    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        # (use a context manager so the handle is closed and content flushed
        # before the replace/exists checks below)
        with open(opj(target_path, 'random'), 'w') as f:
            f.write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created
            # log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad
                # installed on remote end. ATM we don't have easy way to
                # guarantee that AFAIK (yoh), so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)
                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        # nothing should change in terms of content
        assert_dict_equal(orig_digests, digests)

        # but some files should have been modified
        modified_files = {
            k for k in mtimes
            if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring
        # any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """Recursive publish of a superdataset with two subdatasets.

    Exercises the legacy tuple-returning ``publish`` API: failure without
    sibling remotes, full recursive publish, ``--since`` no-op behavior,
    and selective publication after a subdataset-only change.
    """
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin_path, recursive=True)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so 'master' can be pushed into it
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("Unknown target sibling 'target'", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    # every published repo must mirror its source's branch histories
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]),
        set(opj(*([source.path] + x)) for x in ([], ['subm 1'], ['subm 2'])))
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]), set())

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), set([]))

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        eq_(set(r.path for r in res_[0]), set([]))

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_published, res_skipped = publish(dataset=source, recursive=True)
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    eq_(res_published, [Dataset(sub2.path)])
    eq_(res_skipped, [])

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_published, res_skipped = publish(dataset=source, to='target',
                                         recursive=True)
    eq_(res_published, [Dataset(sub2.path)])
    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was not transferred but now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo publish recursively, at least stating to consider
    # explicitly to copy .
    res_published, res_skipped = publish(
        '.',
        dataset=source, to='target',
        recursive=True
    )
    ok_(sub2_target.file_has_content('file.dat'))
    # note that this report makes little sense without path to the repository
    eq_(res_published, ['file.dat'])
def test_publish_simple(origin, src_path, dst_path):
    """Publish a single dataset to a plain-git sibling and verify sync.

    Covers initial publish, repeated (no-op) publish, and publish of a
    subsequent modification, each time checking that the target mirrors
    the source's branch histories.
    """
    # clone the source dataset; drop 'origin' so no tracking branch points
    # back at it and publishing must go through the 'target' remote
    src_ds = install(src_path, source=origin, recursive=True)
    src_ds.repo.remove_remote("origin")

    # a plain git repo serves as the publication target; park it on a
    # throwaway branch so 'master' can be pushed into it
    target_repo = GitRepo(dst_path, create=True)
    target_repo.checkout("TMP", ["-b"])
    src_ds.repo.add_remote("target", dst_path)

    published = publish(dataset=src_ds, to="target", result_xfm='datasets')
    eq_(published, [src_ds])

    def check_synced(*branches):
        # both repos must be clean and share identical branch histories
        assert_repo_status(src_ds.repo, annex=None)
        assert_repo_status(target_repo, annex=None)
        for branch in branches:
            eq_(list(target_repo.get_branch_commits_(branch)),
                list(src_ds.repo.get_branch_commits_(branch)))

    check_synced("master")

    # a repeated publish must be a no-op, not an error
    published = publish(dataset=src_ds, to="target")
    # and nothing is pushed
    assert_result_count(published, 1, status='notneeded')
    check_synced("master", "git-annex")

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # introduce a modification and publish it
    with open(opj(src_path, 'test_mod_file'), "w") as fobj:
        fobj.write("Some additional stuff.")
    src_ds.save(opj(src_path, 'test_mod_file'), to_git=True,
                message="Modified.")
    assert_repo_status(src_ds.repo, annex=None)

    published = publish(dataset=src_ds, to='target', result_xfm='datasets')
    eq_(published, [src_ds])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target_repo.get_branch_commits_("master")),
        list(src_ds.repo.get_branch_commits_("master")))

    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(src_ds.repo.get_branch_commits_("git-annex")).issubset(
        set(target_repo.get_branch_commits_("git-annex"))))
    eq_(filter_fsck_error_msg(src_ds.repo.fsck()),
        filter_fsck_error_msg(src_ds.repo.fsck(remote='target')))
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):
    """Publish annexed file content along with dataset history.

    Verifies that explicit paths transfer data, that the published result
    is consumable by a fresh clone, and that re-publishing reports
    'notneeded' until the git-annex branch changes again (e.g. via drop).
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # park the target on a throwaway branch so 'master' can be pushed into it
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source, to="target", path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res),
        set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess. So they both get the same information but in two
    # different commits. I do not observe such behavior of remote having git-annex
    # automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """Recursive publish with per-path result records.

    Modern-API variant: error results for missing sibling remotes,
    recursive publish of super- and subdatasets, ``--since`` no-ops,
    subdataset-only updates, explicit ``transfer_data`` and ``since=''``.
    """
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'),
    #          opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so 'master' can be pushed into it
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    # every published repo must mirror its source's branch histories
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(
        dataset=source, to='target',
        recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(
        res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there were no even attempt to diff within "subm 1"
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Recursive publish, oldest variant of this test.

    Uses the historical list-returning ``install`` and tuple-returning
    ``publish`` APIs; checks failure without sibling remotes, full
    recursive publish, and ``--since``-based selective publication.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so 'master' can be pushed into it
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    # every published repo must mirror its source's branch histories
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes, only current pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")
    # TODO: Doesn't work: https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", all_changes=True)
    source.repo.commit("", options=['-a'])

    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """Create an SSH sibling and verify its setup, replace and reconfigure.

    Walks through: a failing create with a bad sshurl, the refusal to
    overwrite an existing target without ``existing='replace'``, a
    successful replace via a localhost ssh url, and an
    ``existing='reconfigure'`` run that must only touch expected files.
    """
    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)

    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created
            # log and metadata files; strip those volatile entries from
            # the digest/mtime maps so before/after snapshots compare
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests
                             if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time; time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):
    """Publish annexed file content along with dataset history (variant).

    Same scenario as the other ``test_publish_with_data``: explicit paths
    transfer data, a fresh clone can consume the published result, and
    re-publishing is 'notneeded' until the git-annex branch changes.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # park the target on a throwaway branch so 'master' can be pushed into it
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source, to="target", path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res),
        set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess. So they both get the same information but in two
    # different commits. I do not observe such behavior of remote having git-annex
    # automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(
        list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path, source=dst_path,
                        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """Create an SSH sibling on localhost and verify its configuration.

    Walks through: an initial ``create_sibling`` call, the failure mode on
    an already-existing target directory, ``existing='replace'`` (with
    sshurl-derived and with explicitly passed target URLs), pushing to the
    sibling, and ``existing='reconfigure'`` (asserting which files on disk
    may legitimately change).
    """
    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        # NOTE(review): file handle from open() is never closed explicitly;
        # this relies on CPython refcounting — consider a with-statement
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests
                             if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))

                # processed entries are removed, so only unexplained
                # differences remain for the caller's comparison
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_clone_path):
    """Publish annexed content via the tuple-returning publish API.

    Older variant of the test: ``publish`` here returns a
    ``(published, skipped)`` tuple rather than result records, and the
    explicit ``fetch`` workaround for GitPython is still in place.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create an annex at target (parked on a temp branch so pushes work):
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")

    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    # first it would publish data and then push
    eq_(res, (['test-annex.dat', source], []))

    # XXX master was not checked out in dst!
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess. So they both get the same information but in two
    # different commits. I do not observe such behavior of remote having
    # git-annex automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    eq_(res, ([], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        result_paths.append(item.path if isinstance(item, Dataset) else item)

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    eq_({sub1.path, sub2.path}, set(result_paths))

    # if we publish again -- nothing to be published
    eq_(source.publish(to="target"), ([], []))
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    eq_(source.publish(to="target"), ([source], []))
    # and empty again if we try again
    eq_(source.publish(to="target"), ([], []))
def test_target_ssh_recursive(origin, src_path, target_path):
    """Create SSH siblings recursively, in nested and flat target layouts.

    Runs the whole scenario twice (``flat`` False/True): recursive
    ``create_sibling``, verification of the created repos, publishing,
    and ``existing='skip'`` + ``since=''`` handling for a subdataset
    added after the initial sibling creation (gh-1495 behavior).
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        # separate target tree per scenario
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            # flat layout encodes subdataset names into a prefix template
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_,
                             'prefix' if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we
        # published already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                existing='skip',
                ui=True,
                since=''
            )
        # so it was created on remote correctly and wasn't just skipped
        assert(Dataset(_path_(target_path_,
                              ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True, since='')  # just a smoke test
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """Create an SSH sibling on localhost and verify its configuration.

    Walks through: an initial ``create_sibling`` call (port-qualified
    sshurl), the failure mode on an already-existing target directory,
    ``existing='replace'`` (with sshurl-derived and with explicitly passed
    target URLs), pushing to the sibling, and ``existing='reconfigure'``
    (asserting which files on disk may legitimately change).

    Fix vs. previous version: the marker file used to probe
    ``existing='replace'`` is now written via a with-statement, so the
    file handle is closed (and content flushed) deterministically instead
    of relying on CPython refcounting.
    """
    # prepare src
    source = install(src_path, source=origin,
                     result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://localhost:22",
                       target_dir=target_path,
                       ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   name="local_target_alt",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        # (with-statement closes the handle and flushes the content)
        with open(opj(target_path, 'random'), 'w') as marker:
            marker.write('123')

        assert_create_sshwebserver(dataset=source,
                                   name="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   publish_by_default='master',
                                   existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))

                # remove processed entries so only unexplained differences
                # remain for the caller's comparison
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k for k in mtimes
            if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_clone_path):
    """Publish annexed content to an annex sibling (DEFAULT_BRANCH variant).

    Same scenario as older variants, but branch comparisons use
    ``DEFAULT_BRANCH`` / ``get_branch_commits_`` and the git-annex branch
    parity is delegated to ``assert_git_annex_branch_published``.
    Finishes with a git-annex ``fsck`` integrity cross-check between
    source, target, and source-as-seen-via-remote.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # create an annex repo at target, parked on a temp branch so pushes
    # do not hit the checked-out branch
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)
    # now, set up targets for the submodules:
    # they need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source, to="target", path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # we need compare target/<default branch>:
    target.checkout(DEFAULT_BRANCH)
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path, source=dst_path,
                        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets get an 'ok' here; no file content needs pushing,
    # since all content in super was pushed before
    # NOTE(review): comment used to say targets are "plain git repos", but
    # the sub targets above are AnnexRepos — verify which is intended
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Publishing moves annexed data and reports the pushed items.

    Oldest API variant: ``install`` returns a list, ``publish`` returns a
    ``(published, skipped)`` tuple, and explicit fetches work around a
    GitPython limitation after pushes.
    """
    # prepare the source dataset (with subdatasets) and obtain annex content
    src_ds = install(src_path, source=origin, recursive=True)[0]
    src_ds.repo.get('test-annex.dat')

    # annex repo at the destination, parked on a throwaway branch
    dst_repo = AnnexRepo(dst_path, create=True)
    dst_repo.checkout("TMP", ["-b"])
    src_ds.repo.add_remote("target", dst_path)

    # plain-git publication targets for both submodules, parked likewise
    for pub_path in (sub1_pub, sub2_pub):
        pub_repo = GitRepo(pub_path, create=True)
        pub_repo.checkout("TMP", ["-b"])
    subrepo1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    subrepo2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    subrepo1.add_remote("target", sub1_pub)
    subrepo2.add_remote("target", sub2_pub)

    # TMP: fetch first so GitPython does not fail after the push, when it
    # cannot resolve the SHA of the remote's previous commit that git
    # reports back after the push.
    # TODO: figure out when to fetch in general; alternatively, is there a
    # push option that prevents GitPython from failing?
    src_ds.repo.fetch("target")

    res = publish(dataset=src_ds, to="target", path=['test-annex.dat'])
    eq_(res, ([src_ds, 'test-annex.dat'], []))

    eq_(list(dst_repo.get_branch_commits("master")),
        list(src_ds.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(
        list(dst_repo.get_branch_commits("git-annex"))[1:],
        list(src_ds.repo.get_branch_commits("git-annex"))[1:])

    # compare against target/master content-wise
    dst_repo.checkout("master")
    eq_(dst_repo.file_has_content(['test-annex.dat']), [True])

    src_ds.repo.fetch("target")
    res = publish(dataset=src_ds, to="target", path=['.'])
    eq_(res, ([src_ds, 'test-annex.dat'], []))

    src_ds.repo.fetch("target")
    import glob
    res = publish(dataset=src_ds, to="target",
                  path=glob.glob1(src_ds.path, '*'))
    # '*' expansion includes the submodules, hence this publishes recursively
    published = [entry.path if isinstance(entry, Dataset) else entry
                 for entry in res[0]]
    eq_(
        {
            src_ds.path,
            opj(src_ds.path, "subm 1"),
            opj(src_ds.path, "subm 2"),
            'test-annex.dat'
        },
        set(published))
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Verify data publication and the reported (published, skipped) tuple."""

    def _new_parked_repo(path, cls):
        # create a repo of the given class and park it on a temp branch,
        # so pushes into it do not hit the checked-out branch
        repo = cls(path, create=True)
        repo.checkout("TMP", ["-b"])
        return repo

    # prepare src and make the annexed payload locally present
    source = install(src_path, source=origin, recursive=True)[0]
    source.repo.get('test-annex.dat')

    # destination annex
    target = _new_parked_repo(dst_path, AnnexRepo)
    source.repo.add_remote("target", dst_path)

    # now, set up plain-git targets for the submodules:
    _new_parked_repo(sub1_pub, GitRepo)
    _new_parked_repo(sub2_pub, GitRepo)
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")

    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    eq_(res, ([source, 'test-annex.dat'], []))

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    eq_(res, ([source, 'test-annex.dat'], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup
    result_paths = [x.path if isinstance(x, Dataset) else x for x in res[0]]
    expected = {
        source.path,
        opj(source.path, "subm 1"),
        opj(source.path, "subm 2"),
        'test-annex.dat',
    }
    eq_(expected, set(result_paths))