def test_fake_dates(path):
    """Fake-dates mode stamps commits with monotonically increasing times.

    Each new commit is one second later than the configured start, counted
    from the latest timestamp seen in any local branch.
    """
    repo = GitRepo(path, create=True, fake_dates=True)
    repo.add("foo")
    repo.commit("commit foo")
    start_seconds = repo.config.obtain("datalad.fake-dates-start")

    # The first commit lands one second past the configured start.
    eq_(start_seconds + 1, repo.get_commit_date())

    # The second commit lands two seconds past it.
    repo.add("bar")
    repo.commit("commit bar")
    eq_(start_seconds + 2, repo.get_commit_date())

    # Switching to an orphan branch keeps incrementing from the latest
    # timestamp in any local branch rather than restarting.
    repo.checkout("other", options=["--orphan"])
    with open(op.join(path, "baz"), "w") as fh:
        fh.write("baz content")
    repo.add("baz")
    repo.commit("commit baz")
    eq_(repo.get_active_branch(), "other")
    eq_(start_seconds + 3, repo.get_commit_date())
def test_GitRepo_ssh_pull(remote_path, repo_path):
    """Pull over SSH from a local "remote" repository.

    Verifies that the pulled content arrives and that the SSH connection
    manager registered (and created) the multiplexing socket.
    """
    from datalad import ssh_manager

    remote = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    local = GitRepo(repo_path, create=True)
    local.add_remote("ssh-remote", url)

    # Put a new commit on a fresh branch of the remote side.
    remote.checkout("ssh-test", ['-b'])
    with open(op.join(remote.path, "ssh_testfile.dat"), "w") as fobj:
        fobj.write("whatever")
    remote.add("ssh_testfile.dat")
    remote.commit("ssh_testfile.dat added.")

    # Before pulling, the file must be unknown to the local repo.
    assert_not_in("ssh_testfile.dat", local.get_indexed_files())

    # Pull the remote's active branch over SSH.
    local.pull(remote="ssh-remote", refspec=remote.get_active_branch())
    ok_clean_git(local.path, annex=False)

    # fetch() requested the connection, so the manager knows it ...
    assert_in(socket_path, ssh_manager._connections)
    # ... and the socket exists on disk.
    ok_(op.exists(socket_path))

    # The pulled file is now known locally.
    assert_in("ssh_testfile.dat", local.get_indexed_files())
def test_GitRepo_get_merge_base(src):
    """Exercise get_merge_base() on related, unrelated, and merged branches."""
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as fobj:
        fobj.write('load')
    repo.add('*')
    repo.commit('committing')

    # An empty treeish list is rejected outright.
    assert_raises(ValueError, repo.get_merge_base, [])

    first_branch = repo.get_active_branch()
    first_hexsha = repo.get_hexsha()
    eq_(len(first_hexsha), 40)
    # A branch's merge base with itself is its own tip.
    eq_(repo.get_merge_base(first_branch), first_hexsha)

    # Create an orphan branch sharing no history with the first one.
    orphan_branch = "_detach_"
    repo.checkout(orphan_branch, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert repo.get_indexed_files()  # we did commit
    # Unrelated histories have no common ancestor.
    assert repo.get_merge_base(first_branch) is None
    assert repo.get_merge_base([orphan_branch, first_branch]) is None

    # After merging the histories, the merge base is the first branch's tip.
    repo.merge(first_branch, allow_unrelated=True)
    eq_(repo.get_merge_base(first_branch), first_hexsha)

    # A non-existing branch in the list also yields None.
    assert repo.get_merge_base(['nonexistent', orphan_branch]) is None
def test_GitRepo_get_merge_base(src):
    # NOTE(review): an identically named test appears earlier in this file;
    # only the last definition survives at import time -- confirm which copy
    # is intended to be kept.
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    # Empty treeish list must raise.
    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    # Merge base of a branch with itself is its own tip.
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert (repo.get_indexed_files())  # we did commit
    # Unrelated histories: no common ancestor in either direction.
    assert (repo.get_merge_base(branch1) is None)
    assert (repo.get_merge_base([branch2, branch1]) is None)

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert (repo.get_merge_base(['nonexistent', branch2]) is None)
def test_publish_simple(origin, src_path, dst_path):
    """Publish a dataset to a plain git sibling: initial push, idempotent
    re-publish ('notneeded'), and publishing a follow-up modification."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(
        set(source.repo.get_branch_commits("git-annex")).issubset(
            set(target.get_branch_commits("git-annex"))))
def test_publish_simple(origin, src_path, dst_path):
    """Publish a dataset to a plain git sibling.

    NOTE(review): this is a duplicate of another ``test_publish_simple``
    definition in this file; only the last one is collected at import time.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
def test_publish_simple(origin, src_path, dst_path):
    """Publish a dataset to a plain git sibling (modernized variant).

    Uses ``assert_repo_status`` / ``get_branch_commits_`` / ``DEFAULT_BRANCH``
    instead of the older helpers, and cross-checks annex state via fsck.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into the default
    # branch is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # 'target/<default branch>' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'), to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # local and remote fsck reports should agree (modulo filtered noise)
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
def test_publish_plain_git(origin, src_path, dst_path):
    """Publish into a plain git target, including the force-push path.

    Covers: initial publish, idempotent re-publish ('notneeded'), publishing
    a new commit, rejection of a non-fast-forward push after an amend, and
    success with ``force=True``.
    """
    # TODO: Since it's mostly the same, melt with test_publish_simple
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets',
                  force=True)
    eq_(res, [source])
def test_publish_simple(origin, src_path, dst_path):
    """Oldest variant of the simple-publish test, against the tuple-returning
    ``publish`` API (``(published, skipped)``) and ``install(...)[0]``.

    NOTE(review): several ``test_publish_simple`` definitions exist in this
    file; only the last one is collected at import time.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")
    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.add(opj(src_path, 'test_mod_file'), git=True,
                    commit=True, msg="Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, ([source], []))

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
def test_GitRepo_push_n_checkout(orig_path, clone_path):
    """Push a new branch from a clone back to origin and check it out there."""
    origin = GitRepo(orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    # Commit a new file in the clone.
    with open(op.join(clone_path, filename), 'w') as fobj:
        fobj.write("New file.")
    clone.add(filename)
    clone.commit("new file added.")

    # TODO: need checkout first:
    clone.push('origin', '+master:new-branch')
    origin.checkout('new-branch')
    # After the checkout, the pushed file exists in origin's worktree.
    ok_(op.exists(op.join(orig_path, filename)))
def test_GitRepo_ssh_push(repo_path, remote_path):
    """Push over SSH to a local "remote" repo.

    Verifies PushInfo reporting, SSH-manager socket creation, rejection of a
    non-fast-forward push after an amend, and success with ``force=True``.
    """
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify local repo:
    repo.checkout("ssh-test", ['-b'])
    with open(op.join(repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    repo.add("ssh_testfile.dat")
    repo.commit("ssh_testfile.dat added.")

    # file is not known to the remote yet:
    assert_not_in("ssh_testfile.dat", remote_repo.get_indexed_files())

    # push changes:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    # test PushInfo object for
    # the updated remote-tracking ref (GitPython PushInfo.remote_ref)
    assert_in("ssh-remote/ssh-test",
              [commit.remote_ref.name for commit in pushed])

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # remote now knows the changes:
    assert_in("ssh-test", remote_repo.get_branches())
    assert_in("ssh_testfile.dat", remote_repo.get_files("ssh-test"))

    # amend to make it require "--force":
    repo.commit("amended", options=['--amend'])
    # push without --force should yield an error:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    assert_in("[rejected] (non-fast-forward)", pushed[0].summary)
    # now push using force:
    repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
    # correct commit message in remote:
    assert_in("amended",
              list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)
def test_get_commit_date(path):
    """get_commit_date() is None without commits, else epoch seconds of HEAD."""
    repo = GitRepo(path, create=True)
    eq_(repo.get_commit_date(), None)

    # Commit with an explicit date ...
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    repo.add('1')
    repo.commit("committed", date=DATE)

    # ... and read it back through a freshly constructed instance.
    repo = GitRepo(path, create=True)
    commit_seconds = repo.get_commit_date()
    neq_(commit_seconds, None)
    eq_(commit_seconds, DATE_EPOCH)
    eq_(commit_seconds, repo.get_commit_date('master'))

    # Still retrievable by branch name even from a detached HEAD.
    repo.checkout(repo.get_hexsha())
    eq_(repo.get_active_branch(), None)
    eq_(commit_seconds, repo.get_commit_date('master'))
def test_GitRepo_remote_update(path1, path2, path3):
    """update_remote() fetches branches from all configured remotes.

    Sets up two remotes, each with a 'master' and one extra branch, then
    checks that both extra branches become checkout-able in the first repo.
    """
    git1 = GitRepo(path1)
    git2 = GitRepo(path2)
    git3 = GitRepo(path3)

    git1.add_remote('git2', path2)
    git1.add_remote('git3', path3)

    # Setting up remote 'git2'
    with open(op.join(path2, 'masterfile'), 'w') as f:
        f.write("git2 in master")
    git2.add('masterfile')
    git2.commit("Add something to master.")
    git2.checkout('branch2', ['-b'])
    with open(op.join(path2, 'branch2file'), 'w') as f:
        f.write("git2 in branch2")
    git2.add('branch2file')
    git2.commit("Add something to branch2.")

    # Setting up remote 'git3'
    with open(op.join(path3, 'masterfile'), 'w') as f:
        f.write("git3 in master")
    git3.add('masterfile')
    git3.commit("Add something to master.")
    git3.checkout('branch3', ['-b'])
    with open(op.join(path3, 'branch3file'), 'w') as f:
        f.write("git3 in branch3")
    git3.add('branch3file')
    git3.commit("Add something to branch3.")

    git1.update_remote()

    # checkouts are 'tests' themselves, since they'll raise CommandError
    # if something went wrong
    git1.checkout('branch2')
    git1.checkout('branch3')

    branches1 = git1.get_branches()
    eq_({'branch2', 'branch3'}, set(branches1))
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Publish annexed data to an annex target, then publish via '.' and glob
    paths (the latter triggering recursive publication of submodules)."""
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    eq_(res, ([source, 'test-annex.dat'], []))

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs.
    # Probably fine, but figure out, when exactly to expect this for proper
    # testing:
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    eq_(res, ([source, 'test-annex.dat'], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    # contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        if isinstance(item, Dataset):
            result_paths.append(item.path)
        else:
            result_paths.append(item)
    eq_({source.path,
         opj(source.path, "subm 1"),
         opj(source.path, "subm 2"),
         'test-annex.dat'},
        set(result_paths))
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):
    """Extended data-publishing test: also verifies the published dataset is
    consumable from a fresh clone, and re-publishing after a drop.

    NOTE(review): an identically named, shorter test exists in this file;
    only the last definition is collected at import time.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushing into 'master' is allowed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    # first it would publish data and then push
    eq_(res, (['test-annex.dat', source], []))

    # XXX master was not checked out in dst!
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess. So they both get the same information but in two
    # different commits. I do not observe such behavior of remote having
    # git-annex automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    eq_(res, ([], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    # contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        result_paths.append(item.path if isinstance(item, Dataset) else item)

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    eq_({sub1.path, sub2.path}, set(result_paths))

    # if we publish again -- nothing to be published
    eq_(source.publish(to="target"), ([], []))
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    eq_(source.publish(to="target"), ([source], []))
    # and empty again if we try again
    eq_(source.publish(to="target"), ([], []))
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """Recursive publication of a dataset hierarchy.

    Exercises: failure when subdatasets lack the target sibling, full
    recursive publish, publishing to the tracked origin, ``--since``
    behavior, and data transfer into an annex subdataset target.
    """
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin_path, recursive=True)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("Unknown target sibling 'target'", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]),
        set(opj(*([source.path] + x)) for x in ([], ['subm 1'], ['subm 2'])))
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.
    # By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]), set())

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), set([]))

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        eq_(set(r.path for r in res_[0]), set([]))

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_published, res_skipped = publish(dataset=source, recursive=True)
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    eq_(res_published, [Dataset(sub2.path)])
    eq_(res_skipped, [])

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_published, res_skipped = publish(dataset=source, to='target',
                                         recursive=True)
    eq_(res_published, [Dataset(sub2.path)])
    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was not transferred but now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo publish recursively, at least stating to consider
    # explicitly to copy .
    res_published, res_skipped = publish(
        '.', dataset=source, to='target', recursive=True
    )
    ok_(sub2_target.file_has_content('file.dat'))
    # note that this report makes little sense without path to the repository
    eq_(res_published, ['file.dat'])
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """End-to-end check of recursive ``publish``.

    Exercises, in order:
    - recursive publish with no submodule siblings: 1 'ok' + 2 'error' results
    - after wiring 'target' remotes into both submodules: full success, and
      target repos mirror 'master' and 'git-annex' branches of their sources
    - publishing to the tracked origin syncs the diverged 'git-annex' branches
    - ``--since`` variants report 'notneeded' when nothing changed
    - a change inside subdataset '2' publishes that subdataset alone; annexed
      file content only transfers with ``transfer_data='all'``
    - ``since=''`` (since last merge point) after saving the superdataset

    NOTE(review): the arguments are presumably filesystem paths supplied by
    test decorators outside this view -- confirm against the full module.
    """
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'),
    #          opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park the target on a throwaway branch so pushes into 'master' succeed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    # every published target must mirror its source on both branches
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes,
    # nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    # ... but its annexed content was not
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested
    ## data transfer
    res_ = publish(dataset=source, to='target', recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there were no even attempt to diff within "subm 1"
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Recursive publishing with the tuple-returning ``publish`` API.

    Top-level publish must refuse while subdatasets lack a 'target' sibling,
    succeed once the siblings exist, mirror 'master' and 'git-annex' on every
    target, and honor ``--since`` when deciding what to push.
    """
    # clone the test repo to serve as the publication source
    src_ds = install(src_path, source=origin, recursive=True)[0]

    # plain git repo as the top-level target; park it on a throwaway
    # branch so pushes into 'master' can land
    top_target = GitRepo(dst_path, create=True)
    top_target.checkout("TMP", ["-b"])
    src_ds.repo.add_remote("target", dst_path)

    # without remotes on the subdatasets, recursive publish must refuse
    with assert_raises(ValueError) as err_ctx:
        publish(dataset=src_ds, to="target", recursive=True)
    assert_in("No sibling 'target' found", exc_str(err_ctx.exception))

    # provision a target per submodule, each parked on TMP as well
    target1 = GitRepo(sub1_pub, create=True)
    target1.checkout("TMP", ["-b"])
    target2 = AnnexRepo(sub2_pub, create=True)
    target2.checkout("TMP", ["-b"])
    subrepo1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    subrepo2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    subrepo1.add_remote("target", sub1_pub)
    subrepo2.add_remote("target", sub2_pub)

    # now the recursive publish should go through
    with swallow_logs(new_level=logging.DEBUG) as logs:
        result = publish(dataset=src_ds, to="target", recursive=True)
        assert_not_in(
            'forced update', logs.out,
            "we probably haven't merged git-annex before pushing"
        )

    # result is a (published, skipped) pair of lists of Datasets
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(result, tuple)
    assert_is_instance(result[0], list)
    assert_is_instance(result[1], list)
    eq_(result[1], [])  # nothing failed/was skipped
    for ds in result[0]:
        assert_is_instance(ds, Dataset)
    eq_({result[0][0].path, result[0][1].path, result[0][2].path},
        {src_path, subrepo1.path, subrepo2.path})

    # each target mirrors its source on 'master' and 'git-annex'
    for pushed_to, pushed_from in ((top_target, src_ds.repo),
                                   (target1, subrepo1),
                                   (target2, subrepo2)):
        eq_(list(pushed_to.get_branch_commits("master")),
            list(pushed_from.get_branch_commits("master")))
        eq_(list(pushed_to.get_branch_commits("git-annex")),
            list(pushed_from.get_branch_commits("git-annex")))

    # --since default: nothing changed, so only the current dataset is pushed
    since_res = publish(dataset=src_ds, recursive=True)
    eq_({r.path for r in since_res[0]}, {src_path})

    # reaching one commit back makes all three eligible again
    since_res = publish(dataset=src_ds, recursive=True, since='HEAD^')
    eq_({r.path for r in since_res[0]},
        {src_path, subrepo1.path, subrepo2.path})

    # touch a single subdataset ...
    with open(opj(subrepo2.path, "file.txt"), 'w') as fobj:
        fobj.write('')
    subrepo2.add('file.txt')
    subrepo2.commit("")
    # TODO: Doesn't work: https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", all_changes=True)
    src_ds.repo.commit("", options=['-a'])
    # ... and only the changed subdataset plus its super are published
    since_res = publish(dataset=src_ds, recursive=True)
    eq_({r.path for r in since_res[0]}, {src_path, subrepo2.path})
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):
    """Recursive ``publish`` over a cloned hierarchy (result-record API).

    Covers: failure records when submodules lack a 'target' sibling; full
    success once siblings exist (targets mirror 'master' and 'git-annex');
    syncing 'git-annex' back to origin; 'notneeded' results for ``--since``
    with no changes; publishing a lone subdataset change; data transfer only
    with ``transfer_data='all'``; and ``since=''`` after a superdataset save.

    NOTE(review): arguments are presumably paths injected by test decorators
    outside this view -- confirm against the full module.
    """
    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'),
    #          opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park on a throwaway branch so pushes into 'master' succeed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(
        res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    # published targets mirror their sources on both branches
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with --since.  By default since no changes,
    # nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(
            res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok
    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    # ... the annexed content, however, was not
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested
    ## data transfer
    res_ = publish(
        dataset=source, to='target', recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(
        res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there were no even attempt to diff within "subm 1"
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Recursive publishing with the tuple-returning ``publish`` API.

    Asserts that recursive publish raises ``ValueError`` while subdatasets
    have no 'target' sibling, succeeds once siblings exist (mirroring
    'master' and 'git-annex' to every target), and that ``--since``
    restricts the set of published datasets to the changed ones.

    NOTE(review): arguments are presumably paths injected by test decorators
    outside this view -- confirm against the full module.
    """
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    # park on a throwaway branch so pushes into 'master' succeed
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing")

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    # every target mirrors its source on 'master' and 'git-annex'
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test for publishing with --since. By default since no changes,
    # only current pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")
    # TODO: Doesn't work: https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", auto_add_changes=True)
    source.repo.commit("", options=['-a'])
    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):
    """Publishing annexed content: explicit path, '.', and glob expansion."""
    # clone the hierarchy and fetch the annexed payload locally
    src_ds = install(src_path, source=origin, recursive=True)[0]
    src_ds.repo.get('test-annex.dat')

    # annex repo as the top-level target; park it on a throwaway branch
    # so pushes into 'master' can land
    top_target = AnnexRepo(dst_path, create=True)
    top_target.checkout("TMP", ["-b"])
    src_ds.repo.add_remote("target", dst_path)

    # plain git targets for the two submodules, parked the same way
    target1 = GitRepo(sub1_pub, create=True)
    target1.checkout("TMP", ["-b"])
    target2 = GitRepo(sub2_pub, create=True)
    target2.checkout("TMP", ["-b"])
    subrepo1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    subrepo2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    subrepo1.add_remote("target", sub1_pub)
    subrepo2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    src_ds.repo.fetch("target")

    # publish one explicitly named annexed file
    pub_res = publish(dataset=src_ds, to="target", path=['test-annex.dat'])
    eq_(pub_res, ([src_ds, 'test-annex.dat'], []))

    eq_(list(top_target.get_branch_commits("master")),
        list(src_ds.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(top_target.get_branch_commits("git-annex"))[1:],
        list(src_ds.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    top_target.checkout("master")
    eq_(top_target.file_has_content(['test-annex.dat']), [True])

    # publishing '.' reports the same outcome
    src_ds.repo.fetch("target")
    pub_res = publish(dataset=src_ds, to="target", path=['.'])
    eq_(pub_res, ([src_ds, 'test-annex.dat'], []))

    src_ds.repo.fetch("target")
    import glob
    pub_res = publish(dataset=src_ds, to="target",
                      path=glob.glob1(src_ds.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # normalize results to paths (Dataset entries contribute their .path)
    result_paths = [entry.path if isinstance(entry, Dataset) else entry
                    for entry in pub_res[0]]
    eq_({src_ds.path,
         opj(src_ds.path, "subm 1"),
         opj(src_ds.path, "subm 2"),
         'test-annex.dat'},
        set(result_paths))