示例#1
0
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible', publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
示例#2
0
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(set(source.repo.get_branch_commits("git-annex")).issubset(
        set(target.get_branch_commits("git-annex"))))
示例#3
0
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets', force=True)
    eq_(res, [source])
示例#4
0
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
示例#5
0
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, dest="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.git_checkout("TMP", "-b")
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.git_checkout("TMP", "-b")
    sub1 = GitRepo(opj(src_path, 'sub1'), create=False)
    sub2 = GitRepo(opj(src_path, 'sub2'), create=False)
    sub1.git_remote_add("target", sub1_pub)
    sub2.git_remote_add("target", sub2_pub)

    # publish recursively
    res = publish(dataset=source, dest="target", recursive=True)

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, list)
    for item in res:
        assert_is_instance(item, Dataset)
    eq_(res[0].path, src_path)
    eq_(res[1].path, sub1.path)
    eq_(res[2].path, sub2.path)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
    eq_(list(sub1_target.git_get_branch_commits("master")),
        list(sub1.git_get_branch_commits("master")))
    eq_(list(sub1_target.git_get_branch_commits("git-annex")),
        list(sub1.git_get_branch_commits("git-annex")))
    eq_(list(sub2_target.git_get_branch_commits("master")),
        list(sub2.git_get_branch_commits("master")))
    eq_(list(sub2_target.git_get_branch_commits("git-annex")),
        list(sub2.git_get_branch_commits("git-annex")))
示例#6
0
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    eq_(res, ([source], []))

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.add(opj(src_path, 'test_mod_file'), git=True,
                    commit=True, msg="Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, ([source], []))

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
示例#7
0
def test_publish_file_handle(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get', annex_options=['test-annex.dat',
                                                         '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])
示例#8
0
def test_publish_with_data(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target", with_data=['test-annex.dat'])
    eq_(res, source)

    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(target.git_get_branch_commits("git-annex"))[1:],
        list(source.repo.git_get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.git_checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])
示例#9
0
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does fetch -- the related problem
            # so skipping this early
            raise SkipTest('TODO: Make publish work for flat datasets, it currently breaks')

        remote_name = 'remote-' + str(flat)
        # TODO: there is f.ckup with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(
                target=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(target_path_, basename(src_path) if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)
示例#10
0
def test_smth_about_not_supported(p1, p2):
    source = Dataset(p1).create()
    from datalad.support.network import PathRI
    source.create_sibling(
        'ssh://localhost' + PathRI(p2).posixpath,
        name='target1')
    # source.publish(to='target1')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(
            publish(to='target1', since='HEAD^^', on_failure='ignore'),
            1,
            status='impossible',
            message="fatal: bad revision 'HEAD^^'")
        # but now let's add one more commit, we should be able to pusblish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
示例#11
0
def test_publish_submodule(origin, src_path, target_1, target_2):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out, what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")

    # first, try publishing from super dataset using `path`
    source_super = source
    source_sub = Dataset(opj(src_path, 'sub1'))
    target = GitRepo(target_1, create=True)
    target.git_checkout("TMP", "-b")
    source_sub.repo.git_remote_add("target", target_1)

    res = publish(dataset=source_super, dest="target", path="sub1")
    assert_is_instance(res, Dataset)
    eq_(res.path, source_sub.path)

    eq_(list(GitRepo(target_1, create=False).git_get_branch_commits("master")),
        list(source_sub.repo.git_get_branch_commits("master")))
    eq_(list(GitRepo(target_1, create=False).git_get_branch_commits("git-annex")),
        list(source_sub.repo.git_get_branch_commits("git-annex")))

    # now, publish directly from within submodule:
    target = GitRepo(target_2, create=True)
    target.git_checkout("TMP", "-b")
    source_sub.repo.git_remote_add("target2", target_2)

    res = publish(dataset=source_sub, dest="target2")
    eq_(res, source_sub)

    eq_(list(GitRepo(target_2, create=False).git_get_branch_commits("master")),
        list(source_sub.repo.git_get_branch_commits("master")))
    eq_(list(GitRepo(target_2, create=False).git_get_branch_commits("git-annex")),
        list(source_sub.repo.git_get_branch_commits("git-annex")))
示例#12
0
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source, to="target", recursive=True, on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(
        res, 2, status='error',
        message=("Unknown target sibling '%s' for publication", 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead', where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(
        res, 3, type='dataset')
    eq_({r['path'] for r in res},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with  --since.  By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(
        res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(
            res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(
        dataset=source, to='target',
        recursive=True,
        transfer_data='all'
    )
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(
        res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there were no even attempt to diff within "subm 1"
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
示例#13
0
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_, 'prefix' if flat else "").rstrip(
                os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we published
        # already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       existing='skip',
                                       ui=True,
                                       since='')
        # so it was created on remote correctly and wasn't just skipped
        assert (Dataset(
            _path_(target_path_,
                   ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True,
                since='')  # just a smoke test
示例#14
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
        assert_raises(GitCommandError):
        create_sibling(dataset=source,
                       target="local_target",
                       sshurl="ssh://localhost",
                       target_dir=target_path,
                       ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    eq_("Target directory %s already exists." % target_path, str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
示例#15
0
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin_path, recursive=True)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("Unknown target sibling 'target'", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead', where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]),
        set(opj(*([source.path] + x)) for x in ([], ['subm 1'], ['subm 2'])))
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with  --since.  By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]), set())

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), set([]))

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        eq_(set(r.path for r in res_[0]), set([]))

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_published, res_skipped = publish(dataset=source, recursive=True)
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    eq_(res_published, [Dataset(sub2.path)])
    eq_(res_skipped, [])

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_published, res_skipped = publish(dataset=source, to='target', recursive=True)
    eq_(res_published, [Dataset(sub2.path)])
    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was not transferred but now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo publish recursively, at least stating to consider
    # explicitly to copy .
    res_published, res_skipped = publish(
        '.',
        dataset=source, to='target',
        recursive=True
    )
    ok_(sub2_target.file_has_content('file.dat'))
    eq_(res_published, ['file.dat'])  # note that this report makes little sense without path to the repository
示例#16
0
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))
    eq_(list(target.get_branch_commits_("git-annex")),
        list(source.repo.get_branch_commits_("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'),
                to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_("master")),
        list(source.repo.get_branch_commits_("master")))
    # Since git-annex 6.20170220, post-receive hook gets triggered
    # which results in entry being added for that repo into uuid.log on remote
    # end since then finally git-annex senses that it needs to init that remote,
    # so it might have 1 more commit than local.
    # see https://github.com/datalad/datalad/issues/1319
    ok_(
        set(source.repo.get_branch_commits_("git-annex")).issubset(
            set(target.get_branch_commits_("git-annex"))))

    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
示例#17
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # the need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source, to="target", path=['test-annex.dat'], result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess.  So they both get the same information but in two
    # different commits.  I do not observe such behavior of remote having git-annex
    # automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source, to="target", path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
示例#18
0
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source,
                  to="target",
                  recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(res,
                        2,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test for publishing with  --since.  By default since no changes, nothing pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because original test author used
    # and explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    ## only updates published, i.e. just the subdataset, super wasn't altered
    ## nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was transferred
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    ## but now we can redo publish recursively, with explicitly requested data transfer
    res_ = publish(dataset=source,
                   to='target',
                   recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save those present changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there were no even attempt to diff within "subm 1"
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should have not been even touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
示例#19
0
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test for publishing with  --since.  By default since no changes, only current pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")
    # TODO: Doesn't work:  https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", all_changes=True)
    source.repo.commit("", options=['-a'])

    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})
示例#20
0
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'),
               to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish,
                  dataset=source,
                  to='target',
                  result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source,
                  to='target',
                  result_xfm='datasets',
                  force=True)
    eq_(res, [source])
示例#21
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
        assert_raises(GitCommandError):
            create_sibling(
                dataset=source,
                target="local_target",
                sshurl="ssh://localhost",
                target_dir=target_path,
                ui=True)
        # is not actually happening on one of the two basic cases -- TODO figure it out
        # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time; time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
示例#22
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # the need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source,
                  to="target",
                  path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess.  So they both get the same information but in two
    # different commits.  I do not observe such behavior of remote having git-annex
    # automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(
        list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path,
                        source=dst_path,
                        result_xfm='datasets',
                        return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
示例#23
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
示例#24
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub, dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    # first it would publish data and then push
    eq_(res, (['test-annex.dat', source], []))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    # yoh: they differ because local annex records information about now
    # file being available in that remote, and remote one does it via a call in
    # the hook I guess.  So they both get the same information but in two
    # different commits.  I do not observe such behavior of remote having git-annex
    # automagically updated in older clones
    # which do not have post-receive hook on remote side
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(
        dst_clone_path, source=dst_path,
        result_xfm='datasets', return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    eq_(res, ([], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target", path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        result_paths.append(item.path if isinstance(item, Dataset) else item)

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    eq_({sub1.path, sub2.path},
        set(result_paths))

    # if we publish again -- nothing to be published
    eq_(source.publish(to="target"), ([], []))
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    eq_(source.publish(to="target"), ([source], []))
    eq_(source.publish(to="target"), ([], []))  # and empty again if we try again
示例#25
0
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_, 'prefix' if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we published
        # already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                existing='skip',
                ui=True,
                since=''
            )
        # so it was created on remote correctly and wasn't just skipped
        assert(Dataset(_path_(target_path_, ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True, since='') # just a smoke test
示例#26
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://localhost:22",
                       target_dir=target_path,
                       ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   name="local_target_alt",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir" %
            target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   name="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   publish_by_default='master',
                                   existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
示例#27
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub,
                           dst_clone_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    # the need to be annexes, because we want to be able to copy data to them
    # further down
    sub1_target = AnnexRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    res = publish(dataset=source,
                  to="target",
                  path=['test-annex.dat'],
                  result_xfm='paths')
    # first it would publish data and then push
    # TODO order is not fixed (yet)
    #eq_(res, [opj(source.path, 'test-annex.dat'), source.path])
    eq_(set(res), set([opj(source.path, 'test-annex.dat'), source.path]))
    # XXX master was not checked out in dst!

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # we need compare target/<default branch>:
    target.checkout(DEFAULT_BRANCH)
    ok_(target.file_has_content('test-annex.dat'))

    # make sure that whatever we published is actually consumable
    dst_clone = install(dst_clone_path,
                        source=dst_path,
                        result_xfm='datasets',
                        return_type='item-or-list')
    nok_(dst_clone.repo.file_has_content('test-annex.dat'))
    res = dst_clone.get('test-annex.dat')
    ok_(dst_clone.repo.file_has_content('test-annex.dat'))

    res = publish(dataset=source, to="target", path=['.'])
    # there is nothing to publish on 2nd attempt
    #eq_(res, ([source, 'test-annex.dat'], []))
    assert_result_count(res, 1, status='notneeded')

    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # only the subdatasets, targets are plain git repos, hence
    # no file content is pushed, all content in super was pushed
    # before
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', path=sub1.path)
    assert_result_count(res, 1, status='ok', path=sub2.path)
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # if we publish again -- nothing to be published
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)
    # if we drop a file and publish again -- dataset should be published
    # since git-annex branch was updated
    source.drop('test-annex.dat')
    res = source.publish(to="target")
    assert_result_count(res, 1, status='ok', path=source.path)
    # and empty again if we try again
    res = source.publish(to="target")
    assert_result_count(res, 1, status='notneeded', path=source.path)

    # data integrity check looks identical from all perspectives
    # minus "note" statements from git-annex
    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
    eq_(filter_fsck_error_msg(target.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
示例#28
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")

    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    eq_(res, ([source, 'test-annex.dat'], []))

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(
        list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    eq_(res, ([source, 'test-annex.dat'], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source,
                  to="target",
                  path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        if isinstance(item, Dataset):
            result_paths.append(item.path)
        else:
            result_paths.append(item)
    eq_(
        {
            source.path,
            opj(source.path, "subm 1"),
            opj(source.path, "subm 2"), 'test-annex.dat'
        }, set(result_paths))
示例#29
0
def test_publish_with_data(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = GitRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # TMP: Insert the fetch to prevent GitPython to fail after the push,
    # because it cannot resolve the SHA of the old commit of the remote,
    # that git reports back after the push.
    # TODO: Figure out, when to fetch things in general; Alternatively:
    # Is there an option for push, that prevents GitPython from failing?
    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['test-annex.dat'])
    eq_(res, ([source, 'test-annex.dat'], []))

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    # TODO: last commit in git-annex branch differs. Probably fine,
    # but figure out, when exactly to expect this for proper testing:
    eq_(list(target.get_branch_commits("git-annex"))[1:],
        list(source.repo.get_branch_commits("git-annex"))[1:])

    # we need compare target/master:
    target.checkout("master")
    eq_(target.file_has_content(['test-annex.dat']), [True])

    source.repo.fetch("target")
    res = publish(dataset=source, to="target", path=['.'])
    eq_(res, ([source, 'test-annex.dat'], []))

    source.repo.fetch("target")
    import glob
    res = publish(dataset=source, to="target", path=glob.glob1(source.path, '*'))
    # Note: This leads to recursive publishing, since expansion of '*'
    #       contains the submodules themselves in this setup

    # collect result paths:
    result_paths = []
    for item in res[0]:
        if isinstance(item, Dataset):
            result_paths.append(item.path)
        else:
            result_paths.append(item)
    eq_({source.path, opj(source.path, "subm 1"),
         opj(source.path, "subm 2"), 'test-annex.dat'},
        set(result_paths))