Example #1
def test_run_failure(path):
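    # A failed command must not be committed: the repo is left dirty, the
    # prepared message is stashed in COMMIT_EDITMSG for a manual save, and a
    # later rerun only raises if the new exit code differs from the recorded one.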
    ds = Dataset(path).create()

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat grows) > grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(ds.repo.repo.git_dir, "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", save=False)
    ds.save(message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(ds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()
Example #2
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case ;)  above should fail first if smth is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
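
A minimal sketch of a call to this helper, with illustrative values (this URL and its fields are not from the original suite):

    _check_ri("http://host/path", URL,
              scheme='http', hostname='host', path='/path')
    # no localpath is given, so the helper additionally verifies that
    # accessing .localpath on this remote URL raises ValueError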
Example #3
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = Dataset(origin_path).create()
    target = mk_push_target(origin,
                            'target',
                            target_path,
                            annex=True,
                            bare=False)
    origin.push(to='target')
    assert_repo_status(origin.path)
    assert_repo_status(target.path)
    eq_(origin.repo.get_hexsha('master'), target.get_hexsha('master'))

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    assert_repo_status(origin.path)
    neq_(origin.repo.get_hexsha('master'), target.get_hexsha('master'))
    # now push
    res = origin.push(to='target')
    assert_result_count(res,
                        1,
                        status='ok',
                        type='dataset',
                        path=origin.path,
                        action='publish',
                        target='target',
                        operations=['fast-forward'])
    eq_(origin.repo.get_hexsha('master'), target.get_hexsha('master'))
Example #4
def test_rerun_octopus(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "topic-1"])
    ds.run("echo bar >bar")
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "topic-2"])
    ds.run("echo baz >baz")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.call_git(["merge", "-m", "Merge octopus", "topic-1", "topic-2"])
    # o-.               f_M
    # |\ \
    # | | o             e_r
    # | o |             d_r
    # | |/
    # o |               c_n
    # |/
    # o                 b_r
    # o                 a_n
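# (Reading these graphs: letters name commits; "_n" marks a plain non-run
# commit, "_r" a run commit, and uppercase "_R"/"_C"/"_M" mark commits that a
# rerun recreated, copied, or re-merged.)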

    ds.rerun(since="", onto="")
    neq_(ds.repo.get_hexsha("HEAD^3"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^3"))
    eq_(ds.repo.get_hexsha("HEAD~3"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~3"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "~"))
Example #5
def test_rerun_exclude_side(path):
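    # Passing the merged-in side branch as `since` excludes it from the rerun:
    # only the mainline run commit is redone (c_r -> c_R) and b_r is reused.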
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.run("echo bar >bar")
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o                 d_n
    # |\
    # o |               c_r
    # | o               b_r
    # |/
    # o                 a_n

    ds.rerun("HEAD", since=DEFAULT_BRANCH + "^2", onto="")
    # o                 d_M
    # |\
    # o |               c_R
    # | o               b_r
    # |/
    # o                 a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH), ds.repo.get_hexsha())
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds.repo.get_hexsha("HEAD^2"))
Example #6
def test_drop(dspath, records):
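    # ukb_update's `drop` switch trades disk usage: 'archives' drops the
    # downloaded ZIPs but keeps extracted content present, while 'extracted'
    # keeps the ZIPs and drops the extracted files (still retrievable).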
    make_datarecord_zips('12345', records)
    ds = create(dspath)
    ds.ukb_init('12345', ['20227_2_0', '25747_2_0', '25748_2_0', '25748_3_0'])
    ds.config.add('datalad.ukbiobank.keyfile', 'dummy', where='local')
    bin_dir = make_ukbfetch(ds, records)

    # baseline
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True)
    zips_in_ds = list(ds.pathobj.glob('**/*.zip'))
    neq_(zips_in_ds, [])

    # drop archives
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True, drop='archives')
    # no ZIPs can be found, also not in the annex
    eq_(list(ds.pathobj.glob('**/*.zip')), [])
    # we can get all we want (or rather still have it)
    assert_status('notneeded', ds.get('.'))

    # now drop extracted content instead
    with patch.dict(
            'os.environ',
            {'PATH': '{}:{}'.format(str(bin_dir), os.environ['PATH'])}):
        ds.ukb_update(merge=True, force=True, drop='extracted')
    eq_(list(ds.pathobj.glob('**/*.zip')), zips_in_ds)
    # we can get all
    assert_status('ok', ds.get('.'))
    # a non-zip content file is still around
    eq_((ds.pathobj / '25747_2_0.adv').read_text(), '25747_2_0.adv')
Example #7
def test_diff_rsync_syntax(path):
    # three nested datasets
    ds = Dataset(path).create()
    subds = ds.create('sub')
    subsubds = subds.create(Path('subdir', 'deep'))
    justtop = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub')
    # we only get a single result, the subdataset in question
    assert_result_count(justtop, 1)
    assert_result_count(justtop, 1, type='dataset', path=subds.path)
    # now with "peak inside the dataset" syntax
    inside = ds.diff(fr=PRE_INIT_COMMIT_SHA, path='sub' + os.sep)
    # we get both subdatasets, but nothing else inside the nested one
    assert_result_count(inside, 2, type='dataset')
    assert_result_count(inside, 1, type='dataset', path=subds.path)
    assert_result_count(inside, 1, type='dataset', path=subsubds.path)
    assert_result_count(inside, 0, type='file', parentds=subsubds.path)
    # if we point to the subdir in 'sub' the reporting wrt the subsubds
    # doesn't change. It is merely a path constraint within the queried
    # subds, but because the subsubds is still underneath it, nothing changes
    inside_subdir = ds.diff(fr=PRE_INIT_COMMIT_SHA, path=op.join('sub', 'subdir'))
    assert_result_count(inside_subdir, 2, type='dataset')
    assert_result_count(inside_subdir, 1, type='dataset', path=subds.path)
    assert_result_count(inside_subdir, 1, type='dataset', path=subsubds.path)
    assert_result_count(inside_subdir, 0, type='file', parentds=subsubds.path)
    # but the rest is different (e.g. all the stuff in .datalad is gone)
    neq_(inside, inside_subdir)
    # just for completeness, we get more when going full recursive
    rec = ds.diff(fr=PRE_INIT_COMMIT_SHA, recursive=True, path='sub' + os.sep)
    assert(len(inside) < len(rec))
Example #8
def test_rerun_fastforwardable_mutator(path):
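    # The --no-ff merge of the mutator branch is fast-forwardable, so a full
    # rerun is expected to flatten it: b_R ends up directly on the tip and no
    # second parent survives.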
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >>foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side", "--no-ff"])
    #  o                 c_n
    #  |\
    #  | o               b_r
    #  |/
    #  o                 a_n

    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    #  o                 b_R
    #  o                 b_r
    #  o                 a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"), ds.repo.get_hexsha())

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    #  o                 b_R
    #  o                 c_n
    #  |\
    #  | o               b_r
    #  |/
    #  o                 a_n
    eq_(ds.repo.get_active_branch(), DEFAULT_BRANCH)
    assert_false(ds.repo.commit_exists(DEFAULT_BRANCH + "^2"))
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "^"))
Example #9
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
Example #10
def test_rerun_onto(path):
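    # `onto` selects the base rerun replays onto (leaving a detached HEAD when
    # it is not the current branch), while an empty string means "the parent
    # of the first revision being rerun".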
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))
Example #11
def test_push_wanted(srcpath, dstpath):
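    # Content annotated with distribution-restrictions falls outside the
    # sibling's annex-wanted expression; push honors that only while
    # datalad.push.copy-auto-if-wanted is set in the local config.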
    src = Dataset(srcpath).create()

    if src.repo.is_managed_branch():
        # on crippled FS post-update hook enabling via create-sibling doesn't
        # work ATM
        raise SkipTest("no create-sibling on crippled FS")
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, if instructed by configuration
    src.config.set('datalad.push.copy-auto-if-wanted', 'true', where='local')
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check that dataset-config cannot overrule this
    src.config.set('datalad.push.copy-auto-if-wanted',
                   'false',
                   where='dataset')
    res = src.push(to='target')
    assert_status('notneeded', res)

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # remove local config, must enable push of secure file
    src.config.unset('datalad.push.copy-auto-if-wanted', where='local')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #12
def test_GitRepo_equals(path1, path2):
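    # GitRepo equality is based on the repository path: two instances for the
    # same path compare equal, instances for different paths do not.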

    repo1 = GitRepo(path1)
    repo2 = GitRepo(path1)
    ok_(repo1 == repo2)
    eq_(repo1, repo2)
    repo2 = GitRepo(path2)
    neq_(repo1, repo2)
    ok_(repo1 != repo2)
Example #13
def test_run_explicit(path):
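    # explicit=True lets `run` proceed in a dirty tree, still retrieves the
    # declared inputs, and commits only the declared outputs, leaving the
    # unrelated dirt untouched.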
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path,
                       modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #14
def test_rerun_mutator_stem_nonrun_merges(path):
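    # The mutator run commit b_r sits at the stem, so rerunning it gives the
    # downstream non-run commits new parents: they must be copied
    # (c_n, d_n -> c_C, d_C) and the merge recreated.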
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("more blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o                 e_n
    # |\
    # | o               d_n
    # o |               c_n
    # |/
    # o                 b_r
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 e_M
    # |\
    # | o               d_C
    # o |               c_C
    # |/
    # o                 b_R
    # o                 a_n
    ok_(ds.repo.commit_exists("HEAD^2"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH), ds.repo.get_hexsha())

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o                 c_C
    # o                 b_R
    # o                 d_n
    # o                 b_r
    # o                 a_n
    assert_false(ds.repo.commit_exists("HEAD^2"))
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o                 b_R
    # o                 e_n
    # |\
    # | o               d_n
    # o |               c_n
    # |/
    # o                 b_r
    # o                 a_n
    eq_(hexsha_before, ds.repo.get_hexsha(DEFAULT_BRANCH + "^"))
    assert_false(ds.repo.commit_exists("HEAD^2"))
Example #15
def test_rerun_nonrun_left_run_right(path):
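    # Only the merged-in side carries a run commit: a full rerun recreates
    # that side (c_r -> c_R) and the merge, while the left parent is reused.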
    ds = Dataset(path).create()
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("blah")
    ds.save()
    ds.repo.checkout(DEFAULT_BRANCH + "~", options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o                 d_n
    # |\
    # | o               c_r
    # o |               b_n
    # |/
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 d_M
    # |\
    # | o               c_R
    # o |               b_n
    # |/
    # o                 a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o                 d_n
    # |\
    # | o               c_r
    # o |               b_n
    # |/
    # o                 a_n
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH), ds.repo.get_hexsha())
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
        ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o                 d_n
    # |\
    # | o               c_r
    # o |               b_n
    # |/
    # o                 a_n
    eq_(hexsha_before, ds.repo.get_hexsha())
Example #16
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        with swallow_outputs():
            if on_windows:
                # this does not do exactly the same as the cmd on other systems
                # but is close enough to make running the test worthwhile
                ds.run("echo x>{} & false".format(op.join("sub", "grows")))
            else:
                ds.run("echo x$(cat {0}) > {0} && false".format(
                    op.join("sub", "grows")))
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = op.join(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.save(recursive=True, message_file=msgfile)
    assert_repo_status(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = op.join(subds.path, "grows")
    eq_('x \n' if on_windows else 'x\n', open(outfile).read())

    if on_windows:
        # FIXME: Make the remaining code compatible with Windows.
        return

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    assert_repo_status(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
Example #17
def test_url_eq():
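    # equality requires both the same RI subclass and the same field values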
    eq_(URL(), URL())
    # it doesn't make sense to ask what kind of URL an empty one is
    #eq_(RI(), RI())
    neq_(URL(), URL(hostname='x'))
    # Different types aren't equal even if they have the same field values
    neq_(URL(path='x'), PathRI(path='x'))
    neq_(URL(hostname='x'), SSHRI(hostname='x'))
    neq_(str(URL(hostname='x')), str(SSHRI(hostname='x')))
Example #18
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.add("dirt_modified", to_git=True)
    with open(opj(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(opj(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    ok_(ds.repo.is_dirty(path="dirt_modified"))
    neq_(hexsha_initial, ds.repo.get_hexsha())
Example #19
def test_rerun_unrelated_nonrun_left_run_right(path):
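    # The side branch is an orphan (unrelated history), so rerun must recreate
    # its run commit on top of the squashed root without inventing ancestry
    # beyond it (hence HEAD^2^^ must not exist).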
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["--orphan", "side"])
    ds.save(message="squashed")
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side",
                  options=["-m", "Merge side", "--allow-unrelated-histories"])
    # o                 d_n
    # |\
    # | o               c_r
    # | o               b_n
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 d_M
    # |\
    # | o               c_R
    # | o               b_n
    # o                 a_n
    ok_(ds.repo.commit_exists("HEAD^2"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH), ds.repo.get_hexsha())
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^"),
        ds.repo.get_hexsha("HEAD^2^"))
    assert_false(ds.repo.commit_exists("HEAD^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^2")
    # o                 d_n
    # |\
    # | o               c_r
    # | o               b_n
    # o                 a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH), ds.repo.get_hexsha())
    assert_false(ds.repo.commit_exists("HEAD^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o                 d_n
    # |\
    # | o               c_r
    # | o               b_n
    # o                 a_n
    eq_(hexsha_before, ds.repo.get_hexsha())
Example #20
File: test_push.py Project: ypid/datalad
def test_push_wanted(srcpath, dstpath):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', check=False)

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since it is set in the sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #21
def test_get_commit_date(path):
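    # With no commits the date is None; afterwards the author date is reported
    # as epoch seconds and stays available by branch name in a detached HEAD.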
    gr = GitRepo(path, create=True)
    eq_(gr.get_commit_date(), None)

    # Let's make a commit with a custom date
    DATE = "Wed Mar 14 03:47:30 2018 -0000"
    DATE_EPOCH = 1520999250
    gr.add('1')
    gr.commit("committed", date=DATE)
    gr = GitRepo(path, create=True)
    date = gr.get_commit_date()
    neq_(date, None)
    eq_(date, DATE_EPOCH)

    eq_(date, gr.get_commit_date('master'))
    # and even if we get into a detached head
    gr.checkout(gr.get_hexsha())
    eq_(gr.get_active_branch(), None)
    eq_(date, gr.get_commit_date('master'))
Example #22
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with swallow_outputs():
        with assert_raises(CommandError):
            ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
Example #23
File: test_run.py Project: hanke/datalad
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
Example #24
def check_update_how_subds_different(follow, action, path):
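    # `follow` decides which subdataset state wins (the sibling's current tip
    # vs. the state recorded in the parent dataset), `action` how it is
    # applied; only action="checkout" should leave a detached HEAD behind.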
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_src_sub = ds_src.create("sub")
    ds_src.save()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    (ds_clone.pathobj / "foo").write_text("foo")
    ds_clone.save()
    ds_clone_sub = Dataset(ds_clone.pathobj / "sub")

    (ds_src_sub.pathobj / "bar").write_text("bar")
    ds_src.save(recursive=True)

    # Add unrecorded state to make --follow=sibling/parentds differ.
    (ds_src_sub.pathobj / "baz").write_text("baz")
    ds_src_sub.save()

    ds_clone_repo = ds_clone.repo
    ds_clone_hexsha_pre = ds_clone_repo.get_hexsha()

    ds_clone_sub_repo = ds_clone_sub.repo
    ds_clone_sub_branch_pre = ds_clone_sub_repo.get_active_branch()

    res = ds_clone.update(follow=follow,
                          how="merge",
                          how_subds=action,
                          recursive=True)

    assert_result_count(res,
                        1,
                        action="merge",
                        status="ok",
                        path=ds_clone.path)
    assert_result_count(res,
                        1,
                        action=f"update.{action}",
                        status="ok",
                        path=ds_clone_sub.path)

    ds_clone_hexsha_post = ds_clone_repo.get_hexsha()
    neq_(ds_clone_hexsha_pre, ds_clone_hexsha_post)
    neq_(ds_src.repo.get_hexsha(), ds_clone_hexsha_post)
    ok_(ds_clone_repo.is_ancestor(ds_clone_hexsha_pre, ds_clone_hexsha_post))

    eq_(ds_clone_sub.repo.get_hexsha(),
        ds_src_sub.repo.get_hexsha(None if follow == "sibling" else "HEAD~"))
    ds_clone_sub_branch_post = ds_clone_sub_repo.get_active_branch()

    if action == "checkout":
        neq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)
        assert_false(ds_clone_sub_branch_post)
    else:
        eq_(ds_clone_sub_branch_pre, ds_clone_sub_branch_post)
Example #25
def test_rerun_left_right_runs(path):
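    # Run commits sit on both merge parents: a full rerun recreates both
    # sides, while onto=<left parent> keeps c_r as the base and replays only
    # the side run as b_R.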
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.run("echo bar >bar")
    ds.repo.merge("side", options=["-m", "Merge side"])
    # o                 d_n
    # |\
    # o |               c_r
    # | o               b_r
    # |/
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 d_M
    # |\
    # o |               c_R
    # | o               b_R
    # |/
    # o                 a_n
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="", onto=DEFAULT_BRANCH + "^")
    # o                 d_M
    # |\
    # | o               b_R
    # |/
    # o                 c_r
    # o                 a_n
    eq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^"), ds.repo.get_hexsha("HEAD^"))
    neq_(ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"),
         ds.repo.get_hexsha("HEAD^2"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    # o                 d_n
    # |\
    # o |               c_r
    # | o               b_r
    # |/
    # o                 a_n
    eq_(hexsha_before, ds.repo.get_hexsha())
Example #26
File: test_run.py Project: hanke/datalad
def test_rerun_onto(path):
    ds = Dataset(path).create()

    grow_file = opj(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(
            ds.rerun("HEAD", onto="", since=since, on_failure="ignore"),
            1, status="impossible", action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(),
        ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("static"))
    assert_result_count(ds.diff(revision="HEAD..static"), 0)
    for revrange in ["..static", "static.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(),
         ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        assert_result_count(
            ds.repo.repo.git.rev_list(revrange).split(), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    assert_result_count(ds.diff(revision="master..from-base"), 0)
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(
        ds.rerun(since="", onto="doesnotexist", branch="from-base",
                 on_failure="ignore"),
        1, status="error", action="run")
Example #27
def test_rerun_multifork(path):
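    # Runs are scattered over several forks and merges; a full rerun must
    # recreate the run commits and everything downstream of them, sharing
    # only the untouched root a_n with the original history.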
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout("side", options=["-b", "side-nonrun"])
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah 0")
    ds.save()
    ds.repo.checkout("side")
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("blah 1")
    ds.save()
    ds.run("echo bar >bar")
    ds.repo.checkout("side~1", options=["-b", "side-side"])
    with open(op.join(path, "nonrun-file2"), "w") as f:
        f.write("blah 2")
    ds.save()
    ds.run("echo onside0 >onside0")
    ds.repo.checkout("side")
    ds.repo.merge("side-side")
    ds.run("echo after-side-side >after-side-side")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["--no-ff"])
    ds.repo.merge("side-nonrun")
    # o                 k_n
    # |\
    # | o               j_n
    # o |               i_n
    # |\ \
    # | o |             h_r
    # | o |             g_n
    # | |\ \
    # | | o |           f_r
    # | | o |           e_n
    # | o | |           d_r
    # | |/ /
    # | o |             c_n
    # | |/
    # | o               b_r
    # |/
    # o                 a_n

    ds.rerun(since="", onto="")
    # o                 k_M
    # |\
    # | o               j_n
    # o |               i_M
    # |\ \
    # | o |             h_R
    # | o |             g_M
    # | |\ \
    # | | o |           f_R
    # | | o |           e_n
    # | o | |           d_R
    # | |/ /
    # | o |             c_n
    # | |/
    # | o               b_R
    # |/
    # o                 a_n
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~2"))
    neq_(ds.repo.get_hexsha("HEAD^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    neq_(ds.repo.get_hexsha("HEAD^^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^^2"))
    assert_false(ds.repo.commit_exists("HEAD^^2^2"))
    eq_(ds.repo.get_hexsha("HEAD^2^^"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha())
Example #28
def test_rerun_onto(path):
    ds = Dataset(path).create()
    # Make sure we have more than one commit. The one commit case is checked
    # elsewhere.
    ds.repo.commit(msg="noop commit", options=["--allow-empty"])

    grow_file = op.join(path, "grows")

    # Make sure we can handle range-specifications that yield no results.
    for since in ["", "HEAD"]:
        assert_result_count(ds.rerun("HEAD",
                                     onto="",
                                     since=since,
                                     on_failure="ignore"),
                            1,
                            status="impossible",
                            action="run")

    ds.run('echo static-content > static')
    ds.repo.tag("static")
    with swallow_outputs():
        ds.run('echo x$(cat grows) > grows')
    ds.rerun()
    eq_('xx\n', open(grow_file).read())

    # If we run the "static" change on top of itself, we end up in the
    # same (but detached) place.
    ds.rerun(revision="static", onto="static")
    ok_(ds.repo.get_active_branch() is None)
    eq_(ds.repo.get_hexsha(), ds.repo.get_hexsha("static"))

    # If we run the "static" change from the same "base", we end up
    # with a new commit.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(revision="static", onto="static^")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(), ds.repo.get_hexsha("static"))
    ok_(all(r["state"] == "clean" for r in ds.diff(fr="HEAD", to="static")))
    for revrange in ["..static", "static.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 1)

    # Unlike the static change, if we run the ever-growing change on
    # top of itself, we end up with a new commit.
    ds.repo.checkout("master")
    ds.rerun(onto="HEAD")
    ok_(ds.repo.get_active_branch() is None)
    neq_(ds.repo.get_hexsha(), ds.repo.get_hexsha("master"))

    # An empty `onto` means use the parent of the first revision.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="static^", onto="")
    ok_(ds.repo.get_active_branch() is None)
    for revrange in ["..master", "master.."]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)

    # An empty `onto` means use the parent of the first revision that
    # has a run command.
    ds.repo.checkout("master")
    with swallow_outputs():
        ds.rerun(since="", onto="", branch="from-base")
    eq_(ds.repo.get_active_branch(), "from-base")
    ok_(
        all(r["state"] == "clean"
            for r in ds.diff(fr="master", to="from-base")))
    eq_(ds.repo.get_merge_base(["static", "from-base"]),
        ds.repo.get_hexsha("static^"))

    # We abort when an explicitly specified `onto` doesn't exist.
    ds.repo.checkout("master")
    assert_result_count(ds.rerun(since="",
                                 onto="doesnotexist",
                                 branch="from-base",
                                 on_failure="ignore"),
                        1,
                        status="error",
                        action="run")
Example #29
def test_ria_http(lcl, storepath, url):
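    # End-to-end RIA-over-HTTP round trip: clone super- and subdataset from a
    # store, pin a version via the #<id>@<tag> fragment, honor insteadOf URL
    # rewriting, and resolve store aliases via the #~<name> fragment.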
    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)
    for d in (ds, subds):
        _move2store(storepath, d)
    # location of superds in store
    storeds_loc = str(storepath / ds.id[:3] / ds.id[3:])
    # now we should be able to clone from a ria+http url
    # the super
    riaclone = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone',
    )

    # due to default configuration, clone() should automatically look for the
    # subdataset in the store, too -- if not the following would fail, because
    # we never configured a proper submodule URL
    riaclonesub = riaclone.get(
        op.join('subdir', 'subds'), get_data=False,
        result_xfm='datasets', return_type='item-or-list')

    # both datasets came from the store and must be set up in an identical
    # fashion
    for origds, cloneds in ((ds, riaclone), (subds, riaclonesub)):
        eq_(origds.id, cloneds.id)
        if not ds.repo.is_managed_branch():
            # test logic cannot handle adjusted branches
            eq_(origds.repo.get_hexsha(), cloneds.repo.get_hexsha())
        ok_(cloneds.config.get('remote.origin.url').startswith(url))
        eq_(cloneds.config.get('remote.origin.annex-ignore'), 'true')
        eq_(cloneds.config.get('datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)

    # now advance the source dataset
    (ds.pathobj / 'newfile.txt').write_text('new')
    ds.save()
    ds.publish(to='store')
    Runner(cwd=storeds_loc).run(['git', 'update-server-info'])
    # re-clone as before
    riaclone2 = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone2',
    )
    # and now clone a specific version, here given by the tag name
    riaclone_orig = clone(
        'ria+{}#{}@{}'.format(url, ds.id, 'original'),
        lcl / 'clone_orig',
    )
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        # we got the precise version we wanted
        eq_(riaclone.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())
        # and not the latest
        eq_(riaclone2.repo.get_hexsha(), ds.repo.get_hexsha())
        neq_(riaclone2.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())

    # attempt to clone a version that doesn't exist
    with swallow_logs():
        with assert_raises(IncompleteResultsError) as cme:
            clone('ria+{}#{}@impossible'.format(url, ds.id),
                  lcl / 'clone_failed')
        assert_in("not found in upstream", str(cme.exception))

    # lastly test if URL rewriting is in effect
    # on the surface we clone from an SSH source identified by some custom
    # label, no full URL, but URL rewriting setup maps it back to the
    # HTTP URL used above
    with patch_config({
            'url.ria+{}#.insteadof'.format(url): 'ria+ssh://somelabel#'}):
        cloned_by_label = clone(
            'ria+ssh://somelabel#{}'.format(origds.id),
            lcl / 'cloned_by_label',
        )
    # so we get the same setup as above, but....
    eq_(origds.id, cloned_by_label.id)
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origds.repo.get_hexsha(), cloned_by_label.repo.get_hexsha())
    ok_(cloned_by_label.config.get('remote.origin.url').startswith(url))
    eq_(cloned_by_label.config.get('remote.origin.annex-ignore'), 'true')
    # ... the clone candidates go with the label-based URL such that
    # future get() requests acknowledge a (system-wide) configuration
    # update
    eq_(cloned_by_label.config.get('datalad.get.subdataset-source-candidate-200origin'),
        'ria+ssh://somelabel#{id}')

    if not has_symlink_capability():
        return
    # place a symlink in the store to serve as a dataset alias
    (storepath / 'alias').mkdir()
    (storepath / 'alias' / 'myname').symlink_to(storeds_loc)
    with chpwd(lcl):
        cloned_by_alias = clone('ria+{}#~{}'.format(url, 'myname'))
    # still get the same data
    eq_(cloned_by_alias.id, ds.id)
    # more sensible default install path
    eq_(cloned_by_alias.pathobj.name, 'myname')
Example #30
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path, sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin_path, recursive=True)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("Unknown target sibling 'target'", exc_str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let's make it progress
    sub2_target.config.set('receive.denyCurrentBranch', 'updateInstead', where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in(
            'forced update', cml.out,
            "we probably haven't merged git-annex before pushing"
        )

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin but origin has different git-annex, since we
    # cloned from it, so it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets since git-annex branch would need to be pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]),
        set(opj(*([source.path] + x)) for x in ([], ['subm 1'], ['subm 2'])))
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test publishing with --since. By default, since there are no changes, nothing is pushed
    res_ = publish(dataset=source, recursive=True)
    eq_(set(r.path for r in res_[0]), set())

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), set([]))

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        eq_(set(r.path for r in res_[0]), set([]))

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because the original test author used
    # an explicit `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's now update one subm
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_published, res_skipped = publish(dataset=source, recursive=True)
    # only updates published, i.e. just the subdataset, super wasn't altered
    # nothing copied!
    eq_(res_published, [Dataset(sub2.path)])
    eq_(res_skipped, [])

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_published, res_skipped = publish(dataset=source, to='target', recursive=True)
    eq_(res_published, [Dataset(sub2.path)])
    # Note: with updateInstead only in target2 and not saving change in
    # super-dataset we would have made remote dataset, if we had entire
    # hierarchy, to be somewhat inconsistent.
    # But here, since target datasets are independent -- it is ok

    # and the file itself was not transferred but now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo the publish recursively, this time explicitly
    # asking it to consider copying '.'
    res_published, res_skipped = publish(
        '.',
        dataset=source, to='target',
        recursive=True
    )
    ok_(sub2_target.file_has_content('file.dat'))
    eq_(res_published, ['file.dat'])  # note that this report makes little sense without path to the repository
Example #31
def test_publish_recursive(pristine_origin, origin_path, src_path, dst_path,
                           sub1_pub, sub2_pub):

    # we will be publishing back to origin, so to not alter testrepo
    # we will first clone it
    origin = install(origin_path, source=pristine_origin, recursive=True)
    # prepare src
    source = install(src_path, source=origin.path, recursive=True)
    # we will be trying to push into this later on, need to give permissions...
    origin_sub2 = Dataset(opj(origin_path, '2'))
    origin_sub2.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    ## TODO this manual fixup is needed due to gh-1548 -- needs proper solution
    #os.remove(opj(origin_sub2.path, '.git'))
    #os.rename(opj(origin_path, '.git', 'modules', '2'), opj(origin_sub2.path, '.git'))

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    res = publish(dataset=source,
                  to="target",
                  recursive=True,
                  on_failure='ignore')
    assert_result_count(res, 3)
    assert_result_count(res, 1, status='ok', type='dataset', path=source.path)
    assert_result_count(res,
                        2,
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'target'))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    # we will be testing presence of the file content, so let pushes update the checked-out branch
    sub2_target.config.set('receive.denyCurrentBranch',
                           'updateInstead',
                           where='local')
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, '2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)
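    # (aside, not from the original test: add_remote() is a thin wrapper around
    # `git remote add`, so the siblings configured above are plain git remotes)
    assert_in('target', sub1.get_remotes())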

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # base dataset was already published above, notneeded again
    assert_status(('ok', 'notneeded'), res)  # nothing failed
    assert_result_count(res, 3, type='dataset')
    eq_({r['path'] for r in res}, {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # we are tracking origin, but origin has a different git-annex branch:
    # since we cloned from it, it is not aware of our git-annex
    neq_(list(origin.repo.get_branch_commits("git-annex")),
         list(source.repo.get_branch_commits("git-annex")))
    # So if we first publish to it recursively, we would update
    # all sub-datasets, since the git-annex branch needs to be pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 1, status='ok', path=source.path)
    assert_result_count(res_, 1, status='ok', path=sub1.path)
    assert_result_count(res_, 1, status='ok', path=sub2.path)
    # and now should carry the same state for git-annex
    eq_(list(origin.repo.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))

    # test publishing with --since; by default, since nothing changed, nothing gets pushed
    res_ = publish(dataset=source, recursive=True)
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # still nothing gets pushed, because origin is up to date
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    assert_result_count(res_, 3, status='notneeded', type='dataset')

    # and we should not fail if we run it from within the dataset
    with chpwd(source.path):
        res_ = publish(recursive=True, since='HEAD^')
        assert_result_count(res_, 3, status='notneeded', type='dataset')

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    # add to subdataset, does not alter super dataset!
    # MIH: use `to_git` because the original test author used an explicit
    # `GitRepo.add` -- keeping this for now
    Dataset(sub2.path).add('file.txt', to_git=True)

    # Let's also add an annexed file to the same subdataset
    create_tree(sub2.path, {'file.dat': 'content'})
    # add to subdataset, without reflecting the change in its super(s)
    Dataset(sub2.path).add('file.dat')

    # note: will publish to origin here since that is what it tracks
    res_ = publish(dataset=source, recursive=True, on_failure='ignore')
    # only updates get published, i.e. just the subdataset; super wasn't altered
    # nothing copied!
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # since published to origin -- destination should not get that file
    nok_(lexists(opj(sub2_target.path, 'file.dat')))
    res_ = publish(dataset=source, to='target', recursive=True)
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=sub2.path, type='dataset')
    assert_result_count(res_, 0, path=opj(sub2.path, 'file.dat'), type='file')

    # Note: with updateInstead set only on target2 and without saving the
    # change in the super-dataset, we would have made the remote dataset
    # somewhat inconsistent had we published the entire hierarchy.
    # But here, since the target datasets are independent -- it is ok

    # and the file itself was not transferred, but its annex pointer now exists
    ok_(lexists(opj(sub2_target.path, 'file.dat')))
    nok_(sub2_target.file_has_content('file.dat'))

    # but now we can redo the publish recursively, with explicitly requested data transfer
    res_ = publish(dataset=source,
                   to='target',
                   recursive=True,
                   transfer_data='all')
    ok_(sub2_target.file_has_content('file.dat'))
    assert_result_count(res_, 1, status='ok', path=opj(sub2.path, 'file.dat'))

    # Let's save the pending changes and publish while implying "since last
    # merge point"
    source.save(message="Changes in subm2")
    # and test if it could deduce the remote/branch to push to
    source.config.set('branch.master.remote', 'target', where='local')
    with chpwd(source.path):
        res_ = publish(since='', recursive=True)
    # TODO: somehow test that there was not even an attempt to diff within "subm 1",
    # since if `--since=''` worked correctly, nothing has changed there and it
    # should not even have been touched
    assert_status(('ok', 'notneeded'), res_)
    assert_result_count(res_, 1, status='ok', path=source.path, type='dataset')
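# For context on the remote deduction exercised at the end of this test:
# publish() falls back to standard git tracking configuration, so
# branch.master.remote decides the default push target. A minimal sketch
# (hypothetical dataset path; assumed to be equivalent to running
# `git config --local branch.master.remote target` inside the repository):
from datalad.distribution.dataset import Dataset

ds = Dataset('/tmp/source-ds')  # hypothetical location
ds.config.set('branch.master.remote', 'target', where='local')
assert ds.config.get('branch.master.remote') == 'target'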
Example #35
def test_url_samples():
    _check_ri("http://example.com", URL, scheme='http', hostname="example.com")
    # "complete" one for classical http
    _check_ri("http://*****:*****@example.com:8080/p/sp?p1=v1&p2=v2#frag", URL,
              scheme='http', hostname="example.com", port=8080,
              username='******', password='******', path='/p/sp',
              query='p1=v1&p2=v2', fragment='frag')

    # sample one for ssh with specifying the scheme
    # XXX? might be useful?  https://github.com/FriendCode/giturlparse.py
    _check_ri("ssh://host/path/sp1", URL, scheme='ssh', hostname='host', path='/path/sp1')
    _check_ri("user@host:path/sp1", SSHRI,
              hostname='host', path='path/sp1', username='******')
    _check_ri("host:path/sp1", SSHRI, hostname='host', path='path/sp1')
    _check_ri("host:path", SSHRI, hostname='host', path='path')
    _check_ri("host:/path", SSHRI, hostname='host', path='/path')
    _check_ri("user@host", SSHRI, hostname='host', username='******')
    # TODO!!!  should this be a legit URL like this?
    # _check_ri("host", SSHRI, hostname='host'))
    eq_(repr(RI("host:path")), "SSHRI(hostname='host', path='path')")

    # And now the 'datalad' perspective: implicit=True URLs pointing to the canonical central location
    _check_ri("///", DataLadRI)
    _check_ri("///p/s1", DataLadRI, path='p/s1')
    # could be considered by someone as "URI reference" relative to scheme
    _check_ri("//a/", DataLadRI, remote='a')
    _check_ri("//a/data", DataLadRI, path='data', remote='a')

    # here we do custom magic, allowing only schemes with '+' in them, such as dl+archive,
    # or not-so-custom ones such as
    _check_ri("hg+https://host/user/proj", URL,
              scheme="hg+https", hostname='host', path='/user/proj')
    # "old" style
    _check_ri("dl+archive:KEY/path/sp1#size=123", URL,
              scheme='dl+archive', path='KEY/path/sp1', fragment='size=123')
    # "new" style
    _check_ri("dl+archive:KEY#path=path/sp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path/sp1&size=123')
    # actually the one above is probably wrong, since we need to encode the path
    _check_ri("dl+archive:KEY#path=path%2Fbsp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path%2Fbsp1&size=123')

    # https://en.wikipedia.org/wiki/File_URI_scheme
    _check_ri("file://host", URL, scheme='file', hostname='host')
    _check_ri("file://host/path/sp1", URL, scheme='file', hostname='host', path='/path/sp1')
    # stock libraries of Python aren't quite ready for ipv6
    ipv6address = '2001:db8:85a3::8a2e:370:7334'
    _check_ri("file://%s/path/sp1" % ipv6address, URL,
              scheme='file', hostname=ipv6address, path='/path/sp1')
    for lh in ('localhost', '::1', '', '127.3.4.155'):
        _check_ri("file://%s/path/sp1" % lh, URL, localpath='/path/sp1',
                  scheme='file', hostname=lh, path='/path/sp1')
    _check_ri('http://[1fff:0:a88:85a3::ac1f]:8001/index.html', URL,
              scheme='http', hostname='1fff:0:a88:85a3::ac1f', port=8001, path='/index.html')
    _check_ri("file:///path/sp1", URL, localpath='/path/sp1', scheme='file', path='/path/sp1')
    # we don't do any magical comprehension for home paths/drives for windows
    # of file:// urls, thus leaving /~ and /c: for now:
    _check_ri("file:///~/path/sp1", URL, localpath='/~/path/sp1', scheme='file', path='/~/path/sp1')
    _check_ri("file:///%7E/path/sp1", URL, localpath='/~/path/sp1', scheme='file', path='/~/path/sp1', exact_str=False)
    # not sure but let's check
    _check_ri("file:///c:/path/sp1", URL, localpath='/c:/path/sp1', scheme='file', path='/c:/path/sp1', exact_str=False)

    # and now implicit paths or actually they are also "URI references"
    _check_ri("f", PathRI, localpath='f', path='f')
    _check_ri("f/s1", PathRI, localpath='f/s1', path='f/s1')
    _check_ri("/f", PathRI, localpath='/f', path='/f')
    _check_ri("/f/s1", PathRI, localpath='/f/s1', path='/f/s1')

    # some github ones, just to make sure
    _check_ri("git://host/user/proj", URL, scheme="git", hostname="host", path="/user/proj")
    _check_ri("git@host:user/proj", SSHRI, hostname="host", path="user/proj", username='******')

    _check_ri('weired:/', SSHRI, hostname='weired', path='/')
    # since the scheme does not allow some symbols, we need an additional check
    _check_ri('weired_url:/', SSHRI, hostname='weired_url', path='/')
    _check_ri('example.com:/', SSHRI, hostname='example.com', path='/')
    _check_ri('example.com:path/sp1', SSHRI, hostname='example.com', path='path/sp1')
    _check_ri('example.com/path/sp1\:fname', PathRI, localpath='example.com/path/sp1\:fname',
              path='example.com/path/sp1\:fname')
    # ssh is as stupid as us, so we will stay "Consistently" dumb
    """
    $> ssh example.com/path/sp1:fname
    ssh: Could not resolve hostname example.com/path/sp1:fname: Name or service not known
    """
    _check_ri('example.com/path/sp1:fname', SSHRI, hostname='example.com/path/sp1', path='fname')

    # SSHRIs have .port, but it is empty
    eq_(SSHRI(hostname='example.com').port, '')

    # check that we are getting a warning logged when url can't be reconstructed
    # precisely
    # actually failed to come up with one -- becomes late here
    #_check_ri("http://host///..//p", scheme='http', path='/..//p')

    # actually this one is good enough to trigger a warning and I still don't
    # know what exactly it should be!?
    with swallow_logs(new_level=logging.DEBUG) as cml:
        weired_str = 'weired://'
        weired_url = RI(weired_str)
        repr(weired_url)
        cml.assert_logged(
            'Parsed version of SSHRI .weired:/. '
            'differs from original .weired://.'
        )
        # but we store original str
        eq_(str(weired_url), weired_str)
        neq_(weired_url.as_str(), weired_str)


    raise SkipTest("TODO: file://::1/some does complain about parsed version dropping ::1")
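# A usage sketch of the RI factory exercised above: RI() dispatches to URL,
# SSHRI, PathRI, or DataLadRI based on the shape of the string, and str()
# round-trips the original form (a sketch, assuming only the classes and
# sample strings already shown in this test):
from datalad.support.network import RI, URL, SSHRI, PathRI

assert RI("ssh://host/path/sp1").__class__ is URL
assert RI("user@host:path/sp1").__class__ is SSHRI
assert RI("relative/path").__class__ is PathRI
assert str(RI("host:path")) == "host:path"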