Example #1
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it in a subdirectory as CWD, and check that relative
    # paths given to a plain command (not a dataset method) are treated
    # as relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
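
Note: these snippets come from DataLad's test suite, and the example extractor stripped the decorators that inject the path arguments. A plausible reconstruction of the missing fixture setup (with_tempfile exists in datalad.tests.utils; the exact parameters used here are an assumption):

# Hypothetical reconstruction of the stripped fixtures: in DataLad's test
# suite, temp paths are injected by decorators, roughly like this.
from datalad.tests.utils import with_tempfile

@with_tempfile(mkdir=True)   # create a temp dir and pass it in as `path`
def test_symlinked_relpath(path):
    ...  # body as in the example above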
Example #2
def test_add_subdataset(path, other):
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
Example #3
def test_create_subdataset_hierarchy_from_top(path):
    # how it would look to overlay a subdataset hierarchy onto
    # an existing directory tree
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    # we got a dataset ....
    ok_(ds.is_installed())
    # ... but it has untracked content
    ok_(ds.repo.dirty)
    subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
    ok_(subds.is_installed())
    ok_(subds.repo.dirty)
    subsubds = subds.create('subsub', force=True)
    ok_(subsubds.is_installed())
    ok_(subsubds.repo.dirty)
    ok_(ds.id != subds.id != subsubds.id)
    ds.save(updated=True, recursive=True)
    # 'file*' in each repo was untracked before and should remain as such
    # (we don't want a #1419 resurrection)
    ok_(ds.repo.dirty)
    ok_(subds.repo.dirty)
    ok_(subsubds.repo.dirty)
    # if we add these three, we should get clean
    ds.save([
        'file1',
        op.join(subds.path, 'file2'),
        op.join(subsubds.path, 'file3')])
    assert_repo_status(ds.path)
    ok_(ds.id != subds.id != subsubds.id)
Example #4
def test_add_mimetypes(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding a file to the annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
           # The empty one is considered to be application/octet-stream,
           # i.e. non-text
           ('empty', True),
           ('file.txt', False),
           ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)
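
The git/annex routing asserted above presumes a largefiles rule in a pre-existing .gitattributes that this snippet does not show. A plausible fixture tree, assuming a git-annex build with MIME-type matching:

# Hypothetical fixture tree for test_add_mimetypes: route non-text files to
# the annex by MIME type (requires git-annex with magic/mimetype support).
tree = {
    '.gitattributes': '* annex.largefiles=(not(mimetype=text/*))',
    'file.txt': 'plain text',   # text/plain -> goes to git
    'file2.txt': 'more text',   # text, but annexed on request via to_git=False
    'empty': '',                # application/octet-stream -> annex
}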
Example #5
def check_renamed_file(recursive, no_annex, path):
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds.save(recursive=recursive)
    assert_repo_status(path)
Example #6
def test_run_from_subds(path):
    if 'APPVEYOR' in os.environ:
        raise SkipTest('test causes appveyor (only) to crash, reason unknown')

    subds = Dataset(path).create().create("sub")
    subds.run("cd .> foo")
    assert_repo_status(subds.path)
Example #7
def test_encoding(path):
    staged = OBSCURE_FILENAME + u'_staged'
    untracked = OBSCURE_FILENAME + u'_untracked'
    ds = Dataset(path).create(force=True)
    ds.repo.add(staged)
    assert_repo_status(ds.path, added=[staged], untracked=[untracked])
    ds.save(updated=True)
    assert_repo_status(ds.path, untracked=[untracked])
Example #8
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = save(recursive=True)
    assert_repo_status(ds.path)
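
chpwd, used throughout these examples, is a datalad.utils context manager that temporarily switches the working directory. A minimal sketch of the idea (the real implementation handles more, e.g. symlinked paths):

import os
from contextlib import contextmanager

@contextmanager
def chpwd_sketch(path, mkdir=False):
    # minimal stand-in for datalad.utils.chpwd: enter `path`, restore the
    # original working directory on exit
    if mkdir and not os.path.exists(path):
        os.makedirs(path)
    orig = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(orig)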
Example #9
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(op.join(path, 'dir')):
        eq_(save('testindir')[0]['path'],
            op.join(ds.path, 'dir', 'testindir'))
        # and now add all
        save('..')
    # auto-save enabled
    assert_repo_status(ds.path)
Example #10
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('staged')
    assert_repo_status(ds.path, added=['staged'], untracked=['untracked'])
    # save -u does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save(updated=True)
    assert_repo_status(ds.path, untracked=['untracked'])
Example #11
def test_bf3285(path):
    ds = Dataset(path).create(force=True)
    # Note: Using repo.pathobj matters in the "TMPDIR=/var/tmp/sym\ link" case
    # because assert_repo_status is based off of {Annex,Git}Repo.path, which is
    # the realpath'd path (from the processing in _flyweight_id_from_args).
    subds = create(ds.repo.pathobj.joinpath("subds"))
    # Explicitly saving a path does not save an untracked, unspecified
    # subdataset.
    ds.save("foo")
    assert_repo_status(ds.path, untracked=[subds.path])
Example #12
def test_inject(path):
    ds = Dataset(path).create(force=True)
    assert_repo_status(ds.path, untracked=['foo', 'bar'])
    list(run_command("nonsense command",
                     dataset=ds,
                     inject=True,
                     extra_info={"custom_key": "custom_field"}))
    msg = ds.repo.format_commit("%B")
    assert_in("custom_key", msg)
    assert_in("nonsense command", msg)
Example #13
def test_create_curdir(path, path2):
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    ok_(op.exists(op.join(ds.path, '.noannex')))
Example #14
def test_gh2043p1(path):
    # this test documents the interim agreement on what should happen
    # in the case documented in gh-2043
    ds = Dataset(path).create(force=True)
    ds.save('1')
    assert_repo_status(ds.path, untracked=['2', '3'])
    ds.unlock('1')
    assert_repo_status(
        ds.path,
        # on windows we are in an unlocked branch by default, hence
        # we would see no change
        modified=[] if on_windows else ['1'],
        untracked=['2', '3'])
    # save(.) should recommit the unlocked file, and not touch anything else
    # this tests the second issue in #2043
    with chpwd(path):
        # only save modified bits
        save(path='.', updated=True)
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path, untracked=['2', '3'])
    with chpwd(path):
        # but when a path is given, anything that matches this path
        # untracked or not is added/saved
        save(path='.')
    # state of the file (unlocked/locked) is committed as well, and the
    # test doesn't lock the file again
    assert_repo_status(ds.path)
Example #15
def test_compare_content_info(path):
    # TODO remove when `create` is RF to return the new Dataset
    ds = Dataset(path).create()
    assert_repo_status(path)

    # for a clean repo, HEAD and worktree queries should yield identical
    # results, minus a 'bytesize' report that is readily available for
    # HEAD, but would require a stat call per file for the worktree, and
    # is not done ATM
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(
        wt,
        {f: {k: v for k, v in iteritems(p) if k != 'bytesize'}
         for f, p in iteritems(ds.repo.get_content_info(ref='HEAD'))}
    )
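
iteritems above comes from a py2/py3 compatibility shim (six, or datalad's own; which one is not visible here). On Python 3 alone, the same assertion reduces to plain dict methods:

# Python-3-only equivalent of the comparison above (`ds` as in the test)
wt = ds.repo.get_content_info(ref=None)
head = {
    f: {k: v for k, v in props.items() if k != 'bytesize'}
    for f, props in ds.repo.get_content_info(ref='HEAD').items()
}
assert wt == head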
Example #16
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #17
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('cd .> empty', message='TEST')
        assert_repo_status(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=op.join(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        noop_cmd = ':'
        res = ds.run(noop_cmd, message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run([noop_cmd], message='NOOP_TEST')
        assert_result_count(res, 1, action='save', status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run('cd . > empty2', message='TEST')
        assert_result_count(res, 1, action='add',
                            path=op.join(ds.path, 'empty2'),
                            type='file', status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
Example #18
def test_create_withprocedure(path):
    # first without
    ds = create(path)
    assert(not op.lexists(op.join(ds.path, 'README.rst')))
    ds.remove()
    assert(not op.lexists(ds.path))
    # now for reals...
    ds = create(
        # needs to identify the dataset, otherwise post-proc
        # procedure doesn't know what to run on
        dataset=path,
        proc_post=[['cfg_metadatatypes', 'xmp', 'datacite']])
    assert_repo_status(path)
    ds.config.reload()
    eq_(ds.config['datalad.metadata.nativetype'], ('xmp', 'datacite'))
Example #19
def test_update_known_submodule(path):
    def get_baseline(p):
        ds = Dataset(p).create()
        sub = create(text_type(ds.pathobj / 'sub'))
        assert_repo_status(ds.path, untracked=['sub'])
        return ds
    # attempt one
    ds = get_baseline(op.join(path, 'wo_ref'))
    with chpwd(ds.path):
        save(recursive=True)
    assert_repo_status(ds.path)

    # attempt two, same as above but call save via the reference dataset
    ds = get_baseline(op.join(path, 'w_ref'))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
Example #20
def test_saving_prior(topdir):
    # the problem is that we might be saving what actually needs to be
    # "created"

    # we would like to place this structure into a hierarchy of two datasets
    # so we create the top one first
    ds1 = create(topdir, force=True)
    # and everything is ok, stuff is not added BUT ds1 will be considered dirty
    assert_repo_status(ds1.path, untracked=['ds2'])
    # And then we would like to initiate a ds2 subdataset
    ds2 = create('ds2', dataset=ds1, force=True)
    # But what will happen is that file1.txt under ds2 would get committed
    # first into ds1, and then the whole procedure actually crashes, because
    # ds2/file1.txt is committed -- ds2 is already known to git and it just
    # pukes with a somewhat confusing "'ds2' already exists in the index"
    assert_in('ds2', ds1.subdatasets(result_xfm='relpaths'))
Example #21
def test_remove_subds(path):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1,
        path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1,
        path=op.join(ds.path, 'sub'),
        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)
Example #22
def test_subds_path(path):
    # a dataset with a subdataset with a file, all neatly tracked
    ds = Dataset(path).create()
    subds = ds.create('sub')
    assert_repo_status(path)
    with (subds.pathobj / 'some.txt').open('w') as f:
        f.write(u'test')
    ds.save(recursive=True)
    assert_repo_status(path)

    # querying the toplevel dataset repo for a subdspath should
    # report the subdataset record in the dataset
    # (unlike `git status`, which is silent for subdataset paths),
    # but definitely not report the subdataset as deleted
    # https://github.com/datalad/datalad-revolution/issues/17
    stat = ds.repo.status(paths=[op.join('sub', 'some.txt')])
    assert_equal(list(stat.keys()), [subds.repo.pathobj])
    assert_equal(stat[subds.repo.pathobj]['state'], 'clean')
Example #23
def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)
Example #24
def test_create(path):
    ds = Dataset(path)
    ds.create(
        description="funny",
        # custom git init option
        initopts=dict(shared='world'))
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    # check default backend
    eq_(ds.config.get("annex.backends"), 'MD5E')
    eq_(ds.config.get("core.sharedrepository"), '2')
    runner = Runner()
    # check description in `info`
    cmd = ['git', 'annex', 'info']
    cmlout = runner.run(cmd, cwd=path)
    assert_in('funny [here]', cmlout[0])
    # check dataset ID
    eq_(ds.config.get_value('datalad.dataset', 'id'),
        ds.id)
Example #25
def test_nested_create(path):
    # to document a more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=(
            'collision with content in parent dataset at %s: %s',
            ds.path, [op.join(lvl2path, 'file')]))
    # even with force, as doing this properly would require complicated
    # surgery to take place
    # MIH disable shaky test till proper dedicated upfront check is in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo._run_annex_command('unannex', annex_options=[op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
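
The **raw kwargs dict used in the create() calls above is a module-level fixture that this snippet does not show. Since error results are asserted on rather than raised, it plausibly disables result raising and rendering, along these lines (a reconstruction, not the verbatim fixture):

# Hypothetical reconstruction of the module-level `raw` kwargs: return plain
# result records and do not raise on 'error' results
raw = dict(return_type='list', result_filter=None,
           result_xfm=None, on_failure='ignore')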
Example #26
def test_gh1597(path):
    if 'APPVEYOR' in os.environ:
        # issue only happens on appveyor, Python itself implodes
        # cannot be reproduced on a real windows box
        raise SkipTest(
            'this test causes appveyor to crash, reason unknown')
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])
Example #27
def test_gh1597(path):
    ds = Dataset(path).create()
    with chpwd(ds.path):
        sub = create('sub')
    ds.add('sub', save=False)
    # only staged at this point, but known, and not annexed
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    assert_repo_status(ds.path, added=[sub.path])
    # now modify low-level
    with open(opj(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path,
                       modified=[ds.pathobj / ".gitmodules"],
                       added=[sub.path])
    ds.add('.gitmodules')
    # must not come under annex management
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
Example #28
def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo is not added to their
    # respective parents, as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified,
    # because its content is still untracked), the subrepo
    # cannot be added (it has no commit);
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch (which would have a commit): datalad/3178
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
Example #29
def test_py2_unicode_command(path):
    # Avoid OBSCURE_FILENAME to avoid windows-breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable,
                                        touch_cmd,
                                        u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    if not on_windows:  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([s.encode("utf-8")
                for s in [sys.executable, "-c", touch_cmd, u" β1 "]])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    with assert_raises(CommandError), swallow_outputs():
        ds.run(u"bβ2.dat")
Example #30
def test_save_partial_commit_shrinking_annex(path):
    # This is a variation on the test above. The main difference is that there
    # are other staged changes in addition to the unlocked file.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""},
                remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would fail
    # with git-annex's partial index error, but rev-save (or more specifically
    # GitRepo.save_) drops the pathspec if there are no staged changes.
    ds.repo.add("staged", git=True)
    if ds.repo.supports_unlocked_pointers:
        ds.save(path="foo")
        assert_repo_status(ds.path, added=["staged"])
    else:
        # Unlike the obsolete interface.save, save doesn't handle a partial
        # commit if there were other staged changes.
        with assert_raises(CommandError) as cm:
            ds.save(path="foo")
        assert_in("partial commit", str(cm.exception))
Example #31
def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, action='update', status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True),
                        1, action='update', status='ok', type='dataset')
    # grab new content of the uninstalled subdataset right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2, action='update', status='ok', type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, action='update', type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the disconnected subdataset for further tests;
    # neither a bound method nor a parentds should be needed
    # to get a clean dataset
    remove(op.join(ds.path, sname), check=False)
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(
        res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
Example #32
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=op.join(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #33
def test_bf1886(path):
    parent = Dataset(path).create()
    parent.create('sub')
    assert_repo_status(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', op.join(parent.path, 'down'))
    parent.save('down')
    assert_repo_status(parent.path)
    # now symlink pointing up
    os.makedirs(op.join(parent.path, 'subdir', 'subsubdir'))
    os.symlink(op.join(op.pardir, 'sub'), op.join(parent.path, 'subdir', 'up'))
    parent.save(op.join('subdir', 'up'))
    # 'all' to avoid the empty dir being listed
    assert_repo_status(parent.path, untracked_mode='all')
    # now symlink pointing 2xup, as in #1886
    os.symlink(op.join(op.pardir, op.pardir, 'sub'),
               op.join(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.save(op.join('subdir', 'subsubdir', 'upup'))
    assert_repo_status(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    create(op.join(parent.path, 'sub2'))
    os.symlink(op.join(op.pardir, op.pardir, 'sub2'),
               op.join(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.save(['sub2', op.join('subdir', 'subsubdir', 'upup2')])
    assert_repo_status(parent.path)
    # full replication of #1886: the above, but from within a subdir of
    # the symlink, with no reference dataset
    create(op.join(parent.path, 'sub3'))
    os.symlink(op.join(op.pardir, op.pardir, 'sub3'),
               op.join(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(op.join(parent.path, 'subdir', 'subsubdir')):
        save([
            op.join(parent.path, 'sub3'),
            op.join(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    assert_repo_status(parent.path)
Example #34
def test_force_checkdatapresent(srcpath, dstpath):
    src = Dataset(srcpath).create()
    target = mk_push_target(src, 'target', dstpath, annex=True, bare=True)
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=False, message="New annex file")
    assert_repo_status(src.path, annex=True)
    whereis_prior = src.repo.whereis(files=['test_mod_annex_file'])[0]

    res = src.push(to='target', data='nothing')
    # nothing reported to be copied
    assert_not_in_results(res, action='copy')
    # we got the git-push nevertheless
    eq_(src.repo.get_hexsha(DEFAULT_BRANCH), target.get_hexsha(DEFAULT_BRANCH))
    # nothing moved
    eq_(whereis_prior, src.repo.whereis(files=['test_mod_annex_file'])[0])

    # now a push without the forced no-transfer;
    # we do not give 'since', so the non-transferred file is picked up
    # and transferred
    res = src.push(to='target', force=None)
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # but availability update
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')
    # whereis info reflects the change
    ok_(
        len(whereis_prior) < len(
            src.repo.whereis(files=['test_mod_annex_file'])[0]))

    # doing it yet again will do nothing, because everything is up to date
    assert_status('notneeded', src.push(to='target', force=None))
    # an explicit reference point doesn't change that
    assert_status('notneeded', src.push(to='target',
                                        force=None,
                                        since='HEAD~1'))

    # now force data transfer
    res = src.push(to='target', force='checkdatapresent')
    # no branch change, done before
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec=DEFAULT_REFSPEC)
    # no availability update
    assert_in_results(res,
                      action='publish',
                      status='notneeded',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    # but data transfer
    assert_in_results(res,
                      status='ok',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy')

    # force data transfer, but data isn't available
    src.repo.drop('test_mod_annex_file')
    res = src.push(to='target',
                   path='.',
                   force='checkdatapresent',
                   on_failure='ignore')
    assert_in_results(res,
                      status='impossible',
                      path=str(src.pathobj / 'test_mod_annex_file'),
                      action='copy',
                      message='Slated for transport, but no content present')
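
For orientation, the push force modes this test exercises, summarized (descriptions paraphrase the assertions above; whether the listing is exhaustive is an assumption):

# force modes exercised by test_force_checkdatapresent:
push_force_modes = {
    None: "regular push; data transfer driven by git-annex availability info",
    'gitpush': "force the underlying git push (exercised in check_push below)",
    'checkdatapresent': "re-verify content presence on the target and copy "
                        "again, even when availability info says it is there",
}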
Example #35
def check_push(annex, src_path, dst_path):
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/' + DEFAULT_BRANCH, orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res,
        status='impossible',
        message='No push target given, and none could be auto-detected, '
        'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())
    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      target='target',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # configure a default merge/upstream target
    src.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                   'target',
                   where='local')
    src.config.set('branch.{}.merge'.format(DEFAULT_BRANCH),
                   DEFAULT_BRANCH,
                   where='local')

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    # we could say since='HEAD~2' to make things fast, or we can be lazy
    # and say since='^' to indicate the state of the tracking remote,
    # which is the same, because we made two commits since the last push.
    res = src.push(to='target', since="^", jobs=2)
    assert_in_results(
        res,
        action='publish',
        status='ok',
        target='target',
        refspec=DEFAULT_REFSPEC,
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(res,
                          action='copy',
                          status='ok',
                          path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))
    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above)

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_in_results(res,
                          action='publish',
                          status='error',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(res,
                          action='publish',
                          status='ok',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['forced-update'])
        eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
            list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/<default branch>'
    eq_(orig_branches, src_repo.get_branches())
Example #36
def test_uninstall_recursive(path):
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    # we add one file, but we get a response for the requested
    # directory too
    res = subds.save()
    assert_result_count(res, 1, action='add', status='ok', type='file')
    assert_result_count(res, 1, action='save', status='ok', type='dataset')
    # save all -> all clean
    ds.save(recursive=True)
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)
    # now uninstall in subdataset through superdataset
    target_fname = opj('deep', 'dir', 'test')
    # sane starting point
    ok_(exists(opj(ds.path, target_fname)))
    # doesn't have the minimum number of copies for a safe drop
    res = ds.drop(target_fname, recursive=True, on_failure='ignore')
    assert_status('error', res)
    assert_result_values_cond(
        res, 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary cop" in x)

    # this should do it
    ds.drop(target_fname, check=False, recursive=True)
    # link is dead
    lname = opj(ds.path, target_fname)
    ok_(not exists(lname))
    # entire hierarchy saved
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)
    # now same with actual handle removal
    # content is dropped already, so no checks in place anyway
    ds.remove(target_fname, check=True, recursive=True)
    ok_(not exists(lname) and not lexists(lname))
    assert_repo_status(subds.path)
    assert_repo_status(ds.path)
Example #37
def test_repo_diff(path, norepo):
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    assert_raises(ValueError, ds.repo.diff, fr='WTF', to='MIKE')
    # no diff
    eq_(ds.repo.diff('HEAD', None), {})
    # bogus path makes no difference
    eq_(ds.repo.diff('HEAD', None, paths=['THIS']), {})
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    eq_(
        ds.repo.diff(fr='HEAD~1', to='HEAD'), {
            ut.Path(ds.repo.pathobj / 'new'): {
                'state': 'added',
                'type': 'file',
                'bytesize': 5,
                'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'
            }
        })
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    eq_(
        ds.repo.diff(fr='HEAD', to=None),
        {
            ut.Path(ds.repo.pathobj / 'new'): {
                'state': 'modified',
                'type': 'file',
                # the beast is modified, but no change in shasum -> not staged
                'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6',
                'prev_gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'
            }
        })
    # per path query gives the same result
    eq_(ds.repo.diff(fr='HEAD', to=None),
        ds.repo.diff(fr='HEAD', to=None, paths=['new']))
    # also given a directory as a constraint does the same
    eq_(ds.repo.diff(fr='HEAD', to=None),
        ds.repo.diff(fr='HEAD', to=None, paths=['.']))
    # but if we give another path, it doesn't show up
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})

    # make clean
    ds.save()
    assert_repo_status(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # default is to report all files
    eq_(
        ds.repo.diff(fr='HEAD', to=None), {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'
            },
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'
            }
        })
    # but can be made more compact
    eq_(
        ds.repo.diff(fr='HEAD', to=None, untracked='normal'), {
            ut.Path(ds.repo.pathobj / 'deep'): {
                'state': 'untracked',
                'type': 'directory'
            }
        })

    # again, an unmatching path constraint will give an empty report
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
    # perfect match and anything underneath will do
    eq_(
        ds.repo.diff(fr='HEAD', to=None, paths=['deep']), {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'
            },
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'
            }
        })
Example #38
def test_diff(path, norepo):
    with chpwd(norepo):
        assert_raises(NoDatasetArgumentFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(ds.diff(fr='WTF', on_failure='ignore'),
                        1,
                        status='impossible',
                        message="Git reference 'WTF' invalid")
    # no diff
    assert_result_count(_dirty_results(ds.diff()), 0)
    assert_result_count(_dirty_results(ds.diff(fr='HEAD')), 0)
    # bogus path makes no difference
    assert_result_count(_dirty_results(ds.diff(path='THIS', fr='HEAD')), 0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    res = _dirty_results(ds.diff(fr='HEAD~1'))
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(_dirty_results(diff(fr='HEAD~1')),
                            1,
                            action='diff',
                            path=op.join(ds.path, 'new'),
                            state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(ds.diff()), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff(path='otherpath'), 0)
    # giving the right path must work though
    assert_result_count(ds.diff(path='new'),
                        1,
                        action='diff',
                        path=op.join(ds.path, 'new'),
                        state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(ds.diff()), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(ds.diff()), 0)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(ds.diff())
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        state='untracked',
                        type='directory',
                        path=op.join(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(ds.diff(untracked='all'),
                        2,
                        state='untracked',
                        type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep'),
                        type='directory')
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep'))
    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(ds.diff(),
                        1,
                        state='untracked',
                        path=op.join(ds.path, 'deep', 'down'),
                        type='file')
Example #39
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, last_commit_msg(sub.repo))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision=DEFAULT_BRANCH + "~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(
        last_commit_msg(ds.repo).splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since=DEFAULT_BRANCH + "~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files that
    # would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other config
            # file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))

    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    report[-1]["commit"] == ds.repo.get_hexsha()

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
Example #40
def _test_create_store(host, base_path, ds_path, clone_path):

    ds = Dataset(ds_path).create(force=True)

    subds = ds.create('sub', force=True)
    subds2 = ds.create('sub2', force=True, annex=False)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    # don't specify a special remote; by default it should be git-remote + "-storage"
    res = ds.create_sibling_ria("ria+ssh://test-store:", "datastore")
    assert_result_count(res, 1, status='ok', action='create-sibling-ria')
    eq_(len(res), 1)

    # remotes exist, but only in super
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name']
         for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub_siblings})
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'here'}, {s['name'] for s in sub2_siblings})

    # TODO: post-update hook was enabled

    # check bare repo:
    git_config = Path(base_path) / ds.id[:3] / ds.id[3:] / 'config'
    assert git_config.exists()
    content = git_config.read_text()
    assert_in("[datalad \"ora-remote\"]", content)
    super_uuid = ds.config.get(
        "remote.{}.annex-uuid".format('datastore-storage'))
    assert_in("uuid = {}".format(super_uuid), content)

    # implicit test of success by ria-installing from store:
    ds.push(to="datastore")
    with chpwd(clone_path):
        if host:
            # note, we are not using the "test-store"-label here
            clone('ria+ssh://{}{}#{}'.format(host, base_path, ds.id),
                  path='test_install')
        else:
            # TODO: Whenever ria+file supports special remote config (label),
            # change here:
            clone('ria+file://{}#{}'.format(base_path, ds.id),
                  path='test_install')
        installed_ds = Dataset(op.join(clone_path, 'test_install'))
        assert installed_ds.is_installed()
        assert_repo_status(installed_ds.repo)
        eq_(installed_ds.id, ds.id)
        # Note: get_annexed_files() always reports POSIX paths.
        assert_in('ds/file1.txt', installed_ds.repo.get_annexed_files())
        assert_result_count(installed_ds.get(op.join('ds', 'file1.txt')),
                            1,
                            status='ok',
                            action='get',
                            path=op.join(installed_ds.path, 'ds', 'file1.txt'))

    # now, again but recursive.
    res = ds.create_sibling_ria("ria+ssh://test-store:",
                                "datastore",
                                recursive=True,
                                existing='reconfigure')
    eq_(len(res), 3)
    assert_result_count(res,
                        1,
                        path=str(ds.pathobj),
                        status='ok',
                        action="create-sibling-ria")
    assert_result_count(res,
                        1,
                        path=str(subds.pathobj),
                        status='ok',
                        action="create-sibling-ria")
    assert_result_count(res,
                        1,
                        path=str(subds2.pathobj),
                        status='ok',
                        action="create-sibling-ria")

    # remotes now exist in super and sub
    siblings = ds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name']
         for s in siblings})
    sub_siblings = subds.siblings(result_renderer=None)
    eq_({'datastore', 'datastore-storage', 'here'},
        {s['name']
         for s in sub_siblings})
    # but no special remote in plain git subdataset:
    sub2_siblings = subds2.siblings(result_renderer=None)
    eq_({'datastore', 'here'}, {s['name'] for s in sub2_siblings})

    # for testing trust_level parameter, redo for each label:
    for trust in ['trust', 'semitrust', 'untrust']:
        ds.create_sibling_ria("ria+ssh://test-store:",
                              "datastore",
                              existing='reconfigure',
                              trust_level=trust)
        res = ds.repo.repo_info()
        assert_in(
            '[datastore-storage]',
            [r['description'] for r in res['{}ed repositories'.format(trust)]])
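
The ria+ URLs used above pack transport, store location, and dataset id into a single string. A breakdown with hypothetical values:

# Layout of the RIA URLs above: <transport>://<store>#<dataset id>
url_ssh = 'ria+ssh://example.com/data/store#6d69ca68-7e85-11e6-904c-002590f97d84'
url_file = 'ria+file:///data/store#6d69ca68-7e85-11e6-904c-002590f97d84'
# within the store, the dataset's bare repo lives at
# <base>/<id[:3]>/<id[3:]>, matching the git_config path assembled in the test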
Example #41
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use the `remove` API to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.save(recursive=True)
    assert_repo_status(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(without_path, 1, type='dataset', path=dspath)
        assert_result_count(pwd_res,
                            1,
                            type='dataset',
                            path=dspath,
                            orig_request='.',
                            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_([{
            k: v
            for k, v in ap.items()
            if k not in ('registered_subds', 'raw_input', 'orig_request',
                         'refds')
        } for ap in annotate_paths(path='b', recursive=True)], [{
            k: v
            for k, v in ap.items()
            if k not in ('registered_subds', 'raw_input', 'orig_request',
                         'refds')
        } for ap in annotate_paths(dataset='b', recursive=True)])

        # when we point to a list of directories, there should be no
        # repeated rediscovery of the subdatasets
        with swallow_logs(new_level=logging.DEBUG) as cml:
            annotate_paths(path=['a', 'b'])
            eq_(
                cml.out.count(
                    'Resolved dataset for subdataset reporting/modification'),
                1)

    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_(
        {
            k: pwd_res[0][k]
            for k in pwd_res[0] if k in ('path', 'type', 'action', 'status')
        }, {k: res[0][k]
            for k in res[0] if k not in ('refds', )})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(res,
                        1,
                        status='error',
                        path=nodspath,
                        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(base_res,
                        1,
                        type='dataset',
                        parentds=parentds.path,
                        path=opj(parentds.path, 'aa'),
                        status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k]
         for k in base_res[-1] if k not in ('refds', )})
    assert_result_count(
        res,
        1,
        type='dataset',
        status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(path=opj(dspath, 'a'),
                                  recursive=True,
                                  force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(res,
                        1,
                        type='dataset',
                        status='',
                        parentds=dspath,
                        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([
        basename(p) for p in annotate_paths(path=[
            opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')
        ],
                                            recursive=True,
                                            result_xfm='paths')
    ], ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([
        basename(p) for p in annotate_paths(path=[
            opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')
        ],
                                            recursive=True,
                                            result_xfm='paths')
    ], ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        orig_request=fpath,
                        raw_input=True,
                        type='file',
                        path=opj(ds.path, fpath),
                        parentds=opj(ds.path, 'a', 'aa'),
                        status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, use 'notneeded' for unavailable to try to
    # trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath,
                                   recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(after_res,
                        1,
                        state='absent',
                        **{
                            k: before_res[0][k]
                            for k in before_res[0]
                            if k not in ('state', 'status')
                        })
    # however, this beauty doesn't come for free, so it can be disabled,
    # which will make the uninstalled subdataset look like a directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(after_res,
                        1,
                        type='directory',
                        path=before_res[0]['path'],
                        parentds=before_res[0]['parentds'])
    # feed annotated paths into annotate_paths, it shouldn't change things
    # upon second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
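
The dict comprehensions above that drop volatile bookkeeping keys before comparing annotated results recur several times in this test. A small helper capturing the pattern (hypothetical, not part of the test suite):

def strip_keys(results, drop=('registered_subds', 'raw_input',
                              'orig_request', 'refds')):
    # return copies of the result records without request-specific keys,
    # so that structurally identical annotations compare equal
    return [{k: v for k, v in r.items() if k not in drop} for r in results]
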
Exemplo n.º 42
0
def test_get_modified_subpaths(path):
    ds = Dataset(path).create(force=True)
    suba = ds.create('ba', force=True)
    subb = ds.create('bb', force=True)
    subsub = ds.create(opj('bb', 'bba', 'bbaa'), force=True)
    ds.save(recursive=True)
    assert_repo_status(path)

    orig_base_commit = ds.repo.get_hexsha()

    # nothing was modified compared to the status quo, output must be empty
    eq_([],
        list(get_modified_subpaths([dict(path=ds.path)], ds,
                                   orig_base_commit)))

    # modify one subdataset
    create_tree(subsub.path, {'added': 'test'})
    subsub.save('added')

    # it will replace the requested path with the path of the closest
    # submodule that is modified
    assert_result_count(get_modified_subpaths([dict(path=ds.path)], ds,
                                              orig_base_commit),
                        1,
                        type='dataset',
                        path=subb.path)

    # make another one dirty
    create_tree(suba.path, {'added': 'test'})

    # now a single query path will result in the two modified subdatasets
    assert_result_count(get_modified_subpaths([dict(path=ds.path)], ds,
                                              orig_base_commit),
                        2,
                        type='dataset')

    # now save up top; this will save the new state of subb, but keep suba dirty
    ds.save(subb.path, recursive=True)
    # now if we ask for what was last saved, we only get the new state of subb
    assert_result_count(get_modified_subpaths([dict(path=ds.path)], ds,
                                              'HEAD~1..HEAD'),
                        1,
                        type='dataset',
                        path=subb.path)
    # comparing the working tree to HEAD will report the dirty suba instead
    assert_result_count(get_modified_subpaths([dict(path=ds.path)], ds,
                                              'HEAD'),
                        1,
                        type='dataset',
                        path=suba.path)

    # add/save everything, become clean
    ds.save(recursive=True)
    assert_repo_status(path)
    # nothing is reported as modified
    assert_result_count(
        get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD'), 0)
    # but looking all the way back, we find all changes
    assert_result_count(get_modified_subpaths([dict(path=ds.path)], ds,
                                              orig_base_commit),
                        2,
                        type='dataset')

    # now we ask specifically for the file we added to subsub above
    query = [dict(path=opj(subsub.path, 'added'))]
    res = list(get_modified_subpaths(query, ds, orig_base_commit))
    # we only get this one result back, and not all the submodule state changes
    # that were also saved in the superdatasets
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        type='file',
                        path=opj(subsub.path, 'added'),
                        state='added')
    # but if we are only looking at the last saved change (suba), our query
    # will not return anything
    res = get_modified_subpaths(query, ds, 'HEAD^')
    assert_result_count(res, 0)

    # deal with removal (force insufficient copies error)
    ds.remove(suba.path, check=False)
    assert_repo_status(path)
    res = list(get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD~1..HEAD'))
    # removed submodule + .gitmodules update
    assert_result_count(res, 2)
    assert_result_count(res, 1, type_src='dataset', path=suba.path)
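
get_modified_subpaths() is driven either with a single committish (compare the working tree against that commit) or with an 'A..B' range (compare two commits). A hedged git-level analogue of those two modes (`changed_paths` is a hypothetical helper):

import subprocess

def changed_paths(repo_path, revspec):
    # 'A..B' diffs two commits against each other; a single committish
    # such as 'HEAD' diffs that commit against the working tree
    out = subprocess.run(
        ['git', '-C', repo_path, 'diff', '--name-only', revspec],
        capture_output=True, text=True, check=True)
    return out.stdout.splitlines()
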
Exemplo n.º 43
0
def test_diff(path, norepo):
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(
        ds._diff(revision='WTF', on_failure='ignore'),
        1,
        status='impossible',
        message="fatal: bad revision 'WTF'")
    assert_result_count(ds._diff(), 0)
    # no diff
    assert_result_count(ds._diff(), 0)
    assert_result_count(ds._diff(revision='HEAD'), 0)
    # bogus path makes no difference
    assert_result_count(ds._diff(path='THIS', revision='HEAD'), 0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    res = ds._diff(revision='HEAD~1')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=opj(ds.path, 'new'), state='added')
    # no diff against HEAD
    assert_result_count(ds._diff(), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    for diffy in (None, 'HEAD'):
        res = ds._diff(revision=diffy)
        assert_result_count(res, 1)
        assert_result_count(
            res, 1, action='diff', path=opj(ds.path, 'new'), state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds._diff('otherpath'), 0)
    # giving the right path must work though
    assert_result_count(
        ds._diff('new'), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no diff, because we staged the modification
    assert_result_count(ds._diff(), 0)
    # but we can get at it
    assert_result_count(
        ds._diff(staged=True), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    # OR
    assert_result_count(
        ds._diff(revision='HEAD'), 1,
        action='diff', path=opj(ds.path, 'new'), state='modified')
    ds.save()
    assert_repo_status(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = ds._diff()
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory', path=opj(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(
        ds._diff(report_untracked='all'), 2, state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds._diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        ds._diff(path='deep'), 1, state='untracked', path=opj(ds.path, 'deep'),
        type='directory')
    assert_result_count(
        ds._diff(path='deep'), 1,
        state='untracked', path=opj(ds.path, 'deep'))
    # now we stage one of the two files in deep
    ds.repo.add(opj('deep', 'down2'), git=True)
    # without any reference it will ignore the staged stuff and report the remaining
    # untracked file
    assert_result_count(
        ds._diff(), 1, state='untracked', path=opj(ds.path, 'deep', 'down'),
        type='file')
    res = ds._diff(staged=True)
    assert_result_count(
        res, 1, state='untracked', path=opj(ds.path, 'deep', 'down'), type='file')
    assert_result_count(
        res, 1, state='added', path=opj(ds.path, 'deep', 'down2'), type='file')
Exemplo n.º 44
0
def _test_version_check(host, dspath, store):

    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary choice.
    # We need just something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError, ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')
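
The recursive diff tests below pass their results through _dirty_results(), which this listing does not show. A minimal sketch consistent with how it is used (an assumption, not the verbatim helper):

def _dirty_results(res):
    # keep only results reporting an actual change; recursive diffs may
    # also emit 'clean' records for unmodified datasets
    return [r for r in res if r.get('state') != 'clean']
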
Exemplo n.º 45
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH,
                  result_renderer=None)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=sub.path,
                        type='dataset')
    # now recursive
    res = ds.diff(recursive=True,
                  fr=DEFAULT_BRANCH + '~1',
                  to=DEFAULT_BRANCH,
                  result_renderer=None)
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded'
        }
    })
    res = ds.diff(recursive=True, untracked='all', result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)

    head_ref = DEFAULT_BRANCH if ds.repo.is_managed_branch() else 'HEAD'

    # look at the last change, only one file was added
    res = ds.diff(fr=head_ref + '~1', to=head_ref, result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True,
                  fr=head_ref + '~1',
                  to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    if ds.repo.is_managed_branch():
        raise SkipTest(
            "Test assumption broken: https://github.com/datalad/datalad/issues/3818"
        )
    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True,
                  fr=head_ref + '~2',
                  to=head_ref,
                  result_renderer=None)
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
Exemplo n.º 46
0
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible', source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3, status='ok', type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok', path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod) to
    # operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since "pwd" dataset is not
        # considered, and would be relative path (as specified).
        # In 0.12.0 - it would include implicit pwd dataset, and paths would be absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update, path=p, action='update', status='ok', type='dataset')

        # and with merge we would also try to save (but there would be no changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # only 2 of the results are really "update"s
        assert_in_results(res_merge, action='update', status='ok', type='dataset')
        assert_in_results(res_merge, action='save', status='notneeded', type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2,
            status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        action='update', status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        action='update', status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
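
update() without merge=True amounts to a fetch: the new commit only lands on the remote-tracking branch. A hedged git-level version of the get_files() checks above (`branch_has_file` is a hypothetical helper); e.g. branch_has_file(dst_path, 'origin/master', 'update.txt') would mirror the assertion before the merge.

import subprocess

def branch_has_file(repo_path, branch, fname):
    # list the files tracked on `branch` without checking it out
    out = subprocess.run(
        ['git', '-C', repo_path, 'ls-tree', '--name-only', '-r', branch],
        capture_output=True, text=True, check=True)
    return fname in out.stdout.splitlines()
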
Exemplo n.º 47
0
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=sub.path,
                        type='dataset')
    # now recursive
    res = ds.diff(recursive=True, fr='HEAD~1', to='HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, '.datalad', 'config'))

    # now we add a file to just the parent
    create_tree(ds.path, {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded'
        }
    })
    res = ds.diff(recursive=True, untracked='all')
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    # intentional save in two steps to make check below easier
    ds.save('sub', recursive=True)
    ds.save()
    assert_repo_status(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(res), 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    # now the exact same thing with recursion, must not be different from the
    # call above
    res = ds.diff(recursive=True, fr='HEAD~1', to='HEAD')
    assert_result_count(_dirty_results(res), 1)
    # last change in parent
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')

    # one further back brings in the modified subdataset, and the added file
    # within it
    res = ds.diff(recursive=True, fr='HEAD~2', to='HEAD')
    assert_result_count(_dirty_results(res), 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=op.join(sub.path, 'twofile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
Exemplo n.º 48
0
def check_merge_follow_parentds_subdataset_detached(on_adjusted, path):
    # Note: For the adjusted case, this is not much more than a smoke test that
    # on an adjusted branch we fail sensibly. The resulting state is not easy
    # to reason about nor desirable.
    path = Path(path)
    # $path/source/s0/s1
    # The additional dataset level is to gain some confidence that this works
    # for nested datasets.
    ds_src = Dataset(path / "source").create()
    if ds_src.repo.is_managed_branch():
        if not on_adjusted:
            raise SkipTest("System only supports adjusted branches. "
                           "Skipping non-adjusted test")
    ds_src_s0 = ds_src.create("s0")
    ds_src_s1 = ds_src_s0.create("s1")
    ds_src.save(recursive=True)
    if on_adjusted:
        # Note: We adjust after creating all the datasets above to avoid a bug
        # fixed in git-annex 7.20191024, specifically bbdeb1a1a (sync: Fix
        # crash when there are submodules and an adjusted branch is checked
        # out, 2019-10-23).
        for ds in [ds_src, ds_src_s0, ds_src_s1]:
            _adjust(ds.repo)
        ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_s1 = Dataset(ds_clone.pathobj / "s0" / "s1")

    ds_src_s1.repo.checkout("master^0")
    (ds_src_s1.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    res = ds_clone.update(merge=True, recursive=True, follow="parentds",
                          on_failure="ignore")
    if on_adjusted:
        # The top-level update is okay because there is no parent revision to
        # update to.
        assert_in_results(
            res,
            status="ok",
            path=ds_clone.path,
            action="update")
        # The subdataset, on the other hand, is impossible.
        assert_in_results(
            res,
            status="impossible",
            path=ds_clone_s1.path,
            action="update")
        return
    assert_repo_status(ds_clone.path)

    # We brought in the revision and got to the same state of the remote.
    # Blind saving here without bringing in the current subdataset revision
    # would have resulted in a new commit in ds_clone that reverts the
    # last subdataset ID recorded in ds_src.
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())

    # Record a revision in the parent and then move HEAD away from it so that
    # the explicit revision fetch fails.
    (ds_src_s1.pathobj / "bar").write_text("bar content")
    ds_src.save(recursive=True)
    ds_src_s1.repo.checkout(
        ds_src_s1.repo.get_corresponding_branch("master"))
    # This is the default, but just in case:
    ds_src_s1.repo.config.set("uploadpack.allowAnySHA1InWant", "false",
                              where="local")
    res = ds_clone.update(merge=True, recursive=True, follow="parentds",
                          on_failure="ignore")
    # The fetch with the explicit ref fails because it isn't advertised.
    assert_in_results(
        res,
        status="impossible",
        path=ds_clone_s1.path,
        action="update")

    # Back to the detached head.
    ds_src_s1.repo.checkout("HEAD@{1}")
    # Set up a case where update() will not resolve the sibling.
    ds_clone_s1.repo.call_git(["branch", "--unset-upstream"])
    ds_clone_s1.config.reload(force=True)
    ds_clone_s1.repo.call_git(["remote", "add", "other", ds_src_s1.path])
    res = ds_clone.update(recursive=True, follow="parentds",
                          on_failure="ignore")
    # In this case, update() won't abort if we call with merge=False, but
    # it does if the revision wasn't brought down in the `fetch(all_=True)`
    # call.
    assert_in_results(
        res,
        status="impossible",
        path=ds_clone_s1.path,
        action="update")
Exemplo n.º 49
0
def test_save(path):

    ds = Dataset(path)

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save(message="add a new file")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save(message="modified new_file.tst")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without an explicit dataset, for files in the PWD
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save(message="love rapunzel")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(op.join(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save(message="love marsians")
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(op.join(path, fn), "w") as f:
            f.write(fn)

    ds.save([op.join(path, f) for f in files])
    # superfluous call to save (all was saved already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save(message="set of new files"))
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(op.join(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.save()
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # ensure modified subds is committed
    ds.save()
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    assert_repo_status(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds', version_tag='new_sub')
    assert_repo_status(path, annex=isinstance(ds.repo, AnnexRepo))
    tags = ds.repo.get_tags()
    ok_(len(tags) == 1)
    eq_(tags[0], dict(hexsha=ds.repo.get_hexsha(), name='new_sub'))
    # fails when retagged, like git does
    res = ds.save(version_tag='new_sub', on_failure='ignore')
    assert_status('error', res)
    assert_result_count(res,
                        1,
                        action='save',
                        type='dataset',
                        path=ds.path,
                        message=('cannot tag this version: %s',
                                 "fatal: tag 'new_sub' already exists"))
Exemplo n.º 50
0
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    # 'target/<default branch>' should be tracking branch at this point, so
    # try publishing without `to`:
    # MIH: Nope, we don't automatically add this anymore

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'),
                to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))
    assert_git_annex_branch_published(source.repo, target)

    eq_(filter_fsck_error_msg(source.repo.fsck()),
        filter_fsck_error_msg(source.repo.fsck(remote='target')))
Exemplo n.º 51
0
def test_aggregation(path):
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    assert_status('ok', ds.save(recursive=True))
    # while we are at it: do it again, nothing should happen
    assert_status('notneeded', ds.save(recursive=True))

    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.meta_aggregate(recursive=True, into='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='meta_aggregate')
    # the respective super datasets see two saves, one to record the change
    # in the subdataset after its own aggregation, and one after the super
    # itself was updated with the aggregated metadata
    assert_result_count(res, 5, status='ok', action='save', type='dataset')
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.meta_dump(reporton='aggregates', recursive=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # the mother dataset also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.meta_dump(recursive=True)
    # basic sanity check
    assert_result_count(origres, 3, type='dataset')
    assert_result_count([r for r in origres if r['path'].endswith('.json')],
                        3,
                        type='file')  # Now that we have annex.key
    # three different IDs
    eq_(
        3,
        len(
            set([
                _get_dsid_from_core_metadata(s['metadata']['metalad_core'])
                for s in origres if s['type'] == 'dataset'
            ])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(op.join(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    eq_(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.meta_dump()
    # basic sanity check
    assert_result_count(cloneres, 1, type='dataset')
    # payload file
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
Exemplo n.º 52
0
def test_publish_plain_git(origin, src_path, dst_path):
    # TODO: Since it's mostly the same, merge with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'),
                to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish,
                  dataset=source,
                  to='target',
                  result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source,
                  to='target',
                  result_xfm='datasets',
                  force=True)
    eq_(res, [source])
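
Amending rewrites history, so the plain publish is rejected as non-fast-forward and force=True is required. The git-level equivalent of that final call (a sketch; the remote and branch names here are assumptions):

import subprocess

def force_push(repo_path, remote='target', branch='master'):
    # --force replaces the remote branch with the amended local history
    subprocess.run(
        ['git', '-C', repo_path, 'push', '--force', remote, branch],
        check=True)
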
Exemplo n.º 53
0
def test_push_recursive(origin_path, src_path, dst_top, dst_sub,
                        dst_subnoannex, dst_subsub):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out recursively
    # running on a clone should make the test scenario differ more from
    # test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    subs = top.get('.', recursive=True, get_data=False, result_xfm='datasets')
    # order for '.' should not be relied upon, so sort by path
    sub, subsub, subnoannex = sorted(subs, key=lambda ds: ds.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # all corresponding branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_(DEFAULT_BRANCH)),
            list(d.get_branch_commits_(DEFAULT_BRANCH)))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of the default branch
    res = top.push(to="target", recursive=True)
    assert_not_in_results(res, status='ok', refspec=DEFAULT_REFSPEC)
    assert_in_results(res, status='notneeded', refspec=DEFAULT_REFSPEC)

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since=DEFAULT_BRANCH + '~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec=DEFAULT_REFSPEC)
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)

    # if noannex target gets some annex, we still should not fail to push
    target_subnoannex.call_git(['annex', 'init'])
    # just to ensure that we do need something to push
    (subnoannex.pathobj / "newfile").write_text("content")
    subnoannex.save()
    res = subnoannex.push(to="target")
    assert_in_results(res, status='ok', type='dataset')
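
mk_push_target() is used throughout this test but not shown in the listing. A plausible minimal sketch, assuming it creates a bare repository (annex-initialized on request) and registers it as a sibling; this is an inference from usage, not the real helper:

import subprocess

def mk_push_target_sketch(ds, name, path, annex=True):
    # a bare repo at `path`, registered as sibling `name` of `ds`
    subprocess.run(['git', 'init', '--bare', path], check=True)
    if annex:
        subprocess.run(['git', '-C', path, 'annex', 'init'], check=True)
    ds.siblings('add', name=name, url=path, result_renderer=None)
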
Exemplo n.º 54
0
def test_publish_depends(origin, src_path, target1_path, target2_path,
                         target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling('ssh://datalad-test' + target1_path,
                          annex_wanted='standard',
                          annex_group='backup',
                          name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res,
        1,
        path=source.path,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling('ssh://datalad-test' + target3_path, name='target3')
    assert_repo_status(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.save('probe1')
    assert_repo_status(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
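
The publication dependencies live in plain git config under the key spelled out in `depvar`. A hedged reader for that setting (hypothetical helper):

def publish_dependencies(ds, sibling):
    # the recorded dependency for `sibling`, or None if unset
    return ds.config.get(
        'remote.{}.datalad-publish-depends'.format(sibling), None)
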
Exemplo n.º 55
0
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Exemplo n.º 56
0
def test_ria_http(lcl, storepath, url):
    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)
    for d in (ds, subds):
        _move2store(storepath, d)
    # location of superds in store
    storeds_loc = str(storepath / ds.id[:3] / ds.id[3:])
    # now we should be able to clone from a ria+http url
    # the super
    riaclone = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone',
    )

    # due to default configuration, clone() should automatically look for the
    # subdataset in the store, too -- if it did not, the following would fail,
    # because we never configured a proper submodule URL
    riaclonesub = riaclone.get(
        op.join('subdir', 'subds'), get_data=False,
        result_xfm='datasets', return_type='item-or-list')

    # both datasets came from the store and must be set up in an identical
    # fashion
    for origds, cloneds in ((ds, riaclone), (subds, riaclonesub)):
        eq_(origds.id, cloneds.id)
        if not ds.repo.is_managed_branch():
            # test logic cannot handle adjusted branches
            eq_(origds.repo.get_hexsha(), cloneds.repo.get_hexsha())
        ok_(cloneds.config.get('remote.origin.url').startswith(url))
        eq_(cloneds.config.get('remote.origin.annex-ignore'), 'true')
        eq_(cloneds.config.get('datalad.get.subdataset-source-candidate-200origin'),
            'ria+%s#{id}' % url)
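        # (the leading '200' in the candidate name is a cost prefix; when
        # get() resolves subdataset sources, lower-cost candidates are tried
        # first)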

    # now advance the source dataset
    (ds.pathobj / 'newfile.txt').write_text('new')
    ds.save()
    ds.publish(to='store')
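    # refresh info/refs etc. so git's dumb-HTTP transport sees the new state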
    Runner(cwd=storeds_loc).run(['git', 'update-server-info'])
    # re-clone as before
    riaclone2 = clone(
        'ria+{}#{}'.format(url, ds.id),
        lcl / 'clone2',
    )
    # and now clone a specific version, here given by the tag name
    riaclone_orig = clone(
        'ria+{}#{}@{}'.format(url, ds.id, 'original'),
        lcl / 'clone_orig',
    )
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        # we got the precise version we wanted
        eq_(riaclone.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())
        # and not the latest
        eq_(riaclone2.repo.get_hexsha(), ds.repo.get_hexsha())
        neq_(riaclone2.repo.get_hexsha(), riaclone_orig.repo.get_hexsha())

    # attempt to clone a version that doesn't exist
    with swallow_logs():
        with assert_raises(IncompleteResultsError) as cme:
            clone('ria+{}#{}@impossible'.format(url, ds.id),
                  lcl / 'clone_failed')
        assert_in("not found in upstream", str(cme.exception))

    # lastly test if URL rewriting is in effect
    # on the surface we clone from an SSH source identified by some custom
    # label, no full URL, but URL rewriting setup maps it back to the
    # HTTP URL used above
    with patch_config({
            'url.ria+{}#.insteadof'.format(url): 'ria+ssh://somelabel#'}):
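        # note: origds still holds the last value from the loop above, i.e. subds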
        cloned_by_label = clone(
            'ria+ssh://somelabel#{}'.format(origds.id),
            lcl / 'cloned_by_label',
        )
    # so we get the same setup as above, but....
    eq_(origds.id, cloned_by_label.id)
    if not ds.repo.is_managed_branch():
        # test logic cannot handle adjusted branches
        eq_(origds.repo.get_hexsha(), cloned_by_label.repo.get_hexsha())
    ok_(cloned_by_label.config.get('remote.origin.url').startswith(url))
    eq_(cloned_by_label.config.get('remote.origin.annex-ignore'), 'true')
    # ... the clone candidates go with the label-based URL such that
    # future get() requests acknowledge a (system-wide) configuration
    # update
    eq_(cloned_by_label.config.get('datalad.get.subdataset-source-candidate-200origin'),
        'ria+ssh://somelabel#{id}')

    if not has_symlink_capability():
        return
    # place a symlink in the store to serve as a dataset alias
    (storepath / 'alias').mkdir()
    (storepath / 'alias' / 'myname').symlink_to(storeds_loc)
    with chpwd(lcl):
        cloned_by_alias = clone('ria+{}#~{}'.format(url, 'myname'))
    # still get the same data
    eq_(cloned_by_alias.id, ds.id)
    # more sensible default install path
    eq_(cloned_by_alias.pathobj.name, 'myname')
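
For reference, a hedged sketch of the RIA URL forms this test exercises; the
store URL and dataset id below are hypothetical placeholders:

store_url = 'https://store.example.com'        # hypothetical store URL
dsid = '123e4567-e89b-12d3-a456-426614174000'  # hypothetical dataset id
latest = 'ria+{}#{}'.format(store_url, dsid)                 # tip of the dataset
at_tag = 'ria+{}#{}@{}'.format(store_url, dsid, 'original')  # specific version
by_alias = 'ria+{}#~{}'.format(store_url, 'myname')          # via store alias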
Example #57
def get_baseline(p):
    ds = Dataset(p).create()
    sub = create(text_type(ds.pathobj / 'sub'))
    assert_repo_status(ds.path, untracked=['sub'])
    return ds
Example #58
def _postclonetest_prepare(lcl, storepath, link):

    from datalad.customremotes.ria_utils import (
        create_store,
        create_ds_in_store,
        get_layout_locations
    )
    from datalad.distributed.ora_remote import (
        LocalIO,
    )

    create_tree(lcl,
                tree={
                        'ds': {
                            'test.txt': 'some',
                            'subdir': {
                                'subds': {'testsub.txt': 'somemore'},
                                'subgit': {'testgit.txt': 'even more'}
                            },
                        },
                      })

    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    link = Path(link)
    link.symlink_to(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    # add a plain git dataset as well
    subgit = Dataset(lcl / 'ds' / 'subdir' / 'subgit').create(force=True,
                                                              no_annex=True)
    subgit.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)

    io = LocalIO()
    create_store(io, storepath, '1')

    # URL to use for the upload. The point is that this URL should be invalid
    # from the clone's perspective, so that autoenabling would fail. Therefore
    # base it on a symlink that is deleted afterwards.
    upl_url = "ria+{}".format(link.as_uri())

    for d in (ds, subds, subgit):

        # TODO: create-sibling-ria required for config! => adapt to RF'd
        #       creation (missed on rebase?)
        create_ds_in_store(io, storepath, d.id, '2', '1')
        d.create_sibling_ria(upl_url, "store")

        if d is not subgit:
            # Now, simulate the problem by reconfiguring the special remote to
            # not be autoenabled.
            # Note, however, that the actual scenario is a URL that is not
            # valid from the point of view of the clone (does not resolve, no
            # credentials, etc.), so that autoenabling on git-annex-init
            # during datalad-clone would fail.
            Runner(cwd=d.path).run(['git', 'annex', 'enableremote',
                                    'store-storage',
                                    'autoenable=false'])
        d.push('.', to='store')
        store_loc, _, _ = get_layout_locations(1, storepath, d.id)
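        # make the pushed state visible to git's dumb-HTTP transport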
        Runner(cwd=str(store_loc)).run(['git', 'update-server-info'])

    link.unlink()
    # We should now have a store with datasets that have an autoenabled ORA
    # remote relying on an inaccessible URL.
    # datalad-clone is supposed to reconfigure based on the URL we cloned from.
    # Test this feature for cloning via HTTP, SSH and FILE URLs.

    return ds.id
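
The store layout this helper relies on (via get_layout_locations) places each
dataset under the first three characters of its id, mirroring the path
computed in test_ria_http above; a minimal sketch:

from pathlib import Path

def dataset_location_in_store(storepath, ds_id):
    # e.g. <store>/123/e4567-e89b-12d3-a456-426614174000
    return Path(storepath) / ds_id[:3] / ds_id[3:]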
Example #59
def _test_initremote_basic(host, ds_path, store, link):

    ds_path = Path(ds_path)
    store = Path(store)
    link = Path(link)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fails on non-existing storage location
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])

    # fails on non-RIA URL
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'ria-remote',
                  options=common_init_opts + ['url={}'.format(store.as_uri())])
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # still fails, since ds isn't setup in the store
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'ria-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ria-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])
    # set up the dataset as well
    create_ds_in_store(io, store, ds.id, '2', '1')
    # now should work
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in(
        'ria-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    #   - url
    #   - common_init_opts
    #   - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob', 'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(url), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

    # re-configure with invalid URL should fail:
    assert_raises(CommandError, ds.repo.call_annex,
                  ['enableremote', 'ria-remote'] + common_init_opts +
                  ['url=ria+file:///non-existing'])
    # but re-configure with valid URL should work
    if has_symlink_capability():
        link.symlink_to(store)
        new_url = 'ria+{}'.format(link.as_uri())
        ds.repo.call_annex(['enableremote', 'ria-remote'] + common_init_opts +
                           ['url={}'.format(new_url)])
        # git-annex:remote.log should have:
        #   - url
        #   - common_init_opts
        #   - archive_id (which equals ds id)
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'], read_only=True)
        assert_in("url={}".format(new_url), remote_log)
        for c in common_init_opts:
            assert_in(c, remote_log)
        assert_in("archive-id={}".format(ds.id), remote_log)

    # we can deal with --sameas, which leads to a special remote not having a
    # 'name' property, but only a 'sameas-name'. See gh-4259
    try:
        ds.repo.init_remote('ora2',
                            options=init_opts + ['--sameas', 'ria-remote'])
    except CommandError as e:
        if 'Invalid option `--sameas' in e.stderr:
            # annex too old - doesn't know --sameas
            pass
        else:
            raise
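
A hedged helper sketch (not used by the test) for turning the single-line
git-annex:remote.log entry checked above into a dict; it assumes plain
key=value pairs without embedded spaces:

def parse_remote_log(remote_log):
    # format: '<remote uuid> key1=val1 key2=val2 ... timestamp=...'
    _uuid, _, rest = remote_log.strip().partition(' ')
    return dict(kv.split('=', 1) for kv in rest.split() if '=' in kv)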
Example #60
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True).save()
    src_ds = Dataset(src).create(force=True)
    src_ds.save()

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("{} {{inputs}} {{inputs}} >doubled.dat".format(
            'type' if on_windows else 'cat'),
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    assert_repo_status(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(op.join(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("cd .> dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.save()
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("cd .> dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], last_commit_msg(ds.repo))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.save("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("cd .> subdir-dummy", inputs=[op.join(ds.path, "subdir")])
    ok_(all(
        ds.repo.file_has_content(op.join("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(op.join("subdir", "a"), options=["--force"])
    with chpwd(op.join(path, "subdir")):
        run("cd .> subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(op.join("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("cd .> dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.save()
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun(DEFAULT_BRANCH + "^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(op.join(path, "a.dat")) as fh:
        eq_(fh.read(),
            "a.dat' appended' \n" if on_windows else "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    if not on_windows:
        # MIH doesn't yet understand how to port this
        with open(op.join(path, "a.dat")) as fh:
            eq_(fh.read(), " appended\n appended\n")

    if not on_windows:
        # see datalad#2606
        with swallow_logs(new_level=logging.DEBUG) as cml:
            with swallow_outputs():
                ds.run("echo blah", outputs=["not-there"])
                assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("cd .> expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", last_commit_msg(ds.repo))
    assert_in("b.dat", last_commit_msg(ds.repo))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "'s0\\s1_0\\s2\\a.dat' 's0\\s1_1\\s2\\c.dat'"
                        if on_windows else "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
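
Condensed into a hedged sketch, the input/output handling exercised above
(assuming `ds` is a dataset with annexed *.dat files; the command and output
name are illustrative):

ds.run("cat {inputs} > {outputs}",
       inputs=["*.dat"],          # retrieved before the command runs
       outputs=["combined.dat"],  # unlocked/removed beforehand, saved after
       expand="both")             # record the globs in expanded form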