Example #1
def test_create_1test_dataset():
    # and just a single dataset
    from datalad.api import create_test_dataset
    with swallow_outputs():
        dss = create_test_dataset()
    eq_(len(dss), 1)
    ok_clean_git(dss[0], annex=False)
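These snippets are excerpted from DataLad's test suite and omit the module-level imports and the decorators that inject temporary paths. A minimal sketch of the scaffolding they assume (import locations follow datalad.tests.utils and datalad.utils, but treat the exact names and signatures as assumptions that may differ between DataLad versions):

from datalad.api import Dataset, install
from datalad.support.gitrepo import GitRepo
from datalad.support.annexrepo import AnnexRepo
from datalad.tests.utils import (
    assert_status, eq_, ok_, ok_clean_git, with_tempfile,
)
from datalad.utils import chpwd, swallow_outputs

@with_tempfile(mkdir=True)
def test_symlinked_relpath(path):
    # the decorator creates a temporary directory and passes it in as `path`
    ...  # body as in Example #3 below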
Example #2
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src, description='mydummy')
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid
        eq_(ds.repo.get_description(), 'mydummy')

    # installing it again, shouldn't matter:
    res = install(path, source=src, result_xfm=None, return_type='list')
    assert_status('notneeded', res)
    ok_(ds.is_installed())
    if isinstance(origin.repo, AnnexRepo):
        eq_(uuid_before, ds.repo.uuid)
Example #3
def test_symlinked_relpath(path):
    # initially ran into this on OSX: https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds._save("committing", path="./mike1")

    # Let's also do in subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds._save("committing", path="./mike2")

        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds._save("committing", path=later)

    ok_clean_git(dspath)
Example #4
def test_kill(path):
    # nested datasets with load
    ds = Dataset(path).create()
    testfile = opj(ds.path, "file.dat")
    with open(testfile, 'w') as f:
        f.write("load")
    ds.save("file.dat")
    subds = ds.create('deep1')
    eq_(sorted(ds.subdatasets(result_xfm='relpaths')), ['deep1'])
    ok_clean_git(ds.path)

    # and we fail to remove since content can't be dropped
    res = ds.remove(on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error', path=testfile)
    # Following two assertions on message are relying on the actual error.
    # We have a second result with status 'impossible' for the ds, that we need
    # to filter out for those assertions:
    err_result = [r for r in res if r['status'] == 'error'][0]
    assert_result_values_cond(
        [err_result], 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary copies" in x
    )
    eq_(ds.remove(recursive=True, check=False, result_xfm='datasets'),
        [subds, ds])
    ok_(not exists(path))
Example #5
def test_uninstall_without_super(path):
    # a parent dataset with a proper subdataset, and another dataset that
    # is just placed underneath the parent, but not an actual subdataset
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    nosub = create(opj(parent.path, 'nosub'))
    ok_clean_git(nosub.path)
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1, path=sub.path)
    assert_result_count(subreport, 0, path=nosub.path)
    # it should be possible to uninstall the proper subdataset, even without
    # explicitly calling the uninstall methods of the parent -- things should
    # be figured out by datalad
    uninstall(sub.path)
    assert not sub.is_installed()
    # no present subdatasets anymore
    subreport = parent.subdatasets()
    assert_result_count(subreport, 1)
    assert_result_count(subreport, 1, path=sub.path, state='absent')
    assert_result_count(subreport, 0, path=nosub.path)
    # but we should fail on an attempt to uninstall the non-subdataset
    res = uninstall(nosub.path, on_failure='ignore')
    assert_result_count(
        res, 1, path=nosub.path, status='error',
        message="will not uninstall top-level dataset (consider `remove` command)")
Example #6
def test_remove_file_handle_only(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ok_clean_git(ds.path)
    # make sure there is a key at all
    ok_(len(ds.repo.get_file_key('one')))
    # both files link to the same key
    eq_(ds.repo.get_file_key('one'),
        ds.repo.get_file_key('two'))
    rpath_one = realpath(opj(ds.path, 'one'))
    eq_(rpath_one, realpath(opj(ds.path, 'two')))
    path_two = opj(ds.path, 'two')
    ok_(exists(path_two))
    # remove one handle, should not affect the other
    ds.remove('two', check=False, message="custom msg")
    eq_(ds.repo.repo.head.commit.message.rstrip(), "custom msg")
    eq_(rpath_one, realpath(opj(ds.path, 'one')))
    ok_(exists(rpath_one))
    ok_(not exists(path_two))
    # remove file without specifying the dataset -- shouldn't fail
    with chpwd(path):
        remove('one', check=False)
        ok_(not exists("one"))
    # and we should be able to remove without saving
    ds.remove('three', check=False, save=False)
    ok_(ds.repo.dirty)
Example #7
def test_drop_nocrash_absent_subds(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    parent.uninstall('sub')
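    # only a registered-but-absent subdataset placeholder is left behind;
    # drop must not crash on it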
    ok_clean_git(parent.path)
    with chpwd(path):
        assert_status('notneeded', drop('.', recursive=True))
Example #8
def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))
Example #9
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the extractor considers it worthless
    # for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence it gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
Example #10
def check_renamed_file(recursive, no_annex, path):
    ds = Dataset(path).create(no_annex=no_annex)
    create_tree(path, {'old': ''})
    ds.add('old')
    ds.repo._git_custom_command(['old', 'new'], ['git', 'mv'])
    ds._save(recursive=recursive)
    ok_clean_git(path)
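In DataLad's test suite, check_* helpers like this one are typically decorated with @with_tempfile (so that `path` is injected) and driven by a generator test that yields one sub-test per parameter combination. A hedged sketch of such a driver (the wrapper name and the parameter choices are assumptions, not the original code):

def test_renamed_file():
    # exercise both plain-git and annex datasets, with and without recursion;
    # check_renamed_file is assumed to receive `path` from its own decorator
    for recursive in (False, True):
        for no_annex in (False, True):
            yield check_renamed_file, recursive, no_annex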
Example #11
def test_publish_aggregated(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    base.create('sub', force=True)
    base.add('.', recursive=True)
    ok_clean_git(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)

    # create sibling and publish to it
    spath = opj(path, 'remote')
    base.create_sibling(
        name="local_target",
        sshurl="ssh://localhost",
        target_dir=spath)
    base.publish('.', to='local_target', transfer_data='all')
    remote = Dataset(spath)
    objpath = opj('.datalad', 'metadata', 'objects')
    objs = list(sorted(base.repo.find(objpath)))
    # all object files are present in both datasets
    eq_(all(base.repo.file_has_content(objs)), True)
    eq_(all(remote.repo.file_has_content(objs)), True)
    # and we can squeeze the same metadata out
    eq_(
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in base.metadata('sub')],
        [{k: v for k, v in i.items() if k not in ('path', 'refds', 'parentds')}
         for i in remote.metadata('sub')],
    )
Example #12
def test_get_missing(path):
    repo = GitRepo(path, create=True)
    os.makedirs(op.join(path, 'deep'))
    with open(op.join(path, 'test1'), 'w') as f:
        f.write('some')
    with open(op.join(path, 'deep', 'test2'), 'w') as f:
        f.write('some more')
    # no files tracked yet, so nothing changed
    eq_(repo.get_changed_files(), [])
    repo.add('.')
    # still no differences between worktree and staged
    eq_(repo.get_changed_files(), [])
    eq_(set(repo.get_changed_files(staged=True)),
        {'test1', op.join('deep', 'test2')})
    eq_(set(repo.get_changed_files(staged=True, diff_filter='AD')),
        {'test1', op.join('deep', 'test2')})
    eq_(repo.get_changed_files(staged=True, diff_filter='D'), [])
    repo.commit()
    eq_(repo.get_changed_files(), [])
    eq_(repo.get_changed_files(staged=True), [])
    ok_clean_git(path, annex=False)
    unlink(op.join(path, 'test1'))
    eq_(repo.get_missing_files(), ['test1'])
    rmtree(op.join(path, 'deep'))
    eq_(sorted(repo.get_missing_files()), [op.join('deep', 'test2'), 'test1'])
    # nothing is actually known to be deleted
    eq_(repo.get_deleted_files(), [])
    # do proper removal
    repo.remove(op.join(path, 'test1'))
    # no longer missing
    eq_(repo.get_missing_files(), [op.join('deep', 'test2')])
    # but deleted
    eq_(repo.get_deleted_files(), ['test1'])
Example #13
def test_aggregate_removal(path):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.add('.', recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), check=False)
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    ok_clean_git(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)
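The helpers _get_contained_objs and _get_referenced_objs used above are defined elsewhere in the test module. Purely for illustration, a hypothetical sketch of what the first one could look like (an assumption, not the actual implementation):

import os
import os.path as op

def _get_contained_objs(ds):
    # all metadata object files physically present under the dataset
    root = op.join(ds.path, '.datalad', 'metadata', 'objects')
    return {
        op.relpath(op.join(base, f), ds.path)
        for base, _, files in os.walk(root)
        for f in files
    }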
Example #14
def test_GitRepo_add(src, path):

    gr = GitRepo.clone(src, path)
    filename = get_most_obscure_supported_name()
    with open(op.join(path, filename), 'w') as f:
        f.write("File to add to git")
    added = gr.add(filename)

    eq_(added, {'success': True, 'file': filename})
    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    # uncommitted:
    ok_(gr.dirty)

    filename = "another.txt"
    with open(op.join(path, filename), 'w') as f:
        f.write("Another file to add to git")

    # include committing:
    added2 = gr.add(filename)
    gr.commit(msg="Add two files.")
    eq_(added2, {'success': True, 'file': filename})

    assert_in(filename, gr.get_indexed_files(),
              "%s not successfully added to %s" % (filename, path))
    ok_clean_git(path)
Example #15
def test_optimized_cloning(path):
    # make test repo with one file and one commit
    originpath = op.join(path, 'origin')
    repo = GitRepo(originpath, create=True)
    with open(op.join(originpath, 'test'), 'w') as f:
        f.write('some')
    repo.add('test')
    repo.commit('init')
    ok_clean_git(originpath, annex=False)
    from glob import glob

    def _get_inodes(repo):
        return dict(
            [(os.path.join(*o.split(os.sep)[-2:]),
              os.stat(o).st_ino)
             for o in glob(os.path.join(repo.path,
                                        repo.get_git_dir(repo),
                                        'objects', '*', '*'))])

    origin_inodes = _get_inodes(repo)
    # now clone it in different ways and see what happens to the object storage
    from datalad.support.network import get_local_file_url
    clonepath = op.join(path, 'clone')
    for src in (originpath, get_local_file_url(originpath)):
        # deprecated
        assert_raises(DeprecatedError, GitRepo, url=src, path=clonepath)
        clone = GitRepo.clone(url=src, path=clonepath, create=True)
        clone_inodes = _get_inodes(clone)
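        # identical inode numbers mean the object files were hard-linked
        # rather than copied, i.e. the local clone reused origin's object store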
        eq_(origin_inodes, clone_inodes, msg='with src={}'.format(src))
        rmtree(clonepath)
Example #16
def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir, get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
Example #17
def test_GitRepo_fetch(test_path, orig_path, clone_path):

    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without a URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)
Example #18
def test_remove_more_than_one(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ok_clean_git(path)
    # ensure #1912 stays resolved
    ds.remove(['one', 'two'], check=False)
    ok_clean_git(path)
Example #19
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset', path=source.path)

    # if, however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset', action='publish')
Example #20
def test_add_readme(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    assert_status('ok', ds.add_readme())
    # should use default name
    eq_(
        open(opj(path, 'README.md')).read(),
        """\
# Dataset "demo_ds"

this is for play

### Authors

- Betty
- Tom

### License

PDDL

## General information

This is a DataLad dataset (id: {id}).

For more information on DataLad and on how to work with its datasets,
see the DataLad documentation at: http://docs.datalad.org
""".format(
    id=ds.id))

    # should skip on re-run
    assert_status('notneeded', ds.add_readme())
Example #21
def test_dirty(path):
    for mode in _dirty_modes:
        # does nothing without a dataset
        handle_dirty_dataset(None, mode)
    # placeholder, but not yet created
    ds = Dataset(path)
    # unknown mode
    assert_raises(ValueError, handle_dirty_dataset, ds, 'MADEUP')
    # not yet created is very dirty
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'save-before')
    # should yield a clean repo
    ds.create()
    orig_state = ds.repo.get_hexsha()
    _check_all_clean(ds, orig_state)
    # tainted: untracked
    with open(opj(ds.path, 'something'), 'w') as f:
        f.write('some')
    # we don't want to auto-add untracked files by saving (anymore)
    assert_raises(AssertionError, _check_auto_save, ds, orig_state)
    # tainted: staged
    ds.repo.add('something', git=True)
    orig_state = _check_auto_save(ds, orig_state)
    # tainted: submodule
    # not added to super on purpose!
    subds = ds.create('subds')
    _check_all_clean(subds, subds.repo.get_hexsha())
    ok_clean_git(ds.path)
    # subdataset must be added as a submodule!
    assert_equal(ds.subdatasets(result_xfm='relpaths'), ['subds'])
Example #22
def test_add_subdataset(path, other):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base dataset we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    ok_(other_clone.is_installed)
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why we can reobtain it from origin
    ds.uninstall('other')
    ok_(other_clone.is_installed)
    ds.get('other')
    ok_(other_clone.is_installed)
Example #23
def test_new_relpath(topdir):
    from datalad.api import create_test_dataset
    with swallow_logs(), chpwd(topdir), swallow_outputs():
        dss = create_test_dataset('testds', spec='1')
    eq_(dss[0], opj(topdir, 'testds'))
    eq_(len(dss), 2)  # 1 top + 1 sub-dataset as demanded
    for ds in dss:
        ok_clean_git(ds, annex=False)
Example #24
def test_install_dataset_from_just_source(url, path):

    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
Example #25
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    ok_clean_git(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = add('.', recursive=True)
    ok_clean_git(ds.path)
Example #26
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.add('staged', save=False)
    ok_clean_git(ds.path, head_modified=['staged'], untracked=['untracked'])
    # plain save does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save()
    ok_clean_git(ds.path, untracked=['untracked'])
Example #27
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #28
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #29
def test_create_test_dataset():
    # rudimentary smoke test
    from datalad.api import create_test_dataset
    with swallow_logs(), swallow_outputs():
        dss = create_test_dataset(spec='2/1-2')
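    # spec='2/1-2': two datasets on the first level, each with one or two
    # subdatasets (hence the 5..7 range asserted below)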
    ok_(5 <= len(dss) <= 7)  # at least five - 1 top, two on top level, 1 in each
    for ds in dss:
        ok_clean_git(ds, annex=None)  # some of them are annex but we just don't check
        ok_(len(glob(opj(ds, 'file*'))))
Example #30
def test_GitRepo_add_submodule(source, path):

    top_repo = GitRepo(path, create=True)

    top_repo.add_submodule('sub', name='sub', url=source)
    top_repo.commit('submodule added')
    eq_([s.name for s in top_repo.get_submodules()], ['sub'])
    ok_clean_git(path)
    ok_clean_git(op.join(path, 'sub'))
Example #31
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save("add a new file", all_changes=False)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst", all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_false(ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(ds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save(all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
Example #32
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'), opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but run from within a subdir and
    # with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        add([opj(parent.path, 'sub3'),
             opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # here is where we need to disagree with the repo in #1886
    # we would not expect that `add` registers sub3 as a subdataset
    # of parent, because no reference dataset was given and the
    # command cannot decide (with the current semantics) whether
    # it should "add anything in sub3 to sub3" or "add sub3 to whatever
    # sub3 is in"
    ok_clean_git(parent.path, untracked=['sub3/'])
Example #33
def test_procedure_discovery(path, super_path):
    ps = run_procedure(discover=True)
    # there are a few procedures coming with datalad, needs to find them
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps
        ]), len(ps))

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    # TODO: this procedure would leave a clean dataset, but `run` cannot handle dirty
    # input yet, so manual for now
    ds.add('code', to_git=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add('datalad.clean.proc-pre',
                  'datalad_test_proc',
                  where='dataset')
    ds.add(op.join('.datalad', 'config'))

    # run discovery on the dataset:
    ps = ds.run_procedure(discover=True)

    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps
        ]), len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(ds.path, 'code', 'datalad_test_proc.py'))

    # make it a subdataset and try again:
    super = Dataset(super_path).create()
    super.install('sub', source=ds.path)

    ps = super.run_procedure(discover=True)
    # still needs to find procedures coming with datalad
    assert_true(len(ps) > 2)
    # we get three essential properties
    eq_(
        sum([
            'procedure_type' in p and 'procedure_callfmt' in p and 'path' in p
            for p in ps
        ]), len(ps))
    # dataset's procedure needs to be in the results
    assert_in_results(ps,
                      path=op.join(super.path, 'sub', 'code',
                                   'datalad_test_proc.py'))

    if not on_windows:  # no symlinks
        import os
        # create a procedure which is a broken symlink, but recognizable as a
        # python script:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'broken_link_proc.py'))
        # broken symlink at a procedure location, but we can't tell whether it
        # is an actual procedure without any hint on how to execute it:
        os.symlink(op.join(super.path, 'sub', 'not_existent'),
                   op.join(super.path, 'sub', 'code', 'unknwon_broken_link'))

        ps = super.run_procedure(discover=True)
        # still needs to find procedures coming with datalad and the dataset
        # procedure registered before
        assert_true(len(ps) > 3)
        assert_in_results(ps,
                          path=op.join(super.path, 'sub', 'code',
                                       'broken_link_proc.py'),
                          state='absent')
        assert_not_in_results(ps,
                              path=op.join(super.path, 'sub', 'code',
                                           'unknwon_broken_link'))
Example #34
def test_uninstall_recursive(path):
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    # we add one file
    eq_(len(subds.add('.')), 1)
    # save all -> all clean
    ds.save(all_changes=True, recursive=True)
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now uninstall in subdataset through superdataset
    target_fname = opj('deep', 'dir', 'test')
    # sane starting point
    ok_(exists(opj(ds.path, target_fname)))
    # doesn't have the minimum number of copies for a safe drop
    # TODO: better exception
    assert_raises(CommandError, ds.drop, target_fname, recursive=True)
    # this should do it
    ds.drop(target_fname, check=False, recursive=True)
    # link is dead
    lname = opj(ds.path, target_fname)
    ok_(not exists(lname))
    # entire hierarchy saved
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now same with actual handle removal
    # content is dropped already, so no checks in place anyway
    ds.remove(target_fname, check=True, recursive=True)
    ok_(not exists(lname) and not lexists(lname))
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
Example #35
def test_get_modified_subpaths(path):
    ds = Dataset(path).create(force=True)
    suba = ds.create('ba', force=True)
    subb = ds.create('bb', force=True)
    subsub = ds.create(opj('bb', 'bba', 'bbaa'), force=True)
    ds.add('.', recursive=True)
    ok_clean_git(path)

    orig_base_commit = ds.repo.repo.commit().hexsha

    # nothing was modified compared to the status quo, output must be empty
    eq_([],
        list(get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit)))

    # modify one subdataset
    create_tree(subsub.path, {'added': 'test'})
    subsub.add('added')

    # it will replace the requested path with the path of the closest
    # submodule that is modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        1,
        type='dataset', path=subb.path)

    # make another one dirty
    create_tree(suba.path, {'added': 'test'})

    # now a single query path will result in the two modified subdatasets
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds, orig_base_commit),
        2,
        type='dataset')

    # now save up top; this will save the new state of subb, but keep suba dirty
    ds.save(subb.path, recursive=True)
    # now if we ask for what was last saved, we only get the new state of subb
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD~1..HEAD'),
        1,
        type='dataset', path=subb.path)
    # comparing the working tree to HEAD will report the dirty suba instead
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        1,
        type='dataset', path=suba.path)

    # add/save everything, become clean
    ds.add('.', recursive=True)
    ok_clean_git(path)
    # nothing is reported as modified
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            'HEAD'),
        0)
    # but looking all the way back, we find all changes
    assert_result_count(
        get_modified_subpaths(
            [dict(path=ds.path)],
            ds,
            orig_base_commit),
        2,
        type='dataset')

    # now we ask specifically for the file we added to subsub above
    query = [dict(path=opj(subsub.path, 'added'))]
    res = list(get_modified_subpaths(query, ds, orig_base_commit))
    # we only get this one result back, and not all the submodule state changes
    # that were also saved in the superdatasets
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, type='file', path=opj(subsub.path, 'added'), state='added')
    # but if we are only looking at the last saved change (suba), our query
    # will not return anything
    res = get_modified_subpaths(query, ds, 'HEAD^')
    assert_result_count(res, 0)

    # deal with removal (check=False avoids the insufficient-copies error)
    ds.remove(suba.path, check=False)
    ok_clean_git(path)
    res = list(get_modified_subpaths([dict(path=ds.path)], ds, 'HEAD~1..HEAD'))
    # removed submodule + .gitmodules update
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        type_src='dataset', path=suba.path)
Example #36
def test_annotate_paths(dspath, nodspath):
    # this test doesn't use the `remove` API to avoid circularities
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)

    with chpwd(dspath):
        # with and without an explicitly given path the result is almost the
        # same inside a dataset
        without_path = annotate_paths(on_failure='ignore')
        pwd_res = annotate_paths(path='.', on_failure='ignore')
        assert_result_count(
            without_path, 1, type='dataset', path=dspath)
        assert_result_count(
            pwd_res, 1, type='dataset', path=dspath, orig_request='.',
            raw_input=True)
        # make sure going into a subdataset vs giving it as a path has no
        # structural impact
        eq_(
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(path='b', recursive=True)],
            [{k: v for k, v in ap.items()
              if k not in ('registered_subds', 'raw_input', 'orig_request', 'refds')}
             for ap in annotate_paths(dataset='b', recursive=True)])
    # now do it again, pointing to the ds directly
    res = ds.annotate_paths(on_failure='ignore')
    # no request, no refds, but otherwise the same
    eq_(len(res), len(pwd_res))
    eq_({k: pwd_res[0][k] for k in pwd_res[0]
         if k in ('path', 'type', 'action', 'status')},
        {k: res[0][k] for k in res[0]
         if k not in ('refds',)})

    # will refuse a path that is not a dataset as refds
    res = annotate_paths(dataset=nodspath, on_failure='ignore')
    assert_result_count(
        res, 1, status='error', path=nodspath,
        message='given reference dataset is not a dataset')

    # recursion with proper base dataset
    parentds = Dataset(opj(dspath, 'a'))
    base_res = parentds.annotate_paths(recursive=True)
    # needs to find 'aa' and the base
    assert_result_count(base_res, 2)
    assert_result_count(base_res, 2, type='dataset')
    assert_result_count(
        base_res, 1, type='dataset', state='clean', parentds=parentds.path,
        path=opj(parentds.path, 'aa'), status='')
    # same recursion but without a base dataset
    res = annotate_paths(path=opj(dspath, 'a'), recursive=True)
    # needs to find 'aa' and 'a' again
    assert_result_count(res, 2)
    eq_(res[-1],
        {k: base_res[-1][k] for k in base_res[-1]
         if k not in ('refds',)})
    assert_result_count(
        res, 1, type='dataset', status='',
        # it does not auto-discover parent datasets without force or a refds
        #parentds=parentds.path,
        path=parentds.path)
    # but we can force parent discovery
    res = parentds.annotate_paths(
        path=opj(dspath, 'a'), recursive=True, force_parentds_discovery=True)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1, type='dataset', status='', parentds=dspath,
        path=parentds.path)

    # recursion with multiple disjoint seeds, no common base
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'a'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['a', 'aa', 'bba', 'bbaa'])

    # recursion with partially overlapping seeds, no duplicate results
    eq_([basename(p) for p in annotate_paths(
         path=[opj(dspath, 'b'), opj(dspath, 'b', 'bb', 'bba')], recursive=True,
         result_xfm='paths')],
        ['b', 'ba', 'bb', 'bba', 'bbaa'])

    # get straight from a file
    fpath = opj('a', 'aa', 'file_aa')
    res = ds.annotate_paths(fpath)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, orig_request=fpath, raw_input=True, type='file',
        path=opj(ds.path, fpath), parentds=opj(ds.path, 'a', 'aa'), status='')
    # now drop it
    dropres = ds.drop(fpath, check=False)
    assert_result_count(dropres, 1, path=res[0]['path'], status='ok')
    # ask for the same file again, using 'notneeded' for unavailable paths to
    # try to trigger any difference
    droppedres = ds.annotate_paths(fpath, unavailable_path_status='notneeded')
    # but we get the same result
    eq_(res, droppedres)

    # now try the same on an uninstalled dataset
    subdspath = opj('b', 'bb')
    # before
    before_res = ds.annotate_paths(subdspath, recursive=True,
                                   unavailable_path_status='error')
    assert_result_count(before_res, 3, status='', type='dataset')
    uninstall_res = ds.uninstall(subdspath, recursive=True, check=False)
    assert_result_count(uninstall_res, 3, status='ok', type='dataset')
    # after
    after_res = ds.annotate_paths(subdspath,
                                  unavailable_path_status='error',
                                  on_failure='ignore')
    # uninstall hides all low-level datasets
    assert_result_count(after_res, 1)
    # but for the top-most uninstalled one it merely reports absent state now
    assert_result_count(
        after_res, 1, state='absent',
        **{k: before_res[0][k] for k in before_res[0] if k not in ('state', 'status')})
    # however, this beauty doesn't come for free, so it can be disabled;
    # that makes the uninstalled subdataset look like a plain directory in the
    # parent (or even just a non-existing path, if the mountpoint dir isn't
    # present)
    after_res = ds.annotate_paths(subdspath, force_subds_discovery=False)
    assert_result_count(
        after_res, 1, type='directory',
        path=before_res[0]['path'],
        parentds=before_res[0]['parentds'])
    # feed annotated paths into annotate_paths, it shouldn't change things
    # upon second run
    # datasets and file
    res = ds.annotate_paths(['.', fpath], recursive=True)
    # make a copy, just to be sure
    orig_res = deepcopy(res)
    assert_result_count(res, 7)
    # and in again, no recursion this time
    res_again = ds.annotate_paths(res)
    # doesn't change a thing
    eq_(orig_res, res_again)
    # and in again, with recursion this time
    res_recursion_again = ds.annotate_paths(res, recursive=True)
    assert_result_count(res_recursion_again, 7)
    # doesn't change a thing
    eq_(orig_res, res_recursion_again)
Example #37
def test_publish_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: for now, circumvent the detached-head issue;
    # figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, dest="target")
    eq_(res, source)

    ok_clean_git(src_path, annex=False)
    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))

    # 'target/master' should be tracking branch at this point, so
    # try publishing without `dest`:

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.repo.git_add(opj(src_path, 'test_mod_file'))
    source.repo.git_commit("Modified.")
    ok_clean_git(src_path, annex=False)

    res = publish(dataset=source)
    eq_(res, source)

    ok_clean_git(dst_path, annex=False)
    eq_(list(target.git_get_branch_commits("master")),
        list(source.repo.git_get_branch_commits("master")))
    eq_(list(target.git_get_branch_commits("git-annex")),
        list(source.repo.git_get_branch_commits("git-annex")))
Example #38
def test_diff_recursive(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    # look at the last change, and confirm a dataset was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=sub.path,
                        type='dataset')
    # now recursive
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    # we also get the entire diff of the subdataset from scratch
    assert_status('ok', res)
    ok_(len(res) > 3)
    # one specific test
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(sub.path, '.datalad', 'config'))

    # now we put one untracked file into the parent and another into the sub
    create_tree(ds.path, {
        'onefile': 'tobeadded',
        'sub': {
            'twofile': 'tobeadded'
        }
    })
    res = ds.diff(recursive=True, report_untracked='all')
    assert_result_count(res, 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=opj(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='untracked',
                        path=opj(sub.path, 'twofile'),
                        type='file')
    # save sub
    sub.add('.')
    # save sub in parent
    ds.save()
    # save addition in parent
    ds.add('.')
    ok_clean_git(ds.path)
    # look at the last change, only one file was added
    res = ds.diff(revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')

    # now the exact same thing with recursion, must not be different from the call
    # above
    res = ds.diff(recursive=True, revision='HEAD~1..HEAD')
    assert_result_count(res, 1)
    # last change in parent
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')

    # one further back brings in the modified subdataset, and the added file within it
    res = ds.diff(recursive=True, revision='HEAD~2..HEAD')
    assert_result_count(res, 3)
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(ds.path, 'onefile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='added',
                        path=opj(sub.path, 'twofile'),
                        type='file')
    assert_result_count(res,
                        1,
                        action='diff',
                        state='modified',
                        path=sub.path,
                        type='dataset')
Example #39
def test_within_ds_file_search(path):
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add('datalad.search.index-{}-documenttype'.format(m),
                      'all',
                      where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('audio.mp3', opj('stim', 'stim1.mp3')), ):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', src),
             opj(path, dst))
    ds.add('.')
    # yoh: CANNOT FIGURE IT OUT since in direct mode it gets added to git
    # directly BUT
    #  - output reports key, so seems to be added to annex!
    #  - when I do manually in cmdline - goes to annex
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If it is not under annex, below addition of metadata silently does
    # not do anything
    list(
        ds.repo.set_metadata(opj('stim', 'stim1.mp3'),
                             init={'importance': 'very'}))
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio', ):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
        ('egrep', ':mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above; the leading ':' is stripped, it indicates "ALL FIELDS"
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # same as above, but with AND condition
            # get both matches
        ('egrep', ['mp3', 'type:file'], opj('stim', 'stim1.mp3'), {
            'type': 'file',
            'audio.format': 'mp3'
        }),
            # case insensitive search
        ('egrep', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # field selection by expression
        ('egrep', r'audio\.+:mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # random keyword query
        ('textblob', 'mp3', opj('stim', 'stim1.mp3'), {
            'meta': 'mp3'
        }),
            # report which field matched with auto-field
        ('autofield', 'mp3', opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # XXX next one is not supported by current text field analyser
            # decomposes the mime type in [mime, audio, mp3]
            # ('autofield',
            # "'mime:audio/mp3'",
            # opj('stim', 'stim1.mp3'),
            # 'audio.format', 'mime:audio/mp3'),
            # but this one works
        ('autofield', "'mime audio mp3'", opj('stim', 'stim1.mp3'), {
            'audio.format': 'mp3'
        }),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        assert_result_count(
            res,
            1,
            type='file',
            path=opj(ds.path, hitpath),
            # each file must report the ID of the dataset it is from, critical for
            # discovering related content
            dsid=ds.id)
        # in egrep we currently do not search unique values
        # and the queries above aim at files
        assert_result_count(res, 1 if mode == 'egrep' else 2)
        if mode != 'egrep':
            assert_result_count(res,
                                1,
                                type='dataset',
                                path=ds.path,
                                dsid=ds.id)
        # test the key and specific value of the match
        for matched_key, matched_val in matched.items():
            assert_in(matched_key, res[-1]['query_matched'])
            assert_equal(res[-1]['query_matched'][matched_key], matched_val)
Example #40
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible', source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    ok_clean_git(src_path)

    # fail when asked to update a non-dataset
    assert_status(
        'impossible',
        source.update("update.txt", on_failure='ignore'))
    # fail when asked to update something non-existent
    assert_status(
        'impossible',
        source.update("nothere", on_failure='ignore'))

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(
            update(recursive=True), 2,
            status='ok', type='dataset')
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(
        dest.update(merge=True, recursive=True), 2,
        status='ok', type='dataset')
    # and now we can get new file
    dest.get('2/load.dat')
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
Example #41
def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1, status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, status='ok', type='dataset')
    # grab new content of the uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    ok_clean_git(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2, status='ok', type='dataset')
    ok_clean_git(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1, type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the disconnected subdataset for further tests;
    # neither a bound method nor an explicit parentds should
    # be needed to end up with a clean dataset
    remove(op.join(ds.path, sname), check=False)
    ok_clean_git(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    ok_clean_git(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(
        res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    ok_clean_git(ds.path)
Example No. 42
0
def test_gh3421(path):
    # failed to add d/sub:file
    ds = Dataset(path).create(force=True)
    ds.add('top:file')
    ds.add(opj('d', 'sub:file'))
    ok_clean_git(ds.path)
Example No. 43
0
def test_container_files(path, super_path):
    raise SkipTest('SingularityHub is gone for now')
    ds = Dataset(path).create()
    cmd = ['dir'] if on_windows else ['ls']

    # plug in a proper singularity image
    ds.containers_add(
        'mycontainer',
        url=testimg_url,
        image='righthere',
        # the next one is auto-guessed
        #call_fmt='singularity exec {img} {cmd}'
    )
    assert_result_count(ds.containers_list(),
                        1,
                        path=op.join(ds.path, 'righthere'),
                        name='mycontainer')
    ok_clean_git(path)

    def assert_no_change(res, path):
        # this command changed nothing
        #
        # Avoid specifying the action because it will change from "add" to
        # "save" in DataLad v0.12.
        assert_result_count(res,
                            1,
                            status='notneeded',
                            path=path,
                            type='dataset')

    # now we can run stuff in the container
    # and because there is just one, we don't even have to name the container
    res = ds.containers_run(cmd)
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # same thing as we specify the container by its name:
    res = ds.containers_run(cmd, container_name='mycontainer')
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # we can also specify the container by its path:
    res = ds.containers_run(cmd, container_name=op.join(ds.path, 'righthere'))
    # container becomes an 'input' for `run` -> get request, but "notneeded"
    assert_result_count(res,
                        1,
                        action='get',
                        status='notneeded',
                        path=op.join(ds.path, 'righthere'),
                        type='file')
    assert_no_change(res, ds.path)

    # Now, test the same thing, but with this dataset being a subdataset of
    # another one:

    super_ds = Dataset(super_path).create()
    super_ds.install("sub", source=path)

    # When running, we don't discover containers in subdatasets
    with assert_raises(ValueError) as cm:
        super_ds.containers_run(cmd)
    assert_in("No known containers", text_type(cm.exception))
    # ... unless we specify the container name explicitly
    res = super_ds.containers_run(cmd, container_name="sub/mycontainer")
    # container becomes an 'input' for `run` -> get request (needed this time)
    assert_result_count(res,
                        1,
                        action='get',
                        status='ok',
                        path=op.join(super_ds.path, 'sub', 'righthere'),
                        type='file')
    assert_no_change(res, super_ds.path)
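
A condensed usage sketch of the container workflow tested above. It assumes the datalad-container extension is installed; the dataset path and image URL are placeholders, not real locations:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-containers').create()   # hypothetical location
ds.containers_add(
    'mycontainer',
    url='https://example.org/some-image.simg',  # placeholder image URL
    image='righthere',                          # where to store it inside the dataset
)
ds.containers_list()                            # -> one record for 'mycontainer'
# run a command through the registered container; with a single container
# the name could also be omitted
ds.containers_run(['ls'], container_name='mycontainer')
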
Example No. 44
0
def test_get_recurse_subdatasets(src, path):

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')

    # ask for the two subdatasets specifically. This will obtain them,
    # but not any content of any files in them
    subds1, subds2 = ds.get(['subm 1', '2'],
                            get_data=False,
                            description="youcouldnotmakethisup",
                            result_xfm='datasets')
    for d in (subds1, subds2):
        eq_(d.repo.get_description(), 'youcouldnotmakethisup')

    # there are 3 files to get: test-annex.dat within each dataset:
    rel_path_sub1 = opj(basename(subds1.path), 'test-annex.dat')
    rel_path_sub2 = opj(basename(subds2.path), 'test-annex.dat')
    annexed_files = {'test-annex.dat', rel_path_sub1, rel_path_sub2}

    # None of them is currently present:
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    ok_clean_git(subds1.path)
    # explicitly given path in subdataset => implicit recursion:
    # MIH: Nope, we fulfill the dataset handle, but that doesn't
    #      imply fulfilling all file handles
    result = ds.get(rel_path_sub1, recursive=True)
    # all good actions
    assert_status('ok', result)

    assert_in_results(result, path=opj(ds.path, rel_path_sub1), status='ok')
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)

    # drop it:
    subds1.repo.drop('test-annex.dat')
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)

    # now, with a path not explicitly pointing within a
    # subdataset, but recursive option:
    # get everything:
    result = ds.get(recursive=True,
                    result_filter=lambda x: x.get('type') != 'dataset')
    assert_status('ok', result)

    eq_(
        set([
            item.get('path')[len(ds.path) + 1:] for item in result
            if item['type'] == 'file'
        ]), annexed_files)
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is True)
    ok_(subds2.repo.file_has_content('test-annex.dat') is True)

    # drop them:
    ds.repo.drop('test-annex.dat')
    subds1.repo.drop('test-annex.dat')
    subds2.repo.drop('test-annex.dat')
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)

    # now, the very same call, but without recursive:
    result = ds.get('.', recursive=False)
    assert_status('ok', result)
    # one report is on the requested dir
    eq_(len(result) - 1, 1)
    assert_result_count(result,
                        1,
                        path=opj(ds.path, 'test-annex.dat'),
                        status='ok')
    ok_(ds.repo.file_has_content('test-annex.dat') is True)
    ok_(subds1.repo.file_has_content('test-annex.dat') is False)
    ok_(subds2.repo.file_has_content('test-annex.dat') is False)
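
A short sketch of the `get` behavior demonstrated above: obtaining subdataset handles without content, then fetching content recursively. The source path, subdataset names, and file name are placeholders that mirror the test fixture:

from datalad.api import install

ds = install('/tmp/demo-get', source='/path/to/some/dataset')  # placeholder source

# obtain the subdataset handles, but none of the annexed file content
ds.get(['subm 1', '2'], get_data=False)
ds.repo.file_has_content('test-annex.dat')   # -> False, nothing fetched yet

# fetch everything, including content inside subdatasets
ds.get('.', recursive=True)
ds.repo.file_has_content('test-annex.dat')   # -> True

# content can be released again without unregistering the file
ds.repo.drop('test-annex.dat')
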
Example No. 45
0
def get_baseline(p):
    ds = Dataset(p).create()
    sub = ds.create('sub', save=False)
    # subdataset saw another commit after becoming a submodule
    ok_clean_git(ds.path, index_modified=['sub'])
    return ds
Example No. 46
0
def test_diff(path, norepo):
    with chpwd(norepo):
        assert_status('impossible', diff(on_failure='ignore'))
    ds = Dataset(path).create()
    ok_clean_git(ds.path)
    # reports stupid revision input
    assert_result_count(ds.diff(revision='WTF', on_failure='ignore'),
                        1,
                        status='impossible',
                        message="fatal: bad revision 'WTF'")
    assert_result_count(ds.diff(), 0)
    # no diff
    assert_result_count(ds.diff(), 0)
    assert_result_count(ds.diff(revision='HEAD'), 0)
    # bogus path makes no difference
    assert_result_count(ds.diff(path='THIS', revision='HEAD'), 0)
    # comparing to a previous state we should get a diff in most cases
    # for this test, let's not care what exactly it is -- will do later
    assert len(ds.diff(revision='HEAD~1')) > 0
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.add('.', to_git=True)
    ok_clean_git(ds.path)
    res = ds.diff(revision='HEAD~1')
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(diff(revision='HEAD~1'),
                            1,
                            action='diff',
                            path=opj(ds.path, 'new'),
                            state='added')
    # no diff against HEAD
    assert_result_count(ds.diff(), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    for diffy in (None, 'HEAD'):
        res = ds.diff(revision=diffy)
        assert_result_count(res, 1)
        assert_result_count(res,
                            1,
                            action='diff',
                            path=opj(ds.path, 'new'),
                            state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff('otherpath'), 0)
    # giving the right path must work though
    assert_result_count(ds.diff('new'),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    # stage changes
    ds.add('.', to_git=True, save=False)
    # no diff, because we staged the modification
    assert_result_count(ds.diff(), 0)
    # but we can get at it
    assert_result_count(ds.diff(staged=True),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    # OR
    assert_result_count(ds.diff(revision='HEAD'),
                        1,
                        action='diff',
                        path=opj(ds.path, 'new'),
                        state='modified')
    ds.save()
    ok_clean_git(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = ds.diff()
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        state='untracked',
                        type='directory',
                        path=opj(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(ds.diff(report_untracked='all'),
                        2,
                        state='untracked',
                        type='file')
    # a non-matching path will hide this result
    assert_result_count(ds.diff(path='somewhere'), 0)
    # perfect match and anything underneath will do
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep'),
                        type='directory')
    assert_result_count(ds.diff(path='deep'),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep'))
    # now we stage one of the two files in deep
    ds.add(opj('deep', 'down2'), to_git=True, save=False)
    # without any reference it will ignore the staged stuff and report the remaining
    # untracked file
    assert_result_count(ds.diff(),
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep', 'down'),
                        type='file')
    res = ds.diff(staged=True)
    assert_result_count(res,
                        1,
                        state='untracked',
                        path=opj(ds.path, 'deep', 'down'),
                        type='file')
    assert_result_count(res,
                        1,
                        state='added',
                        path=opj(ds.path, 'deep', 'down2'),
                        type='file')
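
A compact sketch of the diff states probed above (added, modified, staged, untracked), using the same pre-0.12 `diff`/`add` API; the dataset path is a hypothetical scratch location:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-diff').create()
with open('/tmp/demo-diff/new', 'w') as f:
    f.write('empty')
ds.add('.', to_git=True)

ds.diff()                        # nothing: worktree matches HEAD
ds.diff(revision='HEAD~1')       # one result: 'new' was added

with open('/tmp/demo-diff/new', 'w') as f:
    f.write('notempty')
ds.diff()                        # one result: 'new' is modified
ds.add('.', to_git=True, save=False)   # stage without committing
ds.diff(staged=True)             # the staged modification is reported here
ds.diff(report_untracked='all')  # report untracked files individually
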
Example No. 47
0
def test_exif(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
    copy(opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
         path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # compare full expected metadata set to catch any change of mind on the
    # side of the EXIF library
    assert_result_count(
        res,
        1,
        metadata={
            "exif:InteroperabilityVersion": "[48, 49, 48, 48]",
            "exif:ExifVersion": 221.0,
            "exif:FocalLengthIn35mmFilm": 38.0,
            "exif:CompressedBitsPerPixel": 5.0,
            "exif:GainControl": "None",
            "exif:Compression": "JPEG (old-style)",
            "exif:PrintIM":
            "[80, 114, 105, 110, 116, 73, 77, 0, 48, 51, 48, 48, 0, 0, 0, 5, 0, 1, 0, 22, 0, 22, 0, 2, 1, 0, 0, 0, 1, 0, 5, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 16, 131, 0, 0, 0]",
            "exif:Make": "CASIO COMPUTER CO.,LTD.",
            "exif:Sharpness": "Normal",
            "exif:Contrast": "Normal",
            "exif:ColorSpace": "sRGB",
            "exif:ExposureMode": "Auto Exposure",
            "exif:ExposureBiasValue": 0.0,
            "exif:ExifImageWidth": 4.0,
            "exif:ComponentsConfiguration": "YCbCr",
            "exif:DateTimeOriginal": "2011:03:13 16:36:02",
            "exif:MaxApertureValue": "14/5",
            "exif:DateTime": "2017:10:08 10:21:03",
            "exif:InteroperabilityOffset": 30412.0,
            "exif:InteroperabilityIndex": "R98",
            "exif:FileSource": "Digital Camera",
            "exif:ResolutionUnit": "Pixels/Inch",
            "exif:FNumber": "27/10",
            "exif:ExposureProgram": "Program Normal",
            "exif:DigitalZoomRatio": "0/0",
            "exif:LightSource": "Unknown",
            "exif:ExifImageLength": 3.0,
            "exif:FlashPixVersion": 100.0,
            "exif:CustomRendered": "Normal",
            "exif:Flash": "Flash fired, auto mode",
            "exif:WhiteBalance": "Auto",
            "exif:Orientation": "Horizontal (normal)",
            "exif:ExposureTime": "1/60",
            "exif:Software": "GIMP 2.8.20",
            "exif:Model": "EX-S600",
            "exif:FocalLength": "31/5",
            "exif:SceneCaptureType": "Standard",
            "exif:ExifOffset": 272.0,
            "exif:Saturation": "Normal",
            "exif:YCbCrPositioning": "Centered",
            "exif:DateTimeDigitized": "2011:03:13 16:36:02",
            "exif:XResolution": 72.0,
            "exif:YResolution": 72.0,
            "exif:MeteringMode": "Pattern",
        })
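
For reference, a minimal sketch of the EXIF metadata flow used in the test above; the dataset path and the JPEG file name are illustrative assumptions:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-exif').create()
# enable the EXIF extractor for this dataset
ds.config.add('datalad.metadata.nativetype', 'exif', where='dataset')
# ... place one or more JPEG files with EXIF tags into the dataset here ...
ds.add('.')
ds.aggregate_metadata()
ds.metadata('photo.jpg')   # per-file metadata, keys prefixed with 'exif:'
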
Example No. 48
0
def test_within_ds_file_search(path):
    try:
        import nibabel
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    ds.config.add('datalad.metadata.nativetype', 'nifti1', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('nifti1.nii.gz',
                      opj('sub-01', 'func', 'sub-01_task-some_bold.nii.gz')),
                     ('nifti1.nii.gz',
                      opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'))):
        copy(opj(dirname(dirname(__file__)), 'tests', 'data', 'files', src),
             opj(path, dst))
    ds.save()
    ds.aggregate_metadata()
    ok_clean_git(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('bids', 'nifti1'):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.key
bids.BIDSVersion
bids.author
bids.citation
bids.conformsto
bids.datatype
bids.description
"""
    if external_versions['bids'] >= '0.9':
        target_out += "bids.extension\n"
    target_out += """\
bids.fundedby
bids.license
bids.name
bids.subject.age(years)
bids.subject.gender
bids.subject.handedness
bids.subject.hearing_problems_current
bids.subject.id
bids.subject.language
bids.suffix
bids.task
datalad_core.id
datalad_core.refcommit
id
nifti1.cal_max
nifti1.cal_min
nifti1.datatype
nifti1.description
nifti1.dim
nifti1.freq_axis
nifti1.intent
nifti1.magic
nifti1.phase_axis
nifti1.pixdim
nifti1.qform_code
nifti1.sform_code
nifti1.sizeof_hdr
nifti1.slice_axis
nifti1.slice_duration
nifti1.slice_end
nifti1.slice_order
nifti1.slice_start
nifti1.spatial_resolution(mm)
nifti1.t_unit
nifti1.temporal_spacing(s)
nifti1.toffset
nifti1.vox_offset
nifti1.xyz_unit
parentds
path
type
"""

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        # so we will use diff
        diff = list(unified_diff(target_out.splitlines(),
                                 cmo.out.splitlines()))
        assert_in(target_out, cmo.out, msg="Diff: %s" % os.linesep.join(diff))

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched_key, matched_val in (
            # random keyword query
            # multi word query implies AND
        ('textblob', ['bold', 'female'],
         opj('sub-03', 'func',
             'sub-03_task-some_bold.nii.gz'), 'meta', 'female'),
            # report which field matched with auto-field
        ('autofield', 'female',
         opj('sub-03', 'func', 'sub-03_task-other_bold.nii.gz'),
         'bids.subject.gender', 'female'),
            # autofield multi-word query is also AND
        ('autofield', ['bids.suffix:bold', 'bids.subject.id:01'],
         opj('sub-01', 'func',
             'sub-01_task-some_bold.nii.gz'), 'bids.suffix', 'bold'),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        if mode == 'textblob':
            # 'textblob' reports datasets only by default (but could be configured otherwise)
            assert_result_count(res, 1)
        else:
            # the rest always has a file and the dataset, because they carry metadata in
            # the same structure
            assert_result_count(res, 2)
            assert_result_count(
                res,
                1,
                type='file',
                path=opj(ds.path, hitpath),
                # each file must report the ID of the dataset it is from, critical for
                # discovering related content
                dsid=ds.id)
        assert_result_count(res, 1, type='dataset', path=ds.path, dsid=ds.id)
        # test the key and specific value of the match
        assert_in(matched_key, res[-1]['query_matched'])
        assert_equal(res[-1]['query_matched'][matched_key], matched_val)
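
A brief sketch of the two search modes exercised above. It assumes a dataset that already carries aggregated metadata (e.g. produced as in the test); the BIDS key names are taken from the expected output listed above:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-search')   # hypothetical dataset with aggregated metadata

# list the index keys each mode knows about
ds.search(show_keys='name', mode='textblob')
ds.search(show_keys='name', mode='autofield')

# multi-word queries imply AND; 'textblob' matches anywhere in the record
ds.search(['bold', 'female'], mode='textblob')
# 'autofield' reports which field matched and accepts field-qualified terms
ds.search('bids.subject.gender:female', mode='autofield', full_record=True)
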
Example No. 49
0
def test_install_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.get_subdatasets())
    # sub is clean:
    ok_clean_git(subds.path, annex=False)
    # top is not:
    assert_raises(AssertionError, ok_clean_git, ds.path, annex=False)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=False)

    # but we could also save while installing and there should be no side effect
    # of saving any other changes if we opt not to auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source, if_dirty='ignore')
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to "add" it
    create(_path_(top_path, 'sub3'), if_dirty='ignore')
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.install('sub3', if_dirty='ignore')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example No. 50
0
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 6)
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_result_count(res, 3, status='ok', action='save')
    # nice and tidy
    ok_clean_git(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # mother also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == assure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
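
A reduced sketch of the aggregation round trip shown above: enable an extractor, aggregate recursively, and query the aggregated metadata from a fresh clone. Paths are placeholders, and the extractor only yields output if matching native metadata (here a datapackage.json) is actually present:

from datalad.api import Dataset, install

ds = Dataset('/tmp/demo-agg').create()
ds.config.add('datalad.metadata.nativetype', 'frictionless_datapackage',
              where='dataset')
sub = ds.create('sub')
ds.save(recursive=True)

# aggregate metadata from all subdatasets into every superdataset
ds.aggregate_metadata(recursive=True, update_mode='all')
ds.metadata(get_aggregates=True)   # one aggregate record per dataset

# a fresh clone receives the aggregated metadata without installing subdatasets
clone = install('/tmp/demo-agg-clone', source=ds.path)
clone.metadata()
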
Example No. 51
0
def test_install_dataset_from(url, path):
    ds = install(path=path, source=url)
    eq_(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(path, annex=False)
Example No. 52
0
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'), opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(
        opj(pardir, pardir, 'sub2'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: same as above, but from within a subdir of the
    # symlink and with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(
        opj(pardir, pardir, 'sub3'),
        opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([opj(parent.path, 'sub3'),
                  opj(parent.path, 'subdir', 'subsubdir', 'upup3')])
    # in contrast to `add`, this only operates on a single top-level dataset;
    # although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
Example No. 53
0
def test_configs(path):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    # TODO: this procedure would leave a clean dataset, but `run` cannot handle dirty
    # input yet, so manual for now
    ds.add('code', to_git=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  where='dataset')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  'python "{script}" "{ds}" {{mysub}} {args}',
                  where='dataset')
    ds.config.add('datalad.run.substitutions.mysub',
                  'dataset-call-config',
                  where='dataset')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at the more specific local (repository) level,
    # which should override the dataset-level config:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  'python "{script}" "{ds}" local {args}',
                  where='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc',
                         help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add('datalad.procedures.datalad_test_proc.help',
                  "This is a help message",
                  where='dataset')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
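
As a recap of the configuration knobs tested above, a small sketch of wiring up a dataset procedure with a custom call format, a run substitution, and a help text. The procedure name 'datalad_test_proc' is the test helper referenced above and would have to exist as a script under code/ in a real dataset:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-proc').create()   # hypothetical location
# look for procedures in the dataset's own code/ directory
ds.config.add('datalad.locations.dataset-procedures', 'code', where='dataset')
# configure how this particular procedure is called, including a run substitution
ds.config.add('datalad.procedures.datalad_test_proc.call-format',
              'python "{script}" "{ds}" {{mysub}} {args}',
              where='dataset')
ds.config.add('datalad.run.substitutions.mysub', 'dataset-call-config',
              where='dataset')
ds.config.add('datalad.procedures.datalad_test_proc.help',
              'This is a help message', where='dataset')

ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
ds.run_procedure('datalad_test_proc', help_proc=True)
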
Example No. 54
0
def test_subdataset_save(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {
            "new": "wanted"}})
    sub.add('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    ok_clean_git(sub.path)
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(path=sub.path))
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    # `save -d .` saves the state change in the subdataset, but leaves any untracked
    # content alone
    with chpwd(parent.path):
        assert_status('ok', parent._save())
    ok_clean_git(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {
        'sub': {
            "new2": "wanted2"}})
    sub.add('new2')
    ok_clean_git(parent.path, untracked=['untracked'], index_modified=['sub'])
    with chpwd(parent.path):
        assert_status(
            # notneeded to save sub, but need to save parent
            ['ok', 'notneeded'],
            # the key condition of this test is that no reference dataset is
            # given!
            save(path='sub', super_datasets=True))
    # save super must not cause untracked content to be committed!
    ok_clean_git(parent.path, untracked=['untracked'])
Example No. 55
0
def test_uninstall_recursive(path):
    ds = Dataset(path).create(force=True)
    subds = ds.create('deep', force=True)
    # we add one file, but we get a response for the requested
    # directory too
    res = subds.add('.')
    assert_result_count(res, 1, action='add', status='ok', type='file')
    assert_result_count(res, 1, action='save', status='ok', type='dataset')
    # save all -> all clean
    ds.save(recursive=True)
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now uninstall in subdataset through superdataset
    target_fname = opj('deep', 'dir', 'test')
    # sane starting point
    ok_(exists(opj(ds.path, target_fname)))
    # doesn't have the minimum number of copies for a safe drop
    res = ds.drop(target_fname, recursive=True, on_failure='ignore')
    assert_status('error', res)
    assert_result_values_cond(
        res, 'message',
        lambda x: "configured minimum number of copies not found" in x or
        "Could only verify the existence of 0 out of 1 necessary copies" in x)

    # this should do it
    ds.drop(target_fname, check=False, recursive=True)
    # link is dead
    lname = opj(ds.path, target_fname)
    ok_(not exists(lname))
    # entire hierarchy saved
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
    # now same with actual handle removal
    # content is dropped already, so no checks in place anyway
    ds.remove(target_fname, check=True, recursive=True)
    ok_(not exists(lname) and not lexists(lname))
    ok_clean_git(subds.path)
    ok_clean_git(ds.path)
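
To summarize the drop/remove semantics the test relies on, a minimal sketch; the dataset path and target file are placeholders mirroring the fixture above, and the dataset is assumed to hold the only copy of the annexed content:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-drop')   # hypothetical installed dataset with annexed content
target = 'deep/dir/test'         # hypothetical annexed file

# a plain drop refuses to remove the only verified copy ...
ds.drop(target, recursive=True, on_failure='ignore')
# ... unless the availability check is disabled explicitly
ds.drop(target, recursive=True, check=False)
# remove additionally unregisters the handle, not just the content
ds.remove(target, recursive=True)
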
Example No. 56
0
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds._save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds._save(result_filter=is_ok_dataset),
        'path',
        [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds._save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds._save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all dataset, but there is
    # nothing actually saved
    res = ds._save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds._save())
    # no recursive
    assert_status('notneeded', ds._save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds._save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds._save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds._save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states, [d.repo.get_hexsha()
                                     for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds._save(message="savingtestmessage", path=['testnew2'],
                   super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds._save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
Example No. 57
0
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_false(ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.get_subdatasets(recursive=True, absolute=True, fulfilled=True),
        [subsubds.path, subds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_equal(ds.save(all_changes=True), [ds])

    # make the new file known to its dataset
    # with #1141 this would be
    #ds.add(newfile_name, save=False)
    subsubds.add(newfile_name, save=False)

    # but remains dirty because of the untracked file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_equal(ds.save(all_changes=True), [])
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_equal(ds.save(all_changes=True, recursive=True),
                 [subsubds, subds, ds])
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_false(ds.save())
    # no recursive
    assert_false(ds.save(all_changes=True))
    # an explicit target saves only the corresponding dataset
    assert_equal(save(files=[testfname]), [subsubds])
    # plain recursive without any files given will save the beast
    assert_equal(ds.save(recursive=True), [subds, ds])
    # there is nothing else to save
    assert_false(ds.save(all_changes=True, recursive=True))
    # one more time and check that all datasets in the hierarchy get updated
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_true(ds.save(all_changes=True, recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_not_equal(old, new)
    # now let's check saving "upwards"
    assert not subds.repo.dirty
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty

    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo,
                 untracked=['testnew'],
                 index_modified=['subsub'],
                 head_modified=['testadded'])
    subsubds.save(message="savingtestmessage",
                  super_datasets=True,
                  all_changes=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo,
                 untracked=['testnew'],
                 head_modified=['testadded'])

    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(
        next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(subds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
    assert_equal(
        next(ds.repo.get_branch_commits('master')).message.rstrip(),
        'savingtestmessage')
Example No. 58
0
def test_add_recursive(path):
    # make simple hierarchy
    parent = Dataset(path).create()
    ok_clean_git(parent.path)
    sub1 = parent.create(opj('down', 'sub1'))
    ok_clean_git(parent.path)
    sub2 = parent.create('sub2')
    # next one make the parent dirty
    subsub = sub2.create('subsub')
    ok_clean_git(parent.path, index_modified=['sub2'])
    res = parent.rev_save()
    ok_clean_git(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    ok_clean_git(parent.path, index_modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.add('.', recursive=True)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=opj(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # sub1 is untouched, and not reported
    assert_result_count(res, 0, path=sub1.path)
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    ok_clean_git(parent.path)
Example No. 59
0
    def __call__(
            # it is optional, because `rerun` can get a recorded one
            cmd=None,
            dataset=None,
            message=None,
            rerun=False):
        if rerun and cmd:
            lgr.warning('Ignoring provided command in --rerun mode')
            cmd = None
        if not dataset:
            # act on the whole dataset if nothing else was specified
            dataset = get_dataset_root(curdir)
        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='tracking outcomes of a command')
        # not needed ATM
        #refds_path = ds.path

        # delayed imports
        from datalad.cmd import Runner
        from datalad.tests.utils import ok_clean_git

        lgr.debug('tracking command output underneath %s', ds)
        try:
            # base assumption is that the animal smells superb
            ok_clean_git(ds.path)
        except AssertionError:
            yield get_status_dict(
                'run',
                ds=ds,
                status='impossible',
                message=
                'unsaved modifications present, cannot detect changes by command'
            )
            return

        if not cmd and not rerun:
            # TODO here we would need to recover a cmd when a rerun is attempted
            return

        if rerun:
            # pull run info out of the last commit message
            err_info = get_status_dict('run', ds=ds)
            if not ds.repo.get_hexsha():
                yield dict(err_info,
                           status='impossible',
                           message='cannot re-run command, nothing recorded')
                return
            last_commit_msg = ds.repo.repo.head.commit.message
            cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
            runinfo = re.match(cmdrun_regex, last_commit_msg,
                               re.MULTILINE | re.DOTALL)
            if not runinfo:
                yield dict(
                    err_info,
                    status='impossible',
                    message=
                    'cannot re-run command, last saved state does not look like a recorded command run'
                )
                return
            rec_msg, runinfo = runinfo.groups()
            if message is None:
                # re-use commit message, if nothing new was given
                message = rec_msg
            try:
                runinfo = json.loads(runinfo)
            except Exception as e:
                yield dict(
                    err_info,
                    status='error',
                    message=
                    ('cannot re-run command, command specification is not valid JSON: %s',
                     str(e)))
                return
            if 'cmd' not in runinfo:
                yield dict(
                    err_info,
                    status='error',
                    message=
                    'cannot re-run command, command specification missing in recorded state'
                )
                return
            cmd = runinfo['cmd']
            rec_exitcode = runinfo.get('exit', 0)
            rel_pwd = runinfo.get('pwd', None)
            if rel_pwd:
                # recording is relative to the dataset
                pwd = normpath(opj(ds.path, rel_pwd))
            else:
                rel_pwd = None  # normalize, just in case
                pwd = None

            # now we have to find out what was modified during the last run, and enable re-modification
            # ideally, we would bring back the entire state of the tree with #1424, but we limit ourselves
            # to file addition/not-in-place-modification for now
            to_unlock = []
            for r in ds.diff(recursive=True,
                             revision='HEAD~1...HEAD',
                             return_type='generator',
                             result_renderer=None):
                if r.get('type', None) == 'file' and \
                        r.get('state', None) in ('added', 'modified'):
                    r.pop('status', None)
                    to_unlock.append(r)
            if to_unlock:
                for r in ds.unlock(to_unlock,
                                   return_type='generator',
                                   result_xfm=None):
                    yield r
        else:
            # not a rerun, figure out where we are running
            pwd = ds.path
            rel_pwd = curdir

        # anticipate quoted compound shell commands
        cmd = cmd[0] if isinstance(cmd, list) and len(cmd) == 1 else cmd

        # TODO do our best to guess which files to unlock based on the command string
        #      in many cases this will be impossible (but see --rerun). however,
        #      generating new data (common case) will be just fine already

        # we have a clean dataset, let's run things
        cmd_exitcode = None
        runner = Runner(cwd=pwd)
        try:
            lgr.info("== Command start (output follows) =====")
            runner.run(
                cmd,
                # immediate output
                log_online=True,
                # not yet sure what we should do with the command output
                # IMHO `run` itself should be very silent and let the command talk
                log_stdout=False,
                log_stderr=False,
                expect_stderr=True,
                expect_fail=True,
                # TODO stdin
            )
        except CommandError as e:
            # strip our own info from the exception. The original command output
            # went to stdout/err -- we just have to exit with the same code
            cmd_exitcode = e.code
            if not rerun or rec_exitcode != cmd_exitcode:
                # we failed during a fresh run, or in a different way during a rerun
                # the latter can easily happen if we try to alter a locked file
                #
                # let's fail here, the command could have had a typo or some
                # other undesirable condition. If we would `add` nevertheless,
                # we would need to rerun and aggregate annex content that we
                # likely don't want
                # TODO add switch to ignore failure (some commands are stupid)
                # TODO add the ability to `git reset --hard` the dataset tree on failure
                # we know that we started clean, so we could easily go back, needs gh-1424
                # to be able to do it recursively
                raise CommandError(code=cmd_exitcode)

        lgr.info("== Command exit (modification check follows) =====")

        # amend commit message with `run` info:
        # - pwd if inside the dataset
        # - the command itself
        # - exit code of the command
        run_info = {
            'cmd': cmd,
            'exit': cmd_exitcode if cmd_exitcode is not None else 0,
        }
        if rel_pwd is not None:
            # only when inside the dataset to not leak information
            run_info['pwd'] = rel_pwd

        # compose commit message
        cmd_shorty = (' '.join(cmd) if isinstance(cmd, list) else cmd)
        cmd_shorty = '{}{}'.format(cmd_shorty[:40],
                                   '...' if len(cmd_shorty) > 40 else '')
        msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
            message if message is not None else cmd_shorty,
            json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False))

        for r in ds.add('.', recursive=True, message=msg):
            yield r
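
For clarity, a standalone sketch of the commit-message record that the implementation above writes and later parses for --rerun; the command string is made up, but the framing markers and the regular expression are the ones used above:

import json
import re

run_info = {'cmd': 'ls > listing.txt', 'exit': 0, 'pwd': '.'}
msg = '[DATALAD RUNCMD] {}\n\n=== Do not change lines below ===\n{}\n^^^ Do not change lines above ^^^'.format(
    run_info['cmd'][:40],
    json.dumps(run_info, indent=1, sort_keys=True, ensure_ascii=False))

# --rerun recovers the record from the last commit message with this pattern
cmdrun_regex = r'\[DATALAD RUNCMD\] (.*)=== Do not change lines below ===\n(.*)\n\^\^\^ Do not change lines above \^\^\^'
rec_msg, rec_json = re.match(cmdrun_regex, msg, re.MULTILINE | re.DOTALL).groups()
assert json.loads(rec_json)['cmd'] == 'ls > listing.txt'
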
Example No. 60
0
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds._save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds._save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds._save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommited .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds._save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds._save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))