Example #1
def test_copy_file_errors(dspath1=None, dspath2=None, nondspath=None):
    ds1 = Dataset(dspath1)
    # nothing given
    assert_raises(ValueError, copy_file)
    # no target directory given
    assert_raises(ValueError, ds1.copy_file, 'somefile')
    # using multiple sources and --specs-from
    assert_raises(ValueError, ds1.copy_file, ['1', '2', '3'], specs_from='-')
    # trying to copy to a dir that is not in a dataset
    ds1.create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=nondspath, on_failure='ignore'))
    # copy into a dataset that is not in the reference dataset
    ds2 = Dataset(dspath2).create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=dspath2, on_failure='ignore'))

    # attempt to copy from a directory, but no recursion is enabled.
    # use no reference ds to exercise a different code path
    assert_status('impossible',
                  copy_file([nondspath, dspath1], on_failure='ignore'))

    # attempt to copy a file that doesn't exist
    assert_status('impossible',
                  copy_file(['funky', dspath1], on_failure='ignore'))

    # attempt to copy a file without a destination given
    assert_raises(ValueError, copy_file, 'somepath')
    assert_status('impossible',
                  copy_file(specs_from=['somepath'], on_failure='ignore'))
Example #2
def test_status_basics(path=None, linkpath=None, otherdir=None):
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status(result_renderer='disabled')
    eq_(stat, ds.status(result_renderer='disabled'))
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
Example #3
def test_get_mixed_hierarchy(src=None, path=None):

    origin = Dataset(src).create(annex=False)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.save('file_in_git.txt', to_git=True)
    origin_sub.save('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(path,
                        source=src,
                        recursive=True,
                        result_xfm='datasets',
                        return_type='item-or-list',
                        result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(result,
                        1,
                        path=opj(subds.path, "file_in_annex.txt"),
                        status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)
Example #4
def test_ria_push(srcpath=None, dstpath=None):
    # complex test involving a git remote, a special remote, and a
    # publication dependency
    src = Dataset(srcpath).create()
    testfile = src.pathobj / 'test_mod_annex_file'
    testfile.write_text("Heavy stuff.")
    src.save()
    assert_status(
        'ok',
        src.create_sibling_ria("ria+{}".format(
            get_local_file_url(dstpath, compatibility='git')),
                               "datastore",
                               new_store_ok=True))
    res = src.push(to='datastore')
    assert_in_results(res,
                      action='publish',
                      target='datastore',
                      status='ok',
                      refspec=DEFAULT_REFSPEC)
    assert_in_results(res,
                      action='publish',
                      target='datastore',
                      status='ok',
                      refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(res,
                      action='copy',
                      target='datastore-storage',
                      status='ok',
                      path=str(testfile))
Example #5
def test_push_subds_no_recursion(src_path=None,
                                 dst_top=None,
                                 dst_sub=None,
                                 dst_subsub=None):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but with NO recursion; instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give the path relative to the top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(res,
                          action='publish',
                          type='dataset',
                          path=subsub.path)
Example #6
def test_invalid_call(origin=None, tdir=None):
    ds = Dataset(origin).create()
    # no target
    assert_status('impossible', ds.push(on_failure='ignore'))
    # no dataset
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, Push.__call__)
    # dataset, but outside path
    assert_raises(IncompleteResultsError, ds.push, path=tdir)
    # a path constraint that doesn't match anything causes
    # nothing to be done
    assert_status('notneeded', ds.push(path=ds.pathobj / 'nothere'))

    # unavailable subdataset
    dummy_sub = ds.create('sub')
    dummy_sub.drop(what='all', reckless='kill', recursive=True)
    assert_in('sub', ds.subdatasets(state='absent', result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_raises(ValueError, ds.push, 'sub')

    target = mk_push_target(ds, 'target', tdir, annex=True)
    # revision that doesn't exist
    assert_raises(ValueError,
                  ds.push,
                  to='target',
                  since='09320957509720437523')

    # If a publish() user accidentally passes since='', which push() spells as
    # since='^', the call is aborted.
    assert_raises(ValueError, ds.push, to='target', since='')
Example #7
def test_subdataset_save(path=None):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #8
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)

    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='error',
                      message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='impossible',
                      message=("cannot enable sibling '%s', not known",
                               'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))
Example #9
def test_install_recursive_with_data(src=None, path=None):

    _make_dataset_hierarchy(src)

    # now again; with data:
    res = install(path,
                  source=src,
                  recursive=True,
                  get_data=True,
                  result_filter=None,
                  result_xfm=None)
    assert_status('ok', res)
    # installed a dataset and two subdatasets, and one file with content in
    # each
    assert_result_count(res, 5, type='dataset', action='install')
    assert_result_count(res, 2, type='file', action='get')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path)
    top_ds = YieldDatasets()(res[0])
    ok_(top_ds.is_installed())

    def all_have_content(repo):
        ainfo = repo.get_content_annexinfo(init=None, eval_availability=True)
        return all(st["has_content"] for st in ainfo.values())

    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all_have_content(top_ds.repo))

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds, ))
        if isinstance(subds.repo, AnnexRepo):
            ok_(all_have_content(subds.repo))
Example #10
def test_audio(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', scope='branch')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(reporton='datasets', return_type='item-or-list'
                          )['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert (not meta['date'])
    assert_not_in('date', uniques['audio'])
Example #11
def test_install_list(path=None, top_path=None):

    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore install the toplevel first:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError,
                  ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
Example #12
def test_invalid_call(path=None):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #13
def test_drop_file_need_nocheck(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    with assert_raises(IncompleteResultsError) as cme:
        ds.drop("foo")
    # The --force suggestion from git-annex-drop is translated to --reckless.
    assert_in("--reckless", str(cme.value))
    assert_status("ok", ds.drop("foo", reckless='kill', on_failure="ignore"))
Example #14
def test_run_empty_repo(path=None):
    ds = Dataset(path).create()
    cmd = [sys.executable, "-c", "open('foo', 'w').write('')"]
    # Using "*" in a completely empty repo will fail.
    with assert_raises(IncompleteResultsError):
        ds.run(cmd, inputs=["*"], on_failure="stop")
    assert_repo_status(ds.path)
    # "." will work okay, though.
    assert_status("ok", ds.run(cmd, inputs=["."]))
    assert_repo_status(ds.path)
    ok_exists(str(ds.pathobj / "foo"))
Example #15
def test_gh2927(path=None, linkpath=None):
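    # regression test: create nested subdatasets, going through a symlinked
    # dataset root where the platform supports it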
    if has_symlink_capability():
        # make it more complicated by default
        Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath
    ds = Dataset(path).create()
    ds.create('subds_clean')
    assert_status(
        'ok',
        ds.create(op.join('subds_clean', 'subds_lvl1_clean'),
                  result_xfm=None,
                  return_type='list'))
Example #16
def test_update_strategy(path=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n'
        )
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('impossible',
                      ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status('ok',
                      ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))
Example #17
def test_rerun_branch(path=None):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status('impossible',
                      ds.rerun(branch="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    ds.repo.tag("prerun")

    outfile = op.join(path, "run-file")

    with swallow_outputs():
        ds.run(f'echo x$({cat_command} run-file) > run-file')
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")

    # Rerun the commands on a new branch that starts at the parent
    # commit of the first run.
    with swallow_outputs():
        ds.rerun(since="prerun", onto="prerun", branch="rerun")

    eq_(ds.repo.get_active_branch(), "rerun")
    eq_('xx\n', open(outfile).read())

    # NOTE: This test depends on the non-run commit above following a run
    # commit.  Otherwise, all the metadata (e.g., author date) aside from the
    # parent commit that is used to generate the commit ID may be set when
    # running the tests, which would result in two commits rather than three.
    for revrange in ["rerun.." + DEFAULT_BRANCH, DEFAULT_BRANCH + "..rerun"]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)
    eq_(ds.repo.get_merge_base([DEFAULT_BRANCH, "rerun"]),
        ds.repo.get_hexsha("prerun"))

    # Start rerun branch at tip of current branch.
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="prerun", branch="rerun2")
    eq_(ds.repo.get_active_branch(), "rerun2")
    eq_('xxxx\n', open(outfile).read())

    eq_(len(ds.repo.get_revisions(DEFAULT_BRANCH + "..rerun2")), 2)
    eq_(len(ds.repo.get_revisions("rerun2.." + DEFAULT_BRANCH)), 0)

    # Using an existing branch name fails.
    ds.repo.checkout(DEFAULT_BRANCH)
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="prerun", branch="rerun2")
Example #18
def test_archive(path=None):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert (isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # sleep so that, should mtime ever get encoded, the checksums would differ
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad so this test is not
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', reckless='kill')
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'), missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))
Example #19
def test_remove_subds(path=None):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(ds.subdatasets(), 1, path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(ds.status(),
                        1,
                        path=op.join(ds.path, 'sub'),
                        state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)
Example #20
def test_arg_missing(path=None, path2=None):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings,
        'add',
        url=path2,
    )
    assert_status('ok', ds.siblings('add', url=path2, name='somename'))
    # trigger some name-guessing functionality that will still not
    # be able to end up using a hostname spec despite being
    # given a URL
    if not on_windows:
        # the trick with the file:// URL creation only works on POSIX
        # the underlying tested code here is not about paths, though,
        # so it is good enough to run this on a POSIX system to be
        # reasonably sure that things work
        assert_raises(
            InsufficientArgumentsError,
            ds.siblings,
            'add',
            url=f'file://{path2}',
        )

    # there is no name guessing with 'configure'
    assert_in_results(ds.siblings('configure',
                                  url='http://somename',
                                  on_failure='ignore'),
                      status='error',
                      message='need sibling `name` for configuration')

    # needs a URL
    assert_raises(InsufficientArgumentsError,
                  ds.siblings,
                  'add',
                  name='somename')
    # just pushurl is OK
    assert_status('ok', ds.siblings('add', pushurl=path2, name='somename2'))

    # needs group with groupwanted
    assert_raises(InsufficientArgumentsError,
                  ds.siblings,
                  'add',
                  url=path2,
                  name='somename',
                  annex_groupwanted='whatever')
Example #21
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest does not
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
Example #22
def test_no_annex(path=None):
    # few smoke tests regarding the 'here' sibling
    ds = create(path, annex=False)
    res = ds.siblings('configure',
                      name='here',
                      description='very special',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_status('impossible', res)

    res = ds.siblings('enable',
                      name='doesnotmatter',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_in_results(res,
                      status='impossible',
                      message='cannot enable sibling of non-annex dataset')
Example #23
def test_datasets_datalad_org(tdir=None, *, suffix):
    # Test that git annex / datalad install and get work correctly against
    # our datasets.datalad.org.
    # Apparently things can break, especially with the introduction of the
    # smart HTTP backend for apache2 etc.
    ds = install(tdir,
                 source='///dicoms/dartmouth-phantoms/bids_test6-PD+T2w' +
                 suffix)
    eq_(ds.config.get(f'remote.{DEFAULT_REMOTE}.annex-ignore', None), None)
    # assert_result_count and not just assert_status since for some reason on
    # Windows we get two records due to a duplicate attempt (as res[1]) to get it
    # again, which is reported as "notneeded".  For the purpose of this test
    # it doesn't make a difference.
    assert_result_count(ds.get(
        op.join('001-anat-scout_ses-{date}', '000001.dcm')),
                        1,
                        status='ok')
    assert_status('ok', ds.drop(what='all', reckless='kill', recursive=True))
Example #24
def test_push_wanted(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', reckless='kill')

    # Annotate sensitive content, actual value "verysecure" does not matter in
    # this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
Example #25
def test_as_common_datasource(testbed=None,
                              viapath=None,
                              viaurl=None,
                              remotepath=None,
                              url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()

    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
        'is not http or https',
    )

    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)

    # now try if it works. we will clone the clone, and get a repo that does
    # not know its ultimate origin. still, we should be able to pull data
    # from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))
Example #26
def test_exif(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', scope='branch')
    copy(opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
         path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)
    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)
Example #27
def test_install_datasets_root(tdir=None):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme.value))
Example #28
def test_get_single_file(src=None, path=None):
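    # clone a source dataset with a single annexed file and verify that get()
    # retrieves its content and reports the matching annex key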
    ca = dict(result_renderer='disabled')
    test_fname = 'test-annex.dat'
    orig = Dataset(src).create(**ca)
    (orig.pathobj / test_fname).write_text('some')
    orig.save(**ca)

    ds = clone(src, path, **ca)
    ok_(ds.is_installed())
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    result = ds.get("test-annex.dat", **ca)
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], opj(ds.path, 'test-annex.dat'))
    annexprops = ds.repo.get_file_annexinfo('test-annex.dat',
                                            eval_availability=True)
    eq_(result[0]['annexkey'], annexprops['key'])
    ok_(annexprops['has_content'])
Example #29
def test_copy_file_specs_from(srcdir=None, destdir=None):
    srcdir = Path(srcdir)
    destdir = Path(destdir)
    files = [p for p in srcdir.glob('**/*') if not p.is_dir()]
    # plain list of absolute path objects
    r_srcabs, res = _check_copy_file_specs_from(srcdir, destdir / 'srcabs',
                                                files)
    # same, but with relative paths
    with chpwd(srcdir):
        r_srcrel, res = _check_copy_file_specs_from(
            srcdir, destdir / 'srcrel', [p.relative_to(srcdir) for p in files])
    # same, but as strings
    r_srcabs_str, res = _check_copy_file_specs_from(srcdir,
                                                    destdir / 'srcabs_str',
                                                    [str(p) for p in files])
    with chpwd(srcdir):
        r_srcrel_str, res = _check_copy_file_specs_from(
            srcdir, destdir / 'srcrel_str',
            [str(p.relative_to(srcdir)) for p in files])
    # same, but with src/dest pairs
    r_srcdestabs_str, res = _check_copy_file_specs_from(
        srcdir, destdir / 'srcdestabs_str', [
            '{}\0{}'.format(str(p), str(destdir / 'srcdestabs_str' / p.name))
            for p in files
        ])

    # all methods lead to the same dataset structure
    for a, b in ((r_srcabs, r_srcrel), (r_srcabs, r_srcabs_str),
                 (r_srcabs, r_srcrel_str), (r_srcabs, r_srcdestabs_str)):
        eq_(*[
            sorted(r for r in d.status(result_xfm='relpaths',
                                       result_renderer='disabled'))
            for d in (a, b)
        ])

    # fail on destination outside of the dest repo
    res = copy_file(specs_from=[
        '{}\0{}'.format(str(p),
                        str(destdir / 'srcdest_wrong' / p.relative_to(srcdir)))
        for p in files
    ],
                    on_failure='ignore')
    assert_status('error', res)
Example #30
def test_copy_file_into_nonannex(workdir=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', reckless='kill')

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))