Example No. 1
def test_basic_aggregate(path=None):
    # TODO give datasets some more metadata to actually aggregate stuff
    base = Dataset(opj(path, 'origin')).create(force=True)
    sub = base.create('sub', force=True)
    #base.metadata(sub.path, init=dict(homepage='this'), apply2global=True)
    subsub = base.create(opj('sub', 'subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we will first aggregate the middle dataset on its own, this will
    # serve as a smoke test for the reuse of metadata objects later on
    sub.aggregate_metadata()
    base.save()
    assert_repo_status(base.path)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    direct_meta = base.metadata(recursive=True, return_type='list')
    # lose the deepest dataset
    sub.drop('subsub', what='all', reckless='kill', recursive=True)
    # now we should be able to re-aggregate metadata and lose nothing,
    # because we can aggregate the aggregated metadata of subsub from sub
    base.aggregate_metadata(recursive=True, update_mode='all')
    # same result for the aggregated query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        print(d['path'], a['path'])
        assert_dict_equal(d, a)
    # now we can throw away the subdataset tree and lose no metadata
    base.drop('sub', what='all', reckless='kill', recursive=True)
    assert (not sub.is_installed())
    assert_repo_status(base.path)
    # same result for the aggregated query as for the (saved) direct query
    agg_meta = base.metadata(recursive=True, return_type='list')
    for d, a in zip(direct_meta, agg_meta):
        assert_dict_equal(d, a)
Example No. 2
def test_result_filter():
    # ensure baseline without filtering
    assert_equal([r['somekey'] for r in TestUtils().__call__(4)], [0, 1, 2, 3])
    # test two functionally equivalent ways to filter results
    # 1. Constraint-based -- filter by exception
    #    we have a full set of AND and OR operators for this
    # 2. custom filter function -- filter by boolean return value
    for filt in (EnsureKeyChoice('somekey', (0, 2)),
                 lambda x: x['somekey'] in (0, 2)):
        assert_equal([
            r['somekey'] for r in TestUtils().__call__(4, result_filter=filt)
        ], [0, 2])
        # constraint returns full dict
        assert_dict_equal(TestUtils().__call__(4, result_filter=filt)[-1], {
            'action': 'off',
            'path': 'some',
            'status': 'ok',
            'somekey': 2
        })

    # test more sophisticated filters that actually get to see the
    # API call's kwargs
    def greatfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), 'awesome')
        return True

    TestUtils().__call__(4, dataset='awesome', result_filter=greatfilter)

    def sadfilter(res, **kwargs):
        assert_equal(kwargs.get('dataset', 'bob'), None)
        return True

    TestUtils().__call__(4, result_filter=sadfilter)
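
The `result_filter` mechanism exercised above is a common parameter of regular DataLad API calls as well. A minimal sketch, assuming an existing dataset at an illustrative path, that keeps only results whose 'status' is 'ok':

import datalad.api as dl
from datalad.support.constraints import EnsureKeyChoice

# keep only 'ok' results from a status() call (path is illustrative)
ok_results = dl.status(
    dataset='/tmp/some-dataset',
    result_filter=EnsureKeyChoice('status', ('ok',)),
    return_type='list',
    result_renderer='disabled',
)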
Example No. 3
def test_get_file_parts():
    assert_dict_equal(
        au.get_file_parts("file.tar.gz", "prefix"), {
            "prefix": "file.tar.gz",
            "prefix_root_py": "file.tar",
            "prefix_ext_py": ".gz",
            "prefix_root": "file",
            "prefix_ext": ".tar.gz"
        })
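
The split asserted above can be reproduced with a small standalone sketch; the real get_file_parts lives in DataLad's addurls module, so the helper below is only a hypothetical re-implementation of the contract the test checks:

import os.path as op

def file_parts_sketch(filename, prefix="name"):
    # "Python-style" split: only the last extension is separated
    root_py, ext_py = op.splitext(filename)           # "file.tar", ".gz"
    # "full" split: everything after the first dot counts as extension
    root = filename.split(".")[0]                     # "file"
    ext = filename[len(root):]                        # ".tar.gz"
    return {
        prefix: filename,
        prefix + "_root_py": root_py,
        prefix + "_ext_py": ext_py,
        prefix + "_root": root,
        prefix + "_ext": ext,
    }

assert file_parts_sketch("file.tar.gz", "prefix") == {
    "prefix": "file.tar.gz",
    "prefix_root_py": "file.tar",
    "prefix_ext_py": ".gz",
    "prefix_root": "file",
    "prefix_ext": ".tar.gz",
}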
Example No. 4
def test_extract():
    info, subpaths = au.extract(
        ST_DATA["rows"],
        url_format="{name}_{debut_season}.com",
        filename_format="{age_group}//{now_dead}//{name}.csv")

    eq_(subpaths, [
        "adult", "kid",
        op.join("adult", "no"),
        op.join("adult", "yes"),
        op.join("kid", "no")
    ])

    eq_([d["url"] for d in info],
        ["will_1.com", "bob_2.com", "scott_1.com", "max_2.com"])

    eq_([d["filename"] for d in info], [
        op.join("kid", "no", "will.csv"),
        op.join("adult", "yes", "bob.csv"),
        op.join("adult", "no", "scott.csv"),
        op.join("kid", "no", "max.csv")
    ])

    expects = [{
        "name": "will",
        "age_group": "kid",
        "debut_season": "1",
        "now_dead": "no"
    }, {
        "name": "bob",
        "age_group": "adult",
        "debut_season": "2",
        "now_dead": "yes"
    }, {
        "name": "scott",
        "age_group": "adult",
        "debut_season": "1",
        "now_dead": "no"
    }, {
        "name": "max",
        "age_group": "kid",
        "debut_season": "2",
        "now_dead": "no"
    }]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)

    eq_([d["subpath"] for d in info], [
        op.join("kid", "no"),
        op.join("adult", "yes"),
        op.join("adult", "no"),
        op.join("kid", "no")
    ])
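
The url_format/filename_format arguments are plain str.format templates applied to each row dict, and a double slash in the filename template marks a (sub)dataset boundary, which is where the subpath values come from. A minimal sketch with a stand-in row (ST_DATA itself is a test fixture not shown in this listing):

row = {"name": "will", "debut_season": "1", "age_group": "kid", "now_dead": "no"}

url = "{name}_{debut_season}.com".format(**row)                 # "will_1.com"
filename = "{age_group}//{now_dead}//{name}.csv".format(**row)  # "kid//no//will.csv"

# splitting on "//" yields the subdataset path components and the file name
*subdirs, fname = filename.split("//")                          # ["kid", "no"], "will.csv"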
Example No. 5
def test_assert_dict_equal():
    assert_dict_equal({}, {})
    assert_dict_equal({"a": 3}, {"a": 3})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {1: 4})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4, 1: 3})
    assert_raises(AssertionError, assert_dict_equal, {1: 3}, {2: 4, 1: 'a'})
    try:
        import numpy as np
    except ImportError:  # pragma: no cover
        pytest.skip("need numpy for this tiny one")
    # one is scalar another one array
    assert_raises(AssertionError, assert_dict_equal, {1: 0}, {1: np.arange(1)})
    assert_raises(AssertionError, assert_dict_equal, {1: 0}, {1: np.arange(3)})
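
A minimal sketch of what such an assertion helper checks; DataLad's actual assert_dict_equal produces a more detailed report of the mismatching keys and values, so this is only an approximation of the contract the test above relies on:

def assert_dict_equal_sketch(d1, d2):
    assert set(d1) == set(d2), "key sets differ: %r vs %r" % (set(d1), set(d2))
    for k in d1:
        v1, v2 = d1[k], d2[k]
        # compare types first, so e.g. a scalar never "equals" a
        # one-element numpy array
        assert type(v1) is type(v2), "types differ for key %r" % k
        assert v1 == v2, "values differ for key %r" % k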
Example No. 6
def test_get_url_parts():
    eq_(au.get_url_parts(""), {})
    assert_dict_equal(au.get_url_parts("http://datalad.org"),
                      {"_url_hostname": "datalad.org"})

    assert_dict_equal(
        au.get_url_parts("http://datalad.org/about.html"), {
            "_url_hostname": "datalad.org",
            "_url0": "about.html",
            "_url_basename": "about.html",
            "_url_basename_root_py": "about",
            "_url_basename_ext_py": ".html",
            "_url_basename_root": "about",
            "_url_basename_ext": ".html"
        })
    assert_dict_equal(au.get_url_parts("http://datalad.org/about.html"),
                      au.get_url_parts("http://datalad.org//about.html"))

    assert_dict_equal(
        au.get_url_parts("http://datalad.org/for/git-users"), {
            "_url_hostname": "datalad.org",
            "_url0": "for",
            "_url1": "git-users",
            "_url_basename": "git-users",
            "_url_basename_root_py": "git-users",
            "_url_basename_ext_py": "",
            "_url_basename_root": "git-users",
            "_url_basename_ext": ""
        })
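
A sketch of the URL splitting the expectations above depend on, using only the standard library; the real au.get_url_parts additionally runs the basename through the same file-part logic shown after Example No. 3:

from urllib.parse import urlparse
import posixpath

parsed = urlparse("http://datalad.org/for/git-users")
hostname = parsed.netloc                                  # "datalad.org"
# empty components are dropped, which is why "//about.html" above
# behaves the same as "/about.html"
path_parts = [p for p in parsed.path.split("/") if p]     # ["for", "git-users"]
basename = posixpath.basename(parsed.path)                # "git-users"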
Example No. 7
def _compare_metadata_helper(origres, compds):
    for ores in origres:
        rpath = relpath(ores['path'], ores['refds'])
        cres = compds.metadata(
            rpath,
            reporton='{}s'.format(ores['type']))
        if ores['type'] == 'file':
            # TODO implement file based lookup
            continue
        assert_result_count(cres, 1)
        cres = cres[0]
        assert_dict_equal(ores['metadata'], cres['metadata'])
        if ores['type'] == 'dataset':
            for i in ('dsid', ):
                eq_(ores[i], cres[i])
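
A hypothetical usage sketch for this helper (the dataset paths and the assumption that metadata was already aggregated into the second dataset are illustrative, not taken from this listing): query metadata directly from an original dataset and verify that the dataset holding the aggregated copy reports the same records.

from datalad.api import Dataset

# illustrative paths; assumes metadata was aggregated into the second dataset
origin_ds = Dataset("/tmp/origin")
aggregated_ds = Dataset("/tmp/clone")
orig_results = origin_ds.metadata(recursive=True, return_type='list')
_compare_metadata_helper(orig_results, aggregated_ds)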
Example No. 8
def test_compare_content_info(path=None):
    # TODO remove when `create` is RF to return the new Dataset
    ds = Dataset(path).create()
    assert_repo_status(path)

    # for a clean repo HEAD and worktree query should yield identical results
    # minus a 'bytesize' report that is readily available for HEAD, but would
    # require a stat call per file for the worktree, and is not done ATM
    wt = ds.repo.get_content_info(ref=None)
    assert_dict_equal(
        wt, {
            f: {k: v
                for k, v in p.items() if k != 'bytesize'}
            for f, p in ds.repo.get_content_info(ref='HEAD').items()
        })
Example No. 9
def test_check_dates(path=None):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example No. 10
def test_rerun_commit_message_check():
    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] no command

=== Do not change lines below ===
{
 "pwd": ".",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    assert_raises(ValueError,
                  get_run_info,
                  None,
                  """\
[DATALAD RUNCMD] junk json

=== Do not change lines below ===
{
 "pwd": ".,
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")

    subject, info = get_run_info(
        None,
        """\
[DATALAD RUNCMD] fine

=== Do not change lines below ===
{
 "pwd": ".",
 "cmd": "echo ok >okfile",
 "exit": 0
}
^^^ Do not change lines above ^^^""")
    eq_(subject, "fine")
    assert_dict_equal(info,
                      {"pwd": ".", "cmd": "echo ok >okfile", "exit": 0})
Example No. 11
def test_extract_exclude_autometa_regexp():
    info, _ = au.extract(ST_DATA["rows"],
                         url_format="{name}_{debut_season}.com",
                         filename_format="{age_group}//{now_dead}//{name}.csv",
                         exclude_autometa="ea")

    expects = [{
        "name": "will",
        "age_group": "kid"
    }, {
        "name": "bob",
        "age_group": "adult"
    }, {
        "name": "scott",
        "age_group": "adult"
    }, {
        "name": "max",
        "age_group": "kid"
    }]
    for d, expect in zip(info, expects):
        assert_dict_equal(d["meta_args"], expect)
Example No. 12
    def test_addurls(self=None, path=None):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return len(ds.repo.get_revisions("git-annex"))

        n_annex_commits = get_annex_commit_counts()

        # Meanwhile also test that we can specify the path relative
        # to the top of the dataset, as paths are generally treated in
        # the Python API, and that it will be the one saved in the
        # commit message record
        json_file = op.relpath(self.json_file, ds.path)

        ds.addurls(json_file,
                   "{url}",
                   "{name}",
                   exclude_autometa="(md5sum|size)",
                   result_renderer='disabled')
        ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                      f"url_file='{json_file}'")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(op.join(ds.path, fname))

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        # Also ignore if on Windows as it seems as if a git-annex bug
        # leads to separate meta data commits:
        # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
        if not (dl_cfg.get('datalad.fake-dates') or on_windows):
            # We should have two new commits on the git-annex: one for the
            # added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file,
                       "{url}",
                       "{name}",
                       ifexists="overwrite",
                       result_renderer='disabled')
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(ds.addurls(self.json_file,
                                     "{url}",
                                     "{name}",
                                     ifexists="skip",
                                     result_renderer='disabled'),
                          action="addurls",
                          status="notneeded")

        # Adding to already existing links works, as long as the content is the same.
        ds.addurls(self.json_file,
                   "{url}",
                   "{name}",
                   result_renderer='disabled')

        # But it fails if something has changed.
        ds.unlock("a")
        with open(op.join(ds.path, "a"), "w") as ofh:
            ofh.write("changed")
        ds.save("a")

        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file,
                      "{url}",
                      "{name}",
                      result_renderer='disabled')
Example No. 13
def test_get_paths_by_ds(path=None, otherdspath=None):
    otherds = Dataset(otherdspath).create()
    ds = get_deeply_nested_structure(path)

    # for testing below, a shortcut
    subds_modified = Dataset(ds.pathobj / 'subds_modified')

    # check the docstring of get_deeply_nested_structure() to understand
    # what is being tested here
    testcases = (
        # (
        #   (<dataset_arg>, <path arg>),
        #   {<path by ds dict>}
        #   [<error list>]
        # ),

        # find the main dataset, pass through arbitrary arguments; if no
        # paths go in, no paths come out
        ((path, None), {
            ds.pathobj: None
        }, []),
        # a simple path in the root ds stays just that, no traversal
        # into files underneath
        ((ds, ['subdir']), {
            ds.pathobj: [ds.pathobj / 'subdir']
        }, []),
        # same for files, any number,
        # one record per dataset with multiple files
        ((ds, [op.join('subdir', 'git_file.txt'), 'directory_untracked']), {
            ds.pathobj: [
                ds.pathobj / 'directory_untracked',
                ds.pathobj / 'subdir' / 'git_file.txt'
            ]
        }, []),
        # same for a subdataset root -- still reported as part of
        # the superdataset!
        ((ds, ['subds_modified']), {
            ds.pathobj: [subds_modified.pathobj]
        }, []),
        # but with a trailing slash it is the subdataset root
        # itself that becomes the record!!!
        ((ds, ['subds_modified' + op.sep]), {
            subds_modified.pathobj: [subds_modified.pathobj]
        }, []),
        # however, regardless of the path syntax, each behavior can be forced
        ((ds, ['subds_modified'], 'sub'), {
            subds_modified.pathobj: [subds_modified.pathobj]
        }, []),
        ((ds, ['subds_modified' + op.sep], 'super'), {
            ds.pathobj: [subds_modified.pathobj]
        }, []),
        # subdataset content is sorted into a subdataset record
        ((ds, [op.join('subds_modified', 'subdir')]), {
            subds_modified.pathobj: [ds.pathobj / 'subds_modified' / 'subdir']
        }, []),
        # content from different datasets ends up in different records
        ((ds, [
            op.join('subdir', 'git_file.txt'),
            op.join('subds_modified', 'subdir'),
            op.join('subds_modified', 'subds_lvl1_modified')
        ]), {
            ds.pathobj: [ds.pathobj / 'subdir' / 'git_file.txt'],
            subds_modified.pathobj: [
                subds_modified.pathobj / 'subdir',
                subds_modified.pathobj / 'subds_lvl1_modified'
            ]
        }, []),
        # paths not matching existing content are no problem
        ((ds, [
            'doesnotexist',
            op.join('subdir', 'nothere'),
            op.join('subds_modified', 'subdir', 'gone')
        ]), {
            ds.pathobj:
            [ds.pathobj / 'doesnotexist', ds.pathobj / 'subdir' / 'nothere'],
            subds_modified.pathobj:
            [subds_modified.pathobj / 'subdir' / 'gone']
        }, []),
        #
        # now the error case
        #
        # a path that does not sort under the root dataset
        ((path, [otherds.pathobj / 'totally' / 'different']), {},
         [otherds.pathobj / 'totally' / 'different']),
    )
    # evaluate the test cases
    for inp, pbd_target, error_target in testcases:
        paths_by_ds, errors = get_paths_by_ds(ds, *inp)
        assert_dict_equal(pbd_target, paths_by_ds)
        eq_(error_target, errors)

    # lastly, some more specialized tests
    # paths get collapsed into dataset records, even when the path
    # order is not presorted to match individual datasets sequentially
    paths_by_ds, errors = get_paths_by_ds(ds, ds, [
        op.join('subdir', 'git_file.txt'),
        op.join('subds_modified', 'subdir'),
        op.join('subdir', 'annexed_file.txt'),
    ])
    eq_(list(paths_by_ds.keys()), [ds.pathobj, subds_modified.pathobj])
    # result order (top-level first) is stable, even when a path comes first
    # that sorts later. Also mixed types are not a problem
    paths_by_ds, errors = get_paths_by_ds(ds, ds, [
        ds.pathobj / 'subds_modified' / 'subdir',
        op.join('subdir', 'git_file.txt'),
        op.join('subds_modified', 'subdir', 'annexed_file.txt'),
    ])
    eq_(list(paths_by_ds.keys()), [ds.pathobj, subds_modified.pathobj])
Example No. 14
def test_target_ssh_simple(origin=None, src_path=None, target_rootpath=None):
    ca = dict(result_renderer='disabled')
    test_fname = 'test-annex.dat'
    orig = Dataset(origin).create(**ca)
    (orig.pathobj / test_fname).write_text('some')
    orig.save(**ca)

    port = get_ssh_port("datalad-test")
    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://*****:*****@with_testsui(responses=["yes"])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(
                dataset=source,
                name="local_target",
                sshurl="ssh://datalad-test" + target_path,
                publish_by_default=DEFAULT_BRANCH,
                existing='replace',
                ui=have_webui(),
            )

        interactive_assert_create_sshwebserver()

        eq_("ssh://datalad-test" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            lclcfg = AnnexRepo(src_path).config
            eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
            # valid uuid
            eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
            # should be added too, even if URL matches prior state
            eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

        # again, by explicitly passing urls. Since we are on datalad-test, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://datalad-test" + target_path,
            ui=have_webui(),
        )

        @with_testsui(responses=['yes'])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(existing='replace', **cpkwargs)

        interactive_assert_create_sshwebserver()

        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://datalad-test" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        if have_webui():
            from datalad_deprecated.tests.test_create_sibling_webui import (
                assert_publish_with_ui, )
            assert_publish_with_ui(target_path)

        # now, push should work:
        push(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            # and ignore .git/logs content (gh-5298)
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs') \
                        or f.startswith('.git/logs/'):
                    digests.pop(f)
                    mtimes.pop(f)

        if not have_webui():
            # the rest of the test assumes that we have uploaded a UI
            return
        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
        }
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has
        # changed a bit and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        ok_(modified_files.issuperset(ok_modified_files))
Example No. 15
def test_discover_ds_trace(path=None, otherdir=None):
    ds = make_demo_hierarchy_datasets(
        path, {k: v
               for k, v in demo_hierarchy.items() if k in ['a', 'd']})
    a = opj(ds.path, 'a')
    aa = opj(a, 'aa')
    d = opj(ds.path, 'd')
    db = opj(d, 'db')
    # we have to check whether we get the correct hierarchy, as the test
    # subject is also involved in this
    assert_true(exists(opj(db, 'file_db')))
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # now two datasets which are not available locally, but we
    # know about them (e.g. from metadata)
    dba = opj(db, 'sub', 'dba')
    dbaa = opj(dba, 'subsub', 'dbaa')
    for input, eds, goal in (
        ([], None, {}),
        ([ds.path], None, {}),
        ([otherdir], None, {}),
        ([opj(ds.path, 'nothere')], None, {}),
        ([opj(d, 'nothere')], None, {}),
        ([opj(db, 'nothere')], None, {}),
        ([a], None, {
            ds.path: set([a])
        }),
        ([aa, a], None, {
            ds.path: set([a]),
            a: set([aa])
        }),
        ([db], None, {
            ds.path: set([d]),
            d: set([db])
        }),
        ([opj(db, 'file_db')], None, {
            ds.path: set([d]),
            d: set([db])
        }),
            # just a regular non-existing path
        ([dba], None, {}),
            # but if we inject this knowledge it must come back out
            # as the child of the closest existing dataset
        ([dba], [dba], {
            ds.path: set([d]),
            d: set([db]),
            db: set([dba])
        }),
            # regardless of the depth
        ([dbaa], [dbaa], {
            ds.path: set([d]),
            d: set([db]),
            db: set([dbaa])
        }),
        ([dba, dbaa], [dba, dbaa], {
            ds.path: set([d]),
            d: set([db]),
            db: set([dba, dbaa])
        }),
            # we can simply add existing and non-existing datasets to the
            # include list to get the desired result
        ([d, dba, dbaa], [d, dba, dbaa], {
            ds.path: set([d]),
            d: set([db]),
            db: set([dba, dbaa])
        }),
    ):
        spec = {}
        discover_dataset_trace_to_targets(ds.path,
                                          input, [],
                                          spec,
                                          includeds=eds)
        assert_dict_equal(spec, goal)