Example #1
def test_runner_log_stdout():
    # TODO: no idea yet how to check correct logging via any kind of assertion.

    runner = Runner(dry=False)
    cmd_ = ['echo', 'stdout-Message should be logged']
    for cmd in [cmd_, ' '.join(cmd_)]:
        # should be identical runs, either as a string or as a list
        kw = {}
        # on Windows it can't find echo if run outside the shell
        if on_windows and isinstance(cmd, list):
            kw['shell'] = True
        with swallow_logs(logging.DEBUG) as cm:
            ret = runner.run(cmd, log_stdout=True, **kw)
            eq_(cm.lines[0], "Running: %s" % cmd)
            if not on_windows:
                # we can just count on sanity
                eq_(cm.lines[1], "stdout| stdout-Message should be logged")
            else:
                # echo outputs quoted lines for some reason, so relax check
                ok_("stdout-Message should be logged" in cm.lines[1])
        assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, str(runner.commands)))

    cmd = 'echo stdout-Message should not be logged'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stdout=False)
            eq_(cmo.out, "stdout-Message should not be logged\n")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, str(runner.commands)))
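
The examples on this page all follow the same pattern: datalad's swallow_logs is a context manager that temporarily captures log records at or above a given level so the test can assert on them. Below is a minimal sketch of that pattern, assuming only the interface visible in the examples themselves (swallow_logs lives in datalad.utils; the captured object exposes .out, .lines, and .assert_logged); the logger name 'datalad.example' is hypothetical:

import logging
from datalad.utils import swallow_logs

# hypothetical logger under the 'datalad' namespace, which swallow_logs
# captures by default
lgr = logging.getLogger('datalad.example')

with swallow_logs(new_level=logging.INFO) as cml:
    lgr.info("something happened")
    # the captured output is available as a single string ...
    assert "something happened" in cml.out
    # ... and as individual lines
    assert any("something happened" in line for line in cml.lines)
    # structured check: message and level must both match
    cml.assert_logged("something happened", level='INFO')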
Example #2
def test_url_base():
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__

    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        eq_(str(purl), 'http://example.com/;param')  # but we do maintain original string
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
Example #3
def test_submodule_deinit(path):
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
Example #4
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # log decorations such as the log level are not captured while
            # swallowing, so check both that no ERROR marker and no exit-code
            # message show up in the output
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
Example #5
def test_globbedpaths(path):
    dotdir = op.curdir + op.sep

    for patterns, expected in [
            (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
            ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
            (["*.txt", "*.dat"], {"1.txt", "2.dat", u"bβ.dat", "3.txt"}),
            ([dotdir + "*.txt", "*.dat"],
             {dotdir + "1.txt", "2.dat", u"bβ.dat", dotdir + "3.txt"}),
            (["subdir/*.txt"], {"subdir/1.txt", "subdir/2.txt"}),
            ([dotdir + "subdir/*.txt"],
             {dotdir + p for p in ["subdir/1.txt", "subdir/2.txt"]}),
            (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
            (["*.txt"], {"1.txt", "2.txt"}),
            ([dotdir + "*.txt"], {dotdir + p for p in ["1.txt", "2.txt"]}),
            ([pardir + "*.txt"], {pardir + p for p in ["1.txt", "3.txt"]}),
            ([dotdir + pardir + "*.txt"],
             {dotdir + pardir + p for p in ["1.txt", "3.txt"]}),
            (["subdir/"], {"subdir/"})]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", u"bβ.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", u"bβ.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat", u"bβ.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # We sort the glob outputs.
    glob_results = {"z": "z",
                    "a": ["x", "d", "b"]}
    with patch('glob.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat", u"bβ.dat"]),
                             (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
Example #6
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{"url": "URL/a.dat", "name": "a", "subdir": "foo"},
                       {"url": "URL/b.dat", "name": "b", "subdir": "bar"},
                       {"url": "URL/c.dat", "name": "c", "subdir": "foo"}],
                      jfh)

        ds.save(message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")),
                cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Example #7
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                hexsha_before = ds.repo.get_hexsha()
                ds.addurls(self.json_file, "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)
                hexsha_after = ds.repo.get_hexsha()

                for fname in ["foo-{}/a", "bar-{}/b", "foo-{}/c"]:
                    ok_exists(fname.format(label))

                assert_true(save ^ (hexsha_before == hexsha_after))
                assert_true(save ^ ds.repo.dirty)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.add(".")
            eq_(set(subdatasets(ds, recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #8
def test_install_skip_list_arguments(src, path, path_outside):
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        # both problematic paths should be reported as ignored in one warning
        cml.assert_logged(msg="ignored non-existing paths: {}\n".format(
                          [opj(ds.path, 'not_existing'), path_outside]),
                          regex=False, level='WARNING')
        ok_(isinstance(result, list))
        eq_(len(result), 2)
        for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
            assert_in(sub, result)
            ok_(sub.is_installed())

    # the return of get is always a list, even if just one thing was gotten;
    # in this case 'subm 1' was already installed above, so this will fetch
    # the content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')
Example #9
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case; the checks above should fail first if something is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(ri, str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
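
For reference, a hypothetical invocation of this helper, using the field values that Example #2 establishes for URL("http://example.com"); no localpath is passed, since an http URL is remote and .localpath is expected to raise ValueError:

# assumes RI and URL from datalad.support.network, as in the URL tests above
_check_ri("http://example.com", URL, scheme='http', hostname='example.com')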
Example #10
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    text_type(manager.socket_dir),
                    get_connection_hash("localhost", identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
Example #11
def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # the repo is not yet prepared to be served via http, so install must fail
    with swallow_logs() as cml:
        assert_raises(GitCommandError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)
Example #12
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        with swallow_logs(new_level=logging.INFO) as cml:
            result = install("///")
            assert_in("was already installed from", cml.out)
            eq_(result, ds)

        # and a third time, into something that already exists but is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with swallow_logs(new_level=logging.WARNING) as cml:
            result = install("sub", source='///')
            assert_in("already exists and is not an installed dataset", cml.out)
            ok_(result is None)
Example #13
def test_AnnexRepo_instance_from_clone(src, dst):

    ar = AnnexRepo(dst, src)
    assert_is_instance(ar, AnnexRepo, "AnnexRepo was not created.")
    assert_true(os.path.exists(os.path.join(dst, '.git', 'annex')))

    # doing it again should raise GitCommandError, since git will notice there's
    # already a git repo at that path and therefore can't clone to `dst`
    with swallow_logs() as cm:
        assert_raises(GitCommandError, AnnexRepo, dst, src)
        assert("already exists" in cm.out)
Example #14
def check_filters(name):
    with swallow_logs(new_level=logging.DEBUG, name=name) as cml:
        lgr1 = logging.getLogger(name + '.goodone')
        lgr2 = logging.getLogger(name + '.anotherone')
        lgr3 = logging.getLogger(name + '.bad')
        lgr1.debug('log1')
        lgr2.info('log2')
        lgr3.info('log3')
        assert_in('log1', cml.out)
        assert_in('log2', cml.out)
        assert_not_in('log3', cml.out)
Example #15
    def test_addurls(self, path):
        ds = Dataset(path).create(force=True)

        def get_annex_commit_counts():
            return int(
                ds.repo.repo.git.rev_list("--count", "git-annex").strip())

        n_annex_commits = get_annex_commit_counts()

        with chpwd(path):
            ds.addurls(self.json_file, "{url}", "{name}")

            filenames = ["a", "b", "c"]
            for fname in filenames:
                ok_exists(fname)

            for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                             ["foo", "bar", "foo"]):
                assert_dict_equal(meta,
                                  {"subdir": [subdir], "name": [fname]})

            # Ignore this check if we're faking dates because that disables
            # batch mode.
            if not os.environ.get('DATALAD_FAKE__DATES'):
                # We should have two new commits on the git-annex: one for the
                # added urls and one for the added metadata.
                eq_(n_annex_commits + 2, get_annex_commit_counts())

            # Add to already existing links, overwriting.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           ifexists="overwrite")
                for fname in filenames:
                    assert_in("Removing {}".format(os.path.join(path, fname)),
                              cml.out)

            # Add to already existing links, skipping.
            assert_in_results(
                ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
                action="addurls",
                status="notneeded")

            # Adding to already existing links works, as long as the content is the same.
            ds.addurls(self.json_file, "{url}", "{name}")

            # But it fails if something has changed.
            ds.unlock("a")
            with open("a", "w") as ofh:
                ofh.write("changed")
            ds.save("a")

            assert_raises(IncompleteResultsError,
                          ds.addurls,
                          self.json_file, "{url}", "{name}")
Example #16
def test_rerun_old_flag_compatibility(path):
    ds = Dataset(path).create()
    ds.run("echo x$(cat grows) > grows")
    # The deprecated `datalad run --rerun` still runs the last commit's
    # command.
    ds.run(rerun=True)
    eq_("xx\n", open(opj(path, "grows")).read())
    # Running with --rerun and a command ignores the command.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run(rerun=True, cmd="ignored")
        assert_in("Ignoring provided command in --rerun mode", cml.out)
        eq_("xxx\n", open(opj(path, "grows")).read())
Example #17
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    direct_mode = ds.repo.is_direct_mode()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127; the check must
        # live outside the `with` block, or it would never execute
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        # When in direct mode, check at the level of save rather than add
        # because the annex files show up as typechanges and adding them won't
        # necessarily have a "notneeded" status.
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty2'), type='file',
                            status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
Example #18
def test_install_dataladri(src, topurl, path):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path)(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    ok_clean_git(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #19
def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        with swallow_logs(new_level=logging.WARNING) as cml:
            assert_raises(InsufficientArgumentsError, add, path="some")
            assert_in('ignoring non-existent', cml.out)

    ds = Dataset(path)
    ds.create()
    assert_raises(InsufficientArgumentsError, ds.add,
                  opj(pardir, 'path', 'outside'))
Example #20
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example #21
def test_link_file_load(tempfile):
    tempfile2 = tempfile + '_'

    with open(tempfile, 'w') as f:
        f.write("LOAD")

    link_file_load(tempfile, tempfile2) # this should work in general

    ok_(os.path.exists(tempfile2))

    with open(tempfile2, 'r') as f:
        assert_equal(f.read(), "LOAD")

    def inode(fname):
        with open(fname) as fd:
            return os.fstat(fd.fileno()).st_ino

    def stats(fname, times=True):
        """Return stats on the file which should have been preserved"""
        with open(fname) as fd:
            st = os.fstat(fd.fileno())
            stats = (st.st_mode, st.st_uid, st.st_gid, st.st_size)
            if times:
                return stats + (st.st_atime, st.st_mtime)
            else:
                return stats
            # despite copystat, mtime is not copied. TODO
            #        st.st_mtime)


    if on_linux or on_osx:
        # above call should result in the hardlink
        assert_equal(inode(tempfile), inode(tempfile2))
        assert_equal(stats(tempfile), stats(tempfile2))
        # and if we mock absence of .link
        class raise_AttributeError:
            def __call__(*args):
                raise AttributeError("TEST")
        with patch('os.link', new_callable=raise_AttributeError):
            with swallow_logs(logging.WARNING) as cm:
                link_file_load(tempfile, tempfile2) # should still work
                ok_("failed (TEST), copying file" in cm.out)

    # should be a copy (either originally for windows, or after mocked call)
    ok_(inode(tempfile) != inode(tempfile2))
    with open(tempfile2, 'r') as f:
        assert_equal(f.read(), "LOAD")
    assert_equal(stats(tempfile, times=False), stats(tempfile2, times=False))
    os.unlink(tempfile2) # TODO: next two with_tempfile
Example #22
def test_GitRepo_instance_from_clone(src, dst):

    gr = GitRepo.clone(src, dst)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    assert_is_instance(gr.repo, gitpy.Repo,
                       "Failed to instantiate GitPython Repo object.")
    ok_(op.exists(op.join(dst, '.git')))

    # do it again should raise GitCommandError since git will notice there's
    # already a git-repo at that path and therefore can't clone to `dst`
    # Note: Since GitRepo is now a WeakSingletonRepo, this is prevented from
    # happening atm. Disabling for now:
#    raise SkipTest("Disabled for RF: WeakSingletonRepo")
    with swallow_logs() as logs:
        assert_raises(GitCommandError, GitRepo.clone, src, dst)
Example #23
def test_ssh_manager_close_no_throw():
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
Example #24
def test_runner_log_stderr():
    # TODO: no idea yet how to check correct logging via any kind of assertion.

    runner = Runner(dry=False)
    cmd = 'echo stderr-Message should be logged >&2'
    ret = runner.run(cmd, log_stderr=True, expect_stderr=True)
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, str(runner.commands)))

    cmd = 'echo stderr-Message should not be logged >&2'
    with swallow_outputs() as cmo:
        with swallow_logs(new_level=logging.INFO) as cml:
            ret = runner.run(cmd, log_stderr=False)
            eq_(cmo.err.rstrip(), "stderr-Message should not be logged")
            eq_(cml.out, "")
    assert_equal(runner.commands, [], "Run of: %s resulted in non-empty buffer: %s" % (cmd, str(runner.commands)))
Example #25
def test_is_url():
    ok_(is_url('file://localhost/some'))
    ok_(is_url('http://localhost'))
    ok_(is_url('ssh://me@localhost'))
    # in the current understanding it is indeed a URL, but an 'ssh' one with
    # implicit=True, not just a useless scheme='weired' hoping to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    nok_(is_url('relative'))
    nok_(is_url('/absolute'))
    ok_(is_url('like@sshlogin'))  # actually we do allow ssh:implicit urls ATM
    nok_(is_url(''))
    nok_(is_url(' '))
    nok_(is_url(123))  # stuff of other types wouldn't be considered a URL

    # we can pass RI instance directly
    ok_(is_url(RI('file://localhost/some')))
    nok_(is_url(RI('relative')))
Example #26
def _test_debug(msg, args=()):
    if 'empty' in args:
        def node(d):
            # just so Python marks it as a generator
            if False:
                yield d  # pragma: no cover
            else:
                return
    else:
        def node(d):
            yield updated(d, {'debugged': True})

    d1 = debug(node, *args)
    data = {'data': 1}
    with patch('pdb.set_trace') as set_trace:
        with swallow_logs(new_level=logging.INFO) as cml:
            list(d1(data))
            set_trace.assert_called_once_with()
            cml.assert_logged(msg, level='INFO')
Example #27
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True)
        # toplevel dataset was in the house already
        assert_not_in(ds, result)
        assert_in(sub2, result)
        assert_not_in(sub1, result)
        cml.assert_logged(
            msg="Target {} already exists and is not an installed dataset. Skipped.".format(sub1.path),
            regex=False, level='WARNING')
Example #28
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.add("dirt_modified", to_git=True)
    with open(opj(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(opj(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    ok_(ds.repo.is_dirty(path="dirt_modified"))
    neq_(hexsha_initial, ds.repo.get_hexsha())
Example #29
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                ds.addurls(self.json_file,
                           "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)

                subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
                subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

                for subds, fnames in subdir_files.items():
                    for fname in fnames:
                        ok_exists(op.join(subds, fname))

                if save:
                    assert_repo_status(path)
                else:
                    # The datasets are created and saved ...
                    assert_repo_status(path, modified=subdirs)
                    # but the downloaded files aren't.
                    for subds, fnames in subdir_files.items():
                        assert_repo_status(subds, added=fnames)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.save()
            eq_(
                set(
                    subdatasets(dataset=ds,
                                recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #30
    def test_add_delete_after_and_drop(self):
        # To test that the .tar gets removed but that new content was added
        # to the annex repo.  We know the key since the default backend and
        # content remain the same
        key1 = 'SHA256E-s5--16d3ad1974655987dd7801d70659990b89bfe7e931a0a358964e64e901761cc0.dat'

        # previous state of things:
        prev_files = list(find_files('.*', self.annex.path))
        with assert_raises(Exception), \
                swallow_logs():
            self.annex.whereis(key1, key=True, output='full')
        commits_prior = list(self.annex.get_branch_commits_('git-annex'))
        add_archive_content('1.tar',
                            annex=self.annex,
                            strip_leading_dirs=True,
                            delete_after=True)
        commits_after = list(self.annex.get_branch_commits_('git-annex'))
        # There should be a single commit for all additions, +1 to initiate
        # the datalad-archives remote (gh-1258), and another +1 when faking
        # dates because annex.alwayscommit isn't set to false.
        assert_equal(
            len(commits_after),
            len(commits_prior) + 2 + self.annex.fake_dates_enabled)
        assert_equal(prev_files, list(find_files('.*', self.annex.path)))
        w = self.annex.whereis(key1, key=True, output='full')
        assert_equal(len(w), 2)  # in archive, and locally since we didn't drop

        # Let's now do the same but also drop content
        add_archive_content('1.tar',
                            annex=self.annex,
                            strip_leading_dirs=True,
                            delete_after=True,
                            drop_after=True)
        assert_equal(prev_files, list(find_files('.*', self.annex.path)))
        w = self.annex.whereis(key1, key=True, output='full')
        assert_equal(len(w), 1)  # in archive

        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()
Example #31
def _check_ri(ri, cls, exact_str=True, localpath=None, **fields):
    """just a helper to carry out few checks on urls"""
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ri_ = cls(**fields)
        murl = RI(ri)
        eq_(murl.__class__, cls)  # not just a subclass
        eq_(murl, ri_)
        if isinstance(ri, str):
            eq_(str(RI(ri)), ri)
        eq_(eval(repr(ri_)), ri)  # repr leads back to identical ri_
        eq_(ri, ri_)  # just in case; the checks above should fail first if something is wrong
        if not exact_str:
            assert_in('Parsed version of', cml.out)
    (eq_ if exact_str else neq_)(
        str(ri),
        str(ri_))  # that we can reconstruct it EXACTLY on our examples
    # and that we have access to all those fields
    nok_(set(fields).difference(set(cls._FIELDS)))
    for f, v in fields.items():
        eq_(getattr(ri_, f), v)

    if localpath:
        eq_(ri_.localpath, localpath)
        old_localpath = ri_.localpath  # for a test below
    else:
        # if not given -- must be a remote url, should raise exception
        with assert_raises(ValueError):
            ri_.localpath

    # This one does not have a path. TODO: either proxy path from its .RI or adjust
    # hierarchy of classes to make it more explicit
    if cls == GitTransportRI:
        return
    # do changes in the path persist?
    old_str = str(ri_)
    ri_.path = newpath = opj(ri_.path, 'sub')
    eq_(ri_.path, newpath)
    neq_(str(ri_), old_str)
    if localpath:
        eq_(ri_.localpath, opj(old_localpath, 'sub'))
Example #32
def test_log_progress_noninteractive_filter():
    name = "dl-test"
    lgr = LoggerHelper(name).get_initialized_logger()
    pbar_id = "lp_test"
    with swallow_logs(new_level=logging.INFO, name=name) as cml:
        log_progress(lgr.info, pbar_id, "Start", label="testing", total=3)
        log_progress(lgr.info, pbar_id, "THERE0", update=1)
        log_progress(lgr.info,
                     pbar_id,
                     "NOT",
                     update=1,
                     noninteractive_level=logging.DEBUG)
        log_progress(lgr.info,
                     pbar_id,
                     "THERE1",
                     update=1,
                     noninteractive_level=logging.INFO)
        log_progress(lgr.info, pbar_id, "Done")
        for present in ["Start", "THERE0", "THERE1", "Done"]:
            assert_in(present, cml.out)
        assert_not_in("NOT", cml.out)
Example #33
def _test_debug(msg, args=()):
    if 'empty' in args:

        def node(d):
            # just so Python marks it as a generator
            if False:
                yield d  # pragma: no cover
            else:
                return
    else:

        def node(d):
            yield updated(d, {'debugged': True})

    d1 = debug(node, *args)
    data = {'data': 1}
    with patch('pdb.set_trace') as set_trace:
        with swallow_logs(new_level=logging.INFO) as cml:
            list(d1(data))
            set_trace.assert_called_once_with()
            cml.assert_logged(msg, level='INFO')
Example #34
def test_rerun_invalid_merge_run_commit(path):
    ds = Dataset(path).create()
    ds.run("echo foo >>foo")
    ds.run("echo invalid >>invalid")
    run_msg = last_commit_msg(ds.repo)
    run_hexsha = ds.repo.get_hexsha()
    ds.repo.call_git(["reset", "--hard", DEFAULT_BRANCH + "~"])
    with open(op.join(ds.path, "non-run"), "w") as nrfh:
        nrfh.write("non-run")
    ds.save()
    # Assign two parents to the invalid run commit.
    commit = ds.repo.call_git_oneline(
        ["commit-tree", run_hexsha + "^{tree}", "-m", run_msg,
         "-p", run_hexsha + "^",
         "-p", ds.repo.get_hexsha()])
    ds.repo.call_git(["reset", "--hard", commit])
    hexsha_orig = ds.repo.get_hexsha()
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.rerun(since="")
        assert_in("has run information but is a merge commit", cml.out)
    eq_(len(ds.repo.get_revisions(hexsha_orig + ".." + DEFAULT_BRANCH)), 1)
Example #35
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest(
            "Travis-specific '{}' identity file does not exist".format(ifile))

    with patch_config({"datalad.ssh.identityfile": ifile}):
        with swallow_logs(new_level=logging.DEBUG) as cml:
            manager = SSHManager()
            ssh = manager.get_connection('ssh://datalad-test')
            cmd_out, _ = ssh("echo blah")
            if _ssh_manager_is_multiplex:
                expected_socket = op.join(
                    str(manager.socket_dir),
                    get_connection_hash("datalad-test",
                                        identity_file=ifile,
                                        bundled=True))
                ok_(exists(expected_socket))
            manager.close()
            assert_in("-i", cml.out)
            assert_in(ifile, cml.out)
Example #36
def test_ssh_manager_close_no_throw(bogus_socket):
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

        @property
        def ctrl_path(self):
            with open(bogus_socket, "w") as f:
                f.write("whatever")
            return bogus_socket

    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
Example #37
def test_runner_dry(tempfile):

    dry = DryRunProtocol()
    runner = Runner(protocol=dry)

    # test dry command call
    cmd = 'echo Testing äöü東 dry run > %s' % tempfile
    with swallow_logs(new_level=5) as cml:
        ret = runner.run(cmd)
        cml.assert_logged("{DryRunProtocol} Running: %s" % cmd, regex=False)
    assert_equal(("DRY", "DRY"), ret,
                 "Output of dry run (%s): %s" % (cmd, ret))
    assert_equal(split_cmdline(cmd), dry[0]['command'])
    assert_false(os.path.exists(tempfile))

    # test dry python function call
    output = runner.call(os.path.join, 'foo', 'bar')
    assert_is(None, output, "Dry call of: os.path.join, 'foo', 'bar' "
                            "returned: %s" % output)
    assert_in('join', dry[1]['command'][0])
    assert_equal("args=('foo', 'bar')", dry[1]['command'][1])
Example #38
def _test_target_ssh_inherit(standardgroup, src_path, target_path):
    ds = Dataset(src_path).create()
    target_url = 'localhost:%s' % target_path
    remote = "magical"
    ds.create_sibling(target_url, name=remote, shared='group')  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now a month later we created a new subdataset
    subds = ds.create('sub')  # so now we got a hierarchy!
    create_tree(subds.path, {'sub.dat': 'lots of data'})
    subds.add('sub.dat')
    ok_file_under_git(subds.path, 'sub.dat', annexed=True)

    target_sub = Dataset(opj(target_path, 'sub'))
    # since we do not yet have (and thus have not used) an option to record
    # publishing to that sibling by default (e.g. --set-upstream), a bare
    # ds.publish should fail
    assert_raises(InsufficientArgumentsError, ds.publish)
    # non-recursive publish should be ok, BUT it (git or us?) would create
    # an empty sub/ directory
    ds.publish(to=remote)
    ok_(not target_sub.is_installed())  # still not there
    with swallow_logs():  # so no warnings etc
        assert_raises(ValueError, ds.publish, recursive=True)  # since remote doesn't exist
    ds.publish(to=remote, recursive=True, missing='inherit')
    # we added the remote and set all the preferred content settings
    eq_(subds.repo.get_preferred_content('wanted', remote), 'standard' if standardgroup else '')
    eq_(subds.repo.get_preferred_content('group', remote), standardgroup or '')

    ok_(target_sub.is_installed())  # it is there now
    eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
    # and we have transferred the content
    if standardgroup and standardgroup == 'backup':
        # only then content should be copied
        ok_file_has_content(opj(target_sub.path, 'sub.dat'), 'lots of data')
    else:
        # otherwise nothing is copied by default
        assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #39
def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(
            clone_lev2.siblings('query',
                                name='origin-2',
                                return_type='item-or-list')['url']),
        origin.pathobj)

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(
            msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), "
            r"'init'",
            match=False,
            level='DEBUG')
    assert_result_count(clone_lev3.get('file1.txt', on_failure='ignore'),
                        1,
                        action='get',
                        status='ok',
                        path=str(clone_lev3.pathobj / 'file1.txt'))
Example #40
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir, recursive=True)
        # toplevel dataset was in the house already
        assert_not_in(ds, result)
        assert_in(sub2, result)
        assert_not_in(sub1, result)
        cml.assert_logged(
            msg=
            "Target {} already exists and is not an installed dataset. Skipped."
            .format(sub1.path),
            regex=False,
            level='WARNING')
Example #41
    def test_addurls_version(self, path):
        ds = Dataset(path).rev_create(force=True)

        def version_fn(url):
            if url.endswith("b.dat"):
                raise ValueError("Scheme error")
            return url + ".v1"

        with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
            with swallow_logs(new_level=logging.WARNING) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           version_urls=True)
                assert_in("b.dat", str(cml.out))

        names = ["a", "c"]
        for fname in names:
            ok_exists(os.path.join(path, fname))

        whereis = ds.repo.whereis(names, output="full")
        for fname, info in whereis.items():
            eq_(info[ds.repo.WEB_UUID]['urls'],
                ["{}udir/{}.dat.v1".format(self.url, fname)])
Example #42
    def test_addurls_version(self, path):
        ds = Dataset(path).create(force=True)

        def version_fn(url):
            if url.endswith("b.dat"):
                raise ValueError("Scheme error")
            return url + ".v1"

        with patch("datalad.plugin.addurls.get_versioned_url", version_fn):
            with swallow_logs(new_level=logging.WARNING) as cml:
                ds.addurls(self.json_file, "{url}", "{name}",
                           version_urls=True)
                assert_in("b.dat", str(cml.out))

        names = ["a", "c"]
        for fname in names:
            ok_exists(os.path.join(path, fname))

        whereis = ds.repo.whereis(names, output="full")
        for fname, info in whereis.items():
            eq_(info[ds.repo.WEB_UUID]['urls'],
                ["{}udir/{}.dat.v1".format(self.url, fname)])
Example #43
def test_ssh_manager_close_no_throw(bogus_socket):
    manager = SSHManager()

    class bogus:
        def close(self):
            raise Exception("oh I am so bad")

        @property
        def ctrl_path(self):
            with open(bogus_socket, "w") as f:
                f.write("whatever")
            return bogus_socket

    # since we are digging into protected area - should also set _prev_connections
    manager._prev_connections = {}
    manager._connections['bogus'] = bogus()
    assert_raises(Exception, manager.close)
    assert_raises(Exception, manager.close)

    # but should proceed just fine if allow_fail=False
    with swallow_logs(new_level=logging.DEBUG) as cml:
        manager.close(allow_fail=False)
        assert_in('Failed to close a connection: oh I am so bad', cml.out)
Example #44
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(
            os.curdir, recursive=True,
            on_failure='ignore', result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(
            result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it is result[1] since it comes after the '2' submodule
        assert_in_results(
            result, status='error', path=sub1.path, type='dataset',
            message='target path already exists and not empty, refuse to '
                    'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
Example #45
def test_addurls_dry_run(path):
    ds = Dataset(path).create(force=True)

    with chpwd(path):
        json_file = "links.json"
        with open(json_file, "w") as jfh:
            json.dump([{
                "url": "URL/a.dat",
                "name": "a",
                "subdir": "foo"
            }, {
                "url": "URL/b.dat",
                "name": "b",
                "subdir": "bar"
            }, {
                "url": "URL/c.dat",
                "name": "c",
                "subdir": "foo"
            }], jfh)

        ds.add(".", message="setup")

        with swallow_logs(new_level=logging.INFO) as cml:
            ds.addurls(json_file,
                       "{url}",
                       "{subdir}//{_url_filename_root}",
                       dry_run=True)

            for dir_ in ["foo", "bar"]:
                assert_in("Would create a subdataset at {}".format(dir_),
                          cml.out)
            assert_in(
                "Would download URL/a.dat to {}".format(
                    os.path.join(path, "foo", "BASE")), cml.out)

            assert_in("Metadata: {}".format([u"name=a", u"subdir=foo"]),
                      cml.out)
Example #46
    def test_addurls_subdataset(self, path):
        ds = Dataset(path).create(force=True)

        with chpwd(path):
            for save in True, False:
                label = "save" if save else "nosave"
                ds.addurls(self.json_file, "{url}",
                           "{subdir}-" + label + "//{name}",
                           save=save)

                subdirs = ["{}-{}".format(d, label) for d in ["foo", "bar"]]
                subdir_files = dict(zip(subdirs, [["a", "c"], ["b"]]))

                for subds, fnames in subdir_files.items():
                    for fname in fnames:
                        ok_exists(op.join(subds, fname))

                if save:
                    assert_repo_status(path)
                else:
                    # The datasets are created and saved ...
                    assert_repo_status(path, modified=subdirs)
                    # but the downloaded files aren't.
                    for subds, fnames in subdir_files.items():
                        assert_repo_status(subds, added=fnames)

            # Now save the "--nosave" changes and check that we have
            # all the subdatasets.
            ds.save()
            eq_(set(subdatasets(dataset=ds, recursive=True,
                                result_xfm="relpaths")),
                {"foo-save", "bar-save", "foo-nosave", "bar-nosave"})

            # We don't try to recreate existing subdatasets.
            with swallow_logs(new_level=logging.DEBUG) as cml:
                ds.addurls(self.json_file, "{url}", "{subdir}-nosave//{name}")
                assert_in("Not creating subdataset at existing path", cml.out)
Example #47
def test_globbedpaths(path):
    for patterns, expected in [(["1.txt", "2.dat"], {"1.txt", "2.dat"}),
                               (["*.txt",
                                 "*.dat"], {"1.txt", "2.dat", "3.txt"}),
                               (["*.txt"], {"1.txt", "3.txt"})]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)), {opj(path, p) for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([opj(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # We sort the glob outputs.
    glob_results = {"z": "z", "a": ["x", "d", "b"]}
    with patch('datalad.interface.run.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat"]), (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.WARN) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)
        GlobbedPaths(["also not"], pwd=path, warn=False).expand()
        assert_not_in("No matching files found for 'also not'", cml.out)
Example #48
def test_install_skip_failed_recursive(src, path):

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, 'subm 2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        assert_status('error', [result[0]])
        assert_in_results(result, status='ok', path=sub2.path)

        cml.assert_logged(
            msg="target path already exists and not empty".format(sub1.path),
            regex=False,
            level='ERROR')
Example #49
def test_ssh_custom_identity_file():
    ifile = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(ifile):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(ifile))

    from datalad import cfg
    try:
        with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": ifile}):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as cml:
                manager = SSHManager()
                ssh = manager.get_connection('ssh://localhost')
                cmd_out, _ = ssh("echo blah")
                expected_socket = op.join(
                    manager.socket_dir,
                    get_connection_hash("localhost", identity_file=ifile))
                ok_(exists(expected_socket))
                manager.close()
                assert_in("-i", cml.out)
                assert_in(ifile, cml.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
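
# The test above relies on a general mechanism: DataLad options can be overridden
# through DATALAD_* environment variables, and cfg.reload(force=True) makes the
# running process pick the override up (and later drop it again). A minimal sketch,
# with an illustrative identity-file path:
from unittest.mock import patch

from datalad import cfg

with patch.dict("os.environ", {"DATALAD_SSH_IDENTITYFILE": "/tmp/some-identity"}):
    cfg.reload(force=True)
    # code executed here (e.g. SSHManager connections) sees the overridden option
    ...
cfg.reload(force=True)  # ensure the override does not linger once the patch is gone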
Example #50
0
def test_install_simple_local(src, path):
    origin = Dataset(path)

    # now install it somewhere else
    ds = install(path, source=src)
    eq_(ds.path, path)
    ok_(ds.is_installed())
    if not isinstance(origin.repo, AnnexRepo):
        # this means it is a GitRepo
        ok_(isinstance(origin.repo, GitRepo))
        # stays plain Git repo
        ok_(isinstance(ds.repo, GitRepo))
        ok_(not isinstance(ds.repo, AnnexRepo))
        ok_(GitRepo.is_valid_repo(ds.path))
        eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'})
        ok_clean_git(path, annex=False)
    else:
        # must be an annex
        ok_(isinstance(ds.repo, AnnexRepo))
        ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False))
        eq_(set(ds.repo.get_indexed_files()),
            {'test.dat', 'INFO.txt', 'test-annex.dat'})
        ok_clean_git(path, annex=True)
        # no content was installed:
        ok_(not ds.repo.file_has_content('test-annex.dat'))
        uuid_before = ds.repo.uuid

    # installing it again, shouldn't matter:
    with swallow_logs(new_level=logging.INFO) as cml:
        ds = install(path, source=src)
        cml.assert_logged(msg="{0} was already installed from".format(ds),
                          regex=False,
                          level="INFO")
        ok_(ds.is_installed())
        if isinstance(origin.repo, AnnexRepo):
            eq_(uuid_before, ds.repo.uuid)
Example #51
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://localhost:22",
                       target_dir=target_path,
                       ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target's UUID should be known within the source, if it is an annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   name="local_target_alt",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir" %
            target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   name="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   publish_by_default='master',
                                   existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has changed a bit
        # and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
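
# The reconfigure check above is an instance of a generic snapshot-and-diff idea:
# record per-file digests and mtimes, repeat the operation, then assert that content
# is unchanged while only an expected set of files was touched. A condensed sketch of
# just that comparison, assuming the snapshot dicts are already given:
def assert_only_touched(orig_digests, orig_mtimes, digests, mtimes, expected_touched):
    # content (digests) must be identical before and after the operation
    assert_dict_equal(orig_digests, digests)
    # ... while only the expected files may have received new mtimes
    modified = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
    assert_set_equal(modified, set(expected_touched))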
Example #52
0
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("cat {inputs} {inputs} >doubled.dat",
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
Example #53
0
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch, so it would also fail since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
        assert_raises(GitCommandError):
        create_sibling(dataset=source,
                       target="local_target",
                       sshurl="ssh://localhost",
                       target_dir=target_path,
                       ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And the target's UUID should be known within the source, if it is an annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost",
                                   target_dir=target_path)
    eq_("Target directory %s already exists." % target_path, str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(dataset=source,
                                   target="local_target",
                                   sshurl="ssh://localhost" + target_path,
                                   existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
Example #54
0
def _test_version_check(host, dspath, store):

    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary choice.
    # We need just something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError,
                  ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')
Example #55
0
def test_plugin_call(path, dspath):
    # make plugins
    create_tree(
        path,
        {
            'dlplugin_dummy.py': dummy_plugin,
            'dlplugin_nodocs.py': nodocs_plugin,
            'dlplugin_broken.py': broken_plugin,
        })
    fake_dummy_spec = {
        'dummy': {'file': opj(path, 'dlplugin_dummy.py')},
        'nodocs': {'file': opj(path, 'dlplugin_nodocs.py')},
        'broken': {'file': opj(path, 'dlplugin_broken.py')},
    }

    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        with swallow_outputs() as cmo:
            plugin(showplugininfo=True)
            # hyphen spacing depends on the longest plugin name!
            # sorted
            # summary list generation doesn't actually load plugins for speed,
            # hence broken is not known to be broken here
            eq_(cmo.out,
                "broken [no synopsis] ({})\ndummy  - real dummy ({})\nnodocs [no synopsis] ({})\n".format(
                    fake_dummy_spec['broken']['file'],
                    fake_dummy_spec['dummy']['file'],
                    fake_dummy_spec['nodocs']['file']))
        with swallow_outputs() as cmo:
            plugin(['dummy'], showpluginhelp=True)
            eq_(cmo.out.rstrip(), "Usage: dummy(dataset, noval, withval='test')\n\nmydocstring")
        with swallow_outputs() as cmo:
            plugin(['nodocs'], showpluginhelp=True)
            eq_(cmo.out.rstrip(), "Usage: nodocs()\n\nThis plugin has no documentation")
        # loading fails, no docs
        assert_raises(ValueError, plugin, ['broken'], showpluginhelp=True)

    # assume this most obscure plugin name is not used
    assert_raises(ValueError, plugin, '32sdfhvz984--^^')

    # broken plugin argument, must match Python keyword arg
    # specs
    assert_raises(ValueError, plugin, ['dummy', '1245'])

    def fake_is_installed(*args, **kwargs):
        return True
    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec), \
        patch('datalad.distribution.dataset.Dataset.is_installed', return_value=True):
        # does not trip over unsupported arguments; they get filtered out, because
        # we carry all kinds of stuff
        with swallow_logs(new_level=logging.WARNING) as cml:
            res = list(plugin(['dummy', 'noval=one', 'obscure=some']))
            assert_status('ok', res)
            cml.assert_logged(
                msg=".*Ignoring plugin argument\\(s\\).*obscure.*, not supported by plugin.*",
                regex=True, level='WARNING')
        # fails on missing positional arg
        assert_raises(TypeError, plugin, ['dummy'])
        # positional and kwargs actually make it into the plugin
        res = list(plugin(['dummy', 'noval=one', 'withval=two']))[0]
        eq_('one', res['args']['noval'])
        eq_('two', res['args']['withval'])
        # kwarg defaults are preserved
        res = list(plugin(['dummy', 'noval=one']))[0]
        eq_('test', res['args']['withval'])
        # repeated specification yields list input
        res = list(plugin(['dummy', 'noval=one', 'noval=two']))[0]
        eq_(['one', 'two'], res['args']['noval'])
        # can do the same thing while bypassing argument parsing for calls
        # from within python, and even preserve native python dtypes
        res = list(plugin(['dummy', ('noval', 1), ('noval', 'two')]))[0]
        eq_([1, 'two'], res['args']['noval'])
        # and we can further simplify in this case by passing lists right
        # away
        res = list(plugin(['dummy', ('noval', [1, 'two'])]))[0]
        eq_([1, 'two'], res['args']['noval'])

    # dataset arg handling
    # run plugin that needs a dataset where there is none
    with patch('datalad.plugin._get_plugins', return_value=fake_dummy_spec):
        ds = None
        with chpwd(dspath):
            assert_raises(ValueError, plugin, ['dummy', 'noval=one'])
            # create a dataset here, fixes the error
            ds = create()
            res = list(plugin(['dummy', 'noval=one']))[0]
            # gives dataset instance
            eq_(ds, res['args']['dataset'])
        # now do it again, giving the dataset path
        # but careful, `dataset` is a proper argument
        res = list(plugin(['dummy', 'noval=one'], dataset=dspath))[0]
        eq_(ds, res['args']['dataset'])
        # however, if passed alongside the plugins args it also works
        res = list(plugin(['dummy', 'dataset={}'.format(dspath), 'noval=one']))[0]
        eq_(ds, res['args']['dataset'])
        # but if both are given, the proper argument takes precedence
        assert_raises(ValueError, plugin, ['dummy', 'dataset={}'.format(dspath), 'noval=one'],
                      dataset='rubbish')
Example #56
0
def test_addurls_dropped_urls(self, path):
    ds = Dataset(path).create(force=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.addurls(self.json_file, "", "{subdir}//{name}")
        assert_re_in(r".*Dropped [0-9]+ row\(s\) that had an empty URL",
                     str(cml.out))
Example #57
0
def test_url_samples():
    _check_ri("http://example.com", URL, scheme='http', hostname="example.com")
    # "complete" one for classical http
    _check_ri("http://*****:*****@example.com:8080/p/sp?p1=v1&p2=v2#frag", URL,
              scheme='http', hostname="example.com", port=8080,
              username='******', password='******', path='/p/sp',
              query='p1=v1&p2=v2', fragment='frag')

    # sample one for ssh with specifying the scheme
    # XXX? might be useful?  https://github.com/FriendCode/giturlparse.py
    _check_ri("ssh://host/path/sp1", URL, scheme='ssh', hostname='host', path='/path/sp1')
    _check_ri("user@host:path/sp1", SSHRI,
              hostname='host', path='path/sp1', username='******')
    _check_ri("host:path/sp1", SSHRI, hostname='host', path='path/sp1')
    _check_ri("host:path", SSHRI, hostname='host', path='path')
    _check_ri("host:/path", SSHRI, hostname='host', path='/path')
    _check_ri("user@host", SSHRI, hostname='host', username='******')
    # TODO!!!  should this be a legit URL like this?
    # _check_ri("host", SSHRI, hostname='host'))
    eq_(repr(RI("host:path")), "SSHRI(hostname='host', path='path')")

    # And now perspective 'datalad', implicit=True urls pointing to the canonical center location
    _check_ri("///", DataLadRI)
    _check_ri("///p/s1", DataLadRI, path='p/s1')
    # could be considered by someone as "URI reference" relative to scheme
    _check_ri("//a/", DataLadRI, remote='a')
    _check_ri("//a/data", DataLadRI, path='data', remote='a')

    # here we will do custom magic allowing only schemes with + in them, such as dl+archive
    # or not so custom as
    _check_ri("hg+https://host/user/proj", URL,
              scheme="hg+https", hostname='host', path='/user/proj')
    # "old" style
    _check_ri("dl+archive:KEY/path/sp1#size=123", URL,
              scheme='dl+archive', path='KEY/path/sp1', fragment='size=123')
    # "new" style
    _check_ri("dl+archive:KEY#path=path/sp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path/sp1&size=123')
    # actually above one is probably wrong since we need to encode the path
    _check_ri("dl+archive:KEY#path=path%2Fbsp1&size=123", URL,
              scheme='dl+archive', path='KEY', fragment='path=path%2Fbsp1&size=123')

    #https://en.wikipedia.org/wiki/File_URI_scheme
    _check_ri("file://host", URL, scheme='file', hostname='host')
    _check_ri("file://host/path/sp1", URL, scheme='file', hostname='host', path='/path/sp1')
    # stock libraries of Python aren't quite ready for ipv6
    ipv6address = '2001:db8:85a3::8a2e:370:7334'
    _check_ri("file://%s/path/sp1" % ipv6address, URL,
              scheme='file', hostname=ipv6address, path='/path/sp1')
    for lh in ('localhost', '::1', '', '127.3.4.155'):
        _check_ri("file://%s/path/sp1" % lh, URL, localpath='/path/sp1',
                  scheme='file', hostname=lh, path='/path/sp1')
    _check_ri('http://[1fff:0:a88:85a3::ac1f]:8001/index.html', URL,
              scheme='http', hostname='1fff:0:a88:85a3::ac1f', port=8001, path='/index.html')
    _check_ri("file:///path/sp1", URL, localpath='/path/sp1', scheme='file', path='/path/sp1')
    # we don't do any magical comprehension for home paths/drives for windows
    # of file:// urls, thus leaving /~ and /c: for now:
    _check_ri("file:///~/path/sp1", URL, localpath='/~/path/sp1', scheme='file', path='/~/path/sp1')
    _check_ri("file:///%7E/path/sp1", URL, localpath='/~/path/sp1', scheme='file', path='/~/path/sp1', exact_str=False)
    # not sure but let's check
    _check_ri("file:///c:/path/sp1", URL, localpath='/c:/path/sp1', scheme='file', path='/c:/path/sp1', exact_str=False)

    # and now implicit paths or actually they are also "URI references"
    _check_ri("f", PathRI, localpath='f', path='f')
    _check_ri("f/s1", PathRI, localpath='f/s1', path='f/s1')
    _check_ri("/f", PathRI, localpath='/f', path='/f')
    _check_ri("/f/s1", PathRI, localpath='/f/s1', path='/f/s1')

    # some github ones, just to make sure
    _check_ri("git://host/user/proj", URL, scheme="git", hostname="host", path="/user/proj")
    _check_ri("git@host:user/proj", SSHRI, hostname="host", path="user/proj", username='******')

    _check_ri('weired:/', SSHRI, hostname='weired', path='/')
    # since the scheme does not allow some symbols, we need an additional check
    _check_ri('weired_url:/', SSHRI, hostname='weired_url', path='/')
    _check_ri('example.com:/', SSHRI, hostname='example.com', path='/')
    _check_ri('example.com:path/sp1', SSHRI, hostname='example.com', path='path/sp1')
    _check_ri(r'example.com/path/sp1\:fname', PathRI, localpath=r'example.com/path/sp1\:fname',
              path=r'example.com/path/sp1\:fname')
    # ssh is as stupid as us, so we will stay "Consistently" dumb
    """
    $> ssh example.com/path/sp1:fname
    ssh: Could not resolve hostname example.com/path/sp1:fname: Name or service not known
    """
    _check_ri('example.com/path/sp1:fname', SSHRI, hostname='example.com/path/sp1', path='fname')

    # SSHRIs have .port, but it is empty
    eq_(SSHRI(hostname='example.com').port, '')

    # check that we are getting a warning logged when url can't be reconstructed
    # precisely
    # actually failed to come up with one -- becomes late here
    #_check_ri("http://host///..//p", scheme='http', path='/..//p')

    # actually this one is good enough to trigger a warning and I still don't know
    # what it should exactly be!?
    with swallow_logs(new_level=logging.DEBUG) as cml:
        weired_str = 'weired://'
        weired_url = RI(weired_str)
        repr(weired_url)
        cml.assert_logged(
            'Parsed version of SSHRI .weired:/. '
            'differs from original .weired://.'
        )
        # but we store original str
        eq_(str(weired_url), weired_str)
        neq_(weired_url.as_str(), weired_str)


    raise SkipTest("TODO: file://::1/some does complain about parsed version dropping ::1")
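
# The _check_ri() calls above exercise the different resource-identifier classes;
# RI() itself acts as a factory that returns whichever class matches the string form.
# A small sketch of that dispatch, assuming the usual datalad.support.network layout:
from datalad.support.network import RI, URL, SSHRI, PathRI

# RI() inspects the string and hands back an instance of the matching subclass
assert isinstance(RI("http://example.com/p/sp"), URL)
assert isinstance(RI("user@host:path/sp1"), SSHRI)
assert isinstance(RI("relative/path"), PathRI)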
Example #58
0
def test_aggregation(path):
    with chpwd(path):
        assert_raises(InsufficientArgumentsError, aggregate_metadata, None)
    # a hierarchy of three (super/sub)datasets, each with some native metadata
    ds = Dataset(opj(path, 'origin')).create(force=True)
    # before anything aggregated we would get nothing and only a log warning
    with swallow_logs(new_level=logging.WARNING) as cml:
        assert_equal(list(query_aggregated_metadata('all', ds, [])), [])
    assert_re_in('.*Found no aggregated metadata.*update', cml.out)
    ds.config.add('datalad.metadata.nativetype',
                  'frictionless_datapackage',
                  where='dataset')
    subds = ds.create('sub', force=True)
    subds.config.add('datalad.metadata.nativetype',
                     'frictionless_datapackage',
                     where='dataset')
    subsubds = subds.create('subsub', force=True)
    subsubds.config.add('datalad.metadata.nativetype',
                        'frictionless_datapackage',
                        where='dataset')
    ds.save(recursive=True)
    assert_repo_status(ds.path)
    # aggregate metadata from all subdatasets into any superdataset, including
    # intermediate ones
    res = ds.aggregate_metadata(recursive=True, update_mode='all')
    # we get success report for both subdatasets and the superdataset,
    # and they get saved
    assert_result_count(res, 3, status='ok', action='aggregate_metadata')
    assert_in_results(res, action='save', status="ok")
    # nice and tidy
    assert_repo_status(ds.path)

    # quick test of aggregate report
    aggs = ds.metadata(get_aggregates=True)
    # one for each dataset
    assert_result_count(aggs, 3)
    # the mother dataset also reports the layout version
    assert_result_count(aggs, 1, path=ds.path, layout_version=1)

    # store clean direct result
    origres = ds.metadata(recursive=True)
    # basic sanity check
    assert_result_count(origres, 6)
    assert_result_count(origres, 3, type='dataset')
    assert_result_count(origres, 3, type='file')  # Now that we have annex.key
    # three different IDs
    assert_equal(
        3, len(set([s['dsid'] for s in origres if s['type'] == 'dataset'])))
    # and we know about all three datasets
    for name in ('MOTHER_äöü東', 'child_äöü東', 'grandchild_äöü東'):
        assert_true(
            sum([s['metadata']['frictionless_datapackage']['name'] \
                    == ensure_unicode(name) for s in origres
                 if s['type'] == 'dataset']))

    # now clone the beast to simulate a new user installing an empty dataset
    clone = install(opj(path, 'clone'),
                    source=ds.path,
                    result_xfm='datasets',
                    return_type='item-or-list')
    # ID mechanism works
    assert_equal(ds.id, clone.id)

    # get fresh metadata
    cloneres = clone.metadata()
    # basic sanity check
    assert_result_count(cloneres, 2)
    assert_result_count(cloneres, 1, type='dataset')
    assert_result_count(cloneres, 1, type='file')

    # now loop over the previous results from the direct metadata query of
    # origin and make sure we get the exact same stuff from the clone
    _compare_metadata_helper(origres, clone)

    # now obtain a subdataset in the clone, should make no difference
    assert_status('ok',
                  clone.install('sub', result_xfm=None, return_type='list'))
    _compare_metadata_helper(origres, clone)

    # test search in search tests, not all over the place
    ## query smoke test
    assert_result_count(clone.search('mother', mode='egrep'), 1)
    assert_result_count(clone.search('(?i)MoTHER', mode='egrep'), 1)

    child_res = clone.search('child', mode='egrep')
    assert_result_count(child_res, 2)
    for r in child_res:
        if r['type'] == 'dataset':
            assert_in(r['query_matched']['frictionless_datapackage.name'],
                      r['metadata']['frictionless_datapackage']['name'])
Example #59
0
def test_publish_recursive(origin, src_path, dst_path, sub1_pub, sub2_pub):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    # subdatasets have no remote yet, so recursive publishing should fail:
    with assert_raises(ValueError) as cm:
        publish(dataset=source, to="target", recursive=True)
    assert_in("No sibling 'target' found.", str(cm.exception))

    # now, set up targets for the submodules:
    sub1_target = GitRepo(sub1_pub, create=True)
    sub1_target.checkout("TMP", ["-b"])
    sub2_target = AnnexRepo(sub2_pub, create=True)
    sub2_target.checkout("TMP", ["-b"])
    sub1 = GitRepo(opj(src_path, 'subm 1'), create=False)
    sub2 = GitRepo(opj(src_path, 'subm 2'), create=False)
    sub1.add_remote("target", sub1_pub)
    sub2.add_remote("target", sub2_pub)

    # publish recursively
    with swallow_logs(new_level=logging.DEBUG) as cml:
        res = publish(dataset=source, to="target", recursive=True)
        assert_not_in('forced update', cml.out,
                      "we probably haven't merged git-annex before pushing")

    # testing result list
    # (Note: Dataset lacks __eq__ for now. Should this be based on path only?)
    assert_is_instance(res, tuple)
    assert_is_instance(res[0], list)
    assert_is_instance(res[1], list)
    eq_(res[1], [])  # nothing failed/was skipped
    for item in res[0]:
        assert_is_instance(item, Dataset)
    eq_({res[0][0].path, res[0][1].path, res[0][2].path},
        {src_path, sub1.path, sub2.path})

    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))
    eq_(list(target.get_branch_commits("git-annex")),
        list(source.repo.get_branch_commits("git-annex")))
    eq_(list(sub1_target.get_branch_commits("master")),
        list(sub1.get_branch_commits("master")))
    eq_(list(sub1_target.get_branch_commits("git-annex")),
        list(sub1.get_branch_commits("git-annex")))
    eq_(list(sub2_target.get_branch_commits("master")),
        list(sub2.get_branch_commits("master")))
    eq_(list(sub2_target.get_branch_commits("git-annex")),
        list(sub2.get_branch_commits("git-annex")))

    # test publishing with --since. By default, since there are no changes, only the current dataset gets pushed
    res_ = publish(dataset=source, recursive=True)
    # only current one would get pushed
    eq_(set(r.path for r in res_[0]), {src_path})

    # all get pushed
    res_ = publish(dataset=source, recursive=True, since='HEAD^')
    eq_(set(r.path for r in res_[0]), {src_path, sub1.path, sub2.path})

    # Let's now update one subm
    with open(opj(sub2.path, "file.txt"), 'w') as f:
        f.write('')
    sub2.add('file.txt')
    sub2.commit("")
    # TODO: Doesn't work:  https://github.com/datalad/datalad/issues/636
    #source.save("changed sub2", auto_add_changes=True)
    source.repo.commit("", options=['-a'])

    res_ = publish(dataset=source, recursive=True)
    # only updated ones were published
    eq_(set(r.path for r in res_[0]), {src_path, sub2.path})
Example #60
0
def test_globbedpaths(path):
    dotdir = op.curdir + op.sep

    for patterns, expected in [
        (["1.txt", "2.dat"], {"1.txt", "2.dat"}),
        ([dotdir + "1.txt", "2.dat"], {dotdir + "1.txt", "2.dat"}),
        (["*.txt", "*.dat"], {"1.txt", "2.dat", u"bβ.dat", "3.txt"}),
        ([dotdir + "*.txt",
          "*.dat"], {dotdir + "1.txt", "2.dat", u"bβ.dat", dotdir + "3.txt"}),
        (["subdir/*.txt"], {"subdir/1.txt", "subdir/2.txt"}),
        ([dotdir + "subdir/*.txt"],
         {dotdir + p
          for p in ["subdir/1.txt", "subdir/2.txt"]}),
        (["*.txt"], {"1.txt", "3.txt"})
    ]:
        gp = GlobbedPaths(patterns, pwd=path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)), {op.join(path, p) for p in expected})

    pardir = op.pardir + op.sep
    subdir_path = op.join(path, "subdir")
    for patterns, expected in [
        (["*.txt"], {"1.txt", "2.txt"}),
        ([dotdir + "*.txt"], {dotdir + p
                              for p in ["1.txt", "2.txt"]}),
        ([pardir + "*.txt"], {pardir + p
                              for p in ["1.txt", "3.txt"]}),
        ([dotdir + pardir + "*.txt"],
         {dotdir + pardir + p
          for p in ["1.txt", "3.txt"]}), (["subdir/"], {"subdir/"})
    ]:
        gp = GlobbedPaths(patterns, pwd=subdir_path)
        eq_(set(gp.expand()), expected)
        eq_(set(gp.expand(full=True)),
            {op.join(subdir_path, p)
             for p in expected})

    # Full patterns still get returned as relative to pwd.
    gp = GlobbedPaths([op.join(path, "*.dat")], pwd=path)
    eq_(gp.expand(), ["2.dat", u"bβ.dat"])

    # "." gets special treatment.
    gp = GlobbedPaths([".", "*.dat"], pwd=path)
    eq_(set(gp.expand()), {"2.dat", u"bβ.dat", "."})
    eq_(gp.expand(dot=False), ["2.dat", u"bβ.dat"])
    gp = GlobbedPaths(["."], pwd=path, expand=False)
    eq_(gp.expand(), ["."])
    eq_(gp.paths, ["."])

    # Mocked glob outputs are returned sorted within each pattern.
    glob_results = {"z": "z", "a": ["x", "d", "b"]}
    with patch('glob.glob', glob_results.get):
        gp = GlobbedPaths(["z", "a"])
        eq_(gp.expand(), ["z", "b", "d", "x"])

    # glob expansion for paths property is determined by expand argument.
    for expand, expected in [(True, ["2.dat", u"bβ.dat"]), (False, ["*.dat"])]:
        gp = GlobbedPaths(["*.dat"], pwd=path, expand=expand)
        eq_(gp.paths, expected)

    with swallow_logs(new_level=logging.DEBUG) as cml:
        GlobbedPaths(["not here"], pwd=path).expand()
        assert_in("No matching files found for 'not here'", cml.out)