Example #1
def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2', # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds._save("committing", path="./mike1")

    # Let's also do it in a subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds._save("committing", path="./mike2")

        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds._save("committing", path=later)

    ok_clean_git(dspath)
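
Every example on this page funnels through chpwd. As a rough mental model only -- a minimal sketch, assuming the behavior the tests above rely on; datalad's real implementation additionally handles logging, a mkdir flag, and path normalization -- it can be pictured as:

import os
from contextlib import contextmanager

@contextmanager
def chpwd_sketch(path, mkdir=False):
    # hypothetical stand-in for datalad.utils.chpwd, illustration only
    if path is None:
        # a None target is a no-op, as relied upon by the crawl.py example below
        yield
        return
    pwd = os.getcwd()
    if mkdir and not os.path.exists(path):
        os.makedirs(path)
    os.chdir(path)
    try:
        yield
    finally:
        # always restore the original working directory
        os.chdir(pwd)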
Example #2
def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
Example #3
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as the log level while
            # swallowing, let's check whether an exit code was reported --
            # test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
Example #4
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    with open(os.path.join(ds.path, 'test'), 'w') as f:
        f.write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(
        _path_('d1/subds'), source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a superdataset that actually
    # has the queried dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # but we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path,
            cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #5
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    # create some file and commit it
    with open(os.path.join(ds.path, 'test'), 'w') as f:
        f.write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.get_subdatasets()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_subdatasets(absolute=True)[0])
    eq_(subdss, ds.get_subdatasets(recursive=True))
    eq_(subdss, ds.get_subdatasets(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_subdatasets, pattern='sub*')
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path)
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #6
File: test_run.py  Project: hanke/datalad
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    direct_mode = ds.repo.is_direct_mode()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
        # let's not speculate that the exit code is always 127; assert
        # outside the block, otherwise this check would never run
        ok_(cme.exception.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        # When in direct mode, check at the level of save rather than add
        # because the annex files show up as typechanges and adding them won't
        # necessarily have a "notneeded" status.
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_result_count(res, 1, action='save' if direct_mode else 'add',
                            status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_result_count(res, 1, action='add', path=opj(ds.path, 'empty2'), type='file',
                            status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
Example #7
def test_create_curdir(path, path2):
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    ok_(op.exists(op.join(ds.path, '.noannex')))
Example #8
def test_save_directory(path):
    # Sequence of save invocations on subdirectories.
    ds = Dataset(path).create(force=True)
    ds._save(path='sdir1')
    ok_clean_git(ds.path, untracked=['sdir2/foo', 'sdir3/sdir/subsub/foo'])

    # There is also a difference when using the plain command from within the dataset
    with chpwd(path):
        save(path='sdir2')
    ok_clean_git(ds.path, untracked=['sdir3/sdir/subsub/foo'])

    with chpwd(opj(path, 'sdir3')):
        save(path='sdir')
    ok_clean_git(ds.path)
Example #9
def test_remove_file_handle_only(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ok_clean_git(ds.path)
    # make sure there is a key
    ok_(len(ds.repo.get_file_key('one')))
    # both files link to the same key
    eq_(ds.repo.get_file_key('one'),
        ds.repo.get_file_key('two'))
    rpath_one = realpath(opj(ds.path, 'one'))
    eq_(rpath_one, realpath(opj(ds.path, 'two')))
    path_two = opj(ds.path, 'two')
    ok_(exists(path_two))
    # remove one handle, should not affect the other
    ds.remove('two', check=False, message="custom msg")
    eq_(ds.repo.repo.head.commit.message.rstrip(), "custom msg")
    eq_(rpath_one, realpath(opj(ds.path, 'one')))
    ok_(exists(rpath_one))
    ok_(not exists(path_two))
    # remove file without specifying the dataset -- shouldn't fail
    with chpwd(path):
        remove('one', check=False)
        ok_(not exists("one"))
    # and we should be able to remove without saving
    ds.remove('three', check=False, save=False)
    ok_(ds.repo.dirty)
Example #10
def test_drop_nocrash_absent_subds(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    parent.uninstall('sub')
    ok_clean_git(parent.path)
    with chpwd(path):
        assert_status('notneeded', drop('.', recursive=True))
Example #11
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible', publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
Example #12
def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs a SSH URL
        assert_raises(InsufficientArgumentsError, create_sibling, '')
        assert_raises(ValueError, create_sibling, 'http://ignore.me')
        # needs an actual dataset
        assert_raises(
            ValueError,
            create_sibling, 'localhost:/tmp/somewhere', dataset='/nothere')
    # pre-configure a bogus remote
    ds = Dataset(path).create()
    ds.repo.add_remote('bogus', 'http://bogus.url.com')
    # fails to reconfigure by default, both with a generated name
    # and when given an existing name
    for res in (ds.create_sibling('bogus:/tmp/somewhere', on_failure='ignore'),
                ds.create_sibling('localhost:/tmp/somewhere', name='bogus', on_failure='ignore')):
        assert_result_count(
            res, 1,
            status='error',
            message=(
                "sibling '%s' already configured (specify alternative name, or force reconfiguration via --existing",
                'bogus'))
Example #13
def test_search_outside1_install_default_ds(tdir, default_dspath):
    with chpwd(tdir):
        # let's mock out even actual install/search calls
        with \
            patch_config({'datalad.locations.default-dataset': default_dspath}), \
            patch('datalad.api.install',
                  return_value=Dataset(default_dspath)) as mock_install, \
            patch('datalad.distribution.dataset.Dataset.search',
                  new_callable=_mock_search):
            _check_mocked_install(default_dspath, mock_install)

            # now on subsequent run, we want to mock as if dataset already exists
            # at central location and then do search again
            from datalad.ui import ui
            ui.add_responses('yes')
            mock_install.reset_mock()
            with patch(
                    'datalad.distribution.dataset.Dataset.is_installed',
                    True):
                _check_mocked_install(default_dspath, mock_install)

            # and what if we say "no" to install?
            ui.add_responses('no')
            mock_install.reset_mock()
            with assert_raises(NoDatasetArgumentFound):
                list(search("."))

            # and if path exists and is a valid dataset and we say "no"
            Dataset(default_dspath).create()
            ui.add_responses('no')
            mock_install.reset_mock()
            with assert_raises(NoDatasetArgumentFound):
                list(search("."))
Example #14
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Example #15
    def _expand_globs(self):
        def normalize_hit(h):
            normalized = op.relpath(h) + ("" if op.basename(h) else op.sep)
            if h == op.curdir + op.sep + normalized:
                # Don't let relpath prune "./fname" (gh-3034).
                return h
            return normalized

        expanded = []
        with chpwd(self.pwd):
            for pattern in self._paths["patterns"]:
                hits = glob.glob(pattern)
                if hits:
                    expanded.extend(sorted(map(normalize_hit, hits)))
                else:
                    lgr.debug("No matching files found for '%s'", pattern)
                    # We didn't find a hit for the complete pattern. If we find
                    # a sub-pattern hit, that may mean we have an uninstalled
                    # subdataset.
                    for sub_pattern in self._get_sub_patterns(pattern):
                        sub_hits = glob.glob(sub_pattern)
                        if sub_hits:
                            expanded.extend(
                                sorted(map(normalize_hit, sub_hits)))
                            break
                    # ... but we still want to retain the original pattern
                    # because we don't know for sure at this point, and it
                    # won't bother the "install, reglob" routine.
                    expanded.extend([pattern])
        return expanded
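
_get_sub_patterns is not shown in this example. Purely as a hypothetical sketch -- its behavior is inferred from the comments above, not from datalad's source -- it could yield progressively shorter leading portions of the glob pattern, so that a hit on a parent directory can point at an uninstalled subdataset:

import os.path as op

def get_sub_patterns_sketch(pattern):
    # e.g. "sub/deep/*.dat" -> ["sub/deep", "sub"]: leading directory
    # portions of the pattern, longest first
    subs = []
    head = op.split(pattern)[0]
    while head:
        subs.append(head)
        head = op.split(head)[0]
    return subs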
Example #16
def test_implicit_install(src, dst):

    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install, source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False, result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
Example #17
def test_new_relpath(topdir):
    from datalad.api import create_test_dataset
    with swallow_logs(), chpwd(topdir), swallow_outputs():
        dss = create_test_dataset('testds', spec='1')
    eq_(dss[0], opj(topdir, 'testds'))
    eq_(len(dss), 2)  # 1 top + 1 sub-dataset as demanded
    for ds in dss:
        ok_clean_git(ds, annex=False)
Example #18
def test_install_dataset_from_just_source(url, path):

    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
Example #19
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    ok_clean_git(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = add('.', recursive=True)
    ok_clean_git(ds.path)
Example #20
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #21
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #22
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(opj(path, 'dir')):
        eq_(add('testindir')[0]['path'],
            opj(ds.path, 'dir', 'testindir'))
        # and now add all
        add('..')
    # auto-save enabled
    assert_false(ds.repo.dirty)
Example #23
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.add('staged', save=False)
    ok_clean_git(ds.path, head_modified=['staged'], untracked=['untracked'])
    # plain save does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save()
    ok_clean_git(ds.path, untracked=['untracked'])
Example #24
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install('ds1', source=src)
        ds2 = install('ds2', source=src)
        results = get([opj('ds1', 'test-annex.dat')] + glob(opj('ds2', '*.dat')))
        # each ds has one file
        eq_(len(results), 2)
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
Example #25
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
Example #26
def test_safetynet(path):
    ds = Dataset(path).create()
    os.makedirs(opj(ds.path, 'deep', 'down'))
    for p in (ds.path, opj(ds.path, 'deep'), opj(ds.path, 'deep', 'down')):
        with chpwd(p):
            # will never remove PWD, or anything outside the dataset
            for target in (ds.path, os.curdir, os.pardir, opj(os.pardir, os.pardir)):
                assert_status(
                    ('error', 'impossible'),
                    uninstall(path=target, on_failure='ignore'))
Example #27
def get_baseline(p):
    ds = Dataset(p).create()
    with chpwd(ds.path):
        subds = create('sub')
    ds.add('sub', save=False)
    create_tree(subds.path, {"staged": ""})
    subds.add("staged", save=False)
    # subdataset has staged changes.
    ok_clean_git(ds.path, index_modified=['sub'])
    return ds
Example #28
File: test_run.py  Project: hanke/datalad
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path,
                        strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path,
                        strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"],
        "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in >c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path),
                  script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path),
                  script_out)

    assert_result_count(
        ds.run("{unknown_placeholder}", on_failure="ignore"),
        1, status="impossible", action="run")

    # Configured placeholders.
    ds.config.add("datalad.run.substitutions.license", "gpl3", where="local")
    ds.run("echo {license} >configured-license")
    ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True)
    # --script handles configured placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("gpl3", cmout.getvalue())
Example #29
File: crawl.py  Project: glalteva/datalad
    def __call__(path=None, dry_run=False, is_pipeline=False, is_template=False, chdir=None):
        from datalad.crawler.pipeline import (
            load_pipeline_from_config, load_pipeline_from_module,
            get_repo_pipeline_config_path, get_repo_pipeline_script_path
        )
        from datalad.crawler.pipeline import run_pipeline
        from datalad.utils import chpwd  # import late so we could mock during tests
        with chpwd(chdir):

            assert not (is_pipeline and is_template), "it is either a pipeline or a template name, can't be both"

            if is_template:
                # generate a config and overload path with its filename
                path = initiate_pipeline_config(template=path,  # kwargs=TODO,
                                                commit=True)

            # TODO: centralize via _params_ handling
            if dry_run:
                if 'crawl' not in cfg.sections():
                    cfg.add_section('crawl')
                cfg.set('crawl', 'dryrun', "True")

            if path is None:

                # get config from the current repository/handle
                if is_pipeline:
                    raise ValueError("You must specify the file if --pipeline")
                # Let's see if there is a config or pipeline in this repo
                path = get_repo_pipeline_config_path()
                if not path or not exists(path):
                    # Check if there may be the pipeline provided
                    path = get_repo_pipeline_script_path()
                    if path and exists(path):
                        is_pipeline = True

            if not path:
                raise RuntimeError("Cannot locate crawler config or pipeline file")

            if is_pipeline:
                lgr.info("Loading pipeline definition from %s" % path)
                pipeline = load_pipeline_from_module(path)
            else:
                lgr.info("Loading pipeline specification from %s" % path)
                pipeline = load_pipeline_from_config(path)

            lgr.info("Running pipeline %s" % str(pipeline))
            # TODO: capture the state of all branches so in case of crash
            # we could gracefully reset back
            try:
                run_pipeline(pipeline)
            except Exception as exc:
                # TODO: config.crawl.failure = full-reset | last-good-master
                # probably ask via ui which action should be performed unless
                # explicitly specified
                raise
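
The late "from datalad.utils import chpwd" is what makes this command mockable: a module-level import would bind the name once, so later patches of datalad.utils.chpwd would go unseen. A hedged sketch of the patching side, assuming only the standard unittest.mock machinery (hypothetical test body):

from unittest import mock

with mock.patch("datalad.utils.chpwd") as mocked_chpwd:
    # make the mock usable as a context manager
    mocked_chpwd.return_value.__enter__.return_value = None
    # invoking the crawl command here would pick up the mock, because
    # __call__ looks chpwd up at call time
    pass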
Example #30
def test_install_dataset_from_just_source_via_path(url, path):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls

    with chpwd(path, mkdir=True):
        ds = install(path=url)

    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
    assert_true(os.path.lexists(opj(ds.path, 'test-annex.dat')))
Example #31
def test_dlabspath(path):
    if not has_symlink_capability():
        raise SkipTest
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    opath = opj(path, "origin")
    os.makedirs(opath)
    lpath = opj(path, "linked")
    os.symlink('origin', lpath)
    for d in opath, lpath:
        # regardless of which directory we are under, results should
        # not resolve symlinks
        eq_(d, dlabspath(d))
        # in the root of ds
        with chpwd(d):
            eq_(dlabspath("bu"), opj(d, "bu"))
            eq_(dlabspath("./bu"),
                opj(d, "./bu"))  # we do not normpath by default
            eq_(dlabspath("./bu", norm=True), opj(d, "bu"))
Example #32
def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        res = add(path="some", on_failure='ignore')
        assert_status('impossible', res)
    ds = Dataset(opj(path, 'ds'))
    ds.create()
    # non-existing path outside
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))
    # existing path outside
    with open(opj(path, 'outside'), 'w') as f:
        f.write('doesnt matter')
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))
Example #33
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        # assert outside the block, otherwise this check would never run
        assert_in("already exists and not empty", str(cme.exception))
Example #34
def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin,
                   path,
                   result_xfm='datasets',
                   return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one;
        # we do not want automatic subdatasetification without being
        # given a dataset explicitly
        clonedsub = clone(origin,
                          'testsub',
                          result_xfm='datasets',
                          return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])
Example #35
def test_update_known_submodule(path):
    def get_baseline(p):
        ds = Dataset(p).create()
        sub = ds.create('sub', save=False)
        # subdataset saw another commit after becoming a submodule
        ok_clean_git(ds.path, index_modified=['sub'])
        return ds

    # attempt one
    ds = get_baseline(opj(path, 'wo_ref'))
    with chpwd(ds.path):
        add('.', recursive=True)
    ok_clean_git(ds.path)

    # attempt two, same as above but call add via reference dataset
    ds = get_baseline(opj(path, 'w_ref'))
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
Example #36
def test_since_empty_and_unsupported(p1, p2):
    source = Dataset(p1).create()
    source.create_sibling(p2, name='target1')
    # see https://github.com/datalad/datalad/pull/4448#issuecomment-620847327
    # Test that it doesn't fail without a prior push
    source.publish(to='target1', since='')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(publish(to='target1',
                                    since='HEAD^^',
                                    on_failure='ignore'),
                            1,
                            status='impossible',
                            message="fatal: bad revision 'HEAD^^..HEAD'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
Example #37
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///',
                    result_xfm=None,
                    return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res,
            1,
            status='error',
            message=
            "could not create work tree dir '%s/datasets.datalad.org': Permission denied"
            % pdir)
Example #38
def test_GitRepo_flyweight(path1, path2):

    repo1 = GitRepo(path1, create=True)
    assert_is_instance(repo1, GitRepo)
    # instantiate again:
    repo2 = GitRepo(path1, create=False)
    assert_is_instance(repo2, GitRepo)
    # the very same object:
    ok_(repo1 is repo2)

    # reference the same in a different way:
    with chpwd(path1):
        repo3 = GitRepo(op.relpath(path1, start=path2), create=False)
    # it's the same object:
    ok_(repo1 is repo3)

    # and realpath attribute is the same, so they are still equal:
    ok_(repo1 == repo3)
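
The flyweight behavior asserted above (same realpath implies the very same object) is commonly implemented with an instance cache keyed on the resolved path. A generic sketch of the pattern, not GitRepo's actual implementation:

import os

class FlyweightSketch:
    _instances = {}

    def __new__(cls, path):
        # key on the resolved path, so relative and symlinked spellings
        # of the same directory yield the identical instance
        key = os.path.realpath(path)
        if key not in cls._instances:
            obj = super().__new__(cls)
            obj.path = key
            cls._instances[key] = obj
        return cls._instances[key]

assert FlyweightSketch("/tmp") is FlyweightSketch("/tmp/.")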
Example #39
def test_smth_about_not_supported(p1, p2):
    source = Dataset(p1).create()
    from datalad.support.network import PathRI
    source.create_sibling('ssh://localhost' + PathRI(p2).posixpath,
                          name='target1')
    # source.publish(to='target1')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(publish(to='target1',
                                    since='HEAD^^',
                                    on_failure='ignore'),
                            1,
                            status='impossible',
                            message="fatal: bad revision 'HEAD^^'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
Example #40
def _prep_worktree(ds_path,
                   pwd,
                   globbed,
                   assume_ready=None,
                   remove_outputs=False,
                   rerun_outputs=None,
                   jobs=None):
    """
    Yields
    ------
    dict
      Result records
    """
    # ATTN: For correct path handling, all dataset command calls should be
    # unbound. They should (1) receive a string dataset argument, (2) receive
    # relative paths, and (3) happen within a chpwd(pwd) context.
    with chpwd(pwd):
        for res in prepare_inputs(
                ds_path,
                [] if assume_ready in ["inputs", "both"] else globbed['inputs'],
                # Ignore --assume-ready for extra_inputs. It's an unexposed
                # implementation detail that lets wrappers sneak in inputs.
                extra_inputs=globbed['extra_inputs'],
                jobs=jobs):
            yield res

        if assume_ready not in ["outputs", "both"]:
            if globbed['outputs']:
                for res in _install_and_reglob(ds_path, globbed['outputs']):
                    yield res
                for res in _unlock_or_remove(
                        ds_path,
                        globbed['outputs'].expand_strict()
                        if not remove_outputs
                        # when force-removing, exclude declared inputs
                        else set(
                            globbed['outputs'].expand_strict()).difference(
                                globbed['inputs'].expand_strict()),
                        remove=remove_outputs):
                    yield res

            if rerun_outputs is not None:
                for res in _unlock_or_remove(ds_path, rerun_outputs):
                    yield res
Example #41
    def test_add_delete_after_and_drop_subdir(self):
        os.mkdir(opj(self.annex.path, 'subdir'))
        mv_out = self.annex.call_git(
            ['mv', '1.tar', 'subdir']
        )
        self.annex.commit("moved into subdir")
        with chpwd(self.annex.path):
            # was failing since it deleted without considering whether the
            # tarball was extracted in that directory
            commits_prior_master = list(self.annex.get_branch_commits_())
            commits_prior = list(self.annex.get_branch_commits_('git-annex'))
            add_out = add_archive_content(
                opj('subdir', '1.tar'),
                delete_after=True,
                drop_after=True)
            assert_repo_status(self.annex.path)
            commits_after_master = list(self.annex.get_branch_commits_())
            commits_after = list(self.annex.get_branch_commits_('git-annex'))
            # There should be a single commit for all additions +1 to
            # initiate datalad-archives gh-1258.  If faking dates,
            # there should be another +1 because annex.alwayscommit
            # isn't set to false.
            assert_equal(len(commits_after),
                         len(commits_prior) + 2 + self.annex.fake_dates_enabled)
            assert_equal(len(commits_after_master), len(commits_prior_master))
            assert(add_out is self.annex)
            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()

            # and if we add some untracked file and redo, there should be no
            # changes to master and the file should remain uncommitted
            create_tree(self.annex.path, {'dummy.txt': '123'})
            assert_true(self.annex.dirty)  # untracked file
            add_out = add_archive_content(
                opj('subdir', '1.tar'),
                delete_after=True,
                drop_after=True,
                allow_dirty=True)
            assert_repo_status(self.annex.path, untracked=['dummy.txt'])
            assert_equal(len(list(self.annex.get_branch_commits_())),
                         len(commits_prior_master))

            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()
Example #42
def test_check_dates(path=None):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example #43
def test_rev_resolve_path(path):
    if op.realpath(path) != path:
        raise SkipTest("Test assumptions require non-symlinked parent paths")
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    opath = op.join(path, "origin")
    os.makedirs(opath)
    if not on_windows:
        lpath = op.join(path, "linked")
        os.symlink('origin', lpath)

    ds_global = Dataset(path)
    # path resolution of absolute paths is not influenced by symlinks
    # ignore the linked path on windows, it is not a symlink in the POSIX sense
    for d in (opath, ) if on_windows else (opath, lpath):
        ds_local = Dataset(d)
        # no symlink resolution
        eq_(str(rev_resolve_path(d)), d)
        with chpwd(d):
            # be aware: knows about cwd, but this CWD has symlinks resolved
            eq_(str(rev_resolve_path(d).cwd()), opath)
            # using pathlib's `resolve()` will resolve any
            # symlinks
            # also resolve `opath`, as on old windows systems the path might
            # come in crippled (e.g. C:\Users\MIKE~1/...)
            # and the comparison would fail unjustifiably
            eq_(rev_resolve_path('.').resolve(), ut.Path(opath).resolve())
            # no norming, but absolute paths, without resolving links
            eq_(rev_resolve_path('.'), ut.Path(d))
            eq_(str(rev_resolve_path('.')), d)

            eq_(str(rev_resolve_path(op.join(os.curdir, 'bu'), ds=ds_global)),
                op.join(d, 'bu'))
            eq_(str(rev_resolve_path(op.join(os.pardir, 'bu'), ds=ds_global)),
                op.join(ds_global.path, 'bu'))

        # resolve against a dataset
        eq_(str(rev_resolve_path('bu', ds=ds_local)), op.join(d, 'bu'))
        eq_(str(rev_resolve_path('bu', ds=ds_global)), op.join(path, 'bu'))
        # but paths outside the dataset are left untouched
        eq_(str(rev_resolve_path(op.join(os.curdir, 'bu'), ds=ds_global)),
            op.join(getpwd(), 'bu'))
        eq_(str(rev_resolve_path(op.join(os.pardir, 'bu'), ds=ds_global)),
            op.normpath(op.join(getpwd(), os.pardir, 'bu')))
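
The absolute-but-unresolved vs. fully-resolved distinction exercised above can be reproduced with plain pathlib; a standalone, POSIX-only illustration (it creates a symlink):

import os
import tempfile
from pathlib import Path

tmp = Path(tempfile.mkdtemp())
(tmp / "origin").mkdir()
os.symlink("origin", tmp / "linked")

p = tmp / "linked"
# plain path arithmetic keeps the symlink component ...
assert p.name == "linked"
# ... while resolve() follows it to the target
assert p.resolve().name == "origin"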
Example #44
def test_run_cmdline_disambiguation(path):
    Dataset(path).create()
    with chpwd(path):
        # Without a positional argument starting a command, any option is
        # treated as an option to 'datalad run'.
        with swallow_outputs() as cmo:
            with patch("datalad.interface.run._execute_command") as exec_cmd:
                with assert_raises(SystemExit):
                    main(["datalad", "run", "--message"])
                exec_cmd.assert_not_called()
            assert_in("message: expected one", cmo.err)
        # If we want to pass an option as the first value of a command (e.g.,
        # because we are using a runscript with containers-run), we can do this
        # with "--".
        with patch("datalad.interface.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "--message"])
            exec_cmd.assert_called_once_with("--message",
                                             path,
                                             expected_exit=None)

        # And a twist on above: Our parser mishandles --version (gh-3067),
        # treating 'datalad run CMD --version' as 'datalad --version'.
        version_stream = "err" if PY2 else "out"
        with swallow_outputs() as cmo:
            with assert_raises(SystemExit) as cm:
                main(["datalad", "run", "echo", "--version"])
            eq_(cm.exception.code, 0)
            out = getattr(cmo, version_stream)
        with swallow_outputs() as cmo:
            with assert_raises(SystemExit):
                main(["datalad", "--version"])
            version_out = getattr(cmo, version_stream)
        ok_(version_out)
        eq_(version_out, out)
        # We can work around that (i.e., make "--version" get passed as
        # command) with "--".
        with patch("datalad.interface.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "echo", "--version"])
            exec_cmd.assert_called_once_with("echo --version",
                                             path,
                                             expected_exit=None)
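
The "--" workaround relies on standard argparse semantics: everything after a lone "--" is treated as positional, never as an option. A minimal standalone check with a generic parser (not datalad's actual one):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--message")
parser.add_argument("cmd", nargs="*")

# before "--", --message is consumed as an option of the parser itself ...
ns = parser.parse_args(["--message", "hi", "echo"])
assert ns.message == "hi" and ns.cmd == ["echo"]

# ... after "--", it is passed through as a positional
ns = parser.parse_args(["--", "--message"])
assert ns.message is None and ns.cmd == ["--message"]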
Example #45
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but from within a subdir of
    # the symlink and with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        add([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # here is where we need to disagree with the repo in #1886
    # we would not expect that `add` registers sub3 as a subdataset
    # of parent, because no reference dataset was given and the
    # command cannot decide (with the current semantics) whether
    # it should "add anything in sub3 to sub3" or "add sub3 to whatever
    # sub3 is in"
    ok_clean_git(parent.path, untracked=['sub3/'])
Example #46
def test_search_outside1(tdir, newhome):
    with chpwd(tdir):
        # should fail since directory exists, but not a dataset
        # should not even waste our response ;)
        always_render = cfg.obtain('datalad.api.alwaysrender')
        with patch.object(search_mod, 'LOCAL_CENTRAL_PATH', newhome):
            if always_render:
                # we do try to render results which actually causes exception
                # to come right away
                assert_raises(NoDatasetArgumentFound, search, "bu")
            else:
                gen = search("bu")
                assert_is_generator(gen)
                assert_raises(NoDatasetArgumentFound, next, gen)

        # and if we point to some non-existing dataset -- the same in both cases
        # but might come before even next if always_render
        with assert_raises(ValueError):
            next(search("bu", dataset=newhome))
Example #47
def test_placeholders(path):
    ds = Dataset(path).create(force=True)
    ds.add(".")
    ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"])
    ok_file_has_content(opj(path, "c.out"), "a.in b.in\n")

    hexsha_before = ds.repo.get_hexsha()
    ds.rerun()
    eq_(hexsha_before, ds.repo.get_hexsha())

    ds.run("echo {inputs[0]} >getitem", inputs=["*.in"])
    ok_file_has_content(opj(path, "getitem"), "a.in\n")

    ds.run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "expanded-pwd"), path,
                        strip=True)

    ds.run("echo {dspath} >expanded-dspath")
    ok_file_has_content(opj(path, "expanded-dspath"), ds.path,
                        strip=True)

    subdir_path = opj(path, "subdir")
    with chpwd(subdir_path):
        run("echo {pwd} >expanded-pwd")
    ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path,
                        strip=True)
    eq_(get_run_info(ds, ds.repo.repo.head.commit.message)[1]["pwd"],
        "subdir")

    # Double brackets can be used to escape placeholders.
    ds.run("touch {{inputs}}", inputs=["*.in"])
    ok_exists(opj(path, "{inputs}"))

    # rerun --script expands the placeholders.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-", since="")
        script_out = cmout.getvalue()
        assert_in("echo a.in b.in >c.out", script_out)
        assert_in("echo {} >expanded-pwd".format(subdir_path),
                  script_out)
        assert_in("echo {} >expanded-dspath".format(ds.path),
                  script_out)
Example #48
def test_bf1886(path):
    parent = Dataset(path).rev_create()
    sub = parent.rev_create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = rev_create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but from within a subdir of
    # the symlink and with no reference dataset
    sub3 = rev_create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # in contrast to `add`, this only operates on a single top-level dataset;
    # although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
Example #49
def test_tarball(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save(all_changes=True)
    committed_date = ds.repo.get_committed_date()
    with chpwd(path):
        _mod, tarball1 = ds.export('tarball')
        assert (not isabs(tarball1))
        tarball1 = opj(path, tarball1)
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    assert_equal(tarball1, default_outname)
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export('tarball', output=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original tarball filename -> different checksum, despite
    # the same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # really sleep, so that if the implementation stops using time.time we notice
    time.sleep(1.1)
    ds.export('tarball', output=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad so this test is not
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly three files, and expect no content for any directory
            assert_equal(nfiles, 3)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')
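
The identical-checksum assertion can only hold if the export stamps a fixed mtime (the committed date) into the archive and avoids the gzip header timestamp. The generic technique, sketched with the stdlib under the assumption that the export does something equivalent (this is not datalad's code):

import gzip
import tarfile

def write_fixed_mtime_tgz(archive_path, files, mtime):
    # fix both the member mtimes and the gzip header timestamp; otherwise
    # two runs over identical content differ byte-wise
    with gzip.GzipFile(archive_path, "wb", mtime=0) as gz:
        with tarfile.open(fileobj=gz, mode="w") as tf:
            for fname in files:
                ti = tf.gettarinfo(fname)
                ti.mtime = mtime
                ti.uid = ti.gid = 0
                ti.uname = ti.gname = ""
                with open(fname, "rb") as f:
                    tf.addfile(ti, f)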
Example #50
def test_uninstall_subdataset(src, dst):

    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        repo = subds.repo

        annexed_files = repo.get_content_annexinfo(init=None)
        repo.get([str(f) for f in annexed_files])

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')
        ok_(all(str(f) in res for f in annexed_files))
        ainfo = repo.get_content_annexinfo(paths=annexed_files,
                                           eval_availability=True)
        ok_(all(not st["has_content"] for st in ainfo.values()))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
Example #51
def test_require_dataset(path):
    with chpwd(path):
        assert_raises(
            InsufficientArgumentsError,
            require_dataset,
            None)
        create('.')
        # in this folder by default
        assert_equal(
            require_dataset(None).path,
            path)

        assert_equal(
            require_dataset('some', check_installed=False).path,
            abspath('some'))
        assert_raises(
            ValueError,
            require_dataset,
            'some',
            check_installed=True)
Example #52
def test_get_dataset_root(path):
    eq_(get_dataset_root('/nonexistent'), None)
    with chpwd(path):
        repo = AnnexRepo(os.curdir, create=True)
        subdir = opj('some', 'deep')
        fname = opj(subdir, 'dummy')
        os.makedirs(subdir)
        with open(fname, 'w') as f:
            f.write('some')
        repo.add(fname)
        # we can find this repo
        eq_(get_dataset_root(os.curdir), os.curdir)
        # and we get the type of path that we fed in
        eq_(get_dataset_root(abspath(os.curdir)), abspath(os.curdir))
        # subdirs are no issue
        eq_(get_dataset_root(subdir), os.curdir)
        # even more subdirs are no issue
        eq_(get_dataset_root(opj(subdir, subdir)), os.curdir)
        # non-dir paths are no issue
        eq_(get_dataset_root(fname), os.curdir)
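
The behavior checked here amounts to walking upward from the given path until
a repository root is found; an illustrative reimplementation of the idea (not
datalad's actual code):

import os.path as op

def find_repo_root(path):
    path = op.abspath(path)
    while True:
        if op.exists(op.join(path, '.git')):
            return path
        parent = op.dirname(path)
        if parent == path:
            # reached the filesystem root without finding a repository
            return None
        path = parent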
Example #53
def test_resolve_path_symlink_edition(path):
    deepest = ut.Path(path) / 'one' / 'two' / 'three'
    deepest_str = str(deepest)
    os.makedirs(deepest_str)
    with chpwd(deepest_str):
        # direct absolute
        eq_(deepest, resolve_path(deepest))
        eq_(deepest, resolve_path(deepest_str))
        # explicit direct relative
        eq_(deepest, resolve_path('.'))
        eq_(deepest, resolve_path(op.join('.', '.')))
        eq_(deepest, resolve_path(op.join('..', 'three')))
        eq_(deepest, resolve_path(op.join('..', '..', 'two', 'three')))
        eq_(deepest,
            resolve_path(op.join('..', '..', '..', 'one', 'two', 'three')))
        # weird ones
        eq_(deepest, resolve_path(op.join('..', '.', 'three')))
        eq_(deepest, resolve_path(op.join('..', 'three', '.')))
        eq_(deepest, resolve_path(op.join('.', '..', 'three')))
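
The "symlink edition" in the name points at the key property (the symlinked
temporary directory is presumably set up by a fixture not shown here):
resolve_path() collapses '.' and '..' lexically and does not resolve symlinks,
so results stay under a symlinked working directory. A rough equivalent of
that normalization (an approximation; the real resolve_path also accepts a
dataset argument):

import os.path as op

def lexical_resolve(path, cwd):
    # anchor relative paths at cwd and collapse '.' and '..' entries,
    # without ever calling realpath()
    return op.normpath(op.join(cwd, path))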
Example #54
def check_create_path_semantics(
        cwd, create_ds, path_arg, base_path, other_path):
    ds = Dataset(base_path).create()
    os.makedirs(op.join(ds.path, 'some'))
    target_path = ds.pathobj / "some" / "what" / "deeper"
    with chpwd(
            other_path if cwd == 'elsewhere' else
            base_path if cwd == 'parentds' else
            str(ds.pathobj / 'some') if cwd == 'subdir' else
            str(Path.cwd())):
        subds = create(
            dataset=ds.path if create_ds == 'abspath'
            else str(ds.pathobj.relative_to(cwd)) if create_ds == 'relpath'
            else ds if create_ds == 'instance'
            else create_ds,
            path=str(target_path) if path_arg == 'abspath'
            else str(target_path.relative_to(ds.pathobj)) if path_arg == 'relpath'
            else op.join('what', 'deeper') if path_arg == 'subdir_relpath'
            else path_arg)
        eq_(subds.pathobj, target_path)
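
check_create_path_semantics is a helper rather than a test; a driver would
invoke it for combinations that are valid under datalad's path-resolution
rules (relative paths resolve against the dataset root when the dataset is
passed as an instance, and against the working directory when it is passed as
a path string). A hypothetical sketch:

import tempfile

def test_create_path_semantics(other_path):
    for cwd, create_ds, path_arg in (
            ('elsewhere', 'instance', 'abspath'),
            ('parentds', 'instance', 'relpath'),
            ('subdir', 'abspath', 'subdir_relpath')):
        # a fresh base dataset per combination, since create() refuses
        # to run twice on the same path
        base_path = tempfile.mkdtemp()
        check_create_path_semantics(
            cwd, create_ds, path_arg, base_path, other_path)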
Example #55
def test_clone_datasets_root(tdir):
    with chpwd(tdir):
        ds = clone("///", result_xfm='datasets', return_type='item-or-list')
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        res = clone("///", on_failure='ignore')
        assert_message("dataset %s was already cloned from '%s'", res)
        assert_status('notneeded', res)

        # and a third time into an existing something that is not a dataset:
        os.makedirs(opj(tdir, 'sub'))
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        res = clone('///', path="sub", on_failure='ignore')
        assert_message(
            'target path already exists and not empty, refuse to clone into target path',
            res)
        assert_status('error', res)
Example #56
def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(clone_lev2.siblings(
            'query',
            name='origin-2',
            return_type='item-or-list')['url']),
        origin.pathobj
    )

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), "
                              r"'init'",
                          match=False,
                          level='DEBUG')
    assert_result_count(
        clone_lev3.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev3.pathobj / 'file1.txt'))
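
The mechanism under test: cloning a clone also registers the origin's own
origin as an additional sibling (named 'origin-2' above), so content stays
retrievable even after intermediate copies drop it. One way to inspect this
by hand (a sketch following the naming used above; whereis is git-annex's
availability report):

# list all registered siblings of the deepest clone
for s in clone_lev3.siblings('query'):
    print(s['name'], s.get('url'))
# per-file content availability, as git-annex tracks it
print(clone_lev3.repo.whereis('file1.txt'))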
Example #57
def test_openfmri_addperms(ind, topurl, outd, clonedir):
    index_html = opj(ind, 'ds666', 'index.html')

    list(initiate_dataset(
        template="openfmri",
        dataset_name='dataladtest-ds666',
        path=outd,
        data_fields=['dataset'])({'dataset': 'ds666'}))

    ok_clean_git(outd)
    with chpwd(outd):
        pipeline = ofpipeline(
            'ds666', versioned_urls=False, topurl=topurl,
            s3_prefix=False  # so we do not invoke the s3 subpipeline
        )
        ok_clean_git(outd)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    ok_clean_git(outd)
    ok_file_under_git(outd, 'dataset_description.json', annexed=False)
Example #58
File: test_add.py Project: leej3/datalad
def test_gh1597(path):
    ds = Dataset(path).create()
    with chpwd(ds.path):
        sub = create('sub')
    ds.add('sub', save=False)
    # only staged at this point, but known, and not annexed
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    assert_repo_status(ds.path, added=[sub.path])
    # now modify low-level
    with open(opj(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path,
                       modified=[ds.pathobj / ".gitmodules"],
                       added=[sub.path])
    ds.add('.gitmodules')
    # must not come under annex management
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
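
The regression guarded against here is .gitmodules slipping under annex
control during add(); besides ok_file_under_git, the state can be probed on
the repo directly (a sketch using AnnexRepo's batched check):

# expect [False]: .gitmodules must be tracked by plain git, not the annex
print(ds.repo.is_under_annex(['.gitmodules']))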
Example #59
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        with swallow_logs(new_level=logging.INFO) as cml:
            result = install("///")
            assert_in("was already installed from", cml.out)
            eq_(result, ds)

        # and a third time into an existing something that is not a dataset:
        os.makedirs(opj(tdir, 'sub'))
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with swallow_logs(new_level=logging.WARNING) as cml:
            result = install("sub", source='///')
            assert_in("already exists and is not an installed dataset",
                      cml.out)
            ok_(result is None)
Example #60
def test_get_in_unavailable_subdataset(src, path):
    _make_dataset_hierarchy(src)
    root = install(
        path, source=src,
        result_xfm='datasets', return_type='item-or-list')
    targetpath = opj('sub1', 'sub2')
    targetabspath = opj(root.path, targetpath)
    with chpwd(path):
        res = get(targetabspath)
    assert_result_count(res, 2, status='ok', action='install', type='dataset')
    # filter the results down to the one that matched the requested path
    filtered = [r for r in res if only_matching_paths(r, path=targetabspath)]
    assert_result_count(
        filtered, 1, status='ok', action='install', type='dataset',
        path=targetabspath)
    # we got the dataset, and its immediate content, but nothing below
    sub2 = Dataset(targetabspath)
    ok_(sub2.is_installed())
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not Dataset(opj(targetabspath, 'sub3')).is_installed())
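
The last assertion shows that get() only installs the requested dataset and
its direct content; deeper levels need explicit recursion. A minimal sketch
continuing the example:

# also install everything below sub1/sub2, without fetching file content
root.get(targetpath, recursive=True, get_data=False)
# or cap the depth of the recursive installation
root.get(targetpath, recursive=True, recursion_limit=1)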