Example #1
def test_install_into_dataset(source=None, top_path=None):
    src_ds = Dataset(source).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)

    ds = create(top_path)
    assert_repo_status(ds.path)

    subds = ds.install("sub", source=source)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is too:
    assert_repo_status(ds.path, annex=None)
    ds.save(message='addsub')
    # now it is:
    assert_repo_status(ds.path, annex=None)

    # but we could also save while installing, and there should be no side effect
    # of saving any other changes if we choose not to auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    assert_repo_status(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.save('sub3')
    assert_repo_status(ds.path, untracked=['dummy.txt'])
Example #2
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)

    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='error',
                      message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='impossible',
                      message=("cannot enable sibling '%s', not known",
                               'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))
Example #3
def test_add_recursive(path=None):
    # make simple hierarchy
    parent = Dataset(path).create()
    assert_repo_status(parent.path)
    sub1 = parent.create(op.join('down', 'sub1'))
    assert_repo_status(parent.path)
    sub2 = parent.create('sub2')
    # the next one makes the parent dirty
    subsub = sub2.create('subsub')
    assert_repo_status(parent.path, modified=['sub2'])
    res = parent.save()
    assert_repo_status(parent.path)

    # now add content deep in the hierarchy
    create_tree(subsub.path, {'new': 'empty'})
    assert_repo_status(parent.path, modified=['sub2'])

    # recursive add should not even touch sub1, because
    # it knows that it is clean
    res = parent.save(recursive=True, jobs=5)
    # the key action is done
    assert_result_count(res,
                        1,
                        path=op.join(subsub.path, 'new'),
                        action='add',
                        status='ok')
    # saved all the way up
    assert_result_count(res, 3, action='save', status='ok')
    assert_repo_status(parent.path)
Example #4
def test_symlinked_relpath(path=None):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(
        dspath,
        {
            "mike1": 'mike1',  # will be added from topdir
            "later": "later",  # later from within subdir
            "d": {
                "mike2": 'mike2',  # to be added within subdir
            }
        })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it with a subdirectory as CWD, and check that relative paths
    # given to a plain command (not a dataset method) are treated as
    # relative to the CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path, message="committing", path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)
Example #5
def test_subdataset_save(path=None):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {"untracked": 'ignore', 'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
Example #6
def test_invalid_call(path=None):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
Example #7
def test_newthings_coming_down(originpath=None, destpath=None):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in(DEFAULT_REMOTE, ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be enhanced to a
    # graceful downgrade
    before_branches = ds.repo.get_branches()
    ok_(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    assert_false(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #8
def test_no_interaction_with_untracked_content(path=None):
    # extracted from what was a metadata test originally
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    create_tree(ds.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    subds = ds.create('sub', force=True)
    subds.remove(op.join('.datalad', 'config'))
    nok_((subds.pathobj / '.datalad' / 'config').exists())
    # this will only work if `remove` didn't do anything stupid and
    # cause all content to be saved
    subds.create('subsub', force=True)
Example #9
def test_windows_incompatible_names(path=None):
    ds = Dataset(path).create()
    create_tree(
        path, {
            'imgood': 'Look what a nice name I have',
            'illegal:character.txt': 'strange choice of name',
            'spaceending ': 'who does these things?',
            'lookmumadot.': 'why would you do this?',
            'COM1.txt': 'I am a serial port',
            'dirs with spaces': {
                'seriously?': 'you are stupid',
                'why somuch?wrongstuff.': "I gave up"
            },
        })
    ds.repo.config.set('datalad.save.windows-compat-warning', 'error')
    ds.save('.datalad/config')
    res = ds.save(on_failure='ignore')
    # check that none of the 6 problematic files was saved, but the good one was
    assert_result_count(res, 6, status='impossible', action='save')
    assert_result_count(res, 1, status='ok', action='save')

    # check that the warning is emitted
    ds.repo.config.set('datalad.save.windows-compat-warning', 'warning')
    ds.save('.datalad/config')
    with swallow_logs(new_level=logging.WARN) as cml:
        ds.save()
        cml.assert_logged(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:")
        assert_in("Elements using a reserved filename:", cml.out)
        assert_in("Elements with illegal characters:", cml.out)
        assert_in("Elements ending with a dot:", cml.out)
        assert_in("Elements ending with a space:", cml.out)

    # check that a setting of 'none' really does nothing
    ds.repo.config.set('datalad.save.windows-compat-warning', 'none')
    ds.save('.datalad/config')
    create_tree(
        path, {
            'more illegal:characters?.py':
            'My arch nemesis uses Windows and I will '
            'destroy them! Muahahaha'
        })
    with swallow_logs(new_level=logging.WARN) as cml:
        res = ds.save()
        # we shouldn't see warnings
        assert_not_in(
            "Some elements of your dataset are not compatible with Windows "
            "systems. Disable this check by changing "
            "datalad.save.windows-compat-warning or consider renaming the "
            "following elements:", cml.out)
        # make sure the file is saved successfully
        assert_result_count(res, 1, status='ok', action='save')
Example #10
def test_external_versions_rogue_module(topd=None):
    ev = ExternalVersions()
    # if module throws some other non-ImportError exception upon import
    # we must not crash, but issue a warning
    modname = 'verycustomrogue__'
    create_tree(topd, {modname + '.py': 'raise Exception("pickaboo")'})
    with patch('sys.path', [topd]), \
        swallow_logs(new_level=logging.WARNING) as cml:
        assert ev[modname] is None
        assert_true(ev.dumps(indent=True).endswith(linesep))
        assert_in('pickaboo', cml.out)
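For contrast with the rogue-module case above, here is a minimal sketch of the normal lookup path, reusing only the calls exercised in the test; the choice of 'datalad' as a module name and the expectation that it reports a version are assumptions about typical behavior, not something this test asserts.

# a minimal sketch, assuming a well-behaved installed module;
# the test above only pins down the rogue-module behavior
ev = ExternalVersions()
print(ev['datalad'])          # expected: the installed datalad version (assumption)
print(ev.dumps(indent=True))  # renders all queried versions as one string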
Example #11
def test_save_message_file(path=None):
    ds = Dataset(path).create()
    with assert_raises(ValueError):
        ds.save("blah", message="me", message_file="and me")

    create_tree(path, {"foo": "x", "msg": "add foo"})
    ds.repo.add("foo")
    ds.save(message_file=op.join(ds.path, "msg"))
    # ATTN: Consider corresponding branch so that this check works when we're
    # on an adjusted branch too (e.g., when this test is executed under
    # Windows).
    eq_(ds.repo.format_commit("%s", DEFAULT_BRANCH), "add foo")
Example #12
def test_preserve_attrs(src=None, dest=None):
    create_tree(src, {"src": {"foo": {"bar": "This is test text."}}})
    os.utime(opj(src, "src", "foo", "bar"), (1234567890, 1234567890))
    _RunnerAdapter().put(opj(src, "src"),
                         dest,
                         recursive=True,
                         preserve_attrs=True)
    s = os.stat(opj(dest, "src", "foo", "bar"))
    assert s.st_atime == 1234567890
    assert s.st_mtime == 1234567890
    with open(opj(dest, "src", "foo", "bar")) as fp:
        assert fp.read() == "This is test text."
Example #13
    def setup(self):
        repo_path = tempfile.mkdtemp(**get_tempfile_kwargs(prefix="tree"))
        create_tree(repo_path,
                    {'1.tar': {
                        'file.txt': 'load',
                        '1.dat': 'load2'
                    }})
        self.ds = ds = Dataset(repo_path)
        ds.create(force=True)
        self.annex = ds.repo
        # Let's add first archive to the annex so we could test
        ds.save('1.tar', message="added 1.tar")
Example #14
def test_save_partial_commit_shrinking_annex(path=None):
    # This is a variation on the test above. The main difference is that there
    # are other staged changes in addition to the unlocked file.
    ds = create(path, force=True)
    ds.save()
    assert_repo_status(ds.path)
    ds.unlock(path="foo")
    create_tree(ds.path, tree={"foo": "a", "staged": ""}, remove_existing=True)
    # Even without this staged change, a plain 'git commit -- foo' would fail
    # with git-annex's partial index error, but save (or more specifically
    # GitRepo.save_) drops the pathspec if there are no staged changes.
    ds.repo.add("staged", git=True)
    ds.save(path="foo")
    assert_repo_status(ds.path, added=["staged"])
Example #15
def test_update_git_smoke(src_path=None, dst_path=None):
    # Apparently this was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(annex=False)
    target = install(dst_path,
                     source=src_path,
                     result_xfm='datasets',
                     return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(target.update(recursive=True, merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #16
def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"), ds.repo.format_commit("%B"))
    # we should yield an 'impossible' result from add-archive-content when there is
    # untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"],
                        archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
        'unsaved changes')
Example #17
def check_renamed_file(recursive, annex, path):
    ds = Dataset(path).create(annex=annex)
    create_tree(path, {'old': ''})
    ds.repo.add('old')
    ds.repo.call_git(["mv"], files=["old", "new"])
    ds.save(recursive=recursive)
    assert_repo_status(path)

    # https://github.com/datalad/datalad/issues/6558
    new = (ds.pathobj / "new")
    new.unlink()
    new.mkdir()
    (new / "file").touch()
    ds.repo.call_git(["add"], files=[str(new / "file")])
    ds.save(recursive=recursive)
    assert_repo_status(path)
Example #18
    def test_override_existing_under_git(self):
        create_tree(self.ds.path, {'1.dat': 'load2'})
        self.ds.save('1.dat', to_git=True, message='added to git')
        self.ds.add_archive_content(
            '1.tar',
            strip_leading_dirs=True,
        )
        # and we did not bother adding it to annex (for now) -- just skipped
        # since we have it and it is the same
        ok_file_under_git(self.ds.path, '1.dat', annexed=False)

        # but if we say 'overwrite' -- we would remove and replace
        self.ds.add_archive_content('1.tar',
                                    strip_leading_dirs=True,
                                    delete=True,
                                    existing='overwrite')
        ok_file_under_git(self.ds.path, '1.dat', annexed=True)
Example #19
    def test_add_delete_after_and_drop_subdir(self=None):
        os.mkdir(opj(self.annex.path, 'subdir'))
        mv_out = self.annex.call_git(['mv', '1.tar', 'subdir'])
        self.annex.commit("moved into subdir")
        with chpwd(self.annex.path):
            # this was failing since it deleted without considering whether the
            # tarball was extracted in that directory
            commits_prior_master = list(self.annex.get_branch_commits_())
            commits_prior = list(self.annex.get_branch_commits_('git-annex'))
            add_out = self.ds.add_archive_content(opj('subdir', '1.tar'),
                                                  delete_after=True,
                                                  drop_after=True)
            assert_repo_status(self.annex.path)
            if not self.annex.is_managed_branch():
                # whole counting logic here is ignorant of adjusted branches
                commits_after_master = list(self.annex.get_branch_commits_())
                commits_after = list(
                    self.annex.get_branch_commits_('git-annex'))
                # There should be a single commit for all additions +1 to
                # initiate datalad-archives gh-1258.  If faking dates,
                # there should be another +1 because annex.alwayscommit
                # isn't set to false.
                assert_equal(
                    len(commits_after),
                    len(commits_prior) + 2 + self.annex.fake_dates_enabled)
                assert_equal(len(commits_after_master),
                             len(commits_prior_master))
            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()

            # and if we add some untracked file and redo, there should be no changes
            # to master and the file should remain uncommitted
            create_tree(self.annex.path, {'dummy.txt': '123'})
            assert_true(self.annex.dirty)  # untracked file
            add_out = add_archive_content(opj('subdir', '1.tar'),
                                          delete_after=True,
                                          drop_after=True,
                                          allow_dirty=True)
            assert_repo_status(self.annex.path, untracked=['dummy.txt'])
            assert_equal(len(list(self.annex.get_branch_commits_())),
                         len(commits_prior_master))

            # there should be no .datalad temporary files hanging around
            self.assert_no_trash_left_behind()
Example #20
def test_get_invalid_call(path=None, file_outside=None):

    # no argument at all:
    assert_raises(InsufficientArgumentsError, get, None)
    assert_raises(InsufficientArgumentsError, get, [])
    # invalid dataset:
    assert_raises(ValueError, get, None, dataset=path, on_failure='ignore')

    # have a plain git:
    ds = Dataset(path)
    ds.create(annex=False)
    with open(opj(path, "some.txt"), "w") as f:
        f.write("whatever")
    ds.save("some.txt", to_git=True, message="Initial commit.")

    # make it an annex (remove indicator file that create has placed
    # in the dataset to make it possible):
    (ds.pathobj / '.noannex').unlink()
    AnnexRepo(path, init=True, create=True)
    # call get again on a file in git:
    result = ds.get("some.txt")
    assert_status('notneeded', result)

    # invalid source:
    # yoh:  but now we would need to add it to annex since clever code first
    # checks what needs to be fetched at all
    create_tree(path, {'annexed.dat': 'some'})
    ds.save("annexed.dat")
    ds.repo.drop("annexed.dat", options=['--force'])
    with assert_raises(RemoteNotAvailableError) as cme:
        ds.get("annexed.dat", source='MysteriousRemote')
    eq_("MysteriousRemote", cme.value.remote)

    res = ds.get("NotExistingFile.txt", on_failure='ignore')
    assert_status('impossible', res)
    assert_message("path does not exist", res)

    # path outside repo errors as with most other commands:
    res = ds.get(file_outside, on_failure='ignore', result_renderer='default')
    assert_in_results(res,
                      status='error',
                      message=('path not associated with dataset %s', ds))
Example #21
def test_no_annex(path=None):
    ds = create(path)
    assert_repo_status(ds.path)
    create_tree(
        ds.path, {
            'code': {
                'inannex': 'content',
                'notinannex': 'othercontent'
            },
            'README': 'please'
        })
    # add inannex before the configuration
    ds.save(opj('code', 'inannex'))
    no_annex(pattern=['code/**', 'README'], dataset=ds.path)

    inannex = (ds.pathobj / 'code' / 'inannex')

    # add notinannex and README after the configuration
    ds.save([opj('code', 'notinannex'), 'README'])

    repo = ds.repo
    try:
        assert_repo_status(ds.path)
    except AssertionError:
        # If on an adjusted branch and notinannex's mtime is as recent or newer
        # than .git/index's, the clean filter runs on it when save() is called.
        # This leads to a racy failure until after git-annex's 424bef6b6
        # (smudge: check for known annexed inodes before checking
        # annex.largefiles, 2021-05-03).
        #
        # https://git-annex.branchable.com/forum/one-off_unlocked_annex_files_that_go_against_large/
        if repo.is_managed_branch() and repo.git_annex_version <= "8.20210428":
            assert_repo_status(ds.path, modified=[inannex])
            raise SkipTest("Known bug fixed in git-annex")
        raise

    # one is annexed, the other is not, despite no change in the add call;
    # importantly, .gitattributes is not annexed either
    eq_([opj('code', 'inannex')],
        [str(Path(p)) for p in repo.get_annexed_files()])
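To see what the no_annex configuration step actually changed, one could inspect the dataset's .gitattributes afterwards; a small print-only sketch, making no assumption about the exact rule syntax datalad writes:

# the rules typically cover the given patterns via largefiles settings, but the
# exact content depends on the datalad version
print((ds.pathobj / '.gitattributes').read_text())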
Example #22
    def setup_class(cls):
        mktmp_kws = get_tempfile_kwargs()
        path = tempfile.mkdtemp(**mktmp_kws)
        http_root = op.join(path, "srv")
        create_tree(
            http_root, {
                "udir": {
                    x + ".dat" + ver: x + " content"
                    for x in "abcd" for ver in ["", ".v1"]
                }
            })

        cls._hpath = HTTPPath(http_root)
        cls._hpath.start()
        cls.url = cls._hpath.url

        cls.data = [{
            "url": cls.url + "udir/a.dat",
            "name": "a",
            "subdir": "foo",
            "md5sum": "3fb7c40c70b0ed19da713bd69ee12014",
            "size": "9"
        }, {
            "url": cls.url + "udir/b.dat",
            "name": "b",
            "subdir": "bar",
            "md5sum": "",
            "size": ""
        }, {
            "url": cls.url + "udir/c.dat",
            "name": "c",
            "subdir": "foo",
            "md5sum": "9b72648021b70b8c522642e4490d7ac3",
            "size": "9"
        }]
        cls.json_file = op.join(path, "test_addurls.json")
        with open(cls.json_file, "w") as jfh:
            json.dump(cls.data, jfh)

        cls.temp_dir = path
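A hedged sketch of how the generated JSON might then be fed to addurls from a test method with access to these class attributes; the dataset path and the format strings are illustrative only (the field names come from the records built above):

# hypothetical follow-up usage of cls.json_file; exact options vary per test
ds = Dataset(op.join(cls.temp_dir, "addurls-ds")).create()
ds.addurls(cls.json_file, "{url}", "{subdir}/{name}.dat")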
Example #23
def check_exists_interactive(use_ssh, path):
    origin = Dataset(opj(path, "origin")).create()
    sibling_path = opj(path, "sibling")

    # Initiate sibling directory with "stuff"
    create_tree(sibling_path, {'stuff': ''})

    if use_ssh:
        sshurl = 'datalad-test:' + sibling_path
    else:
        sshurl = sibling_path

    # Should fail
    with assert_raises(RuntimeError):
        origin.create_sibling(sshurl)

    # Since first response is "no" - we should fail here again:
    with assert_raises(RuntimeError):
        origin.create_sibling(sshurl, existing='replace')
    # and there should be no initiated repository
    assert not Dataset(sibling_path).is_installed()
    # But we would succeed on the 2nd try, since answer will be yes
    origin.create_sibling(sshurl, existing='replace')
    assert Dataset(sibling_path).is_installed()
Example #24
def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #25
def make_studyforrest_mockup(path):
    """Generate a dataset structure mimicking aspects of studyforrest.org

    Under the given path there are two directories:

    public - to be published datasets
    private - never to be published datasets

    The 'public' directory itself is a superdataset, the 'private' directory
    is just a directory that contains standalone datasets in subdirectories.
    """
    public = create(opj(path, 'public'), description="umbrella dataset")
    # the following tries to capture the evolution of the project
    phase1 = public.create('phase1',
                           description='old-style, no connection to RAW')
    structural = public.create('structural', description='anatomy')
    tnt = public.create('tnt', description='image templates')
    tnt.clone(source=phase1.path, path=opj('src', 'phase1'), reckless='auto')
    tnt.clone(source=structural.path,
              path=opj('src', 'structural'),
              reckless='auto')
    aligned = public.create('aligned', description='aligned image data')
    aligned.clone(source=phase1.path,
                  path=opj('src', 'phase1'),
                  reckless='auto')
    aligned.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
    # new acquisition
    labet = create(opj(path, 'private', 'labet'), description="raw data ET")
    phase2_dicoms = create(opj(path, 'private', 'p2dicoms'),
                           description="raw data P2MRI")
    phase2 = public.create('phase2', description='new-style, RAW connection')
    phase2.clone(source=labet.path, path=opj('src', 'labet'), reckless='auto')
    phase2.clone(source=phase2_dicoms.path,
                 path=opj('src', 'dicoms'),
                 reckless='auto')
    # add to derivatives
    tnt.clone(source=phase2.path, path=opj('src', 'phase2'), reckless='auto')
    aligned.clone(source=phase2.path,
                  path=opj('src', 'phase2'),
                  reckless='auto')
    # never to be published media files
    media = create(opj(path, 'private', 'media'), description="raw data ET")
    # assuming all annotations are in one dataset (in reality this is also
    # a superdataset with about 10 subdatasets)
    annot = public.create('annotations', description='stimulus annotation')
    annot.clone(source=media.path, path=opj('src', 'media'), reckless='auto')
    # a few typical analysis datasets
    # (just doing 3, actual status quo is just shy of 10)
    # and also the real goal -> meta analysis
    metaanalysis = public.create('metaanalysis',
                                 description="analysis of analyses")
    for i in range(1, 3):
        ana = public.create('analysis{}'.format(i),
                            description='analysis{}'.format(i))
        ana.clone(source=annot.path, path=opj('src', 'annot'), reckless='auto')
        ana.clone(source=aligned.path,
                  path=opj('src', 'aligned'),
                  reckless='auto')
        ana.clone(source=tnt.path, path=opj('src', 'tnt'), reckless='auto')
        # link to metaanalysis
        metaanalysis.clone(source=ana.path,
                           path=opj('src', 'ana{}'.format(i)),
                           reckless='auto')
        # simulate change in an input (but not raw) dataset
        create_tree(aligned.path,
                    {'modification{}.txt'.format(i): 'unique{}'.format(i)})
        aligned.save()
    # finally aggregate data
    aggregate = public.create('aggregate', description='aggregate data')
    aggregate.clone(source=aligned.path,
                    path=opj('src', 'aligned'),
                    reckless='auto')
Example #26
def populate_dataset(ds):
    # create 2 commits
    for pl in [example_payload, example_payload2]:
        create_tree(ds.path, pl)
        ds.save()
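example_payload and example_payload2 are module-level fixtures that are not shown on this page; the sketch below only illustrates the nested-dict shape create_tree consumes, with hypothetical file names and contents:

# hypothetical payloads; the real ones are defined in the surrounding test module
example_payload = {
    "file1.txt": "some text",
    "subdir": {
        "file2.dat": "more content",
    },
}
example_payload2 = {
    "file3.txt": "yet more content",
}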
Example #27
def test_update_simple(origin=None, src_path=None, dst_path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(origin).create(**ca)
    # naming is weird, but a legacy artifact
    _ = origds.create('subm 1', **ca)
    _ = origds.create('2', **ca)

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing remote, which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote(DEFAULT_REMOTE)
    # also forget the declared absolute location of the submodules, and turn them
    # relative to this/a clone
    for sub in source.subdatasets(result_xfm=lambda x: x['gitmodule_name']):
        source.subdatasets(path=sub,
                           set_property=[('url', './{}'.format(sub))])

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify remote:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch <default remote>/<default branch>
    assert_in("update.txt",
              dest.repo.get_files(DEFAULT_REMOTE + "/" + DEFAULT_BRANCH))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = dest.repo.get_file_annexinfo("update.txt",
                                              eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3,
                        status='ok',
                        type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok', path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod) to
    # operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since "pwd" dataset is not
        # considered, and would be relative path (as specified).
        # In 0.12.0 - it would include implicit pwd dataset, and paths would be absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update,
                              path=p,
                              action='update',
                              status='ok',
                              type='dataset')

        # and with merge we would also try to save (but there would be no changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # 2 of "updates" really.
        assert_in_results(res_merge,
                          action='update',
                          status='ok',
                          type='dataset')
        assert_in_results(res_merge,
                          action='save',
                          status='notneeded',
                          type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(dest.remove('subm 1'),
                        1,
                        status='ok',
                        action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2,
                            status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    # and now we can get new file
    dest.get(opj('2', 'load.dat'))
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
Example #28
def test_update_volatile_subds(originpath=None, otherpath=None, destpath=None):
    origin = Dataset(originpath).create()
    repo = origin.repo
    if repo.is_managed_branch() and repo.git_annex_version <= "8.20201129":
        # Fails before git-annex's fd161da2c (adjustTree: Consider submodule
        # deletions, 2021-01-06).
        raise SkipTest(
            "On adjusted branch, test requires fix in more recent git-annex")
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname, reckless='availability')
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # grab new content of the uninstalled subdataset right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, reckless='kill')
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now disconnected subdataset for further tests
    remove(dataset=op.join(ds.path, sname), reckless='kill')
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
Example #29
def _test_target_ssh_inherit(standardgroup, ui, use_ssh, src_path,
                             target_path):
    ds = Dataset(src_path).create()
    if use_ssh:
        target_url = 'datalad-test:%s' % target_path
    else:
        target_url = target_path
    remote = "magical"
    # for the test of setting a group, we will just smoke test using the current
    # user's group
    ds.create_sibling(target_url,
                      name=remote,
                      shared='group',
                      group=os.getgid(),
                      ui=ui)  # not doing recursively
    if standardgroup:
        ds.repo.set_preferred_content('wanted', 'standard', remote)
        ds.repo.set_preferred_content('group', standardgroup, remote)
    ds.publish(to=remote)

    # now, a month later, we create a new subdataset... a few nested ones.
    # A known hiccup happened when a subsub dataset was also added - we might
    # incorrectly traverse and not prepare sub first for subsub to inherit from, etc.
    parent_ds = ds
    subdss = []
    nlevels = 2  # gets slow: 1 - 43 sec, 2 - 49 sec , 3 - 69 sec
    for levels in range(nlevels):
        subds = parent_ds.create('sub')
        create_tree(subds.path, {'sub.dat': 'lots of data'})
        parent_ds.save('sub', recursive=True)
        ok_file_under_git(subds.path, 'sub.dat', annexed=True)
        parent_ds = subds
        subdss.append(subds)

    target_subdss = [
        Dataset(opj(*([target_path] + ['sub'] * (i + 1))))
        for i in range(nlevels)
    ]
    # since we do not yet have (and thus have not used) an option to record publishing
    # to that sibling by default (e.g. --set-upstream), running just ds.publish
    # should fail
    assert_result_count(
        ds.publish(on_failure='ignore'),
        1,
        status='impossible',
        message=
        'No target sibling configured for default publication, please specify via --to'
    )
    # should be ok, non recursive; BUT it (git or us?) would
    # create an empty sub/ directory
    ds.publish(to=remote)
    assert_postupdate_hooks(target_path, installed=ui)
    for target_sub in target_subdss:
        ok_(not target_sub.is_installed())  # still not there
    res = ds.publish(to=remote, recursive=True, on_failure='ignore')
    assert_result_count(res, 1 + len(subdss))
    assert_status(('error', 'notneeded'), res)
    assert_result_count(res,
                        len(subdss),
                        status='error',
                        message=("Unknown target sibling '%s' for publication",
                                 'magical'))

    # Finally publishing with inheritance
    ds.publish(to=remote, recursive=True, missing='inherit')
    assert_postupdate_hooks(target_path, installed=ui)

    def check_dss():
        # we added the remote and set all the preferred content settings
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote),
                'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote),
                standardgroup or '')

        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then content should be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))

    check_dss()
    # and it should be ok to reconfigure the full hierarchy of datasets
    # while "inheriting". No URL must be specified, and we must not blow up,
    # but just issue a warning for the top-level dataset, which has no super
    # and so cannot inherit anything - the use case is to fix up/establish the full
    # hierarchy on the remote site
    # so we have a committed hierarchy for create_sibling
    ds.save(recursive=True)
    with swallow_logs(logging.WARNING) as cml:
        out = ds.create_sibling(None,
                                name=remote,
                                existing="reconfigure",
                                inherit=True,
                                ui=ui,
                                recursive=True)
        eq_(len(out), 1 + len(subdss))
        assert_in("Cannot determine super dataset", cml.out)

    check_dss()
Example #30
def test_run_explicit(origpath=None, path=None):
    origds = Dataset(origpath).create()
    (origds.pathobj / "test-annex.dat").write_text('content')
    origds.save()
    ds = clone(origpath, path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {
        "dirt_untracked": "untracked",
        "dirt_modified": "modified"
    })
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status(
        "impossible",
        ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
               inputs=["test-annex.dat"],
               on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           explicit=True,
           result_renderer='disabled')
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with assert_raises(IncompleteResultsError):
        ds.run("ls",
               inputs=["not-there"],
               explicit=True,
               on_failure="stop",
               result_renderer='disabled')

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run(f"{cat_command} test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"],
           outputs=["doubled.dat"],
           explicit=True,
           result_renderer='disabled')
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path,
                       modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo",
            explicit=True,
            outputs=["foo"],
            result_renderer='disabled')
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))