示例#1
0
文件: wtf.py 项目: ypid/datalad
def get_max_path_length(top_path=None, maxl=1000):
    """Deduce the maximal length of the filename in a given path
    """
    if not top_path:
        top_path = getpwd()
    import random
    from datalad import lgr
    from datalad.dochelpers import exc_str
    from datalad.support import path
    prefix = path.join(top_path, "dl%d" % random.randint(1, 100000))
    # some smart folks could implement binary search for this
    max_path_length = None
    for i in range(maxl - len(prefix)):
        filename = prefix + '_' * i
        path_length = len(filename)
        try:
            with open(filename, 'w') as f:
                max_path_length = path_length
        except Exception as exc:
            lgr.debug(
                "Failed to create sample file for length %d. Last succeeded was %s. Exception: %s",
                path_length, max_path_length, exc_str(exc))
            break
        unlink(filename)
    return max_path_length
示例#2
0
def test_get_missing(path):
    repo = GitRepo(path, create=True)
    os.makedirs(op.join(path, 'deep'))
    with open(op.join(path, 'test1'), 'w') as f:
        f.write('some')
    with open(op.join(path, 'deep', 'test2'), 'w') as f:
        f.write('some more')
    # no files tracked yet, so nothing changed
    eq_(repo.get_changed_files(), [])
    repo.add('.')
    # still no differences between worktree and staged
    eq_(repo.get_changed_files(), [])
    eq_(set(repo.get_changed_files(staged=True)),
        {'test1', op.join('deep', 'test2')})
    eq_(set(repo.get_changed_files(staged=True, diff_filter='AD')),
        {'test1', op.join('deep', 'test2')})
    eq_(repo.get_changed_files(staged=True, diff_filter='D'), [])
    repo.commit()
    eq_(repo.get_changed_files(), [])
    eq_(repo.get_changed_files(staged=True), [])
    ok_clean_git(path, annex=False)
    unlink(op.join(path, 'test1'))
    eq_(repo.get_missing_files(), ['test1'])
    rmtree(op.join(path, 'deep'))
    eq_(sorted(repo.get_missing_files()), [op.join('deep', 'test2'), 'test1'])
    # nothing is actually known to be deleted
    eq_(repo.get_deleted_files(), [])
    # do proper removal
    repo.remove(op.join(path, 'test1'))
    # no longer missing
    eq_(repo.get_missing_files(), [op.join('deep', 'test2')])
    # but deleted
    eq_(repo.get_deleted_files(), ['test1'])
示例#3
0
文件: wtf.py 项目: hanke/datalad
def get_max_path_length(top_path=None, maxl=1000):
    """Deduce the maximal length of the filename in a given path
    """
    if not top_path:
        top_path = getpwd()
    import os
    import random
    from datalad import lgr
    from datalad.dochelpers import exc_str
    from datalad.support import path
    prefix = path.join(top_path, "dl%d" % random.randint(1 ,100000))
    # some smart folks could implement binary search for this
    max_path_length = None
    for i in range(maxl-len(prefix)):
        filename = prefix + '_' * i
        path_length = len(filename)
        try:
            with open(filename, 'w') as f:
                max_path_length = path_length
        except Exception as exc:
            lgr.debug(
                "Failed to create sample file for length %d. Last succeeded was %s. Exception: %s",
                path_length, max_path_length, exc_str(exc))
            break
        unlink(filename)
    return max_path_length
示例#4
0
def test_rotree(d):
    d2 = opj(d, 'd1', 'd2')  # deep nested directory
    f = opj(d2, 'f1')
    os.makedirs(d2)
    with open(f, 'w') as f_:
        f_.write("LOAD")
    with swallow_logs():
        ar = AnnexRepo(d2)
    rotree(d)
    # we shouldn't be able to delete anything UNLESS in "crippled" situation:
    # root, or filesystem is FAT etc
    # Theoretically annex should declare FS as crippled when ran as root, but
    # see http://git-annex.branchable.com/bugs/decides_that_FS_is_crippled_
    # under_cowbuilder___40__symlinks_supported_etc__41__/#comment-60c3cbe2710d6865fb9b7d6e247cd7aa
    # so explicit 'or'
    if not (ar.is_crippled_fs() or (os.getuid() == 0)):
        assert_raises(OSError, os.unlink, f)  # OK to use os.unlink
        assert_raises(OSError, unlink, f)  # and even with waiting and trying!
        assert_raises(OSError, shutil.rmtree, d)
        # but file should still be accessible
        with open(f) as f_:
            eq_(f_.read(), "LOAD")
    # make it RW
    rotree(d, False)
    unlink(f)
    shutil.rmtree(d)
示例#5
0
def test_get_missing(path):
    repo = GitRepo(path, create=True)
    os.makedirs(op.join(path, 'deep'))
    with open(op.join(path, 'test1'), 'w') as f:
        f.write('some')
    with open(op.join(path, 'deep', 'test2'), 'w') as f:
        f.write('some more')
    # no files tracked yet, so nothing changed
    eq_(repo.get_changed_files(), [])
    repo.add('.')
    # still no differences between worktree and staged
    eq_(repo.get_changed_files(), [])
    eq_(set(repo.get_changed_files(staged=True)),
        {'test1', op.join('deep', 'test2')})
    eq_(set(repo.get_changed_files(staged=True, diff_filter='AD')),
        {'test1', op.join('deep', 'test2')})
    eq_(repo.get_changed_files(staged=True, diff_filter='D'), [])
    repo.commit()
    eq_(repo.get_changed_files(), [])
    eq_(repo.get_changed_files(staged=True), [])
    ok_clean_git(path, annex=False)
    unlink(op.join(path, 'test1'))
    eq_(repo.get_missing_files(), ['test1'])
    rmtree(op.join(path, 'deep'))
    eq_(sorted(repo.get_missing_files()), [op.join('deep', 'test2'), 'test1'])
    # nothing is actually known to be deleted
    eq_(repo.get_deleted_files(), [])
    # do proper removal
    repo.remove(op.join(path, 'test1'))
    # no longer missing
    eq_(repo.get_missing_files(), [op.join('deep', 'test2')])
    # but deleted
    eq_(repo.get_deleted_files(), ['test1'])
示例#6
0
def link_file_load(src, dst, dry_run=False):
    """Just a little helper to hardlink files's load
    """
    dst_dir = op.dirname(dst)
    if not op.exists(dst_dir):
        os.makedirs(dst_dir)
    if op.lexists(dst):
        lgr.log(9, "Destination file %(dst)s exists. Removing it first",
                locals())
        # TODO: how would it interact with git/git-annex
        unlink(dst)
    lgr.log(9, "Hardlinking %(src)s under %(dst)s", locals())
    src_realpath = op.realpath(src)

    try:
        os.link(src_realpath, dst)
    except (OSError, AttributeError) as e:
        # we need to catch OSError too, because Python's own logic
        # of not providing link() where it is known to be unsupported
        # (e.g. Windows) will not cover scenarios where a particular
        # filesystem simply does not implement it on an otherwise
        # sane platform (e.g. exfat on Linux)
        lgr.warning("Linking of %s failed (%s), copying file" % (src, e))
        shutil.copyfile(src_realpath, dst)
        shutil.copystat(src_realpath, dst)
    else:
        lgr.log(2, "Hardlinking finished")
示例#7
0
def test_get_git_dir(path):
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:
        # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)
    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
示例#8
0
def test_get_git_dir(path):
    # minimal, only missing coverage
    assert_raises(RuntimeError, GitRepo.get_git_dir, path)

    srcpath = opj(path, 'src')
    targetpath = opj(path, 'target')
    targetgitpath = opj(targetpath, '.git')
    os.makedirs(srcpath)
    os.makedirs(targetpath)
    if not on_windows:
        # with PY3 would also work with Windows 6+
        os.symlink(srcpath, targetgitpath)
        eq_(srcpath, GitRepo.get_git_dir(targetpath))
        # cleanup for following test
        unlink(targetgitpath)
    with open(targetgitpath, 'w') as f:
        f.write('gitdir: {}'.format(srcpath))
    eq_(srcpath, GitRepo.get_git_dir(targetpath))
示例#9
0
def add_urls(rows, ifexists=None, options=None):
    """Call `git annex addurl` using information in `rows`.
    """
    for row in rows:
        filename_abs = row["filename_abs"]
        ds, filename = row["ds"], row["ds_filename"]
        lgr.debug("Adding metadata to %s in %s", filename, ds.path)

        if os.path.exists(filename_abs) or os.path.islink(filename_abs):
            if ifexists == "skip":
                yield get_status_dict(action="addurls",
                                      ds=ds,
                                      type="file",
                                      path=filename_abs,
                                      status="notneeded")
                continue
            elif ifexists == "overwrite":
                lgr.debug("Removing %s", filename_abs)
                unlink(filename_abs)
            else:
                lgr.debug("File %s already exists", filename_abs)

        try:
            out_json = ds.repo.add_url_to_file(filename,
                                               row["url"],
                                               batch=True,
                                               options=options)
        except AnnexBatchCommandError as exc:
            yield get_status_dict(action="addurls",
                                  ds=ds,
                                  type="file",
                                  path=filename_abs,
                                  message=exc_str(exc),
                                  status="error")
            continue

        # In the case of an error, the json object has file=None.
        if out_json["file"] is None:
            out_json["file"] = filename_abs
        yield annexjson2result(out_json,
                               ds,
                               action="addurls",
                               type="file",
                               logger=lgr)
示例#10
0
def add_urls(rows, ifexists=None, options=None):
    """Call `git annex addurl` using information in `rows`.
    """
    for row in rows:
        filename_abs = row["filename_abs"]
        ds, filename = row["ds"], row["ds_filename"]
        lgr.debug("Adding metadata to %s in %s", filename, ds.path)

        if os.path.exists(filename_abs) or os.path.islink(filename_abs):
            if ifexists == "skip":
                yield get_status_dict(action="addurls",
                                      ds=ds,
                                      type="file",
                                      path=filename_abs,
                                      status="notneeded")
                continue
            elif ifexists == "overwrite":
                lgr.debug("Removing %s", filename_abs)
                unlink(filename_abs)
            else:
                lgr.debug("File %s already exists", filename_abs)

        try:
            out_json = ds.repo.add_url_to_file(filename, row["url"],
                                               batch=True, options=options)
        except AnnexBatchCommandError as exc:
            yield get_status_dict(action="addurls",
                                  ds=ds,
                                  type="file",
                                  path=filename_abs,
                                  message=exc_str(exc),
                                  status="error")
            continue

        # In the case of an error, the json object has file=None.
        if out_json["file"] is None:
            out_json["file"] = filename_abs
        yield annexjson2result(out_json, ds, action="addurls",
                               type="file", logger=lgr)
示例#11
0
    def __call__(dataset, filename=None, missing_content='error', no_annex=False,
                 # TODO: support working with projects and articles within them
                 # project_id=None,
                 article_id=None):
        import os
        import logging
        lgr = logging.getLogger('datalad.plugin.export_to_figshare')

        from datalad.ui import ui
        from datalad.api import add_archive_content
        from datalad.api import export_archive
        from datalad.distribution.dataset import require_dataset
        from datalad.support.annexrepo import AnnexRepo

        dataset = require_dataset(dataset, check_installed=True,
                                  purpose='export to figshare')

        if not isinstance(dataset.repo, AnnexRepo):
            raise ValueError(
                "%s is not an annex repo, so annexification could be done"
                % dataset
            )

        if dataset.repo.is_dirty():
            raise RuntimeError(
                "Paranoid authors of DataLad refuse to proceed in a dirty repository"
            )
        if filename is None:
            filename = dataset.path
        lgr.info(
            "Exporting current tree as an archive under %s since figshare "
            "does not support directories",
            filename
        )
        archive_out = next(
            export_archive(
                dataset,
                filename=filename,
                archivetype='zip',
                missing_content=missing_content,
                return_type="generator"
            )
        )
        assert archive_out['status'] == 'ok'
        fname = archive_out['path']

        lgr.info("Uploading %s to figshare", fname)
        figshare = FigshareRESTLaison()

        if not article_id:
            # TODO: ask if it should be an article within a project
            if ui.is_interactive:
                # or should we just upload to a new article?
                if ui.yesno(
                    "Would you like to create a new article to upload to?  "
                    "If not - we will list existing articles",
                    title="Article"
                ):
                    article = figshare.create_article(
                        title=os.path.basename(dataset.path)
                    )
                    lgr.info(
                        "Created a new (private) article %(id)s at %(url_private_html)s. "
                        "Please visit it, enter additional meta-data and make public",
                        article
                    )
                    article_id = article['id']
                else:
                    article_id = int(ui.question(
                        "Which of the articles should we upload to.",
                        choices=list(map(str, figshare.get_article_ids()))
                    ))
            if not article_id:
                raise ValueError("We need an article to upload to.")

        file_info = figshare.upload_file(
            fname,
            files_url='account/articles/%s/files' % article_id
        )

        if no_annex:
            lgr.info("Removing generated tarball")
            unlink(fname)
        else:
            # I will leave all the complaining etc to the dataset add if path
            # is outside etc
            lgr.info("'Registering' %s within annex", fname)
            repo = dataset.repo
            repo.add(fname, git=False)
            key = repo.get_file_key(fname)
            lgr.info("Adding URL %(download_url)s for it", file_info)
            repo._annex_custom_command([],
                [
                    "git", "annex", "registerurl", '-c', 'annex.alwayscommit=false',
                    key, file_info['download_url']
                ]
            )

            lgr.info("Registering links back for the content of the archive")
            add_archive_content(
                fname,
                annex=dataset.repo,
                delete_after=True,  # just remove extracted into a temp dir
                allow_dirty=True,  # since we have a tarball
                commit=False  # we do not want to commit anything we have done here
            )

            lgr.info("Removing generated and now registered in annex archive")
            repo.drop(key, key=True, options=['--force'])
            repo.remove(fname, force=True)  # remove the tarball

            # if annex in {'delete'}:
            #     dataset.repo.remove(fname)
            # else:
            #     # kinda makes little sense I guess.
            #     # Made more sense if export_archive could export an arbitrary treeish
            #     # so we could create a branch where to dump and export to figshare
            #     # (kinda closer to my idea)
            #     dataset.save(fname, message="Added the entire dataset into a zip file")

        # TODO: add to downloader knowledge about figshare token so it could download-url
        # those zipballs before they go public
        yield dict(
            status='ok',
            # TODO: add article url (which needs to be queried if only ID is known
            message="Published archive {}".format(
                file_info['download_url']),
            file_info=file_info,
            path=dataset,
            action='export_to_figshare',
            logger=lgr
        )
示例#12
0
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds.save(result_filter=is_ok_dataset),
        'path',
        [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommited file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all dataset, but there is
    # nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states, [d.repo.get_hexsha()
                                     for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage", path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided by path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
示例#13
0
def test_recursive_save(path):
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds._save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds._save(result_filter=is_ok_dataset),
        'path',
        [ds.path])

    # make the new file known to its dataset
    ds.add(newfile_name, save=False)

    # but remains dirty because of the uncommited file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds._save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subsubds.path, subds.path, ds.path])

    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds._save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all dataset, but there is
    # nothing actually saved
    res = ds._save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)

    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds._save())
    # no recursive
    assert_status('notneeded', ds._save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path',
        [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path',
        [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds._save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds._save(recursive=True))
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)

    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds._save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states, [d.repo.get_hexsha()
                                     for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds._save(message="savingtestmessage", path=['testnew2'],
                   super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])

    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')

    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})

    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds._save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided by path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')