Example #1
def test_install_known_subdataset(src=None, path=None):

    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_not_in('subm 1',
                  ds.subdatasets(state='present', result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    assert_in("test-annex.dat", subds.repo.get_indexed_files())
    assert_not_in('subm 1',
                  ds.subdatasets(state='absent', result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(state='present', result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
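
A note on the shared pattern before more examples: these tests assert on DataLad result records, plain dicts with keys such as 'action', 'status', 'path', and 'message'. A minimal sketch of the matching rule the helpers implement, offered as an assumption for illustration rather than DataLad's actual code:

def assert_in_results_sketch(results, **kwargs):
    # Passes if at least one result record carries all given key/value pairs.
    for res in results:
        if all(res.get(k) == v for k, v in kwargs.items()):
            return
    raise AssertionError("no result matched %r" % (kwargs,))


def assert_not_in_results_sketch(results, **kwargs):
    # The inverse: no result record may carry all given key/value pairs.
    for res in results:
        assert not all(res.get(k) == v for k, v in kwargs.items())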
Example #2
def test_py2_unicode_command(path=None):
    # Skip OBSCURE_FILENAME to avoid Windows breakage (gh-2929).
    ds = Dataset(path).create()
    touch_cmd = "import sys; open(sys.argv[1], 'w').write('')"
    cmd_str = u"{} -c \"{}\" {}".format(sys.executable, touch_cmd, u"bβ0.dat")
    ds.run(cmd_str)
    assert_repo_status(ds.path)
    ok_exists(op.join(path, u"bβ0.dat"))

    # somewhat desperate attempt to detect our own GitHub CI tests on a
    # filesystem (VFAT) so crippled that it cannot handle what is needed
    # here and goes mad with encoded bytestrings:
    # CommandError: ''python -c '"'"'import sys; open(sys.argv[1], '"'"'"'"'"'"'"'"'w'"'"'"'"'"'"'"'"').write('"'"'"'"'"'"'"'"''"'"'"'"'"'"'"'"')'"'"' '"'"' β1 '"'"''' failed with exitcode 1 under /crippledfs/
    if not on_windows and os.environ.get('TMPDIR',
                                         None) != '/crippledfs':  # FIXME
        ds.run([sys.executable, "-c", touch_cmd, u"bβ1.dat"])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u"bβ1.dat"))

        # Send in a list of byte-strings to mimic a py2 command-line
        # invocation.
        ds.run([
            s.encode("utf-8")
            for s in [sys.executable, "-c", touch_cmd, u" β1 "]
        ])
        assert_repo_status(ds.path)
        ok_exists(op.join(path, u" β1 "))

    assert_in_results(ds.run(u"bβ2.dat",
                             result_renderer=None,
                             on_failure="ignore"),
                      status="error",
                      action="run")
Example #3
def test_add_archive_use_archive_dir(repo_path=None):
    ds = Dataset(repo_path).create(force=True)
    with chpwd(repo_path):
        # Let's add first archive to the repo with default setting
        archive_path = opj('4u', '1.tar.gz')
        # check it gives informative error if archive is not already added
        res = add_archive_content(archive_path, on_failure='ignore')
        message = (
            "Can not add an untracked archive. Run 'datalad save 4u\\1.tar.gz'"
            if on_windows else
            "Can not add an untracked archive. Run 'datalad save 4u/1.tar.gz'")
        assert_in_results(res,
                          action='add-archive-content',
                          message=message,
                          status='impossible')

        with swallow_outputs():
            ds.save(archive_path)

        ok_archives_caches(ds.path, 0)
        add_archive_content(archive_path,
                            strip_leading_dirs=True,
                            use_current_dir=True)
        ok_(not exists(opj('4u', '1 f.txt')))
        ok_file_under_git(ds.path, '1 f.txt', annexed=True)
        ok_archives_caches(ds.path, 0)

        # and now let's extract under archive dir
        add_archive_content(archive_path, strip_leading_dirs=True)
        ok_file_under_git(ds.path, opj('4u', '1 f.txt'), annexed=True)
        ok_archives_caches(ds.path, 0)

        add_archive_content(opj('4u', 'sub.tar.gz'))
        ok_file_under_git(ds.path, opj('4u', 'sub', '2 f.txt'), annexed=True)
        ok_archives_caches(ds.path, 0)
Example #4
def test_merge_follow_parentds_subdataset_adjusted_warning(path=None):
    path = Path(path)

    ds_src = Dataset(path / "source").create()
    ds_src_subds = ds_src.create("subds")

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")
    maybe_adjust_repo(ds_clone_subds.repo)
    # Note: Were we to save ds_clone here, we would get a merge conflict in the
    # top repo for the submodule (even if using 'git annex sync' rather than
    # 'git merge').

    ds_src_subds.repo.call_git(["checkout", DEFAULT_BRANCH + "^0"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    assert_in_results(ds_clone.update(merge=True,
                                      recursive=True,
                                      follow="parentds",
                                      on_failure="ignore"),
                      status="impossible",
                      path=ds_clone_subds.path,
                      action="update")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())
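
maybe_adjust_repo comes from the test utilities and switches the cloned subdataset onto a git-annex adjusted branch. A rough stand-in under that assumption (is_managed_branch and adjust are AnnexRepo methods; the real utility's body is not quoted here):

def maybe_adjust_repo_sketch(repo):
    # Enter an adjusted branch unless the repo is already on a managed one.
    if not repo.is_managed_branch():
        repo.adjust()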
Example #5
def test_install_skip_failed_recursive(src=None, path=None):
    _mk_submodule_annex(src, fname="test-annex.dat", fcontent="whatever")

    # install top level:
    ds = install(path, source=src)
    sub1 = Dataset(opj(path, 'subm 1'))
    sub2 = Dataset(opj(path, '2'))
    # sabotage recursive installation of 'subm 1' by polluting the target:
    with open(opj(path, 'subm 1', 'blocking.txt'), "w") as f:
        f.write("sdfdsf")

    with swallow_logs(new_level=logging.WARNING) as cml:
        result = ds.get(os.curdir,
                        recursive=True,
                        on_failure='ignore',
                        result_xfm=None)
        # toplevel dataset was in the house already
        assert_result_count(result, 0, path=ds.path, type='dataset')
        # 'subm 1' should fail to install; it comes second in the results
        # since it sorts after the '2' submodule
        assert_in_results(
            result,
            status='error',
            path=sub1.path,
            type='dataset',
            message='target path already exists and not empty, refuse to '
            'clone into target path')
        assert_in_results(result, status='ok', path=sub2.path)
Example #6
def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)

    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='error',
                      message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable",
        name='wrong',
        result_renderer='disabled',
        on_failure='ignore',
    )
    assert_in_results(res,
                      status='impossible',
                      message=("cannot enable sibling '%s', not known",
                               'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))
Example #7
    def test_addurls_url_on_collision_error_if_different(self=None, path=None):
        ds = Dataset(path).create(force=True)

        data = [self.data[0].copy(), self.data[0].copy()]
        data[0]["some_metadata"] = "1"
        data[1]["some_metadata"] = "2"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-",
                       "{url}",
                       "{name}",
                       exclude_autometa="*",
                       on_collision="error-if-different")
        ok_exists(op.join(ds.path, "a"))
Example #8
def test_unlock_raises(path=None, path2=None, path3=None):

    # make sure we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)

    # no dataset and no path:
    assert_raises(InsufficientArgumentsError,
                  unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    assert_raises(NoDatasetFound,
                  unlock, dataset=None, path=path2)

    create(path=path, annex=False)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    (ds.pathobj / ".noannex").unlink()
    AnnexRepo(path, create=True)

    # One that doesn't exist.
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    # And one that isn't associated with a dataset.
    assert_in_results(
        ds.unlock(path=path2, on_failure="ignore"),
        status="error",
        message=("path not underneath the reference dataset %s", ds.path))

    chpwd(_cwd)
Example #9
def test_run_remove_keeps_leading_directory(path=None):
    ds = Dataset(op.join(path, "ds")).create()
    repo = ds.repo

    (ds.pathobj / "d").mkdir()
    output = (ds.pathobj / "d" / "foo")
    output.write_text("foo")
    ds.save()

    output_rel = str(output.relative_to(ds.pathobj))
    repo.drop(output_rel, options=["--force"])

    assert_in_results(ds.run("cd .> {}".format(output_rel),
                             outputs=[output_rel],
                             result_renderer='disabled'),
                      action="run.remove",
                      status="ok")

    assert_repo_status(ds.path)

    # The removal still gets saved if the command doesn't regenerate the output
    # (just as it would if git-rm were used instead of unlink).
    repo.drop(output_rel, options=["--force"])
    assert_in_results(ds.run("cd .> something-else",
                             outputs=[output_rel],
                             result_renderer='disabled'),
                      action="run.remove",
                      status="ok")
    assert_repo_status(ds.path)
Example #10
    def test_addurls_url_on_collision_choose(self=None, path=None):
        ds = Dataset(path).create(force=True)
        data = deepcopy(self.data)
        for row in data:
            row["name"] = "a"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
        ok_file_has_content(op.join(ds.path, "a-first"),
                            "a content",
                            strip=True)

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
        ok_file_has_content(op.join(ds.path, "a-last"),
                            "c content",
                            strip=True)
Example #11
def test_configs(path=None):

    # set up a dataset with a registered procedure (copied from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  scope='branch')

    # 1. run the procedure, relying on run_procedure to guess how to execute it:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure a specific call format, including use of the
    # substitution config for run:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} {{mysub}} {args}' %
                  quote_cmdlinearg(sys.executable),
                  scope='branch')
    ds.config.add('datalad.run.substitutions.mysub',
                  'dataset-call-config',
                  scope='branch')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at a more local scope, which should
    # override the dataset-level config:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} local {args}' %
                  quote_cmdlinearg(sys.executable),
                  scope='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc',
                         help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add('datalad.procedures.datalad_test_proc.help',
                  "This is a help message",
                  scope='branch')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
Example #12
def check_push(ds):
    # create a file and push it to GIN to see if
    # access is set up properly
    (ds.pathobj / 'file').write_text('some')
    ds.save()
    assert_in_results(ds.push(to='gin', result_renderer='disabled'),
                      action='copy',
                      status='ok',
                      path=str(ds.pathobj / 'file'))
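
check_push is a helper rather than a standalone test: it assumes a sibling named 'gin' has been configured beforehand. A hypothetical caller (the URL is made up for illustration):

ds.siblings('add', name='gin',
            url='https://gin.g-node.org/someuser/somerepo')
check_push(ds)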
Example #13
def test_as_common_datasource(testbed=None,
                              viapath=None,
                              viaurl=None,
                              remotepath=None,
                              url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()

    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
        'is not http or https',
    )

    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)

    # now check whether it works: we clone the clone and get a repo that does
    # not know its ultimate origin; still, we should be able to pull data
    # from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))
Example #14
def test_download_url_dataset(toppath=None, topurl=None, path=None):
    # Non-dataset directory.
    file1_fullpath = opj(path, "file1.txt")
    with chpwd(path):
        download_url(topurl + "file1.txt")
        ok_exists(file1_fullpath)
    os.remove(file1_fullpath)

    files_tosave = ['file1.txt', 'file2.txt']
    urls_tosave = [topurl + f for f in files_tosave]

    ds = Dataset(opj(path, "ds")).create()

    # By default, files are saved when called in a dataset.
    ds.download_url(urls_tosave)
    for fname in files_tosave:
        ok_(ds.repo.file_has_content(fname))

    eq_(ds.repo.get_urls("file1.txt"), [urls_tosave[0]])
    eq_(ds.repo.get_urls("file2.txt"), [urls_tosave[1]])

    ds.download_url([topurl + "file3.txt"], save=False)
    assert_false(ds.repo.file_has_content("file3.txt"))

    # Leading paths for target are created if needed.
    subdir_target = opj("l1", "l2", "f")
    ds.download_url([opj(topurl, "file1.txt")], path=subdir_target)
    ok_(ds.repo.file_has_content(subdir_target))

    subdir_path = opj(ds.path, "subdir", "")
    os.mkdir(subdir_path)
    with chpwd(subdir_path):
        download_url(topurl + "file4.txt")
        download_url(topurl + "file5.txt", path="five.txt")
        ds.download_url(topurl + "file6.txt")
        download_url(topurl + "file7.txt", dataset=ds.path)
    # download_url calls within a subdirectory save the file there
    ok_(ds.repo.file_has_content(opj("subdir", "file4.txt")))
    ok_(ds.repo.file_has_content(opj("subdir", "five.txt")))
    # ... unless the dataset instance is provided
    ok_(ds.repo.file_has_content("file6.txt"))
    # ... but a string for the dataset (as it would be from the command line)
    # still uses CWD semantics
    ok_(ds.repo.file_has_content(opj("subdir", "file7.txt")))

    with chpwd(path):
        # We're in a non-dataset path and pass in a string as the dataset. The
        # path is taken as relative to the current working directory, so we get
        # an error when trying to save it.
        assert_in_results(download_url(topurl + "file8.txt",
                                       dataset=ds.path,
                                       on_failure="ignore"),
                          status="error",
                          action="status")
    assert_false((ds.pathobj / "file8.txt").exists())
Example #15
def test_merge_no_merge_target(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")
Example #16
def test_on_failure_continue(path=None):
    ds = Dataset(op.join(path, "ds")).create(force=True)
    # save() calls status() in a way that respects on_failure.
    assert_in_results(ds.save(
        path=[op.join(path, "outside"),
              op.join(path, "ds", "within")],
        on_failure="ignore"),
                      action="status",
                      status="error")
    # save() continued despite the failure and saved ds/within.
    assert_repo_status(ds.path)
Example #17
    def test_addurls_no_rows(self=None, path=None):
        ds = Dataset(path).create(force=True)
        for fname in ["in.csv", "in.tsv", "in.json"]:
            with swallow_logs(new_level=logging.WARNING) as cml:
                assert_in_results(ds.addurls(fname,
                                             "{url}",
                                             "{name}",
                                             result_renderer='disabled'),
                                  action="addurls",
                                  status="notneeded")
                cml.assert_logged("No rows", regex=False)
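
swallow_logs captures log output so the test can assert on it (here, the "No rows" warning). A minimal stand-in with the same shape, offered as an assumption rather than the real utility:

import io
import logging
import re
from contextlib import contextmanager

@contextmanager
def swallow_logs_sketch(new_level=logging.WARNING):
    # Capture root-logger output at `new_level` and hand back an object
    # with an assert_logged() helper, mirroring the usage above.
    buf = io.StringIO()
    handler = logging.StreamHandler(buf)
    root = logging.getLogger()
    old_level = root.level
    root.addHandler(handler)
    root.setLevel(new_level)

    class _Capture:
        def assert_logged(self, msg, regex=True):
            text = buf.getvalue()
            assert re.search(msg, text) if regex else msg in text

    try:
        yield _Capture()
    finally:
        root.removeHandler(handler)
        root.setLevel(old_level)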
Example #18
def test_rerun_cherry_pick(path=None):
    ds = Dataset(path).create()

    ds.repo.tag("prerun")
    ds.run('echo abc > runfile')
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")

    for onto, action in [("HEAD", "skip"), ("prerun", "pick")]:
        results = ds.rerun(since="prerun", onto=onto)
        assert_in_results(results, status='ok', rerun_action=action)
Example #19
def test_nested_create(path=None):
    # to document some more organic usage pattern
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    lvl2relpath = op.join('lvl1', 'lvl2')
    lvl2path = op.join(ds.path, lvl2relpath)
    os.makedirs(lvl2path)
    os.makedirs(op.join(ds.path, 'lvl1', 'empty'))
    with open(op.join(lvl2path, 'file'), 'w') as f:
        f.write('some')
    ok_(ds.save())
    # Empty directories are filtered out.
    assert_repo_status(ds.path, untracked=[])
    # later create subdataset in a fresh dir
    # WINDOWS FAILURE IS NEXT LINE
    subds1 = ds.create(op.join('lvl1', 'subds'))
    assert_repo_status(ds.path, untracked=[])
    eq_(ds.subdatasets(result_xfm='relpaths'), [op.join('lvl1', 'subds')])
    # later create subdataset in an existing empty dir
    subds2 = ds.create(op.join('lvl1', 'empty'))
    assert_repo_status(ds.path)
    # later try to wrap existing content into a new subdataset
    # but that won't work
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [op.join(lvl2path, 'file')]))
    # even with force, since doing this properly would require complicated
    # surgery
    # MIH disable shaky test till proper dedicated upfront check is in-place in `create`
    # gh-1725
    #assert_in_results(
    #    ds.create(lvl2relpath, force=True,
    #              on_failure='ignore', result_xfm=None, result_filter=None),
    #    status='error', action='add')
    # only way to make it work is to unannex the content upfront
    ds.repo.call_annex(['unannex', op.join(lvl2relpath, 'file')])
    # nothing to save, git-annex commits the unannex itself, but only on v5
    ds.repo.commit()
    # still nothing without force
    # "err='lvl1/lvl2' already exists in the index"
    assert_in_results(
        ds.create(lvl2relpath, **raw),
        status='error',
        message=
        'will not create a dataset in a non-empty directory, use `--force` option to ignore'
    )
    # XXX even force doesn't help, because (I assume) GitPython doesn't update
    # its representation of the Git index properly
    ds.create(lvl2relpath, force=True)
    assert_in(lvl2relpath, ds.subdatasets(result_xfm='relpaths'))
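
raw is a kwargs dict defined elsewhere in the test module. The calls above only make sense if it returns plain result records and keeps failures from raising; a plausible, unverified definition:

raw = dict(return_type='list', result_filter=None, result_xfm=None,
           on_failure='ignore')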
Example #20
def test_specialremote(dspath=None, remotepath=None):
    ds = Dataset(dspath).create()
    ds.repo.call_annex([
        'initremote', 'myremote', 'type=directory', f'directory={remotepath}',
        'encryption=none'
    ])
    res = ds.siblings('query', result_renderer='disabled')
    assert_in_results(
        res, **{
            'name': 'myremote',
            'annex-type': 'directory',
            'annex-directory': remotepath
        })
Example #21
def test_dirty(path=None):
    ds = Dataset(path).create(force=True)
    # must fail, because README.md is to be modified but is already dirty
    assert_in_results(ds.run_procedure('cfg_yoda',
                                       on_failure="ignore",
                                       result_renderer=None),
                      action="run",
                      status="error")
    # make sure that was the issue
    # save to git explicitly to keep the test simple and avoid unlocking...
    ds.save('README.md', to_git=True)
    ds.run_procedure('cfg_yoda')
    assert_repo_status(ds.path)
Example #22
def test_run_failure(path=None):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    if on_windows:
        # this does not do exactly the same as the cmd on other systems
        # but is close enough to make running the test worthwhile
        cmd_failing = "echo x>{} & false".format(op.join("sub", "grows"))
    else:
        cmd_failing = ("echo x$(cat {0}) > {0} && false"
                       .format(op.join("sub", "grows")))

    with assert_raises(IncompleteResultsError):
        ds.run(cmd_failing, result_renderer=None)
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = op.join(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.save(recursive=True, message_file=msgfile)
    assert_repo_status(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = op.join(subds.path, "grows")
    eq_('x \n' if on_windows else 'x\n', open(outfile).read())

    if on_windows:
        # FIXME: Make the remaining code compatible with Windows.
        return

    # There is no CommandError on rerun if the non-zero exit status matches
    # the original one.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a
    # non-zero exit status that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    assert_repo_status(ds.path)
    assert_in_results(ds.rerun(result_renderer=None, on_failure="ignore"),
                      action="run", status="error")

    # We don't show instructions if the caller told us not to save.
    remove(msgfile)
    with assert_raises(IncompleteResultsError):
        ds.run("false", explicit=True, outputs=None, on_failure="stop")
    assert_false(op.exists(msgfile))
Example #23
def test_surprise_subds(path=None):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)

    # If subrepo is an adjusted branch, it would have a commit, making most of
    # this test irrelevant because it is about the unborn branch edge case.
    adjusted = somerepo.is_managed_branch()
    # This edge case goes away with Git v2.22.0.
    fixed_git = somerepo.git_version >= '2.22.0'

    # save non-recursive
    res = ds.save(recursive=False, on_failure='ignore')
    if not adjusted and fixed_git:
        # We get an appropriate error about no commit being checked out.
        assert_in_results(res, action='add_submodule', status='error')

    # the content of both subds and subrepo is not added to their
    # respective parents, as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])

    if adjusted or fixed_git:
        if adjusted:
            # adjusted branch: #datalad/3178 (that would have a commit)
            modified = [subds.repo.pathobj, somerepo.pathobj]
            untracked = []
        else:
            # Newer Git versions refuse to add a sub-repository with no commits
            # checked out.
            modified = [subds.repo.pathobj]
            untracked = ['d1']
        assert_repo_status(ds.path, modified=modified, untracked=untracked)
        assert_not_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                      ds.repo.get_content_info())
    else:
        # however, while the subdataset is added (and reported as modified
        # because its content is still untracked), the subrepo
        # cannot be added (it has no commit);
        # worse: its untracked file has been added to the superdataset
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
Example #24
def test_empty_git_upstairs(topdir=None):
    # create() doesn't get confused by an empty .git/ upstairs (gh-3473)
    assert_in_results(create(op.join(topdir, "empty", "ds"), **raw),
                      status="ok",
                      type="dataset",
                      action="create")
    # ... and it will ignore non-meaningful content in .git
    assert_in_results(create(op.join(topdir, "nonempty", "ds"), **raw),
                      status="ok",
                      type="dataset",
                      action="create")
    # ... but it will raise if it detects a valid repo
    # (by existence of .git/HEAD as defined in GitRepo._valid_git_test_path)
    with assert_raises(CommandError):
        create(op.join(topdir, "git_with_head", "ds"), **raw)
Example #25
def test_download_url_archive(toppath=None, topurl=None, path=None):
    ds = Dataset(path).create()
    ds.download_url([topurl + "archive.tar.gz"], archive=True)
    ok_(ds.repo.file_has_content(opj("archive", "file1.txt")))
    assert_not_in(opj(ds.path, "archive.tar.gz"), ds.repo.format_commit("%B"))
    # we should yield an 'impossible' result from add-archive-content when
    # there is untracked content (gh-6170)
    create_tree(ds.path, {'this': 'dirty'})
    assert_in_results(
        ds.download_url([topurl + "archive.tar.gz"],
                        archive=True,
                        on_failure='ignore'),
        status='impossible',
        action='add-archive-content',
        message='clean dataset required. Use `datalad status` to inspect '
        'unsaved changes')
Example #26
def test_update_adjusted_incompatible_with_ff_only(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()

    ds_clone = install(source=ds_src.path,
                       path=path / "clone",
                       recursive=True,
                       result_xfm="datasets")
    maybe_adjust_repo(ds_clone.repo)

    assert_in_results(ds_clone.update(merge="ff-only", on_failure="ignore"),
                      action="update",
                      status="impossible")
    assert_in_results(ds_clone.update(on_failure="ignore"),
                      action="update",
                      status="ok")
Example #27
def test_arg_missing(path=None, path2=None):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings,
        'add',
        url=path2,
    )
    assert_status('ok', ds.siblings('add', url=path2, name='somename'))
    # trigger some name-guessing functionality that will still not
    # be able to end up using a hostname spec despite being
    # given a URL
    if not on_windows:
        # the trick with the file:// URL creation only works on POSIX
        # the underlying tested code here is not about paths, though,
        # so it is good enough to run this on POSIX system to be
        # reasonably sure that things work
        assert_raises(
            InsufficientArgumentsError,
            ds.siblings,
            'add',
            url=f'file://{path2}',
        )

    # there is no name guessing with 'configure'
    assert_in_results(ds.siblings('configure',
                                  url='http://somename',
                                  on_failure='ignore'),
                      status='error',
                      message='need sibling `name` for configuration')

    # needs a URL
    assert_raises(InsufficientArgumentsError,
                  ds.siblings,
                  'add',
                  name='somename')
    # just pushurl is OK
    assert_status('ok', ds.siblings('add', pushurl=path2, name='somename2'))

    # needs group with groupwanted
    assert_raises(InsufficientArgumentsError,
                  ds.siblings,
                  'add',
                  url=path2,
                  name='somename',
                  annex_groupwanted='whatever')
Example #28
def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git',
                       recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the destination does not
    assert_in_results(dest.copy_file(
        [sub.pathobj / '.git' / 'config', dest.pathobj / 'some' / '.git'],
        on_failure='ignore'),
                      status='impossible',
                      action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside of
    # .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is checked
    # for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())
Example #29
def test_no_annex(path=None):
    # a few smoke tests regarding the 'here' sibling
    ds = create(path, annex=False)
    res = ds.siblings('configure',
                      name='here',
                      description='very special',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_status('impossible', res)

    res = ds.siblings('enable',
                      name='doesnotmatter',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_in_results(res,
                      status='impossible',
                      message='cannot enable sibling of non-annex dataset')
Example #30
def test_push_wanted(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus not
    # to be "pushed")
    src.drop('secure.2', reckless='kill')

    # Annotate sensitive content; the actual value "verysecure" does not
    # matter in this example
    src.repo.set_metadata(add={'distribution-restrictions': 'verysecure'},
                          files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since it is set in the sibling configuration
    res = src.push(to='target')
    assert_in_results(res,
                      action='copy',
                      path=str(src.pathobj / 'data.0'),
                      status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError, (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')
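
The wanted expression above uses git-annex preferred-content syntax, and clearing it re-enables pushing everything. Roughly the git-annex-level equivalents of the two sibling configurations, sketched under the assumption that a remote named 'target' exists:

# set: only content without distribution restrictions is wanted
src.repo.call_annex(['wanted', 'target',
                     'not metadata=distribution-restrictions=*'])
# reset: an empty expression means everything is wanted again
src.repo.call_annex(['wanted', 'target', ''])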