Example #1
    def test_addurls_url_on_collision_choose(self=None, path=None):
        ds = Dataset(path).create(force=True)
        data = deepcopy(self.data)
        for row in data:
            row["name"] = "a"

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")
        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            assert_in_results(ds.addurls("-",
                                         "{url}",
                                         "{name}",
                                         on_collision="error-if-different",
                                         on_failure="ignore"),
                              action="addurls",
                              status="error")

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
        ok_file_has_content(op.join(ds.path, "a-first"),
                            "a content",
                            strip=True)

        with patch("sys.stdin", new=StringIO(json.dumps(data))):
            ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
        ok_file_has_content(op.join(ds.path, "a-last"),
                            "c content",
                            strip=True)
Example #2
def test_delete_not_crashing(path=None):
    # although the test above uses Keyring without specifying a custom
    # backend, it does not change it, so that is fine. Here we want
    # a real keyring backend which we will alter
    from keyrings.alt.file import PlaintextKeyring
    kb = PlaintextKeyring()
    kb.filename = path

    keyring = Keyring(keyring_backend=kb)
    cred = UserPassword("test1", keyring=keyring)

    cred.set(user="******", password="******")
    ok_file_has_content(path, ".*test1.*",
                        re_=True)  # keyring backend saves where we expect

    # manually delete one component of the credential
    cred._keyring.delete(cred.name, next(iter(cred._FIELDS)))

    # now delete entire credential -- we must not crash
    cred.delete()
    try:
        ok_file_has_content(path, ".*test1.*",
                            re_=True)
    except AssertionError:
        # expected -- the credential is gone from the backend file
        pass
    else:
        raise AssertionError("keyring still has our key")
Example #3
def test_create_tree(path=None):
    content = u"мама мыла раму"
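    # create_tree presumably compresses entries named *.gz/*.xz/*.lzma on
    # creation, matching the decompress=True checks below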
    create_tree(
        path,
        OrderedDict([
            ('1', content),
            (
                'sd',
                OrderedDict([
                    # right away an obscure case where we have both 1 and 1.gz
                    ('1', content * 2),
                    ('1.gz', content * 3),
                    ('1.xz', content * 4),
                    ('1.lzma', content * 5),
                ])),
        ]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content * 2)
    ok_file_has_content(op.join(path, 'sd', '1.gz'),
                        content * 3,
                        decompress=True)
    ok_file_has_content(op.join(path, 'sd', '1.xz'),
                        content * 4,
                        decompress=True)
    ok_file_has_content(op.join(path, 'sd', '1.lzma'),
                        content * 5,
                        decompress=True)
Example #4
def check_compress_file(ext, annex, path=None, name=None):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive, path=path)
    assert_true(op.exists(archive))
    if annex:
        # it should work even when the file is annexed and is a symlink
        # to the key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest() from exc
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')
Example #5
def test_copy_file_recursion(srcdir=None, destdir=None):
    src_ds = Dataset(srcdir).create(force=True)
    src_ds.save()
    dest_ds = Dataset(destdir).create()
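    # srcdir is assumed to be pre-populated with subdir/file1 ('123') and
    # subdir/file2 ('abc'); create(force=True) reuses that existing tree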
    copy_file([src_ds.pathobj / 'subdir', dest_ds.pathobj], recursive=True)
    # structure is mirrored
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file1', '123')
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file2', 'abc')
Example #6
def test_newthings_coming_down(originpath=None, destpath=None):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in(DEFAULT_REMOTE, ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793); later it might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    ok_(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    assert_false(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
Example #7
def test_runner(tempfile=None):
    runner = Runner()
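    # stick to plain ASCII on Windows, where the shell encoding may not
    # round-trip the non-ASCII sample string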
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run' 
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)
Example #8
def test_publish_target_url(src=None, desttop=None, desturl=None):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
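    # src is assumed to be pre-populated with a file '1' (content '123',
    # verified at the end); create(force=True) reuses the existing tree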
    ds.save('1')
    ds.create_sibling('ssh://datalad-test:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
Example #9
def test_add_archive_single_file(repo_path=None):
    ds = Dataset(repo_path).create(force=True)
    with chpwd(repo_path):
        archives = glob('archives/*')
        ds.save(archives, message='Added archives')

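        # each test archive 'archives/<name>.<ext>' is assumed to contain
        # a single file '<name>' whose content is the string '<name>'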
        for archive in archives:
            archive_name = os.path.splitext(archive)[0]
            archive_content = os.path.basename(archive_name)
            ds.add_archive_content(archive)
            ok_file_has_content(archive_name, archive_content)
Example #10
def test_install_dataladri(src=None, topurl=None, path=None):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
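    # 'test.txt' (content 'some', verified at the end) is assumed to be
    # pre-created in ds_path by the test fixture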
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
Example #11
def test_spaces(path=None):
    """
    Test whether args with spaces are correctly parsed.
    """
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  scope='branch')
    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'with spaces', 'unrelated'])
    # check whether file has name with spaces
    ok_file_has_content(op.join(ds.path, 'with spaces'), 'hello\n')
Example #12
def test_update_git_smoke(src_path=None, dst_path=None):
    # Apparently was just failing on git repos for basic lack of coverage, hence this quick test
    ds = Dataset(src_path).create(annex=False)
    target = install(dst_path,
                     source=src_path,
                     result_xfm='datasets',
                     return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(target.update(recursive=True, merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
Example #13
def test_swallow_logs(logfile=None):
    lgr = logging.getLogger('datalad')
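    # swallow_logs captures datalad's log output at or above new_level;
    # the formatted records accumulate in cm.out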
    with swallow_logs(new_level=9) as cm:
        eq_(cm.out, '')
        lgr.log(8, "very heavy debug")
        eq_(cm.out, '')  # not even visible at level 9
        lgr.log(9, "debug1")
        eq_(cm.out, '[Level 9] debug1\n')  # level 9 itself is visible
        lgr.info("info")
        # INFO is above the level-9 threshold, so it is captured as well
        eq_(cm.out, '[Level 9] debug1\n[INFO] info\n')
    with swallow_logs(new_level=9, file_=logfile) as cm:
        eq_(cm.out, '')
        lgr.info("next info")
    from datalad.tests.utils_pytest import ok_file_has_content
    ok_file_has_content(logfile, "[INFO] next info", strip=True)
Example #14
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(DATALAD_SPECIAL_REMOTE, [
        'encryption=none', 'type=external',
        'externaltype={}'.format(DATALAD_SPECIAL_REMOTE), 'autoenable=true'
    ])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')), path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get the change saved, in order for the
    # drop below to work properly without going into reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')

    # now replace the file in dest with different content at the same path;
    # again call with a dataset so that the change is saved and the drop works
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now the "same path" yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
Example #15
    def check_dss():
        # we added the remote and set all the preferred-content settings
        for subds in subdss:
            eq_(subds.repo.get_preferred_content('wanted', remote),
                'standard' if standardgroup else '')
            eq_(subds.repo.get_preferred_content('group', remote),
                standardgroup or '')

        for target_sub in target_subdss:
            ok_(target_sub.is_installed())  # it is there now
            eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
            # and we have transferred the content
            if standardgroup and standardgroup == 'backup':
                # only then content should be copied
                ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(target_sub.repo.file_has_content('sub.dat'))
Example #16
def test_copy_file_into_nonannex(workdir=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', reckless='kill')

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
Example #17
def test_inputs_quotes_needed(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    cmd = "import sys; open(sys.argv[-1], 'w').write('!'.join(sys.argv[1:]))"
    # The string form of a command works fine when the inputs/outputs have
    # spaces ...
    cmd_str = "{} -c \"{}\" {{inputs}} {{outputs[0]}}".format(
        sys.executable, cmd)
    ds.run(cmd_str, inputs=["*.t*"], outputs=["out0"], expand="inputs")
    expected = u"!".join(
        list(sorted([OBSCURE_FILENAME + u".t", "bar.txt", "foo blah.txt"])) +
        ["out0"])
    with open(op.join(path, "out0")) as ifh:
        eq_(ensure_unicode(ifh.read()), expected)
    # ... but the list form of a command does not. (Don't test this failure
    # with the obscure file name because we'd need to know its composition to
    # predict the failure.)
    cmd_list = [sys.executable, "-c", cmd, "{inputs}", "{outputs[0]}"]
    ds.run(cmd_list, inputs=["*.txt"], outputs=["out0"])
    ok_file_has_content(op.join(path, "out0"), "bar.txt foo!blah.txt!out0")
Example #18
def test_downloader_download(urlpath=None, url=None, path=None):
    path = Path(path)
    downloader = SHubDownloader()
    downloader.api_url = url
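    # lay out a miniature Singularity Hub API: requesting org/repo yields
    # JSON whose "image" URL points at the plain 'data' file (content 'foo')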
    create_tree(urlpath,
                tree={
                    "data": "foo",
                    "org": {
                        "repo":
                        '{{"name":"org/repo","image":"{}"}}'.format(url +
                                                                    "data")
                    }
                })

    target = str(path / "target")
    downloader.download("shub://org/repo", target)
    ok_file_has_content(target, "foo")

    other_target = str(path / "other-target")
    downloader.download("shub://org/repo", other_target)
Example #19
def test_paths_with_forward_slashes(path=None):
    # access file with native absolute path spec
    print(path)
    ok_file_has_content(op.join(path, 'subdir', 'testfile'), 'testcontent')
    with chpwd(path):
        # native relative path spec
        ok_file_has_content(op.join('subdir', 'testfile'), 'testcontent')
        # posix relative path spec
        ok_file_has_content('subdir/testfile', 'testcontent')
    # abspath with forward slash path sep char
    ok_file_has_content(
        op.join(path, 'subdir', 'testfile').replace(op.sep, '/'),
        'testcontent')
Example #20
def test_configs(path=None):

    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add('datalad.locations.dataset-procedures',
                  'code',
                  scope='branch')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution config
    # for run:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} {{mysub}} {args}' %
                  quote_cmdlinearg(sys.executable),
                  scope='branch')
    ds.config.add('datalad.run.substitutions.mysub',
                  'dataset-call-config',
                  scope='branch')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add('datalad.procedures.datalad_test_proc.call-format',
                  u'%s {script} {ds} local {args}' %
                  quote_cmdlinearg(sys.executable),
                  scope='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc',
                         help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add('datalad.procedures.datalad_test_proc.help',
                  "This is a help message",
                  scope='branch')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
Example #21
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')), path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not trip
    # over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt', dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
Example #22
def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example #23
def test_run_unexpanded_placeholders(path=None):
    ds = Dataset(path).create()
    cmd = [
        sys.executable, "-c",
        "import sys; open(sys.argv[1], 'w').write(' '.join(sys.argv[2:]))"
    ]
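    # the helper writes its remaining arguments, space-joined, into the
    # file named by its first argument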

    # It's weird, but for lack of better options, inputs and outputs that don't
    # have matches are available unexpanded.

    with assert_raises(IncompleteResultsError):
        ds.run(cmd + ["arg1", "{inputs}"],
               inputs=["foo*"],
               on_failure="continue")
    assert_repo_status(ds.path)
    ok_file_has_content(op.join(path, "arg1"), "foo*")

    ds.run(cmd + ["arg2", "{outputs}"], outputs=["bar*"])
    assert_repo_status(ds.path)
    ok_file_has_content(op.join(path, "arg2"), "bar*")

    ds.run(cmd + ["arg3", "{outputs[1]}"], outputs=["foo*", "bar"])
    ok_file_has_content(op.join(path, "arg3"), "bar")
Example #24
def test_update_volatile_subds(originpath=None, otherpath=None, destpath=None):
    origin = Dataset(originpath).create()
    repo = origin.repo
    if repo.is_managed_branch() and repo.git_annex_version <= "8.20201129":
        # Fails before git-annex's fd161da2c (adjustTree: Consider submodule
        # deletions, 2021-01-06).
        raise SkipTest(
            "On adjusted branch, test requires fix in more recent git-annex")
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname, reckless='availability')
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # grab new content of the uninstalled subdataset right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, reckless='kill')
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now disconnected subdataset for further tests
    remove(dataset=op.join(ds.path, sname), reckless='kill')
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
Example #25
def test_update_simple(origin=None, src_path=None, dst_path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(origin).create(**ca)
    # naming is weird, but a legacy artifact
    _ = origds.create('subm 1', **ca)
    _ = origds.create('2', **ca)

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing remote, which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote(DEFAULT_REMOTE)
    # also forget the declared absolute location of the submodules, and turn them
    # relative to this/a clone
    for sub in source.subdatasets(result_xfm=lambda x: x['gitmodule_name']):
        source.subdatasets(path=sub,
                           set_property=[('url', './{}'.format(sub))])

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify remote:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch <default remote>/<default branch>
    assert_in("update.txt",
              dest.repo.get_files(DEFAULT_REMOTE + "/" + DEFAULT_BRANCH))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt", dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = dest.repo.get_file_annexinfo("update.txt",
                                              eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3,
                        status='ok',
                        type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok', path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod) to
    # operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since "pwd" dataset is not
        # considered, and would be relative path (as specified).
        # In 0.12.0 - it would include implicit pwd dataset, and paths would be absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update,
                              path=p,
                              action='update',
                              status='ok',
                              type='dataset')

        # and with merge we would also try to save (but there would be no changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # two 'update' results, really
        assert_in_results(res_merge,
                          action='update',
                          status='ok',
                          type='dataset')
        assert_in_results(res_merge,
                          action='save',
                          status='notneeded',
                          type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(dest.remove('subm 1'),
                        1,
                        status='ok',
                        action='remove',
                        path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True),
                            2,
                            status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'),
                message="saving changes within subm2",
                recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    # and now we can get new file
    dest.get(opj('2', 'load.dat'))
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
Example #26
def check_push(annex, src_path, dst_path):
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/' + DEFAULT_BRANCH, orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res,
        status='impossible',
        message='No push target given, and none could be auto-detected, '
        'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())
    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(res,
                      action='publish',
                      status='ok',
                      target='target',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # configure a default merge/upstream target
    src.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                   'target',
                   scope='local')
    src.config.set('branch.{}.merge'.format(DEFAULT_BRANCH),
                   DEFAULT_BRANCH,
                   scope='local')

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    # we could say since='HEAD~2' to make things fast, or be lazy
    # and say since='^' to indicate the state of the tracking remote,
    # which is the same, because we made two commits since the last push.
    res = src.push(to='target', since="^", jobs=2)
    assert_in_results(
        res,
        action='publish',
        status='ok',
        target='target',
        refspec=DEFAULT_REFSPEC,
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(res,
                          action='copy',
                          status='ok',
                          path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))
    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above)

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_in_results(res,
                          action='publish',
                          status='error',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(res,
                          action='publish',
                          status='ok',
                          target='target',
                          refspec=DEFAULT_REFSPEC,
                          operations=['forced-update'])
        eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
            list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/<default branch>'
    eq_(orig_branches, src_repo.get_branches())
Example #27
def test_with_tempfile_content(f=None):
    ok_file_has_content(f, "testtest")
    ok_file_has_content(f, "test*", re_=True)
Example #28
def test_push_recursive(origin_path=None,
                        src_path=None,
                        dst_top=None,
                        dst_sub=None,
                        dst_subnoannex=None,
                        dst_subsub=None):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out
    # recursively; running on a clone should make the test scenario differ
    # more from test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    subs = top.get('.', recursive=True, get_data=False, result_xfm='datasets')
    # order for '.' should not be relied upon, so sort by path
    sub, subsub, subnoannex = sorted(subs, key=lambda ds: ds.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    check_datasets_order(res)
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      refspec=DEFAULT_REFSPEC,
                      operations=['new-branch'],
                      action='publish',
                      status='ok',
                      target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='error',
                          type='dataset',
                          path=d.path,
                          message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(subnoannex,
                                       'target',
                                       dst_subnoannex,
                                       annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # topds skipped
    assert_in_results(res,
                      path=top.path,
                      type='dataset',
                      action='publish',
                      status='notneeded',
                      target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # all corresponding branches match across all datasets
    for s, d in zip(
        (top, sub, subnoannex, subsub),
        (target_top, target_sub, target_subnoannex, target_subsub)):
        eq_(list(s.repo.get_branch_commits_(DEFAULT_BRANCH)),
            list(d.get_branch_commits_(DEFAULT_BRANCH)))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of the default branch
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    assert_not_in_results(res, status='ok', refspec=DEFAULT_REFSPEC)
    assert_in_results(res, status='notneeded', refspec=DEFAULT_REFSPEC)

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(('ok', 'notneeded'),
                  top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # file content copied too
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)
    # now only publish the latter one
    res = top.push(to="target", since=DEFAULT_BRANCH + '~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res,
                          status='ok',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)
    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(res,
                      action='copy',
                      status='ok',
                      path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res,
                      status='ok',
                      type='dataset',
                      path=sub.path,
                      refspec=DEFAULT_REFSPEC)
    for d in (top, subnoannex, subsub):
        assert_in_results(res,
                          status='notneeded',
                          type='dataset',
                          path=d.path,
                          refspec=DEFAULT_REFSPEC)

    # if noannex target gets some annex, we still should not fail to push
    target_subnoannex.call_git(['annex', 'init'])
    # just to ensure that we do need something to push
    (subnoannex.pathobj / "newfile").write_text("content")
    subnoannex.save()
    res = subnoannex.push(to="target")
    assert_in_results(res, status='ok', type='dataset')
Example #29
def test_something(path=None, new_home=None):
    # will refuse to work with source='branch' when no dataset is given
    assert_raises(ValueError, ConfigManager, source='branch')
    # now read the example config
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True), source='branch')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])

    assert_equal(sorted(cfg.items()),
                 [(u'onemore.complicated の beast with.dot.findme', '5.0'),
                  ('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])
    assert_equal(sorted(cfg.items('something')),
                 [('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])

    # by default get last value only
    assert_equal(cfg.get('something.user'), '[email protected]')
    # but can get all values
    assert_equal(cfg.get('something.user', get_all=True),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git demands a key without value at all to be used as a flag, thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere',
                      'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes: with reload=False the addition is not yet visible
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set,
                  'mike.should.have',
                  'a beer',
                  force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # test deprecated 'where' interface and old 'dataset' (not 'branch') value
    # TODO: remove along with the removal of deprecated 'where'
    cfg.set('mike.should.have', 'wasknown', where='dataset')
    assert_equal(cfg['mike.should.have'], 'wasknown')
    assert_equal(cfg.get_from_source('dataset', 'mike.should.have'),
                 'wasknown')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', scope='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig

    # Note: An easier way to test this, would be to just set GIT_CONFIG_GLOBAL
    # to point somewhere else. However, this is not supported by git before
    # 2.32. Hence, stick with changed HOME in this test, but be sure to unset a
    # possible GIT_CONFIG_GLOBAL in addition.

    patched_env = os.environ.copy()
    patched_env.pop('GIT_CONFIG_GLOBAL', None)
    patched_env.update(get_home_envvars(new_home))
    with patch.dict('os.environ',
                    dict(patched_env, DATALAD_SNEAKY_ADDITION='ignore'),
                    clear=True):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert (not exists(global_gitconfig))
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', scope='global')
        assert (exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(CommandError,
                          globalcfg.unset,
                          'datalad.unittest.youcan',
                          scope='local')
        assert (globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', scope='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(Dataset(opj(path, 'ds')),
                        source='branch',
                        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
Example #30
def _test_expiring_token(outdir):
    url = "s3://datalad-test0-versioned/1version-removed-recreated.txt"
    outpath = op.join(outdir, "output")
    providers = get_test_providers(url, reload=True)
    downloader = providers.get_provider(url).get_downloader(url)

    from time import (
        sleep,
        time,
    )

    from datalad.downloaders.credentials import (
        AWS_S3,
        CompositeCredential,
        UserPassword,
    )
    from datalad.support.keyring_ import MemoryKeyring
    from datalad.tests.utils_pytest import ok_file_has_content
    credential = downloader.credential  # AWS_S3('datalad-test-s3')

    # We will replace credential with a CompositeCredential which will
    # mint new token after expiration
    # crap -- duration must be no shorter than 900, i.e. 15 minutes --
    # too long to wait for a test!
    duration = 900

    generated = []

    def _gen_session_token(_, key_id=None, secret_id=None):
        from boto.sts.connection import STSConnection
        sts = STSConnection(aws_access_key_id=key_id,
                            aws_secret_access_key=secret_id)
        # Note: without force_new=True it will not re-request a token and
        # would just return the old one if not yet expired.  Testing below
        # might fail if the token is not entirely new
        token = sts.get_session_token(duration=duration, force_new=True)
        generated.append(token)
        return dict(key_id=token.access_key,
                    secret_id=token.secret_key,
                    session=token.session_token,
                    expiration=token.expiration)

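    # credential chain: a plain UserPassword credential feeds the adapter
    # above, which mints a temporary session token for the AWS_S3 credential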
    class CustomS3(CompositeCredential):
        _CREDENTIAL_CLASSES = (UserPassword, AWS_S3)
        _CREDENTIAL_ADAPTERS = (_gen_session_token, )

    keyring = MemoryKeyring()
    downloader.credential = new_credential = CustomS3("testexpire",
                                                      keyring=keyring)
    # but reuse our existing credential for the first part:
    downloader.credential._credentials[0] = credential

    # now downloader must use the token generator
    assert not generated  # since we have not called it yet

    # do it twice so we reuse session and test that we do not
    # re-mint a new token
    t0 = time()  # not exactly when we generated, might be a bit racy?
    for i in range(2):
        downloader.download(url, outpath)
        ok_file_has_content(outpath, "version1")
        os.unlink(outpath)
    # but we should have asked for a new token only once
    assert len(generated) == 1
    assert downloader.credential is new_credential  # we did not reset it

    # sleep for a while and now do a number of downloads during which
    # token should get refreshed etc

    # -3 since we have offset -2 hardcoded to refresh a bit ahead of time
    to_sleep = duration - (time() - t0) - 3
    print("Sleeping for %d seconds. Token should expire at %s" %
          (to_sleep, generated[0].expiration))
    sleep(to_sleep)

    for i in range(5):
        # should have not been regenerated yet
        # -2 is our hardcoded buffer
        if time() - t0 < duration - 2:
            assert len(generated) == 1
        downloader.download(url, outpath)
        ok_file_has_content(outpath, "version1")
        os.unlink(outpath)
        sleep(1)
    assert len(generated) == 2