Example #1
def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir, get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())
Example #2
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that was installed and not
    # a new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Example #3
def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])
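The `target` mapping compared against above is defined at module level in the original test file and is not part of the snippet. A hypothetical stand-in for illustration only; the real keys and values come from DataLad's audio metadata extractor.

# Hypothetical stand-in for the module-level `target` dict used in
# Example #3; values are illustrative, not the extractor's actual output.
target = {
    'format': 'mime:audio/mpeg',
    'name': 'dltracktitle',
    'duration(s)': 1.0,
}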
Example #4
def test_add_mimetypes(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding a file to annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
           # Empty one is considered to be application/octet-stream,
           # i.e. non-text
           ('empty', True),
           ('file.txt', False),
           ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)
Example #5
def test_publish_file_handle(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True, create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command('get', annex_options=['test-annex.dat',
                                                         '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])
Example #6
def test_rerun_script(path):
    ds = Dataset(path).create()
    ds.run("echo a >foo")
    ds.run(["touch", "bar"], message='BAR', sidecar=True)
    # a run record sidecar file was added with the last commit
    assert(any(d['path'].startswith(opj(ds.path, '.datalad', 'runinfo'))
               for d in ds.rerun(report=True, return_type='item-or-list')['diff']))
    bar_hexsha = ds.repo.get_hexsha()

    script_file = opj(path, "commands.sh")

    ds.rerun(script=script_file)
    ok_exists(script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # The commit message is there too.
        assert_in("# BAR\n", lines)
        assert_in("# (record: {})\n".format(bar_hexsha), lines)
        assert_not_in("echo a >foo\n", lines)

    ds.rerun(since="", script=script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # Automatic commit messages aren't included.
        assert_not_in("# echo a >foo\n", lines)
        assert_in("echo a >foo\n", lines)

    # --script=- writes to stdout.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("touch bar",
                  cmout.getvalue().splitlines())
Example #7
def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo are not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because its content is still untracked), the subrepo
    # cannot be added (it has no commit);
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch: #datalad/3178 (that would have a commit)
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
        assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
                  ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())
Example #8
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we don't log decorations such as the log level while
            # swallowing, let's check whether an exit code was reported --
            # test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
Example #9
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])
Example #10
def test_cached_load_document(tdir):

    target_schema = {'buga': 'duga'}
    cache_filename = opj(tdir, "crap")

    with open(cache_filename, 'wb') as f:
        f.write("CRAPNOTPICKLED".encode())

    with patch('datalad.support.network.get_url_cache_filename',
               return_value=cache_filename):
        with patch('pyld.jsonld.load_document', return_value=target_schema), \
            swallow_logs(new_level=logging.WARNING) as cml:
            schema = _cached_load_document("http://schema.org/")
            assert_equal(schema, target_schema)
            cml.assert_logged("cannot load cache from", level="WARNING")

        # but now pickled one should have been saved
        assert_equal(pickle.load(open(cache_filename, 'rb')), target_schema)

        # and if we reload it -- it should be all fine without warnings
        # should come from cache so no need to overload load_document
        with swallow_logs(new_level=logging.WARNING) as cml:
            schema = _cached_load_document("http://schema.org/")
            assert_equal(schema, target_schema)
            assert_not_in("cannot load cache from", cml.out)
Example #11
def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')
    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])
    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])
Example #12
def test_script_shims():
    runner = Runner()
    for script in [
        'datalad',
        'git-annex-remote-datalad-archives',
        'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain easy-install shims
            which, _ = runner(['which', script])
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            from distutils.spawn import find_executable
            content = find_executable(script)
        assert_not_in('EASY', content) # NOTHING easy should be there
        assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out, err = runner([script, '--version'])
        version = (out + err).splitlines()[0].split(' ', 1)[1]
        # we can get git and non git .dev version... so for now
        # relax
        get_numeric_portion = lambda v: [x for x in v.split('.') if x.isdigit()]
        # extract numeric portion
        assert get_numeric_portion(version) # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))
Example #13
def test_install_list(path, top_path):

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
Example #14
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that was installed and not
    # a new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
Example #15
def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))
Example #16
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('Command line', alt)
Example #17
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')
Example #18
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('Python', alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
Example #19
def test_from_env():
    cfg = ConfigManager()
    assert_not_in('datalad.crazy.cfg', cfg)
    os.environ['DATALAD_CRAZY_CFG'] = 'impossibletoguess'
    cfg.reload()
    assert_in('datalad.crazy.cfg', cfg)
    assert_equal(cfg['datalad.crazy.cfg'], 'impossibletoguess')
    # not in dataset-only mode
    cfg = ConfigManager(Dataset('nowhere'), dataset_only=True)
    assert_not_in('datalad.crazy.cfg', cfg)
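The test relies on DataLad mapping `DATALAD_*` environment variables onto `datalad.*` configuration items. A rough sketch of that naming rule follows; it is an assumption that covers only this simple case (the real implementation also handles hyphens and double underscores).

# Rough sketch of the env-to-config name mapping used above.
def env_to_cfg_name(env_name):
    return env_name.lower().replace('_', '.')

assert env_to_cfg_name('DATALAD_CRAZY_CFG') == 'datalad.crazy.cfg'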
Example #20
def test_version():
    stdout, stderr = run_main(['--version'], expect_stderr=True)

    # and output should contain our version, copyright, license

    # https://hg.python.org/cpython/file/default/Doc/whatsnew/3.4.rst#l1952
    out = stdout if sys.version_info >= (3, 4) else stderr
    ok_startswith(out, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", out)
    assert_not_in("Permission is hereby granted", out)
Example #21
def test_update_simple(origin, src_path, dst_path):

    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # get a clone to update later on:
    dest = install(path=dst_path, source=src_path, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in dest.get_dataset_handles(recursive=True):
        AnnexRepo(opj(dst_path, subds), init=True,
                  create=True).git_checkout("master")
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.install(path="update.txt")
    source.remember_state("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.git_get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))
Example #22
def test_install_into_dataset(source, top_path):
    ds = install(top_path)
    subds = ds.install(path="sub", source=source)
    ok_(subds.is_installed())
    # sub is clean:
    ok_clean_git(subds.path, annex=False)
    # top is not:
    assert_raises(AssertionError, ok_clean_git, ds.path, annex=False)
    # unless committed the subds should not show up in the parent
    # this is the same behavior that 'git submodule status' implements
    assert_not_in('sub', ds.get_dataset_handles())
    ds.remember_state('addsub')
    assert_in('sub', ds.get_dataset_handles())
Example #23
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('CMD', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n   reflowed', alt)
    assert_in("Something for the cmdline only Multiline!", alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
    # RST role markup
    eq_(alter_interface_docs_for_cmdline(':murks:`me and my buddies`'),
        'me and my buddies')
    # spread across lines
    eq_(alter_interface_docs_for_cmdline(':term:`Barbara\nStreisand`'),
        'Barbara\nStreisand')
    # multiple on one line
    eq_(alter_interface_docs_for_cmdline(
        ':term:`one` bla bla :term:`two` bla'),
        'one bla bla two bla')

    altpd = alter_interface_docs_for_cmdline(demo_paramdoc)
    assert_not_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_in('cmdline', altpd)
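The RST role assertions above suggest a substitution of roughly this shape. This is an illustrative sketch only; the actual `alter_interface_docs_for_cmdline` lives in DataLad's interface machinery and does considerably more.

import re

# Illustrative: strip ':role:`text`' markup, keeping only the text,
# including roles that span lines or repeat within one line.
def strip_rst_roles(text):
    return re.sub(r':\w+:`([^`]*)`', r'\1', text)

assert strip_rst_roles(':murks:`me and my buddies`') == 'me and my buddies'
assert strip_rst_roles(':term:`one` bla bla :term:`two` bla') == 'one bla bla two bla'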
Example #24
        def decorated_test2(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name=DEFAULT_REMOTE,
                                url=str(cache_dir / name_in_cache))
            here = ds.config.get("annex.uuid")
            origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj
Example #25
def test_crazy_cfg(path):
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True),
                        source='dataset')
    assert_in('crazy.padry', cfg)
    # make sure crazy config is not read when in local mode
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='local')
    assert_not_in('crazy.padry', cfg)
    # it will make it in in 'any' mode though
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='any')
    assert_in('crazy.padry', cfg)
    # typos in the source mode arg will not have silent side-effects
    assert_raises(ValueError,
                  ConfigManager,
                  Dataset(opj(path, 'ds')),
                  source='locale')
Example #26
def test_dataset_systemglobal_mode(path):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    with chpwd(path):
        # now check that no config from a random dataset at PWD is picked up
        # if no dataset instance was provided
        cfg = ConfigManager(dataset=None, source='any')
        assert_in('user.name', cfg)
        assert_not_in('datalad.dataset.id', cfg)
        assert_not_in('annex.version', cfg)
Example #27
def test_bare(path):
    # can we handle a bare repo?
    gr = GitRepo(path, create=True, bare=True)
    # do we read the correct local config?
    assert_in(gr.pathobj / 'config', gr.config._stores['git']['files'])
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # does not report something that was never set
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # set it in the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', where='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1], (gr.pathobj / 'config').read_text())
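For context: a bare repository keeps its configuration directly at `<repo>/config` rather than `.git/config`, which is what the last assertion reads. A stand-alone sketch using plain git, no DataLad involved:

import subprocess

# Read a key straight from a bare repo's config file; returns None if
# the key is unset.
def bare_config_get(repo_path, key):
    res = subprocess.run(
        ['git', 'config', '--file', '%s/config' % repo_path, key],
        capture_output=True, text=True)
    return res.stdout.strip() or None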
Example #28
def test_cmdline_example_to_rst():
    # don't puke on nothing
    out = fmt.cmdline_example_to_rst(SIO(''))
    out.seek(0)
    ok_startswith(out.read(), '.. AUTO-GENERATED')
    out = fmt.cmdline_example_to_rst(SIO(''), ref='dummy')
    out.seek(0)
    assert_in('.. dummy:', out.read())
    # full scale test
    out = fmt.cmdline_example_to_rst(SIO(demo_example), ref='mydemo')
    out.seek(0)
    out_text = out.read()
    assert_in('.. code-block:: sh', out_text)
    assert_not_in('shame', out_text)  # no SKIP'ed
    assert_not_in('#', out_text)  # no comments
Example #29
def test_add_subdataset(path):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.get_subdatasets())
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.get_subdatasets())
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.get_subdatasets())
Example #30
def test_eval_results_plus_build_doc():

    # test docs

    # docstring was built already:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        TestUtils().__call__(1)
        assert_not_in("Building doc for", cml.out)
    # docstring accessible both ways:
    doc1 = Dataset.fake_command.__doc__
    doc2 = TestUtils().__call__.__doc__

    # docstring was built from Test_Util's definition:
    assert_equal(doc1, doc2)
    assert_in("TestUtil's fake command", doc1)
    assert_in("Parameters", doc1)
    assert_in("It's a number", doc1)

    # docstring also contains eval_result's parameters:
    assert_in("result_filter", doc1)
    assert_in("return_type", doc1)
    assert_in("list", doc1)
    assert_in("None", doc1)
    assert_in("return value behavior", doc1)
    assert_in("dictionary is passed", doc1)

    # test that eval_results is able to determine the class, a method of which
    # it is decorating:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        Dataset('/does/not/matter').fake_command(3)
        assert_in("Determined class of decorated function: {}"
                  "".format(TestUtils().__class__), cml.out)

    # test results:
    result = TestUtils().__call__(2)
    assert_equal(len(list(result)), 2)
    result = Dataset('/does/not/matter').fake_command(3)
    assert_equal(len(list(result)), 3)

    # test absent side-effect of popping eval_defaults
    kwargs = dict(return_type='list')
    TestUtils().__call__(2, **kwargs)
    assert_equal(list(kwargs), ['return_type'])

    # test signature:
    from inspect import getargspec
    assert_equal(getargspec(Dataset.fake_command)[0], ['number', 'dataset'])
    assert_equal(getargspec(TestUtils.__call__)[0], ['number', 'dataset'])
Example #32
def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decoding/encoding etc.

    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note that we intentionally call annex commands instead of
    # datalad-publish/-get here. We are testing an annex special remote.

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', str(file), '--to', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', str(file), '--from', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
Example #33
def test_call_from_parser_result_filter():
    class DummyOne(Interface):
        @staticmethod
        def __call__(**kwargs):
            yield kwargs

    # call_from_parser doesn't add result_filter to the keyword arguments
    assert_not_in("result_filter", DummyOne.call_from_parser(_new_args())[0])
    # with dissolution of _OLD_STYLE_COMMANDS and just relying on having
    # @eval_results, no result_filter is added, since those commands are
    # not guaranteed to return/yield any record suitable for filtering.
    # The effect is the same -- those "common" options are not really applicable
    # to Interfaces which do not return/yield expected records
    assert_not_in(
        "result_filter",
        DummyOne.call_from_parser(_new_args(common_report_type="dataset"))[0])
Example #34
def test_gh1597(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])
Example #36
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does a fetch, the related problem
            # applies -- so skipping this early
            raise SkipTest('TODO: Make publish work for flat datasets, it currently breaks')

        remote_name = 'remote-' + str(flat)
        # TODO: there is a mess-up with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(
                target=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(target_path_, basename(src_path) if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)
Example #37
def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare the repo to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(GitCommandError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)
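The `git update-server-info` call is what makes the repository cloneable over git's "dumb" HTTP protocol: it generates static helper files such as `info/refs`. A minimal stdlib sketch of serving such a repo follows; path and port are hypothetical.

import functools
import subprocess
from http.server import HTTPServer, SimpleHTTPRequestHandler

repo = '/tmp/somerepo'  # hypothetical path to an existing git repo
subprocess.run(['git', 'update-server-info'], cwd=repo, check=True)
handler = functools.partial(SimpleHTTPRequestHandler, directory=repo)
# HTTPServer(('localhost', 8000), handler).serve_forever()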
Example #38
def test_gh1597_simpler(path):
    ds = Dataset(path).create()
    # same goes for .gitattributes
    with open(op.join(ds.path, '.gitignore'), 'a') as f:
        f.write('*.swp\n')
    ds.save('.gitignore')
    assert_repo_status(ds.path)
    # put .gitattributes in some subdir and add all, should also go into Git
    attrfile = op.join('subdir', '.gitattributes')
    ds.repo.set_gitattributes([('*', dict(mycustomthing='this'))], attrfile)
    assert_repo_status(ds.path, untracked=[attrfile], untracked_mode='all')
    ds.save()
    assert_repo_status(ds.path)
    # no annex key, not in annex
    assert_not_in(
        'key',
        ds.repo.get_content_annexinfo([ut.Path(attrfile)]).popitem()[1])
Example #39
def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare the repo to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)
Example #40
def check_integration1(login,
                       keyring,
                       path,
                       organization=None,
                       kwargs={},
                       oauthtokens=None):
    kwargs = kwargs.copy()
    if organization:
        kwargs['github_organization'] = organization

    ds = Dataset(path).create()
    if oauthtokens:
        for oauthtoken in assure_list(oauthtokens):
            ds.config.add('hub.oauthtoken', oauthtoken, where='local')

    # so we do not pick up local repo configuration/token
    repo_name = 'test_integration1'
    with chpwd(path):
        # ATM all the github goodness does not care about "this dataset"
        # so force "process wide" cfg to pick up our defined above oauthtoken
        cfg.reload(force=True)
        # everything works just nice, no conflicts etc
        res = ds.create_sibling_github(repo_name, **kwargs)

        if organization:
            url_fmt = 'https://{login}@github.com/{organization}/{repo_name}.git'
        else:
            url_fmt = 'https://github.com/{login}/{repo_name}.git'
        eq_(res, [(ds, url_fmt.format(**locals()), False)])

        # but if we rerun -- it should kaboom since the dataset already has this sibling:
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, **kwargs)
        assert_in("already has a configured sibling", str(cme.exception))

        # we can give it a new name, but it should kaboom since the remote one
        # exists already
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, name="github2", **kwargs)
        assert_in("already exists on", str(cme.exception))
        # we should not leave the broken sibling behind
        assert_not_in('github2', ds.repo.get_remotes())

        # If we ask to reconfigure - should proceed normally
        ds.create_sibling_github(repo_name, existing='reconfigure', **kwargs)
    cfg.reload(force=True)
Example #41
def test_GitRepo_ssh_push(repo_path, remote_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify local repo:
    repo.checkout("ssh-test", ['-b'])
    with open(op.join(repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    repo.add("ssh_testfile.dat")
    repo.commit("ssh_testfile.dat added.")

    # file is not known to the remote yet:
    assert_not_in("ssh_testfile.dat", remote_repo.get_indexed_files())

    # push changes:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    # test PushInfo objects for the pushed refs:
    assert_in("ssh-remote/ssh-test",
              [commit.remote_ref.name for commit in pushed])

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # remote now knows the changes:
    assert_in("ssh-test", remote_repo.get_branches())
    assert_in("ssh_testfile.dat", remote_repo.get_files("ssh-test"))

    # amend to make it require "--force":
    repo.commit("amended", options=['--amend'])
    # push without --force should yield an error:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    assert_in("[rejected] (non-fast-forward)", pushed[0].summary)
    # now push using force:
    repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
    # correct commit message in remote:
    assert_in("amended",
              list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)
Example #42
def test_create_osf_simple(path):

    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(title="CI dl-create",
                                           name="osf-storage")

    assert_result_count(create_results, 2, status='ok', type='dataset')

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("project={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_project(osf.session, create_results[0]['id'])
Example #44
def test_wtf(path):
    # smoke test for now
    with swallow_outputs() as cmo:
        plugin(['wtf'], dataset=path)
        assert_not_in('Dataset information', cmo.out)
        assert_in('Configuration', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            plugin(['wtf'])
            assert_not_in('Dataset information', cmo.out)
            assert_in('Configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        plugin(['wtf'], dataset=ds.path)
        assert_in('Configuration', cmo.out)
        assert_in('Dataset information', cmo.out)
        assert_in('path: {}'.format(ds.path), cmo.out)
Example #45
def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in ds.repo.annexstatus().items():
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)
Example #46
def test_state(path):
    ds = Dataset.create(path)
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # by default we are not reporting any state info
    assert_not_in('state', res[0])
    # uninstall the subdataset
    ds.uninstall('sub')
    # a normal 'gone' is "absent"
    assert_false(sub.is_installed())
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')
    # with directory totally gone also
    os.rmdir(sub.path)
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')
    # putting dir back, no change
    os.makedirs(sub.path)
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')
Example #47
        def decorated_test3(ds):
            # we get a Dataset instance
            assert_is_instance(ds, Dataset)
            # it's a clone in a temp. location, not within the cache
            assert_not_in(cache_dir, ds.pathobj.parents)
            assert_result_count(ds.siblings(), 1, type="sibling",
                                name="origin",
                                url=str(cache_dir / name_in_cache))
            # origin is the same cached dataset that got this content in
            # decorated_test2 before. It should still be there, but "here" we
            # didn't request it
            here = ds.config.get("annex.uuid")
            origin = ds.config.get("remote.origin.annex-uuid")
            where = ds.repo.whereis(str(annexed_file))
            assert_not_in(here, where)
            assert_in(origin, where)

            return ds.pathobj, ds.repo.pathobj
Example #48
def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)
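This variant differs from Example #45 only in using `iteritems` for Python 2 compatibility. Presumably (an assumption) it comes from `six` or an equivalent shim:

# Assumed compatibility shim behind `iteritems` above; on Python 3 it
# is simply an iterator over dict.items().
try:
    from six import iteritems
except ImportError:
    def iteritems(d):
        return iter(d.items())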
Example #49
def _test_assert_Xwd_unchanged_ok_chdir(func):
    # Test that we are not masking out other "more important" exceptions

    orig_cwd = os.getcwd()
    orig_pwd = getpwd()

    @assert_cwd_unchanged(ok_to_chdir=True)
    def do_chdir_value_error():
        func(os.pardir)
        return "a value"

    with swallow_logs() as cml:
        eq_(do_chdir_value_error(), "a value")
        eq_(orig_cwd, os.getcwd(),
            "assert_cwd_unchanged didn't return us back to cwd %s" % orig_cwd)
        eq_(orig_pwd, getpwd(),
            "assert_cwd_unchanged didn't return us back to cwd %s" % orig_pwd)
        assert_not_in("Mitigating and changing back", cml.out)
Example #50
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # inject remote config prior run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******',
            on_failure='ignore'),
        status='error',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******',
            existing='reconfigure'),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******',
            existing='skip',),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )
Example #51
def test_version():
    # we just get a version if not asking for a version of some command
    stdout, stderr = run_main(['--version'], expect_stderr=True)
    eq_(stdout.rstrip(), "datalad %s" % datalad.__version__)

    stdout, stderr = run_main(['clone', '--version'], expect_stderr=True)
    ok_startswith(stdout, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", stdout)
    assert_not_in("Permission is hereby granted", stdout)

    try:
        import datalad_container
    except ImportError:
        pass  # not installed, cannot test with extension
    else:
        stdout, stderr = run_main(['containers-list', '--version'],
                                  expect_stderr=True)
        eq_(stdout, 'datalad_container %s\n' % datalad_container.__version__)
Example #52
def test_gh1597(path):
    if 'APPVEYOR' in os.environ:
        # issue only happens on appveyor, Python itself implodes
        # cannot be reproduced on a real windows box
        raise SkipTest('this test causes appveyor to crash, reason unknown')
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in('key',
                  ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])
Example #53
def test_add_local_path(path, local_file):
    ds = Dataset(path).create()
    res = ds.containers_add(name="foobert", url=op.join(local_file, "foo.img"))
    foo_target = op.join(path, ".datalad", "environments", "foobert", "image")
    assert_result_count(res,
                        1,
                        status="ok",
                        type="file",
                        path=foo_target,
                        action="containers_add")
    # We've just copied and added the file.
    assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target))
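    # (illustrative note: WEB_UUID is the fixed UUID of git-annex's 'web'
    # special remote; its absence from whereis() means no download URL was
    # registered for the copied image)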

    # We can force the URL to be added. (Note: This works because datalad
    # overrides 'annex.security.allowed-url-schemes' in its tests.)
    ds.containers_add(name="barry",
                      url=get_local_file_url(op.join(local_file, "bar.img")))
    bar_target = op.join(path, ".datalad", "environments", "barry", "image")
    assert_in(ds.repo.WEB_UUID, ds.repo.whereis(bar_target))
Example #54
def test_GitRepo_gitpy_injection(path, path2):

    gr = GitRepo(path, create=True)
    gr._GIT_COMMON_OPTIONS.extend(['test-option'])
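    # (illustrative note: _GIT_COMMON_OPTIONS is passed along with every git
    # call made through this GitRepo instance, so the bogus 'test-option'
    # should surface in the error of any subsequent invocation)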

    with assert_raises(GitCommandError) as cme:
        gr.repo.git.unknown_git_command()
    assert_in('test-option', exc_str(cme.exception))

    # once set, these options should persist across git calls:
    with assert_raises(GitCommandError) as cme:
        gr.repo.git.another_unknown_git_command()
    assert_in('test-option', exc_str(cme.exception))

    # but other repos should not be affected:
    gr2 = GitRepo(path2, create=True)
    with assert_raises(GitCommandError) as cme:
        gr2.repo.git.unknown_git_command()
    assert_not_in('test-option', exc_str(cme.exception))
Example #55
def test_recurseinto(dspath, dest):
    # make fresh dataset hierarchy
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    # label intermediate dataset as 'norecurseinto'
    res = Dataset(opj(ds.path,
                      'b')).subdatasets(contains='bb',
                                        set_property=[
                                            ('datalad-recursiveinstall',
                                             'skip')
                                        ])
    assert_result_count(res, 1, path=opj(ds.path, 'b', 'bb'))
    ds.add('b/', recursive=True)
    ok_clean_git(ds.path)

    # recursive install, should skip the entire bb branch
    res = install(source=ds.path,
                  path=dest,
                  recursive=True,
                  result_xfm=None,
                  result_filter=None)
    assert_result_count(res, 5)
    assert_result_count(res, 5, type='dataset')
    # we got the neighbor subdataset
    assert_result_count(res, 1, type='dataset', path=opj(dest, 'b', 'ba'))
    # we did not get the one we wanted to skip
    assert_result_count(res, 0, type='dataset', path=opj(dest, 'b', 'bb'))
    assert_not_in(
        opj(dest, 'b', 'bb'),
        Dataset(dest).subdatasets(fulfilled=True, result_xfm='paths'))
    assert (not Dataset(opj(dest, 'b', 'bb')).is_installed())

    # cleanup
    Dataset(dest).remove(recursive=True)
    assert (not lexists(dest))
    # again but just clone the base, and then get content and grab 'bb'
    # explicitly -- must get it installed
    dest = install(source=ds.path, path=dest)
    res = dest.get(['.', opj('b', 'bb')], get_data=False, recursive=True)
    assert_result_count(res, 8)
    assert_result_count(res, 8, type='dataset')
    assert_result_count(res, 1, type='dataset', path=opj(dest.path, 'b', 'bb'))
    assert (Dataset(opj(dest.path, 'b', 'bb')).is_installed())
Example #56
def test_nested_metadata(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    # BIDS returns participant info as a nested dict for each file in the
    # content metadata. On the dataset-level this should automatically
    # yield a sequence of participant info dicts, without any further action
    # or BIDS-specific configuration
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    for i in zip(
            sorted(
                meta['datalad_unique_content_properties']['bids']['subject'],
                key=lambda x: x['id']),
            sorted([{
                "age(years)": "20-25",
                "id": "03",
                "gender": "female",
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": "english"
            }, {
                "age(years)": "30-35",
                "id": "01",
                "gender": 'n/a',
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": u"русский"
            }],
                   key=lambda x: x['id'])):
        assert_dict_equal(i[0], i[1])
    # we can turn off this kind of auto-summary
    ds.config.add('datalad.metadata.generate-unique-bids',
                  'false',
                  where='dataset')
    ds.aggregate_metadata()
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    # protect next test a little, in case we enhance our core extractor in the future
    # to provide more info
    if 'datalad_unique_content_properties' in meta:
        assert_not_in('bids', meta['datalad_unique_content_properties'])
Example #57
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    dst_relpath = os.path.relpath(dst_path, os.path.expanduser('~'))
    url = 'localhost:%s' % dst_relpath
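    # (illustrative note: this yields an scp-like URL such as
    # 'localhost:tmp/ds' for a dst_path of ~/tmp/ds, i.e. a path relative
    # to the home directory on the remote side)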
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')

    ds.create_sibling(url)
    published = ds.publish('.', to='localhost')
    assert_in('sub.dat', published[0])
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with exit code 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception ATM... so just
    # checking that it fails somehow
    assert_raises(Exception, ds.create_sibling, url)
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish('.', to='localhost')
    assert_in('sub.dat', published2[0])

    # and one more test, since the above would not puke ATM but would just
    # not even try to copy, since it assumes the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost')  # we publish just git
    assert_not_in('sub2.dat', published3[0])
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish('.', to='localhost')
    assert_in('sub2.dat', published4[0])
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)
Example #58
def test_create_sub(path):

    ds = Dataset(path)
    ds.create()

    # 1. create sub and add to super:
    subds = ds.create(op.join("some", "what", "deeper"))
    ok_(isinstance(subds, Dataset))
    ok_(subds.is_installed())
    assert_repo_status(subds.path, annex=True)
    assert_in(
        'submodule.some/what/deeper.datalad-id={}'.format(
            subds.id),
        list(ds.repo.call_git_items_(['config', '--file', '.gitmodules',
                                      '--list'],
                                     read_only=True))
    )
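    # (illustrative note: the datalad-id entry in .gitmodules records the
    # subdataset's dataset UUID, which is what lets datalad re-identify the
    # exact subdataset on a later install)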

    # subdataset is known to superdataset:
    assert_in(op.join("some", "what", "deeper"),
              ds.subdatasets(result_xfm='relpaths'))
    # and was committed:
    assert_repo_status(ds.path)

    # subds finds superdataset
    ok_(subds.get_superdataset() == ds)

    # 2. create sub without adding to super:
    subds2 = Dataset(op.join(path, "someother")).create()
    ok_(isinstance(subds2, Dataset))
    ok_(subds2.is_installed())
    assert_repo_status(subds2.path, annex=True)

    # unknown to superdataset:
    assert_not_in("someother", ds.subdatasets(result_xfm='relpaths'))

    # 3. create sub via super:
    subds3 = ds.create("third", annex=False)
    ok_(isinstance(subds3, Dataset))
    ok_(subds3.is_installed())
    assert_repo_status(subds3.path, annex=False)
    assert_in("third", ds.subdatasets(result_xfm='relpaths'))
Example #59
def test_crawl_s3_commit_versions(path):
    annex = _annex(path)

    # Fancier setup so we could do any of the desired actions within a single sweep
    pipeline = [
        crawl_s3('datalad-test0-versioned', strategy='commit-versions', repo=annex.repo),
        switch('datalad_action',
               {
                   'commit': annex.finalize(tag=True),
                   'remove': annex.remove,
                   'annex':  annex,
               })
    ]

    with externals_use_cassette('test_crawl_s3-pipeline1'):
        with swallow_logs(new_level=logging.WARN) as cml:
            out = run_pipeline(pipeline)
            assert_in("There is already a tag %s" % target_version, cml.out)
    # things are committed and thus stats are empty
    eq_(out, [{'datalad_stats': ActivityStats()}])
    total_stats = out[0]['datalad_stats'].get_total()

    eq_(set(total_stats.versions), {target_version})  # there is a bunch of them, since they are not uniq'ed, but they are all the same
    # override for easier checking
    total_stats.versions = []
    eq_(total_stats,
        # Deletions come as 'files' as well atm
        ActivityStats(files=17, overwritten=3, downloaded=14, urls=14, add_annex=14, removed=3, downloaded_size=112))
    tags = annex.repo.get_tags(output='name')
    assert_in(target_version, tags)
    # and we actually got 7 more commits
    for t in range(1, 8):
        assert_in(target_version + "+%d" % t, tags)

    # if we rerun -- nothing new should have been done, i.e. the crawl is
    # idempotent, and ATM we can reuse the same cassette
    with externals_use_cassette('test_crawl_s3-pipeline1'):
        with swallow_logs() as cml:
            out = run_pipeline(pipeline)
            assert_not_in("There is already a tag %s" % target_version, cml.out)
    eq_(out, [{'datalad_stats': ActivityStats(skipped=17)}])
    eq_(out[0]['datalad_stats'].get_total(), ActivityStats(skipped=17))  # Really nothing was done
Example #60
def test_sidecar(path):
    ds = Dataset(path).create()
    # Simple sidecar message checks.
    ds.run(["touch", "dummy0"], message="sidecar arg", sidecar=True)
    assert_not_in('"cmd":', ds.repo.format_commit("%B"))

    ds.config.set("datalad.run.record-sidecar", "false", where="local")
    ds.run(["touch", "dummy1"], message="sidecar config")
    assert_in('"cmd":', ds.repo.format_commit("%B"))

    ds.config.set("datalad.run.record-sidecar", "true", where="local")
    ds.run(["touch", "dummy1"], message="sidecar config")
    assert_not_in('"cmd":', ds.repo.format_commit("%B"))

    # Don't break when config.get() returns multiple values. Here it's two
    # values in .gitconfig, but a more realistic scenario is a value in
    # $repo/.git/config that overrides a setting in ~/.config/git/config.
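    # (illustrative note: after the add below there are two values recorded;
    # the test expects the most recently added one -- 'false' -- to win, so
    # the run record ends up in the commit message again)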
    ds.config.add("datalad.run.record-sidecar", "false", where="local")
    ds.run(["touch", "dummy2"], message="sidecar config")
    assert_in('"cmd":', ds.repo.format_commit("%B"))