示例#1
0
def test_uninstall_multiple_paths(path):
    # Drop, then remove, the 'kill' files of a dataset and of its subdataset
    # with one call each, and check that only 'keep' files are left over.
    top = Dataset(path).create(force=True)
    deep = top.create('deep', force=True)
    deep.save(recursive=True)
    ok_clean_git(deep.path)
    # the recursive save has to handle a combination of staged files, a
    # modified submodule, and untracked files
    top.save(recursive=True)
    ok_clean_git(top.path)

    kill_top = 'kill'
    kill_deep = opj('deep', 'dir', 'kill')

    def _leftovers():
        # everything matched three levels below the dataset root, followed by
        # everything matched directly in the root
        return glob(opj(top.path, '*', '*', '*')) + glob(opj(top.path, '*'))

    def _only_keep_files(candidates):
        # every existing non-directory entry has to end with 'keep'
        for entry in candidates:
            if exists(entry) and not isdir(entry) and not entry.endswith('keep'):
                return False
        return True

    # drop content of all 'kill' files; the paths are passed as a tuple on
    # purpose, which should work just like a list
    top.drop((kill_top, kill_deep), check=False)
    ok_clean_git(top.path)
    ok_(_only_keep_files(_leftovers()))
    ok_(not top.repo.file_has_content(kill_top))
    # same file without its leading component ('deep'); presumably psplit
    # splits a path into its parts
    kill_in_deep = opj(*psplit(kill_deep)[1:])
    ok_(not deep.repo.file_has_content(kill_in_deep))

    # remove handles for all 'kill' files
    top.remove([kill_top, kill_deep], check=False)
    ok_clean_git(top.path)
    remaining = _leftovers()
    ok_(_only_keep_files(remaining))
    ok_(not any(entry.endswith(kill_top) for entry in remaining))
示例#2
0
def test_kill(path):
    # A nested dataset with load: a plain remove has to fail because the
    # content cannot be dropped, a recursive remove without checks succeeds.
    superds = Dataset(path).create()
    payload = opj(superds.path, "file.dat")
    with open(payload, 'w') as fobj:
        fobj.write("load")
    superds.save("file.dat")
    nested = superds.create('deep1')
    eq_(sorted(superds.subdatasets(result_xfm='relpaths')), ['deep1'])
    ok_clean_git(superds.path)

    # removal is expected to report an error for the file
    results = superds.remove(on_failure='ignore')
    assert_result_count(results, 1, status='error', path=payload)

    # The message check below relies on the actual error text. There is a
    # second result with status 'impossible' for the dataset itself, so only
    # the first 'error' result is inspected.
    failed = [r for r in results if r['status'] == 'error'][0]

    def _is_known_message(msg):
        if "configured minimum number of copies not found" in msg:
            return True
        return "Could only verify the existence of 0 out of 1 necessary copies" in msg

    assert_result_values_cond([failed], 'message', _is_known_message)

    removed = superds.remove(recursive=True, check=False, result_xfm='datasets')
    eq_(removed, [nested, superds])
    ok_(not exists(path))
示例#3
0
def test_install_into_dataset(source, top_path):
    # Install subdatasets into an existing dataset and check which changes
    # do (not) end up being saved in the superdataset.
    top = create(top_path)
    ok_clean_git(top.path)

    sub = top.install("sub", source=source, save=False)
    git_dir = opj(sub.path, '.git')
    ok_(isdir(git_dir))
    ok_(sub.is_installed())
    known_subs = top.subdatasets(result_xfm='relpaths')
    assert_in('sub', known_subs)
    # the subdataset is expected to be clean ...
    ok_clean_git(sub.path, annex=None)
    # ... and so is the superdataset
    ok_clean_git(top.path, annex=None)
    top.save(message='addsub')
    # still clean after the explicit save
    ok_clean_git(top.path, annex=None)

    # Saving as part of an install must not have the side effect of saving
    # unrelated changes; an untracked dummy file serves as such a change.
    create_tree(top.path, {'dummy.txt': 'buga'})
    ok_clean_git(top.path, untracked=['dummy.txt'])
    sub2 = top.install("sub2", source=source)
    expected_sub2_path = opj(top.path, "sub2")
    eq_(sub2.path, expected_sub2_path)  # for paranoid yoh ;)
    ok_clean_git(top.path, untracked=['dummy.txt'])

    # The same must hold when a dataset is created first and only then
    # added to the superdataset.
    sub3_path = _path_(top_path, 'sub3')
    create(sub3_path)
    ok_clean_git(top.path, untracked=['dummy.txt', 'sub3/'])
    top.save('sub3')
    ok_clean_git(top.path, untracked=['dummy.txt'])
示例#4
0
def test_clean_subds_removal(path):
    # Removing subdatasets (installed or already uninstalled) has to leave
    # the superdataset in a clean state.
    top = Dataset(path).create()
    one = top.create('one')
    two = top.create('two')
    eq_(sorted(top.subdatasets(result_xfm='relpaths')), ['one', 'two'])
    ok_clean_git(top.path)

    # now kill one
    results = top.remove('one', result_xfm=None)
    # 'one' got uninstalled and removed, and the superdataset got that saved
    for where, what in ((one.path, 'uninstall'),
                        (one.path, 'remove'),
                        (top.path, 'save')):
        assert_result_count(results, 1, path=where, action=what, status='ok')
    ok_(not one.is_installed())
    ok_clean_git(top.path)
    # two must remain
    eq_(top.subdatasets(result_xfm='relpaths'), ['two'])
    # one is gone
    assert not exists(one.path)

    # and now again, but this time remove something that is not installed
    top.create('three')
    eq_(sorted(top.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    top.uninstall('two')
    ok_clean_git(top.path)
    eq_(sorted(top.subdatasets(result_xfm='relpaths')), ['three', 'two'])
    ok_(not two.is_installed())
    assert exists(two.path)
    removed = top.remove('two', result_xfm='datasets')
    ok_clean_git(top.path)
    # 'two' was uninstalled before, now its removal got saved as well
    assert not exists(two.path)
    eq_(top.subdatasets(result_xfm='relpaths'), ['three'])
    eq_(removed, [two, top])
示例#5
0
def test_ssh_manager_close():
    # Closing the manager has to leave the checked socket paths in the
    # state they were in before the manager opened its connections.
    # NOTE(review): the before/after checks look at plain host names, while
    # the checks in between use get_connection_hash() -- confirm that this
    # difference is intended.
    manager = SSHManager()
    hosts = ('localhost', 'datalad-test')
    urls = ('ssh://localhost', 'ssh://datalad-test')

    # check for previously existing sockets:
    existed_before = [exists(opj(manager.socket_dir, host)) for host in hosts]

    for url in urls:
        manager.get_connection(url).open()

    if all(existed_before):
        # one connection has to be closed and reopened, so that it is
        # `manager` that opened it
        manager.get_connection(urls[0]).close()
        manager.get_connection(urls[0]).open()

    for host in hosts:
        ok_(exists(opj(manager.socket_dir, get_connection_hash(host))))

    manager.close()

    still_exists = [exists(opj(manager.socket_dir, host)) for host in hosts]

    for before, after in zip(existed_before, still_exists):
        eq_(before, after)
示例#6
0
def test_GitRepo_ssh_pull(remote_path, repo_path):
    # Pull a freshly created branch from a remote reached via ssh://localhost
    from datalad import ssh_manager

    remote = GitRepo(remote_path, create=True)
    ssh_url = "ssh://localhost" + op.abspath(remote_path)
    expected_socket = op.join(ssh_manager.socket_dir,
                              get_connection_hash('localhost'))
    local = GitRepo(repo_path, create=True)
    local.add_remote("ssh-remote", ssh_url)

    # modify remote:
    new_file = "ssh_testfile.dat"
    remote.checkout("ssh-test", ['-b'])
    with open(op.join(remote.path, new_file), "w") as fobj:
        fobj.write("whatever")
    remote.add(new_file)
    remote.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in(new_file, local.get_indexed_files())

    # pull changes:
    branch = remote.get_active_branch()
    local.pull(remote="ssh-remote", refspec=branch)
    ok_clean_git(local.path, annex=False)

    # the connection is expected to be known to the SSH manager by now ...
    assert_in(expected_socket, ssh_manager._connections)
    # ... and its socket to exist
    ok_(op.exists(expected_socket))

    # we actually pulled the changes
    assert_in(new_file, local.get_indexed_files())
示例#7
0
def test_add_subdataset(path, other):
    # Register an existing dataset, and later a fresh clone, as subdatasets
    # of a superdataset via `add`.
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=opj(ds.path, 'other'))
    # is_installed is a method and has to be called: the bare bound method
    # is always truthy and would make the assertion vacuous
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.add('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    # after the uninstall the clone must be reported as not installed
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
示例#8
0
def test_url_base():
    # Basic checks of URL construction, field access, repr and comparison.
    # A URL string cannot be combined with explicit fields:
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    parsed = URL("http://example.com")
    eq_(parsed.hostname, 'example.com')
    eq_(parsed.scheme, 'http')
    # fields that were not specified are empty strings
    eq_(parsed.port, '')
    eq_(parsed.username, '')
    eq_(repr(parsed), "URL(hostname='example.com', scheme='http')")
    # automagic coercion in __eq__
    eq_(parsed, "http://example.com")

    neq_(URL(), URL(hostname='x'))

    something = URL('smth')
    eq_(something.hostname, '')
    ok_(bool(something))
    empty = URL()
    nok_(bool(empty))

    assert_raises(ValueError, parsed._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as captured:
        # params are not "cared" about ATM, so a warning is expected
        # whenever there are any
        with_param = URL("http://example.com/;param")
        # the original string is maintained nevertheless
        eq_(str(with_param), 'http://example.com/;param')
        assert_in('ParseResults contains params', captured.out)
        eq_(with_param.as_str(), 'http://example.com/')
示例#9
0
def test_install_list(path, top_path):
    # Several known subdatasets can be installed with a single call, as long
    # as no `source` is given. The toplevel dataset comes first:
    top = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', top.config)
    ok_(top.is_installed())

    sub_names = ('subm 1', '2')
    first, second = [Dataset(opj(top_path, name)) for name in sub_names]
    for sub in (first, second):
        ok_(not sub.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, top.install,
                  path=list(sub_names),
                  source='something')

    # now should work:
    installed = top.install(path=list(sub_names), result_xfm='paths')
    for sub in (first, second):
        ok_(sub.is_installed())
    eq_(set(installed), {first.path, second.path})

    # requesting them again via get is expected to report 'notneeded'
    again = top.get(path=list(sub_names), get_data=False)
    assert_status('notneeded', again)
示例#10
0
文件: test_get.py 项目: hanke/datalad
def test_get_mixed_hierarchy(src, path):
    # get() across a superdataset created with no_annex=True that contains
    # a subdataset holding the actual file content.
    git_file = 'file_in_git.txt'
    annex_file = 'file_in_annex.txt'

    origin = Dataset(src).create(no_annex=True)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, git_file), "w") as fobj:
        fobj.write('no idea')
    with open(opj(origin_sub.path, annex_file), "w") as fobj:
        fobj.write('content')
    origin.add(git_file, to_git=True)
    origin_sub.add(annex_file)
    origin.save()

    # now, install that thing:
    installed = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list', result_filter=None)
    ds, subds = installed
    ok_(subds.repo.file_has_content(annex_file) is False)

    # and get:
    results = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], results)
    obtained = opj(subds.path, annex_file)
    assert_result_count(results, 1, path=obtained, status='ok')
    ok_(subds.repo.file_has_content(annex_file) is True)
示例#11
0
def test_GitRepo_pull(test_path, orig_path, clone_path):
    # A pull has to bring a newly committed file from origin into a clone.
    upstream = GitRepo.clone(test_path, orig_path)
    downstream = GitRepo.clone(orig_path, clone_path)
    fname = get_most_obscure_supported_name()

    with open(op.join(orig_path, fname), 'w') as fobj:
        fobj.write("New file.")
    upstream.add(fname)
    upstream.commit("new file added.")
    downstream.pull()
    pulled_file = op.join(clone_path, fname)
    ok_(op.exists(pulled_file))

    # While at it, let's test _get_remotes_having_commit a bit
    downstream.add_remote("very_origin", test_path)
    downstream.fetch("very_origin")
    # the current commit is expected at 'origin' only
    head = downstream.get_hexsha()
    eq_(downstream._get_remotes_having_commit(head), ['origin'])
    # its parent is expected at both remotes
    parent = downstream.get_hexsha('HEAD^')
    having_parent = set(downstream._get_remotes_having_commit(parent))
    eq_(having_parent, {'origin', 'very_origin'})
示例#12
0
def test_install_known_subdataset(src, path):
    # Install a registered, but not yet present, subdataset and then obtain
    # the content of one of its files.
    sub_name = 'subm 1'
    data_file = 'test-annex.dat'

    def _listed(fulfilled):
        # relative paths of the subdatasets with the given `fulfilled` state
        return top.subdatasets(fulfilled=fulfilled, result_xfm='relpaths')

    # get the superdataset:
    top = install(path, source=src)
    # subdataset not installed:
    sub = Dataset(opj(path, sub_name))
    assert_false(sub.is_installed())
    assert_in(sub_name, _listed(False))
    assert_not_in(sub_name, _listed(True))

    # install it:
    top.install(sub_name)
    ok_(sub.is_installed())
    ok_(AnnexRepo.is_valid_repo(sub.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that got installed, and not
    # a newly initiated repository
    indexed = set(sub.repo.get_indexed_files())
    eq_(indexed, {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in(sub_name, _listed(False))
    assert_in(sub_name, _listed(True))

    # now, get the data via get() from within the superdataset:
    ok_(sub.repo.file_has_content(data_file) is False)
    with chpwd(top.path):
        results = get(path=sub_name, dataset=os.curdir)
        assert_in_results(results, path=opj(sub.path, data_file))
        ok_(sub.repo.file_has_content(data_file) is True)
        ok_(sub.is_installed())
示例#13
0
def test_help():
    # Check the section titles of the top-level `--help` output.
    stdout, stderr = run_main(['--help'])

    # Let's extract section titles. They are indexed and sliced below, so a
    # real list is needed: on Python 3 `filter()` returns a lazy iterator
    # that supports neither operation.
    is_title = re.compile('[a-zA-Z ]{4,50}:').match
    sections = [line for line in stdout.split('\n') if is_title(line)]
    ok_(sections[0].startswith('Usage:'))  # == Usage: nosetests [-h] if running using nose
    assert_equal(sections[1:], ['Positional arguments:', 'Options:'])
示例#14
0
def test_ssh_get_connection():
    # get_connection() hands out SSHConnection instances, reuses them for
    # the same URL, and refuses specifications it cannot interpret.
    manager = SSHManager()
    initial_socket_dir = manager._socket_dir
    assert initial_socket_dir is None, \
        "Should be unset upon initialization. Got %s" % str(initial_socket_dir)
    by_url = manager.get_connection('ssh://localhost')
    assert manager._socket_dir, "Should be set after interactions with the manager"
    assert_is_instance(by_url, SSHConnection)

    # subsequent call returns the very same instance:
    same = manager.get_connection('ssh://localhost')
    ok_(same is by_url)

    # a plain host name is an allowed specification (it used to be treated
    # as malformed and to raise ValueError)
    by_host = manager.get_connection('localhost')
    assert_is_instance(by_host, SSHConnection)

    # but should fail if it looks like something else
    for bogus in ('localhost/', ':localhost'):
        assert_raises(ValueError, manager.get_connection, bogus)

    # Not tested on purpose:
    # - 'someone@localhost': we can do what urlparse cannot
    # - 'ssh:/localhost': urlparse considers it a proper url (netloc:'',
    #   path='/localhost'), and it eventually gets turned into
    #   SSHRI(hostname='ssh', path='/localhost') -- which is fair IMHO

    manager.close()
示例#15
0
def test_GitRepo_add(src, path):
    # add() reports success per file and puts the file into the index;
    # after a commit the repository is clean.
    repo = GitRepo.clone(src, path)

    obscure = get_most_obscure_supported_name()
    with open(op.join(path, obscure), 'w') as fobj:
        fobj.write("File to add to git")
    first_report = repo.add(obscure)

    eq_(first_report, {'success': True, 'file': obscure})
    assert_in(obscure, repo.get_indexed_files(),
              "%s not successfully added to %s" % (obscure, path))
    # uncommitted:
    ok_(repo.dirty)

    plain = "another.txt"
    with open(op.join(path, plain), 'w') as fobj:
        fobj.write("Another file to add to git")

    # this time followed by a commit:
    second_report = repo.add(plain)
    repo.commit(msg="Add two files.")
    eq_(second_report, {'success': True, 'file': plain})

    assert_in(plain, repo.get_indexed_files(),
              "%s not successfully added to %s" % (plain, path))
    ok_clean_git(path)
示例#16
0
def test_ssh_custom_identity_file():
    # A connection has to use the identity file that is configured via the
    # DATALAD_SSH_IDENTITYFILE environment variable.
    identity = "/tmp/dl-test-ssh-id"  # Travis
    if not op.exists(identity):
        raise SkipTest("Travis-specific '{}' identity file does not exist"
                       .format(identity))

    from datalad import cfg
    env_override = {"DATALAD_SSH_IDENTITYFILE": identity}
    try:
        with patch.dict("os.environ", env_override):
            cfg.reload(force=True)
            with swallow_logs(new_level=logging.DEBUG) as captured:
                manager = SSHManager()
                connection = manager.get_connection('ssh://localhost')
                cmd_out, _ = connection("echo blah")
                socket_dir = text_type(manager.socket_dir)
                socket_name = get_connection_hash(
                    "localhost", identity_file=identity, bundled=True)
                expected_socket = op.join(socket_dir, socket_name)
                ok_(exists(expected_socket))
                manager.close()
                # the identity file has to show up in the debug log
                assert_in("-i", captured.out)
                assert_in(identity, captured.out)
    finally:
        # Prevent overridden DATALAD_SSH_IDENTITYFILE from lingering.
        cfg.reload(force=True)
示例#17
0
def test_submodule_deinit(path):
    # Deinitializing a submodule with local modifications has to fail
    # unless it is forced.
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all(GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
            for s in top_repo.get_submodules()))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    # Without force the deinit of the *modified* submodule has to fail.
    # This used to target 'sub1', which is not one of the submodules
    # ('subm 1', '2') and therefore failed regardless of the modification.
    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)

    ok_(not top_repo.repo.submodule('subm 1').module_exists())
示例#18
0
def test_add_subdataset(path, other):
    # Register an existing dataset, and later a fresh clone, as subdatasets
    # of a superdataset via `save`.
    subds = create(op.join(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # "add everything in subds to subds"
    save(dataset=subds.path)
    assert_repo_status(subds.path)
    assert_not_in('dir', ds.subdatasets(result_xfm='relpaths'))
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    res = ds.save(subds.path)
    assert_in_results(res, action="add", path=subds.path, refds=ds.path)
    assert_in('dir', ds.subdatasets(result_xfm='relpaths'))
    #  create another one
    other = create(other)
    # install into superdataset, but don't add
    other_clone = install(source=other.path, path=op.join(ds.path, 'other'))
    # little dance to get the revolution-type dataset
    other_clone = Dataset(other_clone.path)
    # is_installed is a method and has to be called (as done further down):
    # the bare bound method is always truthy
    ok_(other_clone.is_installed())
    assert_not_in('other', ds.subdatasets(result_xfm='relpaths'))
    # now add, it should pick up the source URL
    ds.save('other')
    # and that is why, we can reobtain it from origin
    ds.uninstall('other')
    ok_(not other_clone.is_installed())
    ds.get('other')
    ok_(other_clone.is_installed())
示例#19
0
def test_notclone_known_subdataset(src, path):
    # clone() is not the tool to obtain a registered subdataset, get() is.
    sub_name = 'subm 1'

    def _listed(fulfilled):
        # relative paths of the subdatasets with the given `fulfilled` state
        return top.subdatasets(fulfilled=fulfilled, result_xfm='relpaths')

    # get the superdataset:
    top = clone(src, path,
                result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    sub = Dataset(opj(path, sub_name))
    assert_false(sub.is_installed())
    assert_in(sub_name, _listed(False))
    assert_not_in(sub_name, _listed(True))

    # clone is not meaningful
    clone_results = top.clone(sub_name, on_failure='ignore')
    assert_status('error', clone_results)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   clone_results)

    # get does the job
    get_results = top.get(path=sub_name, get_data=False)
    assert_status('ok', get_results)
    ok_(sub.is_installed())
    ok_(AnnexRepo.is_valid_repo(sub.path, allow_noninitialized=False))
    # Verify that it is the correct submodule that got installed, and not
    # a newly initiated repository
    indexed = set(sub.repo.get_indexed_files())
    eq_(indexed, {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in(sub_name, _listed(False))
    assert_in(sub_name, _listed(True))
示例#20
0
def _test_match_basic(matcher, query):
    # Shared checks for a matcher class: `matcher` is instantiated with
    # `query` and run against an empty document and against `sample1`.
    extracts = {
        'xpaths': {'text': 'text()'},
        'csss': {'favorite': '.class1::text'},
    }

    def _run(node, response):
        # a node call is expected to hand back a generator
        produced = node(dict(response=response))
        ok_(inspect.isgenerator(produced))
        return produced

    plain = matcher(query, **extracts)

    # there should be no hits in an empty document
    eq_(list(_run(plain, "<div></div>")), [])

    hits = list(_run(plain, sample1.response))
    eq_(len(hits), 3)
    expectations = zip(
        hits, sample1.a_htmls, sample1.a_texts, sample1.class1_texts)
    for hit, a_html, a_text, class1_text in expectations:
        ok_(hit['response'])
        eq_(hit['match'], a_html)
        eq_(hit['text'], a_text)
        eq_(hit.get('favorite', None), class1_text)

    # a count outside of the min_count/max_count limits raises ValueError
    # once the generator gets consumed
    too_few = matcher(query, min_count=4, **extracts)
    assert_raises(ValueError, list, _run(too_few, sample1.response))

    too_many = matcher(query, max_count=2, **extracts)
    assert_raises(ValueError, list, _run(too_many, sample1.response))
示例#21
0
def test_ssh_open_close(tfile1):
    # Open a connection to localhost, run commands through it, close it.
    manager = SSHManager()

    socket_path = opj(manager.socket_dir, get_connection_hash('localhost'))
    # TODO: facilitate the test when it didn't exist
    existed_before = exists(socket_path)
    print("%s existed: %s" % (socket_path, existed_before))

    connection = manager.get_connection('ssh://localhost')
    connection.open()
    # control master exists for sure now
    ok_(exists(socket_path))

    # use connection to execute remote command:
    home = os.path.expanduser('~')
    # the local HOME is listed explicitly since it is overridden in
    # module_setup
    out, err = connection('ls -a %r' % home)
    dot_entries = ('.', '..')
    remote_ls = [entry for entry in out.splitlines()
                 if entry not in dot_entries]
    local_ls = os.listdir(home)
    eq_(set(remote_ls), set(local_ls))

    # now test for arguments containing spaces and other pleasant symbols
    listing_cmd = 'ls -l {}'.format(sh_quote(tfile1))
    out, err = connection(listing_cmd)
    assert_in(tfile1, out)
    eq_(err, '')

    connection.close()
    # the socket has to be back in the state it was in before the test
    ok_(exists(socket_path) == existed_before)
示例#22
0
def test_clone_isnot_recursive(src, path_nr, path_r):
    # A plain clone must not install any subdataset (`path_r` is not used).
    cloned = clone(src, path_nr, result_xfm='datasets', return_type='item-or-list')
    ok_(cloned.is_installed())
    # check that nothing got installed unintentionally
    sub_records = cloned.subdatasets(recursive=True)
    n_records = len(sub_records)
    assert_result_count(sub_records, n_records, state='absent')
    # this also means that the subdatasets are listed as not fulfilled:
    unfulfilled = cloned.subdatasets(
        recursive=True, fulfilled=False, result_xfm='relpaths')
    eq_(set(unfulfilled), {'subm 1', '2'})
示例#23
0
def test_create_test_dataset():
    # rudimentary smoke test
    from datalad.api import create_test_dataset
    with swallow_logs(), swallow_outputs():
        created = create_test_dataset(spec='2/1-2')
    # at least five - 1 top, two on top level, 1 in each
    n_created = len(created)
    ok_(5 <= n_created <= 7)
    for ds_path in created:
        # some of them are annex but we just don't check
        ok_clean_git(ds_path, annex=None)
        matching_files = glob(opj(ds_path, 'file*'))
        ok_(len(matching_files))
示例#24
0
def test_GitRepo_equals(path1, path2):
    # Instances for the same path compare equal, those for different paths
    # do not; both operators are exercised explicitly.
    reference = GitRepo(path1)
    same_path = GitRepo(path1)
    ok_(reference == same_path)
    eq_(reference, same_path)

    other_path = GitRepo(path2)
    neq_(reference, other_path)
    ok_(reference != other_path)
示例#25
0
def test_install_dataset_from_just_source(url, path):
    # install() given only a source: the dataset has to end up underneath
    # the current working directory.
    with chpwd(path, mkdir=True):
        installed = install(source=url)

    location = installed.path
    ok_startswith(location, path)
    ok_(installed.is_installed())
    ok_(GitRepo.is_valid_repo(location))
    ok_clean_git(location, annex=None)
    indexed = installed.repo.get_indexed_files()
    assert_in('INFO.txt', indexed)
示例#26
0
def test_autoresolve_multiple_datasets(src, path):
    # A single get() call with paths from two datasets, and no dataset
    # given explicitly, has to obtain the content in both of them.
    data_file = 'test-annex.dat'
    with chpwd(path):
        first = install('ds1', source=src)
        second = install('ds2', source=src)
        requested = [opj('ds1', data_file)] + glob(opj('ds2', '*.dat'))
        results = get(requested)
        # each ds has one file
        eq_(len(results), 2)
        ok_(first.repo.file_has_content(data_file) is True)
        ok_(second.repo.file_has_content(data_file) is True)
示例#27
0
def test_clone_dataset_from_just_source(url, path):
    # clone() given only a source: the dataset has to end up underneath
    # the current working directory.
    with chpwd(path, mkdir=True):
        cloned = clone(url, result_xfm='datasets', return_type='item-or-list')

    location = cloned.path
    ok_startswith(location, path)
    ok_(cloned.is_installed())
    ok_(GitRepo.is_valid_repo(location))
    ok_clean_git(location, annex=None)
    indexed = cloned.repo.get_indexed_files()
    assert_in('INFO.txt', indexed)
示例#28
0
def test_install_dataset_from_instance(src, dst):
    # install() has to accept a Dataset instance as its source.
    source_ds = Dataset(src)
    installed = install(source=source_ds, path=dst)

    assert_is_instance(installed, Dataset)
    location = installed.path
    ok_startswith(location, dst)
    ok_(installed.is_installed())
    ok_(GitRepo.is_valid_repo(location))
    ok_clean_git(location, annex=None)
    indexed = installed.repo.get_indexed_files()
    assert_in('INFO.txt', indexed)
示例#29
0
文件: test_run.py 项目: hanke/datalad
def test_inject(path):
    # With inject=True the command and the extra info have to show up in
    # the message of the resulting commit.
    dataset = Dataset(path).create(force=True)
    ok_(dataset.repo.is_dirty())
    results = run_command(
        "nonsense command",
        dataset=dataset,
        inject=True,
        extra_info={"custom_key": "custom_field"})
    # consume the results so that the command is actually processed
    list(results)
    commit_message = dataset.repo.format_commit("%B")
    for expected in ("custom_key", "nonsense command"):
        assert_in(expected, commit_message)
示例#30
0
def test_no_interaction_with_untracked_content(path):
    # extracted from what was a metadata test originally
    origin = Dataset(opj(path, 'origin')).create(force=True)
    create_tree(origin.path, {'sub': {'subsub': {'dat': 'lots of data'}}})
    sub = origin.create('sub', force=True)
    config_relpath = opj('.datalad', 'config')
    sub.remove(config_relpath, if_dirty='ignore')
    config_abspath = opj(sub.path, '.datalad', 'config')
    ok_(not exists(config_abspath))
    # the following only works if `remove` didn't do anything stupid and
    # caused all content to be saved
    sub.create('subsub', force=True)
def test_is_url():
    # is_url() for strings with and without a scheme, non-string values,
    # and RI instances.
    for candidate in ('file://localhost/some',
                      'http://localhost',
                      'ssh://me@localhost'):
        ok_(is_url(candidate))
    # in current understanding it is indeed a url but an 'ssh', implicit=True,
    # not just a useless scheme=weired with a hope to point to a netloc
    with swallow_logs():
        ok_(is_url('weired://'))
    for candidate in ('relative', '/absolute'):
        nok_(is_url(candidate))
    # actually we do allow ssh:implicit urls ATM
    ok_(is_url('like@sshlogin'))
    # empty/blank strings and stuff of other types are not considered a URL
    for candidate in ('', ' ', 123):
        nok_(is_url(candidate))

    # we can pass RI instance directly
    file_ri = RI('file://localhost/some')
    ok_(is_url(file_ri))
    relative_ri = RI('relative')
    nok_(is_url(relative_ri))
示例#32
0
def test_target_ssh_simple(origin, src_path, target_rootpath):
    # Installs `origin` into `src_path`, calls create_sibling for an ssh
    # target below `target_rootpath`, and then goes through the
    # assert_create_sshwebserver helper with existing='replace' and
    # existing='reconfigure', comparing digests and mtimes of the target tree.

    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        # NOTE(review): the `sshurl=` line of the call below is garbled in
        # this copy (credential-style masking swallowed the rest of the call
        # and whatever followed it, up to the next decorator). `src_is_annex`,
        # used further down, is presumably defined in the lost part -- restore
        # this section from the upstream source before running the test.
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://*****:*****@with_testsui(responses=["yes"])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(
                dataset=source,
                name="local_target",
                sshurl="ssh://localhost" + target_path,
                publish_by_default='master',
                existing='replace',
                ui=True,
            )

        interactive_assert_create_sshwebserver()

        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            lclcfg = AnnexRepo(src_path).config
            eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
            # valid uuid
            eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
            # should be added too, even if URL matches prior state
            eq_(lclcfg.get('remote.local_target.push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )

        @with_testsui(responses=['yes'])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(existing='replace', **cpkwargs)

        interactive_assert_create_sshwebserver()

        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        assert_publish_with_ui(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
        }
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        ok_(modified_files.issuperset(ok_modified_files))
示例#33
0
def _test_target_ssh_inherit(standardgroup, ui, use_ssh, src_path,
                             target_path):
    """Publish a nested dataset hierarchy to a sibling using inheritance.

    A dataset is created at `src_path` and a sibling named "magical" is
    created for it at `target_path`.  Nested 'sub' subdatasets are then
    added, and the test verifies that recursive publication errors out for
    them until ``missing='inherit'`` is requested, and that the whole
    hierarchy can afterwards be reconfigured with ``inherit=True``.

    Parameters
    ----------
    standardgroup
      If true-ish, 'wanted' preferred content of the sibling is set to
      'standard' and 'group' to this value.
    ui
      Passed on to `create_sibling`; also the expected `installed` state
      given to `assert_postupdate_hooks`.
    use_ssh
      If true-ish, the sibling is addressed as ``localhost:<target_path>``
      instead of the plain path.
    src_path
      Location at which the top-level dataset is created.
    target_path
      Location of the sibling for the top-level dataset.
    """
    sibling = "magical"
    top = Dataset(src_path).create()
    sibling_url = ('localhost:%s' % target_path) if use_ssh else target_path
    # Setting a group is only smoke tested here, by using the group of the
    # current user.  The sibling is not created recursively.
    top.create_sibling(
        sibling_url,
        name=sibling,
        shared='group',
        group=os.getgid(),
        ui=ui,
    )
    if standardgroup:
        top.repo.set_preferred_content('wanted', 'standard', sibling)
        top.repo.set_preferred_content('group', standardgroup, sibling)
    top.publish(to=sibling)

    # "A month later" new subdatasets show up, nested into each other.
    # A known hiccup happened when a subsub dataset was added as well: we
    # might traverse incorrectly and not prepare sub first for subsub to
    # inherit from.
    depth = 2  # gets slow: 1 - 43 sec, 2 - 49 sec , 3 - 69 sec
    nested = []
    current = top
    for _ in range(depth):
        child = current.create('sub')
        create_tree(child.path, {'sub.dat': 'lots of data'})
        current.save('sub', recursive=True)
        ok_file_under_git(child.path, 'sub.dat', annexed=True)
        nested.append(child)
        current = child

    # counterparts of the nested subdatasets underneath the target location
    remote_nested = [
        Dataset(opj(target_path, *(['sub'] * level)))
        for level in range(1, depth + 1)
    ]

    # No default publication target was recorded (e.g. via --set-upstream),
    # so a plain publish without `to` must be reported as impossible
    no_target_res = top.publish(on_failure='ignore')
    assert_result_count(
        no_target_res,
        1,
        status='impossible',
        message=
        'No target sibling configured for default publication, please specific via --to'
    )

    # Non-recursive publication is fine; BUT it (git or us?) would create
    # an empty sub/ directory
    top.publish(to=sibling)
    assert_postupdate_hooks(target_path, installed=ui)
    for remote_sub in remote_nested:
        ok_(not remote_sub.is_installed())  # still not there

    # Recursive publication without inheritance: one error per subdataset,
    # since none of them knows the sibling
    res = top.publish(to=sibling, recursive=True, on_failure='ignore')
    assert_result_count(res, 1 + len(nested))
    assert_status(('error', 'notneeded'), res)
    assert_result_count(
        res,
        len(nested),
        status='error',
        message=("Unknown target sibling '%s' for publication", 'magical'),
    )

    # Finally publishing with inheritance
    top.publish(to=sibling, recursive=True, missing='inherit')
    assert_postupdate_hooks(target_path, installed=ui)

    def check_dss():
        """Verify sibling settings of subdatasets and state of the targets."""
        expected_wanted = 'standard' if standardgroup else ''
        expected_group = standardgroup or ''
        # preferred content settings for the sibling in each subdataset
        for local_sub in nested:
            eq_(local_sub.repo.get_preferred_content('wanted', sibling),
                expected_wanted)
            eq_(local_sub.repo.get_preferred_content('group', sibling),
                expected_group)

        # content is expected to be copied only for the 'backup' group
        content_expected = standardgroup == 'backup'
        for remote_sub in remote_nested:
            ok_(remote_sub.is_installed())  # it is there now
            eq_(remote_sub.repo.config.get('core.sharedrepository'), '1')
            if content_expected:
                ok_file_has_content(opj(remote_sub.path, 'sub.dat'),
                                    'lots of data')
            else:
                # otherwise nothing is copied by default
                assert_false(remote_sub.repo.file_has_content('sub.dat'))

    check_dss()

    # It should be ok to reconfigure the full hierarchy of datasets while
    # "inheriting".  No URL must be specified, and we must not blow up but
    # only issue a warning for the top level dataset, which has no super and
    # so cannot inherit anything - the use case is to fixup/establish the
    # full hierarchy on the remote site
    with swallow_logs(logging.WARNING) as cml:
        out = top.create_sibling(
            None,
            name=sibling,
            existing="reconfigure",
            inherit=True,
            ui=ui,
            recursive=True,
        )
        eq_(len(out), 1 + len(nested))
        assert_in("Cannot determine super dataset", cml.out)

    check_dss()
示例#34
0
def test_local_path_target_dir(path):
    """Check where `create_sibling` puts siblings given a local path.

    Siblings of a dataset at ``<path>/main`` are created at several local
    locations below `path`, each combined with a `target_dir`, and the
    expected directory is asserted to exist after each call.
    """
    root = Path(path)
    main = Dataset(root / "main").create()
    target_dir = "tdir"

    # sibling location given as an absolute path
    abs_location = str(root / "a")
    main.create_sibling(name="abspath-targetdir",
                        sshurl=abs_location,
                        target_dir=target_dir)
    ok_((root / "a" / "tdir").exists())

    # sibling location given relative to the dataset, via the bound method
    rel_location = os.path.relpath(str(root / "b"), main.path)
    main.create_sibling(name="relpath-bound-targetdir",
                        sshurl=rel_location,
                        target_dir=target_dir)
    ok_((root / "b" / "tdir").exists())

    # relative location with the dataset passed by path to the plain
    # function; the result is expected underneath the working directory
    with chpwd(root):
        create_sibling(dataset=main.path,
                       name="relpath-unbound-targetdir",
                       sshurl="c",
                       target_dir=target_dir)
    ok_((root / "c" / "tdir").exists())

    main.create("subds")

    # recursive, plain target_dir: subdataset is expected inside of it
    main.create_sibling(name="rec-plain-targetdir",
                        sshurl=str(root / "d"),
                        target_dir=target_dir,
                        recursive=True)
    ok_((root / "d" / "tdir" / "subds").exists())

    # recursive, target_dir with the %RELNAME placeholder: "d" and
    # "d-subds" are expected side by side
    main.create_sibling(name="rec-template-targetdir",
                        sshurl=str(root / "e"),
                        target_dir="d%RELNAME",
                        recursive=True)
    template_root = root / "e"
    ok_((template_root / "d").exists())
    ok_((template_root / "d-subds").exists())
def test_same_website():
    ok_(same_website("http://a.b", "http://a.b/2014/01/xxx/"))
    ok_(same_website("http://a.b/page/2/", "http://a.b/2014/01/xxx/"))
    ok_(same_website("https://a.b/page/2/", "http://a.b/2014/01/xxx/"))
    ok_(same_website("http://a.b/page/2/", "https://a.b/2014/01/xxx/"))