Example No. 1
def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over something
    # before
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
        ds1.create_sibling_github, 'bogus', recursive=True,
        github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])
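Note: the examples in this listing omit the fixture decorators that supply their path arguments. A minimal, hypothetical sketch of how such a test is typically wired up in the DataLad test suite follows (assuming the datalad.tests.utils helpers; the test name and assertions are illustrative only):

from datalad.api import Dataset
from datalad.tests.utils import assert_true, with_tempfile


@with_tempfile(mkdir=True)
def test_example(path):
    # `path` is a fresh temporary directory provided by the decorator
    ds = Dataset(path).create()
    assert_true(ds.is_installed())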
Example No. 2
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset', path=source.path)

    # if, however, a non-installed subdataset is requested explicitly, it'll fail
    results = source.publish(path='subm 1', to='target', on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset', action='publish')
Example No. 3
def test_install_known_subdataset(src, path):

    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
Example No. 4
def test_GitRepo_fetch(test_path, orig_path, clone_path):

    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without a URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)
Example No. 5
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('CMD', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n   reflowed', alt)
    assert_in("Something for the cmdline only Multiline!", alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
    # RST role markup
    eq_(alter_interface_docs_for_cmdline(':murks:`me and my buddies`'),
        'me and my buddies')
    # spread across lines
    eq_(alter_interface_docs_for_cmdline(':term:`Barbara\nStreisand`'),
        'Barbara\nStreisand')
    # multiple on one line
    eq_(alter_interface_docs_for_cmdline(
        ':term:`one` bla bla :term:`two` bla'),
        'one bla bla two bla')

    altpd = alter_interface_docs_for_cmdline(demo_paramdoc)
    assert_not_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_in('cmdline', altpd)
Example No. 6
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that the correct submodule was installed and not a
    # new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
Example No. 7
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('Command line', alt)
Example No. 8
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(opj(path, 'dir')):
        eq_(add('testindir')[0]['path'],
            opj(ds.path, 'dir', 'testindir'))
        # and now add all
        add('..')
    # auto-save enabled
    assert_false(ds.repo.dirty)
Example No. 9
def test_bundle_invariance(path):
    remote_url = 'ssh://localhost'
    manager = SSHManager()
    testfile = Path(path) / 'dummy'
    for flag in (True, False):
        assert_false(testfile.exists())
        ssh = manager.get_connection(remote_url, use_remote_annex_bundle=flag)
        ssh('cd .>{}'.format(text_type(testfile)))
        ok_(testfile.exists())
        testfile.unlink()
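The SSHManager pattern above reappears in Example No. 39 via the module-level datalad.ssh_manager instance. A short hedged sketch of that usage (the 'datalad-test' host name is the test fixture these examples assume):

from datalad import ssh_manager

ssh = ssh_manager.get_connection('datalad-test')
out, err = ssh('pwd')          # run a command over the (cached) connection
remote_home = out.rstrip('\n')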
Example No. 10
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('Python', alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
Example No. 11
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')
    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)
Example No. 12
def test_install_noautoget_data(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)

    # install top level:
    cdss = install(path, source=src, recursive=True)
    # there should only be datasets in the list of installed items,
    # and none of those should have any data for their annexed files yet
    for ds in cdss:
        assert_false(any(ds.repo.file_has_content(ds.repo.get_annexed_files())))
Example No. 13
def check_contents(outname, prefix):
    with tarfile.open(outname) as tf:
        nfiles = 0
        for ti in tf:
            # any annex links resolved
            assert_false(ti.issym())
            ok_startswith(ti.name, prefix + '/')
            assert_equal(ti.mtime, committed_date)
            if '.datalad' not in ti.name:
                # ignore any files in .datalad so this test is not
                # susceptible to changes in how much meta info we generate
                nfiles += 1
        # we have exactly three files, and expect no content for any directory
        assert_equal(nfiles, 3)
Example No. 14
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"], untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
Example No. 15
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('PY', alt)
    assert_not_in('REFLOW', alt)
    assert_in("Some Python-only bits Multiline!", alt)

    altpd = alter_interface_docs_for_api(demo_paramdoc)
    assert_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_not_in('cmdline', altpd)
Example No. 16
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo],
                        reference_date=refdate,
                        return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
Example No. 17
def test_save(path):

    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    ds.save("add a new file", all_changes=False)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst", all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_false(ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(ds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save(all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
Example No. 18
def test_new_or_modified(path):
    def get_new_or_modified(*args, **kwargs):
        return [relpath(ap["path"], path)
                for ap in new_or_modified(diff_revision(*args, **kwargs))]

    ds = Dataset(path).create(force=True, no_annex=True)

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.add(".")
    assert_false(ds.repo.dirty)
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)
    # Diffing doesn't fail when the branch contains a single commit.
    assert_in("to_modify", get_new_or_modified(ds, "HEAD"))

    # New files are detected, deletions are not.
    ds.repo.remove(["to_remove"])
    ok_(ds.repo.dirty)

    with open(opj(path, "to_add"), "w") as f:
        f.write("content5")
    ds.repo.add(["to_add"])
    ds.repo.commit("add one, remove another")

    eq_(get_new_or_modified(ds, "HEAD"),
        ["to_add"])

    # Modifications are detected.
    with open(opj(path, "to_modify"), "w") as f:
        f.write("updated 1")
    with open(opj(path, "d/to_modify"), "w") as f:
        f.write("updated 2")
    ds.add(["to_modify", "d/to_modify"])

    eq_(set(get_new_or_modified(ds, "HEAD")),
        {"to_modify", "d/to_modify"})

    # Non-HEAD revisions work.
    ds.repo.commit("empty", options=["--allow-empty"])
    assert_false(get_new_or_modified(ds, "HEAD"))
    eq_(set(get_new_or_modified(ds, "HEAD~")),
        {"to_modify", "d/to_modify"})
Example No. 19
def test_state(path):
    ds = Dataset.create(path)
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # by default we are not reporting any state info
    assert_not_in('state', res[0])
    # uninstall the subdataset
    ds.uninstall('sub')
    # normally 'gone' is reported as 'absent'
    assert_false(sub.is_installed())
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # with directory totally gone also
    os.rmdir(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # putting dir back, no change
    os.makedirs(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
Example No. 20
def test_GitRepo_instance_from_not_existing(path, path2):
    # 1. create=False and path doesn't exist:
    assert_raises(NoSuchPathError, GitRepo, path, create=False)
    assert_false(op.exists(path))

    # 2. create=False, path exists, but no git repo:
    os.mkdir(path)
    ok_(op.exists(path))
    assert_raises(InvalidGitRepositoryError, GitRepo, path, create=False)
    assert_false(op.exists(op.join(path, '.git')))

    # 3. create=True, path doesn't exist:
    gr = GitRepo(path2, create=True)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path2, '.git')))
    ok_clean_git(path2, annex=False)

    # 4. create=True, path exists, but no git repo:
    gr = GitRepo(path, create=True)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path, '.git')))
    ok_clean_git(path, annex=False)
Example No. 21
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero exit code matches the
    # original one.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a non-zero
    # exit code that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller asked us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
Example No. 22
def test_basics(path, super_path):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    assert_false(ds.repo.is_under_annex("README.md"))
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # commit this procedure config for later use in a clone:
    ds.add(op.join('.datalad', 'config'))
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

    # make a fresh dataset:
    super = Dataset(super_path).create()
    # configure dataset to run the demo procedure prior to the clean command
    super.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # 'super' doesn't know any procedures but should get to know one by
    # installing the above as a subdataset
    super.install('sub', source=ds.path)
    # run command that should trigger the demo procedure
    super.clean()
    # look for traces
    ok_file_has_content(op.join(super.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(super.path, index_modified=[op.join('.datalad', 'config')])
Example No. 23
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')
    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)

    # Test it blowing up if we provide an unknown field
    with assert_raises(ValueError) as cme:
        cred.enter_new(username='******')
    assert_in('field(s): username.  Known but not specified: password, user',
              str(cme.exception))

    # Test that if user is provided, it is not asked for
    cred.enter_new(user='******')
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'newpassword')
Example No. 24
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            assert_equal(cred.get('key_id'), '1')
            assert_equal(cred.get('secret_id'), '2')
            assert_true(cred.is_known)
        assert_false(cred.is_known)  # no memory of the past
Example No. 25
def test_add_archive_dirs(path_orig, url, repo_path):
    # change to repo_path
    with chpwd(repo_path):
        # create annex repo
        repo = AnnexRepo(repo_path, create=True)

        # add archive to the repo so we can test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
        repo.commit("added 1.tar.gz")

        # test with excludes and annex options
        add_archive_content(
            '1.tar.gz',
            existing='archive-suffix',
            # Since it is inconsistent, and in many cases there seem to be no leading dirs to strip, keep them as provided
            strip_leading_dirs=True,
            delete=True,
            leading_dirs_consider=['crcns.*', '1'],
            leading_dirs_depth=2,
            use_current_dir=False,
            exclude='.*__MACOSX.*')  # some junk penetrates

        if external_versions['cmd:annex'] >= '6.20170208':
            # should have fixed remotes
            eq_(
                repo.get_description(
                    uuid=DATALAD_SPECIAL_REMOTES_UUIDS[ARCHIVES_SPECIAL_REMOTE]
                ), '[%s]' % ARCHIVES_SPECIAL_REMOTE)

        all_files = sorted(find_files('.'))
        target_files = {
            './CR24A/behaving1/1 f.txt',
            './CR24C/behaving3/3 f.txt',
            './CR24D/behaving2/2 f.txt',
        }
        eq_(set(all_files), target_files)

        # regression test: the subdir in __MACOSX wasn't excluded and its name
        # was getting stripped by leading_dir_len
        assert_false(exists(
            '__MACOSX'))  # if stripping and exclude didn't work this fails
        assert_false(
            exists('c-1_data')
        )  # if exclude doesn't work then name of subdir gets stripped by leading_dir_len
        assert_false(
            exists('CR24B')
        )  # if exclude doesn't work but everything else works this fails
Example No. 26
def test_ArchivesCache():
    # we don't actually need to test archives handling itself
    path1 = "/zuba/duba"
    path2 = "/zuba/duba2"
    # should not be able to create a persistent cache without topdir
    assert_raises(ValueError, ArchivesCache, persistent=True)
    cache = ArchivesCache()  # by default -- non persistent

    archive1_path = op.join(path1, fn_archive_obscure_ext)
    archive2_path = op.join(path2, fn_archive_obscure_ext)
    cached_archive1_path = cache[archive1_path].path
    assert_false(cache[archive1_path].path == cache[archive2_path].path)
    assert_true(cache[archive1_path] is cache[archive1_path])
    cache.clean()
    assert_false(op.exists(cached_archive1_path))
    assert_false(op.exists(cache.path))

    # test del
    cache = ArchivesCache()  # by default -- non persistent
    assert_true(op.exists(cache.path))
    cache_path = cache.path
    del cache
    assert_false(op.exists(cache_path))
Example No. 27
def test_func_to_node():
    int_node = func_to_node(int)  # a node that requires nothing and whose output is not used
    assert int_node.__doc__
    in_dict = {'in': 1}
    ok_generator(int_node(in_dict))

    # xrange is not considered to be a generator
    def xrange_(n, offset=0):
        for x in range(offset, offset + n):
            yield x

    xrange_node = func_to_node(xrange_, data_args='in', outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 0}])

    # with additional kwargs
    xrange_node = func_to_node(xrange_, data_args='in', outputs='out', kwargs={'offset': 10})
    assert_in('assigned to out', xrange_node.__doc__)
    assert_in('Additional keyword arguments', xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 10}])

    # testing func_node
    data = {'offset': 5, 'in': 1}

    xrange_node = func_to_node(xrange_, data_args='in', data_kwargs=['offset'], outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    gen = xrange_node(data)
    ok_generator(gen)
    assert_equal(list(gen), [{'offset': 5, 'out': 5, 'in': 1}])

    # with multiple outputs
    def split_(s, num):
        yield s.split('/', num)

    data = {'num': 3, 'in': 'datalad/crawler/nodes'}
    split_node = func_to_node(split_, data_args='in', data_kwargs=['num'], outputs=['a', 'b', 'c'])
    assert_in('assigned to a, b, c', split_node.__doc__)
    assert_false('Additional keyword arguments' in split_node.__doc__)
    split_node_gen = split_node(data)
    assert_equal(list(split_node_gen), [{'a': 'datalad', 'c': 'nodes', 'b': 'crawler', 'num': 3, 'in': 'datalad/crawler/nodes'}])
Example No. 28
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1,
                        action="update",
                        status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update the file with existing data, but leave the empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
Example No. 29
def test_property_reevaluation(repo1):
    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.remove()
    # repo is gone, and config is again reevaluated to only provide user/system
    # level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    fourth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, fourth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)
Example No. 30
def test_is_installed(src, path):
    ds = Dataset(path)
    assert_false(ds.is_installed())

    # get a clone:
    AnnexRepo.clone(src, path)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    # We must not be able to create a new repository under a known
    # subdataset path.
    # Note: Unfortunately we would still be able to generate it under
    # subdirectory within submodule, e.g. `subm 1/subdir` but that is
    # not checked here. `create` provides that protection though.
    res = subds.create(on_failure='ignore',
                       return_type='list',
                       result_filter=None,
                       result_xfm=None)
    assert_result_count(res, 1)
    assert_result_count(res,
                        1,
                        status='error',
                        path=subds.path,
                        message=('collision with %s (dataset) in dataset %s',
                                 subds.path, ds.path))
    # get the submodule
    # This would init it, so there is a .git file with symlink info, which,
    # as we agreed, is more pain than gain, so let's use our install, which would
    # do it right; after all we are checking 'is_installed' ;)
    # from datalad.cmd import Runner
    # Runner().run(['git', 'submodule', 'update', '--init', 'subm 1'], cwd=path)
    with chpwd(path):
        get('subm 1')
    ok_(subds.is_installed())
    # wipe it out
    rmtree(ds.path)
    assert_false(ds.is_installed())
Example No. 31
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    def _check1():
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)

    def _check2():
        assert_equal(cred.get('key_id'), '1')
        assert_equal(cred.get('secret_id'), '2')
        assert_true(cred.is_known)

    # this is the old way, should still work
    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        _check1()
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            _check2()
        assert_false(cred.is_known)  # no memory of the past

    # here is the new way
    import datalad
    try:
        with patch.dict('os.environ',
                        {'DATALAD_CREDENTIAL_test__s3_key__id': '1'}):
            datalad.cfg.reload()
            _check1()
            with patch.dict('os.environ',
                            {'DATALAD_CREDENTIAL_test__s3_secret__id': '2'}):
                datalad.cfg.reload()
                _check2()
            datalad.cfg.reload()
            assert_false(cred.is_known)  # no memory of the past
    finally:
        datalad.cfg.reload()
Example No. 32
def test_dedent():
    assert_false(dedent_docstring("one liner").endswith("\n"))
Example No. 33
def test_check_dates_empty_repo(path):
    assert_false(check_dates(GitRepo(path, create=True))["objects"])
Example No. 34
def check(a, kw=False):
    assert_false('http_proxy' in os.environ)
    assert_false('https_proxy' in os.environ)
    assert_in(kw, [False, 'custom'])
Example No. 35
def test_is_legal_metafield():
    for legal in ["legal", "0", "legal_"]:
        assert_true(au.is_legal_metafield(legal))
    for notlegal in ["_not", "with space"]:
        assert_false(au.is_legal_metafield(notlegal))
Example No. 36
def test_publish_depends(
        origin,
        src_path,
        target1_path,
        target2_path,
        target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    # TODO next line would require `add_sibling` to be called with force
    # see gh-1235
    #source.config.add(depvar, 'stupid', where='local')
    #eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=(
            'unknown sibling(s) specified as publication dependency: %s',
            set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
Example No. 37
def test_is_legal_metafield():
    for legal in ["legal", "0", "legal_"]:
        assert_true(au.is_legal_metafield(legal))
    for notlegal in ["_not", "with space"]:
        assert_false(au.is_legal_metafield(notlegal))
Example No. 38
def test_get_content_info_dotgit(path):
    ds = Dataset(path).create()
    # Files in .git/ won't be reported, though this takes a kludge on our side
    # before Git 2.25.
    assert_false(ds.repo.get_content_info(paths=[op.join(".git", "config")]))
Example No. 39
def check_replace_and_relative_sshpath(use_ssh, src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is datalad-test, so we need to query it
    if use_ssh:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection('datalad-test')
        remote_home, err = ssh('pwd')
        remote_home = remote_home.rstrip('\n')
        dst_relpath = os.path.relpath(dst_path, remote_home)
        url = 'datalad-test:%s' % dst_relpath
        sibname = 'datalad-test'
    else:
        url = dst_path
        sibname = 'local'

    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    try:
        res = ds.create_sibling(url, ui=have_webui())
    except UnicodeDecodeError:
        if sys.version_info < (3, 7):
            # observed test failing on ubuntu 18.04 with python 3.6
            # (reproduced in conda env locally with python 3.6.10 when LANG=C
            # We will just skip this tricky one
            raise SkipTest("Known failure")
        raise
    assert_in_results(res, action="create_sibling", sibling_name=sibname)
    published = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    if have_webui():
        # verify that hook runs and there is nothing in stderr
        # since it exits with 0 exit even if there was a problem
        out = Runner(cwd=opj(dst_path, '.git')).run(
            [_path_('hooks/post-update')], protocol=StdOutErrCapture)
        assert_false(out['stdout'])
        assert_false(out['stderr'])

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception ATM... so just
    # check that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    with assert_raises(RuntimeError):
        # but we cannot replace in non-interactive mode
        ds.create_sibling(url, existing='replace', ui=have_webui())

    # We don't have a context manager like @with_testsui, so
    @with_testsui(responses=["yes"])
    def interactive_create_sibling():
        ds.create_sibling(url, existing='replace', ui=have_webui())

    interactive_create_sibling()

    published2 = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test, since in the above test it would not puke ATM but just
    # not even try to copy, since it assumes that the file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to=sibname,
                            transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))

    if not have_webui():
        return

    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example No. 40
def test_dedent():
    assert_false(dedent_docstring("one liner").endswith("\n"))
Example No. 41
def test_external_version_contains():
    ev = ExternalVersions()
    assert_true("datalad" in ev)
    assert_false("does not exist" in ev)
Example No. 42
def test_container_from_subdataset(ds_path, src_subds_path, local_file):

    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(name="first",
                             url=get_local_file_url(
                                 op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(subds.path, '.datalad', 'environments', 'first',
                          'image')
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # a subdataset that is not installed doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a subds that
    # is not present:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(res,
                        1,
                        name='sub/first',
                        type='file',
                        action='containers',
                        status='ok',
                        path=target_path,
                        parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset traversals
    # (i.e. we recurse with containers_list(..., result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)
Example No. 43
def test_something(path, new_home):
    # will refuse to work with source='dataset' when no dataset is given
    assert_raises(ValueError, ConfigManager, source='dataset')
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='dataset')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])

    assert_equal(sorted(cfg.items()),
                 [(u'onemore.complicated の beast with.dot.findme', '5.0'),
                  ('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])
    assert_equal(sorted(cfg.items('something')),
                 [('something.empty', ''), ('something.myint', '3'),
                  ('something.novalue', None),
                  ('something.user',
                   ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(cfg.get('something.user'),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git treats a key without any value as a flag, thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail because of the section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError, cfg.rename_section, 'nothere',
                      'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch the changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set,
                  'mike.should.have',
                  'a beer',
                  force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails for an unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ', {
            'HOME': new_home,
            'DATALAD_SNEAKY_ADDITION': 'ignore'
    }):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert (not exists(global_gitconfig))
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert (exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(CommandError,
                          globalcfg.unset,
                          'datalad.unittest.youcan',
                          where='local')
        assert (globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        if external_versions['cmd:git'] < '2.18':
            # older versions leave empty section behind in the file
            ok_file_has_content(global_gitconfig,
                                '[datalad "unittest"]',
                                strip=True)
            # remove_section to clean it up entirely
            globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(Dataset(opj(path, 'ds')),
                        source='dataset',
                        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
Example No. 44
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.format_commit("%B"))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(sub.rerun(return_type="list", on_failure="ignore"),
                        1,
                        status="impossible",
                        action="run",
                        rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(
        ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files that
    # would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other config
            # file.
            assert_false(
                any(r["path"].endswith("config") for r in entry["diff"]))

    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()

    eq_('x\n', open(probe_path).read())
Example No. 45
def func3(x):
    assert_false(ui.is_interactive)
    return x * 3
Example No. 46
def test_publish_depends(
        origin,
        src_path,
        target1_path,
        target2_path,
        target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=(
            'unknown sibling(s) specified as publication dependency: %s',
            set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))

    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')

    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))

    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
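    # A hedged sketch of what the dependency boils down to in git config:
    # publishing to "target2" first publishes to every sibling listed under
    # remote.target2.datalad-publish-depends, so the above is roughly
    #
    #   source.config.add('remote.target2.datalad-publish-depends', 'target1',
    #                     where='local')
    #   source.publish(to='target2')   # implies publishing to 'target1' first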
Exemplo n.º 47
0
def test_check_dates_empty_repo(path):
    assert_false(check_dates(GitRepo(path, create=True))["objects"])
Exemplo n.º 48
0
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    assert_raises(ValueError, ds.subdatasets)
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.save(path='test', message="Hello!", version_tag=1)
    assert_true(ds.is_installed())
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds',
                       source=path,
                       result_xfm='datasets',
                       return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save(message="with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'),
                             source=path,
                             result_xfm='datasets',
                             return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a superdataset that actually
    # has the queried dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # but we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we can still resolve the central one
        dscentral = Dataset('///')
        eq_(dscentral.path, dl_cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)

    # TODO actual submodule checkout is still there

    # Test ^. (the dataset for curdir) shortcut
    # At the top should point to the top
    with chpwd(ds.path):
        dstop = Dataset('^.')
        eq_(dstop, ds)

    # and still does within subdir
    os.mkdir(opj(ds.path, 'subdir'))
    with chpwd(opj(ds.path, 'subdir')):
        dstop = Dataset('^.')
        eq_(dstop, ds)

    # within submodule will point to submodule
    with chpwd(subsubds.path):
        dstop = Dataset('^.')
        eq_(dstop, subsubds)
Exemplo n.º 49
0
def test_get_content_info_paths_empty_list(path):
    ds = Dataset(path).create()

    # Unlike None, passing any empty list as paths to get_content_info() does
    # not report on all content.
    assert_false(ds.repo.get_content_info(paths=[]))
    assert_false(ds.repo.get_content_info(paths=[], ref="HEAD"))

    # Add annex content to make sure it's not reported.
    (ds.pathobj / "foo").write_text("foo")
    ds.save()

    # Same for get_content_annexinfo()...
    assert_false(ds.repo.get_content_annexinfo(paths=[]))
    assert_false(ds.repo.get_content_annexinfo(paths=[], init=None))
    assert_false(ds.repo.get_content_annexinfo(paths=[], ref="HEAD"))
    assert_false(ds.repo.get_content_annexinfo(paths=[], ref="HEAD",
                                               init=None))
    # ... where whatever was passed for init will be returned as is.
    assert_equal(
        ds.repo.get_content_annexinfo(paths=[],
                                      ref="HEAD",
                                      init={"random": {
                                          "entry": "a"
                                      }}), {"random": {
                                          "entry": "a"
                                      }})
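    # For contrast (a hedged sketch, assuming the usual path-keyed dict return
    # value): passing paths=None reports on all tracked content, so the
    # just-saved "foo" would show up, unlike with paths=[]:
    #
    #   info = ds.repo.get_content_info(paths=None)
    #   assert any(p.name == "foo" for p in info)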
Exemplo n.º 50
0
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)  # create git repo
    git.add(opj(topdir, 'dir', 'subgit',
                'fgit.txt'))  # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir',
                  'subgit'))  # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))  # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'),
               options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')  # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule  by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file('fromweb',
                            topurl + '/noteventhere',
                            options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # the subdataset should have its json created and deleted only
                # when json='file' and recursive=True
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(topdir,
                               json=state,
                               all_=all_,
                               recursive=recursive)
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata JSON files created and deleted only when all_=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath),
                             (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden', ), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check that it is also updated in its 'nodes' sublist (used by the web-UI JSON; regression test)
                assert_equal(dsj['nodes'][0]['size']['total'],
                             dsj['size']['total'])

                # check the reported size of the 3-byte probe file
                subds = [
                    item for item in dsj['nodes']
                    if item['name'] == 'subdsfile.txt'
                ][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run a non-recursive dataset traversal after the subdataset
                # metadata was already created, to verify that sub-dataset
                # metadata gets picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] == 'subdsfile.txt'
                    ][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(topds_nodes['fromweb']['size']['total'],
                             UNKNOWN_SIZE)
Exemplo n.º 51
0
def test_fmt_to_name():
    eq_(au.fmt_to_name("{name}", {}), "name")
    eq_(au.fmt_to_name("{0}", {0: "name"}), "name")
    eq_(au.fmt_to_name("{1}", {0: "name"}), "1")

    assert_false(au.fmt_to_name("frontmatter{name}", {}))
    assert_false(au.fmt_to_name("{name}backmatter", {}))
    assert_false(au.fmt_to_name("{two}{names}", {}))
    assert_false(au.fmt_to_name("", {}))
    assert_false(au.fmt_to_name("nonames", {}))
    assert_false(au.fmt_to_name("{}", {}))
Exemplo n.º 52
0
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"), ("s0", "s1_1", "s2"), ("s0", "s1_0"),
                  ("s0", "s1_1"), ("s0", "ss"), ("s0", )]:
        Dataset(op.join(*((src, ) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path,
                 source=src,
                 result_xfm='datasets',
                 return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(
        run_command("cat {inputs} {inputs} >doubled.dat",
                    dataset=ds,
                    inputs=["input.dat"],
                    extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat",
                        strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah",
                        strip=True)
Exemplo n.º 53
0
def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath,
                 path=destpath,
                 result_xfm='datasets',
                 return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True),
                        1,
                        action='update',
                        status='ok',
                        type='dataset')
    # grab new content of the uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True),
                        2,
                        action='update',
                        status='ok',
                        type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True),
                        1,
                        action='update',
                        type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now-disconnected subdataset for further tests;
    # neither using a bound method nor giving a parentds should be
    # needed to end up with a clean dataset
    remove(op.join(ds.path, sname), check=False)
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
Exemplo n.º 54
0
def test_get_cached_dataset(cache_dir):

    # patch DATALAD_TESTS_CACHE so that we do not use the actual cache while
    # testing that very cache.
    cache_dir = Path(cache_dir)

    # store file-based values for testrepo-minimalds for readability:
    annexed_file = opj('inannex', 'animated.gif')
    annexed_file_key = "MD5E-s144625--4c458c62b7ac8ec8e19c8ff14b2e34ad.gif"

    with patch(CACHE_PATCH_STR, new=cache_dir):

        # tuples to test (url, version, keys, class):
        test_cases = [

            # a simple testrepo
            ("https://github.com/datalad/testrepo--minimalds",
             "541cf855d13c2a338ff2803d4488daf0035e568f", None, AnnexRepo),
            # Same repo, but request paths to be present. This should work
            # with a subsequent call, although the first one did not already
            # request any:
            ("https://github.com/datalad/testrepo--minimalds",
             "9dd8b56cc706ab56185f2ceb75fbe9de9b606724", annexed_file_key,
             AnnexRepo),
            # Same repo again, but invalid version
            (
                "https://github.com/datalad/testrepo--minimalds",
                "nonexistent",
                "irrelevantkey",  # invalid version; don't even try to get the key
                AnnexRepo),
            # same thing with different name should be treated as a new thing:
            ("https://github.com/datalad/testrepo--minimalds", "git-annex",
             None, AnnexRepo),
            # try a plain git repo to make sure we can deal with that:
            # Note, that we first need a test case w/o a `key` parameter to not
            # blow up the test when Clone is patched, resulting in a MagicMock
            # instead of a Dataset instance within get_cached_dataset. In the
            # second case it's already cached then, so the patched Clone is
            # never executed.
            ("https://github.com/datalad/datalad.org", None, None, GitRepo),
            (
                "https://github.com/datalad/datalad.org",
                "gh-pages",
                "ignored-key",  # it's a git repo; don't even try to get a key
                GitRepo),
        ]
        for url, version, keys, cls in test_cases:
            target = cache_dir / url2filename(url)

            # assuming it doesn't exist yet - patched cache dir!
            in_cache_before = target.exists()
            with patch(CLONE_PATCH_STR) as exec_clone:
                try:
                    ds = get_cached_dataset(url, version, keys)
                    invalid_version = False
                except AssertionError:
                    # should happen only if `version` wasn't found. Implies
                    # that the dataset exists in cache (although not returned
                    # due to exception)
                    assert_true(version)
                    assert_false(Dataset(target).repo.commit_exists(version))
                    # mark for later assertions (most of them should still hold
                    # true)
                    invalid_version = True

            assert_equal(exec_clone.call_count, 0 if in_cache_before else 1)

            # Patch prevents actual execution. Now do it for real. Note that
            # this might be necessary for content retrieval even if the dataset
            # was in the cache before.
            try:
                ds = get_cached_dataset(url, version, keys)
            except AssertionError:
                # see previous call
                assert_true(invalid_version)

            assert_is_instance(ds, Dataset)
            assert_true(ds.is_installed())
            assert_equal(target, ds.pathobj)
            assert_is_instance(ds.repo, cls)

            if keys and not invalid_version and \
                    AnnexRepo.is_valid_repo(ds.path):
                # Note: it's not supposed to get that content if passed
                # `version` wasn't available. get_cached_dataset would then
                # raise before and not download anything only to raise
                # afterwards.
                here = ds.config.get("annex.uuid")
                where = ds.repo.whereis(ensure_list(keys), key=True)
                assert_true(all(here in remotes for remotes in where))

            # version check. Note that all `get_cached_dataset` is supposed to
            # do is verify that the specified version exists - NOT check it
            # out
            if version and not invalid_version:
                assert_true(ds.repo.commit_exists(version))

            # re-execution
            with patch(CLONE_PATCH_STR) as exec_clone:
                try:
                    ds2 = get_cached_dataset(url, version, keys)
                except AssertionError:
                    assert_true(invalid_version)
            exec_clone.assert_not_called()
            # returns the same Dataset as before:
            assert_is(ds, ds2)
Exemplo n.º 56
0
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/               / module name: b-name /
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/               / module name: d-name /
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")

    ds_src.repo.add_submodule("b", name="b-name")
    ds_src.repo.add_submodule("d", name="d-name")
    ds_src.save()

    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")

    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())
    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b-name/b2/in-b2")
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())
    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")))
    assert_result_count(res,
                        1,
                        action="install",
                        status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res,
                        1,
                        action="get",
                        status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())
    # ... unless the module name chain doesn't match the subdataset path. In
    # that case, the caller needs to install the subdatasets beforehand.
    with assert_raises(ValueError):
        ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))
    ds_dest.get(ds_dest_d2.path, recursive=True, get_data=False)
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")
Exemplo n.º 57
0
def test_rerun_multifork(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout("side", options=["-b", "side-nonrun"])
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah 0")
    ds.save()
    ds.repo.checkout("side")
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("blah 1")
    ds.save()
    ds.run("echo bar >bar")
    ds.repo.checkout("side~1", options=["-b", "side-side"])
    with open(op.join(path, "nonrun-file2"), "w") as f:
        f.write("blah 2")
    ds.save()
    ds.run("echo onside0 >onside0")
    ds.repo.checkout("side")
    ds.repo.merge("side-side")
    ds.run("echo after-side-side >after-side-side")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["--no-ff"])
    ds.repo.merge("side-nonrun")
    # o                 k_n
    # |\
    # | o               j_n
    # o |               i_n
    # |\ \
    # | o |             h_r
    # | o |             g_n
    # | |\ \
    # | | o |           f_r
    # | | o |           e_n
    # | o | |           d_r
    # | |/ /
    # | o |             c_n
    # | |/
    # | o               b_r
    # |/
    # o                 a_n
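    # (Assumed reading of the suffixes in the ASCII graphs: "_n" marks a plain
    #  non-run commit and "_r" a recorded run commit; in the post-rerun graph
    #  below, capital "_R" and "_M" mark the commits recreated by the rerun,
    #  i.e. rerun results and merges respectively.)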

    ds.rerun(since="", onto="")
    # o                 k_M
    # |\
    # | o               j_n
    # o |               i_M
    # |\ \
    # | o |             h_R
    # | o |             g_M
    # | |\ \
    # | | o |           f_R
    # | | o |           e_n
    # | o | |           d_R
    # | |/ /
    # | o |             c_n
    # | |/
    # | o               b_R
    # |/
    # o                 a_n
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~2"))
    neq_(ds.repo.get_hexsha("HEAD^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    neq_(ds.repo.get_hexsha("HEAD^^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^^2"))
    assert_false(ds.repo.commit_exists("HEAD^^2^2"))
    eq_(ds.repo.get_hexsha("HEAD^2^^"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha())
Exemplo n.º 58
0
def test_run_inputs_outputs(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    # If we specify test-annex.dat as an input, it will be retrieved before the
    # run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"])

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))

    # Rerunning the commit will also get the input file.
    ds.repo.drop("test-annex.dat", options=["--force"])
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("test-annex.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.repo.head.commit.message)
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ds.repo.file_has_content("after-dot-run")

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop("a.dat", options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("echo blah", outputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy",
           inputs=["a.*"],
           outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.repo.head.commit.message)
    assert_in("b.dat", ds.repo.repo.head.commit.message)

    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])
Exemplo n.º 59
0
def test_target_ssh_simple(origin, src_path, target_rootpath):
    port = get_ssh_port("datalad-test")
    # prepare src
    source = install(src_path,
                     source=origin,
                     result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source,
                       name="local_target",
                       sshurl="ssh://*****:*****@with_testsui(responses=["yes"])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(
                dataset=source,
                name="local_target",
                sshurl="ssh://datalad-test" + target_path,
                publish_by_default=DEFAULT_BRANCH,
                existing='replace',
                ui=have_webui(),
            )

        interactive_assert_create_sshwebserver()

        eq_("ssh://datalad-test" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            lclcfg = AnnexRepo(src_path).config
            eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
            # valid uuid
            eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
            # should be added too, even if URL matches prior state
            eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

        # again, by explicitly passing urls. Since we are on datalad-test, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://datalad-test" + target_path,
            ui=have_webui(),
        )

        @with_testsui(responses=['yes'])
        def interactive_assert_create_sshwebserver():
            assert_create_sshwebserver(existing='replace', **cpkwargs)

        interactive_assert_create_sshwebserver()

        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path, source.repo.get_remote_url("local_target"))
        eq_("ssh://datalad-test" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        if have_webui():
            from datalad_deprecated.tests.test_create_sibling_webui \
                import assert_publish_with_ui
            assert_publish_with_ui(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            # and ignore .git/logs content (gh-5298)
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs') \
                        or f.startswith('.git/logs/'):
                    digests.pop(f)
                    mtimes.pop(f)

        if not have_webui():
            # the rest of the test assumed that we have uploaded a UI
            return
        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests,
                          digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k
            for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
        }
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f
             for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with recent git the behavior has changed a bit
        # and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        ok_(modified_files.issuperset(ok_modified_files))
Exemplo n.º 60
0
def test_add_archive_content(path_orig, url, repo_path):
    with chpwd(repo_path):
        # TODO we need to be able to pass path into add_archive_content
        # We could mock but I mean for the API
        assert_raises(RuntimeError, add_archive_content,
                      "nonexisting.tar.gz")  # no repo yet

        repo = AnnexRepo(repo_path, create=True)
        assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
        # we can't add a file from outside the repo ATM
        assert_raises(FileNotInRepositoryError, add_archive_content,
                      opj(path_orig, '1.tar.gz'))

        # Let's add first archive to the repo so we could test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
            for s in range(1, 5):
                repo.add_urls([opj(url, '%du/1.tar.gz' % s)],
                              options=["--pathdepth", "-2"])
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_key(
            '1.tar.gz')  # will be used in the test later

        def d1_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        repo_ = add_archive_content('1.tar.gz')
        eq_(repo.path, repo_.path)
        d1_basic_checks()

        # If run again, it should proceed just fine since the content is the
        # same, so no changes would actually be made
        add_archive_content('1.tar.gz')

        # But that other archive carries an updated file, so it should fail
        # due to the overwrite check
        with assert_raises(RuntimeError) as cme:
            add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)

        # TODO: somewhat not precise since we have two possible "already exists"
        # -- in caching and overwrite check
        assert_in("already exists", str(cme.exception))
        # but should do fine if overrides are allowed
        add_archive_content(opj('1u', '1.tar.gz'),
                            existing='overwrite',
                            use_current_dir=True)
        add_archive_content(opj('2u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('3u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('4u', '1.tar.gz'),
                            existing='archive-suffix',
                            use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert (all([x == whereis[0] for x in whereis[1:]]))

    # and we should be able to reference it while under subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep relative path and also demand to keep the archive prefix
        # while extracting under original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'),
                            add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
        add_archive_content('1.tar.gz',
                            exclude=['d'],
                            rename=['/ /_', '/^1/2'],
                            annex_options="-c annex.largefiles=exclude=*.txt",
                            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add the first archive to the repo so we can test one
        # named the same way but with different content
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')],
                          options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # in manual tests we ran into a situation where a file from an archive that
    # came from a dropped key could not be obtained in a single run.  I thought
    # it was tested in the custom remote tests, but apparently not well enough
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # that what managed to not work

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar,
              key=True)  # is available from the URL -- should be kosher

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work since archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if archive key was dropped and online archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
    # the assertions must run after the `with` block; inside it they would
    # never execute, since the drop above raises
    assert_equal(e.exception.kwargs['stdout_json'][0]['success'], False)
    assert_result_values_cond(
        e.exception.kwargs['stdout_json'], 'note', lambda x:
        '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))