Example #1
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = [
    ]  # _path_(".git/datalad/metadata")]  # metadata only on publish
    # ATM we run post-update hook also upon create since it might
    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)
Example #2
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]     # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)
Example #3
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we didn't log decorations such as log level atm while
            # swallowing so lets check if exit code is returned or not
            # I will test both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
Example #4
def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl="ssh://localhost" + target_path,
                               recursive=True,
                               since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset

    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(name='dominique_carrera',
                               dataset=source,
                               sshurl="ssh://localhost" + target_path,
                               recursive=True,
                               existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
Example #5
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed), {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
Example #6
def test_add_recursive(path):
    ds = Dataset(path)
    ds.create(force=True, save=False)
    ds.create('dir', force=True, if_dirty='ignore')
    ds.save("Submodule added.")

    # TODO: CommandError to something meaningful
    # fail without recursive:
    assert_raises(CommandError, ds.add, opj('dir', 'testindir'), recursive=False)
    # fail with recursion limit too low:
    assert_raises(CommandError, ds.add, opj('dir', 'testindir'),
                  recursive=True, recursion_limit=0)

    # add while also instructing annex to add in parallel 2 jobs (smoke testing
    # for that effect ATM)
    added1 = ds.add(opj('dir', 'testindir'), recursive=True, jobs=2)
    # added to annex, so annex output record
    eq_(added1, [{'file': _path_('dir/testindir'), 'command': 'add',
                  'key': 'MD5E-s9--3f0f870d18d6ba60a79d9463ff3827ea',
                  'success': True}])
    assert_in('testindir', Dataset(opj(path, 'dir')).repo.get_annexed_files())

    added2 = ds.add(opj('dir', 'testindir2'), recursive=True, to_git=True)
    # added to git, so parsed git output record
    eq_(added2, [{'success': True, 'file': _path_('dir/testindir2')}])
    assert_in('testindir2', Dataset(opj(path, 'dir')).repo.get_indexed_files())

    # We used to fail to add to pure git repository, but now it should all be
    # just fine
    subds = ds.create('git-sub', no_annex=True)
    with open(opj(subds.path, 'somefile.txt'), "w") as f:
        f.write("bla bla")
    result = ds.add(opj('git-sub', 'somefile.txt'), recursive=True, to_git=False)
    eq_(result, [{'file': _path_('git-sub/somefile.txt'), 'success': True}])
Example #7
def test_path_():
    eq_(_path_('a'), 'a')
    if on_windows:
        eq_(_path_('a/b'), r'a\b')
    else:
        p = 'a/b/c'
        assert (_path_(p) is p)  # nothing is done to it whatsoever
        eq_(_path_(p, 'd'), 'a/b/c/d')
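
The test above pins down the behavior the rest of these examples rely on: _path_ joins POSIX-style fragments with '/' and returns a platform-native path (backslashes on Windows), so test code can spell paths like 'brandnew2/sub/sub' once and still work cross-platform. Below is a minimal sketch of such a helper, written purely for illustration; datalad's own implementation may differ in details.

# hypothetical stand-in for datalad's _path_ helper, for illustration only
import os
import os.path as op

def _path_(*parts):
    """Join POSIX-style fragments and return a platform-native path."""
    joined = '/'.join(parts)
    # nothing to do on POSIX; swap in the native separator elsewhere
    return joined if os.sep == '/' else joined.replace('/', os.sep)

# behaves like os.path.join for composed fragments
assert _path_('a/b/c', 'd') == op.join('a', 'b', 'c', 'd')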
Example #8
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits outs IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost',
                            transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #9
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits outs IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
Example #10
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds',
                       source=path,
                       result_xfm='datasets',
                       return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'),
                             source=path,
                             result_xfm='datasets',
                             return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a subperdataset that actually
    # has the queries dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # by we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #11
 def process_digests_mtimes(digests, mtimes):
     # it should have triggered a hook, which would have created log and metadata files
     check_metadata = False
     for part in 'logs', 'metadata':
         metafiles = [
             k for k in digests
             if k.startswith(_path_('.git/datalad/%s/' % part))
         ]
         # This is in effect ONLY if we have "compatible" datalad installed on remote
         # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
         # so let's not check/enforce (TODO)
         # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
         # Let's actually do it to some degree
         if part == 'logs':
             # always should have those:
             assert (len(metafiles) >= 1)
             with open(opj(target_path, metafiles[0])) as f:
                 if 'no datalad found' not in f.read():
                     check_metadata = True
         if part == 'metadata':
             eq_(len(metafiles), bool(check_metadata))
         for f in metafiles:
             digests.pop(f)
             mtimes.pop(f)
     # and just pop some leftovers from annex
     for f in list(digests):
         if f.startswith('.git/annex/mergedrefs'):
             digests.pop(f)
             mtimes.pop(f)
Example #12
def test_install_into_dataset(source, top_path):

    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
Example #13
def test_crawl_api_recursive(get_subdatasets_, run_pipeline_,
                             load_pipeline_from_config_,
                             get_repo_pipeline_script_path_, get_lofilename_,
                             chpwd_, tdir):
    pwd = getpwd()
    with chpwd(tdir):
        output, stats = crawl(recursive=True)
    assert_equal(pwd, getpwd())
    if external_versions['mock'] < '1.0.1':
        raise SkipTest(
            "needs a more recent mock which throws exceptions in side_effects")
    assert_equal(output,
                 [[]] * 4 + [None])  # for now output is just a list of outputs
    assert_equal(
        stats, ActivityStats(
            datasets_crawled=5,
            datasets_crawl_failed=1))  # nothing was done but we got it crawled
    chpwd_.assert_has_calls([
        call(None),
        call('path1'),
        call('path1/path1_1'),
        call('path2'),
    ],
                            any_order=True)
    assert_equal(
        list(find_files('.*', tdir, exclude_vcs=False)),
        [_path_(tdir, 'some.log')])  # no files were generated besides the log
Example #14
 def process_digests_mtimes(digests, mtimes):
     # it should have triggered a hook, which would have created log and metadata files
     check_metadata = False
     for part in 'logs', 'metadata':
         metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
         # This is in effect ONLY if we have "compatible" datalad installed on remote
         # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
         # so let's not check/enforce (TODO)
         # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
         # Let's actually do it to some degree
         if part == 'logs':
             # always should have those:
             assert (len(metafiles) >= 1)
             with open(opj(target_path, metafiles[0])) as f:
                 if 'no datalad found' not in f.read():
                     check_metadata = True
         if part == 'metadata':
             eq_(len(metafiles), bool(check_metadata))
         for f in metafiles:
             digests.pop(f)
             mtimes.pop(f)
     # and just pop some leftovers from annex
     for f in list(digests):
         if f.startswith('.git/annex/mergedrefs'):
             digests.pop(f)
             mtimes.pop(f)
Example #15
def test_install_into_dataset(source=None, top_path=None):
    src_ds = Dataset(source).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)

    ds = create(top_path)
    assert_repo_status(ds.path)

    subds = ds.install("sub", source=source)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is too:
    assert_repo_status(ds.path, annex=None)
    ds.save(message='addsub')
    # now it is:
    assert_repo_status(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    assert_repo_status(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.save('sub3')
    assert_repo_status(ds.path, untracked=['dummy.txt'])
Example #16
def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset)
    # should not provide a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine
Example #17
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.rev_create(force=False, **raw),
        status='error',
        message=
        'will not create a dataset in a non-empty directory, use `force` option to ignore'
    )
    # non-empty with `force`:
    ds.rev_create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.rev_create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))
    # create a sub:
    ds.rev_create('sub')
    # fail when doing it again
    assert_in_results(
        ds.rev_create('sub', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [str(ds.pathobj / 'sub')]),
    )

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall('sub', check=False)
    assert_in('sub', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    assert_in_results(
        ds.rev_create('sub', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [str(ds.pathobj / 'sub')]),
    )
    assert_in_results(ds.rev_create(_path_('sub/subsub'), **raw),
                      status='error',
                      message=('collision with %s (dataset) in dataset %s',
                               str(ds.pathobj / 'sub'), ds.path))
    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.rev_save()
    assert_in_results(
        ds.rev_create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s', ds.path,
                 [str(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
Example #18
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(
        _path_('d1/subds'), source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a subperdataset that actually
    # has the queries dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # by we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path,
            cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #19
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    dst_relpath = os.path.relpath(dst_path, os.path.expanduser('~'))
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')

    ds.create_sibling(url)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits outs IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost',
                            transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)
Example #20
def check_target_ssh_since(use_ssh, origin, src_path, target_path):
    if use_ssh:
        sshurl = "ssh://datalad-test" + target_path
    else:
        sshurl = target_path
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    assert_repo_status(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl=sshurl,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset

    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl=sshurl,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())

    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
Example #21
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds',
                       source=path,
                       result_xfm='datasets',
                       return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'),
                             source=path,
                             result_xfm='datasets',
                             return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #22
def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.add('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.publish(to='target', transfer_data='all')
    assert results
    ok_file_has_content(_path_(desttop, 'subdir/1'), '123')
Example #23
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.get_subdatasets()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_subdatasets(absolute=True)[0])
    eq_(subdss, ds.get_subdatasets(recursive=True))
    eq_(subdss, ds.get_subdatasets(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_subdatasets, pattern='sub*')
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path)
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
Example #24
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'],
                   on_failure='continue')

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=None)
    # those which succeeded should be saved now
    eq_(ds.subdatasets(result_xfm='relpaths'), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(r.get('source_url', r['path']) for r in cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})
Example #25
def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset

    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())

    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
Example #26
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
Example #27
 def __call__(self, data):
     # we do not take anything from data
     meta = get_metadata(self.dataset)
     if meta:
         meta_encoded = meta.encode('utf-8')
         if not os.path.exists('.datalad'):
             os.makedirs('.datalad')
         path_ = _path_('.datalad', 'meta.datacite.xml')
         with open(path_, 'w') as f:
             f.write(meta_encoded)
         yield updated(data, {'filename': path_})
     else:
         yield data
Example #28
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]  # hooks enabled in all datasets
    not_paths = [
    ]  # _path_(".git/datalad/metadata")]  # metadata only on publish
    # ATM we run post-update hook also upon create since it might
    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    hook_path = _path_(target_path, '.git/hooks/post-update')
    # No longer the case -- we are no longer using absolute path in the
    # script
    # ok_file_has_content(hook_path,
    #                     '.*\ndsdir="%s"\n.*' % target_path,
    #                     re_=True,
    #                     flags=re.DOTALL)
    # No absolute path (so dataset could be moved) in the hook
    with open(hook_path) as f:
        assert_not_in(target_path, f.read())
    # correct ls_json command in hook content (path wrapped in "quotes)
    ok_file_has_content(hook_path,
                        '.*datalad ls -a --json file \..*',
                        re_=True,
                        flags=re.DOTALL)
Example #29
def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]     # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    hook_path = _path_(target_path, '.git/hooks/post-update')
    # No longer the case -- we are no longer using absolute path in the
    # script
    # ok_file_has_content(hook_path,
    #                     '.*\ndsdir="%s"\n.*' % target_path,
    #                     re_=True,
    #                     flags=re.DOTALL)
    # No absolute path (so dataset could be moved) in the hook
    with open(hook_path) as f:
        assert_not_in(target_path, f.read())
    # correct ls_json command in hook content (path wrapped in "quotes)
    ok_file_has_content(hook_path,
                        '.*datalad ls -a --json file \..*',
                        re_=True,
                        flags=re.DOTALL)
Example #30
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')
    assert_raises(ValueError, ds.create, no_annex=True, annex_opts=['some'])
    assert_raises(ValueError,
                  ds.create,
                  no_annex=True,
                  annex_init_opts=['some'])

    with open(opj(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message=
        'will not create a dataset in a non-empty directory, use `force` option to ignore'
    )
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(ds.create(outside_path, **raw),
                      status='error',
                      message='path not associated with any dataset')

    # create a sub:
    ds.create('sub')
    # fail when doing it again
    assert_in_results(
        ds.create('sub', **raw),
        status='error',
        message=('collision with known subdataset %s/ in dataset %s', 'sub',
                 ds.path))

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall('sub', check=False)
    assert_in('sub', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    for s in 'sub', _path_('sub/subsub'):
        assert_in_results(
            ds.create(s, **raw),
            status='error',
            message=('collision with known subdataset %s/ in dataset %s',
                     'sub', ds.path))
Example #31
def extract_meta(data):
    content = [
        x['match']
        for x in xpath_match('//*[@class="attributeLabel"]/..')(data)
    ]
    content = [(re.sub('</li>', ', ', x)) for x in content]
    content = [(re.sub('<[^<]+?>|[\t|\n|\r]', '',
                       (str(x.encode('ascii', 'ignore'))))).strip()
               for x in content]
    # note: `'SCENES:' or 'OWNERS:'` would only ever check 'SCENES:'; test both
    # markers explicitly and filter into a new list instead of mutating while iterating
    content = [x for x in content
               if x.find('SCENES:') < 0 and x.find('OWNERS:') < 0]

    json_dict = OrderedDict(map(str, x.split(':', 1)) for x in content)

    if not exists(".datalad/meta"):
        makedirs(".datalad/meta")

    with open(_path_(".datalad/meta/balsa.json"), "w") as fi:
        json.dump(json_dict, fi, indent=1)
        lgr.info("Generated descriptor file")
        yield {'filename': ".datalad/meta/balsa.json"}
Example #32
def _test_drop(path, drop_immediately):
    s3url = 's3://datalad-test0-nonversioned'
    providers = get_test_providers(s3url)  # to verify having s3 credentials
    # vcr tape is getting bound to the session object, so we need to
    # force re-establishing the session for the bucket.
    # TODO (in datalad): make a dedicated API for that, now too obscure
    _ = providers.get_status(s3url, allow_old_session=False)
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True,  # so test goes faster
                drop_immediately=drop_immediately,
            ),
            save=True)
    if drop_immediately:
        # cannot figure out but taping that interaction results in
        # git annex addurl  error.  No time to figure it out
        # so we just crawl without vcr for now. TODO: figure out WTF
        with chpwd(path):
            crawl()
    else:
        with externals_use_cassette(
                'test_simple_s3_test0_nonversioned_crawl_ext'
                + ('_immediately' if drop_immediately else '')), \
                chpwd(path):
            crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
Example #33
def test_create_text_no_annex(path):
    ds = create(path, text_no_annex=True)
    ok_clean_git(path)
    import re
    ok_file_has_content(
        _path_(path, '.gitattributes'),
        content='\* annex\.largefiles=\(not\(mimetype=text/\*\)\)',
        re_=True,
        match=False,
        flags=re.MULTILINE)
    # and check that it is really committing text files to git and binaries
    # to annex
    create_tree(
        path,
        {
            't': 'some text',
            'b': ''  # empty file is not considered to be a text file
            # should we adjust the rule to consider only non empty files?
        })
    ds.add(['t', 'b'])
    ok_file_under_git(path, 't', annexed=False)
    ok_file_under_git(path, 'b', annexed=True)
Example #34
def test_simple1(ind, topurl, outd):

    list(
        initiate_dataset(template="simple_with_archives",
                         dataset_name='test1',
                         path=outd,
                         add_fields={
                             'url': topurl + 'study/show/WG33',
                             'a_href_match_': '.*download.*'
                         })({}))

    with chpwd(outd):
        out, stats = crawl()

    eq_(stats.add_annex, 3)

    ok_file_under_git(outd, 'file1.nii', annexed=True)
    ok_file_has_content(opj(outd, 'file1.nii'), 'content of file1.nii')

    ok_file_under_git(outd, _path_('dir1/file2.nii'), annexed=True)
    ok_file_has_content(opj(outd, 'dir1', 'file2.nii'), 'content of file2.nii')

    eq_(len(out), 1)
Example #35
def _parse_git_submodules(dspath):
    """All known ones with some properties"""
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    # this will not work in direct mode, need better way #1422
    cmd = ['git', 'ls-files', '--stage', '-z']

    # need to go rogue  and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))

    for line in stdout.split('\0'):
        if not line or not line.startswith('160000'):
            continue
        sm = {}
        props = submodule_full_props.match(line)
        sm['revision'] = props.group(2)
        subpath = _path_(dspath, props.group(4))
        sm['path'] = subpath
        if not exists(subpath) or not GitRepo.is_valid_repo(subpath):
            sm['state'] = 'absent'
        yield sm
Example #36
def _parse_git_submodules(dspath):
    """All known ones with some properties"""
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    # this will not work in direct mode, need better way #1422
    cmd = ['git', 'ls-files', '--stage', '-z']

    # need to go rogue  and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))

    for line in stdout.split('\0'):
        if not line or not line.startswith('160000'):
            continue
        sm = {}
        props = submodule_full_props.match(line)
        sm['revision'] = props.group(2)
        subpath = _path_(dspath, props.group(4))
        sm['path'] = subpath
        if not exists(subpath) or not GitRepo.is_valid_repo(subpath):
            sm['state'] = 'absent'
        yield sm
Example #37
def test_drop(path):
    get_test_providers(
        's3://datalad-test0-nonversioned')  # to verify having s3 credentials
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with externals_use_cassette('test_simple_s3_test0_nonversioned_crawl_ext'), \
         chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True  # so test goes faster
            ),
            save=True)
        crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
Example #38
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_, 'prefix' if flat else "").rstrip(
                os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since='' forces it to consider all changes since we already published
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(name=remote_name,
                                       sshurl="ssh://localhost" + target_path_,
                                       target_dir=target_dir_tpl,
                                       recursive=True,
                                       existing='skip',
                                       ui=True,
                                       since='')
        # so it was created on remote correctly and wasn't just skipped
        assert (Dataset(
            _path_(target_path_,
                   ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True,
                since='')  # just a smoke test
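GitRepo(target_dir, create=False) serves here purely as an assertion that a repository was actually created on the target. Outside of DataLad's helpers the same check can be sketched with plain git; this is an illustration, not part of the project's API:

import subprocess

def assert_is_git_repo(path):
    """Raise CalledProcessError if path is not inside a Git repository."""
    subprocess.check_call(
        ['git', 'rev-parse', '--git-dir'],
        cwd=path,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL)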
Example #44
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)                    # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))                           # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))                                     # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))                                               # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')              # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodules, so use ds.add
    ds.add(opj('dir', 'subgit'))                        # add the non-dataset git repo to annex
    ds.add('dir')                                  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule  by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # recursive=True, else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive
                )
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata jsons created and deleted only when all_=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check that it is updated in its 'nodes' sublist too; used by web-UI JSON (regression test)
                assert_equal(dsj['nodes'][0]['size']['total'], dsj['size']['total'])

                # check size of subdataset
                # note: ('subdsfile.txt' or 'subds') always evaluates to 'subdsfile.txt'
                subds = [item for item in dsj['nodes'] if item['name'] == 'subdsfile.txt'][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata already created
                # to verify sub-dataset metadata being picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    # note: ('subdsfile.txt' or 'subds') always evaluates to 'subdsfile.txt'
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] == 'subdsfile.txt'
                    ][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE
                )
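The get_metahash/get_metapath helpers above encode a node's relative path as an md5 hex digest under .git/datalad/metadata, with the dataset root keyed by '/'. A small reproduction of that naming convention as exercised by the test (a sketch, not DataLad's implementation):

import hashlib
import os.path as op

META_DIR = op.join('.git', 'datalad', 'metadata')

def metadata_file(dspath, *relpath):
    """Return the metadata JSON path for a node of the dataset at dspath."""
    key = op.join(*relpath) if relpath else '/'
    digest = hashlib.md5(key.encode('utf-8')).hexdigest()
    return op.join(dspath, META_DIR, digest)

# metadata_file('/tmp/ds')                   -> file named after md5('/')
# metadata_file('/tmp/ds', 'dir', 'subdir')  -> file named after md5('dir/subdir')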
Example #45
def get_metapath(dspath, *path):
    return _path_(dspath, meta_dir, get_metahash(*path))
Example #46
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        with open(opj(target_path, 'random'), 'w') as f:
            f.write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git versions the behavior has changed
        # a bit and the index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
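get_mtimes_and_digests is a test helper not shown in this listing; conceptually it snapshots a checksum and an mtime per file so that a 'reconfigure' run can be shown to change only the expected files. A rough standalone equivalent, assuming that shape of return value:

import hashlib
import os
import os.path as op

def mtimes_and_digests(topdir):
    """Map each file (relative to topdir) to its md5 digest and its mtime."""
    digests, mtimes = {}, {}
    for root, _, files in os.walk(topdir):
        for name in files:
            full = op.join(root, name)
            if not op.isfile(full):  # skip broken symlinks, common in annex trees
                continue
            rel = op.relpath(full, topdir)
            with open(full, 'rb') as f:
                digests[rel] = hashlib.md5(f.read()).hexdigest()
            mtimes[rel] = os.stat(full).st_mtime
    return digests, mtimes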
Example #47
    def __call__(sshurl, target=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None, recursive=False,
                 existing='error', shared=False, ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None):

        if sshurl is None:
            raise ValueError("""insufficient information for target creation
            (needs at least a dataset and a SSH URL).""")

        if target is None and (target_url is not None or
                               target_pushurl is not None):
            raise ValueError("""insufficient information for adding the target
            as a sibling (needs at least a name)""")

        # shortcut
        ds = require_dataset(dataset, check_installed=True,
                             purpose='creating a sibling')

        assert(ds is not None and sshurl is not None and ds.repo is not None)

        # determine target parameters:
        sshri = RI(sshurl)

        if not isinstance(sshri, SSHRI) \
                and not (isinstance(sshri, URL) and sshri.scheme == 'ssh'):
            raise ValueError(
                "Unsupported SSH URL: '{0}', use ssh://host/path "
                "or host:path syntax".format(sshurl))

        if target_dir is None:
            if sshri.path:
                target_dir = sshri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%NAME" not in target_dir

        # collect datasets to use:
        datasets = dict()
        datasets[basename(ds.path)] = ds
        if recursive:
            for subds in ds.get_subdatasets(recursive=True):
                sub_path = opj(ds.path, subds)
                # TODO: when enhancing Dataset/*Repo classes and therefore
                # adapt to moved code, make proper distinction between name and
                # path of a submodule, which are technically different. This
                # probably will become important on windows as well as whenever
                # we want to allow for moved worktrees.
                datasets[basename(ds.path) + '/' + subds] = \
                    Dataset(sub_path)

        # request ssh connection:
        not_supported_on_windows("TODO")
        lgr.info("Connecting ...")
        ssh = ssh_manager.get_connection(sshurl)
        ssh.open()

        # flag to check if at dataset_root
        at_root = True

        # loop over all datasets, ordered from top to bottom to make test
        # below valid (existing directories would cause the machinery to halt)
        # But we need to run post-update hook in depth-first fashion, so
        # would only collect first and then run (see gh #790)
        remote_repos_to_run_hook_for = []
        for current_dspath in \
                sorted(datasets.keys(), key=lambda x: x.count('/')):
            current_ds = datasets[current_dspath]
            if not current_ds.is_installed():
                lgr.info("Skipping %s since not installed locally", current_dspath)
                continue
            if not replicate_local_structure:
                path = target_dir.replace("%NAME",
                                          current_dspath.replace("/", "-"))
            else:
                # TODO: opj depends on local platform, not the remote one.
                # check how to deal with it. Does windows ssh server accept
                # posix paths? vice versa? Should planned SSH class provide
                # tools for this issue?
                path = normpath(opj(target_dir,
                                    relpath(datasets[current_dspath].path,
                                            start=ds.path)))

            lgr.info("Creating target dataset {0} at {1}".format(current_dspath, path))
            # Must be set to True only if exists and existing='reconfigure'
            # otherwise we might skip actions if we say existing='reconfigure'
            # but it did not even exist before
            only_reconfigure = False
            if path != '.':
                # check if target exists
                # TODO: Is this condition valid for != '.' only?
                path_exists = True
                try:
                    out, err = ssh(["ls", path])
                except CommandError as e:
                    if "No such file or directory" in e.stderr and \
                            path in e.stderr:
                        path_exists = False
                    else:
                        raise  # It's an unexpected failure here

                if path_exists:
                    if existing == 'error':
                        raise RuntimeError("Target directory %s already exists." % path)
                    elif existing == 'skip':
                        continue
                    elif existing == 'replace':
                        ssh(["chmod", "+r+w", "-R", path])  # enable write permissions to allow removing dir
                        ssh(["rm", "-rf", path])            # remove target at path
                        path_exists = False                 # if we succeeded in removing it
                    elif existing == 'reconfigure':
                        only_reconfigure = True
                    else:
                        raise ValueError("Do not know how to handle existing=%s" % repr(existing))

                if not path_exists:
                    try:
                        ssh(["mkdir", "-p", path])
                    except CommandError as e:
                        lgr.error("Remotely creating target directory failed at "
                                  "%s.\nError: %s" % (path, exc_str(e)))
                        continue

            # don't (re-)initialize dataset if existing == reconfigure
            if not only_reconfigure:
                # init git and possibly annex repo
                if not CreateSibling.init_remote_repo(
                        path, ssh, shared, datasets[current_dspath],
                        description=target_url):
                    continue

            # check git version on remote end
            lgr.info("Adjusting remote git configuration")
            remote_git_version = CreateSibling.get_remote_git_version(ssh)
            if remote_git_version and remote_git_version >= "2.4":
                # allow for pushing to checked out branch
                try:
                    ssh(["git", "-C", path] +
                        ["config", "receive.denyCurrentBranch", "updateInstead"])
                except CommandError as e:
                    lgr.error("git config failed at remote location %s.\n"
                              "You will not be able to push to checked out "
                              "branch. Error: %s", path, exc_str(e))
            else:
                lgr.error("Git version >= 2.4 needed to configure remote."
                          " Version detected on server: %s\nSkipping configuration"
                          " of receive.denyCurrentBranch - you will not be able to"
                          " publish updates to this repository. Upgrade your git"
                          " and run with --existing=reconfigure"
                          % remote_git_version)

            # enable metadata refresh on dataset updates to publication server
            lgr.info("Enabling git post-update hook ...")
            try:
                CreateSibling.create_postupdate_hook(
                    path, ssh, datasets[current_dspath])
            except CommandError as e:
                lgr.error("Failed to add json creation command to post update "
                          "hook.\nError: %s" % exc_str(e))

            # publish web-interface to root dataset on publication server
            if at_root and ui:
                lgr.info("Uploading web interface to %s" % path)
                at_root = False
                try:
                    CreateSibling.upload_web_interface(path, ssh, shared, ui)
                except CommandError as e:
                    lgr.error("Failed to push web interface to the remote "
                              "datalad repository.\nError: %s" % exc_str(e))

            remote_repos_to_run_hook_for.append(path)

        # in reverse order would be depth first
        lgr.debug("Running post-update hooks in all created siblings")
        for path in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            try:
                ssh(
                    ["cd '" + _path_(path, ".git") + "' && hooks/post-update"],
                    wrap_args=False  # we wrapped here manually
                )
            except CommandError as e:
                lgr.error("Failed to run post-update hook under path %s. "
                          "Error: %s" % (path, exc_str(e)))

        if target:
            # add the sibling(s):
            lgr.debug("Adding the siblings")
            if target_url is None:
                target_url = sshurl
            if target_pushurl is None and sshurl != target_url:
                target_pushurl = sshurl
            AddSibling()(dataset=ds,
                         name=target,
                         url=target_url,
                         pushurl=target_pushurl,
                         recursive=recursive,
                         fetch=True,
                         force=existing in {'replace'},
                         as_common_datasrc=as_common_datasrc,
                         publish_by_default=publish_by_default,
                         publish_depends=publish_depends)
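The %NAME handling in this older variant decides between replicating the local dataset layout on the remote and flattening it into a single directory level, where every (sub)dataset path becomes a dash-separated name. A tiny illustration of that substitution (the inputs are made up):

def expand_target_dir(target_dir, dataset_name):
    """Mimic the %NAME expansion used when flattening the hierarchy."""
    if "%NAME" not in target_dir:
        # replicate_local_structure: the template acts as a common root
        return target_dir
    return target_dir.replace("%NAME", dataset_name.replace("/", "-"))

# expand_target_dir("/srv/%NAME", "super/sub ds")  -> "/srv/super-sub ds"
# expand_target_dir("/srv/data", "super/sub ds")   -> "/srv/data"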
Example #48
    def __call__(sshurl, name=None, target_dir=None,
                 target_url=None, target_pushurl=None,
                 dataset=None,
                 recursive=False,
                 recursion_limit=None,
                 existing='error',
                 shared=None,
                 group=None,
                 ui=False,
                 as_common_datasrc=None,
                 publish_by_default=None,
                 publish_depends=None,
                 annex_wanted=None, annex_group=None, annex_groupwanted=None,
                 inherit=False,
                 since=None):
        #
        # nothing without a base dataset
        #
        ds = require_dataset(dataset, check_installed=True,
                             purpose='creating a sibling')
        refds_path = ds.path

        #
        # all checks that are possible before we start parsing the dataset
        #

        # possibly use sshurl to get the name in case if not specified
        if not sshurl:
            if not inherit:
                raise InsufficientArgumentsError(
                    "needs at least an SSH URL, if no inherit option"
                )
            if name is None:
                raise ValueError(
                    "Neither SSH URL, nor the name of sibling to inherit from "
                    "was specified"
                )
            # It might well be that we already have this remote setup
            try:
                sshurl = CreateSibling._get_remote_url(ds, name)
            except Exception as exc:
                lgr.debug('%s does not know about url for %s: %s', ds, name, exc_str(exc))
        elif inherit:
            raise ValueError(
                "For now, for clarity not allowing specifying a custom sshurl "
                "while inheriting settings"
            )
            # may be could be safely dropped -- still WiP

        if not sshurl:
            # TODO: may be more back up before _prep?
            super_ds = ds.get_superdataset()
            if not super_ds:
                raise ValueError(
                    "Could not determine super dataset for %s to inherit URL"
                    % ds
                )
            super_url = CreateSibling._get_remote_url(super_ds, name)
            # for now assuming hierarchical setup
            # (TODO: to be able to distinguish between the two, probably
            # needs storing datalad.*.target_dir to have %RELNAME in there)
            sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

        # check the login URL
        sshri = RI(sshurl)
        if not is_ssh(sshri):
            raise ValueError(
                "Unsupported SSH URL: '{0}', "
                "use ssh://host/path or host:path syntax".format(sshurl))

        if not name:
            # use the hostname as default remote name
            name = sshri.hostname
            lgr.debug(
                "No sibling name given, use URL hostname '%s' as sibling name",
                name)

        if since == '':
            # consider creating siblings only since the point of
            # the last update
            # XXX here we assume one to one mapping of names from local branches
            # to the remote
            active_branch = ds.repo.get_active_branch()
            since = '%s/%s' % (name, active_branch)

        #
        # parse the base dataset to find all subdatasets that need processing
        #
        to_process = []
        for ap in AnnotatePaths.__call__(
                dataset=refds_path,
                # only a single path!
                path=refds_path,
                recursive=recursive,
                recursion_limit=recursion_limit,
                action='create_sibling',
                # neither of the next two should happen anyway
                unavailable_path_status='impossible',
                nondataset_path_status='error',
                modified=since,
                return_type='generator',
                on_failure='ignore'):
            if ap.get('status', None):
                # this is done
                yield ap
                continue
            if ap.get('type', None) != 'dataset' or ap.get('state', None) == 'absent':
                # this can happen when there is `since`, but we have no
                # use for anything but datasets here
                continue
            checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
                if ap.get('state', None) != 'absent' \
                else []
            if publish_depends:
                # make sure dependencies are valid
                # TODO: inherit -- we might want to automagically create
                # those dependents as well???
                unknown_deps = set(assure_list(publish_depends)).difference(checkds_remotes)
                if unknown_deps:
                    ap['status'] = 'error'
                    ap['message'] = (
                        'unknown sibling(s) specified as publication dependency: %s',
                        unknown_deps)
                    yield ap
                    continue
            if name in checkds_remotes and existing in ('error', 'skip'):
                ap['status'] = 'error' if existing == 'error' else 'notneeded'
                ap['message'] = (
                    "sibling '%s' already configured (specify alternative name, or force "
                    "reconfiguration via --existing",
                    name)
                yield ap
                continue
            to_process.append(ap)

        if not to_process:
            # we ruled out all possibilities
            # TODO wait for gh-1218 and make better return values
            lgr.info("No datasets qualify for sibling creation. "
                     "Consider different settings for --existing "
                     "or --since if this is unexpected")
            return

        if target_dir is None:
            if sshri.path:
                target_dir = sshri.path
            else:
                target_dir = '.'

        # TODO: centralize and generalize template symbol handling
        replicate_local_structure = "%RELNAME" not in target_dir

        # request ssh connection:
        lgr.info("Connecting ...")
        assert(sshurl is not None)  # delayed anal verification
        ssh = ssh_manager.get_connection(sshurl)
        if not ssh.get_annex_version():
            raise MissingExternalDependency(
                'git-annex',
                msg='on the remote system')

        #
        # all checks done and we have a connection, now do something
        #

        # loop over all datasets, ordered from top to bottom to make test
        # below valid (existing directories would cause the machinery to halt)
        # But we need to run post-update hook in depth-first fashion, so
        # would only collect first and then run (see gh #790)
        yielded = set()
        remote_repos_to_run_hook_for = []
        for currentds_ap in \
                sorted(to_process, key=lambda x: x['path'].count('/')):
            current_ds = Dataset(currentds_ap['path'])

            path = _create_dataset_sibling(
                name,
                current_ds,
                ds.path,
                ssh,
                replicate_local_structure,
                sshri,
                target_dir,
                target_url,
                target_pushurl,
                existing,
                shared,
                group,
                publish_depends,
                publish_by_default,
                ui,
                as_common_datasrc,
                annex_wanted,
                annex_group,
                annex_groupwanted,
                inherit
            )
            if not path:
                # nothing new was created
                # TODO is 'notneeded' appropriate in this case?
                currentds_ap['status'] = 'notneeded'
                # TODO explain status in 'message'
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            remote_repos_to_run_hook_for.append((path, currentds_ap))

            # publish web-interface to root dataset on publication server
            if current_ds.path == ds.path and ui:
                lgr.info("Uploading web interface to %s" % path)
                try:
                    CreateSibling.upload_web_interface(path, ssh, shared, ui)
                except CommandError as e:
                    currentds_ap['status'] = 'error'
                    currentds_ap['message'] = (
                        "failed to push web interface to the remote datalad repository (%s)",
                        exc_str(e))
                    yield currentds_ap
                    yielded.add(currentds_ap['path'])
                    continue

        # in reverse order would be depth first
        lgr.info("Running post-update hooks in all created siblings")
        # TODO: add progressbar
        for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
            # Trigger the hook
            lgr.debug("Running hook for %s (if exists and executable)", path)
            try:
                ssh("cd {} "
                    "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                    "".format(sh_quote(_path_(path, ".git"))))
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to run post-update hook under remote path %s (%s)",
                    path, exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue
            if currentds_ap['path'] not in yielded:
                # if we were silent until now everything is just splendid
                currentds_ap['status'] = 'ok'
                yield currentds_ap
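Note the two orderings in the loop above: siblings are created top-down (sorted by path depth so that parent directories exist before their subdatasets), while the collected post-update hooks are run over the same list in reverse, i.e. depth-first. A sketch of just that ordering logic:

def creation_and_hook_order(dataset_paths):
    """Parents first for creation; children first for the post-update hooks."""
    top_down = sorted(dataset_paths, key=lambda p: p.count('/'))
    return top_down, top_down[::-1]

# creation_and_hook_order(['ds', 'ds/sub', 'ds/sub/deeper'])
# -> (['ds', 'ds/sub', 'ds/sub/deeper'], ['ds/sub/deeper', 'ds/sub', 'ds'])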
Example #49
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_, 'prefix' if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since='' forces it to consider all changes since we already published
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                existing='skip',
                ui=True,
                since=''
            )
        # so it was created on remote correctly and wasn't just skipped
        assert(Dataset(_path_(target_path_, ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True, since='') # just a smoke test
Example #50
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it, so it would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        # is not actually happening on one of the two basic cases -- TODO figure it out
        # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO:  assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        with open(opj(target_path, 'random'), 'w') as f:
            f.write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)  # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)