Example #1
def _test_initremote_alias(host, ds_path, store):

    ds_path = Path(ds_path)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host,
                                              path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset with alias
    create_ds_in_store(io, store, ds.id, '2', '1', 'ali')
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()]
              )
    assert_repo_status(ds.path)
    assert_true(io.exists(store / "alias" / "ali"))
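All of the snippets in this listing rely on a few helpers that are imported from DataLad's test utilities and not shown here, most notably populate_dataset() and common_init_opts. The sketch below is a hypothetical stand-in for them, purely for illustration: the option values and file contents are assumptions, and only the file names one.txt and subdir/two are taken from the later examples.

# Hypothetical stand-ins -- the real helpers live in DataLad's test suite.
common_init_opts = ["encryption=none", "type=external",
                    "externaltype=ora", "autoenable=true"]

def populate_dataset(ds):
    # create the annexed files the examples refer to (contents are made up);
    # the examples call ds.save() themselves afterwards
    (ds.pathobj / "one.txt").write_text("content1")
    subdir = ds.pathobj / "subdir"
    subdir.mkdir(exist_ok=True)
    (subdir / "two").write_text("content2")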
Example #2
def test_push_url(storepath, dspath, blockfile):

    dspath = Path(dspath)
    store = Path(storepath)
    blockfile = Path(blockfile)
    blockfile.touch()

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = LocalIO()
    store_url = "ria+{}".format(store.as_uri())
    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # initremote fails with invalid url (not a ria+ URL):
    invalid_url = (store.parent / "non-existent").as_uri()
    init_opts = common_init_opts + [
        'url={}'.format(store_url), 'push-url={}'.format(invalid_url)
    ]
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'store',
                  options=init_opts)

    # initremote succeeds with valid but inaccessible URL (pointing to a file
    # instead of a store):
    block_url = "ria+" + blockfile.as_uri()
    init_opts = common_init_opts + [
        'url={}'.format(store_url), 'push-url={}'.format(block_url)
    ]
    ds.repo.init_remote('store', options=init_opts)

    # but a push will fail:
    assert_raises(CommandError, ds.repo.call_annex,
                  ['copy', 'one.txt', '--to', 'store'])

    # reconfigure with correct push-url:
    init_opts = common_init_opts + [
        'url={}'.format(store_url), 'push-url={}'.format(store_url)
    ]
    ds.repo.enable_remote('store', options=init_opts)

    # push works now:
    ds.repo.call_annex(['copy', 'one.txt', '--to', 'store'])

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis('one.txt')
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
Example #3
def test_read_access(store_path, store_url, ds_path):

    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if ds.repo.is_managed_branch():
        # TODO: on crippled FS copytree to populate store doesn't seem to work.
        #       Or maybe it's just the serving via HTTP that doesn't work.
        #       Either way, after copytree and fsck, whereis doesn't report
        #       the store as an available source.
        raise SkipTest("Skip on crippled FS")

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    ds.repo.fsck(remote='ora-remote', fast=True)
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:] / 'annex' /
                           'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 2)
    assert_result_count(res,
                        2,
                        status='ok',
                        type='file',
                        action='get',
                        message="from ora-remote...")
Example #4
def test_read_access(store_path, store_url, ds_path):

    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    ds.repo.fsck(remote='ora-remote', fast=True)
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:] / 'annex' /
                           'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 2)
    assert_result_count(res,
                        2,
                        status='ok',
                        type='file',
                        action='get',
                        message="from ora-remote...")
Example #5
def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decode/encode etc.

    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note that we intentionally call annex commands instead of
    # datalad-publish/-get here. We are testing an annex special remote.

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', str(file), '--to', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', str(file), '--from', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
Example #6
def test_initremote(store_path=None, store_url=None, ds_path=None):
    ds = Dataset(ds_path).create()
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fail when there's no RIA store at the destination
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'ora-remote',
                  options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ora-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])

    # now make it a store
    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')

    # fails on non-RIA URL
    assert_raises(CommandError,
                  ds.repo.init_remote,
                  'ora-remote',
                  options=common_init_opts +
                  ['url={}'
                   ''.format(store_path.as_uri())])
    # Doesn't actually create a remote if it fails
    assert_not_in(
        'ora-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])

    ds.repo.init_remote('ora-remote', options=init_opts)
    assert_in(
        'ora-remote',
        [cfg['name'] for uuid, cfg in ds.repo.get_special_remotes().items()])
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    #   - url
    #   - common_init_opts
    #   - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob', 'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(url), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)
Example #7
def _test_gitannex(host, store, dspath):
    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()

    if ds.repo.is_managed_branch():
        # git-annex-testremote is way too slow on crippled FS.
        # Use is_managed_branch() as a proxy and skip only here
        # instead of in a decorator
        raise SkipTest("Test too slow on crippled FS")

    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    from datalad.support.external_versions import external_versions
    if '8.20200330' < external_versions['cmd:annex'] < '8.20200624':
        # https://git-annex.branchable.com/bugs/testremote_breeds_way_too_many_instances_of_the_externals_remote/?updated
        raise SkipTest(
            "git-annex might lead to overwhelming number of external "
            "special remote instances")

    # run git-annex-testremote
    # Note that we don't want to capture output. If something goes wrong we
    # want to see it in the test build's output log.
    GitWitlessRunner(cwd=dspath).run(['git', 'annex', 'testremote', 'store'])
Example #8
def _test_initremote_rewrite(host, ds_path, store):

    # rudimentary repetition of test_initremote_basic, but
    # with url.<base>.insteadOf config, which should not only
    # be respected, but also lead to the rewritten URL being stored in
    # git-annex:remote.log

    ds_path = Path(ds_path)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    url = "mystore:"
    init_opts = common_init_opts + ['url={}'.format(url)]

    if host:
        replacement = "ria+ssh://{host}{path}".format(host=host,
                                                      path=store)
    else:
        replacement = "ria+{}".format(store.as_uri())

    ds.config.set("url.{}.insteadOf".format(replacement), url, where='local')

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # run initremote and check what's stored:
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()]
              )
    # git-annex:remote.log should have:
    #   - rewritten url
    #   - common_init_opts
    #   - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob', 'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(replacement), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)
Example #9
def _test_remote_layout(host, dspath, store, archiv_store):

    dspath = Path(dspath)
    store = Path(store)
    archiv_store = Path(archiv_store)
    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
        arch_url = "ria+ssh://{host}{path}".format(host=host,
                                                   path=archiv_store)
    else:
        store_url = "ria+{}".format(store.as_uri())
        arch_url = "ria+{}".format(archiv_store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # copy files into the RIA store
    ds.repo.copy_to('.', 'store')

    # we should see the exact same annex object tree
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    store_objects = get_all_files(dsobj_dir)
    local_objects = get_all_files(ds.pathobj / '.git' / 'annex' / 'objects')
    assert_equal(len(store_objects), 2)

    if not ds.repo.is_managed_branch():
        # with managed branches the local repo uses hashdirlower instead
        # TODO: However, with dataset layout version 1 this should therefore
        #       work the same way on an adjusted branch
        # TODO: Wonder whether export-archive-ora should account for that and
        #       rehash according to target layout.
        assert_equal(sorted([p for p in store_objects]),
                     sorted([p for p in local_objects])
                     )

        if not io.get_7z():
            raise SkipTest("No 7z available in RIA store")

        # we can simply pack up the content of the remote into a
        # 7z archive and place it in the right location to get a functional
        # archive remote

        create_store(io, archiv_store, '1')
        create_ds_in_store(io, archiv_store, ds.id, '2', '1')

        whereis = ds.repo.whereis('one.txt')
        dsgit_dir, archive_dir, dsobj_dir = \
            get_layout_locations(1, archiv_store, ds.id)
        ds.export_archive_ora(archive_dir / 'archive.7z')
        init_opts = common_init_opts + ['url={}'.format(arch_url)]
        ds.repo.init_remote('archive', options=init_opts)
        # now fsck the new remote to get the new special remote indexed
        ds.repo.fsck(remote='archive', fast=True)
        assert_equal(len(ds.repo.whereis('one.txt')), len(whereis) + 1)
Example #10
def test_initremote_basic_httpsurl(storepath=None, storeurl=None):
    _test_initremote_basic(
        f"ria+{storeurl}",
        LocalIO(),
        storepath,
    )
Example #11
def _test_bare_git_version_2(host, dspath, store):
    # Similar to test_bare_git_version_1, this should ensure that a bare git repo
    # at the store location for a dataset doesn't conflict with the ORA remote.
    # Note: Usability of git remote by annex depends on dataset layout version
    #       (dirhashlower vs. -mixed).
    #       For version 2 (mixed) upload via ORA and consumption via git should
    #       work. But not the other way around, since git-annex uses
    #       dirhashlower with bare repos.

    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run([
        'git', 'clone', '--bare',
        quote_cmdlinearg(str(dspath)),
        quote_cmdlinearg(str(bare_repo_path))
    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 2 (dirhash mixed):
    create_ds_in_store(io, store, ds.id, '2', '1')

    # Now, let's have the bare repo as a git remote
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')
    # and the ORA remote in addition:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # upload keys via ORA:
    ds.repo.copy_to('.', 'ora-remote')
    # bare-git doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
    ds.drop('.')
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # actually consumable via git remote:
    ds.repo.call_annex(['move', 'one.txt', '--from', 'bare-git'])
    eq_(len(ds.repo.whereis('one.txt')), 2)
    # now, move back via git - shouldn't be consumable via ORA
    ds.repo.call_annex(['move', 'one.txt', '--to', 'bare-git'])
    # fsck to make availability known, but there's nothing from POV of ORA:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    assert_result_count(fsck_res, 1, status='ok')
    eq_(len(fsck_res), 2)
    eq_(len(ds.repo.whereis('one.txt')), 1)
Example #12
def _test_initremote_basic(host, ds_path, store, link):

    ds_path = Path(ds_path)
    store = Path(store)
    link = Path(link)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    if host:
        url = "ria+ssh://{host}{path}".format(host=host,
                                              path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]

    # fails on non-existing storage location
    assert_raises(CommandError,
                  ds.repo.init_remote, 'ria-remote', options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()]
                  )

    # fails on non-RIA URL
    assert_raises(CommandError, ds.repo.init_remote, 'ria-remote',
                  options=common_init_opts + ['url={}'.format(store.as_uri())]
                  )
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()]
                  )

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # still fails, since ds isn't setup in the store
    assert_raises(CommandError,
                  ds.repo.init_remote, 'ria-remote', options=init_opts)
    # Doesn't actually create a remote if it fails
    assert_not_in('ria-remote',
                  [cfg['name']
                   for uuid, cfg in ds.repo.get_special_remotes().items()]
                  )
    # set up the dataset as well
    create_ds_in_store(io, store, ds.id, '2', '1')
    # now should work
    ds.repo.init_remote('ria-remote', options=init_opts)
    assert_in('ria-remote',
              [cfg['name']
               for uuid, cfg in ds.repo.get_special_remotes().items()]
              )
    assert_repo_status(ds.path)
    # git-annex:remote.log should have:
    #   - url
    #   - common_init_opts
    #   - archive_id (which equals ds id)
    remote_log = ds.repo.call_git(['cat-file', 'blob', 'git-annex:remote.log'],
                                  read_only=True)
    assert_in("url={}".format(url), remote_log)
    for c in common_init_opts:
        assert_in(c, remote_log)
    assert_in("archive-id={}".format(ds.id), remote_log)

    # re-configure with invalid URL should fail:
    assert_raises(
        CommandError,
        ds.repo.call_annex,
        ['enableremote', 'ria-remote'] + common_init_opts + [
            'url=ria+file:///non-existing'])
    # but re-configure with valid URL should work
    if has_symlink_capability():
        link.symlink_to(store)
        new_url = 'ria+{}'.format(link.as_uri())
        ds.repo.call_annex(
            ['enableremote', 'ria-remote'] + common_init_opts + [
                'url={}'.format(new_url)])
        # git-annex:remote.log should have:
        #   - url
        #   - common_init_opts
        #   - archive_id (which equals ds id)
        remote_log = ds.repo.call_git(['cat-file', 'blob',
                                       'git-annex:remote.log'],
                                      read_only=True)
        assert_in("url={}".format(new_url), remote_log)
        for c in common_init_opts:
            assert_in(c, remote_log)
        assert_in("archive-id={}".format(ds.id), remote_log)

    # we can deal with --sameas, which leads to a special remote not having a
    # 'name' property, but only a 'sameas-name'. See gh-4259
    try:
        ds.repo.init_remote('ora2',
                            options=init_opts + ['--sameas', 'ria-remote'])
    except CommandError as e:
        if 'Invalid option `--sameas' in e.stderr:
            # annex too old - doesn't know --sameas
            pass
        else:
            raise 
Example #13
    def __call__(
            url,
            name,
            *,  # note that `name` is required but not posarg in CLI
            dataset=None,
            storage_name=None,
            alias=None,
            post_update_hook=False,
            shared=None,
            group=None,
            storage_sibling=True,
            existing='error',
            new_store_ok=False,
            trust_level=None,
            recursive=False,
            recursion_limit=None,
            disable_storage__=None,
            push_url=None):
        if disable_storage__ is not None:
            import warnings
            warnings.warn(
                "datalad-create-sibling-ria --no-storage-sibling "
                "is deprecated, use --storage-sibling off instead.",
                DeprecationWarning)
            # recode to new setup
            disable_storage__ = None
            storage_sibling = False

        if storage_sibling == 'only' and storage_name:
            lgr.warning(
                "Sibling name will be used for storage sibling in "
                "storage-sibling-only mode, but a storage sibling name "
                "was provided")

        ds = require_dataset(dataset,
                             check_installed=True,
                             purpose='create RIA sibling(s)')
        res_kwargs = dict(
            ds=ds,
            action="create-sibling-ria",
            logger=lgr,
        )

        # parse target URL
        # Note: URL parsing is done twice ATM (for top-level ds). This can't be
        # reduced to single instance, since rewriting url based on config could
        # be different for subdatasets.
        try:
            ssh_host, base_path, rewritten_url = \
                verify_ria_url(push_url if push_url else url, ds.config)
        except ValueError as e:
            yield get_status_dict(status='error', message=str(e), **res_kwargs)
            return

        if ds.repo.get_hexsha() is None or ds.id is None:
            raise RuntimeError("Repository at {} is not a DataLad dataset, "
                               "run 'datalad create [--force]' first.".format(
                                   ds.path))

        if not storage_sibling and storage_name:
            lgr.warning(
                "Storage sibling setup disabled, but a storage sibling name "
                "was provided")

        if storage_sibling and not storage_name:
            storage_name = "{}-storage".format(name)

        if storage_sibling and name == storage_name:
            # leads to unresolvable, circular dependency with publish-depends
            raise ValueError("sibling names must not be equal")

        if not isinstance(url, str):
            raise TypeError("url is not a string, but %s" % type(url))

        # Query existing siblings upfront in order to fail early on
        # existing=='error', since misconfiguring things (particularly special
        # remotes) only to have a subdataset fail later on with that config
        # can be quite painful.
        # TODO: messages - this is "create-sibling". Don't confuse existence of
        #       local remotes with existence of the actual remote sibling
        #       in wording
        if existing == 'error':
            failed = False
            for dpath, sname in _yield_ds_w_matching_siblings(
                    ds, (name, storage_name),
                    recursive=recursive,
                    recursion_limit=recursion_limit):
                res = get_status_dict(
                    status='error',
                    message=(
                        "a sibling %r is already configured in dataset %r",
                        sname, dpath),
                    type='sibling',
                    name=sname,
                    ds=ds,
                    **res_kwargs,
                )
                failed = True
                yield res
            if failed:
                return
        # TODO: - URL parsing + store creation needs to be RF'ed based on
        #         command abstractions
        #       - more generally consider store creation a dedicated command or
        #         option

        io = SSHRemoteIO(ssh_host) if ssh_host else LocalIO()
        try:
            # determine the existence of a store by trying to read its layout.
            # Because this raises a FileNotFoundError if it doesn't exist, we
            # need to catch it
            io.read_file(Path(base_path) / 'ria-layout-version')
        except (FileNotFoundError, RIARemoteError,
                RemoteCommandFailedError) as e:
            if not new_store_ok:
                # we're instructed to only act in case of an existing RIA store
                res = get_status_dict(status='error',
                                      message="No store found at '{}'. Forgot "
                                      "--new-store-ok ?".format(
                                          Path(base_path)),
                                      **res_kwargs)
                yield res
                return

        log_progress(
            lgr.info,
            'create-sibling-ria',
            'Creating a new RIA store at %s',
            Path(base_path),
        )
        create_store(io, Path(base_path), '1')

        yield from _create_sibling_ria(ds, url, push_url, name,
                                       storage_sibling, storage_name, alias,
                                       existing, shared, group,
                                       post_update_hook, trust_level,
                                       res_kwargs)

        if recursive:
            # Note: subdatasets can be treated independently, so go full
            # recursion when querying for them and _no_recursion with the
            # actual call. Theoretically this can be parallelized.

            for subds in ds.subdatasets(state='present',
                                        recursive=True,
                                        recursion_limit=recursion_limit,
                                        return_type='generator',
                                        result_renderer='disabled',
                                        result_xfm='datasets'):
                yield from _create_sibling_ria(
                    subds,
                    url,
                    push_url,
                    name,
                    storage_sibling,
                    storage_name,
                    None,  # subdatasets can't have the same alias as the parent
                    existing,
                    shared,
                    group,
                    post_update_hook,
                    trust_level,
                    res_kwargs)
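For reference, this __call__ implements the create_sibling_ria command; Example 17 below invokes it through the Python API. A minimal invocation against a local filesystem store might look like the following sketch (all paths and sibling names are placeholders):

from datalad.api import Dataset

ds = Dataset('/tmp/myds').create()
ds.create_sibling_ria(
    'ria+file:///tmp/mystore',  # RIA URL of the store
    name='store',               # git sibling; storage sibling defaults to 'store-storage'
    new_store_ok=True,          # create the store if it does not exist yet
)
ds.push('.', to='store')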
Example #14
def test_read_access(store_path=None, store_url=None, ds_path=None):

    ds = Dataset(ds_path).create()
    populate_dataset(ds)

    files = [Path('one.txt'), Path('subdir') / 'two']
    store_path = Path(store_path)
    url = "ria+" + store_url
    init_opts = common_init_opts + ['url={}'.format(url)]

    io = LocalIO()
    create_store(io, store_path, '1')
    create_ds_in_store(io, store_path, ds.id, '2', '1')
    ds.repo.init_remote('ora-remote', options=init_opts)
    fsck_results = ds.repo.fsck(remote='ora-remote', fast=True)
    # Note: Failures in the special remote will show up as a success=False
    # result for fsck -> the call itself would not fail.
    for r in fsck_results:
        if "note" in r:
            # we could simply assert "note" to not be in r, but we want proper
            # error reporting - content of note, not just its unexpected
            # existence.
            assert_equal(r["success"],
                         "true",
                         msg="git-annex-fsck failed with ORA over HTTP: %s" %
                         r)
        assert_equal(r["error-messages"], [])
    store_uuid = ds.siblings(name='ora-remote',
                             return_type='item-or-list',
                             result_renderer='disabled')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list',
                            result_renderer='disabled')['annex-uuid']

    # nothing in store yet:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_not_in(store_uuid, known_sources)

    annex_obj_target = str(store_path / ds.id[:3] / ds.id[3:] / 'annex' /
                           'objects')
    shutil.rmtree(annex_obj_target)
    shutil.copytree(src=str(ds.repo.dot_git / 'annex' / 'objects'),
                    dst=annex_obj_target)

    ds.repo.fsck(remote='ora-remote', fast=True)
    # all in store now:
    for f in files:
        known_sources = ds.repo.whereis(str(f))
        assert_in(here_uuid, known_sources)
        assert_in(store_uuid, known_sources)

    ds.drop('.')
    res = ds.get('.')
    assert_equal(len(res), 4)
    assert_result_count(res,
                        4,
                        status='ok',
                        type='file',
                        action='get',
                        message="from ora-remote...")

    # try whether the reported access URL is correct
    one_url = ds.repo.whereis('one.txt',
                              output='full')[store_uuid]['urls'].pop()
    assert_status(
        'ok', ds.download_url(urls=[one_url], path=str(ds.pathobj / 'dummy')))
Example #15
def _test_permission(host, storepath, dspath):

    # Test whether ORA correctly revokes and obtains write permissions within
    # the annex object tree. That is: Revoke after ORA pushed a key to store
    # in order to allow the object tree to safely be used with an ephemeral
    # clone. And on removal obtain write permissions, like annex would
    # internally on a drop (but be sure to restore if something went wrong).

    dspath = Path(dspath)
    storepath = Path(storepath)
    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)
    testfile = 'one.txt'

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=storepath)
    else:
        store_url = "ria+{}".format(storepath.as_uri())

    create_store(io, storepath, '1')
    create_ds_in_store(io, storepath, ds.id, '2', '1')
    _, _, obj_tree = get_layout_locations(1, storepath, ds.id)
    assert_true(obj_tree.is_dir())
    file_key_in_store = obj_tree / 'X9' / '6J' / 'MD5E-s8--7e55db001d319a94b0b713529a756623.txt' / 'MD5E-s8--7e55db001d319a94b0b713529a756623.txt'

    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    assert_false(file_key_in_store.exists())

    ds.repo.call_annex(['copy', testfile, '--to', 'store'])
    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    assert_true(file_key_in_store.exists())

    # Revoke write permissions from parent dir in-store to test whether we
    # still can drop (if we can obtain the permissions). Note that this has
    # no effect on VFAT.
    file_key_in_store.parent.chmod(file_key_in_store.parent.stat().st_mode
                                   & ~stat.S_IWUSR)
    # we can't directly delete; key in store should be protected
    assert_raises(PermissionError, file_key_in_store.unlink)

    # ORA can still drop, since it obtains permission to:
    ds.repo.call_annex(['drop', testfile, '--from', 'store'])
    known_sources = ds.repo.whereis(testfile)
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    assert_false(file_key_in_store.exists())
Example #16
def _test_version_check(host, dspath, store):

    dspath = Path(dspath)
    store = Path(store)

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    ds.save()
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host,
                                                    path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')

    # TODO: Re-establish test for version 1
    # version 2: dirhash
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)
    ds.repo.copy_to('.', 'store')

    # check version files
    remote_ds_tree_version_file = store / 'ria-layout-version'
    dsgit_dir, archive_dir, dsobj_dir = \
        get_layout_locations(1, store, ds.id)
    remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version'

    assert_true(remote_ds_tree_version_file.exists())
    assert_true(remote_obj_tree_version_file.exists())

    with open(str(remote_ds_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '1')
    with open(str(remote_obj_tree_version_file), 'r') as f:
        assert_equal(f.read().strip(), '2')

    # Accessing the remote should not yield any output regarding versioning,
    # since it's the "correct" version. Note that "fsck" is an arbitrary choice.
    # We just need something to talk to the special remote.
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        # TODO: For some reason didn't get cml.assert_logged to assert
        #       "nothing was logged"
        assert not cml.out

    # Now fake-change the version
    with open(str(remote_obj_tree_version_file), 'w') as f:
        f.write('X\n')

    # Now we should see a message about it
    with swallow_logs(new_level=logging.INFO) as cml:
        ds.repo.fsck(remote='store', fast=True)
        cml.assert_logged(level="INFO",
                          msg="Remote object tree reports version X",
                          regex=False)

    # reading still works:
    ds.drop('.')
    assert_status('ok', ds.get('.'))

    # but writing doesn't:
    with open(str(Path(ds.path) / 'new_file'), 'w') as f:
        f.write("arbitrary addition")
    ds.save(message="Add a new_file")

    # TODO: use self.annex.error in special remote and see whether we get an
    #       actual error result
    assert_raises(CommandError,
                  ds.repo.copy_to, 'new_file', 'store')

    # However, we can force it by configuration
    ds.config.add("annex.ora-remote.store.force-write", "true", where='local')
    ds.repo.copy_to('new_file', 'store')
Example #17
def _postclonetest_prepare(lcl, storepath, link):

    from datalad.customremotes.ria_utils import (
        create_store,
        create_ds_in_store,
        get_layout_locations
    )
    from datalad.distributed.ora_remote import (
        LocalIO,
    )

    create_tree(lcl,
                tree={
                        'ds': {
                            'test.txt': 'some',
                            'subdir': {
                                'subds': {'testsub.txt': 'somemore'},
                                'subgit': {'testgit.txt': 'even more'}
                            },
                        },
                      })

    # create a local dataset with a subdataset
    lcl = Path(lcl)
    storepath = Path(storepath)
    link = Path(link)
    link.symlink_to(storepath)
    subds = Dataset(lcl / 'ds' / 'subdir' / 'subds').create(force=True)
    subds.save()
    # add a plain git dataset as well
    subgit = Dataset(lcl / 'ds' / 'subdir' / 'subgit').create(force=True,
                                                              annex=False)
    subgit.save()
    ds = Dataset(lcl / 'ds').create(force=True)
    ds.save(version_tag='original')
    assert_repo_status(ds.path)

    io = LocalIO()
    create_store(io, storepath, '1')

    # URL to use for upload. The point is that this should be invalid for the
    # clone, so that autoenable would fail. Therefore base it on a symlink that
    # will be deleted later.
    upl_url = "ria+{}".format(link.as_uri())

    for d in (ds, subds, subgit):

        # TODO: create-sibling-ria required for config! => adapt to RF'd
        #       creation (missed on rebase?)
        create_ds_in_store(io, storepath, d.id, '2', '1')
        d.create_sibling_ria(upl_url, "store")

        if d is not subgit:
            # Now, simulate the problem by reconfiguring the special remote to
            # not be autoenabled.
            # Note, however, that the actual intention is a URL that isn't
            # valid from the point of view of the clone (doesn't resolve, no
            # credentials, etc.), so that autoenabling on git-annex-init during
            # a datalad clone would fail.
            Runner(cwd=d.path).run(['git', 'annex', 'enableremote',
                                    'store-storage',
                                    'autoenable=false'])
        d.push('.', to='store')
        store_loc, _, _ = get_layout_locations(1, storepath, d.id)
        Runner(cwd=str(store_loc)).run(['git', 'update-server-info'])

    link.unlink()
    # We should now have a store with datasets that have an autoenabled ORA
    # remote relying on an inaccessible URL.
    # datalad-clone is supposed to reconfigure based on the URL we cloned from.
    # Test this feature for cloning via HTTP, SSH and FILE URLs.

    return ds.id
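The returned dataset id is what a subsequent clone test appends as the URL fragment. Cloning from the prepared store could then look roughly like the sketch below (paths are placeholders; the 'ria+<scheme>://<store>#<id>' form follows the source-candidate pattern shown in Example 19):

from datalad.api import clone

ds_id = _postclonetest_prepare('/tmp/lcl', '/tmp/store', '/tmp/store-link')
riaclone = clone('ria+file:///tmp/store#{}'.format(ds_id), path='/tmp/cloned_ds')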
Example #18
    def __call__(url,
                 name,
                 dataset=None,
                 storage_name=None,
                 post_update_hook=False,
                 shared=None,
                 group=None,
                 storage_sibling=True,
                 existing='error',
                 trust_level=None,
                 recursive=False,
                 recursion_limit=None,
                 disable_storage__=None,
                 ):
        if disable_storage__ is not None:
            import warnings
            warnings.warn("datalad-create-sibling-ria --no-storage-sibling "
                          "is deprecated, use --storage-sibling off instead.",
                          DeprecationWarning)
            # recode to new setup
            disable_storage__ = None
            storage_sibling = False

        if storage_sibling == 'only' and storage_name:
            lgr.warning(
                "Sibling name will be used for storage sibling in "
                "storage-sibling-only mode, but a storage sibling name "
                "was provided"
            )

        ds = require_dataset(
            dataset, check_installed=True, purpose='create sibling RIA')
        res_kwargs = dict(
            ds=ds,
            action="create-sibling-ria",
            logger=lgr,
        )

        # parse target URL
        try:
            ssh_host, base_path, rewritten_url = verify_ria_url(url, ds.config)
        except ValueError as e:
            yield get_status_dict(
                status='error',
                message=str(e),
                **res_kwargs
            )
            return

        if ds.repo.get_hexsha() is None or ds.id is None:
            raise RuntimeError(
                "Repository at {} is not a DataLad dataset, "
                "run 'datalad create [--force]' first.".format(ds.path))

        if not storage_sibling and storage_name:
            lgr.warning(
                "Storage sibling setup disabled, but a storage sibling name "
                "was provided"
            )

        if storage_sibling and not storage_name:
            storage_name = "{}-storage".format(name)

        if storage_sibling and name == storage_name:
            # leads to unresolvable, circular dependency with publish-depends
            raise ValueError("sibling names must not be equal")

        if not isinstance(url, str):
            raise TypeError("url is not a string, but %s" % type(url))

        # Query existing siblings upfront in order to fail early on
        # existing=='error', since misconfiguring things (particularly special
        # remotes) only to have a subdataset fail later on with that config
        # can be quite painful.
        # TODO: messages - this is "create-sibling". Don't confuse existence of
        #       local remotes with existence of the actual remote sibling
        #       in wording
        if existing == 'error':
            # in recursive mode this check could take a substantial amount of
            # time: employ a progress bar (or rather a counter, because we don't
            # know the total in advance)
            pbar_id = 'check-siblings-{}'.format(id(ds))
            log_progress(
                lgr.info, pbar_id,
                'Start checking pre-existing sibling configuration %s', ds,
                label='Query siblings',
                unit=' Siblings',
            )
            # even if we have to fail, let's report all conflicting siblings
            # in subdatasets
            failed = False
            for r in ds.siblings(result_renderer=None,
                                 recursive=recursive,
                                 recursion_limit=recursion_limit):
                log_progress(
                    lgr.info, pbar_id,
                    'Discovered sibling %s in dataset at %s',
                    r['name'], r['path'],
                    update=1,
                    increment=True)
                if not r['type'] == 'sibling' or r['status'] != 'ok':
                    # this is an internal status query that has no consequence
                    # for the outside world. Be silent unless something useful
                    # can be said
                    #yield r
                    continue
                if r['name'] == name:
                    res = get_status_dict(
                        status='error',
                        message="a sibling '{}' is already configured in "
                        "dataset {}".format(name, r['path']),
                        **res_kwargs,
                    )
                    failed = True
                    yield res
                    continue
                if storage_name and r['name'] == storage_name:
                    res = get_status_dict(
                        status='error',
                        message="a sibling '{}' is already configured in "
                        "dataset {}".format(storage_name, r['path']),
                        **res_kwargs,
                    )
                    failed = True
                    yield res
                    continue
            log_progress(
                lgr.info, pbar_id,
                'Finished checking pre-existing sibling configuration %s', ds,
            )
            if failed:
                return

        # TODO: - URL parsing + store creation needs to be RF'ed based on
        #         command abstractions
        #       - more generally consider store creation a dedicated command or
        #         option
        # Note: URL parsing is done twice ATM (for top-level ds). This can't be
        # reduced to single instance, since rewriting url based on config could
        # be different for subdatasets.

        create_store(SSHRemoteIO(ssh_host) if ssh_host else LocalIO(),
                     Path(base_path),
                     '1')

        yield from _create_sibling_ria(
            ds,
            url,
            name,
            storage_sibling,
            storage_name,
            existing,
            shared,
            group,
            post_update_hook,
            trust_level,
            res_kwargs)

        if recursive:
            # Note: subdatasets can be treated independently, so go full
            # recursion when querying for them and _no_recursion with the
            # actual call. Theoretically this can be parallelized.

            for subds in ds.subdatasets(fulfilled=True,
                                        recursive=True,
                                        recursion_limit=recursion_limit,
                                        result_xfm='datasets'):
                yield from _create_sibling_ria(
                    subds,
                    url,
                    name,
                    storage_sibling,
                    storage_name,
                    existing,
                    shared,
                    group,
                    post_update_hook,
                    trust_level,
                    res_kwargs)
Example #19
def postclonecfg_ria(ds, props):
    """Configure a dataset freshly cloned from a RIA store"""
    repo = ds.repo
    # RIA uses hashdir mixed; copying data to it via git-annex (if cloned via
    # ssh) would make it see a bare repo and establish a hashdir lower annex
    # object tree.
    # Moreover, we want the ORA remote to receive all data for the store, so its
    # objects could be moved into archives (the main point of a RIA store).
    RIA_REMOTE_NAME = 'origin'  # don't hardcode everywhere
    ds.config.set(
        'remote.{}.annex-ignore'.format(RIA_REMOTE_NAME), 'true',
        where='local')

    # chances are that if this dataset came from a RIA store, its subdatasets
    # may live there too. Place a subdataset source candidate config that makes
    # get probe this RIA store when obtaining subdatasets
    ds.config.set(
        # we use the label 'origin' for this candidate in order to not have to
        # generate a complicated name from the actual source specification.
        # we pick a cost of 200 to sort it before datalad's default candidates
        # for non-RIA URLs, because they prioritize hierarchical layouts that
        # cannot be found in a RIA store
        'datalad.get.subdataset-source-candidate-200origin',
        # use the entire original URL, up to the fragment, plus the dataset ID
        # placeholder; this should make things work with any store setup we
        # support (paths, ports, ...)
        props['source'].split('#', maxsplit=1)[0] + '#{id}',
        where='local')

    # set up a publication dependency, if a corresponding special remote exists
    # and was enabled (there could be RIA stores that actually only have repos)
    # make this function be a generator
    ora_remotes = [s for s in ds.siblings('query', result_renderer='disabled')
                   if s.get('annex-externaltype') == 'ora']
    if not ora_remotes and any(
            r.get('externaltype') == 'ora'
            for r in (repo.get_special_remotes().values()
                      if hasattr(repo, 'get_special_remotes')
                      else [])):
        # No ORA remote was autoenabled, but configuration knows about at least
        # one. Let's check origin's config for datalad.ora-remote.uuid as stored
        # by create-sibling-ria and try enabling that one.
        lgr.debug("Found no autoenabled ORA special remote. Trying to look it "
                  "up in source config ...")

        # First figure whether we cloned via SSH, HTTP or local path and then
        # get that config file the same way:
        config_content = None
        scheme = props['giturl'].split(':', 1)[0]
        if scheme in ['http', 'https']:
            try:
                config_content = download_url(
                    "{}{}config".format(
                        props['giturl'],
                        '/' if not props['giturl'].endswith('/') else ''))
            except DownloadError as e:
                lgr.debug("Failed to get config file from source:\n%s",
                          exc_str(e))
        elif scheme == 'ssh':
            # TODO: switch the following to proper command abstraction:
            # SSHRemoteIO ignores the path part ATM. No remote CWD! (To be
            # changed with command abstractions). So we need to get that part to
            # have a valid path to origin's config file:
            cfg_path = PurePosixPath(URL(props['giturl']).path) / 'config'
            op = SSHRemoteIO(props['giturl'])
            try:
                config_content = op.read_file(cfg_path)
            except RIARemoteError as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))

        elif scheme == 'file':
            # TODO: switch the following to proper command abstraction:
            op = LocalIO()
            cfg_path = Path(URL(props['giturl']).localpath) / 'config'
            try:
                config_content = op.read_file(cfg_path)
            except (RIARemoteError, OSError) as e:
                lgr.debug("Failed to get config file from source: %s",
                          exc_str(e))
        else:
            lgr.debug("Unknown URL-Scheme %s in %s. Can handle SSH, HTTP or "
                      "FILE scheme URLs.", scheme, props['source'])

        # 3. And read it
        org_uuid = None
        if config_content:
            # TODO: We might be able to spare the saving to a file.
            #       "git config -f -" is not explicitly documented but happens
            #       to work and would read from stdin. Make sure we know this
            #       works for required git versions and on all platforms.
            with make_tempfile(content=config_content) as cfg_file:
                runner = GitWitlessRunner()
                try:
                    result = runner.run(
                        ['git', 'config', '-f', cfg_file,
                         'datalad.ora-remote.uuid'],
                        protocol=StdOutCapture
                    )
                    org_uuid = result['stdout'].strip()
                except CommandError as e:
                    # doesn't contain what we are looking for
                    lgr.debug("Found no UUID for ORA special remote at "
                              "'%s' (%s)", RIA_REMOTE_NAME, exc_str(e))

        # Now, enable it. If annex-init didn't fail to enable it as stored, we
        # wouldn't end up here, so enable with store URL as suggested by the URL
        # we cloned from.
        if org_uuid:
            srs = repo.get_special_remotes()
            if org_uuid in srs.keys():
                # TODO: - Double-check autoenable value and only do this when
                #         true?
                #       - What if still fails? -> Annex shouldn't change config
                #         in that case

                # we only need the store:
                new_url = props['source'].split('#')[0]
                try:
                    repo.enable_remote(srs[org_uuid]['name'],
                                       options=['url={}'.format(new_url)]
                                       )
                    lgr.info("Reconfigured %s for %s",
                             srs[org_uuid]['name'], new_url)
                    # update ora_remotes for considering publication dependency
                    # below
                    ora_remotes = [s for s in
                                   ds.siblings('query',
                                               result_renderer='disabled')
                                   if s.get('annex-externaltype', None) ==
                                   'ora']
                except CommandError as e:
                    lgr.debug("Failed to reconfigure ORA special remote: %s",
                              exc_str(e))
            else:
                lgr.debug("Unknown ORA special remote uuid at '%s': %s",
                          RIA_REMOTE_NAME, org_uuid)
    if ora_remotes:
        if len(ora_remotes) == 1:
            yield from ds.siblings('configure',
                                   name=RIA_REMOTE_NAME,
                                   publish_depends=ora_remotes[0]['name'],
                                   result_filter=None,
                                   result_renderer='disabled')
        else:
            lgr.warning("Found multiple ORA remotes. Couldn't decide which "
                        "publishing to 'origin' should depend on: %s. Consider "
                        "running 'datalad siblings configure -s origin "
                        "--publish-depends ORAREMOTENAME' to set publication "
                        "dependency manually.",
                        [r['name'] for r in ora_remotes])
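If that warning fires, the publication dependency can be set manually with the same siblings call used just above; a sketch (the name 'store-storage' is a placeholder for whichever ORA remote should be depended upon):

ds.siblings('configure',
            name='origin',
            publish_depends='store-storage',
            result_renderer='disabled')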
Example #20
def _create_sibling_ria(
        ds,
        url,
        name,
        storage_sibling,
        storage_name,
        existing,
        shared,
        group,
        post_update_hook,
        trust_level,
        res_kwargs):
    # be safe across datasets
    res_kwargs = res_kwargs.copy()
    # update dataset
    res_kwargs['ds'] = ds

    if not isinstance(ds.repo, AnnexRepo):
        # No point in dealing with a special remote when there's no annex.
        # Note that in recursive invocations this might only apply to some of
        # the datasets. Therefore deal with it here rather than one level up.
        lgr.debug("No annex at %s. Ignoring special remote options.", ds.path)
        storage_sibling = False
        storage_name = None

    # parse target URL
    try:
        ssh_host, base_path, rewritten_url = verify_ria_url(url, ds.config)
    except ValueError as e:
        yield get_status_dict(
            status='error',
            message=str(e),
            **res_kwargs
        )
        return

    base_path = Path(base_path)

    git_url = decode_source_spec(
        # append dataset id to url and use magic from clone-helper:
        url + '#{}'.format(ds.id),
        cfg=ds.config
    )['giturl']
    # determine layout locations; go for a v1 layout
    repo_path, _, _ = get_layout_locations(1, base_path, ds.id)

    ds_siblings = [r['name'] for r in ds.siblings(result_renderer=None)]
    # Figure whether we are supposed to skip this very dataset
    if existing == 'skip' and (
            name in ds_siblings or (
                storage_name and storage_name in ds_siblings)):
        yield get_status_dict(
            status='notneeded',
            message="Skipped on existing sibling",
            **res_kwargs
        )
        # if we skip here, nothing else can change that decision further
        # down
        return

    # figure out whether we need to skip or error due to an existing target
    # repo before we try to init a special remote.
    if ssh_host:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection(
            ssh_host,
            use_remote_annex_bundle=False)
        ssh.open()

    if existing in ['skip', 'error']:
        config_path = repo_path / 'config'
        # No .git -- if it's an existing repo in a RIA store it should be a
        # bare repo.
        # Theoretically we could have additional checks for whether we have
        # an empty repo dir or a non-bare repo or whatever else.
        if ssh_host:
            try:
                ssh('[ -e {p} ]'.format(p=quote_cmdlinearg(str(config_path))))
                exists = True
            except CommandError:
                exists = False
        else:
            exists = config_path.exists()

        if exists:
            if existing == 'skip':
                # 1. not rendered by default
                # 2. message doesn't show up in ultimate result
                #    record as shown by -f json_pp
                yield get_status_dict(
                    status='notneeded',
                    message="Skipped on existing remote "
                            "directory {}".format(repo_path),
                    **res_kwargs
                )
                return
            else:  # existing == 'error'
                yield get_status_dict(
                    status='error',
                    message="remote directory {} already "
                            "exists.".format(repo_path),
                    **res_kwargs
                )
                return

    if storage_sibling == 'only':
        lgr.info("create storage sibling '{}' ...".format(name))
    else:
        lgr.info("create sibling{} '{}'{} ...".format(
            's' if storage_name else '',
            name,
            " and '{}'".format(storage_name) if storage_name else '',
        ))
    create_ds_in_store(SSHRemoteIO(ssh_host) if ssh_host else LocalIO(),
                       base_path, ds.id, '2', '1')
    if storage_sibling:
        # we are using the main `name` if the only thing we are creating
        # is the storage sibling
        srname = name if storage_sibling == 'only' else storage_name

        lgr.debug('init special remote {}'.format(srname))
        special_remote_options = [
            'type=external',
            'externaltype=ora',
            'encryption=none',
            'autoenable=true',
            'url={}'.format(url)]
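        # For reference (not part of the original code): these options
        # correspond roughly to a manual invocation of
        #   git annex initremote <srname> type=external externaltype=ora \
        #       encryption=none autoenable=true url=<ria-url>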
        try:
            ds.repo.init_remote(
                srname,
                options=special_remote_options)
        except CommandError as e:
            if existing == 'reconfigure' \
                    and 'git-annex: There is already a special remote' \
                    in e.stderr:
                # run enableremote instead
                lgr.debug(
                    "special remote '%s' already exists. "
                    "Run enableremote instead.",
                    srname)
                # TODO: Use AnnexRepo.enable_remote (which needs to get
                #       `options` first)
                ds.repo.call_annex([
                    'enableremote',
                    srname] + special_remote_options)
            else:
                yield get_status_dict(
                    status='error',
                    message="initremote failed.\nstdout: %s\nstderr: %s"
                    % (e.stdout, e.stderr),
                    **res_kwargs
                )
                return

        if trust_level:
            ds.repo.call_annex([trust_level, srname])
        # get uuid for use in bare repo's config
        uuid = ds.config.get("remote.{}.annex-uuid".format(srname))

    if storage_sibling == 'only':
        # we can stop here, the rest of the function is about setting up
        # the git remote part of the sibling
        yield get_status_dict(
            status='ok',
            **res_kwargs,
        )
        return

    # 2. create a bare repository in-store:

    lgr.debug("init bare repository")
    # TODO: we should probably check whether it's there already. How?
    # Note: like the special remote itself, we assume local FS if no
    # SSH host is specified
    disabled_hook = repo_path / 'hooks' / 'post-update.sample'
    enabled_hook = repo_path / 'hooks' / 'post-update'

    if group:
        chgrp_cmd = "chgrp -R {} {}".format(
            quote_cmdlinearg(str(group)),
            quote_cmdlinearg(str(repo_path)))

    if ssh_host:
        ssh('cd {rootdir} && git init --bare{shared}'.format(
            rootdir=quote_cmdlinearg(str(repo_path)),
            shared=" --shared='{}'".format(
                quote_cmdlinearg(shared)) if shared else ''
        ))

        if storage_sibling:
            # write the special remote's uuid into the git config, so clone
            # can tell which one it is supposed to be and enable it even with
            # a fallback URL
            ssh("cd {rootdir} && git config datalad.ora-remote.uuid {uuid}"
                "".format(rootdir=quote_cmdlinearg(str(repo_path)),
                          uuid=uuid))

        if post_update_hook:
            ssh('mv {} {}'.format(quote_cmdlinearg(str(disabled_hook)),
                                  quote_cmdlinearg(str(enabled_hook))))

        if group:
            # Whether the repository existed before or a new directory was
            # created for it, set its group to the desired one, if provided,
            # with the same chgrp
            ssh(chgrp_cmd)
    else:
        gr = GitRepo(repo_path, create=True, bare=True,
                     shared=shared if shared else None)
        if storage_sibling:
            # write the special remote's uuid into the git config, so clone
            # can tell which one it is supposed to be and enable it even with
            # a fallback URL
            gr.config.add("datalad.ora-remote.uuid", uuid, where='local')

        if post_update_hook:
            disabled_hook.rename(enabled_hook)
        if group:
            # TODO: do we need a cwd here?
            # chgrp_cmd is a pre-quoted shell command string (see the SSH
            # branch above), so it needs to be run through a shell
            subprocess.run(chgrp_cmd, cwd=ds.path, shell=True)

    # add a git remote to the bare repository
    # Note: needs annex-ignore! Otherwise we might push into the dirhash-lower
    # annex/object tree instead of the mixed one, since it's a bare repo. This
    # in turn would be an issue if we want to pack the entire thing into an
    # archive: the special remote would then not be able to access content in
    # the "wrong" place within the archive.
    lgr.debug("set up git remote")
    if name in ds_siblings:
        # otherwise we should have skipped or failed before
        assert existing == 'reconfigure'
    ds.config.set(
        "remote.{}.annex-ignore".format(name),
        value="true",
        where="local")
    ds.siblings(
        'configure',
        name=name,
        url=git_url
        if ssh_host
        else str(repo_path),
        recursive=False,
        # Note that this should be None if storage_sibling was not set
        publish_depends=storage_name,
        result_renderer=None,
        # Note that otherwise a subsequent publish will report
        # "notneeded".
        fetch=True
    )

    yield get_status_dict(
        status='ok',
        **res_kwargs,
    )
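The helper above is a generator of result records rather than a user-facing
command. A minimal sketch of driving it directly, assuming a local RIA store
under /tmp/ria-store; the dataset path, sibling names, and res_kwargs content
are purely illustrative:

from datalad.api import Dataset

ds = Dataset('/tmp/myds').create()
for res in _create_sibling_ria(
        ds=ds,
        url='ria+file:///tmp/ria-store',  # assumed local store location
        name='ria',                       # git sibling name (illustrative)
        storage_sibling=True,             # also create the ORA special remote
        storage_name='ria-storage',       # its name (illustrative)
        existing='error',                 # fail if the target repo already exists
        shared=None,
        group=None,
        post_update_hook=False,
        trust_level=None,
        res_kwargs={'action': 'create-sibling-ria'}):
    print(res['status'], res.get('message', ''))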
Exemplo n.º 21
0
def _test_bare_git_version_1(host, dspath, store):
    # This test should take a dataset and create a bare repository at the
    # remote end from it.
    # Given that it is placed correctly within a tree of datasets, that remote
    # should then be usable as an ora-remote as well as a git-type remote.
    # Note: Usability of the git remote by annex depends on the dataset layout
    #       version (dirhashlower vs. -mixed).
    #       For version 1 (lower), upload and consumption should be
    #       interchangeable. It doesn't matter which remote is used for which
    #       direction.
    ds_path = Path(dspath)
    store = Path(store)
    ds = Dataset(ds_path).create()
    populate_dataset(ds)
    ds.save()

    bare_repo_path, _, _ = get_layout_locations(1, store, ds.id)
    # Use git to make sure the remote end is what git thinks a bare clone of it
    # should look like
    subprocess.run([
        'git', 'clone', '--bare',
        quote_cmdlinearg(str(dspath)),
        quote_cmdlinearg(str(bare_repo_path))
    ])

    if host:
        url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        url = "ria+{}".format(store.as_uri())
    init_opts = common_init_opts + ['url={}'.format(url)]
    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    create_store(io, store, '1')
    # set up the dataset location, too.
    # Note: Dataset layout version 1 (dirhash lower):
    create_ds_in_store(io, store, ds.id, '1', '1')

    # Now, let's have the bare repo as a git remote and use it with annex
    git_url = "ssh://{host}{path}".format(host=host, path=bare_repo_path) \
        if host else bare_repo_path.as_uri()
    ds.repo.add_remote('bare-git', git_url)
    ds.repo.enable_remote('bare-git')

    # copy files to the remote
    ds.repo.copy_to('.', 'bare-git')
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # now we can drop all content locally, reobtain it, and survive an
    # fsck
    ds.drop('.')
    ds.get('.')
    assert_status('ok', [annexjson2result(r, ds) for r in ds.repo.fsck()])

    # Now, add the ora remote:
    ds.repo.init_remote('ora-remote', options=init_opts)
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)

    # Now move content from git-remote to local and see it not being available
    # via bare-git anymore.
    ds.repo.call_annex(['move', '--all', '--from=bare-git'])
    # ora-remote doesn't know yet:
    eq_(len(ds.repo.whereis('one.txt')), 2)

    # But after fsck it does:
    fsck_res = [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='ora-remote', fast=True)
    ]
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, one.txt\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    assert_result_count(fsck_res,
                        1,
                        status='error',
                        message='** Based on the location log, subdir/two\n'
                        '** was expected to be present, '
                        'but its content is missing.')
    eq_(len(ds.repo.whereis('one.txt')), 1)
    # and the other way around: upload via ora-remote and have it available via
    # git-remote:
    ds.repo.copy_to('.', 'ora-remote')
    # fsck to make availability known
    assert_status('ok', [
        annexjson2result(r, ds)
        for r in ds.repo.fsck(remote='bare-git', fast=True)
    ])
    eq_(len(ds.repo.whereis('one.txt')), 3)
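The availability counts asserted above use whereis(), which returns raw annex
UUIDs and can be hard to interpret when a check fails. A small illustrative
helper, not part of the original test, that maps those UUIDs to sibling names
using the same siblings()/annex-uuid records seen in the other examples:

def describe_whereis(ds, path):
    # map annex UUIDs to the sibling names known to the dataset
    uuid_to_name = {
        s['annex-uuid']: s['name']
        for s in ds.siblings(result_renderer='disabled')
        if s.get('annex-uuid')
    }
    return [uuid_to_name.get(u, u) for u in ds.repo.whereis(path)]

# e.g. describe_whereis(ds, 'one.txt') could yield
# ['here', 'bare-git', 'ora-remote'] for the final state of this test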
Exemplo n.º 22
0
def test_initremote_basic_fileurl(storepath=None):
    _test_initremote_basic(
        "ria+{}".format(Path(storepath).as_uri()),
        LocalIO(),
        storepath,
    )
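For completeness, a hypothetical SSH counterpart of the wrapper above,
following the ria+ssh URL pattern used in the other examples; the host name
'datalad-test' is a placeholder and not part of the original test suite:

def test_initremote_basic_sshurl(storepath=None):
    # placeholder host; a real test would point at the configured SSH target
    host = 'datalad-test'
    _test_initremote_basic(
        "ria+ssh://{host}{path}".format(host=host, path=storepath),
        SSHRemoteIO(host),
        storepath,
    )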