def _get_flexible_source_candidates_for_submodule(ds, sm_path, sm_url=None):
    """Retrieve candidate locations from where to install the submodule

    Even when a URL for the submodule is provided explicitly, URLs derived
    from the parent dataset's tracking-branch remote are tried first.

    Parameters
    ----------
    ds : Dataset
      Parent dataset of the to-be-installed submodule.
    sm_path : str
      Path of the submodule within the parent dataset.
    sm_url : str, optional
      URL recorded for the submodule (e.g. from .gitmodules).

    Returns
    -------
    list
      Unique candidate URLs, in order of preference.
    """
    candidates = []

    # the tracking remote of the current branch is our first candidate
    tracking_remote, _ = ds.repo.get_tracking_branch()
    candidate_remotes = [tracking_remote] if tracking_remote else []

    # if we have a remote, check whether it carries the commit that last
    # touched the submodule -- any remote that does is a candidate too
    try:
        last_commit = next(ds.repo._get_files_history(sm_path)).hexsha
        # ideally should also give preference to the remotes which have
        # the same branch checked out I guess
        candidate_remotes += list(
            ds.repo._get_remotes_having_commit(last_commit))
    except StopIteration:
        # no commit for it known yet, ... oh well
        pass

    for remote in unique(candidate_remotes):
        remote_url = ds.repo.get_remote_url(remote, push=False)
        if not remote_url:
            # remote without a fetch URL cannot contribute candidates
            continue
        # attempt: submodule checkout at parent remote URL.
        # The sm_path portion may need quoting (spaces etc), but only for
        # genuine URLs -- plain paths are used verbatim
        quoted_path = (
            urlquote(sm_path)
            if isinstance(RI(remote_url), URL)
            else sm_path)
        candidates.extend(
            _get_flexible_source_candidates(
                # alternate suffixes are tested by `clone` anyways
                quoted_path, remote_url, alternate_suffix=False))
        # attempt: provided (configured?) submodule URL
        # TODO: consider supporting DataLadRI here? or would confuse
        # git and we wouldn't want that (i.e. not allow pure git clone
        # --recursive)
        if sm_url:
            candidates.extend(
                _get_flexible_source_candidates(
                    sm_url, remote_url, alternate_suffix=False))

    # Do based on the ds.path as the last resort
    if sm_url:
        candidates.extend(
            _get_flexible_source_candidates(
                sm_url, ds.path, alternate_suffix=False))

    return unique(candidates)
def get_key_url(e, schema='http', versioned=True):
    """Generate an s3:// or http:// url given a key

    Parameters
    ----------
    e : object
      S3 key-like object with `.name`, `.bucket.name`, and `.version_id`
      attributes (a `name_urlquoted` attribute is set on it as a side
      effect, so the format template can reference it).
    schema : {'http', 's3'}
      URL schema to generate.
    versioned : bool
      If True, append a `?versionId=...` suffix -- but only when a
      version id is actually known.

    Returns
    -------
    str

    Raises
    ------
    ValueError
      If `schema` is neither 'http' nor 's3'.
    """
    e.name_urlquoted = urlquote(e.name)
    if schema == 'http':
        fmt = "http://{e.bucket.name}.s3.amazonaws.com/{e.name_urlquoted}"
    elif schema == 's3':
        fmt = "s3://{e.bucket.name}/{e.name_urlquoted}"
    else:
        raise ValueError(schema)
    # BUG FIX: previously a versioned URL was produced even when
    # e.version_id was None, yielding a bogus literal "?versionId=None"
    if versioned and e.version_id is not None:
        fmt += "?versionId={e.version_id}"
    return fmt.format(e=e)
def get_key_url(e, schema='http', versioned=True):
    """Generate an s3:// or http:// url given a key

    If a versioned URL is requested but `e.version_id` is None, no
    versionId suffix is added.

    Raises
    ------
    ValueError
      If `schema` is neither 'http' nor 's3'.
    """
    # TODO: here we would need to encode the name since urlquote actually
    # can't do that on its own... but then we should get a copy of the thing
    # so we could still do the .format....
    # ... = e.name.encode('utf-8')  # unicode isn't advised in URLs
    e.name_urlquoted = urlquote(e.name)
    if schema == 'http':
        fmt = "http://{e.bucket.name}.s3.amazonaws.com/{e.name_urlquoted}"
    elif schema == 's3':
        fmt = "s3://{e.bucket.name}/{e.name_urlquoted}"
    else:
        raise ValueError(schema)
    # BUG FIX: guard against version_id being None -- previously the URL
    # ended up with a bogus literal "?versionId=None"
    if versioned and e.version_id is not None:
        fmt += "?versionId={e.version_id}"
    return fmt.format(e=e)
def get_key_url(e, schema='http', versioned=True):
    """Generate an s3:// or http:// url given a key

    if versioned url is requested but version_id is None, no versionId
    suffix will be added

    Raises
    ------
    ValueError
      If `schema` is not one of the supported schemas.
    """
    # TODO: here we would need to encode the name since urlquote actually
    # can't do that on its own... but then we should get a copy of the thing
    # so we could still do the .format....
    # ... = e.name.encode('utf-8')  # unicode isn't advised in URLs
    e.name_urlquoted = urlquote(e.name)
    # dispatch on schema via a lookup table instead of an if/elif chain
    templates = {
        'http': "http://{e.bucket.name}.s3.amazonaws.com/{e.name_urlquoted}",
        's3': "s3://{e.bucket.name}/{e.name_urlquoted}",
    }
    if schema not in templates:
        raise ValueError(schema)
    fmt = templates[schema]
    # only append the suffix when a version id is actually known
    if versioned and e.version_id is not None:
        fmt += "?versionId={e.version_id}"
    return fmt.format(e=e)
def test_get_local_file_url():
    """Check path -> file:// URL conversion on both platforms.

    BUG FIX: the original spelled the cases as
    ``(relpath_cases) + (win_cases) if on_windows else (posix_cases)``.
    The conditional expression binds looser than ``+``, so on non-Windows
    the relative-path case was silently dropped, even though the ``else``
    branch below exists specifically to handle relative paths. The extra
    parentheses around the platform-dependent tuple restore the intent.
    """
    for path, url in (
        # relpaths are special-cased below
        ('test.txt', 'test.txt'),
    ) + ((
        ('C:\\Windows\\notepad.exe', 'file://C/Windows/notepad.exe'),
    ) if on_windows else (
        (OBSCURE_FILENAME, urlquote(OBSCURE_FILENAME)),
        ('/a', 'file:///a'),
        ('/a/b/c', 'file:///a/b/c'),
        ('/a~', 'file:///a~'),
        # there are no files with trailing slashes in the name
        #('/a b/', 'file:///a%20b/'),
        ('/a b/name', 'file:///a%20b/name'),
    )):
        # Yarik found no better way to trigger. .decode() isn't enough
        print("D: %s" % path)
        if isabs(path):
            eq_(get_local_file_url(path), url)
        else:
            # relative paths are resolved against the current directory
            eq_(get_local_file_url(path),
                '/'.join((get_local_file_url(os.getcwd()), url)))
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """End-to-end check of `create_sibling` against a local ssh target.

    Exercises: initial sibling creation, failure on an existing target
    without replace, `existing='replace'`, explicit target/push URLs, and
    `existing='reconfigure'` (verified via file digests/mtimes).
    NOTE(review): relies on a working `ssh://localhost` setup -- confirm
    in CI environment.
    """
    # prepare src
    source = install(src_path, source=origin, result_xfm='datasets',
                     return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost:22",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir"
            % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        # NOTE(review): the file handle is not closed explicitly here
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # Strip entries that legitimately differ between runs (hook logs,
            # metadata, annex leftovers) so digests/mtimes can be compared.
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)
                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k for k in mtimes
            if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def _get_flexible_source_candidates_for_submodule(ds, sm):
    """Assemble candidate locations from where to clone a submodule

    The following locations candidates are considered. For each candidate a
    cost is given in parenthesis, lower values indicate higher cost:

    - URL of any configured superdataset remote that is known to have the
      desired submodule commit, with the submodule path appended to it.
      There can be more than one candidate (cost 500).

    - A URL or absolute path recorded in `.gitmodules` (cost 600).

    - In case `.gitmodules` contains a relative path instead of a URL, the
      URL of any configured superdataset remote that is known to have the
      desired submodule commit, with this relative path appended to it.
      There can be more than one candidate (cost 500).

    - In case `.gitmodules` contains a relative path as a URL, the absolute
      path of the superdataset, appended with this relative path (cost 900).

    Additional candidate URLs can be generated based on templates specified
    as configuration variables with the pattern

      `datalad.get.subdataset-source-candidate-<name>`

    where `name` is an arbitrary identifier. If name starts with three digits
    (e.g. '400myserver') these will be interpreted as a cost, and the
    respective candidate will be sorted into the generated candidate list
    according to this cost. If no cost is given, a default of 700 is used.

    A template string assigned to such a variable can utilize the Python
    format mini language and may reference a number of properties that are
    inferred from the parent dataset's knowledge about the target
    subdataset. Properties include any submodule property specified in the
    respective `.gitmodules` record. For convenience, an existing
    `datalad-id` record is made available under the shortened name `id`.

    Additionally, the URL of any configured remote that contains the
    respective submodule commit is available as `remote-<name>` properties,
    where `name` is the configured remote name.

    Lastly, all candidates are sorted according to their cost (lower values
    first), and duplicate URLs are stripped, while preserving the first item
    in the candidate list.

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.

    Returns
    -------
    list of dict
      Where each dict has keys 'cost' (int), 'name' (str), 'url' (str).
      Names are not unique and either derived from the name of the
      respective remote, template configuration variable, or 'local'.
    """
    # short cuts
    ds_repo = ds.repo
    sm_url = sm.get('gitmodule_url', None)
    sm_path = op.relpath(sm['path'], start=sm['parentds'])

    clone_urls = []

    # CANDIDATE: tracking remote of the current branch
    tracking_remote, tracking_branch = ds_repo.get_tracking_branch()
    candidate_remotes = [tracking_remote] if tracking_remote else []

    # if we have a remote, let's check the location of that remote
    # for the presence of the desired submodule
    last_commit = ds_repo.get_last_commit_hexsha(sm_path)
    if last_commit:
        # CANDIDATE: any remote that has the commit when the submodule was
        # last modified
        # ideally should also give preference to the remotes which have
        # the same branch checked out I guess
        candidate_remotes += list(
            _get_remotes_having_commit(ds_repo, last_commit))

    # prepare a dict to generate URL candidates from templates.
    # Strips the 'gitmodule_' prefix (10 chars) from each submodule
    # property key; 'datalad-id' is shortened to 'id' for convenience.
    sm_candidate_props = {
        k[10:].replace('datalad-id', 'id'): v
        for k, v in sm.items()
        if k.startswith('gitmodule_')
    }

    for remote in unique(candidate_remotes):
        remote_url = ds_repo.get_remote_url(remote, push=False)
        # Directly on parent's ds url
        if remote_url:
            # make remotes and their URLs available to template rendering
            sm_candidate_props['remoteurl-{}'.format(remote)] = remote_url
            # attempt: submodule checkout at parent remote URL
            # We might need to quote sm_path portion, e.g. for spaces etc
            if isinstance(RI(remote_url), URL):
                sm_path_url = urlquote(sm_path)
            else:
                sm_path_url = sm_path

            clone_urls.extend(
                dict(cost=500, name=remote, url=url)
                for url in _get_flexible_source_candidates(
                    # alternate suffixes are tested by `clone` anyways
                    sm_path_url, remote_url, alternate_suffix=False))

            # attempt: provided (configured?) submodule URL
            # TODO: consider supporting DataLadRI here? or would confuse
            # git and we wouldn't want that (i.e. not allow pure git clone
            # --recursive)
            if sm_url:
                clone_urls.extend(
                    dict(cost=600, name=remote, url=url)
                    for url in _get_flexible_source_candidates(
                        sm_url, remote_url, alternate_suffix=False))

    # matches a candidate name that starts with a three-digit cost prefix
    cost_candidate_expr = re.compile('[0-9][0-9][0-9].*')
    candcfg_prefix = 'datalad.get.subdataset-source-candidate-'
    for name, tmpl in [(c[len(candcfg_prefix):], ds_repo.config[c])
                       for c in ds_repo.config.keys()
                       if c.startswith(candcfg_prefix)]:
        # NOTE(review): tmpl.format raises KeyError if the template
        # references a property not present in sm_candidate_props
        url = tmpl.format(**sm_candidate_props)
        # we don't want "flexible_source_candidates" here, this is
        # configuration that can be made arbitrarily precise from the
        # outside. Additional guesswork can only make it slower
        has_cost = cost_candidate_expr.match(name) is not None
        clone_urls.append(
            # assign a default cost, if a config doesn't have one
            dict(
                cost=int(name[:3]) if has_cost else 700,
                name=name[3:] if has_cost else name,
                url=url,
                from_config=True,
            ))

    # CANDIDATE: the actual configured gitmodule URL
    if sm_url:
        clone_urls.extend(
            dict(cost=900, name='local', url=url)
            for url in _get_flexible_source_candidates(
                sm_url, ds.path, alternate_suffix=False)
            # avoid inclusion of submodule location itself
            if url != sm['path'])

    # sort all candidates by their label, thereby allowing a
    # candidate provided by configuration to purposefully
    # sort before or after automatically generated configuration
    clone_urls = sorted(clone_urls, key=lambda x: x['cost'])
    # take out any duplicate source candidates
    # unique() takes out the duplicated at the tail end
    clone_urls = unique(clone_urls, lambda x: x['url'])
    return clone_urls
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """End-to-end check of `create_sibling` against a local ssh target.

    Covers initial creation, failure on an existing target dir, replacement
    via `existing='replace'`, explicit target/push URLs, publishing, and
    `existing='reconfigure'` (verified by comparing file digests/mtimes).
    NOTE(review): relies on a working `ssh://localhost` setup -- confirm
    in CI environment.
    """
    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir"
        % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        # NOTE(review): the file handle is not closed explicitly here
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # Strip entries that legitimately differ between runs (hook logs,
            # metadata, annex leftovers) so digests/mtimes can be compared.
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests
                             if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)
                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'),
            '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests
                                  if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def _get_flexible_source_candidates_for_submodule(ds, sm):
    """Assemble candidates from where to install a submodule

    Even if a URL for submodule is provided explicitly -- first tries urls under
    parent's module tracking branch remote.

    Additional candidate URLs can be generated based on templates specified
    as configuration variables with the pattern

      `datalad.get.subdataset-source-candidate-<name>`

    where `name` is an arbitrary identifier.

    A template string assigned to such a variable can utilize the Python
    format mini language and may reference a number of properties that are
    inferred from the parent dataset's knowledge about the target
    subdataset. Properties include any submodule property specified in the
    respective .gitmodules record. For convenience, an existing
    `datalad-id` record is made available under the shortened name `id`.

    Additionally, the URL of any configured remote that contains the
    respective submodule commit is available as `remote-<name>` properties,
    where `name` is the configured remote name.

    Parameters
    ----------
    ds : Dataset
      Parent dataset of to-be-installed subdataset.
    sm : dict
      Submodule record as produced by `subdatasets()`.

    Returns
    -------
    list of tuples
      Where each tuple consists of a name and a URL. Names are not unique
      and either derived from the name of the respective remote, template
      configuration variable, or 'local' for the candidate URL that was
      obtained from the .gitmodule record.
    """
    # short cuts
    ds_repo = ds.repo
    sm_url = sm.get('gitmodule_url', None)
    sm_path = op.relpath(sm['path'], start=sm['parentds'])

    clone_urls = []

    # CANDIDATE: tracking remote of the current branch
    tracking_remote, tracking_branch = ds_repo.get_tracking_branch()
    candidate_remotes = [tracking_remote] if tracking_remote else []

    # if we have a remote, let's check the location of that remote
    # for the presence of the desired submodule
    last_commit = ds_repo.get_last_commit_hexsha(sm_path)
    if last_commit:
        # CANDIDATE: any remote that has the commit when the submodule was
        # last modified
        # ideally should also give preference to the remotes which have
        # the same branch checked out I guess
        candidate_remotes += list(
            _get_remotes_having_commit(ds_repo, last_commit))

    # prepare a dict to generate URL candidates from templates; strips the
    # 'gitmodule_' prefix (10 chars), shortening 'datalad-id' to 'id'
    sm_candidate_props = {
        k[10:].replace('datalad-id', 'id'): v
        for k, v in sm.items()
        if k.startswith('gitmodule_')
    }

    for remote in unique(candidate_remotes):
        remote_url = ds_repo.get_remote_url(remote, push=False)
        # Directly on parent's ds url
        if remote_url:
            # make remotes and their URLs available to template rendering
            sm_candidate_props['remoteurl-{}'.format(remote)] = remote_url
            # attempt: submodule checkout at parent remote URL
            # We might need to quote sm_path portion, e.g. for spaces etc
            if isinstance(RI(remote_url), URL):
                sm_path_url = urlquote(sm_path)
            else:
                sm_path_url = sm_path

            clone_urls.extend(
                (remote, url)
                for url in _get_flexible_source_candidates(
                    # alternate suffixes are tested by `clone` anyways
                    sm_path_url, remote_url, alternate_suffix=False))

            # attempt: provided (configured?) submodule URL
            # TODO: consider supporting DataLadRI here? or would confuse
            # git and we wouldn't want that (i.e. not allow pure git clone
            # --recursive)
            if sm_url:
                clone_urls.extend(
                    (remote, url)
                    for url in _get_flexible_source_candidates(
                        sm_url, remote_url, alternate_suffix=False))

    # BUG FIX: previously the candidate name was taken as c[12:], which
    # strips only 'datalad.get.' and leaves the full
    # 'subdataset-source-candidate-' prefix glued onto the name. Strip
    # the entire configuration-variable prefix instead.
    candcfg_prefix = 'datalad.get.subdataset-source-candidate-'
    for name, tmpl in [
            (c[len(candcfg_prefix):], ds_repo.config[c])
            for c in ds_repo.config.keys()
            if c.startswith(candcfg_prefix)]:
        url = tmpl.format(**sm_candidate_props)
        # we don't want "flexible_source_candidates" here, this is
        # configuration that can be made arbitrarily precise from the
        # outside. Additional guesswork can only make it slower
        clone_urls.append((name, url))

    # CANDIDATE: the actual configured gitmodule URL
    if sm_url:
        clone_urls.extend(
            ('local', url)
            for url in _get_flexible_source_candidates(
                sm_url, ds.path, alternate_suffix=False)
            # avoid inclusion of submodule location itself
            if url != sm['path'])

    return unique(clone_urls, lambda x: x[1])