def verify_ria_url(url, cfg):
    """Validate a RIA store URL and split it into its components.

    The given URL is first passed through ``rewrite_url()`` using `cfg`.
    The rewritten URL must use a ``ria+<protocol>`` scheme, must not carry
    a fragment, and `<protocol>` must be one of ``ssh``, ``file``, ``http``
    or ``https``. Examples of accepted URLs::

      ria+ssh://somehost:/path/to/store
      ria+file:///path/to/store

    Parameters
    ----------
    url : str
      URL to verify and decode.
    cfg : dict-like
      Configuration settings for rewrite_url()

    Raises
    ------
    ValueError
      If `url` is empty, the rewritten URL lacks the ``ria+`` scheme prefix,
      contains a fragment, or uses an unsupported protocol.

    Returns
    -------
    tuple
      (host, base-path, rewritten url)
      `host` is ``None`` for the ``file`` protocol; `base-path` falls back
      to ``'/'`` when the URL has no path.
    """
    from datalad.config import rewrite_url
    from datalad.support.network import URL

    if not url:
        raise ValueError("Got no URL")

    url = rewrite_url(cfg, url)
    parsed = URL(url)

    if not parsed.scheme.startswith('ria+'):
        raise ValueError("Missing ria+ prefix in final URL: %s" % url)
    if parsed.fragment:
        raise ValueError(
            "Unexpected fragment in RIA-store URL: %s" % parsed.fragment)

    # everything after the 'ria+' prefix names the transport protocol
    protocol = parsed.scheme[len('ria+'):]
    if protocol not in ('ssh', 'file', 'http', 'https'):
        raise ValueError("Unsupported protocol: %s. "
                         "Supported: ssh, file, http(s)" % protocol)

    # local (file) stores have no host
    if protocol == 'file':
        host = None
    else:
        host = parsed.hostname

    return host, parsed.path or '/', url
def test_rewrite_url():
    """Exercise ``rewrite_url()`` with ``url.<base>.insteadof`` settings.

    Covers plain label replacement, protocol replacement, multi-value
    definitions, and conflicting definitions. For the conflicting labels
    the input is expected back unchanged, together with a logged
    "Ignoring URL rewrite" warning.
    """
    # replacement base -> label(s) that should be rewritten to it
    insteadof = {
        # label rewrite
        '[email protected]:': 'example:',
        # protocol change
        'https://example': 'git://example',
        # multi-value
        'ria+ssh://fully.qualified.com': ('mylabel', 'myotherlabel'),
        # conflicting definitions
        'http://host1': 'conflict',
        'http://host2': 'conflict',
        # hidden conflict
        'http://host3': 'conflict2',
        'http://host4': ('someokish', 'conflict2'),
    }
    cfg = dict(
        ('url.{}.insteadof'.format(base), labels)
        for base, labels in insteadof.items()
    )

    # (given URL, expected URL after rewriting)
    expectations = (
        # no match
        ('unicorn', 'unicorn'),
        # custom label replacement
        ('example:datalad/datalad.git', '[email protected]:datalad/datalad.git'),
        # protocol enforcement
        ('git://example.com/some', 'https://example.com/some'),
        # multi-match
        ('mylabel', 'ria+ssh://fully.qualified.com'),
        ('myotherlabel', 'ria+ssh://fully.qualified.com'),
        # conflicts, same label pointing to different URLs
        ('conflict', 'conflict'),
        # also conflicts, but hidden in a multi-value definition
        ('conflict2', 'conflict2'),
    )

    for given, expected in expectations:
        with swallow_logs(logging.WARNING) as captured:
            assert_equal(rewrite_url(cfg, given), expected)
            # conflicting definitions must be reported, not silently dropped
            if given.startswith('conflict'):
                assert_in("Ignoring URL rewrite", captured.out)
def verify_ria_url(url, cfg):
    """Verify and decode a RIA store URL.

    Expects a ria-URL pointing to a RIA store, applies rewrites via
    ``rewrite_url()`` and decodes the host and base path of the store from
    the rewritten URL. Raises if the URL is considered invalid. Examples of
    accepted URLs::

      ria+ssh://somehost:/path/to/store
      ria+file:///path/to/store

    Parameters
    ----------
    url : str
      URL to verify and decode.
    cfg : dict-like
      Configuration settings for rewrite_url()

    Raises
    ------
    ValueError
      If `url` is empty, the rewritten URL lacks the ``ria+`` scheme prefix,
      contains a fragment, or uses a protocol other than ssh, file, http(s).

    Returns
    -------
    tuple
      (host, base-path, rewritten url)
      `host` is not just a hostname, but is a stub URL that may also contain
      username, password, and port, if specified in a given URL. It is
      ``None`` for the ``file`` protocol. `base-path` falls back to ``'/'``
      when the URL has no path.
    """
    # Reject empty input before doing anything else (including the imports
    # below), so invalid calls fail fast with the documented ValueError.
    if not url:
        raise ValueError("Got no URL")

    from datalad.config import rewrite_url
    from datalad.support.network import URL

    url = rewrite_url(cfg, url)
    url_ri = URL(url)
    if not url_ri.scheme.startswith('ria+'):
        raise ValueError("Missing ria+ prefix in final URL: %s" % url)
    if url_ri.fragment:
        raise ValueError(
            "Unexpected fragment in RIA-store URL: %s" % url_ri.fragment)

    # everything after the 'ria+' prefix names the transport protocol
    protocol = url_ri.scheme[4:]
    if protocol not in ['ssh', 'file', 'http', 'https']:
        raise ValueError("Unsupported protocol: %s. "
                         "Supported: ssh, file, http(s)" % protocol)

    username = url_ri.username or ''
    password = url_ri.password or ''
    # The '@' delimiter must follow ANY userinfo. Keying it on the username
    # alone would glue a password directly onto the hostname for URLs with
    # an empty username (e.g. 'ria+ssh://:secret@host/...').
    has_userinfo = bool(username or password)

    host = '{proto}://{user}{pdlm}{passwd}{udlm}{host}{portdlm}{port}'.format(
        proto=protocol,
        user=username,
        pdlm=':' if password else '',
        passwd=password,
        udlm='@' if has_userinfo else '',
        host=url_ri.hostname or '',
        portdlm=':' if url_ri.port else '',
        port=url_ri.port or '',
    )
    # this != file is critical behavior, if removed, it will ruin the IO
    # selection in RIARemote!!
    return host if protocol != 'file' else None, \
        url_ri.path if url_ri.path else '/', \
        url