Example #1
0
def test_url_base():
    """Exercise basic construction, field access, repr/eq and param handling of URL."""
    # Supplying both a full URL string and an explicit field is an error
    assert_raises(ValueError,
                  URL,
                  "http://example.com",
                  hostname='example.com')

    parsed = URL("http://example.com")
    eq_(parsed.hostname, 'example.com')
    eq_(parsed.scheme, 'http')
    # fields absent from the URL come back as empty strings, not None
    eq_(parsed.port, '')
    eq_(parsed.username, '')
    eq_(repr(parsed), "URL(hostname='example.com', scheme='http')")
    # __eq__ coerces a plain string operand before comparing
    eq_(parsed, "http://example.com")

    neq_(URL(), URL(hostname='x'))

    hostless = URL('smth')
    eq_(hostless.hostname, '')
    ok_(bool(hostless))
    nok_(bool(URL()))

    # unknown field names are rejected
    assert_raises(ValueError, parsed._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as logs:
        # params are not supported, so their presence triggers a warning
        purl = URL("http://example.com/;param")
        # str() preserves the original string verbatim...
        eq_(str(purl),
            'http://example.com/;param')
        assert_in('ParseResults contains params', logs.out)
        # ...while the normalized form drops the params
        eq_(purl.as_str(), 'http://example.com/')
Example #2
0
def get_repo_url(repo, access_protocol, github_login):
    """Report the repository access URL for Git matching the protocol"""
    # KeyError for anything other than 'https'/'ssh' is intentional
    candidates = {'https': repo.clone_url, 'ssh': repo.ssh_url}
    access_url = candidates[access_protocol]
    if github_login and access_protocol == 'https':
        # we were provided explicit github login.  For ssh access it is
        # impossible to specify different login within ssh RI, but it is
        # possible to do so for https logins
        url_rec = URL(access_url)
        assert url_rec.scheme in ('http', 'https')
        url_rec.username = github_login
        access_url = url_rec.as_str()
    return access_url
Example #3
0
def get_repo_url(repo, access_protocol, github_login):
    """Report the repository access URL for Git matching the protocol"""
    # unknown protocols raise KeyError here, by design
    prop = {'ssh': repo.ssh_url, 'https': repo.clone_url}[access_protocol]
    if access_protocol == 'https' and github_login:
        # An explicit github login was given.  An ssh RI cannot carry an
        # alternate login, but an https one can, so inject it there.
        parsed = URL(prop)
        assert parsed.scheme in ('http', 'https')
        parsed.username = github_login
        prop = parsed.as_str()
    return prop
Example #4
0
def get_versioned_url(url,
                      guarantee_versioned=False,
                      return_all=False,
                      verify=False,
                      s3conn=None,
                      update=False):
    """Given a url return a versioned URL

    Originally targeting AWS S3 buckets with versioning enabled

    Parameters
    ----------
    url : string
    guarantee_versioned : bool, optional
      Would fail if buckets is determined to have no versioning enabled.
      It will not fail if we fail to determine if bucket is versioned or
      not
    return_all: bool, optional
      If True, would return a list with URLs for all the versions of this
      file, sorted chronologically with latest first (when possible, e.g.
      for S3).  Remove markers get ignored
    verify: bool, optional
      Verify that URL is accessible. As discovered some versioned keys might
      be denied access to
    s3conn : optional
      An open S3 connection (presumably boto -- TODO confirm) to use for
      bucket access; if None, a connection is established via the
      configured providers
    update : bool, optional
      If the URL already contains a version ID, update it to the latest version
      ID.  This option has no effect if return_all is true.

    Returns
    -------
    string or list of string

    Raises
    ------
    RuntimeError
      If `guarantee_versioned` and no versioned URL could be produced
    ValueError
      If an S3 hostname is used with a scheme other than http(s)
    """
    url_rec = URL(url)

    s3_bucket, fpath = None, url_rec.path.lstrip('/')

    if url_rec.hostname.endswith('.s3.amazonaws.com'):
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # virtual-hosted style: BUCKET.s3.amazonaws.com/PATH
        s3_bucket = url_rec.hostname.split('.', 1)[0]
    elif url_rec.hostname == 's3.amazonaws.com':
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" %
                             url_rec.scheme)
        # path style: s3.amazonaws.com/BUCKET/PATH
        s3_bucket, fpath = fpath.split('/', 1)
    elif url_rec.scheme == 's3':
        s3_bucket = url_rec.hostname  # must be
        # and for now implement magical conversion to URL
        # TODO: wouldn't work if needs special permissions etc
        # actually for now
        raise NotImplementedError

    was_versioned = False
    all_versions = []
    if s3_bucket:
        # TODO: cache
        if s3conn is None:
            # we need to reuse our providers
            from ..downloaders.providers import Providers
            providers = Providers.from_config_files()
            s3url = "s3://%s/" % s3_bucket
            s3provider = providers.get_provider(s3url)
            authenticator = s3provider.authenticator
            if authenticator.bucket is not None \
                    and authenticator.bucket.name == s3_bucket:
                # we have established connection before, so let's just reuse
                bucket = authenticator.bucket
            else:
                bucket = authenticator.authenticate(
                    s3_bucket, s3provider.credential
                )
        else:
            bucket = s3conn.get_bucket(s3_bucket)

        try:
            supports_versioning = bucket.get_versioning_status()  # TODO cache
        except S3ResponseError:
            # might be forbidden, i.e. "403 Forbidden" so we try then anyways
            supports_versioning = 'maybe'

        if supports_versioning:
            # Filter to exact-name matches and sort so the newest is first
            all_keys = [
                x for x in sorted(bucket.list_versions(fpath),
                                  key=lambda x: (x.last_modified, x.is_latest))
                if x.name == fpath  # match exact name, not just prefix
            ][::-1]
            if all_keys:
                # our current assumption: newest entry carries is_latest
                assert all_keys[0].is_latest
            # and now filter out delete markers etc (ignore DeleteMarkers);
            # if nothing remains, we simply could not version this URL and
            # fall through with was_versioned == False
            all_keys = [x for x in all_keys if isinstance(x, Key)]

            for key in all_keys:
                url_versioned = add_version_to_url(url_rec,
                                                   key.version_id,
                                                   replace=update
                                                   and not return_all)

                all_versions.append(url_versioned)
                if verify:
                    # it would throw HTTPError exception if not accessible
                    _ = urlopen(Request(url_versioned))
                was_versioned = True
                if not return_all:
                    break

    if guarantee_versioned and not was_versioned:
        raise RuntimeError("Could not version %s" % url)

    if not all_versions:
        # we didn't get a chance
        all_versions = [url_rec.as_str()]

    if return_all:
        return all_versions
    else:
        return all_versions[0]
Example #5
0
File: s3.py  Project: datalad/datalad
def get_versioned_url(url, guarantee_versioned=False, return_all=False, verify=False,
                      s3conn=None, update=False):
    """Given a url return a versioned URL

    Originally targeting AWS S3 buckets with versioning enabled

    Parameters
    ----------
    url : string
    guarantee_versioned : bool, optional
      Would fail if buckets is determined to have no versioning enabled.
      It will not fail if we fail to determine if bucket is versioned or
      not
    return_all: bool, optional
      If True, would return a list with URLs for all the versions of this
      file, sorted chronologically with latest first (when possible, e.g.
      for S3).  Remove markers get ignored
    verify: bool, optional
      Verify that URL is accessible. As discovered some versioned keys might
      be denied access to
    s3conn : optional
      An open S3 connection (presumably boto -- TODO confirm) to use for
      bucket access; if None, a connection is established via the
      configured providers
    update : bool, optional
      If the URL already contains a version ID, update it to the latest version
      ID.  This option has no effect if return_all is true.

    Returns
    -------
    string or list of string

    Raises
    ------
    RuntimeError
      If `guarantee_versioned` and no versioned URL could be produced
    ValueError
      If an S3 hostname is used with a scheme other than http(s)
    """
    url_rec = URL(url)

    s3_bucket, fpath = None, url_rec.path.lstrip('/')

    if url_rec.hostname.endswith('.s3.amazonaws.com'):
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" % url_rec.scheme)
        # virtual-hosted style: BUCKET.s3.amazonaws.com/PATH
        s3_bucket = url_rec.hostname.split('.', 1)[0]
    elif url_rec.hostname == 's3.amazonaws.com':
        if url_rec.scheme not in ('http', 'https'):
            raise ValueError("Do not know how to handle %s scheme" % url_rec.scheme)
        # path style: s3.amazonaws.com/BUCKET/PATH
        s3_bucket, fpath = fpath.split('/', 1)
    elif url_rec.scheme == 's3':
        s3_bucket = url_rec.hostname  # must be
        # and for now implement magical conversion to URL
        # TODO: wouldn't work if needs special permissions etc
        # actually for now
        raise NotImplementedError

    was_versioned = False
    all_versions = []
    if s3_bucket:
        # TODO: cache
        if s3conn is None:
            # we need to reuse our providers
            from ..downloaders.providers import Providers
            providers = Providers.from_config_files()
            s3url = "s3://%s/" % s3_bucket
            s3provider = providers.get_provider(s3url)
            if s3provider.authenticator.bucket is not None and s3provider.authenticator.bucket.name == s3_bucket:
                # we have established connection before, so let's just reuse
                bucket = s3provider.authenticator.bucket
            else:
                bucket = s3provider.authenticator.authenticate(s3_bucket, s3provider.credential)
        else:
            bucket = s3conn.get_bucket(s3_bucket)

        try:
            supports_versioning = bucket.get_versioning_status()  # TODO cache
        except S3ResponseError:
            # might be forbidden, i.e. "403 Forbidden" so we try then anyways
            supports_versioning = 'maybe'

        if supports_versioning:
            all_keys = bucket.list_versions(fpath)
            # Filter and sort them so the newest one on top
            all_keys = [x for x in sorted(all_keys, key=lambda x: (x.last_modified, x.is_latest))
                        if x.name == fpath  # match exact name, not just prefix
                        ][::-1]
            if all_keys:
                # our current assumption: newest entry carries is_latest
                assert all_keys[0].is_latest
            # and now filter out delete markers etc (ignore DeleteMarkers);
            # if nothing remains, we simply could not version this URL and
            # fall through with was_versioned == False
            all_keys = [x for x in all_keys if isinstance(x, Key)]

            for key in all_keys:
                url_versioned = add_version_to_url(
                    url_rec, key.version_id, replace=update and not return_all)

                all_versions.append(url_versioned)
                if verify:
                    # it would throw HTTPError exception if not accessible
                    _ = urlopen(Request(url_versioned))
                was_versioned = True
                if not return_all:
                    break

    if guarantee_versioned and not was_versioned:
        raise RuntimeError("Could not version %s" % url)

    if not all_versions:
        # we didn't get a chance
        all_versions = [url_rec.as_str()]

    if return_all:
        return all_versions
    else:
        return all_versions[0]