Exemplo n.º 1
0
Arquivo: web.py Projeto: key4hep/spack
def url_exists(url):
    """Return True if the resource at ``url`` exists.

    Local paths are checked with ``os.path.exists``; ``s3://`` URLs via a
    ``get_object`` probe; ``gs://`` URLs via the GCS blob API.  Any other
    scheme falls back to attempting a read and treating a non-throwing
    response as existence.
    """
    url = url_util.parse(url)
    local_path = url_util.local_file_path(url)
    if local_path:
        return os.path.exists(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        try:
            # S3 object keys never carry the URL path's leading '/'.
            s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
            return True
        # NOTE(review): boto3 clients expose modeled errors on
        # ``client.exceptions``; the original ``s3.ClientError`` attribute
        # does not exist and would raise AttributeError here.
        except s3.exceptions.ClientError as err:
            if err.response['Error']['Code'] == 'NoSuchKey':
                return False
            # Anything else (auth, throttling, network) is a real error;
            # bare ``raise`` preserves the original traceback.
            raise

    elif url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(url)
        return gcs.exists()

    # otherwise, just try to "read" from the URL, and assume that *any*
    # non-throwing response contains the resource represented by the URL
    try:
        read_from_url(url)
        return True
    except (SpackWebError, URLError):
        return False
Exemplo n.º 2
0
def push_to_url(local_file_path,
                remote_path,
                keep_original=True,
                extra_args=None):
    """Upload ``local_file_path`` to ``remote_path``.

    Supports local filesystem destinations, ``s3://`` and ``gs://`` URLs.

    Args:
        local_file_path: path of the file to upload.
        remote_path: destination URL or path.
        keep_original: if False, the local file is removed after upload.
        extra_args: optional dict forwarded to S3 ``upload_file``.

    Raises:
        NotImplementedError: for URL schemes other than file/s3/gs.
    """
    if sys.platform == "win32":
        # A bare drive path like "C:\..." needs an explicit file:// scheme
        # so the drive letter is not parsed as a URL scheme.  Guard the
        # index so a short path (e.g. ".") does not raise IndexError.
        if len(remote_path) > 1 and remote_path[1] == ':':
            remote_path = "file://" + remote_path
    remote_url = url_util.parse(remote_path)
    verify_ssl = spack.config.get('config:verify_ssl')

    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
        warn_no_ssl_cert_checking()

    remote_file_path = url_util.local_file_path(remote_url)
    if remote_file_path is not None:
        mkdirp(os.path.dirname(remote_file_path))
        if keep_original:
            shutil.copy(local_file_path, remote_file_path)
        else:
            try:
                rename(local_file_path, remote_file_path)
            except OSError as e:
                if e.errno == errno.EXDEV:
                    # NOTE(opadron): The above move failed because it crosses
                    # filesystem boundaries.  Copy the file (plus original
                    # metadata), and then delete the original.  This operation
                    # needs to be done in separate steps.
                    shutil.copy2(local_file_path, remote_file_path)
                    os.remove(local_file_path)
                else:
                    raise

    elif remote_url.scheme == 's3':
        if extra_args is None:
            extra_args = {}

        # S3 object keys never carry the URL path's leading slashes.
        remote_path = remote_url.path
        while remote_path.startswith('/'):
            remote_path = remote_path[1:]

        s3 = s3_util.create_s3_session(
            remote_url,
            connection=s3_util.get_mirror_connection(remote_url))  # noqa: E501
        s3.upload_file(local_file_path,
                       remote_url.netloc,
                       remote_path,
                       ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    elif remote_url.scheme == 'gs':
        gcs = gcs_util.GCSBlob(remote_url)
        gcs.upload_to_blob(local_file_path)
        if not keep_original:
            os.remove(local_file_path)

    else:
        raise NotImplementedError('Unrecognized URL scheme: {SCHEME}'.format(
            SCHEME=remote_url.scheme))
Exemplo n.º 3
0
def list_url(url):
    """Return the entries directly under ``url``.

    Local directories are listed with ``os.listdir``; for ``s3://`` URLs the
    unique first path components of all keys under the prefix are returned.
    """
    parsed = url_util.parse(url)

    path_on_disk = url_util.local_file_path(parsed)
    if path_on_disk:
        return os.listdir(path_on_disk)

    if parsed.scheme == 's3':
        session = s3_util.create_s3_session(parsed)
        top_level = set(
            key.split('/', 1)[0] for key in _iter_s3_prefix(session, parsed))
        return list(top_level)
Exemplo n.º 4
0
Arquivo: web.py Projeto: eic/spack
def remove_url(url):
    """Delete the resource at ``url`` (a local file or an S3 object).

    URLs with other schemes are silently ignored.
    """
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        os.remove(local_path)
        return

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        # S3 object keys never carry the leading '/' from the URL path;
        # strip it so the intended key is deleted (consistent with the
        # key handling used when reading/checking objects).
        s3.delete_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
        return
Exemplo n.º 5
0
def remove_url(url, recursive=False):
    """Delete the resource at ``url``.

    Args:
        url: local path, ``s3://`` or ``gs://`` URL to delete.
        recursive: if True, delete a directory tree / every object under
            the prefix instead of a single file/object.

    URLs with other schemes are silently ignored.
    """
    url = url_util.parse(url)

    local_path = url_util.local_file_path(url)
    if local_path:
        if recursive:
            shutil.rmtree(local_path)
        else:
            os.remove(local_path)
        return

    if url.scheme == 's3':
        # Try to find a mirror for potential connection information
        s3 = s3_util.create_s3_session(
            url, connection=s3_util.get_mirror_connection(url))  # noqa: E501
        bucket = url.netloc
        if recursive:
            # Because list_objects_v2 can only return up to 1000 items
            # at a time, we have to paginate to make sure we get it all
            prefix = url.path.strip('/')
            paginator = s3.get_paginator('list_objects_v2')
            pages = paginator.paginate(Bucket=bucket, Prefix=prefix)

            # Accumulate keys into batched delete_objects requests.
            delete_request = {'Objects': []}
            for item in pages.search('Contents'):
                # An empty prefix yields a single None item; skip it.
                if not item:
                    continue

                delete_request['Objects'].append({'Key': item['Key']})

                # Make sure we do not try to hit S3 with a list of more
                # than 1000 items
                if len(delete_request['Objects']) >= 1000:
                    r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
                    _debug_print_delete_results(r)
                    delete_request = {'Objects': []}

            # Delete any items that remain
            if len(delete_request['Objects']):
                r = s3.delete_objects(Bucket=bucket, Delete=delete_request)
                _debug_print_delete_results(r)
        else:
            # Single object: keys never carry the URL path's leading '/'.
            s3.delete_object(Bucket=bucket, Key=url.path.lstrip('/'))
        return

    elif url.scheme == 'gs':
        if recursive:
            bucket = gcs_util.GCSBucket(url)
            bucket.destroy(recursive=recursive)
        else:
            blob = gcs_util.GCSBlob(url)
            blob.delete_blob()
        return
Exemplo n.º 6
0
def _s3_open(url):
    """Fetch an ``s3://`` URL and return ``(url, headers, stream)``.

    The returned stream is the object body wrapped in ``WrapStream``; the
    headers come from the S3 response metadata.
    """
    parsed = url_util.parse(url)
    session = s3_util.create_s3_session(parsed)

    # Drop the single leading '/' the URL path carries; S3 keys omit it.
    raw_path = parsed.path
    object_key = raw_path[1:] if raw_path.startswith('/') else raw_path

    response = session.get_object(Bucket=parsed.netloc, Key=object_key)

    # NOTE(opadron): Apply workaround here (see above)
    body_stream = WrapStream(response['Body'])
    response_headers = response['ResponseMetadata']['HTTPHeaders']

    return url, response_headers, body_stream
Exemplo n.º 7
0
Arquivo: web.py Projeto: timkphd/spack
def push_to_url(local_file_path, remote_path, **kwargs):
    """Upload ``local_file_path`` to ``remote_path``.

    Keyword Args:
        keep_original (bool): if False, remove the local file after the
            upload/move completes (default True).
        extra_args (dict): forwarded to S3 ``upload_file`` (s3 scheme only).

    Raises:
        NotImplementedError: for URL schemes other than file/s3.
    """
    keep_original = kwargs.get('keep_original', True)

    remote_url = url_util.parse(remote_path)
    verify_ssl = spack.config.get('config:verify_ssl')

    if __UNABLE_TO_VERIFY_SSL and verify_ssl and uses_ssl(remote_url):
        warn_no_ssl_cert_checking()

    remote_file_path = url_util.local_file_path(remote_url)
    if remote_file_path is not None:
        mkdirp(os.path.dirname(remote_file_path))
        if keep_original:
            shutil.copy(local_file_path, remote_file_path)
        else:
            try:
                os.rename(local_file_path, remote_file_path)
            except OSError as e:
                if e.errno == errno.EXDEV:
                    # NOTE(opadron): The above move failed because it crosses
                    # filesystem boundaries.  Copy the file (plus original
                    # metadata), and then delete the original.  This operation
                    # needs to be done in separate steps.
                    shutil.copy2(local_file_path, remote_file_path)
                    os.remove(local_file_path)
                else:
                    # Any other OSError (permissions, missing source, ...)
                    # was previously swallowed silently, leaving the file
                    # neither moved nor copied; propagate it instead.
                    raise

    elif remote_url.scheme == 's3':
        extra_args = kwargs.get('extra_args', {})

        # S3 object keys never carry the URL path's leading slashes.
        remote_path = remote_url.path
        while remote_path.startswith('/'):
            remote_path = remote_path[1:]

        s3 = s3_util.create_s3_session(remote_url)
        s3.upload_file(local_file_path,
                       remote_url.netloc,
                       remote_path,
                       ExtraArgs=extra_args)

        if not keep_original:
            os.remove(local_file_path)

    else:
        raise NotImplementedError('Unrecognized URL scheme: {SCHEME}'.format(
            SCHEME=remote_url.scheme))
Exemplo n.º 8
0
def list_url(url, recursive=False):
    """List entries under ``url``.

    For a local directory: files directly inside it, or (recursive) every
    path under it.  For ``s3://``: every key under the prefix (recursive),
    or the unique first path components otherwise.
    """
    parsed = url_util.parse(url)

    dir_path = url_util.local_file_path(parsed)
    if dir_path:
        if recursive:
            return list(_iter_local_prefix(dir_path))
        entries = os.listdir(dir_path)
        return [name for name in entries
                if os.path.isfile(os.path.join(dir_path, name))]

    if parsed.scheme == 's3':
        session = s3_util.create_s3_session(parsed)
        all_keys = _iter_s3_prefix(session, parsed)
        if recursive:
            return list(all_keys)

        top_level = set(key.split('/', 1)[0] for key in all_keys)
        return list(top_level)
Exemplo n.º 9
0
def url_exists(url):
    """Return True if the resource at ``url`` exists.

    Local paths are checked with ``os.path.exists``; ``s3://`` URLs via a
    ``get_object`` probe.  Other schemes fall back to attempting a read and
    treating a non-throwing response as existence.
    """
    url = url_util.parse(url)
    local_path = url_util.local_file_path(url)
    if local_path:
        return os.path.exists(local_path)

    if url.scheme == 's3':
        s3 = s3_util.create_s3_session(url)
        from botocore.exceptions import ClientError
        try:
            # S3 object keys never carry the URL path's leading '/';
            # without the strip, existing keys look missing and this
            # function returns a false negative.
            s3.get_object(Bucket=url.netloc, Key=url.path.lstrip('/'))
            return True
        except ClientError as err:
            if err.response['Error']['Code'] == 'NoSuchKey':
                return False
            # bare raise preserves the original traceback
            raise

    # otherwise, just try to "read" from the URL, and assume that *any*
    # non-throwing response contains the resource represented by the URL
    try:
        read_from_url(url)
        return True
    except URLError:
        return False
Exemplo n.º 10
0
Arquivo: web.py Projeto: mcuma/spack
def list_url(url, recursive=False):
    """List entries under ``url``.

    Handles local directories, ``s3://`` prefixes (using mirror connection
    info when available), and ``gs://`` buckets.
    """
    parsed = url_util.parse(url)

    dir_path = url_util.local_file_path(parsed)
    if dir_path:
        if recursive:
            return list(_iter_local_prefix(dir_path))
        files_only = []
        for name in os.listdir(dir_path):
            if os.path.isfile(os.path.join(dir_path, name)):
                files_only.append(name)
        return files_only

    if parsed.scheme == 's3':
        # Try to find a mirror for potential connection information.
        session = s3_util.create_s3_session(
            parsed, connection=s3_util.get_mirror_connection(parsed))  # noqa: E501
        if recursive:
            return list(_iter_s3_prefix(session, parsed))

        top_level = set(
            key.split('/', 1)[0] for key in _iter_s3_prefix(session, parsed))
        return list(top_level)

    elif parsed.scheme == 'gs':
        bucket = gcs_util.GCSBucket(parsed)
        return bucket.get_all_blobs(recursive=recursive)