示例#1
0
def _copy_objects(
    batch: List[Tuple[str, str]],
    use_threads: bool,
    boto3_session: boto3.Session,
    s3_additional_kwargs: Optional[Dict[str, Any]],
) -> None:
    """Copy every ``(source, target)`` pair of ``batch`` from one S3 path to another.

    Parameters
    ----------
    batch : List[Tuple[str, str]]
        Pairs of S3 paths; each path is split into bucket and key with
        ``_utils.parse_path``.
    use_threads : bool
        Forwarded to ``TransferConfig(use_threads=...)``.
    boto3_session : boto3.Session
        Session used to build both the S3 client and the S3 resource.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        When not None, filtered through ``get_botocore_valid_kwargs`` for
        ``copy_object`` and handed to the copy call as ``ExtraArgs``.
        When None, ``ExtraArgs=None`` is passed.

    Returns
    -------
    None
    """
    _logger.debug("len(batch): %s", len(batch))
    client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)
    resource_s3: boto3.resource = _utils.resource(service_name="s3", session=boto3_session)
    boto3_kwargs: Optional[Dict[str, Any]] = (
        None
        if s3_additional_kwargs is None
        else get_botocore_valid_kwargs(
            function_name="copy_object",
            s3_additional_kwargs=s3_additional_kwargs,
        )
    )
    for source, target in batch:
        source_bucket, source_key = _utils.parse_path(path=source)
        target_bucket, target_key = _utils.parse_path(path=target)
        transfer_config = TransferConfig(num_download_attempts=10, use_threads=use_threads)
        # The copy is issued through the resource's client; the separately
        # built client is supplied as the SourceClient.
        resource_s3.meta.client.copy(
            CopySource={"Bucket": source_bucket, "Key": source_key},
            Bucket=target_bucket,
            Key=target_key,
            SourceClient=client_s3,
            ExtraArgs=boto3_kwargs,
            Config=transfer_config,
        )
示例#2
0
def _describe_object(
    path: str,
    boto3_session: boto3.Session,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    version_id: Optional[str] = None,
) -> Tuple[str, Dict[str, Any]]:
    """Run ``head_object`` for a single S3 path and return its description.

    Parameters
    ----------
    path : str
        S3 path, split into bucket and key with ``_utils.parse_path``.
    boto3_session : boto3.Session
        Session used to build the S3 client.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        When truthy, filtered through ``_fs.get_botocore_valid_kwargs`` for
        ``head_object`` and forwarded to the request.
    version_id : Optional[str]
        When truthy, sent as the ``VersionId`` request argument.

    Returns
    -------
    Tuple[str, Dict[str, Any]]
        The input ``path`` together with whatever the ``head_object`` call
        returned.
    """
    s3_client: boto3.client = _utils.client(service_name="s3", session=boto3_session)
    bucket, key = _utils.parse_path(path=path)
    head_kwargs: Dict[str, Any] = (
        _fs.get_botocore_valid_kwargs(
            function_name="head_object",
            s3_additional_kwargs=s3_additional_kwargs,
        )
        if s3_additional_kwargs
        else {}
    )
    if version_id:
        head_kwargs["VersionId"] = version_id
    # NOTE(review): _utils.try_it presumably retries the call while the given
    # exception type is raised -- confirm against the helper's definition.
    description: Dict[str, Any] = _utils.try_it(
        f=s3_client.head_object,
        ex=s3_client.exceptions.NoSuchKey,
        Bucket=bucket,
        Key=key,
        **head_kwargs,
    )
    return path, description
示例#3
0
def does_object_exist(path: str,
                      s3_additional_kwargs: Optional[Dict[str, Any]] = None,
                      boto3_session: Optional[boto3.Session] = None) -> bool:
    """Tell whether an object is present at the given S3 path.

    The check issues a ``head_object`` request; an HTTP 404 response is
    reported as "does not exist", any other client error is re-raised.

    Parameters
    ----------
    path : str
        S3 path (e.g. s3://bucket/key).
    s3_additional_kwargs : Optional[Dict[str, Any]]
        Forwarded to the botocore request. Valid parameters: "RequestPayer", "ExpectedBucketOwner".
        e.g. s3_additional_kwargs={'RequestPayer': 'requester'}
    boto3_session : boto3.Session(), optional
        Boto3 Session. The default boto3 session is used when boto3_session is None.

    Returns
    -------
    bool
        True if the object exists, False otherwise.

    Raises
    ------
    botocore.exceptions.ClientError
        When the request fails with an HTTP status code other than 404.

    Examples
    --------
    With the default boto3 session

    >>> import awswrangler as wr
    >>> wr.s3.does_object_exist('s3://bucket/key_real')
    True
    >>> wr.s3.does_object_exist('s3://bucket/key_unreal')
    False

    With a custom boto3 session

    >>> import boto3
    >>> import awswrangler as wr
    >>> wr.s3.does_object_exist('s3://bucket/key_real', boto3_session=boto3.Session())
    True
    >>> wr.s3.does_object_exist('s3://bucket/key_unreal', boto3_session=boto3.Session())
    False

    """
    s3_client: boto3.client = _utils.client(service_name="s3", session=boto3_session)
    bucket, key = _utils.parse_path(path=path)
    head_kwargs: Dict[str, Any] = (
        _fs.get_botocore_valid_kwargs(
            function_name="head_object",
            s3_additional_kwargs=s3_additional_kwargs,
        )
        if s3_additional_kwargs
        else {}
    )
    try:
        s3_client.head_object(Bucket=bucket, Key=key, **head_kwargs)
    except botocore.exceptions.ClientError as ex:
        status_code = ex.response["ResponseMetadata"]["HTTPStatusCode"]
        if status_code != 404:
            raise ex
        return False
    return True
示例#4
0
def _delete_objects(
    bucket: str,
    keys: List[str],
    boto3_session: boto3.Session,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    attempt: int = 1,
) -> None:
    """Delete a batch of keys from ``bucket``, retrying keys that hit ``InternalError``.

    Parameters
    ----------
    bucket : str
        Name of the bucket holding the keys.
    keys : List[str]
        Object keys to delete in a single ``delete_objects`` request.
    boto3_session : boto3.Session
        Session used to build the S3 client.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        When truthy, filtered through ``get_botocore_valid_kwargs`` for
        ``delete_objects`` and forwarded to the request.
    attempt : int
        1-based attempt counter; callers normally leave it at the default,
        the function increments it itself when it retries.

    Raises
    ------
    exceptions.ServiceApiError
        When any reported error is not an ``InternalError``, or when keys are
        still failing with ``InternalError`` after the retries are exhausted.
    """
    client_s3: boto3.client = _utils.client(service_name="s3",
                                            session=boto3_session)
    _logger.debug("len(keys): %s", len(keys))
    batch: List[Dict[str, str]] = [{"Key": key} for key in keys]
    if s3_additional_kwargs:
        # Validate against the operation that is actually invoked below, so
        # that list-only parameters are not forwarded to DeleteObjects and
        # delete-specific ones are not silently dropped.
        extra_kwargs: Dict[str, Any] = get_botocore_valid_kwargs(
            function_name="delete_objects",
            s3_additional_kwargs=s3_additional_kwargs)
    else:
        extra_kwargs = {}
    res = client_s3.delete_objects(Bucket=bucket,
                                   Delete={"Objects": batch},
                                   **extra_kwargs)
    deleted: List[Dict[str, Any]] = res.get("Deleted", [])
    for obj in deleted:
        _logger.debug("s3://%s/%s has been deleted.", bucket, obj.get("Key"))
    errors: List[Dict[str, Any]] = res.get("Errors", [])
    internal_errors: List[str] = []
    for error in errors:
        _logger.debug("error: %s", error)
        # Only InternalError is retried; anything else fails immediately.
        if "Code" not in error or error["Code"] != "InternalError":
            raise exceptions.ServiceApiError(errors)
        internal_errors.append(_unquote_plus(error["Key"]))
    if internal_errors:
        # Give up once five retries have been spent
        # (1 + 2 + 3 + 4 + 5 = 15 seconds of sleeping in total).
        if attempt > 5:
            raise exceptions.ServiceApiError(errors)
        time.sleep(attempt)  # Incremental delay (linear)
        _delete_objects(
            bucket=bucket,
            keys=internal_errors,
            boto3_session=boto3_session,
            s3_additional_kwargs=s3_additional_kwargs,
            attempt=(attempt + 1),
        )
示例#5
0
def _list_objects(  # pylint: disable=too-many-branches
    path: str,
    s3_additional_kwargs: Optional[Dict[str, Any]],
    delimiter: Optional[str] = None,
    suffix: Union[str, List[str], None] = None,
    ignore_suffix: Union[str, List[str], None] = None,
    last_modified_begin: Optional[datetime.datetime] = None,
    last_modified_end: Optional[datetime.datetime] = None,
    boto3_session: Optional[boto3.Session] = None,
    ignore_empty: bool = False,
) -> Iterator[List[str]]:
    """Yield, page by page, the ``s3://bucket/key`` paths listed under ``path``.

    Parameters
    ----------
    path : str
        S3 path, split into bucket and prefix with ``_utils.parse_path``. The
        prefix goes through ``_prefix_cleanup``; when that changes it, each
        page's results are additionally filtered with ``fnmatch`` against
        ``path``.
    s3_additional_kwargs : Optional[Dict[str, Any]]
        When truthy, filtered through ``_fs.get_botocore_valid_kwargs`` for
        ``list_objects_v2``. A ``"PaginationConfig"`` entry, if present,
        replaces the default ``{"PageSize": 1000}``.
    delimiter : Optional[str]
        When given, it is sent as ``Delimiter`` and the page's
        ``CommonPrefixes`` are reported instead of its ``Contents``.
    suffix : Union[str, List[str], None]
        Keep only keys ending with one of these suffixes (object listing only).
    ignore_suffix : Union[str, List[str], None]
        Drop paths ending with one of these suffixes.
    last_modified_begin : Optional[datetime.datetime]
        Skip objects whose ``LastModified`` is earlier than this.
    last_modified_end : Optional[datetime.datetime]
        Skip objects whose ``LastModified`` is later than this.
    boto3_session : Optional[boto3.Session]
        Session used to build the S3 client.
    ignore_empty : bool
        Skip objects whose ``Size`` is 0 (a missing ``Size`` counts as 0).

    Yields
    ------
    List[str]
        The matching paths of one result page; pages with no match are skipped.
    """
    bucket: str
    prefix_original: str
    bucket, prefix_original = _utils.parse_path(path=path)
    prefix: str = _prefix_cleanup(prefix=prefix_original)
    wanted_suffixes: Optional[List[str]] = [suffix] if isinstance(suffix, str) else suffix
    unwanted_suffixes: Optional[List[str]] = (
        [ignore_suffix] if isinstance(ignore_suffix, str) else ignore_suffix
    )
    client_s3: boto3.client = _utils.client(service_name="s3", session=boto3_session)

    default_pagination: Dict[str, int] = {"PageSize": 1000}
    if s3_additional_kwargs:
        extra_kwargs: Dict[str, Any] = _fs.get_botocore_valid_kwargs(
            function_name="list_objects_v2",
            s3_additional_kwargs=s3_additional_kwargs,
        )
        extra_kwargs["PaginationConfig"] = s3_additional_kwargs.get(
            "PaginationConfig", default_pagination
        )
    else:
        extra_kwargs = {"PaginationConfig": default_pagination}

    paginator = client_s3.get_paginator("list_objects_v2")
    args: Dict[str, Any] = {"Bucket": bucket, "Prefix": prefix}
    args.update(extra_kwargs)
    if delimiter is not None:
        args["Delimiter"] = delimiter
    _logger.debug("args: %s", args)
    response_iterator = paginator.paginate(**args)
    _validate_datetimes(last_modified_begin=last_modified_begin,
                        last_modified_end=last_modified_end)

    # str.endswith needs a tuple; build each one once instead of per key.
    keep_endings = None if wanted_suffixes is None else tuple(wanted_suffixes)
    drop_endings = None if unwanted_suffixes is None else tuple(unwanted_suffixes)

    for page in response_iterator:
        page_paths: List[str] = []
        if delimiter is None:
            contents: Optional[List[Dict[str, Any]]] = page.get("Contents")
            for content in ([] if contents is None else contents):
                key: str = content["Key"]
                if ignore_empty and content.get("Size", 0) == 0:
                    _logger.debug("Skipping empty file: %s",
                                  f"s3://{bucket}/{key}")
                    continue
                if keep_endings is not None and not key.endswith(keep_endings):
                    continue
                if (last_modified_begin is not None
                        and content["LastModified"] < last_modified_begin):
                    continue
                if (last_modified_end is not None
                        and content["LastModified"] > last_modified_end):
                    continue
                page_paths.append(f"s3://{bucket}/{key}")
        else:
            prefixes: Optional[List[Optional[Dict[str, str]]]] = page.get(
                "CommonPrefixes")
            if prefixes is not None:
                page_paths.extend(
                    f"s3://{bucket}/{pfx['Prefix']}"
                    for pfx in prefixes
                    if pfx is not None and "Prefix" in pfx
                )

        # The listing used the cleaned prefix, so match against the original path.
        if prefix != prefix_original:
            page_paths = fnmatch.filter(page_paths, path)

        if drop_endings is not None:
            page_paths = [p for p in page_paths if not p.endswith(drop_endings)]

        if page_paths:
            yield page_paths