def _test_objects_exist(
    session: boto3.session.Session,
    bucket_name: str,
    bucket_root: str,
    file_list: List[str],
) -> None:
    """Assert that every path in ``file_list`` has a matching object key
    under ``bucket_root`` in the bucket.

    Directory placeholder objects (keys with no file extension) are excluded
    from the comparison.
    """
    s3 = session.resource("s3")
    bucket = s3.Bucket(bucket_name)

    bucket_objects = []
    for obj in bucket.objects.filter(Prefix=bucket_root):
        # Skip directory objects: their keys have no file extension.
        if not os.path.splitext(obj.key)[-1]:
            continue
        bucket_objects.append(obj.key)

    assert len(bucket_objects) == len(file_list)

    for path in file_list:
        found = False
        for bucket_path in bucket_objects:
            if bucket_path.endswith(path):
                found = True
                # BUG FIX: was `continue`, which kept scanning the remaining
                # keys after a match; stop at the first match instead.
                break
        if not found:
            # BUG FIX: original logged `bucket_path` (the last bucket key
            # examined) rather than the local path that is actually missing.
            log.error("{0} not found in bucket".format(path))
            assert False
def empty_bucket(bucket_name: str, boto3_session: boto3.session.Session) -> None:
    """Delete every object in ``bucket_name``.

    A bucket that does not exist is treated as already empty; any other
    S3 client error is propagated.
    """
    try:
        boto3_session.resource("s3").Bucket(bucket_name).objects.all().delete()
    except ClientError as e:
        # Only swallow the "bucket is missing" case.
        if e.response["Error"]["Code"] != "NoSuchBucket":
            raise e
def get_group_managed_policy_data(boto3_session: boto3.session.Session, group_list: List[Dict]) -> Dict:
    """Map each group's ARN to the ``Statement`` lists of its attached
    managed policies, keyed by policy name.

    :param boto3_session: session used to build the IAM resource client
    :param group_list: dicts with at least ``GroupName`` and ``Arn`` keys
    :returns: ``{group_arn: {policy_name: statements}}``
    """
    resource_client = boto3_session.resource('iam')
    policies = {}
    for group in group_list:
        name = group["GroupName"]
        arn = group["Arn"]
        resource_group = resource_client.Group(name)
        # CONSISTENCY FIX: get_user_policy_data / get_role_managed_policy_data
        # tolerate entities deleted between listing and fetching; apply the
        # same guard here so one vanished group doesn't abort the whole scan.
        try:
            policies[arn] = {
                p.policy_name: p.default_version.document["Statement"]
                for p in resource_group.attached_policies.all()
            }
        except resource_client.meta.client.exceptions.NoSuchEntityException:
            logger.warning(
                f"Could not get policies for group {name} due to NoSuchEntityException; skipping.",
            )
    return policies
def __init__(
    self,
    session: boto3.session.Session,
    bucket_name: str,
    bucket_root: str,
) -> None:
    """Bind the boto3 session, the named bucket, and the normalized root.

    :param session: boto3 session used to build the S3 resource
    :param bucket_name: name of the target S3 bucket
    :param bucket_root: key prefix within the bucket; a trailing ``/`` is
        stripped so later prefix comparisons are uniform
    """
    super().__init__()
    self._logger = logging.getLogger(__name__)

    s3 = session.resource("s3")
    self._session = session
    self._bucket = s3.Bucket(bucket_name)
    # DEAD-STORE FIX: the original assigned the raw bucket_root and then
    # immediately overwrote it with the stripped value; assign once.
    self._bucket_root = bucket_root.rstrip("/")
def get_user_policy_data(boto3_session: boto3.session.Session, user_list: List[Dict]) -> Dict:
    """Map each user's ARN to the ``Statement`` lists of their inline
    policies, keyed by policy name.

    Users that have disappeared between listing and fetching are logged
    and skipped.
    """
    iam = boto3_session.resource('iam')
    policies: Dict = {}
    for user in user_list:
        name = user["UserName"]
        arn = user["Arn"]
        try:
            statements = {}
            for inline_policy in iam.User(name).policies.all():
                statements[inline_policy.name] = inline_policy.policy_document["Statement"]
            policies[arn] = statements
        except iam.meta.client.exceptions.NoSuchEntityException:
            logger.warning(
                f"Could not get policies for user {name} due to NoSuchEntityException; skipping.",
            )
    return policies
def get_role_managed_policy_data(boto3_session: boto3.session.Session, role_list: List[Dict]) -> Dict:
    """Map each role's ARN to the ``Statement`` lists of its attached
    managed policies, keyed by policy name.

    Roles that have disappeared between listing and fetching are logged
    and skipped.
    """
    iam = boto3_session.resource('iam')
    policies: Dict = {}
    for role in role_list:
        name = role["RoleName"]
        arn = role["Arn"]
        try:
            statements = {}
            for managed_policy in iam.Role(name).attached_policies.all():
                statements[managed_policy.policy_name] = (
                    managed_policy.default_version.document["Statement"]
                )
            policies[arn] = statements
        except iam.meta.client.exceptions.NoSuchEntityException:
            logger.warning(
                f"Could not get policies for role {name} due to NoSuchEntityException; skipping.",
            )
    return policies
def _clean_bucket(session: boto3.session.Session, bucket_name: str, root_path: str) -> None:
    """Delete every object under ``root_path`` in the named bucket.

    :param session: boto3 session used to build the S3 resource
    :param bucket_name: name of the bucket to clean
    :param root_path: directory-style key prefix whose objects are removed
    """
    s3 = session.resource("s3")
    bucket = s3.Bucket(bucket_name)

    # Normalize the directory path so it matches object-key prefixes exactly.
    if not root_path.endswith("/"):
        root_path += "/"

    key_objects = [{"Key": obj.key} for obj in bucket.objects.filter(Prefix=root_path)]
    if not key_objects:
        return

    # based on http://stackoverflow.com/a/34888103
    # BUG FIX: delete_objects accepts at most 1000 keys per request, so
    # batch the deletion; the original single call fails for prefixes
    # holding more than 1000 objects.
    for start in range(0, len(key_objects), 1000):
        s3.meta.client.delete_objects(
            Bucket=bucket.name,
            Delete={"Objects": key_objects[start:start + 1000]},
        )
def __init__(
    self,
    url: str,
    boto3_session: boto3.session.Session,
    S3Url: Callable[[str, Any], Union['S3DirectoryUrl', 'S3FileUrl']],
) -> None:
    """Parse an ``s3://`` URL and bind boto3 resource/client handles.

    :param url: full S3 URL; netloc is the bucket, path is the object key
    :param boto3_session: session supplying region, resource, and client
    :param S3Url: factory callable stored for building further URL objects
    """
    parsed = urlparse(url)
    self.url = url
    self.scheme = parsed.scheme
    # https://docs.python.org/2/library/urlparse.html
    # urlparse keeps the network location as one string and does NOT
    # expand %-escapes, so the key is unquoted here; the path's leading
    # '/' is not part of the object key.
    self.key = unquote(parsed.path[1:])
    self.bucket = parsed.netloc
    self.region = boto3_session.region_name
    self._boto3_session = boto3_session
    self.s3_resource: S3ResourceTypeStub = boto3_session.resource('s3')
    self.s3_client: S3ClientTypeStub = boto3_session.client('s3')
    self.S3Url = S3Url
def _test_content_types(session: boto3.session.Session, bucket_name: str, bucket_root: str) -> None:
    """Verify that the expected Content-Type header was set."""
    s3 = session.resource("s3")
    bucket = s3.Bucket(bucket_name)
    # The AWS API listing doesn't expose the Content-Type header, so each
    # object is probed directly over HTTP instead.
    # see http://stackoverflow.com/a/34698521 for making object URLs
    location = s3.meta.client.get_bucket_location(Bucket=bucket_name)
    for obj in bucket.objects.filter(Prefix=bucket_root):
        # Directory redirect objects carry the "ltd-redirect" metadata key.
        if "ltd-redirect" in obj.Object().metadata:
            continue
        expected_type = mimetypes.guess_type(obj.key)[0]
        # Only check objects whose type is guessable from the extension.
        if expected_type is None:
            continue
        object_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
            location["LocationConstraint"], bucket_name, obj.key)
        response = requests.head(object_url)
        assert response.headers["content-type"] == expected_type
def _test_directory_redirects(
    session: boto3.session.Session,
    bucket_name: str,
    bucket_root: str,
    file_list: Sequence[str],
) -> None:
    """Verify that the directory redirect objects exist."""
    # Every directory path derived from the file list, root included.
    dirnames = _file_list_dirnames(file_list, bucket_root)

    s3 = session.resource("s3")
    # see http://stackoverflow.com/a/34698521 for making object URLs
    location = s3.meta.client.get_bucket_location(Bucket=bucket_name)
    for dirname in dirnames:
        # Probe the directory object over HTTP and check its redirect marker.
        obj = s3.Object(bucket_name, dirname)
        object_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
            location["LocationConstraint"], bucket_name, obj.key)
        response = requests.head(object_url)
        assert response.headers["x-amz-meta-dir-redirect"] == "true"
def _test_headers(
    session: boto3.session.Session,
    bucket_name: str,
    bucket_root: str,
    expected_headers: Dict[str, str],
) -> None:
    """Generically test that header key-value pairs in `expected_headers`
    actually are served by S3.
    """
    s3 = session.resource("s3")
    bucket = s3.Bucket(bucket_name)
    # see http://stackoverflow.com/a/34698521 for making object URLs
    location = s3.meta.client.get_bucket_location(Bucket=bucket_name)
    for obj in bucket.objects.filter(Prefix=bucket_root):
        object_url = "https://s3-{0}.amazonaws.com/{1}/{2}".format(
            location["LocationConstraint"], bucket_name, obj.key)
        response = requests.head(object_url)
        # Directory redirect objects are exempt from the generic checks.
        if "x-amz-ltd-redirect" in response.headers:
            continue
        for header_name, expected_value in expected_headers.items():
            assert response.headers[header_name] == expected_value