示例#1
0
def verify_stack_status(stack_name, waiting_states, successful_states):
    """
    Wait for the stack creation to be completed and notify if the stack creation fails.

    :param stack_name: the stack name that we should verify
    :param waiting_states: list of status to wait for
    :param successful_states: list of final status considered as successful
    :return: True if the final status is in the successful_states list, False otherwise.
    """
    from pcluster.aws.aws_api import AWSApi  # pylint: disable=import-outside-toplevel

    status = AWSApi.instance().cfn.describe_stack(stack_name).get("StackStatus")
    resource_status = ""
    while status in waiting_states:
        status = AWSApi.instance().cfn.describe_stack(stack_name).get("StackStatus")
        latest_event = AWSApi.instance().cfn.get_stack_events(stack_name)["StackEvents"][0]
        resource_status = "Status: {0} - {1}".format(
            latest_event.get("LogicalResourceId"), latest_event.get("ResourceStatus")
        ).ljust(80)
        # Overwrite the same terminal line with the latest resource status.
        sys.stdout.write("\r" + resource_status)
        sys.stdout.flush()
        time.sleep(5)
    # print the last status update in the logs
    if resource_status != "":
        LOGGER.debug(resource_status)
    return status in successful_states
示例#2
0
def upload_archive(bucket: str, bucket_prefix: str, archive_path: str):
    """Upload the archive at *archive_path* to the bucket and return its S3 URI."""
    archive_filename = os.path.basename(archive_path)
    with open(archive_path, "rb") as archive_file:
        payload = archive_file.read()
    object_key = f"{bucket_prefix}/{archive_filename}"
    AWSApi.instance().s3.put_object(bucket, payload, object_key)
    return f"s3://{bucket}/{object_key}"
 def delete_s3_artifacts(self):
     """Cleanup S3 bucket artifact directory."""
     LOGGER.debug(
         "Cleaning up S3 resources bucket_name=%s, service_name=%s, remove_artifact=%s",
         self.name,
         self._service_name,
         self._cleanup_on_deletion,
     )
     # Nothing to clean up unless an artifact directory exists and cleanup is requested.
     if not (self.artifact_directory and self._cleanup_on_deletion):
         return
     try:
         LOGGER.info("Deleting artifacts under %s/%s", self.name, self.artifact_directory)
         artifact_prefix = f"{self.artifact_directory}/"
         AWSApi.instance().s3_resource.delete_object(bucket_name=self.name, prefix=artifact_prefix)
         AWSApi.instance().s3_resource.delete_object_versions(bucket_name=self.name, prefix=artifact_prefix)
     except AWSClientError as e:
         # Best effort: failure to clean up is reported but not raised.
         LOGGER.warning(
             "Failed to delete S3 artifact under %s/%s with error %s. Please delete them manually.",
             self.name,
             self.artifact_directory,
             str(e),
         )
示例#4
0
    def delete(self, force=False):  # noqa: C901
        """
        Delete CFN Stack and associate resources and deregister the image.

        :param force: when True, skip the `_check_instance_using_image` and
            `_check_image_is_shared` guards and delete unconditionally.
        :raises BadRequestImageBuilderActionError: if the ImageBuilder image still has a running workflow.
        :raises: the exception produced by `_imagebuilder_error_mapper` for AWS client or image errors.
        """
        if force or (not self._check_instance_using_image() and not self._check_image_is_shared()):
            try:
                if AWSApi.instance().cfn.stack_exists(self.image_id):
                    if self.stack.imagebuilder_image_is_building:
                        # Deleting the stack while the build workflow runs would orphan it.
                        raise BadRequestImageBuilderActionError(
                            "Image cannot be deleted because EC2 ImageBuilder Image has a running workflow."
                        )
                    # Delete stack
                    AWSApi.instance().cfn.delete_stack(self.image_id)

                # NOTE: "avaliable" [sic] is the wrapper's actual keyword name; do not "fix" it here.
                if AWSApi.instance().ec2.image_exists(image_id=self.image_id, build_status_avaliable=False):
                    # Deregister image
                    AWSApi.instance().ec2.deregister_image(self.image.id)

                    # Delete snapshot
                    for snapshot_id in self.image.snapshot_ids:
                        AWSApi.instance().ec2.delete_snapshot(snapshot_id)

                # Delete s3 image directory (best effort: a missing bucket is not an error here)
                try:
                    self.bucket.check_bucket_exists()
                    self.bucket.delete_s3_artifacts()
                except AWSClientError:
                    logging.warning("S3 bucket %s does not exist, skip image s3 artifacts deletion.", self.bucket.name)

                # Delete log group (best effort as well)
                try:
                    AWSApi.instance().logs.delete_log_group(self._log_group_name)
                except AWSClientError:
                    logging.warning("Unable to delete log group %s.", self._log_group_name)

            except (AWSClientError, ImageError) as e:
                raise _imagebuilder_error_mapper(e, f"Unable to delete image and stack, due to {str(e)}")
 def _ec2_run_instance(self, availability_zone: str,
                       **kwargs):  # noqa: C901 FIXME!!!
     """
     Wrap ec2 run_instance call. Useful since a successful run_instance call signals 'DryRunOperation'.

     Translates the most common EC2 client error codes into user-friendly validation
     failures; any unrecognized error falls through to a generic failure message.

     :param availability_zone: AZ of the requested subnet, used to diagnose "Unsupported" errors.
     :param kwargs: arguments forwarded verbatim to ec2 run_instances (must include
         NetworkInterfaces[0].SubnetId and InstanceType).
     """
     try:
         AWSApi.instance().ec2.run_instances(**kwargs)
     except AWSClientError as e:
         code = e.error_code
         message = str(e)
         subnet_id = kwargs["NetworkInterfaces"][0]["SubnetId"]
         if code == "UnsupportedOperation":
             if "does not support specifying CpuOptions" in message:
                 # BUGFIX: str.replace returns a new string; the result must be
                 # re-assigned or the clarified wording never reaches the user.
                 message = message.replace("specifying CpuOptions",
                                           "disabling simultaneous multithreading")
             self._add_failure(message, FailureLevel.ERROR)
         elif code == "InstanceLimitExceeded":
             self._add_failure(
                 "You've reached the limit on the number of instances you can run concurrently "
                 f"for the configured instance type. {message}",
                 FailureLevel.ERROR,
             )
         elif code == "InsufficientInstanceCapacity":
             self._add_failure(
                 f"There is not enough capacity to fulfill your request. {message}",
                 FailureLevel.ERROR)
         elif code == "InsufficientFreeAddressesInSubnet":
             self._add_failure(
                 "The specified subnet does not contain enough free private IP addresses "
                 f"to fulfill your request. {message}",
                 FailureLevel.ERROR,
             )
         elif code == "InvalidParameterCombination":
             if "associatePublicIPAddress" in message:
                 # Instances with multiple Network Interfaces cannot currently take public IPs.
                 # This check is meant to warn users about this problem until services are fixed.
                 self._add_failure(
                     f"The instance type {kwargs['InstanceType']} cannot take public IPs. "
                     f"Please make sure that the subnet with id '{subnet_id}' has the proper routing configuration "
                     "to allow private IPs reaching the Internet (e.g. a NAT Gateway and a valid route table).",
                     FailureLevel.WARNING,
                 )
         elif (code == "Unsupported"
               and availability_zone not in AWSApi.instance().ec2.
               get_supported_az_for_instance_type(kwargs["InstanceType"])):
             # If an availability zone without desired instance type is selected, error code is "Unsupported"
             # Therefore, we need to write our own code to tell the specific problem
             qualified_az = AWSApi.instance(
             ).ec2.get_supported_az_for_instance_type(
                 kwargs["InstanceType"])
             self._add_failure(
                 f"Your requested instance type ({kwargs['InstanceType']}) is not supported in the "
                 f"Availability Zone ({availability_zone}) of your requested subnet ({subnet_id}). "
                 f"Please retry your request by choosing a subnet in {qualified_az}. ",
                 FailureLevel.ERROR,
             )
         else:
             self._add_failure(
                 f"Unable to validate configuration parameters for instance type {kwargs['InstanceType']}. "
                 f"Please double check your cluster configuration. {message}",
                 FailureLevel.ERROR,
             )
 def upload_bootstrapped_file(self):
     """Upload bootstrapped file to identify bucket is configured successfully."""
     bootstrapped_key = "/".join([self._root_directory, self._bootstrapped_file_name])
     AWSApi.instance().s3.put_object(
         bucket_name=self.name,
         body="bucket is configured successfully.",
         key=bootstrapped_key,
     )
 def _upload_file(self,
                  content,
                  file_name,
                  file_type,
                  format=S3FileFormat.YAML):
     """Serialize *content* according to *format* and upload it to the S3 bucket."""
     # Pick the serialization first; raw content is uploaded as-is.
     if format == S3FileFormat.YAML:
         body = yaml.dump(content)
     elif format == S3FileFormat.JSON:
         body = json.dumps(content)
     else:
         body = content
     return AWSApi.instance().s3.put_object(
         bucket_name=self.name,
         body=body,
         key=self.get_object_key(file_type, file_name),
     )
示例#8
0
 def _validate(self, security_group_ids: List[str]):
     """Report a failure for every security group id that cannot be described."""
     for sg_id in security_group_ids or []:
         try:
             AWSApi.instance().ec2.describe_security_group(sg_id)
         except AWSClientError as e:
             self._add_failure(str(e), FailureLevel.ERROR)
 def check_bucket_is_bootstrapped(self):
     """Check bucket is configured successfully or not by bootstrapped file."""
     bootstrapped_key = "/".join([self._root_directory, self._bootstrapped_file_name])
     AWSApi.instance().s3.head_object(bucket_name=self.name, object_name=bootstrapped_key)
def _get_supported_batch_instance_types():
    """
    Get the instance types supported by Batch in the desired region.

    This is done by calling Batch's CreateComputeEnvironment with a bad
    instance type and parsing the error message.
    """
    region_instance_types = AWSApi.instance().ec2.list_instance_types()
    region_instance_families = _get_instance_families_from_types(region_instance_types)
    # "optimal" is a Batch-specific value accepted in addition to real types/families.
    fallback_types = region_instance_types + region_instance_families + ["optimal"]
    try:
        parsed_types = AWSApi.instance().batch.get_supported_instance_types_and_families()
        if _batch_instance_types_and_families_are_supported(parsed_types, fallback_types):
            return parsed_types
        return fallback_types
    except Exception as e:
        # When the instance types supported by Batch can't be parsed from an error message,
        # log the reason for the failure and return instead a list of all instance types
        # supported in the region.
        LOGGER.debug(
            "Failed to parse supported Batch instance types from a CreateComputeEnvironment error message: %s", e
        )
        return fallback_types
示例#11
0
    def _validate(self, subnet_ids: List[str]):
        """Validate that all subnets share one VPC and that the VPC has DNS support and hostnames enabled."""
        try:
            subnets = AWSApi.instance().ec2.describe_subnets(subnet_ids=subnet_ids)

            # Every subnet must belong to the VPC of the first subnet seen.
            vpc_id = None
            for subnet in subnets:
                subnet_vpc = subnet["VpcId"]
                if vpc_id is None:
                    vpc_id = subnet_vpc
                elif subnet_vpc != vpc_id:
                    self._add_failure(
                        "Subnet {0} is not in VPC {1}. Please make sure all subnets are in the same VPC."
                        .format(subnet["SubnetId"], vpc_id),
                        FailureLevel.ERROR,
                    )

            # Check for DNS support in the VPC
            if not AWSApi.instance().ec2.is_enable_dns_support(vpc_id):
                self._add_failure(
                    f"DNS Support is not enabled in the VPC {vpc_id}.",
                    FailureLevel.ERROR)
            if not AWSApi.instance().ec2.is_enable_dns_hostnames(vpc_id):
                self._add_failure(
                    f"DNS Hostnames not enabled in the VPC {vpc_id}.",
                    FailureLevel.ERROR)

        except AWSClientError as e:
            self._add_failure(str(e), FailureLevel.ERROR)
示例#12
0
    def _validate(self, hosted_zone_id, cluster_vpc, cluster_name):
        """Check the hosted zone is private, associated with the cluster VPC, and short enough to use."""
        if not AWSApi.instance().route53.is_hosted_zone_private(hosted_zone_id):
            self._add_failure(
                f"Hosted zone {hosted_zone_id} cannot be used. "
                f"Public Route53 hosted zone is not officially supported by ParallelCluster.",
                FailureLevel.ERROR,
            )
        else:
            vpc_ids = AWSApi.instance().route53.get_hosted_zone_vpcs(hosted_zone_id)
            if cluster_vpc not in vpc_ids:
                self._add_failure(
                    f"Private Route53 hosted zone {hosted_zone_id} need to be associated with "
                    f"the VPC of the cluster: {cluster_vpc}. "
                    f"The VPCs associated with hosted zone are {vpc_ids}.",
                    FailureLevel.ERROR,
                )

        # The combined length limit applies regardless of the zone's visibility.
        domain_name = AWSApi.instance().route53.get_hosted_zone_domain_name(hosted_zone_id)
        total_length = len(cluster_name) + len(domain_name)
        if total_length > CLUSTER_NAME_AND_CUSTOM_DOMAIN_NAME_MAX_LENGTH:
            self._add_failure(
                (
                    "Error: When specifying HostedZoneId, "
                    f"the total length of cluster name {cluster_name} and domain name {domain_name} can not be "
                    f"longer than {CLUSTER_NAME_AND_CUSTOM_DOMAIN_NAME_MAX_LENGTH} character, "
                    f"current length is {total_length}"
                ),
                FailureLevel.ERROR,
            )
示例#13
0
 def _validate(self, placement_group_id: str):
     """If a placement group id is provided, verify it can be described."""
     if not placement_group_id:
         return
     try:
         AWSApi.instance().ec2.describe_placement_group(placement_group_id)
     except AWSClientError as e:
         self._add_failure(str(e), FailureLevel.ERROR)
示例#14
0
    def __init__(self,
                 resource_id,
                 log_group_name,
                 bucket,
                 output_dir,
                 bucket_prefix=None,
                 keep_s3_objects=False):
        """
        Prepare a log exporter for the given log group.

        :param resource_id: id of the resource whose logs are exported; used in the default prefix.
        :param log_group_name: CloudWatch log group to export.
        :param bucket: destination S3 bucket; must be in the same region as the resource.
        :param output_dir: local directory where logs will be downloaded.
        :param bucket_prefix: optional S3 prefix; when omitted a timestamped one is generated.
        :param keep_s3_objects: when True, exported S3 objects are kept instead of deleted.
        :raises LogsExporterError: if the bucket is in a different region than the resource.
        """
        # check bucket
        bucket_region = AWSApi.instance().s3.get_bucket_region(bucket_name=bucket)
        if bucket_region != get_region():
            raise LogsExporterError(
                f"The bucket used for exporting logs must be in the same region as the {resource_id}. "
                f"The given resource is in {get_region()}, but the bucket's region is {bucket_region}."
            )
        self.bucket = bucket
        self.log_group_name = log_group_name
        self.output_dir = output_dir
        self.keep_s3_objects = keep_s3_objects

        if bucket_prefix:
            self.bucket_prefix = bucket_prefix
            self.delete_everything_under_prefix = False
        else:
            # If the default bucket prefix is being used and there's nothing underneath that prefix already
            # then we can delete everything under that prefix after downloading the data
            # (unless keep-s3-objects is specified)
            timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M')
            self.bucket_prefix = f"{resource_id}-logs-{timestamp}"
            self.delete_everything_under_prefix = AWSApi.instance().s3_resource.is_empty(bucket, self.bucket_prefix)
示例#15
0
 def execute(self,
             log_stream_prefix=None,
             start_time: datetime.datetime = None,
             end_time: datetime.datetime = None):
     """
     Start export task. Returns logs streams folder.

     :param log_stream_prefix: optional prefix used to filter the exported log streams.
     :param start_time: beginning of the time window of interest.
     :param end_time: end of the time window of interest.
     :raises LogsExporterError: if the exported objects cannot be downloaded from S3.
     """
     # Export logs to S3
     task_id = self._export_logs_to_s3(log_stream_prefix=log_stream_prefix,
                                       start_time=start_time,
                                       end_time=end_time)
     LOGGER.info("Log export task id: %s", task_id)
     # Download exported S3 objects to output dir subfolder
     try:
         log_streams_dir = os.path.join(self.output_dir, "cloudwatch-logs")
         self._download_s3_objects_with_prefix(task_id, log_streams_dir)
         LOGGER.info("Archive of CloudWatch logs saved to %s",
                     self.output_dir)
     except OSError as e:
         # Chain the original error so the root cause is preserved for debugging.
         raise LogsExporterError(
             "Unable to download archive logs from S3, double check your filters are correct."
         ) from e
     finally:
         # Always clean up the S3 side unless explicitly told to keep it.
         if not self.keep_s3_objects:
             if self.delete_everything_under_prefix:
                 delete_key = self.bucket_prefix
             else:
                 delete_key = "/".join((self.bucket_prefix, task_id))
             LOGGER.debug(
                 "Cleaning up S3 bucket %s. Deleting all objects under %s",
                 self.bucket, delete_key)
             AWSApi.instance().s3_resource.delete_objects(
                 bucket_name=self.bucket, prefix=delete_key)
示例#16
0
    def list_log_streams(self, next_token: str = None):
        """
        List image builder's logs.

        :param next_token: Token for paginated requests.
        :returns LogStreams
        :raises NotFoundImageBuilderActionError: if the image's log group does not exist.
        :raises ImageBuilderActionError: on any other AWS client error.
        """
        try:
            log_streams = []
            if AWSApi.instance().logs.log_group_exists(self._log_group_name):
                LOGGER.debug("Listing log streams from log group %s", self._log_group_name)
                log_stream_resp = AWSApi.instance().logs.describe_log_streams(
                    log_group_name=self._log_group_name, next_token=next_token
                )
                log_streams.extend(log_stream_resp["logStreams"])
                next_token = log_stream_resp.get("nextToken")
            else:
                LOGGER.debug("Log Group %s doesn't exist.", self._log_group_name)
                raise NotFoundImageBuilderActionError(
                    ("Unable to find image logs, please double check if image id=" f"{self.image_id} is correct.")
                )

            return LogStreams(log_streams, next_token)

        except AWSClientError as e:
            # Chain the AWS error so the original cause is preserved.
            raise ImageBuilderActionError(f"Unexpected error when retrieving image's logs: {e}") from e
示例#17
0
    def _download_s3_objects_with_prefix(self, task_id, destdir):
        """Download all object in bucket with given prefix into destdir."""
        prefix = f"{self.bucket_prefix}/{task_id}"
        LOGGER.debug(
            "Downloading exported logs from s3 bucket %s (under key %s) to %s",
            self.bucket, prefix, destdir)
        for exported_object in AWSApi.instance().s3_resource.get_objects(
                bucket_name=self.bucket, prefix=prefix):
            # Strip the S3 export prefix when mapping keys to the local layout.
            target_path = os.path.dirname(os.path.join(destdir, exported_object.key))
            target_path = target_path.replace(prefix + os.path.sep, "")
            gz_path = f"{target_path}.gz"

            LOGGER.debug("Downloading object with key=%s to %s",
                         exported_object.key, gz_path)
            os.makedirs(os.path.dirname(gz_path), exist_ok=True)
            AWSApi.instance().s3_resource.download_file(
                bucket_name=self.bucket,
                key=exported_object.key,
                output=gz_path)

            # Create a decompressed copy of the downloaded archive and remove the original
            LOGGER.debug("Extracting object at %s to %s", gz_path, target_path)
            with gzip.open(gz_path) as gz_file, open(target_path, "wb") as out_file:
                out_file.write(gz_file.read())
            os.remove(gz_path)
示例#18
0
 def _validate_instance_type(self, instance_type: str):
     """Return the supported architectures for *instance_type*, or [] (with a failure) when unknown."""
     if instance_type in AWSApi.instance().ec2.list_instance_types():
         return AWSApi.instance().ec2.get_supported_architectures(instance_type)
     self._add_failure(
         f"The instance type '{instance_type}' is not supported.",
         FailureLevel.ERROR,
     )
     return []
示例#19
0
 def disable_awsbatch_compute_environment(self):
     """Disable AWS Batch compute environment."""
     LOGGER.info("Disabling AWS Batch compute environment : %s", self.name)
     try:
         ce_name = self.stack.batch_compute_environment
         AWSApi.instance().batch.disable_compute_environment(ce_name=ce_name)
     except Exception as e:
         raise _cluster_error_mapper(
             e, f"Unable to disable Batch compute environment. {str(e)}")
示例#20
0
    def _validate_no_existing_image(self):
        """Validate that no existing image or stack with the same ImageBuilder image_id exists."""
        image_found = AWSApi.instance().ec2.image_exists(self.image_id)
        if image_found:
            raise ConflictImageBuilderActionError(f"ParallelCluster image {self.image_id} already exists.")

        stack_found = AWSApi.instance().cfn.stack_exists(self.image_id)
        if stack_found:
            raise ConflictImageBuilderActionError(
                f"ParallelCluster build infrastructure for image {self.image_id} already exists"
            )
示例#21
0
 def _validate(self, backup_id):
     """If an FSx backup id is given, verify it can be described."""
     if not backup_id:
         return
     try:
         AWSApi.instance().fsx.describe_backup(backup_id)
     except AWSClientError as e:
         self._add_failure(
             "Failed to retrieve backup with Id '{0}': {1}".format(
                 backup_id, str(e)),
             FailureLevel.ERROR,
         )
示例#22
0
 def _update_stack_template(self, template_url):
     """Update template of the running stack according to updated template."""
     try:
         AWSApi.instance().cfn.update_stack_from_url(self.stack_name, template_url)
         self._wait_for_stack_update()
     except AWSClientError as e:
         # An unchanged template makes CFN report "no updates"; treat that as success.
         if "no updates are to be performed" not in str(e).lower():
             raise e
示例#23
0
    def _validate(self, url):
        """Check that *url* is an s3:// URI whose bucket exists and is reachable."""
        if get_url_scheme(url) != "s3":
            self._add_failure(f"The value '{url}' is not a valid S3 URI.",
                              FailureLevel.ERROR)
            return
        try:
            bucket = get_bucket_name_from_s3_url(url)
            AWSApi.instance().s3.head_bucket(bucket_name=bucket)
        except AWSClientError as e:
            self._add_failure(str(e), FailureLevel.ERROR)
    def _validate(self, file_system_id, head_node_subnet_id):
        """
        Validate that the FSx file system can be mounted from the head node subnet.

        Checks that the file system is in the same VPC as the head node subnet and
        that at least one of its network interfaces is associated with security
        groups allowing inbound and outbound TCP traffic on port 988.
        """
        try:

            # Check to see if there is any existing mt on the fs
            file_system = AWSApi.instance().fsx.get_filesystem_info(
                file_system_id).file_system_data

            vpc_id = AWSApi.instance().ec2.get_subnet_vpc(head_node_subnet_id)

            # Check to see if fs is in the same VPC as the stack
            if file_system.get("VpcId") != vpc_id:
                self._add_failure(
                    "Currently only support using FSx file system that is in the same VPC as the cluster. "
                    "The file system provided is in {0}.".format(
                        file_system.get("VpcId")),
                    FailureLevel.ERROR,
                )

            # If there is an existing mt in the az, need to check the inbound and outbound rules of the security groups
            network_interface_ids = file_system.get("NetworkInterfaceIds")
            if not network_interface_ids:
                # Without ENIs we cannot inspect the security group rules at all.
                self._add_failure(
                    "Unable to validate FSx security groups. The given FSx file system '{0}' doesn't have "
                    "Elastic Network Interfaces attached to it.".format(
                        file_system_id),
                    FailureLevel.ERROR,
                )
            else:
                network_interface_responses = AWSApi.instance(
                ).ec2.describe_network_interfaces(network_interface_ids)

                # One ENI (in the cluster's VPC) with a permissive security group is enough.
                fs_access = False
                network_interfaces = [
                    ni for ni in network_interface_responses
                    if ni.get("VpcId") == vpc_id
                ]
                for network_interface in network_interfaces:
                    # Get list of security group IDs
                    sg_ids = [
                        sg.get("GroupId")
                        for sg in network_interface.get("Groups")
                    ]
                    if _check_in_out_access(sg_ids, port=988):
                        fs_access = True
                        break
                if not fs_access:
                    self._add_failure(
                        "The current security group settings on file system '{0}' does not satisfy mounting requirement"
                        ". The file system must be associated to a security group that allows inbound and outbound "
                        "TCP traffic through port 988.".format(file_system_id),
                        FailureLevel.ERROR,
                    )
        except AWSClientError as e:
            self._add_failure(str(e), FailureLevel.ERROR)
示例#25
0
    def get_log_events(
        self,
        log_stream_name: str,
        start_time: datetime = None,
        end_time: datetime = None,
        start_from_head: bool = False,
        limit: int = None,
        next_token: str = None,
    ):
        """
        Get the log stream events.

        :param log_stream_name: Log stream name
        :param start_time: Start time of interval of interest for log events. ISO 8601 format: YYYY-MM-DDThh:mm:ssTZD
        :param end_time: End time of interval of interest for log events. ISO 8601 format: YYYY-MM-DDThh:mm:ssTZD
        :param start_from_head: If the value is true, the earliest log events are returned first.
            If the value is false, the latest log events are returned first. The default value is false.
        :param limit: The maximum number of log events returned. If you don't specify a value,
            the maximum is as many log events as can fit in a response size of 1 MB, up to 10,000 log events.
        :param next_token: Token for paginated requests.
        :raises NotFoundClusterActionError: if the cluster stack, its log group, or the log stream does not exist.
        """
        # The stack must exist to resolve the cluster's log group name below.
        if not AWSApi.instance().cfn.stack_exists(self.stack_name):
            raise NotFoundClusterActionError(
                f"Cluster {self.name} does not exist.")

        try:
            log_events_response = AWSApi.instance().logs.get_log_events(
                log_group_name=self.stack.log_group_name,
                log_stream_name=log_stream_name,
                end_time=datetime_to_epoch(end_time) if end_time else None,
                start_time=datetime_to_epoch(start_time)
                if start_time else None,
                limit=limit,
                start_from_head=start_from_head,
                next_token=next_token,
            )

            return LogStream(self.stack_name, log_stream_name,
                             log_events_response)
        except AWSClientError as e:
            # Map CloudWatch "not found" error messages to cluster-level not-found errors.
            if e.message.startswith("The specified log group"):
                LOGGER.debug("Log Group %s doesn't exist.",
                             self.stack.log_group_name)
                raise NotFoundClusterActionError(
                    f"CloudWatch logging is not enabled for cluster {self.name}."
                )
            if e.message.startswith("The specified log stream"):
                LOGGER.debug("Log Stream %s doesn't exist.", log_stream_name)
                raise NotFoundClusterActionError(
                    f"The specified log stream {log_stream_name} does not exist."
                )
            # Anything else is unexpected; translate via the shared error mapper.
            raise _cluster_error_mapper(
                e, f"Unexpected error when retrieving log events: {e}.")
示例#26
0
 def _validate(self, key_name: str):
     """Warn when no key pair is configured; otherwise verify the named key pair exists."""
     if not key_name:
         self._add_failure(
             "If you do not specify a key pair, you can't connect to the instance unless you choose an AMI "
             "that is configured to allow users another way to log in",
             FailureLevel.WARNING,
         )
         return
     try:
         AWSApi.instance().ec2.describe_key_pair(key_name)
     except AWSClientError as e:
         self._add_failure(str(e), FailureLevel.ERROR)
示例#27
0
    def create(
        self,
        disable_rollback: bool = True,
        validator_suppressors: Set[ValidatorSuppressor] = None,
        validation_failure_level: FailureLevel = FailureLevel.ERROR,
    ):
        """
        Create the CFN Stack and associate resources.

        :param disable_rollback: forwarded to CloudFormation; when True the stack is not
            rolled back on creation failure.
        :param validator_suppressors: suppressors applied while validating the create request.
        :param validation_failure_level: failure level passed to request validation.
        :return: the value returned by validate_create_request (suppressed validation failures).
        :raises: the exception produced by `_imagebuilder_error_mapper` on any failure.
        """
        suppressed_validation_failures = self.validate_create_request(validator_suppressors, validation_failure_level)

        # Generate artifact directory for image
        self._generate_artifact_dir()

        # Track progress so the except block knows what needs to be cleaned up.
        creation_result = None
        artifacts_uploaded = False
        try:
            self._upload_config()

            LOGGER.info("Building ParallelCluster image: %s", self.image_id)

            # Generate cdk cfn template
            self.template_body = CDKTemplateBuilder().build_imagebuilder_template(
                image_config=self.config, image_id=self.image_id, bucket=self.bucket
            )

            # upload generated template
            self._upload_artifacts()
            artifacts_uploaded = True

            # Stack creation
            creation_result = AWSApi.instance().cfn.create_stack_from_url(
                stack_name=self.image_id,
                template_url=self.bucket.get_cfn_template_url(
                    template_name=self._s3_artifacts_dict.get("template_name")
                ),
                disable_rollback=disable_rollback,
                tags=self._get_cfn_tags(),
                capabilities="CAPABILITY_NAMED_IAM",
            )

            self.__stack = ImageBuilderStack(AWSApi.instance().cfn.describe_stack(self.image_id))

            LOGGER.debug("StackId: %s", self.stack.id)
            LOGGER.info("Status: %s", self.stack.status)

            return suppressed_validation_failures

        except Exception as e:
            LOGGER.critical(e)
            if not creation_result and artifacts_uploaded:
                # Cleanup S3 artifacts if stack is not created yet
                self.bucket.delete_s3_artifacts()
            raise _imagebuilder_error_mapper(e, f"ParallelCluster image build infrastructure creation failed.\n{e}")
示例#28
0
def export_stack_events(stack_name: str, output_file: str):
    """Save CFN stack events into a file."""
    stack_events = []
    page = AWSApi.instance().cfn.get_stack_events(stack_name)
    stack_events.append(page["StackEvents"])
    # Follow pagination until no continuation token is returned.
    while page.get("nextToken"):
        page = AWSApi.instance().cfn.get_stack_events(stack_name, next_token=page["nextToken"])
        stack_events.append(page["StackEvents"])

    with open(output_file, "w", encoding="utf-8") as cfn_events_file:
        cfn_events_file.write(json.dumps(stack_events, cls=JSONEncoder, indent=2))
示例#29
0
    def get_stack_events(self, next_token: str = None):
        """
        Get the CloudFormation stack events for the cluster.

        :param next_token Start from next_token if provided.
        """
        try:
            # A missing stack means the cluster itself is gone.
            if not AWSApi.instance().cfn.stack_exists(self.stack_name):
                raise NotFoundClusterActionError(f"Cluster {self.name} does not exist.")
            return AWSApi.instance().cfn.get_stack_events(self.stack_name, next_token=next_token)
        except AWSClientError as e:
            raise _cluster_error_mapper(e, f"Unexpected error when retrieving stack events: {e}")
示例#30
0
 def _validate(self, bucket):
     """Check the bucket exists and has versioning enabled."""
     try:
         AWSApi.instance().s3.head_bucket(bucket_name=bucket)
         # Reject buckets whose versioning setting is anything other than 'Enabled'.
         versioning_status = AWSApi.instance().s3.get_bucket_versioning_status(bucket)
         if versioning_status != "Enabled":
             self._add_failure(
                 "The S3 bucket {0} specified cannot be used by cluster "
                 "because versioning setting is: {1}, not 'Enabled'. Please enable bucket versioning."
                 .format(bucket, versioning_status),
                 FailureLevel.ERROR,
             )
     except AWSClientError as e:
         self._add_failure(str(e), FailureLevel.ERROR)