Exemplo n.º 1
0
    def __init_sections_from_cfn(self, cluster_name):
        """
        Build the cluster section from the CloudFormation stack of an existing cluster.

        The stack is fetched and stored in ``self.cfn_stack``. When version enforcement is enabled and the
        stack version differs from the installed one, ``self.error`` is invoked. The cluster model is then
        inferred from the stack, the matching section is registered and finally filled from the stack
        parameters, the JSON configuration (unless loading it is skipped) and the stack tags.

        :param cluster_name: name of the cluster whose stack must be read
        """
        try:
            self.cfn_stack = get_stack(get_stack_name(cluster_name))

            if self.__enforce_version:
                if get_stack_version(self.cfn_stack) != get_installed_version():
                    version_mismatch = (
                        "The cluster {0} was created with a different version of ParallelCluster: {1}. "
                        "Installed version is {2}. This operation may only be performed using the same ParallelCluster "
                        "version used to create the cluster."
                    )
                    self.error(
                        version_mismatch.format(
                            cluster_name, get_stack_version(self.cfn_stack), get_installed_version()
                        )
                    )

            # Gather the three storage sources: stack parameters, JSON configuration and stack tags
            cfn_params = self.cfn_stack.get("Parameters")
            if self.__skip_load_json_config:
                json_params = None
            else:
                json_params = self.__load_json_config(self.cfn_stack)
            cfn_tags = self.cfn_stack.get("Tags")

            # Infer cluster model and load cluster section accordingly
            cluster_model = infer_cluster_model(cfn_stack=self.cfn_stack)
            definition = cluster_model.get_cluster_section_definition()
            section = ClusterCfnSection(section_definition=definition, pcluster_config=self)

            # The section is registered before being populated
            self.add_section(section)
            section.from_storage(StorageData(cfn_params, json_params, cfn_tags))

        except ClientError as client_error:
            reason = client_error.response.get("Error").get("Message")
            self.error(
                "Unable to retrieve the configuration of the cluster '{0}'.\n{1}".format(cluster_name, reason)
            )
Exemplo n.º 2
0
    def __retrieve_cluster_config(self, bucket, artifact_directory):
        """
        Download the cluster configuration JSON stored in S3 for this cluster.

        The S3 object version to fetch is read from the ``CLUSTER_CONFIG`` item of the DynamoDB table named
        after the cluster stack. When that item is missing, or carries no version, no ``VersionId`` is
        passed to S3 and the latest object version is used.

        :param bucket: name of the S3 bucket holding the cluster artifacts
        :param artifact_directory: key prefix under which ``configs/cluster-config.json`` is stored
        :return: the parsed JSON configuration, with objects loaded as ``OrderedDict``; nothing is returned
            if ``self.error`` returns after a failure
        """
        table = boto3.resource("dynamodb").Table(
            get_stack_name(self.cluster_name))
        config_version = None  # Use latest if not found
        try:
            config_version_item = table.get_item(ConsistentRead=True,
                                                 Key={"Id": "CLUSTER_CONFIG"})
            # Both conditions are required: the response is a non-empty dict even when no item matches,
            # in which case it simply carries no "Item" key and the latest version must be used.
            if config_version_item and "Item" in config_version_item:
                config_version = config_version_item["Item"].get("Version")
        except Exception as e:
            self.error(
                "Failed when retrieving cluster config version from DynamoDB with error {0}"
                .format(e))

        try:
            # Only pin the S3 object version when one was found in DynamoDB
            config_version_args = {
                "VersionId": config_version
            } if config_version else {}
            s3_object = boto3.resource("s3").Object(
                bucket, "{prefix}/configs/cluster-config.json".format(
                    prefix=artifact_directory))
            json_str = s3_object.get(
                **config_version_args)["Body"].read().decode("utf-8")
            # OrderedDict keeps the key order of the stored document
            return json.loads(json_str, object_pairs_hook=OrderedDict)
        except Exception as e:
            self.error(
                "Unable to load configuration from bucket '{bucket}/{prefix}'.\n{error}"
                .format(bucket=bucket, prefix=artifact_directory, error=e))
Exemplo n.º 3
0
def start(args):
    """Restore ASG limits or awsbatch CE to min/max/desired."""
    stack_name = utils.get_stack_name(args.cluster_name)
    config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = config.get_section("cluster")

    if cluster_section.get_param_value("scheduler") != "awsbatch":
        # ASG-backed fleet: the minimum/desired size is the initial queue size only when it must be maintained
        LOGGER.info("Starting compute fleet : %s", args.cluster_name)
        upper_bound = cluster_section.get_param_value("max_queue_size")
        if cluster_section.get_param_value("maintain_initial_size"):
            lower_bound = cluster_section.get_param_value("initial_queue_size")
        else:
            lower_bound = 0
        asg_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=asg_name, min=lower_bound, max=upper_bound, desired=lower_bound)
        return

    # awsbatch: restore the vCPU limits of the compute environment from the configuration
    LOGGER.info("Enabling AWS Batch compute environment : %s", args.cluster_name)
    vcpus = {
        name: cluster_section.get_param_value(name)
        for name in ("max_vcpus", "desired_vcpus", "min_vcpus")
    }
    ce_name = _get_batch_ce(stack_name)
    _start_batch_ce(
        ce_name=ce_name,
        min_vcpus=vcpus["min_vcpus"],
        desired_vcpus=vcpus["desired_vcpus"],
        max_vcpus=vcpus["max_vcpus"],
    )
Exemplo n.º 4
0
def status(args):  # noqa: C901 FIXME!!!
    """
    Print the CloudFormation stack status of a cluster.

    The current status is always written to stdout. Unless ``args.nowait`` is set, the stack is polled
    every 5 seconds until it reaches one of the final states listed in the loop below. Then, depending on
    that state, either the head node state is polled and the stack outputs / compute fleet status are
    printed, or the failed stack events are logged.

    Exits with code 1 on a ``ClientError`` and with code 0 on a keyboard interrupt.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file`` and ``nowait`` are read.
    """
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        stack = utils.get_stack(stack_name, cfn)
        # "\r" rewrites the same terminal line at every refresh
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()
        if not args.nowait:
            # Keep polling while the stack is in any state not listed here
            while stack.get("StackStatus") not in [
                "CREATE_COMPLETE",
                "UPDATE_COMPLETE",
                "UPDATE_ROLLBACK_COMPLETE",
                "ROLLBACK_COMPLETE",
                "CREATE_FAILED",
                "DELETE_FAILED",
            ]:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                # Only the first returned event is shown as progress information
                events = utils.get_stack_events(stack_name)[0]
                # Pad to 80 columns so that a shorter line fully overwrites the previous one
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus"))
            sys.stdout.flush()
            if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]:
                # Stack outputs are printed only when the head node is reported as running
                state = _poll_head_node_state(stack_name)
                if state == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif stack.get("StackStatus") in ["ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED"]:
                # Log every failed resource event to help diagnosing the failure
                events = utils.get_stack_events(stack_name)
                for event in events:
                    if event.get("ResourceStatus") in ["CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED"]:
                        LOGGER.info(
                            "%s %s %s %s %s",
                            event.get("Timestamp"),
                            event.get("ResourceStatus"),
                            event.get("ResourceType"),
                            event.get("LogicalResourceId"),
                            event.get("ResourceStatusReason"),
                        )
        else:
            # Terminate the status line written above
            sys.stdout.write("\n")
            sys.stdout.flush()
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
Exemplo n.º 5
0
 def start(self, args, pcluster_config):
     """Start the compute fleet by restoring the vCPU limits of the AWS Batch compute environment."""
     LOGGER.info("Enabling AWS Batch compute environment : %s", args.cluster_name)
     stack_name = utils.get_stack_name(args.cluster_name)
     section = pcluster_config.get_section("cluster")
     # Limits are read from the cluster section of the configuration
     vcpus = {
         name: section.get_param_value(name)
         for name in ("max_vcpus", "desired_vcpus", "min_vcpus")
     }
     compute_environment = utils.get_batch_ce(stack_name)
     self._start_batch_ce(
         ce_name=compute_environment,
         min_vcpus=vcpus["min_vcpus"],
         desired_vcpus=vcpus["desired_vcpus"],
         max_vcpus=vcpus["max_vcpus"],
     )
Exemplo n.º 6
0
def delete(args):
    """
    Delete the CloudFormation stack of a cluster and report the deletion progress.

    The stack existence is checked first through ``describe_stacks``. Unless ``args.nowait`` is set, the
    stack status is polled every 5 seconds while it is ``DELETE_IN_PROGRESS``.

    Exits with code 0 when the stack disappears after the deletion was requested or on a keyboard
    interrupt, and with code 1 on any other ``ClientError``.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file`` and ``nowait`` are read.
    """
    # Becomes True once the deletion has been requested, see the ClientError handler below
    saw_update = False
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.describe_stacks(StackName=stack_name)
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        # "\r" rewrites the same terminal line at every refresh
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not args.nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(stack_name,
                                               cfn).get("StackStatus")
                # Only the first returned event is shown as progress information
                events = cfn.describe_stack_events(
                    StackName=stack_name).get("StackEvents")[0]
                # Pad to 80 columns so that a shorter line fully overwrites the previous one
                resource_status = ("Status: %s - %s" %
                                   (events.get("LogicalResourceId"),
                                    events.get("ResourceStatus"))).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            # Terminate the status line written above
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info(
                "Cluster did not delete successfully. Run 'pcluster delete %s' again",
                args.cluster_name)
    except ClientError as e:
        # A "does not exist" error raised after the deletion request is treated as a completed deletion;
        # the same error raised before it falls through and is reported as a failure.
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
Exemplo n.º 7
0
def _delete_cluster(cluster_name, nowait):
    """
    Delete cluster described by cluster_name.

    Unless ``nowait`` is set, the stack status is polled every 5 seconds while it is
    ``DELETE_IN_PROGRESS`` and the cluster nodes are terminated at the end through
    ``_terminate_cluster_nodes``.

    Exits with code 0 when the stack disappears after the deletion was requested or on a keyboard
    interrupt, and with code 1 on any other ``ClientError``.

    :param cluster_name: name of the cluster to delete
    :param nowait: when truthy, return right after requesting the deletion and skip node termination
    """
    cfn = boto3.client("cloudformation")
    # Becomes True once the deletion has been requested, see the ClientError handler below
    saw_update = False
    terminate_compute_fleet = not nowait
    stack_name = utils.get_stack_name(cluster_name)
    try:
        # delete_stack does not raise an exception if stack does not exist, and no explicit existence
        # check is done here. NOTE(review): presumably callers verify the stack exists first - confirm.
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        # "\r" rewrites the same terminal line at every refresh
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(
                    stack_name, cfn, raise_on_error=True).get("StackStatus")
                # Only the first returned event is shown as progress information
                events = utils.get_stack_events(stack_name,
                                                raise_on_error=True)[0]
                # Pad to 80 columns so that a shorter line fully overwrites the previous one
                resource_status = ("Status: %s - %s" %
                                   (events.get("LogicalResourceId"),
                                    events.get("ResourceStatus"))).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            # Terminate the status line written above
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info(
                "Cluster did not delete successfully. Run 'pcluster delete %s' again",
                cluster_name)
    except ClientError as e:
        # A "does not exist" error raised after the deletion request is treated as a completed deletion
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        # The user gave up waiting: do not terminate the nodes in the finally clause
        terminate_compute_fleet = False
        LOGGER.info("\nExiting...")
        sys.exit(0)
    finally:
        # Runs also when sys.exit is called in the handlers above
        if terminate_compute_fleet:
            _terminate_cluster_nodes(stack_name)
Exemplo n.º 8
0
 def start(self, args, pcluster_config):
     """Start the compute fleet by restoring the limits of the Auto Scaling group."""
     LOGGER.info("Starting compute fleet: %s", args.cluster_name)
     section = pcluster_config.get_section("cluster")
     stack_name = utils.get_stack_name(args.cluster_name)
     upper_bound = section.get_param_value("max_queue_size")
     # The initial queue size is used as minimum/desired size only when it must be maintained
     if section.get_param_value("maintain_initial_size"):
         lower_bound = section.get_param_value("initial_queue_size")
     else:
         lower_bound = 0
     group_name = utils.get_asg_name(stack_name)
     utils.set_asg_limits(asg_name=group_name, min=lower_bound, max=upper_bound, desired=lower_bound)
Exemplo n.º 9
0
    def __init_sections_from_cfn(self, cluster_name):
        """
        Build the cluster section from the CloudFormation parameters of an existing cluster.

        ``self.error`` is invoked when the stack cannot be retrieved because of a ``ClientError``.

        :param cluster_name: name of the cluster whose stack must be read
        """
        try:
            stack = get_stack(get_stack_name(cluster_name))

            # The section class is declared by the CLUSTER definition itself
            section_class = CLUSTER.get("type")
            empty_section = section_class(section_definition=CLUSTER, pcluster_config=self)
            stack_parameters = stack.get("Parameters", [])
            self.add_section(empty_section.from_cfn_params(cfn_params=stack_parameters))
        except ClientError as client_error:
            reason = client_error.response.get("Error").get("Message")
            self.error(
                "Unable to retrieve the configuration of the cluster '{0}'.\n{1}".format(cluster_name, reason)
            )
Exemplo n.º 10
0
def delete(args):
    """
    Delete a cluster, optionally preserving its CloudWatch log groups.

    When the stack of the cluster does not exist, a warning is emitted and the process exits with code 0.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file``, ``keep_logs`` and ``nowait`` are read.
    """
    PclusterConfig.init_aws(config_file=args.config_file)
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    if utils.stack_exists(stack_name):
        if args.keep_logs:
            _persist_cloudwatch_log_groups(args.cluster_name)
    else:
        # Nothing to delete: tell the user and stop here
        if args.keep_logs:
            log_groups_warning = "Stack for {0} does not exist. Cannot prevent its log groups from being deleted."
            utils.warn(log_groups_warning.format(args.cluster_name))
        already_deleted_warning = "Cluster {0} has already been deleted."
        utils.warn(already_deleted_warning.format(args.cluster_name))
        sys.exit(0)

    _delete_cluster(args.cluster_name, args.nowait)
Exemplo n.º 11
0
def stop(args):
    """Set ASG limits or awsbatch ce to min/max/desired = 0/0/0."""
    stack_name = utils.get_stack_name(args.cluster_name)
    config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = config.get_section("cluster")

    if cluster_section.get_param_value("scheduler") != "awsbatch":
        # ASG-backed fleet: zero out every limit of the Auto Scaling group
        LOGGER.info("Stopping compute fleet : %s", args.cluster_name)
        group_name = _get_asg_name(stack_name)
        _set_asg_limits(asg_name=group_name, min=0, max=0, desired=0)
        return

    LOGGER.info("Disabling AWS Batch compute environment : %s", args.cluster_name)
    compute_environment = _get_batch_ce(stack_name)
    _stop_batch_ce(ce_name=compute_environment)
Exemplo n.º 12
0
def instances(args):
    """
    Log the instances of a cluster, one per line.

    With the awsbatch scheduler only the instances returned by ``_get_ec2_instances`` are listed, followed
    by a hint on how to list the compute instances.

    :param args: parsed CLI arguments; ``cluster_name`` and ``config_file`` are read.
    """
    stack_name = utils.get_stack_name(args.cluster_name)
    config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    cluster_section = config.get_section("cluster")

    listing = list(_get_ec2_instances(stack_name))
    if cluster_section.get_param_value("scheduler") != "awsbatch":
        listing += _get_asg_instances(stack_name)

    for entry in listing:
        LOGGER.info("%s         %s", entry[0], entry[1])

    if cluster_section.get_param_value("scheduler") == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
Exemplo n.º 13
0
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    The session URL is retrieved through an ssh command run against the head node (up to 4 attempts), then
    it is either logged, when ``args.show_url`` is set, or opened in the web browser.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Gather what is needed by the ssh command to execute in the head node instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    head_node_ip, username = get_head_node_ip_and_username(args.cluster_name)
    if args.key_path:
        key_option = "-i {0}".format(args.key_path)
    else:
        key_option = ""
    cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        HEAD_NODE_IP=head_node_ip,
        KEY=key_option,
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    try:
        url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4)
        url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
    except DCVConnectionError as connection_error:
        error(
            "Something went wrong during DCV connection.\n{0}"
            "Please check the logs in the /var/log/parallelcluster/ folder "
            "of the head node and submit an issue {1}\n".format(connection_error, PCLUSTER_ISSUES_LINK)
        )

    if not args.show_url:
        try:
            # A falsy result means that no browser could be launched
            if not webbrowser.open_new(url):
                raise webbrowser.Error("Unable to open the Web browser.")
        except webbrowser.Error as browser_error:
            LOGGER.info("{0}\n{1}".format(browser_error, url_message))
        return

    LOGGER.info(url_message)
Exemplo n.º 14
0
def instances(args):
    """
    Log the instances of a cluster, one per line.

    The scheduler is read from the parameters of the CloudFormation stack. With the awsbatch scheduler only
    the head node is listed, followed by a hint on how to list the compute instances.

    :param args: parsed CLI arguments; ``cluster_name`` and ``config_file`` are read.
    """
    stack_name = utils.get_stack_name(args.cluster_name)
    PclusterConfig.init_aws(config_file=args.config_file)
    stack_parameters = utils.get_stack(stack_name).get("Parameters")
    scheduler = utils.get_cfn_param(stack_parameters, "Scheduler")

    listing = []
    head_nodes = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)
    if head_nodes:
        head_node_id = head_nodes[0].get("InstanceId")
        listing.append(("MasterServer", head_node_id))

    if scheduler != "awsbatch":
        listing += _get_compute_instances(stack_name)

    for entry in listing:
        LOGGER.info("%s         %s", entry[0], entry[1])

    if scheduler == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
Exemplo n.º 15
0
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    An ssh command is run against the master instance to prepare a DCV session; the server port, session
    id and session token are parsed from its output and combined into the URL that is opened in the web
    browser. When the browser cannot be opened, the URL is logged instead.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(
    )  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the master instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    master_ip, username = get_master_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        MASTER_IP=master_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    # Connect by ssh to the master instance and prepare DCV session
    try:
        LOGGER.debug("SSH command: {0}".format(cmd))
        output = _check_command_output(cmd)
        # At first ssh connection, the ssh command alerts it is adding the host to the known hosts list
        if re.search("Permanently added .* to the list of known hosts.",
                     output):
            # Run the command again to get an output without the known hosts alert
            output = _check_command_output(cmd)

        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)",
            output)
        if dcv_parameters:
            dcv_server_port = dcv_parameters.group(1)
            dcv_session_id = dcv_parameters.group(2)
            dcv_session_token = dcv_parameters.group(3)
        else:
            error(
                "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
                "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
                "of the master instance and submit an issue {1}.".format(
                    cmd, PCLUSTER_ISSUES_LINK))

    except sub.CalledProcessError as e:
        # A missing connect script is reported with a dedicated message
        if "{0}: No such file or directory".format(
                DCV_CONNECT_SCRIPT) in e.output:
            error(
                "The cluster {0} has been created with an old version of ParallelCluster "
                "without the DCV support.".format(args.cluster_name))
        else:
            error("Something went wrong during DCV connection.\n{0}".format(
                e.output))

    # Open web browser
    url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=master_ip,
        PORT=dcv_server_port,
        TOKEN=dcv_session_token,
        SESSION_ID=dcv_session_id)
    try:
        # open_new reports a browser that could not be launched by returning False rather than by
        # raising, so that outcome is turned into the same error path to always show the URL to the user.
        if not webbrowser.open_new(url):
            raise webbrowser.Error("Unable to open the Web browser.")
    except webbrowser.Error:
        LOGGER.info(
            "Unable to open the Web browser. "
            "Please use the following URL in your browser within 30 seconds:\n{0}"
            .format(url))
Exemplo n.º 16
0
def create(args):  # noqa: C901 FIXME!!!
    """
    Create the CloudFormation stack of a new cluster from the configuration file.

    The configuration is loaded, validated and converted to CloudFormation parameters. With the awsbatch
    scheduler a bucket holding the batch resources is created first and is deleted again if the creation
    fails with a handled exception other than a keyboard interrupt.

    Exits with code 1 on errors and with code 0 on a keyboard interrupt.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file``, ``cluster_template``,
        ``template_url``, ``tags``, ``extra_parameters``, ``norollback`` and ``nowait`` are read.
    """
    LOGGER.info("Beginning cluster creation for cluster: %s",
                args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    # Build the config based on args
    pcluster_config = PclusterConfig(config_file=args.config_file,
                                     cluster_label=args.cluster_template,
                                     fail_on_file_absence=True)
    pcluster_config.validate()
    # get CFN parameters, template url and tags from config
    cluster_section = pcluster_config.get_section("cluster")
    cfn_params = pcluster_config.to_cfn()

    _check_for_updates(pcluster_config)

    # Set only when a bucket is created, so that the error handlers know whether to delete it
    batch_temporary_bucket = None
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # If scheduler is awsbatch create bucket with resources
        if cluster_section.get_param_value("scheduler") == "awsbatch":
            batch_resources = pkg_resources.resource_filename(
                __name__, "resources/batch")
            batch_temporary_bucket = _create_bucket_with_batch_resources(
                stack_name=stack_name,
                resources_dir=batch_resources,
                region=pcluster_config.region)
            cfn_params["ResourcesS3Bucket"] = batch_temporary_bucket

        LOGGER.info("Creating stack named: %s", stack_name)
        LOGGER.debug(cfn_params)

        # determine the CloudFormation Template URL to use
        template_url = _evaluate_pcluster_template_url(
            pcluster_config, preferred_template_url=args.template_url)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{
            "ParameterKey": key,
            "ParameterValue": value
        } for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if not args.nowait:
            # Wait for the creation and then print the outputs of the resulting stack
            utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            _print_stack_outputs(result_stack)
        else:
            # Only report the status reached right after the creation request
            stack_status = utils.get_stack(stack_name,
                                           cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        # NOTE(review): unlike the other handlers, the batch bucket is not deleted here - confirm intended
        LOGGER.info("\nExiting...")
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:")
        LOGGER.critical(e)
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        if batch_temporary_bucket:
            utils.delete_s3_bucket(bucket_name=batch_temporary_bucket)
        sys.exit(1)
Exemplo n.º 17
0
 def stack_name(self):
     """Get the name of the stack this patch is referred to, or None if the base config has no cluster name."""
     if not hasattr(self.base_config, "cluster_name"):
         return None
     return utils.get_stack_name(self.base_config.cluster_name)
Exemplo n.º 18
0
 def stop(self, args, pcluster_config):
     """Stop the compute fleet by setting every limit of the Auto Scaling group to zero."""
     LOGGER.info("Stopping compute fleet: %s", args.cluster_name)
     group_name = utils.get_asg_name(utils.get_stack_name(args.cluster_name))
     utils.set_asg_limits(asg_name=group_name, min=0, max=0, desired=0)
Exemplo n.º 19
0
def create(args):  # noqa: C901 FIXME!!!
    """
    Create the CloudFormation stack of a new cluster from the configuration file.

    The cluster name is validated, the configuration is loaded, validated and converted (SIT -> HIT) if
    needed, the S3 bucket with the cluster resources is set up and finally the stack is created. The S3
    resources are cleaned up when the creation fails.

    Exits with code 1 on errors or when the stack creation is not verified, and with code 0 on a keyboard
    interrupt.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file``, ``cluster_template``,
        ``template_url``, ``tags``, ``extra_parameters``, ``norollback`` and ``nowait`` are read.
    """
    LOGGER.info("Beginning cluster creation for cluster: %s", args.cluster_name)
    LOGGER.debug("Building cluster config based on args %s", str(args))

    _validate_cluster_name(args.cluster_name)

    # Build the config based on args
    pcluster_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    pcluster_config.validate()

    # Automatic SIT -> HIT conversion, if needed
    HitConverter(pcluster_config).convert()

    # get CFN parameters, template url and tags from config
    storage_data = pcluster_config.to_storage()
    cfn_params = storage_data.cfn_params

    _check_for_updates(pcluster_config)

    # Initialized before the try block so that the error handlers can always pass them to the cleanup
    bucket_name = None
    artifact_directory = None
    cleanup_bucket = False
    try:
        cfn_client = boto3.client("cloudformation")
        stack_name = utils.get_stack_name(args.cluster_name)

        # merge tags from configuration, command-line and internal ones
        tags = _evaluate_tags(pcluster_config, preferred_tags=args.tags)

        bucket_name, artifact_directory, cleanup_bucket = _setup_bucket_with_resources(
            pcluster_config, storage_data, stack_name, tags
        )
        cfn_params["ResourcesS3Bucket"] = bucket_name
        cfn_params["ArtifactS3RootDirectory"] = artifact_directory
        # The flag is converted to a string before being passed as a stack parameter
        cfn_params["RemoveBucketOnDeletion"] = str(cleanup_bucket)

        LOGGER.info("Creating stack named: %s", stack_name)

        # determine the CloudFormation Template URL to use
        template_url = evaluate_pcluster_template_url(pcluster_config, preferred_template_url=args.template_url)

        # append extra parameters from command-line
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # prepare input parameters for stack creation and create the stack
        LOGGER.debug(cfn_params)
        params = [{"ParameterKey": key, "ParameterValue": value} for key, value in cfn_params.items()]
        stack = cfn_client.create_stack(
            StackName=stack_name,
            TemplateURL=template_url,
            Parameters=params,
            Capabilities=["CAPABILITY_IAM"],
            DisableRollback=args.norollback,
            Tags=tags,
        )
        LOGGER.debug("StackId: %s", stack.get("StackId"))

        if not args.nowait:
            verified = utils.verify_stack_creation(stack_name, cfn_client)
            LOGGER.info("")
            result_stack = utils.get_stack(stack_name, cfn_client)
            # The outputs are printed even when the creation is not verified, then the failure is reported
            _print_stack_outputs(result_stack)
            if not verified:
                sys.exit(1)
        else:
            # Only report the status reached right after the creation request
            stack_status = utils.get_stack(stack_name, cfn_client).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        # NOTE(review): stack_name is assigned inside the try block, so it looks unbound here if the
        # interrupt arrives before that assignment - confirm
        if not utils.stack_exists(stack_name):
            # Cleanup S3 artifacts if stack is not created yet
            utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(0)
    except KeyError as e:
        LOGGER.critical("ERROR: KeyError - reason:\n%s", e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
    except Exception as e:
        LOGGER.critical(e)
        utils.cleanup_s3_resources(bucket_name, artifact_directory, cleanup_bucket)
        sys.exit(1)
Exemplo n.º 20
0
 def stop(self, args, pcluster_config):
     """Stop the compute fleet by stopping the AWS Batch compute environment of the cluster."""
     LOGGER.info("Disabling AWS Batch compute environment : %s", args.cluster_name)
     compute_environment = utils.get_batch_ce(utils.get_stack_name(args.cluster_name))
     self._stop_batch_ce(ce_name=compute_environment)
Exemplo n.º 21
0
def update(args):  # noqa: C901 FIXME!!!
    """
    Update the CloudFormation stack of a cluster with the parameters of the configuration file.

    The stack is updated with its previous template. Unless ``args.nowait`` is set, the stack events are
    printed every 5 seconds while the stack is ``UPDATE_IN_PROGRESS``.

    Exits with code 1 on a ``ClientError`` raised in the try block and with code 0 on a keyboard interrupt.

    :param args: parsed CLI arguments; ``cluster_name``, ``config_file``, ``cluster_template``,
        ``reset_desired``, ``extra_parameters`` and ``nowait`` are read.
    """
    LOGGER.info("Updating: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    pcluster_config = PclusterConfig(config_file=args.config_file,
                                     cluster_label=args.cluster_template,
                                     fail_on_file_absence=True)
    pcluster_config.validate()
    cfn_params = pcluster_config.to_cfn()

    cluster_section = pcluster_config.get_section("cluster")
    cfn = boto3.client("cloudformation")
    if cluster_section.get_param_value("scheduler") != "awsbatch":
        if not args.reset_desired:
            # Keep the current capacity of the Auto Scaling group instead of the configured one
            asg_name = _get_asg_name(stack_name)
            desired_capacity = (
                boto3.client("autoscaling").describe_auto_scaling_groups(
                    AutoScalingGroupNames=[asg_name]).get(
                        "AutoScalingGroups")[0].get("DesiredCapacity"))
            cfn_params["DesiredSize"] = str(desired_capacity)
    else:
        if args.reset_desired:
            LOGGER.info(
                "reset_desired flag does not work with awsbatch scheduler")
        params = utils.get_stack(stack_name, cfn).get("Parameters")

        # Carry over the resources bucket already set on the existing stack
        for parameter in params:
            if parameter.get("ParameterKey") == "ResourcesS3Bucket":
                cfn_params["ResourcesS3Bucket"] = parameter.get(
                    "ParameterValue")

    try:
        LOGGER.debug(cfn_params)
        if args.extra_parameters:
            LOGGER.debug("Adding extra parameters to the CFN parameters")
            cfn_params.update(dict(args.extra_parameters))

        # Convert the parameters dict to the list of key/value dicts passed to update_stack
        cfn_params = [{
            "ParameterKey": key,
            "ParameterValue": value
        } for key, value in cfn_params.items()]
        LOGGER.info("Calling update_stack")
        cfn.update_stack(StackName=stack_name,
                         UsePreviousTemplate=True,
                         Parameters=cfn_params,
                         Capabilities=["CAPABILITY_IAM"])
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        if not args.nowait:
            while stack_status == "UPDATE_IN_PROGRESS":
                stack_status = utils.get_stack(stack_name,
                                               cfn).get("StackStatus")
                # Only the first returned event is shown as progress information
                events = cfn.describe_stack_events(
                    StackName=stack_name).get("StackEvents")[0]
                # Pad to 80 columns so that a shorter line fully overwrites the previous one
                resource_status = ("Status: %s - %s" %
                                   (events.get("LogicalResourceId"),
                                    events.get("ResourceStatus"))).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
                time.sleep(5)
        else:
            # Only report the status reached right after the update request
            stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
            LOGGER.info("Status: %s", stack_status)
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
Exemplo n.º 22
0
def test_get_stack_name():
    """Check that utils.get_stack_name prepends "parallelcluster-" to the cluster name."""
    actual_stack_name = utils.get_stack_name(FAKE_CLUSTER_NAME)
    assert_that(actual_stack_name).is_equal_to(
        "parallelcluster-{0}".format(FAKE_CLUSTER_NAME))
Exemplo n.º 23
0
"""This module provides unit tests for the functions in the pcluster.utils module."""

import json

import pytest

import pcluster.utils as utils
from assertpy import assert_that
from tests.common import MockedBoto3Request

# Cluster name used as input by the tests in this module.
FAKE_CLUSTER_NAME = "cluster_name"
# Stack name that utils.get_stack_name derives from FAKE_CLUSTER_NAME.
FAKE_STACK_NAME = utils.get_stack_name(FAKE_CLUSTER_NAME)
# CloudFormation resource type identifier of a stack resource.
STACK_TYPE = "AWS::CloudFormation::Stack"


@pytest.fixture()
def boto3_stubber_path():
    """Return the path of the boto3 reference that boto3_mocker should stub, i.e. the one in pcluster.utils."""
    stubbed_boto3_path = "pcluster.utils.boto3"
    return stubbed_boto3_path


def test_get_stack_name():
    """Check that utils.get_stack_name prepends "parallelcluster-" to the cluster name."""
    computed_name = utils.get_stack_name(FAKE_CLUSTER_NAME)
    assert_that(computed_name).is_equal_to(
        "parallelcluster-{0}".format(FAKE_CLUSTER_NAME))


@pytest.mark.parametrize(
    "template_body,error_message",
    [
Exemplo n.º 24
0
def execute(args):
    """
    Run the update command for the cluster named in args.

    The configuration of the running cluster is loaded through PclusterConfig using args.cluster_name, while the
    target configuration is loaded from args.config_file / args.cluster_template and validated. When both
    _check_cluster_models and _check_changes succeed, the base configuration is updated with the target one,
    resources are uploaded to the cluster S3 bucket and the work is handed over to _update_cluster.

    utils.error is invoked when the stack status contains "IN_PROGRESS" or when an upload fails; the process exits
    with status 1 when one of the two checks does not pass.

    :param args: command line arguments; cluster_name, config_file and cluster_template are read here, and the
        whole object is forwarded to the helper functions
    """
    LOGGER.info("Retrieving configuration from CloudFormation for cluster {0}...".format(args.cluster_name))
    base_config = PclusterConfig(config_file=args.config_file, cluster_name=args.cluster_name)
    current_status = base_config.cfn_stack.get("StackStatus")
    if "IN_PROGRESS" in current_status:
        utils.error("Cannot execute update while stack is in {} status.".format(current_status))

    LOGGER.info("Validating configuration file {0}...".format(args.config_file or ""))
    stack_name = utils.get_stack_name(args.cluster_name)
    target_config = PclusterConfig(
        config_file=args.config_file, cluster_label=args.cluster_template, fail_on_file_absence=True
    )
    target_config.validate()

    # Both checks must pass; the second one is evaluated only when the first one succeeds.
    update_allowed = _check_cluster_models(base_config, target_config, args.cluster_template) and _check_changes(
        args, base_config, target_config
    )
    if not update_allowed:
        LOGGER.info("Update aborted.")
        sys.exit(1)

    # Update base config settings
    base_config.update(target_config)

    cfn_params = base_config.to_cfn()
    cfn_client = boto3.client("cloudformation")
    _restore_cfn_only_params(cfn_client, args, cfn_params, stack_name, target_config)

    bucket_name = cfn_params["ResourcesS3Bucket"]
    tags = _get_target_config_tags_list(target_config)
    artifact_dir = cfn_params["ArtifactS3RootDirectory"]

    hit_enabled = utils.is_hit_enabled_cluster(base_config.cfn_stack)
    template_url = None
    if hit_enabled:
        try:
            hit_json_params = target_config.to_storage().json_params
            upload_hit_resources(bucket_name, artifact_dir, target_config, hit_json_params, tags)
        except Exception:
            utils.error("Failed when uploading resources to cluster S3 bucket {0}".format(bucket_name))
        template_url = evaluate_pcluster_template_url(target_config)

    try:
        dashboard_json_params = target_config.to_storage().json_params
        dashboard_cfn_params = target_config.to_storage().cfn_params
        upload_dashboard_resource(
            bucket_name, artifact_dir, target_config, dashboard_json_params, dashboard_cfn_params
        )
    except Exception:
        utils.error("Failed when uploading the dashboard resource to cluster S3 bucket {0}".format(bucket_name))

    # The previous template is reused only when the cluster is not HIT enabled (template_url stays None then).
    _update_cluster(
        args,
        cfn_client,
        cfn_params,
        stack_name,
        use_previous_template=not hit_enabled,
        template_url=template_url,
        tags=tags,
    )