def status(args):  # noqa: C901 FIXME!!!
    """Show the status of the cluster's CloudFormation stack, following events until a terminal state unless --nowait is set."""
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        stack = utils.get_stack(stack_name, cfn)
        sys.stdout.write("\rStatus: %s" % stack.get("StackStatus"))
        sys.stdout.flush()
        if not args.nowait:
            while stack.get("StackStatus") not in [
                "CREATE_COMPLETE",
                "UPDATE_COMPLETE",
                "UPDATE_ROLLBACK_COMPLETE",
                "ROLLBACK_COMPLETE",
                "CREATE_FAILED",
                "DELETE_FAILED",
            ]:
                time.sleep(5)
                stack = utils.get_stack(stack_name, cfn)
                events = utils.get_stack_events(stack_name)[0]
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack.get("StackStatus"))
            sys.stdout.flush()
            if stack.get("StackStatus") in ["CREATE_COMPLETE", "UPDATE_COMPLETE", "UPDATE_ROLLBACK_COMPLETE"]:
                state = _poll_head_node_state(stack_name)
                if state == "running":
                    _print_stack_outputs(stack)
                _print_compute_fleet_status(args.cluster_name, stack)
            elif stack.get("StackStatus") in ["ROLLBACK_COMPLETE", "CREATE_FAILED", "DELETE_FAILED"]:
                events = utils.get_stack_events(stack_name)
                for event in events:
                    if event.get("ResourceStatus") in ["CREATE_FAILED", "DELETE_FAILED", "UPDATE_FAILED"]:
                        LOGGER.info(
                            "%s %s %s %s %s",
                            event.get("Timestamp"),
                            event.get("ResourceStatus"),
                            event.get("ResourceType"),
                            event.get("LogicalResourceId"),
                            event.get("ResourceStatusReason"),
                        )
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
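

# _poll_head_node_state is called by status() above but not defined in this section.
# The sketch below is a minimal, assumed implementation: it presumes
# utils.describe_cluster_instances returns EC2 instance dicts (as instances() below
# suggests, via its InstanceId lookups) and polls the standard EC2 "State" field
# until the instance leaves a transient state.
def _poll_head_node_state(stack_name):
    """Poll the head node's EC2 state until it is no longer transient, then return it."""
    instance = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)[0]
    state = instance.get("State").get("Name")
    sys.stdout.write("\rHead node status: %s" % state.ljust(80))
    sys.stdout.flush()
    while state in ("pending", "stopping", "shutting-down"):
        time.sleep(5)
        instance = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)[0]
        state = instance.get("State").get("Name")
        sys.stdout.write("\rHead node status: %s" % state.ljust(80))
        sys.stdout.flush()
    sys.stdout.write("\n")
    sys.stdout.flush()
    return state

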
def delete(args):
    """Delete the cluster's CloudFormation stack, waiting for completion unless --nowait is set."""
    saw_update = False
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)

    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    cfn = boto3.client("cloudformation")
    try:
        # delete_stack does not raise an exception if stack does not exist
        # Use describe_stacks to explicitly check if the stack exists
        cfn.describe_stacks(StackName=stack_name)
        cfn.delete_stack(StackName=stack_name)
        saw_update = True
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        sys.stdout.write("\rStatus: %s" % stack_status)
        sys.stdout.flush()
        LOGGER.debug("Status: %s", stack_status)
        if not args.nowait:
            while stack_status == "DELETE_IN_PROGRESS":
                time.sleep(5)
                stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
                events = cfn.describe_stack_events(StackName=stack_name).get("StackEvents")[0]
                resource_status = (
                    "Status: %s - %s" % (events.get("LogicalResourceId"), events.get("ResourceStatus"))
                ).ljust(80)
                sys.stdout.write("\r%s" % resource_status)
                sys.stdout.flush()
            sys.stdout.write("\rStatus: %s\n" % stack_status)
            sys.stdout.flush()
            LOGGER.debug("Status: %s", stack_status)
        else:
            sys.stdout.write("\n")
            sys.stdout.flush()
        if stack_status == "DELETE_FAILED":
            LOGGER.info("Cluster did not delete successfully. Run 'pcluster delete %s' again", args.cluster_name)
    except ClientError as e:
        if e.response.get("Error").get("Message").endswith("does not exist"):
            if saw_update:
                LOGGER.info("\nCluster deleted successfully.")
                sys.exit(0)
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.stdout.flush()
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("\nExiting...")
        sys.exit(0)
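

# utils.get_stack_events is called by status() and delete() but not defined in this
# section; in the real code it lives in the utils module. A minimal sketch, assuming
# it simply wraps the CloudFormation DescribeStackEvents API (which returns events in
# reverse chronological order, so the [0] indexing above picks the most recent event).
# Pagination of long event histories is ignored here for brevity.
def get_stack_events(stack_name):
    """Return the CloudFormation events for stack_name, most recent first."""
    cfn = boto3.client("cloudformation")
    return cfn.describe_stack_events(StackName=stack_name).get("StackEvents")

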
def delete(args):
    """Delete the cluster, optionally preserving its CloudWatch log groups."""
    PclusterConfig.init_aws(config_file=args.config_file)
    LOGGER.info("Deleting: %s", args.cluster_name)
    stack_name = utils.get_stack_name(args.cluster_name)
    if not utils.stack_exists(stack_name):
        if args.keep_logs:
            utils.warn(
                "Stack for {0} does not exist. Cannot prevent its log groups from being deleted.".format(
                    args.cluster_name
                )
            )
        utils.warn("Cluster {0} has already been deleted.".format(args.cluster_name))
        sys.exit(0)
    elif args.keep_logs:
        _persist_cloudwatch_log_groups(args.cluster_name)
    _delete_cluster(args.cluster_name, args.nowait)
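

# _delete_cluster is not defined in this section; a minimal sketch, assuming it
# kicks off the stack deletion and optionally polls it the same way the older
# delete() variant above does. The "does not exist" handling mirrors that variant:
# once deletion completes, DescribeStacks by name starts failing.
def _delete_cluster(cluster_name, nowait):
    """Delete the cluster's CloudFormation stack and optionally wait for completion."""
    stack_name = utils.get_stack_name(cluster_name)
    cfn = boto3.client("cloudformation")
    cfn.delete_stack(StackName=stack_name)
    if nowait:
        sys.stdout.write("\n")
        sys.stdout.flush()
        return
    try:
        stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
        while stack_status == "DELETE_IN_PROGRESS":
            time.sleep(5)
            stack_status = utils.get_stack(stack_name, cfn).get("StackStatus")
            sys.stdout.write("\rStatus: %s" % stack_status.ljust(80))
            sys.stdout.flush()
    except ClientError as e:
        if e.response.get("Error").get("Message").endswith("does not exist"):
            LOGGER.info("\nCluster deleted successfully.")
            return
        raise
    sys.stdout.write("\rStatus: %s\n" % stack_status)
    if stack_status == "DELETE_FAILED":
        LOGGER.info("Cluster did not delete successfully. Run 'pcluster delete %s' again", cluster_name)

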
def instances(args):
    """List the instances that make up the cluster."""
    stack_name = utils.get_stack_name(args.cluster_name)
    PclusterConfig.init_aws(config_file=args.config_file)
    cfn_stack = utils.get_stack(stack_name)
    scheduler = utils.get_cfn_param(cfn_stack.get("Parameters"), "Scheduler")
    instances = []
    head_node_server = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.head_node)
    if head_node_server:
        instances.append(("MasterServer", head_node_server[0].get("InstanceId")))

    if scheduler != "awsbatch":
        instances.extend(_get_compute_instances(stack_name))

    for instance in instances:
        LOGGER.info("%s %s", instance[0], instance[1])

    if scheduler == "awsbatch":
        LOGGER.info("Run 'awsbhosts --cluster %s' to list the compute instances", args.cluster_name)
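

# _get_compute_instances is not defined in this section; a minimal sketch, assuming
# it mirrors the head-node lookup in instances() above. Both utils.NodeType.compute
# (as the counterpart of utils.NodeType.head_node) and the "ComputeFleet" label are
# assumptions.
def _get_compute_instances(stack_name):
    """Return (label, instance_id) tuples for the cluster's compute nodes."""
    compute_instances = utils.describe_cluster_instances(stack_name, node_type=utils.NodeType.compute)
    return [("ComputeFleet", instance.get("InstanceId")) for instance in compute_instances]

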
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the head node instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    head_node_ip, username = get_head_node_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{HEAD_NODE_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        HEAD_NODE_IP=head_node_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    try:
        url = retry(_retrieve_dcv_session_url, func_args=[cmd, args.cluster_name, head_node_ip], attempts=4)
        url_message = "Please use the following one-time URL in your browser within 30 seconds:\n{0}".format(url)
    except DCVConnectionError as e:
        error(
            "Something went wrong during DCV connection.\n{0}"
            "Please check the logs in the /var/log/parallelcluster/ folder "
            "of the head node and submit an issue {1}\n".format(e, PCLUSTER_ISSUES_LINK)
        )

    if args.show_url:
        LOGGER.info(url_message)
        return

    try:
        if not webbrowser.open_new(url):
            raise webbrowser.Error("Unable to open the Web browser.")
    except webbrowser.Error as e:
        LOGGER.info("{0}\n{1}".format(e, url_message))
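

# _retrieve_dcv_session_url and retry are not defined in this section. The sketch
# below reuses the ssh-and-parse logic that the older dcv_connect variant later in
# this section performs inline; raising DCVConnectionError with a message string is
# an assumption, and cluster_name is unused here but kept to match the call site above.
def _retrieve_dcv_session_url(ssh_cmd, cluster_name, head_node_ip):
    """Run the DCV connect script over ssh and build the one-time session URL."""
    LOGGER.debug("SSH command: {0}".format(ssh_cmd))
    try:
        output = _check_command_output(ssh_cmd)
        # At the first ssh connection, a "known hosts" notice replaces the script output
        if re.search("Permanently added .* to the list of known hosts.", output):
            output = _check_command_output(ssh_cmd)
        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)", output
        )
        if not dcv_parameters:
            raise DCVConnectionError("Unable to retrieve DCV session parameters from:\n{0}".format(output))
    except sub.CalledProcessError as e:
        raise DCVConnectionError(e.output)

    return "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=head_node_ip,
        PORT=dcv_parameters.group(1),
        TOKEN=dcv_parameters.group(3),
        SESSION_ID=dcv_parameters.group(2),
    )

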
def list_stacks(args):
    """List all ParallelCluster stacks, with their status and ParallelCluster version."""
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws(config_file=args.config_file)

    try:
        result = []
        for stack in utils.paginate_boto3(boto3.client("cloudformation").describe_stacks):
            if stack.get("ParentId") is None and stack.get("StackName").startswith(PCLUSTER_STACK_PREFIX):
                pcluster_version = _get_pcluster_version_from_stack(stack)
                result.append(
                    [
                        stack.get("StackName")[len(PCLUSTER_STACK_PREFIX) :],  # noqa: E203
                        _colorize(stack.get("StackStatus"), args),
                        pcluster_version,
                    ]
                )
        LOGGER.info(tabulate(result, tablefmt="plain"))
    except ClientError as e:
        LOGGER.critical(e.response.get("Error").get("Message"))
        sys.exit(1)
    except KeyboardInterrupt:
        LOGGER.info("Exiting...")
        sys.exit(0)
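

# _colorize is not defined in this section; a minimal sketch, assuming the CLI
# exposes a color flag on args and uses plain ANSI escape codes: red for failed
# states, yellow for in-progress or rollback states, green otherwise.
def _colorize(stack_status, args):
    """Wrap stack_status in an ANSI color code when color output is requested."""
    if not getattr(args, "color", False):
        return stack_status
    if stack_status.endswith("FAILED"):
        color = "31m"  # red
    elif stack_status.endswith("IN_PROGRESS") or "ROLLBACK" in stack_status:
        color = "33m"  # yellow
    else:
        color = "32m"  # green
    return "\033[%s%s\033[0m" % (color, stack_status)

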
def dcv_connect(args):
    """
    Execute pcluster dcv connect command.

    :param args: pcluster cli arguments.
    """
    # Parse configuration file to read the AWS section
    PclusterConfig.init_aws()  # FIXME it always searches for the default configuration file

    # Prepare ssh command to execute in the master instance
    stack = get_stack(get_stack_name(args.cluster_name))
    shared_dir = get_cfn_param(stack.get("Parameters"), "SharedDir")
    master_ip, username = get_master_ip_and_username(args.cluster_name)
    cmd = 'ssh {CFN_USER}@{MASTER_IP} {KEY} "{REMOTE_COMMAND} {DCV_SHARED_DIR}"'.format(
        CFN_USER=username,
        MASTER_IP=master_ip,
        KEY="-i {0}".format(args.key_path) if args.key_path else "",
        REMOTE_COMMAND=DCV_CONNECT_SCRIPT,
        DCV_SHARED_DIR=shared_dir,
    )

    # Connect by ssh to the master instance and prepare DCV session
    try:
        LOGGER.debug("SSH command: {0}".format(cmd))
        output = _check_command_output(cmd)
        # At first ssh connection, the ssh command alerts it is adding the host to the known hosts list
        if re.search("Permanently added .* to the list of known hosts.", output):
            output = _check_command_output(cmd)

        dcv_parameters = re.search(
            r"PclusterDcvServerPort=([\d]+) PclusterDcvSessionId=([\w]+) PclusterDcvSessionToken=([\w-]+)", output
        )
        if dcv_parameters:
            dcv_server_port = dcv_parameters.group(1)
            dcv_session_id = dcv_parameters.group(2)
            dcv_session_token = dcv_parameters.group(3)
        else:
            error(
                "Something went wrong during DCV connection. Please manually execute the command:\n{0}\n"
                "If the problem persists, please check the logs in the /var/log/parallelcluster/ folder "
                "of the master instance and submit an issue {1}.".format(cmd, PCLUSTER_ISSUES_LINK)
            )
    except sub.CalledProcessError as e:
        if "{0}: No such file or directory".format(DCV_CONNECT_SCRIPT) in e.output:
            error(
                "The cluster {0} has been created with an old version of ParallelCluster "
                "without the DCV support.".format(args.cluster_name)
            )
        else:
            error("Something went wrong during DCV connection.\n{0}".format(e.output))

    # Open web browser
    url = "https://{IP}:{PORT}?authToken={TOKEN}#{SESSION_ID}".format(
        IP=master_ip, PORT=dcv_server_port, TOKEN=dcv_session_token, SESSION_ID=dcv_session_id
    )
    try:
        webbrowser.open_new(url)
    except webbrowser.Error:
        LOGGER.info(
            "Unable to open the Web browser. "
            "Please use the following URL in your browser within 30 seconds:\n{0}".format(url)
        )
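

# _check_command_output is used by both dcv_connect variants but not defined in this
# section; a minimal sketch, assuming "sub" is the subprocess module (imported as
# "import subprocess as sub"). universal_newlines=True makes the captured output a
# str, which the e.output string checks in the handlers above rely on.
def _check_command_output(cmd):
    """Run cmd in a shell and return its combined stdout/stderr as text."""
    return sub.check_output(cmd, shell=True, stderr=sub.STDOUT, universal_newlines=True).strip()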