def terminate_virtualized_cluster(cls, keyname, is_verbose):
    """Stops all API services running on all nodes in the currently running
    AppScale deployment.

    Args:
      keyname: The name of the SSH keypair used for this AppScale deployment.
      is_verbose: A bool that indicates if we should print the commands
        executed to stdout.
    Raises:
      AppScaleException: If the deployment's secret key cannot be read, or if
        the number of machines confirmed as shut down does not match the
        number of machines in the deployment.
    """
    AppScaleLogger.log(
        "Terminating appscale deployment with keyname {0}".format(keyname))
    time.sleep(2)

    shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
    try:
        secret = LocalState.get_secret_key(keyname)
    except IOError:
        # We couldn't find the secret key: AppScale is most likely not
        # running.
        raise AppScaleException("Couldn't find AppScale secret key.")

    acc = AppControllerClient(shadow_host, secret)
    try:
        all_ips = acc.get_all_public_ips()
    except Exception as exception:
        # The AppController could not be queried, so fall back to the IPs
        # recorded locally for this keyname.
        AppScaleLogger.warn(
            'Saw Exception while getting deployments IPs {0}'.format(
                str(exception)))
        all_ips = LocalState.get_all_public_ips(keyname)

    # Ask every node to stop its AppController concurrently, then wait for
    # all of the requests to return.
    threads = []
    for ip in all_ips:
        thread = threading.Thread(target=cls.stop_remote_appcontroller,
                                  args=(ip, keyname, is_verbose))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    boxes_shut_down = 0
    is_running_regex = re.compile("appscale-controller stop")
    for ip in all_ips:
        AppScaleLogger.log(
            "Shutting down AppScale API services at {0}".format(ip))
        while True:
            remote_output = cls.ssh(ip, keyname, 'ps x', is_verbose)
            AppScaleLogger.verbose(remote_output, is_verbose)
            # search(), not match(): the process listing starts with a header
            # row, so a pattern anchored at the start of the output could
            # never see a stop command that is still running.
            if not is_running_regex.search(remote_output):
                break
            time.sleep(0.3)
        boxes_shut_down += 1

    # Safety net: every machine must have been confirmed as shut down.
    if boxes_shut_down != len(all_ips):
        raise AppScaleException(
            "Couldn't terminate your AppScale deployment on"
            " all machines - please do so manually.")

    AppScaleLogger.log(
        "Terminated AppScale on {0} machines.".format(boxes_shut_down))
def terminate_virtualized_cluster(cls, keyname, is_verbose):
    """Stops all API services running on all nodes in the currently running
    AppScale deployment.

    Args:
      keyname: The name of the SSH keypair used for this AppScale deployment.
      is_verbose: A bool that indicates if we should print the commands
        executed to stdout.
    Raises:
      AppScaleException: If the deployment's secret key cannot be read, or if
        the number of machines confirmed as shut down does not match the
        number of machines in the deployment.
    """
    AppScaleLogger.log("Terminating appscale deployment with keyname {0}"
                       .format(keyname))
    time.sleep(2)

    shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
    try:
        secret = LocalState.get_secret_key(keyname)
    except IOError:
        # We couldn't find the secret key: AppScale is most likely not
        # running.
        raise AppScaleException("Couldn't find AppScale secret key.")

    acc = AppControllerClient(shadow_host, secret)
    try:
        all_ips = acc.get_all_public_ips()
    except Exception as exception:
        # Fall back to the locally recorded IPs when the AppController
        # cannot be queried.
        AppScaleLogger.warn('Saw Exception while getting deployments IPs {0}'.
                            format(str(exception)))
        all_ips = LocalState.get_all_public_ips(keyname)

    # Issue the stop request to every node in parallel and wait for all of
    # the requests to return.
    threads = []
    for ip in all_ips:
        thread = threading.Thread(target=cls.stop_remote_appcontroller,
                                  args=(ip, keyname, is_verbose))
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    boxes_shut_down = 0
    is_running_regex = re.compile("appscale-controller stop")
    for ip in all_ips:
        AppScaleLogger.log("Shutting down AppScale API services at {0}".
                           format(ip))
        while True:
            remote_output = cls.ssh(ip, keyname, 'ps x', is_verbose)
            AppScaleLogger.verbose(remote_output, is_verbose)
            # Use search() rather than match(): match() only looks at the
            # very beginning of the process listing (its header row), so it
            # could never detect a stop command that is still in progress.
            if not is_running_regex.search(remote_output):
                break
            time.sleep(0.3)
        boxes_shut_down += 1

    # Safety net: every machine must have been confirmed as shut down.
    if boxes_shut_down != len(all_ips):
        raise AppScaleException("Couldn't terminate your AppScale deployment on"
                                " all machines - please do so manually.")

    AppScaleLogger.log("Terminated AppScale on {0} machines.".
                       format(boxes_shut_down))
def ssh(self, node):
    """ 'ssh' provides a simple way to log into virtual machines in an
    AppScale deployment, using the SSH key provided in the user's
    AppScalefile.

    Args:
      node: Identifies the machine to SSH to. A value that converts to an int
        is used as an index into the list of nodes running in the AppScale
        deployment, starting with zero. A non-numeric string is treated as a
        role name (lower-cased before lookup). None selects the "shadow"
        role.
    Raises:
      AppScalefileException: If there is no AppScalefile in the current
        directory.
      AppScaleException: If there is no node at the given index, no host
        holds the given role, or the ssh command exits unsuccessfully.
    """
    contents = self.read_appscalefile()
    contents_as_yaml = yaml.safe_load(contents)

    if 'keyname' in contents_as_yaml:
        keyname = contents_as_yaml['keyname']
    else:
        keyname = "appscale"

    if node is None:
        node = "shadow"

    try:
        index = int(node)
        nodes = self.get_nodes(keyname)
        # make sure there is a node at position 'index'
        ip = nodes[index]['public_ip']
    except IndexError:
        # Report the index that was requested; the message previously left
        # it out.
        raise AppScaleException(
            "Cannot ssh to node at index {0}, as there are only {1} in the "
            "currently running AppScale deployment.".format(index, len(nodes)))
    except ValueError:
        # 'node' is not numeric, so interpret it as a role name.
        try:
            ip = LocalState.get_host_with_role(keyname, node.lower())
        except AppScaleException:
            raise AppScaleException("No role exists by that name. "
                                    "Valid roles are {}".format(
                                        NodeLayout.ADVANCED_FORMAT_KEYS))

    # construct the ssh command to exec with that IP address
    command = [
        "ssh", "-o", "StrictHostkeyChecking=no", "-i",
        self.get_key_location(keyname), "root@" + ip
    ]

    # exec the ssh command
    try:
        subprocess.check_call(command)
    except subprocess.CalledProcessError:
        raise AppScaleException(
            "Unable to ssh to the machine at "
            "{}. Please make sure this machine is reachable, "
            "has a public ip, or that the role is in use by "
            "the deployment.".format(ip))
def set_property(cls, options):
    """Asks the AppController to overwrite one of its instance variables
    (properties) with a new value.

    Args:
      options: A Namespace that has fields for each parameter that can be
        passed in via the command-line interface. This method reads its
        'keyname', 'property_name' and 'property_value' fields.
    Raises:
      AppControllerException: If the AppController answers with anything
        other than "OK".
    """
    keyname = options.keyname
    head_node = LocalState.get_host_with_role(keyname, "shadow")
    secret_key = LocalState.get_secret_key(keyname)
    controller = AppControllerClient(head_node, secret_key)

    outcome = controller.set_property(options.property_name,
                                      options.property_value)
    if outcome != "OK":
        # Anything other than "OK" is passed on as the failure reason.
        reason = "because: {0}".format(outcome)
        raise AppControllerException(
            "Unable to update the given property " + reason)

    AppScaleLogger.success("Successfully updated the given property.")
def get_property(cls, options):
    """Asks the AppController for the system properties whose names match a
    regular expression, along with their values.

    Args:
      options: A Namespace that has fields for each parameter that can be
        passed in via the command-line interface. This method reads its
        'keyname' and 'property' fields.
    Returns:
      The AppController's answer: a dict mapping each property matching the
      given regex to its associated value.
    """
    keyname = options.keyname
    head_node = LocalState.get_host_with_role(keyname, "shadow")
    secret_key = LocalState.get_secret_key(keyname)
    controller = AppControllerClient(head_node, secret_key)
    return controller.get_property(options.property)
def get_property(cls, options):
    """Fetches every system property matching the provided regular expression
    from the AppController, together with its current value.

    Args:
      options: A Namespace that has fields for each parameter that can be
        passed in via the command-line interface ('keyname' and 'property'
        are the fields used here).
    Returns:
      A dict mapping each property matching the given regex to its associated
      value, as reported by the AppController.
    """
    login_host = LocalState.get_host_with_role(options.keyname, 'shadow')
    client = AppControllerClient(
        login_host,
        LocalState.get_secret_key(options.keyname),
    )
    matching_properties = client.get_property(options.property)
    return matching_properties
def set_property(cls, options):
    """Tells the AppController to replace the value of one of its instance
    variables (a property) with a new one.

    Args:
      options: A Namespace that has fields for each parameter that can be
        passed in via the command-line interface ('keyname', 'property_name'
        and 'property_value' are the fields used here).
    Raises:
      AppControllerException: If the AppController's reply is not 'OK'.
    """
    login_host = LocalState.get_host_with_role(options.keyname, 'shadow')
    client = AppControllerClient(
        login_host,
        LocalState.get_secret_key(options.keyname),
    )

    reply = client.set_property(options.property_name,
                                options.property_value)
    updated = (reply == 'OK')
    if not updated:
        # The reply itself is reported as the reason for the failure.
        raise AppControllerException("Unable to update the given property " +
                                     "because: {0}".format(reply))
    AppScaleLogger.success("Successfully updated the given property.")
def terminate_virtualized_cluster(cls, keyname, clean, is_verbose):
    """Stops all API services running on all nodes in the currently running
    AppScale deployment.

    Args:
      keyname: The name of the SSH keypair used for this AppScale deployment.
      clean: A bool representing whether clean should be ran on the nodes.
      is_verbose: A bool that indicates if we should print the commands
        executed to stdout.
    Raises:
      AppScaleException: If the deployment's secret key cannot be read, or if
        any node reports a failure / not every node reports success.
      socket.error: Re-raised if the AppController cannot be reached.
      Exception: Any other error seen while stopping is logged (in verbose
        mode) and re-raised unchanged.
    """
    AppScaleLogger.log("Stopping appscale deployment with keyname {0}"
                       .format(keyname))
    time.sleep(2)

    shadow_host = LocalState.get_host_with_role(keyname, 'shadow')
    try:
        secret = LocalState.get_secret_key(keyname)
    except IOError:
        # We couldn't find the secret key: AppScale is most likely not
        # running.
        raise AppScaleException("Couldn't find AppScale secret key.")

    acc = AppControllerClient(shadow_host, secret)
    try:
        # Count of nodes still expected to report success; one less than the
        # number of public IPs (the head node is stopped separately below).
        machines = len(acc.get_all_public_ips()) - 1
        acc.run_terminate(clean)
        terminated_successfully = True
        log_dump = u""
        while not acc.is_appscale_terminated():
            # For terminate receive_server_message will return a JSON string
            # that is a list of dicts with keys: ip, status, output
            try:
                output_list = yaml.safe_load(acc.receive_server_message())
            except Exception as e:
                # str(e) rather than e.message: .message is empty for
                # multi-argument exceptions and does not exist on Python 3.
                log_dump += str(e)
                continue

            for node in output_list:
                if node.get("status"):
                    machines -= 1
                    AppScaleLogger.success("Node at {node_ip}: {status}".format(
                        node_ip=node.get("ip"),
                        status="Stopping AppScale finished"))
                else:
                    AppScaleLogger.warn("Node at {node_ip}: {status}".format(
                        node_ip=node.get("ip"),
                        status="Stopping AppScale failed"))
                    terminated_successfully = False
                    log_dump += (u"Node at {node_ip}: {status}\nNode Output:"
                                 u"{output}").format(
                                     node_ip=node.get("ip"),
                                     status="Stopping AppScale failed",
                                     output=node.get("output"))
                AppScaleLogger.verbose(
                    (u"Output of node at {node_ip}:\n"
                     u"{output}").format(node_ip=node.get("ip"),
                                         output=node.get("output")),
                    is_verbose)

        if not terminated_successfully or machines > 0:
            LocalState.generate_crash_log(AppControllerException, log_dump)
            raise AppScaleException(
                "{0} node(s) failed stopping AppScale, "
                "head node is still running AppScale services."
                .format(machines))

        # All other nodes stopped cleanly, so the head node can be stopped.
        cls.stop_remote_appcontroller(shadow_host, keyname, is_verbose, clean)
    except socket.error as socket_error:
        # str() yields the full "[Errno N] text" description; .message is
        # empty for socket errors constructed from (errno, text).
        AppScaleLogger.warn(u'Unable to talk to AppController: {}'.
                            format(str(socket_error)))
        raise
    except Exception as exception:
        AppScaleLogger.verbose(u'Saw Exception while stopping AppScale {0}'.
                               format(str(exception)), is_verbose)
        raise
def start_all_nodes(cls, options, node_layout):
    """ Starts all nodes in the designated public cloud.

    Args:
      options: A Namespace that includes parameters passed in by the user that
        define non-placement-strategy-related deployment options (e.g.,
        keypair names, security group names).
      node_layout: The node layout of the system including roles.
    Returns:
      The node layout (dummy values in non-cloud deployments) corresponding to
      the nodes that were started.
    Raises:
      AppScaleException: If instances are already running under this keyname
        but the head node's IP cannot be determined or is not among them.
      BadConfigurationException: If a previously recorded instance id is not
        among the currently running instances.
      AgentRuntimeException, BotoServerError: Re-raised after cleanup if the
        requested instances could not be spawned.
    """
    agent = InfrastructureAgentFactory.create_agent(options.infrastructure)
    params = agent.get_params_from_args(options)

    # If we have running instances under the current keyname, we try to
    # re-attach to them. If we have issue finding the locations file or the
    # IP of the head node, we throw an exception.
    head_node_public_ip = None
    public_ips, private_ips, instance_ids = agent.describe_instances(params)
    if public_ips:
        try:
            head_node_public_ip = LocalState.get_host_with_role(
                options.keyname, 'shadow')
        except (IOError, BadConfigurationException):
            raise AppScaleException(
                "Couldn't get login ip for running deployment with keyname"
                " {}.".format(options.keyname))

        if head_node_public_ip not in public_ips:
            raise AppScaleException(
                "Couldn't recognize running instances for deployment with"
                " keyname {}.".format(options.keyname))

    if head_node_public_ip in public_ips:
        AppScaleLogger.log("Reusing already running instances.")
        # Get the node_info from the locations JSON.
        node_info = LocalState.get_local_nodes_info(keyname=options.keyname)
        previous_node_list = node_layout.from_locations_json_list(node_info)
        node_layout.nodes = previous_node_list
        for node_index, node in enumerate(node_layout.nodes):
            try:
                index = instance_ids.index(node.instance_id)
            except ValueError:
                raise BadConfigurationException(
                    "Previous instance_id {} does not "
                    "currently exist."
                    .format(node.instance_id))
            node_layout.nodes[node_index].public_ip = public_ips[index]
            node_layout.nodes[node_index].private_ip = private_ips[index]
            node_layout.nodes[node_index].instance_id = instance_ids[index]
        return node_layout

    agent.configure_instance_security(params)

    # Group the nodes to spawn: load balancers by instance type, and all
    # other nodes by disk requirement and then by instance type.
    load_balancer_roles = {}
    instance_type_disks_roles = {'with_disks': {}, 'without_disks': {}}

    for node in node_layout.nodes:
        if node.is_role('load_balancer'):
            load_balancer_roles.setdefault(node.instance_type, []).append(node)
            continue
        disk_group = instance_type_disks_roles['with_disks'] if node.disk \
            else instance_type_disks_roles['without_disks']
        disk_group.setdefault(node.instance_type, []).append(node)

    spawned_instance_ids = []

    for instance_type, load_balancer_nodes in load_balancer_roles.items():
        # 'params' is modified in place for each group of nodes.
        params['instance_type'] = instance_type
        params['disks'] = any(node.disk for node in load_balancer_nodes)

        try:
            instance_ids, public_ips, private_ips = cls.spawn_nodes_in_cloud(
                agent, params, count=len(load_balancer_nodes),
                load_balancer=True)
        except (AgentRuntimeException, BotoServerError):
            AppScaleLogger.warn(
                "AppScale was unable to start the requested number "
                "of instances, attempting to terminate those that "
                "were started.")
            if spawned_instance_ids:
                AppScaleLogger.warn(
                    "Attempting to terminate those that were started.")
                cls.terminate_spawned_instances(spawned_instance_ids, agent,
                                                params)

            # Cleanup the keyname since it failed.
            LocalState.cleanup_keyname(options.keyname)

            # Re-raise the original exception.
            raise

        # Keep track of instances we have started.
        spawned_instance_ids.extend(instance_ids)

        for node_index, node in enumerate(load_balancer_nodes):
            index = node_layout.nodes.index(node)
            node_layout.nodes[index].public_ip = public_ips[node_index]
            node_layout.nodes[index].private_ip = private_ips[node_index]
            node_layout.nodes[index].instance_id = instance_ids[node_index]

    if options.static_ip:
        node = node_layout.head_node()
        agent.associate_static_ip(params, node.instance_id, options.static_ip)
        node.public_ip = options.static_ip
        AppScaleLogger.log("Static IP associated with head node.")

    AppScaleLogger.log("\nPlease wait for AppScale to prepare your machines "
                       "for use. This can take few minutes.")

    for disks_needed, instance_type_nodes in instance_type_disks_roles.items():
        for instance_type, nodes in instance_type_nodes.items():
            if not nodes:
                # Nothing to start for this instance type. Skip only this
                # one ('break' would also drop every remaining type).
                continue
            # 'params' is modified in place (no copy is made) so that it
            # carries this group's instance type and disk requirement.
            params['instance_type'] = instance_type
            params['disks'] = (disks_needed == 'with_disks')

            try:
                _instance_ids, _public_ips, _private_ips = \
                    cls.spawn_nodes_in_cloud(agent, params, count=len(nodes))
            except (AgentRuntimeException, BotoServerError):
                if spawned_instance_ids:
                    AppScaleLogger.warn(
                        "AppScale was unable to start the requested "
                        "number of instances, attempting to terminate "
                        "those that were started.")
                    cls.terminate_spawned_instances(spawned_instance_ids,
                                                    agent, params)

                # Cleanup the keyname since it failed.
                LocalState.cleanup_keyname(options.keyname)

                # Re-raise the original exception.
                raise

            # Keep track of instances we have started.
            spawned_instance_ids.extend(_instance_ids)

            for node_index, node in enumerate(nodes):
                index = node_layout.nodes.index(node)
                node_layout.nodes[index].public_ip = _public_ips[node_index]
                node_layout.nodes[index].private_ip = _private_ips[node_index]
                node_layout.nodes[index].instance_id = _instance_ids[node_index]

    return node_layout
def copy_app_to_host(cls, app_location, app_id, keyname, is_verbose,
                     extras=None, custom_service_yaml=None):
    """Copies the given application to a machine running the Login service
    within an AppScale deployment.

    Args:
      app_location: The location on the local filesystem where the application
        can be found.
      app_id: The project to use for this application.
      keyname: The name of the SSH keypair that uniquely identifies this
        AppScale deployment.
      is_verbose: A bool that indicates if we should print the commands we exec
        to copy the app to the remote host to stdout.
      extras: A dictionary containing a list of files to include in the upload.
        It is merged into the mapping of tarball path -> local path.
      custom_service_yaml: A string specifying the location of the service yaml
        being deployed. When given, it is stored in the tarball as 'app.yaml'
        in place of any app.yaml found in the application.
    Returns:
      A str corresponding to the location on the remote filesystem where the
      application was copied to.
    """
    AppScaleLogger.log("Tarring application")
    rand = str(uuid.uuid4()).replace('-', '')[:8]
    local_tarred_app = "{0}/appscale-app-{1}-{2}.tar.gz".\
        format(tempfile.gettempdir(), app_id, rand)

    # Collect list of files that should be included in the tarball.
    app_files = {}
    for root, _, filenames in os.walk(app_location, followlinks=True):
        relative_dir = os.path.relpath(root, app_location)
        for filename in filenames:
            # Ignore compiled Python files.
            if filename.endswith('.pyc'):
                continue
            relative_path = os.path.join(relative_dir, filename)
            app_files[relative_path] = os.path.join(root, filename)

    if extras is not None:
        app_files.update(extras)

    # The tarball is a temporary artifact: remove it whether or not tarring
    # and copying succeed, so a failure does not leave it behind in the
    # temporary directory.
    try:
        with tarfile.open(local_tarred_app, 'w:gz') as app_tar:
            for tarball_path, local_path in app_files.items():
                # Replace app.yaml with the service yaml being deployed.
                # normpath makes './app.yaml' compare equal to 'app.yaml'.
                if custom_service_yaml and \
                        os.path.normpath(tarball_path) == 'app.yaml':
                    continue
                app_tar.add(local_path, tarball_path)

            if custom_service_yaml:
                app_tar.add(custom_service_yaml, 'app.yaml')

        AppScaleLogger.log("Copying over application")
        remote_app_tar = "{0}/{1}.tar.gz".format(cls.REMOTE_APP_DIR, app_id)
        head_node_public_ip = LocalState.get_host_with_role(keyname, 'shadow')
        cls.scp(head_node_public_ip, keyname, local_tarred_app, remote_app_tar,
                is_verbose)
    finally:
        # The file may not exist if opening the tarball itself failed.
        if os.path.exists(local_tarred_app):
            AppScaleLogger.verbose("Removing local copy of tarred application",
                                   is_verbose)
            os.remove(local_tarred_app)

    return remote_app_tar