def __globusonline_remove(self, inst, eps):
    """Best-effort removal of the given Globus Online endpoints.

    For each endpoint we connect as that endpoint's user, attempt the
    removal, and disconnect.  A failure to remove one endpoint must not
    prevent the others from being processed, so per-endpoint errors are
    logged and skipped; a GlobusOnlineException anywhere in the loop
    (e.g. while connecting) aborts the whole operation with a warning.

    Parameters:
        inst -- the instance whose GO helper/credentials to use
        eps  -- iterable of endpoint objects (each with a .user attribute)

    Never raises: all failures are downgraded to log warnings.
    """
    go_helper = GlobusOnlineHelper.from_instance(inst)
    try:
        for ep in eps:
            go_helper.connect(ep.user)
            try:
                go_helper.endpoint_remove(ep)
            except Exception as epe:
                # Deliberately best-effort: keep going, but don't swallow
                # the error silently (the original used a bare "except: pass").
                log.warning("Could not remove GO endpoint: %s" % epe)
            go_helper.disconnect()
    except GlobusOnlineException as goe:
        log.warning("Unable to remove GO endpoint/s: %s" % goe)
# NOTE(review): this function's text has been collapsed onto a single line and
# appears TRUNCATED by extraction — the outer "try:" opened right after
# log.set_logging_instance(inst) has no visible "except"/"finally" clause, and
# there is no final return; the tail of the function is missing from this view.
# Original bytes are preserved below; do not reformat without recovering the
# lost tail from version control.
# NOTE(review): the error message reads "Cannot start an instance..." inside
# instance_stop — presumably it should say "stop"; left unchanged here because
# runtime strings may not be altered in a documentation-only pass.
def instance_stop(self, inst_id): (success, message, inst) = self.__get_instance(inst_id) if not success: return (API.STATUS_FAIL, message) log.set_logging_instance(inst) try: if inst.topology.state != Topology.STATE_RUNNING: message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state]) return (API.STATUS_FAIL, message) deployer_class = self.__get_deployer_class(inst) deployer = deployer_class() try: deployer.set_instance(inst) except DeploymentException, de: message = "Deployer failed to initialize. %s " % de return (API.STATUS_FAIL, message) inst.topology.state = Topology.STATE_STOPPING inst.topology.save() nodes = inst.topology.get_nodes() (success, message) = self.__stop_vms(deployer, nodes) if not success: inst.topology.state = Topology.STATE_FAILED inst.topology.save() return (API.STATUS_FAIL, message) inst.topology.state = Topology.STATE_STOPPED inst.topology.save() log.info("Stopping Globus Online endpoints") try: eps = inst.topology.get_go_endpoints() self.__globusonline_stop(inst, eps) inst.topology.save() except GlobusOnlineException, goe: log.warning("Unable to stop GO endpoint/s: %s" % goe)
# NOTE(review): head of instance_start only — collapsed onto one line and cut
# off mid-"try:" after the GO pre-start warning.  The continuation (VM
# configuration, post-start/resume of GO endpoints, and the outer "except"
# handler) appears as a detached fragment later in this chunk; the extraction
# scrambled the file.  Original bytes preserved; reassemble from version
# control before reformatting.
# What the visible part does: looks up the instance, selects and initializes a
# deployer with the provided extra_files/run_cmds, distinguishes a fresh start
# (STATE_NEW) from a resume (STATE_STOPPED), records STARTING/RESUMING state,
# and — for fresh starts only — pre-creates Globus Online endpoints,
# downgrading GO failures to warnings.
def instance_start(self, inst_id, extra_files, run_cmds): (success, message, inst) = self.__get_instance(inst_id) if not success: return (API.STATUS_FAIL, message) log.set_logging_instance(inst) try: deployer_class = self.__get_deployer_class(inst) deployer = deployer_class(extra_files, run_cmds) try: deployer.set_instance(inst) except DeploymentException, de: message = "Deployer failed to initialize. %s " % de return (API.STATUS_FAIL, message) if inst.topology.state == Topology.STATE_NEW: resuming = False elif inst.topology.state == Topology.STATE_STOPPED: resuming = True else: message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state]) return (API.STATUS_FAIL, message) if not resuming: inst.topology.state = Topology.STATE_STARTING else: inst.topology.state = Topology.STATE_RESUMING inst.topology.save() if not resuming: try: eps = inst.topology.get_go_endpoints() self.__globusonline_pre_start(inst, eps) except GlobusOnlineException, goe: log.warning("Unable to create GO endpoint/s: %s" % goe)
def configure(self, ssh):
    """Configure a provisioned node over an established SSH connection.

    Two optional phases, gated by flags on self:
      * self.basic -- back up /etc/hosts and /etc/hostname (only on first
        configuration or resume-reconfiguration), then push the generated
        hosts file and set the node's hostname.
      * self.chef  -- upload the topology attributes file, certificates and
        any extra files, then run chef-solo with a generated config/JSON,
        retrying up to 3 times (recipes are idempotent).

    Finally runs any user-supplied extra commands (self.deployer.run_cmds),
    logging — but not failing on — non-zero exit statuses.

    Parameters:
        ssh -- connected SSH helper exposing run/scp/scp_dir.

    Raises:
        DeploymentException -- if chef-solo fails on all 3 attempts.
    """
    domain = self.domain
    node = self.node
    instance_dir = self.deployer.instance.instance_dir

    if self.basic:
        # Make backup copies of hostname and /etc/hosts
        if node.state in (Node.STATE_CONFIGURING, Node.STATE_RESUMED_RECONFIGURING):
            ssh.run("sudo cp /etc/hosts /etc/hosts.gp-bak", expectnooutput=True)
            ssh.run("sudo cp /etc/hostname /etc/hostname.gp-bak", expectnooutput=True)

        # Upload host file and update hostname
        log.debug("Uploading host file and updating hostname", node)
        ssh.scp("%s/hosts" % instance_dir, "/chef/cookbooks/provision/files/default/hosts")
        ssh.run("sudo cp /chef/cookbooks/provision/files/default/hosts /etc/hosts", expectnooutput=True)
        ssh.run("sudo bash -c \"echo %s > /etc/hostname\"" % node.hostname, expectnooutput=True)
        # Either init script may exist depending on the distro; try both.
        ssh.run("sudo /etc/init.d/hostname.sh || sudo /etc/init.d/hostname restart", expectnooutput=True)

    self.check_continue()

    if self.chef:
        # Upload topology file
        log.debug("Uploading topology file", node)
        ssh.scp("%s/topology.rb" % instance_dir, "/chef/cookbooks/provision/attributes/topology.rb")

        # Copy certificates
        log.debug("Copying certificates", node)
        ssh.scp_dir("%s/certs" % instance_dir, "/chef/cookbooks/provision/files/default/")

        # Upload extra files
        log.debug("Copying extra files", node)
        for src, dst in self.deployer.extra_files:
            ssh.scp(src, dst)

        self.check_continue()

        # Run chef
        log.debug("Running chef", node)
        ssh.run("echo -e \"cookbook_path \\\"/chef/cookbooks\\\"\\nrole_path \\\"/chef/roles\\\"\" > /tmp/chef.conf", expectnooutput=True)
        ssh.run("echo '{ \"run_list\": [ %s ], \"scratch_dir\": \"%s\", \"domain_id\": \"%s\", \"node_id\": \"%s\" }' > /tmp/chef.json" % (",".join("\"%s\"" % r for r in node.run_list), self.config.get("scratch-dir"), domain.id, node.id), expectnooutput=True)

        # Sometimes, Chef will fail because a service didn't start or restart
        # properly (NFS-related services seem to do this occasionally).
        # In most cases, the problem just "goes away" if you try to restart the
        # service again. So, if Chef fails, we don't give up and try again
        # (since the recipes are idempotent, there's no harm to running them
        # multiple times)
        chef_tries = 3
        while chef_tries > 0:
            rc = ssh.run("sudo -i chef-solo -c /tmp/chef.conf -j /tmp/chef.json", exception_on_error=False)
            if rc != 0:
                chef_tries -= 1
                log.debug("chef-solo failed. %i attempts left" % chef_tries, node)
            else:
                break
        if chef_tries == 0:
            # Py2/Py3-compatible raise (the original used the Py2-only
            # "raise Exc, msg" statement form).
            raise DeploymentException("Failed to configure node %s" % node.id)

    self.check_continue()

    # Extra commands are best-effort: a failure is logged, not fatal.
    for cmd in self.deployer.run_cmds:
        rc = ssh.run(cmd, exception_on_error=False)
        if rc != 0:
            log.warning("Extra command failed with status %i: %s" % (rc, cmd), node)

    log.info("Configuration done.", node)
# NOTE(review): instance_update is split across three extraction lines (a
# string literal is cut at one boundary, an "if" statement at another) and is
# TRUNCATED after the GO-endpoint warning — the remainder of the function
# (deploying/configuring the newly added hosts, final return, and the outer
# "except" for the opening "try:") is missing from this view.  Original bytes
# preserved; recover the full text from version control before reformatting.
# What the visible part does: for a STATE_NEW topology it only validates and
# swaps in the new topology JSON; for RUNNING/FAILED topologies it diffs the
# old and new topologies to build create/destroy host lists and GO endpoint
# add/remove lists, terminates removed hosts, and pre-starts added endpoints.
# NOTE(review): "d = inst.topology.domains[domain].keys()" immediately
# followed by "d.nodes.values()" looks wrong — .keys() returns a list, which
# has no .nodes attribute; probably the ".keys()" is a leftover.  Not fixed
# here because the surrounding text is unreliable.  TODO confirm against the
# original file.
def instance_update(self, inst_id, topology_json, extra_files, run_cmds): try: (success, message, inst) = self.__get_instance(inst_id) if not success: return (API.STATUS_FAIL, message) log.set_logging_instance(inst) if inst.topology.state == Topology.STATE_NEW: # If the topology is still in a New state, we simply # validate that the update is valid, and replace # the old topology. We don't need to deploy or # configure any hosts.. if topology_json != None: (success, message, topology_changes) = inst.update_topology(topology_json) if not success: message = "Error in topology file: %s" % message return (API.STATUS_FAIL, message) return (API.STATUS_SUCCESS, "Success") elif inst.topology.state not in (Topology.STATE_RUNNING, Topology.STATE_FAILED): message = "Cannot update the topology of an instance that is in state '%s'" % (Topology.state_str[inst.topology.state]) return (API.STATUS_FAIL, message) deployer_class = self.__get_deployer_class(inst) deployer = deployer_class(extra_files, run_cmds) try: deployer.set_instance(inst) except DeploymentException, de: message = "Deployer failed to initialize. 
%s " % de return (API.STATUS_FAIL, message) if topology_json != None: old_topology = inst.topology try: (success, message, topology_changes) = inst.update_topology(topology_json) if not success: return (API.STATUS_FAIL, message) except ObjectValidationException, ove: message = "Error in topology file: %s" % ove return (API.STATUS_FAIL, message) create_hosts = [] destroy_hosts = [] create_endpoints = [] remove_endpoints = [] if topology_changes.changes.has_key("domains"): for domain in topology_changes.changes["domains"].add: d = inst.topology.domains[domain] create_hosts += [n.id for n in d.nodes.values()] for domain in topology_changes.changes["domains"].remove: d = inst.topology.domains[domain].keys() destroy_hosts += [n.id for n in d.nodes.values()] for domain in topology_changes.changes["domains"].edit: if topology_changes.changes["domains"].edit[domain].changes.has_key("nodes"): nodes_changes = topology_changes.changes["domains"].edit[domain].changes["nodes"] create_hosts += nodes_changes.add destroy_hosts += nodes_changes.remove if topology_changes.changes["domains"].edit[domain].changes.has_key("go_endpoints"): ep_changes = topology_changes.changes["domains"].edit[domain].changes["go_endpoints"] if ep_changes.change_type == PropertyChange.ADD: create_endpoints += inst.topology.domains[domain].go_endpoints elif ep_changes.change_type == PropertyChange.REMOVE: remove_endpoints += old_topology.domains[domain].go_endpoints elif ep_changes.change_type == PropertyChange.EDIT: create_endpoints += ep_changes.add remove_endpoints += ep_changes.remove nodes = inst.topology.get_nodes() if len(destroy_hosts) > 0: old_nodes = old_topology.get_nodes() log.info("Terminating hosts %s" % destroy_hosts) old_nodes = [n for n in old_nodes if n.id in destroy_hosts] (success, message) = self.__terminate_vms(deployer, old_nodes) if not success: inst.topology.state = Topology.STATE_FAILED inst.topology.save() return (API.STATUS_FAIL, message) inst.topology.save() if 
len(create_endpoints) > 0: try: self.__globusonline_pre_start(inst, create_endpoints) except GlobusOnlineException, goe: log.warning("Unable to create GO endpoint/s: %s" % goe)
# NOTE(review): orphan FRAGMENT — this is the tail of instance_start (it uses
# the "resuming" flag set in that function's head, which appears earlier in
# this chunk), detached by extraction and itself cut off inside the final
# "except" handler.  It has no "def" line of its own and cannot stand alone.
# Original bytes preserved; reattach to instance_start from version control.
# What the visible part does: configures the VMs, marks the topology RUNNING,
# then post-starts (fresh start) or resumes (resume path) the Globus Online
# endpoints — downgrading GO failures to warnings — and returns success; the
# trailing handler marks the topology FAILED on unexpected exceptions.
(success, message) = self.__configure_vms(deployer, node_vm) if not success: inst.topology.state = Topology.STATE_FAILED inst.topology.save() return (API.STATUS_FAIL, message) inst.topology.state = Topology.STATE_RUNNING inst.topology.save() log.info("Creating Globus Online endpoints") eps = inst.topology.get_go_endpoints() if not resuming: try: self.__globusonline_post_start(inst, eps) except GlobusOnlineException, goe: log.warning("Unable to create GO endpoint/s: %s" % goe) else: try: self.__globusonline_resume(inst, eps) except GlobusOnlineException, goe: log.warning("Unable to resume GO endpoint/s: %s" % goe) inst.topology.save() return (API.STATUS_SUCCESS, "Success") except: message = self.__unexpected_exception_to_text(what = "starting the instance.") try: if inst != None: inst.topology.state = Topology.STATE_FAILED inst.topology.save()