Exemplo n.º 1
0
 def __globusonline_remove(self, inst, eps):
     go_helper = GlobusOnlineHelper.from_instance(inst)
     
     try:
         for ep in eps:     
             go_helper.connect(ep.user)
             try:
                 go_helper.endpoint_remove(ep)
             except:
                 pass   
             go_helper.disconnect()
     except GlobusOnlineException, goe:
         log.warning("Unable to remove GO endpoint/s: %s" % goe)        
Exemplo n.º 2
0
    def instance_stop(self, inst_id):
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst)
        
        try:
            if inst.topology.state != Topology.STATE_RUNNING:
                message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)

            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class()
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)       
        
            inst.topology.state = Topology.STATE_STOPPING
            inst.topology.save()
            
            nodes = inst.topology.get_nodes()            
    
            (success, message) = self.__stop_vms(deployer, nodes)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)            
        
            inst.topology.state = Topology.STATE_STOPPED
            inst.topology.save()
              
            log.info("Stopping Globus Online endpoints")
            try:
                eps = inst.topology.get_go_endpoints()        
                self.__globusonline_stop(inst, eps)     
                inst.topology.save()       
            except GlobusOnlineException, goe:
                log.warning("Unable to stop GO endpoint/s: %s" % goe)              
Exemplo n.º 3
0
    def instance_start(self, inst_id, extra_files, run_cmds):
        
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst)
            
        try:
            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class(extra_files, run_cmds)
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)               
            
            if inst.topology.state == Topology.STATE_NEW:
                resuming = False
            elif inst.topology.state == Topology.STATE_STOPPED:
                resuming = True
            else:
                message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)
            
            if not resuming:
                inst.topology.state = Topology.STATE_STARTING
            else:
                inst.topology.state = Topology.STATE_RESUMING
    
            inst.topology.save()

            if not resuming:
                try:
                    eps = inst.topology.get_go_endpoints()        
                    self.__globusonline_pre_start(inst, eps)
                except GlobusOnlineException, goe:
                    log.warning("Unable to create GO endpoint/s: %s" % goe)
Exemplo n.º 4
0
    def configure(self, ssh):
        domain = self.domain
        node = self.node
        instance_dir = self.deployer.instance.instance_dir        
        
        if self.basic:
            # Make backup copies of hostname and /etc/hosts
            if node.state in (Node.STATE_CONFIGURING, Node.STATE_RESUMED_RECONFIGURING):
                ssh.run("sudo cp /etc/hosts /etc/hosts.gp-bak", expectnooutput=True)
                ssh.run("sudo cp /etc/hostname /etc/hostname.gp-bak", expectnooutput=True)
            
            # Upload host file and update hostname
            log.debug("Uploading host file and updating hostname", node)
            ssh.scp("%s/hosts" % instance_dir,
                    "/chef/cookbooks/provision/files/default/hosts")             
            ssh.run("sudo cp /chef/cookbooks/provision/files/default/hosts /etc/hosts", expectnooutput=True)
    
            ssh.run("sudo bash -c \"echo %s > /etc/hostname\"" % node.hostname, expectnooutput=True)
            ssh.run("sudo /etc/init.d/hostname.sh || sudo /etc/init.d/hostname restart", expectnooutput=True)
        
        self.check_continue()

        if self.chef:        
            # Upload topology file
            log.debug("Uploading topology file", node)
            ssh.scp("%s/topology.rb" % instance_dir,
                    "/chef/cookbooks/provision/attributes/topology.rb")             
            
            # Copy certificates
            log.debug("Copying certificates", node)
            ssh.scp_dir("%s/certs" % instance_dir, 
                        "/chef/cookbooks/provision/files/default/")
    
            # Upload extra files
            log.debug("Copying extra files", node)
            for src, dst in self.deployer.extra_files:
                ssh.scp(src, dst)
            
            self.check_continue()

            # Run chef
            log.debug("Running chef", node)
            ssh.run("echo -e \"cookbook_path \\\"/chef/cookbooks\\\"\\nrole_path \\\"/chef/roles\\\"\" > /tmp/chef.conf", expectnooutput=True)        
            ssh.run("echo '{ \"run_list\": [ %s ], \"scratch_dir\": \"%s\", \"domain_id\": \"%s\", \"node_id\": \"%s\"  }' > /tmp/chef.json" % (",".join("\"%s\"" % r for r in node.run_list), self.config.get("scratch-dir"), domain.id, node.id), expectnooutput=True)
            
            # Sometimes, Chef will fail because a service didn't start or restart
            # properly (NFS-related services seem to do this occasionally).
            # In most cases, the problem just "goes away" if you try to restart the
            # service again. So, if Chef fails, we don't give up and try again
            # (since the recipes are idempotent, there's no harm to running them
            # multiple times)
            chef_tries = 3
            while chef_tries > 0:
                rc = ssh.run("sudo -i chef-solo -c /tmp/chef.conf -j /tmp/chef.json", exception_on_error = False)    
                if rc != 0:
                    chef_tries -= 1
                    log.debug("chef-solo failed. %i attempts left" % chef_tries, node)
                else:
                    break
                    
            if chef_tries == 0:
                raise DeploymentException, "Failed to configure node %s" % node.id
                    
            self.check_continue() 

        for cmd in self.deployer.run_cmds:
            rc = ssh.run(cmd, exception_on_error = False)
            if rc != 0:
                log.warning("Extra command failed with status %i: %s" % (rc, cmd), node)
                
        log.info("Configuration done.", node)
Exemplo n.º 5
0
    def instance_update(self, inst_id, topology_json, extra_files, run_cmds):
        try:
            (success, message, inst) = self.__get_instance(inst_id)
            
            if not success:
                return (API.STATUS_FAIL, message)
    
            log.set_logging_instance(inst)
                            
            if inst.topology.state == Topology.STATE_NEW:
                # If the topology is still in a New state, we simply
                # validate that the update is valid, and replace
                # the old topology. We don't need to deploy or
                # configure any hosts..
                if topology_json != None:
                    (success, message, topology_changes) = inst.update_topology(topology_json)
                    if not success:
                        message = "Error in topology file: %s" % message
                        return (API.STATUS_FAIL, message)
                        
                return (API.STATUS_SUCCESS, "Success")
            elif inst.topology.state not in (Topology.STATE_RUNNING, Topology.STATE_FAILED):
                message = "Cannot update the topology of an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)        
    
            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class(extra_files, run_cmds)
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)    
                
            if topology_json != None:
                old_topology = inst.topology
                try:
                    (success, message, topology_changes) = inst.update_topology(topology_json)
                    if not success:
                        return (API.STATUS_FAIL, message)
                except ObjectValidationException, ove:
                    message = "Error in topology file: %s" % ove
                    return (API.STATUS_FAIL, message)
                
                create_hosts = []
                destroy_hosts = []
                
                create_endpoints = []
                remove_endpoints = []
    
                if topology_changes.changes.has_key("domains"):
                    for domain in topology_changes.changes["domains"].add:
                        d = inst.topology.domains[domain]
                        create_hosts += [n.id for n in d.nodes.values()] 
        
                    for domain in topology_changes.changes["domains"].remove:
                        d = inst.topology.domains[domain].keys()
                        destroy_hosts += [n.id for n in d.nodes.values()] 
                    
                    for domain in topology_changes.changes["domains"].edit:
                        if topology_changes.changes["domains"].edit[domain].changes.has_key("nodes"):
                            nodes_changes = topology_changes.changes["domains"].edit[domain].changes["nodes"]
                            create_hosts += nodes_changes.add
                            destroy_hosts += nodes_changes.remove
                            
                        if topology_changes.changes["domains"].edit[domain].changes.has_key("go_endpoints"):
                            ep_changes = topology_changes.changes["domains"].edit[domain].changes["go_endpoints"]
                            if ep_changes.change_type == PropertyChange.ADD:
                                create_endpoints += inst.topology.domains[domain].go_endpoints
                            elif ep_changes.change_type == PropertyChange.REMOVE:
                                remove_endpoints += old_topology.domains[domain].go_endpoints            
                            elif ep_changes.change_type == PropertyChange.EDIT:
                                create_endpoints += ep_changes.add
                                remove_endpoints += ep_changes.remove                            
    
                nodes = inst.topology.get_nodes()
    
                if len(destroy_hosts) > 0:
                    old_nodes = old_topology.get_nodes()
                    log.info("Terminating hosts %s" % destroy_hosts)   
                    old_nodes = [n for n in old_nodes if n.id in destroy_hosts]
                    (success, message) = self.__terminate_vms(deployer, old_nodes)
                    
                    if not success:
                        inst.topology.state = Topology.STATE_FAILED
                        inst.topology.save()
                        return (API.STATUS_FAIL, message)       
                    
                    inst.topology.save()         

                if len(create_endpoints) > 0:
                    try:      
                        self.__globusonline_pre_start(inst, create_endpoints)
                    except GlobusOnlineException, goe:
                        log.warning("Unable to create GO endpoint/s: %s" % goe)                      
Exemplo n.º 6
0
            (success, message) = self.__configure_vms(deployer, node_vm)
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)
    
            inst.topology.state = Topology.STATE_RUNNING
            inst.topology.save()         
            
            log.info("Creating Globus Online endpoints")
            eps = inst.topology.get_go_endpoints()        
            if not resuming:
                try:
                    self.__globusonline_post_start(inst, eps)
                except GlobusOnlineException, goe:
                    log.warning("Unable to create GO endpoint/s: %s" % goe)
            else:        
                try:
                    self.__globusonline_resume(inst, eps)
                except GlobusOnlineException, goe:
                    log.warning("Unable to resume GO endpoint/s: %s" % goe)            

            inst.topology.save()       
            
            return (API.STATUS_SUCCESS, "Success")
        except:
            message = self.__unexpected_exception_to_text(what = "starting the instance.")
            try:
                if inst != None:
                    inst.topology.state = Topology.STATE_FAILED
                    inst.topology.save()