Example #1
    def resume_vm(self, node):
        ec2_instance_id = node.deploy_data.ec2.instance_id

        log.info(" |- Resuming instance %s for %s." % (ec2_instance_id, node.id))        
        started = self.conn.start_instances([ec2_instance_id])            
        log.info(" |- Resumed instance %s." % ",".join([i.id for i in started]))
        
        return EC2VM(started[0])
Example #2
 def wait(self):
     if self.state in (Node.STATE_RUNNING_UNCONFIGURED, Node.STATE_RESUMED_UNCONFIGURED):
         self.deployer.wait_state(self.ec2_instance, "running")
         log.info("Instance %s is running. Hostname: %s" % (self.ec2_instance.id, self.ec2_instance.public_dns_name))
     elif self.state == Node.STATE_STOPPED:
         self.deployer.wait_state(self.ec2_instance, "stopped")
     elif self.state == Node.STATE_TERMINATED:
         self.deployer.wait_state(self.ec2_instance, "terminated")
Example #3
     def run2(self):
         topology = self.deployer.instance.topology
         
         self.node.state = Node.STATE_CONFIGURING
         topology.save()
 
         self.node.state = Node.STATE_RUNNING
         topology.save()            
         log.info("Dummy configure done")
Example #4
    def configure_stop(self, ssh):      
        node = self.node
  
        log.info("Configuring node for shutdown", node)
        ssh.run("sudo cp /etc/hosts.gp-bak /etc/hosts", expectnooutput=True)
        ssh.run("sudo cp /etc/hostname.gp-bak /etc/hostname", expectnooutput=True)
        ssh.run("sudo /etc/init.d/hostname.sh || sudo /etc/init.d/hostname restart", expectnooutput=True)
        ssh.run("sudo bash -c \"echo +auto.master > /etc/auto.master\"", exception_on_error = False)
        ssh.run("sudo bash -c \"echo > /etc/yp.conf\"", exception_on_error = False)
        ssh.run("sudo bash -c \"echo > /etc/default/nfs-common\"", exception_on_error = False)

        ssh.run("sudo update-rc.d -f nis remove", exception_on_error = False)
        log.info("Configuration done.", node)
Example #5
    def __allocate_vms(self, deployer, nodes, resuming):
        # TODO: Make this an option
        sequential = False
        topology = deployer.instance.topology
        
        if not resuming:
            log.info("Allocating %i VMs." % len(nodes))
            next_state = Node.STATE_RUNNING_UNCONFIGURED
        else:
            log.info("Resuming %i VMs" % len(nodes))
            next_state = Node.STATE_RESUMED_UNCONFIGURED
        node_vm = {}
        for n in nodes:
            try:
                if not resuming:
                    n.set_property("state", Node.STATE_STARTING)
                    topology.save()
                    vm = deployer.allocate_vm(n)
                else:
                    n.set_property("state", Node.STATE_RESUMING)
                    topology.save()
                    vm = deployer.resume_vm(n)
                node_vm[n] = vm
            except Exception:
                message = self.__unexpected_exception_to_text()
                return (False, message, None)
        
            if sequential:
                log.debug("Waiting for instance to start.")
                wait = deployer.NodeWaitThread(None, "wait-%s" % str(vm), n, vm, deployer, state = next_state)
                wait.run2()
                
        if not sequential:        
            log.debug("Waiting for instances to start.")
            mt_instancewait = MultiThread()
            for node, vm in node_vm.items():
                mt_instancewait.add_thread(deployer.NodeWaitThread(mt_instancewait, "wait-%s" % str(vm), node, vm, deployer, state = next_state))

            mt_instancewait.run()
            if not mt_instancewait.all_success():
                message = self.__mt_exceptions_to_text(mt_instancewait.get_exceptions(), "Exception raised while waiting for instances.")
                return (False, message, None)
            
        return (True, "Success", node_vm)
Example #6
    def instance_stop(self, inst_id):
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst)
        
        try:
            if inst.topology.state != Topology.STATE_RUNNING:
                message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)

            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class()
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)       
        
            inst.topology.state = Topology.STATE_STOPPING
            inst.topology.save()
            
            nodes = inst.topology.get_nodes()            
    
            (success, message) = self.__stop_vms(deployer, nodes)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)            
        
            inst.topology.state = Topology.STATE_STOPPED
            inst.topology.save()
              
            log.info("Stopping Globus Online endpoints")
            try:
                eps = inst.topology.get_go_endpoints()        
                self.__globusonline_stop(inst, eps)     
                inst.topology.save()       
            except GlobusOnlineException, goe:
                log.warning("Unable to stop GO endpoint/s: %s" % goe)              
Example #7
    def instance_terminate(self, inst_id):
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst)
        
        try:
            if inst.topology.state in [Topology.STATE_NEW]:
                message = "Cannot terminate an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)

            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class()
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)       
        
            inst.topology.state = Topology.STATE_TERMINATING
            inst.topology.save()
            
            nodes = inst.topology.get_nodes()            
    
            (success, message) = self.__terminate_vms(deployer, nodes)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)            
        
            # Remove GO endpoints
            eps = inst.topology.get_go_endpoints()        
            self.__globusonline_remove(inst, eps)
        
            inst.topology.state = Topology.STATE_TERMINATED
            inst.topology.save()
              
            log.info("Instances have been terminated.")
            return (API.STATUS_SUCCESS, "Success")
Example #8
        def pre_configure(self, ssh):
            node = self.node
            instance = self.ec2_instance
            
            log.info("Setting up instance %s. Hostname: %s" % (instance.id, instance.public_dns_name), node)
           
            try:
                ssh.run("ls -l /chef")
            except SSHCommandFailureException:
                # The image is not properly set up, so do all pre-configuration for globus-provision
                log.info("Image is not configured with Chef, so installing...")

                ssh.run("sudo chown -R %s /chef" % self.config.get("ec2-username"))
                ssh.scp_dir("%s" % self.chef_dir, "/chef")



                ssh.run("addgroup admin", exception_on_error = False)
                ssh.run("echo \"%s `hostname`\" | sudo tee -a /etc/hosts" % instance.private_ip_address)

                ssh.run("sudo apt-get install lsb-release wget")
                ssh.run("echo \"deb http://apt.opscode.com/ `lsb_release -cs` main\" | sudo tee /etc/apt/sources.list.d/opscode.list")
                ssh.run("wget -qO - http://apt.opscode.com/[email protected] | sudo apt-key add -")
                ssh.run("sudo apt-get update")
                ssh.run("echo 'chef chef/chef_server_url string http://127.0.0.1:4000' | sudo debconf-set-selections")
                ssh.run("sudo apt-get -q=2 install chef")
        
                ssh.run("echo -e \"cookbook_path \\\"/chef/cookbooks\\\"\\nrole_path \\\"/chef/roles\\\"\" > /tmp/chef.conf")        
                ssh.run("echo '{ \"run_list\": \"recipe[provision::ec2]\", \"scratch_dir\": \"%s\" }' > /tmp/chef.json" % self.scratch_dir)

                ssh.run("sudo chef-solo -c /tmp/chef.conf -j /tmp/chef.json")    
        
                ssh.run("sudo update-rc.d -f nis remove")
                ssh.run("sudo update-rc.d -f condor remove")
                ssh.run("sudo update-rc.d -f chef-client remove")
       

                log.debug("Removing private data...")
         
                ssh.run("sudo find /root/.*history /home/*/.*history -exec rm -f {} \;", exception_on_error = False)
Example #9
    def instance_stop(self, inst_id):
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst_id)
        
        try:
            if inst.topology.state != Topology.STATE_RUNNING:
                message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)

            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class()
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)       
        
            inst.topology.state = Topology.STATE_STOPPING
            inst.topology.save()
            
            nodes = inst.topology.get_nodes()            
    
            (success, message) = self.__stop_vms(deployer, nodes)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)            
        
            inst.topology.state = Topology.STATE_STOPPED
            inst.topology.save()
              
            log.info("Instances have been stopped running.")
            return (API.STATUS_SUCCESS, "Success")
Example #10
 def allocate_vm(self, node):
     log.info("Allocated dummy VM.")     
     return DummyVM()
Example #11
 def stop_vms(self, nodes):
     ec2_instance_ids = [n.deploy_data.ec2.instance_id for n in nodes]
     log.info("Stopping EC2 instances %s." % ", ".join(ec2_instance_ids))
     stopped = self.conn.stop_instances(ec2_instance_ids)
     log.info("Stopped EC2 instances %s." % ", ".join([i.id for i in stopped]))
Example #12
 def terminate_vms(self, nodes):
     ec2_instance_ids = [n.deploy_data.ec2.instance_id for n in nodes]
     log.info("Terminating EC2 instances %s." % ", ".join(ec2_instance_ids))
     terminated = self.conn.terminate_instances(ec2_instance_ids)
     log.info("Terminated EC2 instances %s." % ", ".join([i.id for i in terminated]))
Example #13
    def instance_update(self, inst_id, topology_json, extra_files, run_cmds):
        try:
            (success, message, inst) = self.__get_instance(inst_id)
            
            if not success:
                return (API.STATUS_FAIL, message)
    
            log.set_logging_instance(inst)
                            
            if inst.topology.state == Topology.STATE_NEW:
                # If the topology is still in the New state, we simply
                # validate the update and replace the old topology.
                # We don't need to deploy or configure any hosts.
                if topology_json != None:
                    (success, message, topology_changes) = inst.update_topology(topology_json)
                    if not success:
                        message = "Error in topology file: %s" % message
                        return (API.STATUS_FAIL, message)
                        
                return (API.STATUS_SUCCESS, "Success")
            elif inst.topology.state not in (Topology.STATE_RUNNING, Topology.STATE_FAILED):
                message = "Cannot update the topology of an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)        
    
            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class(extra_files, run_cmds)
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)    
                
            if topology_json != None:
                old_topology = inst.topology
                try:
                    (success, message, topology_changes) = inst.update_topology(topology_json)
                    if not success:
                        return (API.STATUS_FAIL, message)
                except ObjectValidationException, ove:
                    message = "Error in topology file: %s" % ove
                    return (API.STATUS_FAIL, message)
                
                create_hosts = []
                destroy_hosts = []
                
                create_endpoints = []
                remove_endpoints = []
    
                if topology_changes.changes.has_key("domains"):
                    for domain in topology_changes.changes["domains"].add:
                        d = inst.topology.domains[domain]
                        create_hosts += [n.id for n in d.nodes.values()] 
        
                    for domain in topology_changes.changes["domains"].remove:
                        # Removed domains only exist in the old topology
                        d = old_topology.domains[domain]
                        destroy_hosts += [n.id for n in d.nodes.values()]
                    
                    for domain in topology_changes.changes["domains"].edit:
                        if topology_changes.changes["domains"].edit[domain].changes.has_key("nodes"):
                            nodes_changes = topology_changes.changes["domains"].edit[domain].changes["nodes"]
                            create_hosts += nodes_changes.add
                            destroy_hosts += nodes_changes.remove
                            
                        if topology_changes.changes["domains"].edit[domain].changes.has_key("go_endpoints"):
                            ep_changes = topology_changes.changes["domains"].edit[domain].changes["go_endpoints"]
                            if ep_changes.change_type == PropertyChange.ADD:
                                create_endpoints += inst.topology.domains[domain].go_endpoints
                            elif ep_changes.change_type == PropertyChange.REMOVE:
                                remove_endpoints += old_topology.domains[domain].go_endpoints            
                            elif ep_changes.change_type == PropertyChange.EDIT:
                                create_endpoints += ep_changes.add
                                remove_endpoints += ep_changes.remove                            
    
                nodes = inst.topology.get_nodes()
    
                if len(destroy_hosts) > 0:
                    old_nodes = old_topology.get_nodes()
                    log.info("Terminating hosts %s" % destroy_hosts)   
                    old_nodes = [n for n in old_nodes if n.id in destroy_hosts]
                    (success, message) = self.__terminate_vms(deployer, old_nodes)
                    
                    if not success:
                        inst.topology.state = Topology.STATE_FAILED
                        inst.topology.save()
                        return (API.STATUS_FAIL, message)       
                    
                    inst.topology.save()         

                if len(create_endpoints) > 0:
                    try:      
                        self.__globusonline_pre_start(inst, create_endpoints)
                    except GlobusOnlineException, goe:
                        log.warning("Unable to create GO endpoint/s: %s" % goe)                      
Example #14
                map[device_name] = device
                user_data_mounts += """- [ ephemeral%i, /ephemeral/%i, auto, "defaults,noexec" ]\n""" % (i, i)

        # The following will only work with Ubuntu AMIs (including the AMI we provide)
        # If using a different AMI, you may need to manually mount the ephemeral partitions.
        user_data = """#cloud-config
manage_etc_hosts: true        
""" + user_data_mounts

        if instance_type in ("cc1.4xlarge", "cg1.4xlarge"):
            pg = self.__get_placement_group()
            placement_group = pg.name
        else:
            placement_group = None
        
        log.info(" |- Launching a %s instance for %s." % (instance_type, node.id))
        reservation = image.run(min_count=1, 
                                max_count=1,
                                instance_type=instance_type,
                                security_groups=security_groups,
                                key_name=self.instance.config.get("ec2-keypair"),
                                user_data=user_data,
                                block_device_map=map,
                                placement_group = placement_group,
                                placement = None)
        instance = reservation.instances[0]
        
        return EC2VM(instance)

    def resume_vm(self, node):
        ec2_instance_id = node.deploy_data.ec2.instance_id
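For reference, a sketch of the cloud-config user_data that the allocation code above assembles for an instance with two ephemeral disks. The "mounts:" header is an assumption: user_data_mounts is initialized earlier in the method, which this snippet does not show.

    # Hypothetical reconstruction of the assembled user_data string:
    user_data = ("#cloud-config\n"
                 "manage_etc_hosts: true\n"
                 "mounts:\n"
                 '- [ ephemeral0, /ephemeral/0, auto, "defaults,noexec" ]\n'
                 '- [ ephemeral1, /ephemeral/1, auto, "defaults,noexec" ]\n')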
Example #15
               
            inst.topology.save()                            
            
            nodes = inst.topology.get_nodes()
    
            (success, message, node_vm) = self.__allocate_vms(deployer, nodes, resuming)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)
    
            inst.topology.state = Topology.STATE_CONFIGURING
            inst.topology.save()
                          
            log.info("Instances are running.")
    
            for node, vm in node_vm.items():
                deployer.post_allocate(node, vm)

            inst.topology.save()
                            
            # Generate certificates
            if not resuming:
                inst.gen_certificates(force_hosts=False, force_users=False)
            else:
                inst.gen_certificates(force_hosts=True, force_users=False)    
            
            inst.topology.gen_chef_ruby_file(inst.instance_dir + "/topology.rb")
            inst.topology.gen_hosts_file(inst.instance_dir + "/hosts")               
    
Example #16
    def instance_start(self, inst_id, extra_files, run_cmds):
        
        (success, message, inst) = self.__get_instance(inst_id)
        
        if not success:
            return (API.STATUS_FAIL, message)

        log.set_logging_instance(inst_id)
            
        try:
            deployer_class = self.__get_deployer_class(inst)
            deployer = deployer_class(extra_files, run_cmds)
            
            try:
                deployer.set_instance(inst)
            except DeploymentException, de:
                message = "Deployer failed to initialize. %s " % de
                return (API.STATUS_FAIL, message)               
            
            if inst.topology.state == Topology.STATE_NEW:
                resuming = False
            elif inst.topology.state == Topology.STATE_STOPPED:
                resuming = True
            else:
                message = "Cannot start an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
                return (API.STATUS_FAIL, message)
            
            if not resuming:
                inst.topology.state = Topology.STATE_STARTING
            else:
                inst.topology.state = Topology.STATE_RESUMING
    
            inst.topology.save()
            
            nodes = inst.topology.get_nodes()
    
            (success, message, node_vm) = self.__allocate_vms(deployer, nodes, resuming)
            
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)
    
            inst.topology.state = Topology.STATE_CONFIGURING
            inst.topology.save()
                          
            log.info("Instances are running.")
    
            for node, vm in node_vm.items():
                deployer.post_allocate(node, vm)

            inst.topology.save()
    
            # Generate certificates
            if not resuming:
                inst.gen_certificates(force_hosts=False, force_users=False)
            else:
                inst.gen_certificates(force_hosts=True, force_users=False)    
            
            inst.topology.gen_chef_ruby_file(inst.instance_dir + "/topology.rb")
            inst.topology.gen_hosts_file(inst.instance_dir + "/hosts")               
    
            log.info("Setting up Globus Provision on instances")        
            
            (success, message) = self.__configure_vms(deployer, node_vm)
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)
    
            inst.topology.state = Topology.STATE_RUNNING
            inst.topology.save()
            
            return (API.STATUS_SUCCESS, "Success")
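instance_start effectively walks the topology through a small state machine. A sketch of the entry transitions as a lookup table (START_TRANSITIONS is illustrative only, not project code):

    START_TRANSITIONS = {
        Topology.STATE_NEW:     Topology.STATE_STARTING,   # fresh start
        Topology.STATE_STOPPED: Topology.STATE_RESUMING,   # resume
    }
    # Either path then moves to STATE_CONFIGURING and, on success,
    # STATE_RUNNING; any failed step sets STATE_FAILED instead.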
Example #17
 def instance_update(self, inst_id, topology_json, extra_files, run_cmds):
     try:
         (success, message, inst) = self.__get_instance(inst_id)
         
         if not success:
             return (API.STATUS_FAIL, message)
 
         log.set_logging_instance(inst_id)
                         
         if inst.topology.state != Topology.STATE_RUNNING:
             message = "Cannot update the topology of an instance that is in state '%s'" % (Topology.state_str[inst.topology.state])
             return (API.STATUS_FAIL, message)        
 
         deployer_class = self.__get_deployer_class(inst)
         deployer = deployer_class(extra_files, run_cmds)
         
         try:
             deployer.set_instance(inst)
         except DeploymentException, de:
             message = "Deployer failed to initialize. %s " % de
             return (API.STATUS_FAIL, message)    
             
         if topology_json != None:
             old_topology = inst.topology
             try:
                 (success, message, create_hosts, destroy_hosts) = inst.update_topology(topology_json)
             except ObjectValidationException, ove:
                 message = "Error in topology file: %s" % ove
                 return (API.STATUS_FAIL, message)   
 
             nodes = inst.topology.get_nodes()
 
             if len(destroy_hosts) > 0:
                 old_nodes = old_topology.get_nodes()
                 log.info("Terminating hosts %s" % destroy_hosts)   
                 old_nodes = [n for n in old_nodes if n.id in destroy_hosts]
                 (success, message) = self.__terminate_vms(deployer, old_nodes)
                 
                 if not success:
                     inst.topology.state = Topology.STATE_FAILED
                     inst.topology.save()
                     return (API.STATUS_FAIL, message)       
                 
                 inst.topology.save()         
                   
             if len(create_hosts) > 0:
                 nodes = inst.topology.get_nodes()
                 log.info("Allocating VMs for hosts %s" % create_hosts)   
                 new_nodes = [n for n in nodes if n.id in create_hosts]
                 (success, message, node_vm) = self.__allocate_vms(deployer, new_nodes, resuming = False)
         
                 if not success:
                     inst.topology.state = Topology.STATE_FAILED
                     inst.topology.save()
                     return (API.STATUS_FAIL, message)
             
                 inst.topology.save()
                 
                 for node, vm in node_vm.items():
                     deployer.post_allocate(node, vm)
 
                 inst.topology.save()
 
             # Generate certificates
             inst.gen_certificates()
 
             inst.topology.gen_chef_ruby_file(inst.instance_dir + "/topology.rb")
             inst.topology.gen_hosts_file(inst.instance_dir + "/hosts")               
Example #18
                        return (API.STATUS_FAIL, message)
                
                    inst.topology.save()
                    
                    for node, vm in node_vm.items():
                        deployer.post_allocate(node, vm)
    
                    inst.topology.save()
    
                # Generate certificates
                inst.gen_certificates()
    
                inst.topology.gen_chef_ruby_file(inst.instance_dir + "/topology.rb")
                inst.topology.gen_hosts_file(inst.instance_dir + "/hosts")               

            log.info("Setting up Globus Provision on instances")        
            
            # Right now we reconfigure all nodes. It shouldn't be hard to follow
            # the dependency tree to make sure only the new nodes and "ancestor"
            # nodes are updated
            nodes = inst.topology.get_nodes()
            node_vm = deployer.get_node_vm(nodes)
            (success, message) = self.__configure_vms(deployer, node_vm)
            if not success:
                inst.topology.state = Topology.STATE_FAILED
                inst.topology.save()
                return (API.STATUS_FAIL, message)
    
            inst.topology.state = Topology.STATE_RUNNING
            inst.topology.save()
            
Example #19
 def resume_vm(self, node):
     log.info("Resumed dummy VM.")     
     return DummyVM()
Example #20
 def terminate_vms(self, nodes):
     log.info("Dummy nodes terminated.")
Example #21
 def wait(self):
     log.info("Waiting for state %s" % Node.state_str[self.state])
Example #22
    def configure(self, ssh):
        domain = self.domain
        node = self.node
        instance_dir = self.deployer.instance.instance_dir

        if self.basic:
            # Upload host file and update hostname
            log.debug("Uploading host file and updating hostname", node)
            ssh.scp("%s/hosts" % instance_dir,
                    "/chef/cookbooks/provision/files/default/hosts")
            ssh.run(
                "sudo cp /chef/cookbooks/provision/files/default/hosts /etc/hosts",
                expectnooutput=True)

            ssh.run(
                "sudo bash -c \"echo %s > /etc/hostname\"" % node.hostname,
                expectnooutput=True)
            ssh.run(
                "sudo /etc/init.d/hostname.sh || sudo /etc/init.d/hostname restart",
                expectnooutput=True)

        self.check_continue()

        if self.chef:
            # Upload topology file
            log.debug("Uploading topology file", node)
            ssh.scp("%s/topology.rb" % instance_dir,
                    "/chef/cookbooks/provision/attributes/topology.rb")

            # Copy certificates
            log.debug("Copying certificates", node)
            ssh.scp_dir("%s/certs" % instance_dir,
                        "/chef/cookbooks/provision/files/default/")

            # Upload extra files
            log.debug("Copying extra files", node)
            for src, dst in self.deployer.extra_files:
                ssh.scp(src, dst)

            self.check_continue()

            # Temporarily add admin group
            log.debug("Creating admin group", node)
            try:
                ssh.run("addgroup admin")
            except SSHCommandFailureException:
                log.debug("Admin group already exists, skipping...", node)

            # Run chef
            log.debug("Running chef", node)
            ssh.run(
                "echo -e \"cookbook_path \\\"/chef/cookbooks\\\"\\nrole_path \\\"/chef/roles\\\"\" > /tmp/chef.conf",
                expectnooutput=True)
            ssh.run(
                "echo '{ \"run_list\": [ %s ], \"scratch_dir\": \"%s\", \"domain_id\": \"%s\", \"node_id\": \"%s\"  }' > /tmp/chef.json"
                % (",".join("\"%s\"" % r for r in node.run_list),
                   self.config.get("scratch-dir"), domain.id, node.id),
                expectnooutput=True)

            # Sometimes, Chef will fail because a service didn't start or restart
            # properly (NFS-related services seem to do this occasionally).
            # In most cases, the problem just "goes away" if you try to restart the
            # service again. So, if Chef fails, we don't give up and try again
            # (since the recipes are idempotent, there's no harm to running them
            # multiple times)
            chef_tries = 3
            while chef_tries > 0:
                rc = ssh.run(
                    "sudo -i chef-solo -c /tmp/chef.conf -j /tmp/chef.json",
                    exception_on_error=False)
                if rc != 0:
                    chef_tries -= 1
                    log.debug("chef-solo failed. %i attempts left", node)
                else:
                    break

            if chef_tries == 0:
                raise DeploymentException, "Failed to configure node %s" % node.id

            self.check_continue()

        if self.basic:
            ssh.run("sudo update-rc.d nis defaults")

        for cmd in self.deployer.run_cmds:
            ssh.run(cmd)

        log.info("Configuration done.", node)
Example #23
 def pre_configure(self, ssh):
     node = self.node
     instance = self.ec2_instance
     
     log.info("Setting up instance %s. Hostname: %s" % (instance.id, instance.public_dns_name), node)