Example #1
    def prepare_machines(self):
        logging.info(
            "prepare_machines: inside method with machine_info : \n%s",
            pprint.pformat(self.machines))

        queue_head = self.__get_queue_head_machine_info()

        # push queue head to be the first node to be prepared
        self.machines.remove(queue_head)
        self.machines.insert(0, queue_head)

        logging.info("queue head = \n{0}".format(pprint.pformat(queue_head)))

        try:
            logging.info("Preparing environment on remote machines...")
            for machine in self.machines:
                logging.info(
                    "For machine {ip}".format(ip=machine['public_ip']))

                success = self.__copy_prepare_script_to_vm(machine)

                if success != 0:
                    raise Exception(
                        "Remote copy command failed on {ip}!".format(
                            ip=machine['public_ip']))

                success = self.__run_prepare_script_on_vm(machine)

                if success != 0:
                    raise Exception("Remote command failed on {ip}!".format(
                        ip=machine['public_ip']))

            helper.update_celery_config_with_queue_head_ip(
                queue_head_ip=queue_head["public_ip"],
                agent_type=self.AGENT_TYPE)
            logging.info(
                "Updated celery config with queue head ip: {0}".format(
                    queue_head["public_ip"]))

            self.__configure_celery(queue_head)

            return True

        except Exception as e:
            traceback.print_exc()
            logging.error(
                "prepare_machines : exiting method with error : {0}".format(
                    str(e)))
            return False
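The small remove/insert(0) dance near the top is the only list manipulation the method performs: it moves the queue head to the front so it is prepared before the worker nodes. A standalone sketch of that idiom, with made-up addresses and the queue-head position chosen arbitrarily:

import pprint

# Standalone illustration of the reordering idiom used above: move the queue
# head to the front of the list so it is prepared before the worker nodes.
# The addresses and the queue-head position are invented for the example.
machines = [
    {'public_ip': '203.0.113.10'},
    {'public_ip': '203.0.113.11'},   # pretend this one is the queue head
    {'public_ip': '203.0.113.12'},
]
queue_head = machines[1]

machines.remove(queue_head)
machines.insert(0, queue_head)

print(pprint.pformat(machines))   # the queue head is now machines[0]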
Example #2
    def prepare_machines(self):
        logging.info("prepare_machines: inside method with machine_info : \n%s", pprint.pformat(self.machines))

        queue_head = self.__get_queue_head_machine_info()

        # push queue head to be the first node to be prepared
        self.machines.remove(queue_head)
        self.machines.insert(0, queue_head)

        logging.info("queue head = \n{0}".format(pprint.pformat(queue_head)))

        try:
            logging.info("Preparing environment on remote machines...")
            for machine in self.machines:
                logging.info("For machine {ip}".format(ip=machine['public_ip']))

                success = self.__copy_prepare_script_to_vm(machine)

                if success != 0:
                    raise Exception("Remote copy command failed on {ip}!".format(ip=machine['public_ip']))

                success = self.__run_prepare_script_on_vm(machine)

                if success != 0:
                    raise Exception("Remote command failed on {ip}!".format(ip=machine['public_ip']))

            helper.update_celery_config_with_queue_head_ip(queue_head_ip=queue_head["public_ip"],
                                                           agent_type=self.AGENT_TYPE)
            logging.info("Updated celery config with queue head ip: {0}".format(queue_head["public_ip"]))

            self.__configure_celery(queue_head)

            return True

        except Exception as e:
            traceback.print_exc()
            logging.error("prepare_machines : exiting method with error : {0}".format(str(e)))
            return False
Example #3
    def prepare_vms(self, parameters):
        """
        Public method for preparing a set of VMs

        Args:
        parameters      A dictionary of parameters
        """
        logging.debug("prepare_vms(): \nparameters = {0}".format(parameters))

        if not parameters["vms"] and 'head_node' not in parameters:
            logging.error(
                "No vms are waiting to be prepared or head_node is not specified!"
            )
            return

        try:
            # ##################################################
            # step 1: run instance based on queue head or not #
            # ##################################################

            num_vms, parameters = self.__prepare_queue_head(parameters)
            if num_vms == None and parameters == None:
                return

            # ########################################################################
            # step 2: poll the status of instances, if not running, terminate them  #
            #########################################################################
            public_ips, private_ips, instance_ids = self.__poll_instances_status(
                num_vms, parameters)
            if public_ips == None:
                if not self.__is_queue_head_running(parameters):
                    # if last time of spawning queue head failed, spawn another queue head again
                    self.prepare_vms(parameters)
                else:
                    return

            ############################################################
            # step 3: set alarm for the nodes, if it is NOT queue head #
            ############################################################
            # logging.info('Set shutdown alarm')
            #
            # try:
            #     if "queue_head" not in parameters or parameters["queue_head"] == False:
            #         for ins_id in instance_ids:
            #             agent.make_sleepy(parameters, ins_id)
            #     else:
            #         agent.make_sleepy(parameters, instance_ids[0], '7200')
            #
            # except:
            #     raise Exception('Errors in set alarm for instances.')

            ########################################################
            # step 4: verify whether nodes are connectable via ssh #
            ########################################################
            connected_public_ips, connected_instance_ids = self.__verify_ec2_instances_via_ssh(
                parameters=parameters,
                public_ips=public_ips,
                instance_ids=instance_ids)

            if len(connected_public_ips) == 0:
                if not self.__is_queue_head_running(parameters):
                    # if last time of spawning queue head failed, spawn another queue head again
                    self.prepare_vms(parameters)
                else:
                    return

            #########################################
            # step 5: configure celery on each node #
            #########################################
            if "queue_head" in parameters and parameters["queue_head"] == True:
                queue_head_ip = connected_public_ips[0]
                logging.info('queue_head_ip: {0}'.format(queue_head_ip))
                # celery configuration needs to be updated with the queue head ip
                helper.update_celery_config_with_queue_head_ip(
                    queue_head_ip=queue_head_ip, agent_type=self.agent_type)

            # copy celery configure to nodes.
            self.__configure_celery(parameters, connected_public_ips,
                                    connected_instance_ids)

            #####################################################################
            # step 6: if current node is queue head, may need to spawn the rest #
            #####################################################################

            if "queue_head" in parameters and parameters["queue_head"] == True:
                self.prepare_vms(parameters)
            else:
                # else all vms requested are finished spawning. Done!
                return

        except Exception as e:
            logging.exception(e)
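A hedged sketch of the parameters dict this EC2-flavored prepare_vms() appears to expect, reconstructed only from the keys the method itself reads; the value types and any additional required fields are assumptions, not a documented API:

# Keys inferred from the method body above; everything else is an assumption.
parameters = {
    'vms': 3,             # truthiness is checked with "not parameters['vms']"
    'head_node': {},      # presence is tested with "'head_node' not in parameters"
    'queue_head': True,   # gates the celery update (step 5) and re-spawning (step 6)
    # __prepare_queue_head(), __poll_instances_status() and the ssh verification
    # helpers likely require further credential/instance fields not shown here.
}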
Example #4
    def prepare_vms(self, parameters):
        logging.debug('prepare_vms(): parameters={0}'.format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]

        user_data = self.__get_user_data(parameters['user_id'])

        if self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters \
                or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == None \
                or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == []:

            logging.error('Error: No {0} param!'.format(
                self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            # Report Error
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Invalid Parameters'
            user_data.put()
            return

        flex_cloud_machine_info = parameters[
            self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = 'Flex Cloud configured. Waiting for workers to become available...'
        user_data.put()

        # Initialize the VMstateModel db
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine['ip'], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(state=state,
                                    infra=self.agent_type,
                                    ins_type=FlexConfig.INSTANCE_TYPE,
                                    pri_ip=machine['ip'],
                                    pub_ip=machine['ip'],
                                    username=machine['username'],
                                    keyfile=machine['keyfile'],
                                    ins_id=self.agent.get_flex_instance_id(
                                        machine['ip']),
                                    user_id=parameters['user_id'],
                                    res_id=self.reservation_id)
            vm_state.put()

        if not all_accessible:
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error: not all workers are accessible'
            user_data.put()
            return

        if queue_head_machine == None or not helper.wait_for_ssh_connection(
                queue_head_machine['keyfile'],
                queue_head_machine['ip'],
                username=queue_head_machine['username']):
            logging.error(
                'Found no viable ssh-able/running queue head machine!')
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error: Can not connect {0} (queue head) via SSH'.format(
                queue_head_machine['ip'])
            user_data.put()
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error('Error: could not prepare queue head!')
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = 'Error preparing the queue head'
            user_data.put()
            return

        flex_cloud_workers = []
        for machine in parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if helper.wait_for_ssh_connection(
                        machine['keyfile'],
                        machine['ip'],
                        username=machine['username']):
                    flex_cloud_workers.append(machine)
                else:
                    # Report Failure
                    user_data.flex_cloud_status = False
                    user_data.flex_cloud_info_msg = 'Error: Can not connect to {0} via SSH'.format(
                        machine['ip'])
                    user_data.put()
                    return

            if len(flex_cloud_workers) > 0:
                logging.debug(
                    'Preparing workers: {0}'.format(flex_cloud_workers))
                params = {
                    'infrastructure':
                    AgentTypes.FLEX,
                    self.PARAM_FLEX_CLOUD_MACHINE_INFO:
                    flex_cloud_workers,
                    'credentials':
                    parameters['credentials'],
                    'user_id':
                    parameters['user_id'],
                    self.PARAM_FLEX_QUEUE_HEAD:
                    parameters[self.PARAM_FLEX_QUEUE_HEAD],
                    'reservation_id':
                    parameters['reservation_id']
                }
                self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine['ip'], agent_type=self.agent_type)

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug('Flex Cloud Deployed')
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = 'Flex Cloud Deployed'
        user_data.put()

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)

        return
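For the Flex-cloud variant, the entries of flex_cloud_machine_info can be sketched from the keys the method touches. The queue-head flag is stored under the class constant self.PARAM_IS_QUEUE_HEAD, whose string value is not visible here, so the 'is_queue_head' key below is only a placeholder; the other values are likewise invented for illustration:

# Hedged sketch of a single machine entry; key names other than the queue-head
# flag are read directly from the method body, the values are placeholders.
machine = {
    'ip': '203.0.113.20',            # probed on ports 22 and 443, used as pri/pub IP
    'username': 'ubuntu',            # passed to helper.wait_for_ssh_connection()
    'keyfile': '/path/to/key.pem',   # SSH key used for the connection attempts
    'is_queue_head': False,          # placeholder for self.PARAM_IS_QUEUE_HEAD
}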
Example #5
    def prepare_vms(self, parameters):
        """
        Public method for preparing a set of VMs

        Args:
        parameters      A dictionary of parameters
        """
        logging.debug("prepare_vms(): \nparameters = {0}".format(parameters))

        if not parameters["vms"] and "head_node" not in parameters:
            logging.error("No vms are waiting to be prepared or head_node is not specified!")
            return

        try:
            # ##################################################
            # step 1: run instance based on queue head or not #
            # ##################################################

            num_vms, parameters = self.__prepare_queue_head(parameters)
            if num_vms == None and parameters == None:
                return

            # ########################################################################
            # step 2: poll the status of instances, if not running, terminate them  #
            #########################################################################
            public_ips, private_ips, instance_ids = self.__poll_instances_status(num_vms, parameters)
            if public_ips == None:
                if not self.__is_queue_head_running(parameters):
                    # if last time of spawning queue head failed, spawn another queue head again
                    self.prepare_vms(parameters)
                else:
                    return

            ############################################################
            # step 3: set alarm for the nodes, if it is NOT queue head #
            ############################################################
            # logging.info('Set shutdown alarm')
            #
            # try:
            #     if "queue_head" not in parameters or parameters["queue_head"] == False:
            #         for ins_id in instance_ids:
            #             agent.make_sleepy(parameters, ins_id)
            #     else:
            #         agent.make_sleepy(parameters, instance_ids[0], '7200')
            #
            # except:
            #     raise Exception('Errors in set alarm for instances.')

            ########################################################
            # step 4: verify whether nodes are connectable via ssh #
            ########################################################
            connected_public_ips, connected_instance_ids = self.__verify_ec2_instances_via_ssh(
                parameters=parameters, public_ips=public_ips, instance_ids=instance_ids
            )

            if len(connected_public_ips) == 0:
                if not self.__is_queue_head_running(parameters):
                    # if last time of spawning queue head failed, spawn another queue head again
                    self.prepare_vms(parameters)
                else:
                    return

            #########################################
            # step 5: configure celery on each node #
            #########################################
            if "queue_head" in parameters and parameters["queue_head"] == True:
                queue_head_ip = connected_public_ips[0]
                logging.info("queue_head_ip: {0}".format(queue_head_ip))
                # celery configuration needs to be updated with the queue head ip
                helper.update_celery_config_with_queue_head_ip(queue_head_ip=queue_head_ip, agent_type=self.agent_type)

            # copy celery configure to nodes.
            self.__configure_celery(parameters, connected_public_ips, connected_instance_ids)

            #####################################################################
            # step 6: if current node is queue head, may need to spawn the rest #
            #####################################################################

            if "queue_head" in parameters and parameters["queue_head"] == True:
                self.prepare_vms(parameters)
            else:
                # else all vms requested are finished spawning. Done!
                return

        except Exception as e:
            logging.exception(e)
Example #6
    def prepare_vms(self, parameters):
        logging.debug("prepare_vms(): parameters={0}".format(parameters))

        queue_head_machine = parameters[self.PARAM_FLEX_QUEUE_HEAD]

        user_data = self.__get_user_data(parameters["user_id"])

        if (
            self.PARAM_FLEX_CLOUD_MACHINE_INFO not in parameters
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == None
            or parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO] == []
        ):

            logging.error("Error: No {0} param!".format(self.PARAM_FLEX_CLOUD_MACHINE_INFO))
            # Report Error
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Invalid Parameters"
            user_data.put()
            return

        flex_cloud_machine_info = parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]

        # Set the user message to "configuring..."
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = "Flex Cloud configured. Waiting for workers to become available..."
        user_data.put()

        # Initialize the VMstateModel db
        all_accessible = True
        for machine in flex_cloud_machine_info:
            if self.agent.check_network_ports(machine["ip"], [22, 443]):
                state = VMStateModel.STATE_ACCESSIBLE
            else:
                state = VMStateModel.STATE_INACCESSIBLE
                all_accessible = False
            vm_state = VMStateModel(
                state=state,
                infra=self.agent_type,
                ins_type=FlexConfig.INSTANCE_TYPE,
                pri_ip=machine["ip"],
                pub_ip=machine["ip"],
                username=machine["username"],
                keyfile=machine["keyfile"],
                ins_id=self.agent.get_flex_instance_id(machine["ip"]),
                user_id=parameters["user_id"],
                res_id=self.reservation_id,
            )
            vm_state.put()

        if not all_accessible:
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error: not all workers are accessible"
            user_data.put()
            return

        if queue_head_machine == None or not helper.wait_for_ssh_connection(
            queue_head_machine["keyfile"], queue_head_machine["ip"], username=queue_head_machine["username"]
        ):
            logging.error("Found no viable ssh-able/running queue head machine!")
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error: Can not connect {0} (queue head) via SSH".format(
                queue_head_machine["ip"]
            )
            user_data.put()
            return

        if not self.__prepare_queue_head(queue_head_machine, parameters):
            logging.error("Error: could not prepare queue head!")
            # Report Failure
            user_data.flex_cloud_status = False
            user_data.flex_cloud_info_msg = "Error preparing the queue head"
            user_data.put()
            return

        flex_cloud_workers = []
        for machine in parameters[self.PARAM_FLEX_CLOUD_MACHINE_INFO]:
            if machine[self.PARAM_IS_QUEUE_HEAD] != True:
                if helper.wait_for_ssh_connection(machine["keyfile"], machine["ip"], username=machine["username"]):
                    flex_cloud_workers.append(machine)
                else:
                    # Report Failure
                    user_data.flex_cloud_status = False
                    user_data.flex_cloud_info_msg = "Error: Can not connect to {0} via SSH".format(machine["ip"])
                    user_data.put()
                    return

            if len(flex_cloud_workers) > 0:
                logging.debug("Preparing workers: {0}".format(flex_cloud_workers))
                params = {
                    "infrastructure": AgentTypes.FLEX,
                    self.PARAM_FLEX_CLOUD_MACHINE_INFO: flex_cloud_workers,
                    "credentials": parameters["credentials"],
                    "user_id": parameters["user_id"],
                    self.PARAM_FLEX_QUEUE_HEAD: parameters[self.PARAM_FLEX_QUEUE_HEAD],
                    "reservation_id": parameters["reservation_id"],
                }
                self.agent.prepare_instances(params)

        helper.update_celery_config_with_queue_head_ip(
            queue_head_ip=queue_head_machine["ip"], agent_type=self.agent_type
        )

        self.__configure_celery(params=parameters)

        # Report Success
        logging.debug("Flex Cloud Deployed")
        user_data.flex_cloud_status = True
        user_data.flex_cloud_info_msg = "Flex Cloud Deployed"
        user_data.put()

        # Force the update of the instance status
        VMStateModel.synchronize(agent=self.agent, parameters=parameters)

        return