Exemplo n.º 1
0
    def _check_cluster_size(self, min_nodes):
        """Check that the started nodes satisfy the per-group minimums.

        The total number of nodes is first compared against the sum of all
        minimums. Then, for every group holding fewer nodes than its
        minimum, spare nodes (those exceeding the minimum of their own
        group) are moved over from the other groups. A group that has no
        entry in `min_nodes` is never used as a donor: all of its nodes
        are considered wanted.

        :param min_nodes: minimum number of nodes for each kind
        :type min_nodes: dict [node_kind] = number
        :raises: ClusterError in case the size does not fit the minimum
                 number specified by the user.
        """
        # check the total sizes before moving the nodes around
        minimum_nodes = sum(min_nodes.values())

        if len(self.get_all_nodes()) < minimum_nodes:
            raise ClusterError("The cluster does not provide the minimum "
                               "amount of nodes specified in the "
                               "configuration. The nodes are still running, "
                               "but will not be setup yet. Please change the"
                               " minimum amount of nodes in the "
                               "configuration or try to start a new cluster "
                               "after checking the cloud provider settings.")

        # finding all node groups with an unsatisfied amount of nodes
        unsatisfied_groups = [group for group, size in min_nodes.items()
                              if len(self.nodes[group]) < size]

        # trying to move nodes around to fill the groups with missing nodes
        # (iterate over a copy, since satisfied groups are removed on the fly)
        for ugroup in unsatisfied_groups[:]:
            missing = min_nodes[ugroup] - len(self.nodes[ugroup])
            for group, nodes in self.nodes.items():
                if missing <= 0:
                    break
                # Groups without an explicit minimum have no spare nodes;
                # a plain `min_nodes[group]` lookup would raise KeyError
                # for them.
                spare = len(nodes) - min_nodes.get(group, len(nodes))
                while spare > 0 and missing > 0:
                    # hand over the last node of the donor group
                    self.nodes[ugroup].append(nodes.pop())
                    spare -= 1
                    missing -= 1

            if missing <= 0:
                unsatisfied_groups.remove(ugroup)

        if unsatisfied_groups:
            raise ClusterError("Could not find an optimal solution to "
                               "distribute the started nodes into the node "
                               "groups to satisfy the minimum amount of "
                               "nodes. Please change the minimum amount of "
                               "nodes in the configuration or try to start a"
                               " new cluster after checking the cloud "
                               "provider settings")
Exemplo n.º 2
0
 def load(self, fp):
     """Rebuild a `Cluster` from the YAML state read from `fp`.

     :param fp: open file object holding the cluster state; its `name`
                attribute is used in the error message
     :return: a `Cluster` built from the parsed state, with its
              `repository` attribute set to this object
     :raises: ClusterError if the parsed content is empty
     """
     state = yaml.safe_load(fp)
     if state:
         # the parsed mapping provides the constructor keyword arguments
         restored = Cluster(**state)
         restored.repository = self
         return restored
     raise ClusterError("Empty cluster state file: {0}".format(fp.name))
Exemplo n.º 3
0
    def start_instance(self, key_name, public_key_path, private_key_path,
                       security_group, flavor, image_id, image_userdata,
                       username=None):
        """
        Starts an instance in the cloud on the specified cloud
        provider (configuration option) and returns the id of the
        started instance.

        :param key_name: name of the keypair, checked via `_check_keypair`
        :param public_key_path: path passed on to `_check_keypair`
        :param private_key_path: path passed on to `_check_keypair`
        :param security_group: security group name, checked via
                               `_check_security_group`
        :param flavor: passed to `run_instances` as `instance_type`
        :param image_id: image to start
        :param image_userdata: passed to `run_instances` as `user_data`
        :param username: not used by this method

        :return: id of the last instance listed in the reservation
        :raises: ClusterError if the error text mentions
                 `TooManyInstances`, InstanceError for any other failure
                 of `run_instances`
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        self._check_keypair(key_name, public_key_path, private_key_path)
        log.debug("Checking security group `%s`.", security_group)
        self._check_security_group(security_group)
        # image_id = self._find_image_id(image_id)

        try:
            reservation = connection.run_instances(
                image_id, key_name=key_name, security_groups=[security_group],
                instance_type=flavor, user_data=image_userdata)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Search the error *text*: applying `in` to the exception
            # object itself does not perform a substring match.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)

        # hand the id back to the caller, as promised by the docstring
        return reservation.instances[-1].id
Exemplo n.º 4
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises: ClusterError if the error text mentions
                 `TooManyInstances`, InstanceError for any other failure
                 of `run_instances`
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        self._check_security_group(security_group)
        # image_id = self._find_image_id(image_id)

        try:
            reservation = connection.run_instances(
                image_id,
                key_name=key_name,
                security_groups=[security_group],
                instance_type=flavor,
                user_data=image_userdata)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Search the error *text*: applying `in` to the exception
            # object itself does not perform a substring match.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)

        # hand the id back to the caller, as promised by the docstring
        return reservation.instances[-1].id
Exemplo n.º 5
0
 def load(self, fp):
     """Rebuild a `Cluster` from the YAML content read from `fp`.

     :param fp: open file object holding the cluster state; its `name`
                attribute is used in the error message
     :return: a `Cluster` built from the parsed content, with its
              `repository` attribute set to this object
     :raises: ClusterError if the parsed content is empty
     """
     # NOTE(review): `yaml.load` may construct arbitrary Python objects
     # from tagged input; consider `yaml.safe_load` if the state files are
     # known to hold plain data only -- verify against the dump code.
     contents = yaml.load(fp)
     if contents:
         # imported at call time, and only once there is data to load
         from elasticluster import Cluster
         restored = Cluster(**contents)
         restored.repository = self
         return restored
     raise ClusterError("Empty yml file: {0}.".format(fp.name))
Exemplo n.º 6
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       node_name=None,
                       network_ids=None,
                       price=None,
                       timeout=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties (either as a spot
          instance request or as a regular instance)

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: value of the `Name` tag set on the instance
        :param str network_ids: comma-separated subnets; when given, one
                                network interface is requested per subnet
                                and the security group is attached to the
                                interfaces instead of the instance
        :param float price: Spot instance price (if 0, do not use spot
                            instances); defaults to `self.price`.
        :param int timeout: Timeout (in seconds) waiting for spot instances;
                            only used if price > 0; defaults to
                            `self.timeout`.
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises: ClusterError if the error text mentions
                 `TooManyInstances`, InstanceError for any other failure
                 while starting the instance (spot timeout included)
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        security_group_id = self._check_security_group(security_group)
        # image_id = self._find_image_id(image_id)

        if network_ids:
            interfaces = []
            for subnet in network_ids.split(','):
                subnet_id = self._check_subnet(subnet)

                interfaces.append(
                    boto.ec2.networkinterface.NetworkInterfaceSpecification(
                        subnet_id=subnet_id,
                        groups=[security_group_id],
                        associate_public_ip_address=self.request_floating_ip))
            interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                *interfaces)

            # the security group is already attached to each interface
            security_groups = []
        else:
            interfaces = None
            security_groups = [security_group]

        # get defaults for `price` and `timeout` from class instance
        if price is None:
            price = self.price
        if timeout is None:
            timeout = self.timeout

        try:
            # start spot instance if bid is specified
            if price:
                log.info("Requesting spot instance with price `%s` ...", price)
                request = connection.request_spot_instances(
                    price,
                    image_id,
                    key_name=key_name,
                    security_groups=security_groups,
                    instance_type=flavor,
                    user_data=image_userdata,
                    network_interfaces=interfaces,
                    instance_profile_name=self._instance_profile)[-1]

                # wait until spot request is fulfilled (will wait
                # forever if no timeout is given)
                start_time = time.time()
                timeout = (float(timeout) if timeout else 0)
                log.info(
                    "Waiting for spot instance (will time out in %d seconds) ...",
                    timeout)
                while request.status.code != 'fulfilled':
                    if timeout and time.time() - start_time > timeout:
                        request.cancel()
                        raise RuntimeError('spot instance timed out')
                    time.sleep(self.POLL_INTERVAL)
                    # update request status
                    request = connection.get_all_spot_instance_requests(
                        request_ids=request.id)[-1]
            else:
                reservation = connection.run_instances(
                    image_id,
                    key_name=key_name,
                    security_groups=security_groups,
                    instance_type=flavor,
                    user_data=image_userdata,
                    network_interfaces=interfaces,
                    instance_profile_name=self._instance_profile)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Search the error *text*: applying `in` to the exception
            # object itself does not perform a substring match.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)
        if price:
            # a fulfilled spot request only carries the instance id
            vm = connection.get_only_instances(
                instance_ids=[request.instance_id])[-1]
        else:
            vm = reservation.instances[-1]
        vm.add_tag("Name", node_name)

        # cache instance object locally for faster access later on
        self._instances[vm.id] = vm

        return vm.id
Exemplo n.º 7
0
    def start_instance(self,
                       key_name,
                       public_key_path,
                       private_key_path,
                       security_group,
                       flavor,
                       image_id,
                       image_userdata,
                       username=None,
                       node_name=None,
                       network_ids=None,
                       **kwargs):
        """Starts a new instance on the cloud using the given properties.
        The following tasks are done to start an instance:

        * establish a connection to the cloud web service
        * check ssh keypair and upload it if it does not yet exist. This is
          a locked process, since this function might be called in multiple
          threads and we only want the key to be stored once.
        * check if the security group exists
        * run the instance with the given properties

        :param str key_name: name of the ssh key to connect
        :param str public_key_path: path to ssh public key
        :param str private_key_path: path to ssh private key
        :param str security_group: firewall rule definition to apply on the
                                   instance
        :param str flavor: machine type to use for the instance
        :param str image_id: image type (os) to use for the instance
        :param str image_userdata: command to execute after startup
        :param str username: username for the given ssh key, default None
                             (not used by this method)
        :param str node_name: value of the `Name` tag set on the instance
        :param str network_ids: comma-separated subnets; when given, one
                                network interface is requested per subnet
                                and the security group is attached to the
                                interfaces instead of the instance
        :param kwargs: extra keyword arguments are accepted and ignored

        :return: str - instance id of the started instance
        :raises: ClusterError if the error text mentions
                 `TooManyInstances`, InstanceError for any other failure
                 of `run_instances`
        """
        connection = self._connect()

        log.debug("Checking keypair `%s`.", key_name)
        # the `_check_keypair` method has to be called within a lock,
        # since it will upload the key if it does not exist and if this
        # happens for every node at the same time ec2 will throw an error
        # message (see issue #79)
        with BotoCloudProvider.__node_start_lock:
            self._check_keypair(key_name, public_key_path, private_key_path)

        log.debug("Checking security group `%s`.", security_group)
        security_group_id = self._check_security_group(security_group)
        # image_id = self._find_image_id(image_id)

        if network_ids:
            interfaces = []
            for subnet in network_ids.split(','):
                subnet_id = self._check_subnet(subnet)

                interfaces.append(
                    ec2.networkinterface.NetworkInterfaceSpecification(
                        subnet_id=subnet_id,
                        groups=[security_group_id],
                        associate_public_ip_address=self.request_floating_ip))
            interfaces = ec2.networkinterface.NetworkInterfaceCollection(
                *interfaces)

            # the security group is already attached to each interface
            security_groups = []
        else:
            interfaces = None
            security_groups = [security_group]

        try:
            reservation = connection.run_instances(
                image_id,
                key_name=key_name,
                security_groups=security_groups,
                instance_type=flavor,
                user_data=image_userdata,
                network_interfaces=interfaces)
        except Exception as ex:
            log.error("Error starting instance: %s", ex)
            # Search the error *text*: applying `in` to the exception
            # object itself does not perform a substring match.
            if "TooManyInstances" in str(ex):
                raise ClusterError(ex)
            else:
                raise InstanceError(ex)

        vm = reservation.instances[-1]
        vm.add_tag("Name", node_name)

        # cache instance object locally for faster access later on
        self._instances[vm.id] = vm

        return vm.id