import logging

import botocore.exceptions

import autoscaler.utils as utils

logger = logging.getLogger(__name__)


def scale_nodes_in(self, nodes):
    """
    scale down the ASG by terminating the given nodes.
    returns a future indicating when the request completes.
    """
    for node in nodes:
        try:
            # If we somehow end up with more capacity than desired
            # capacity, and the desired capacity is at the ASG min
            # size, then terminating the instance while decrementing
            # the desired capacity will make the AWS API call fail.
            decrement_capacity = self.desired_capacity > self.min_size
            self.client.terminate_instance_in_auto_scaling_group(
                InstanceId=node.instance_id,
                ShouldDecrementDesiredCapacity=decrement_capacity)
            self.nodes.remove(node)
            logger.info('Scaled node %s in', node)
        except botocore.exceptions.ClientError as e:
            if ("Terminating instance without replacement will "
                    "violate group's min size constraint." not in str(e)):
                raise
            logger.error("Failed to terminate instance: %s", e)

    return utils.CompletedFuture(None)
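
# Hedged usage sketch, not part of the original module: how a caller might
# hand idle nodes to scale_nodes_in() and block on the returned future.
# `asg` is an AutoScalingGroup instance; selecting nodes by an
# `unschedulable` attribute is an assumption made for illustration.
def _example_scale_in_idle(asg):
    idle = [node for node in asg.nodes if node.unschedulable]
    # scale_nodes_in() returns a CompletedFuture, so result() is immediate.
    asg.scale_nodes_in(idle).result()
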
def scale(self, new_desired_capacity):
    """
    scales the ASG to the new desired capacity.
    returns a future whose result is True if the desired capacity
    was increased.
    """
    desired_capacity = min(self.max_size, new_desired_capacity)
    num_unschedulable = len(self.unschedulable_nodes)
    num_schedulable = self.actual_capacity - num_unschedulable

    logger.info("Desired {}, currently at {}".format(
        desired_capacity, self.desired_capacity))
    logger.info("Kube node: {} schedulable, {} unschedulable".format(
        num_schedulable, num_unschedulable))

    # Try to raise the number of schedulable nodes if we don't have
    # enough, regardless of whether the group's capacity already
    # matches the desired capacity.
    if num_schedulable < desired_capacity:
        for node in self.unschedulable_nodes:
            if node.uncordon():
                num_schedulable += 1
                # Uncordon only what we need
                if num_schedulable == desired_capacity:
                    break

    if self.desired_capacity != desired_capacity:
        if self.desired_capacity == self.max_size:
            logger.info(
                "Desired same as max, desired: {}, schedulable: {}".format(
                    self.desired_capacity, num_schedulable))
            return utils.CompletedFuture(False)

        scale_up = self.desired_capacity < desired_capacity
        # This should be a rare event.
        # Note: this micro-optimization is not worth doing, as the race
        # condition here is tricky: if EC2 initializes some nodes in the
        # meantime, the ASG will shut down nodes by its own policy.
        # scale_down = self.desired_capacity > desired_capacity >= self.actual_capacity
        if scale_up:
            # We should already have pushed num_schedulable as high as
            # possible above; now we actually need to grow.
            return self.set_desired_capacity(desired_capacity)

    logger.info(
        "Doing nothing: {} already has the correct desired capacity, "
        "schedulable: {}".format(self.name, num_schedulable))
    return utils.CompletedFuture(False)
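
# Hedged sketch of how scale() is meant to be driven (illustrative only;
# `pending_capacity` is a made-up caller-side estimate of needed nodes):
def _example_request_capacity(asg, pending_capacity):
    # scale() uncordons nodes first and only ever raises desired capacity;
    # the future resolves to True only when a scale-up was actually issued.
    grew = asg.scale(pending_capacity).result()
    if not grew:
        logger.info("%s: no scale-up issued (at max or already sufficient)",
                    asg.name)
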
def set_desired_capacity(self, new_desired_capacity):
    """
    sets the desired capacity of the underlying ASG directly.
    note that this is for internal control. for scaling purposes,
    please use scale() instead.
    """
    logger.info("ASG: {} new_desired_capacity: {}".format(
        self, new_desired_capacity))
    self.client.set_desired_capacity(AutoScalingGroupName=self.name,
                                     DesiredCapacity=new_desired_capacity,
                                     HonorCooldown=False)
    self.desired_capacity = new_desired_capacity
    return utils.CompletedFuture(True)
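
# For reference, a hedged sketch of the raw boto3 call that
# set_desired_capacity() wraps. The group name and capacity are
# hypothetical; HonorCooldown=False applies the change even if the
# group is inside a scaling cooldown window.
def _example_raw_set_desired_capacity():
    import boto3

    client = boto3.client('autoscaling')
    client.set_desired_capacity(AutoScalingGroupName='example-asg',
                                DesiredCapacity=3,
                                HonorCooldown=False)
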
def test_timed_out_group(self):
    with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.is_timed_out') as is_timed_out:
        with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.scale') as scale:
            is_timed_out.return_value = True
            scale.return_value = utils.CompletedFuture(None)

            pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
            selectors_hash = utils.selectors_to_hash(pod.selectors)
            asgs = self.cluster.autoscaling_groups.get_all_groups([])
            self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

            # A timed-out group must never be scaled, so its desired
            # capacity should stay at 0.
            scale.assert_not_called()

            response = self.asg_client.describe_auto_scaling_groups()
            self.assertEqual(len(response['AutoScalingGroups']), 1)
            self.assertEqual(
                response['AutoScalingGroups'][0]['DesiredCapacity'], 0)