def test_scale_up_notification(self):
    big_pod_spec = copy.deepcopy(self.dummy_pod)
    for container in big_pod_spec['spec']['containers']:
        container['resources']['requests']['cpu'] = '100'
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    big_pod = KubePod(pykube.Pod(self.api, big_pod_spec))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod, big_pod])
    # only the schedulable pod should appear in the scale notification;
    # the 100-CPU pod cannot be fulfilled
    self.cluster.notifier.notify_scale.assert_called_with(
        mock.ANY, mock.ANY, [pod])
def test_scale_down_under_utilized_undrainable(self):
    """
    kube node with daemonset and pod/rc-pod --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if n.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(
        managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    # create some undrainable pods
    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    for container in self.dummy_pod['spec']['containers']:
        container.pop('resources', None)
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    self.dummy_rc_pod['metadata']['labels']['openai/do-not-drain'] = 'true'
    for container in self.dummy_rc_pod['spec']['containers']:
        container.pop('resources', None)
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))

    pod_scenarios = [
        # kube node with daemonset and pod with no resource ask --> noop
        [ds_pod, pod],
        # kube node with daemonset and critical rc pod --> noop
        [ds_pod, rc_pod],
    ]

    # make sure we're not in a grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.LAUNCH_HOUR_THRESHOLD = -1

    for pods in pod_scenarios:
        state = self.cluster.get_node_state(
            node, asgs[0], pods, pods_to_schedule,
            running_insts_map, collections.Counter())
        self.assertEqual(
            state, ClusterNodeState.UNDER_UTILIZED_UNDRAINABLE)

        self.cluster.maintain(
            managed_nodes, running_insts_map, pods_to_schedule,
            pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(
            response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
        node.cordon.assert_not_called()
def test_scale_down(self):
    """
    kube node with daemonset and no pod --> cordon
    """
    node = self._spin_up_node()
    node.cordon = mock.Mock(return_value="mocked stuff")
    node.drain = mock.Mock(return_value="mocked stuff")
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if n.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(
        managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    running_or_pending_assigned_pods = [ds_pod]

    # make sure we're not in a grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.maintain(
        managed_nodes, running_insts_map, pods_to_schedule,
        running_or_pending_assigned_pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
    node.cordon.assert_called_once_with()
def test_scale_down_launch_grace_period(self):
    """
    kube node with daemonset and no pod + launch grace period --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if n.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(
        managed_nodes, [])
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    running_or_pending_assigned_pods = [ds_pod]

    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    # keep the node inside its launch grace period so it is not cordoned
    self.cluster.LAUNCH_HOUR_THRESHOLD['aws'] = 60 * 30
    self.cluster.maintain(
        managed_nodes, running_insts_map, pods_to_schedule,
        running_or_pending_assigned_pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
    node.cordon.assert_not_called()
def test_get_pending_pods(self):
    dummy_node = copy.deepcopy(self.dummy_node)
    dummy_node['metadata']['name'] = 'k8s-agentpool1-16334397-0'
    node = KubeNode(pykube.Node(self.api, dummy_node))
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))

    act = self.cluster.get_pending_pods([pod], [node])
    self.assertEqual(len(act), 0)

    node = KubeNode(pykube.Node(self.api, dummy_node))
    pod2 = KubePod(pykube.Pod(self.api, self.dummy_pod))
    pod3 = KubePod(pykube.Pod(self.api, self.dummy_pod))

    # only one pod should fit on the node, so the other two stay pending
    act = self.cluster.get_pending_pods([pod, pod2, pod3], [node])
    self.assertEqual(len(act), 2)
def test_impossible(self):
    self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 't2.micro'}
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    assert not capacity.is_possible(pod)
def test_scale_up(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertGreater(
        response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def test_scale_down_busy(self):
    """
    kube node with daemonset and pod/rc-pod --> noop
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if n.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(
        managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))

    pod_scenarios = [
        # kube node with daemonset and pod --> noop
        [ds_pod, pod],
        # kube node with daemonset and rc pod --> noop
        [ds_pod, rc_pod],
    ]

    # make sure we're not in a grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1

    for pods in pod_scenarios:
        state = self.cluster.get_node_state(
            node, asgs[0], pods, pods_to_schedule,
            running_insts_map, collections.Counter())
        self.assertEqual(state, ClusterNodeState.BUSY)

        self.cluster.maintain(
            managed_nodes, running_insts_map, pods_to_schedule,
            pods, asgs)

        response = self.asg_client.describe_auto_scaling_groups()
        self.assertEqual(len(response['AutoScalingGroups']), 1)
        self.assertEqual(
            response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
        node.cordon.assert_not_called()
def test_scale_up_selector(self):
    # no ASG offers this instance type, so nothing should scale
    self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 'm4.large'}
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def test_scale_down_under_utilized_drainable(self):
    """
    kube node with daemonset and rc-pod --> cordon+drain
    """
    node = self._spin_up_node()
    all_nodes = [node]
    managed_nodes = [n for n in all_nodes if n.is_managed()]
    running_insts_map = self.cluster.get_running_instances_map(
        managed_nodes)
    pods_to_schedule = {}
    asgs = self.cluster.autoscaling_groups.get_all_groups(all_nodes)

    # create a daemonset pod and a drainable rc pod with no resource ask
    ds_pod = KubePod(pykube.Pod(self.api, self.dummy_ds_pod))
    for container in self.dummy_rc_pod['spec']['containers']:
        container.pop('resources', None)
    rc_pod = KubePod(pykube.Pod(self.api, self.dummy_rc_pod))
    pods = [ds_pod, rc_pod]

    # make sure we're not in a grace period
    self.cluster.idle_threshold = -1
    self.cluster.type_idle_threshold = -1
    self.cluster.LAUNCH_HOUR_THRESHOLD = -1

    state = self.cluster.get_node_state(
        node, asgs[0], pods, pods_to_schedule,
        running_insts_map, collections.Counter())
    self.assertEqual(state, ClusterNodeState.UNDER_UTILIZED_DRAINABLE)

    self.cluster.maintain(
        managed_nodes, running_insts_map, pods_to_schedule, pods, asgs)

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 1)
    self.assertEqual(response['AutoScalingGroups'][0]['DesiredCapacity'], 1)
    node.cordon.assert_called_once_with()
    node.drain.assert_called_once_with(pods, notifier=mock.ANY)
def test_fulfill_pending(self):
    nodes = self.create_nodes(2, 1)
    scaler = self.create_scaler(nodes)
    scaler.scale_pools = MagicMock()

    # should add one node to pool 1 -> (2, 1)
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    scaler.fulfill_pending([pod])
    scaler.scale_pools.assert_called_with({
        'agentpool0': 2,
        'agentpool1': 1,
    })

    # the two pods should fit in the same new node
    dummy_pod_2 = copy.deepcopy(self.dummy_pod)
    dummy_pod_2['spec']['containers'][0]['resources']['requests']['cpu'] = '400m'
    dummy_pod_2['spec']['containers'][0]['resources']['limits']['cpu'] = '400m'
    dummy_pod_2['metadata']['uid'] = 'fake'
    pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
    scaler.fulfill_pending([pod, pod_2])
    scaler.scale_pools.assert_called_with({
        'agentpool0': 2,
        'agentpool1': 1,
    })

    # pod_2 shouldn't fit anymore, and so it should add 2 new VMs
    dummy_pod_2['spec']['containers'][0]['resources']['requests']['cpu'] = '600m'
    dummy_pod_2['spec']['containers'][0]['resources']['limits']['cpu'] = '600m'
    pod_2 = KubePod(pykube.Pod(self.api, dummy_pod_2))
    scaler.fulfill_pending([pod, pod_2])
    scaler.scale_pools.assert_called_with({
        'agentpool0': 3,
        'agentpool1': 1,
    })
def test_timed_out_group(self):
    with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.is_timed_out') as is_timed_out:
        with mock.patch('autoscaler.autoscaling_groups.AutoScalingGroup.scale') as scale:
            is_timed_out.return_value = True
            scale.return_value = utils.CompletedFuture(None)

            pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
            selectors_hash = utils.selectors_to_hash(pod.selectors)
            asgs = self.cluster.autoscaling_groups.get_all_groups([])
            self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

            scale.assert_not_called()

            response = self.asg_client.describe_auto_scaling_groups()
            self.assertEqual(len(response['AutoScalingGroups']), 1)
            self.assertEqual(
                response['AutoScalingGroups'][0]['DesiredCapacity'], 0)
def test_scale_up(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    selectors_hash = utils.selectors_to_hash(pod.selectors)
    asgs = self.cluster.autoscaling_groups.get_all_groups([])
    self.cluster.fulfill_pending(asgs, selectors_hash, [pod])

    response = self.asg_client.describe_auto_scaling_groups()
    self.assertEqual(len(response['AutoScalingGroups']), 2)

    # the response does not guarantee ASG ordering
    if (response['AutoScalingGroups'][0]['AutoScalingGroupName'] ==
            'dummy-asg-small-gpu'):
        small_gpu_asg = response['AutoScalingGroups'][0]
        big_gpu_asg = response['AutoScalingGroups'][1]
    else:
        small_gpu_asg = response['AutoScalingGroups'][1]
        big_gpu_asg = response['AutoScalingGroups'][0]

    self.assertGreater(big_gpu_asg['DesiredCapacity'], 0)
    self.assertEqual(small_gpu_asg['DesiredCapacity'], 0)
def test_possible(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    assert capacity.is_possible(pod)
def test_can_fit(self):
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    node = KubeNode(pykube.Node(self.api, self.dummy_node))
    assert node.can_fit(pod.resources)
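# A companion negative check could pin down the other side of can_fit. The
# sketch below is not part of the original suite; it assumes the dummy node's
# allocatable CPU is far below the 100-core request that
# test_scale_up_notification uses as an unsatisfiable ask.
def test_cannot_fit(self):
    big_pod_spec = copy.deepcopy(self.dummy_pod)
    for container in big_pod_spec['spec']['containers']:
        # same oversized request as the notification test above
        container['resources']['requests']['cpu'] = '100'
    big_pod = KubePod(pykube.Pod(self.api, big_pod_spec))
    node = KubeNode(pykube.Node(self.api, self.dummy_node))
    assert not node.can_fit(big_pod.resources)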
def build_kube_pod(self, pykube_pod):
    # helper: wrap a pykube pod together with the labels that mark
    # a pod as drainable for these tests
    return KubePod(pykube_pod, self.drainable_labels)
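# Hypothetical usage of the helper above (it only assumes the
# self.drainable_labels fixture and the dummy pod spec already used
# throughout this file):
#
#     pod = self.build_kube_pod(pykube.Pod(self.api, self.dummy_pod))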
def test_impossible(self):
    self.dummy_pod['spec']['nodeSelector'] = {'aws/type': 't2.micro'}
    pod = KubePod(pykube.Pod(self.api, self.dummy_pod))
    assert not capacity.is_possible(pod)