def cluster_update(self, context, cluster, node_count, rollback=False):
    """Scale the cluster's default worker nodegroup to ``node_count``.

    :param context: request context.
    :param cluster: the Cluster object being updated.
    :param node_count: desired number of worker nodes.
    :param rollback: whether the driver should roll back a failed update.
    :returns: the Cluster object (also when no change was needed).
    :raises NotSupported: if the current cluster status forbids updates.
    """
    LOG.debug('cluster_heat cluster_update')

    osc = clients.OpenStackClients(context)
    allow_update_status = (fields.ClusterStatus.CREATE_COMPLETE,
                           fields.ClusterStatus.UPDATE_COMPLETE,
                           fields.ClusterStatus.RESUME_COMPLETE,
                           fields.ClusterStatus.RESTORE_COMPLETE,
                           fields.ClusterStatus.ROLLBACK_COMPLETE,
                           fields.ClusterStatus.SNAPSHOT_COMPLETE,
                           fields.ClusterStatus.CHECK_COMPLETE,
                           fields.ClusterStatus.ADOPT_COMPLETE)
    if cluster.status not in allow_update_status:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        operation = _('Updating a cluster when status is '
                      '"%s"') % cluster.status
        raise exception.NotSupported(operation=operation)

    # Updates will be only reflected to the default worker
    # nodegroup.
    worker_ng = cluster.default_ng_worker
    if worker_ng.node_count == node_count:
        # FIX: return the cluster instead of bare `return` (None) so this
        # no-op path has the same return value as every other success path.
        return cluster

    # Backup the old node count so that we can restore it
    # in case of an exception.
    old_node_count = worker_ng.node_count

    manager = scale_manager.get_scale_manager(context, osc, cluster)

    # Get driver from the cluster's template.
    ct = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(ct.server_type,
                                              ct.cluster_distro, ct.coe)
    # Update cluster
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
        worker_ng.node_count = node_count
        worker_ng.save()
        cluster_driver.update_cluster(context, cluster, manager, rollback)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        # Restore the node_count
        worker_ng.node_count = old_node_count
        worker_ng.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        # Translate heat's HTTPBadRequest into a Magnum parameter error.
        if isinstance(e, exc.HTTPBadRequest):
            e = exception.InvalidParameterValue(message=six.text_type(e))
            raise e
        raise

    cluster.save()
    return cluster
def test_get_scale_manager(self, mock_cluster_get):
    """get_scale_manager must map the template COE to a manager class."""
    ctx = mock.MagicMock()
    osc = mock.MagicMock()

    def _cluster_for(coe):
        # Mock cluster whose cluster_template reports the given COE.
        cluster = mock.MagicMock()
        cluster.cluster_template.coe = coe
        return cluster

    expectations = (
        ('kubernetes', scale_manager.K8sScaleManager),
        ('mesos', scale_manager.MesosScaleManager),
    )
    for coe, manager_cls in expectations:
        result = scale_manager.get_scale_manager(ctx, osc, _cluster_for(coe))
        self.assertIsInstance(result, manager_cls)

    # An unrecognized COE yields no scale manager at all.
    result = scale_manager.get_scale_manager(ctx, osc, _cluster_for('fake'))
    self.assertIsNone(result)
def test_get_scale_manager(self, mock_cluster_get):
    """The manager returned must match the baymodel COE, or be None."""
    mock_context = mock.MagicMock()
    mock_osc = mock.MagicMock()

    # One mock cluster per COE under test, keyed by the COE name.
    clusters = {}
    for coe in ('kubernetes', 'mesos', 'fake'):
        c = mock.MagicMock()
        c.baymodel.coe = coe
        clusters[coe] = c

    self.assertIsInstance(
        scale_manager.get_scale_manager(
            mock_context, mock_osc, clusters['kubernetes']),
        scale_manager.K8sScaleManager)
    self.assertIsInstance(
        scale_manager.get_scale_manager(
            mock_context, mock_osc, clusters['mesos']),
        scale_manager.MesosScaleManager)
    # No manager exists for an unknown COE.
    self.assertIsNone(
        scale_manager.get_scale_manager(
            mock_context, mock_osc, clusters['fake']))
def cluster_update(self, context, cluster, rollback=False):
    """Push pending field changes on *cluster* to its COE driver.

    :param context: request context.
    :param cluster: Cluster object carrying unsaved field changes.
    :param rollback: whether the driver should roll back a failed update.
    :returns: the Cluster object (unchanged when there was nothing to do).
    :raises NotSupported: if the current cluster status forbids updates.
    """
    LOG.debug('cluster_heat cluster_update')

    os_clients = clients.OpenStackClients(context)
    updatable_statuses = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE,
    )
    if cluster.status not in updatable_statuses:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        raise exception.NotSupported(
            operation=_('Updating a cluster when status is '
                        '"%s"') % cluster.status)

    # Without any modified fields there is nothing to send to the driver.
    if not cluster.obj_what_changed():
        return cluster

    scaler = scale_manager.get_scale_manager(context, os_clients, cluster)

    # Resolve the COE driver from the cluster's template.
    template = conductor_utils.retrieve_cluster_template(context, cluster)
    coe_driver = driver.Driver.get_driver(template.server_type,
                                          template.cluster_distro,
                                          template.coe)
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
        coe_driver.update_cluster(context, cluster, scaler, rollback)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        # Translate heat's HTTPBadRequest into a Magnum parameter error.
        if isinstance(e, exc.HTTPBadRequest):
            raise exception.InvalidParameterValue(
                message=six.text_type(e))
        raise

    cluster.save()
    return cluster
def cluster_update(self, context, cluster, rollback=False):
    """Update the Heat stack behind *cluster* and poll it to completion.

    :param context: request context.
    :param cluster: Cluster object with pending field changes.
    :param rollback: whether Heat should roll back a failed stack update.
    :returns: the Cluster object.
    :raises NotSupported: if the stack status does not permit an update.
    """
    LOG.debug('cluster_heat cluster_update')

    osc = clients.OpenStackClients(context)
    stack = osc.heat().stacks.get(cluster.stack_id)
    ok_statuses = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE,
    )
    if stack.stack_status not in ok_statuses:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        raise exception.NotSupported(
            operation=_('Updating a cluster when stack status is '
                        '"%s"') % stack.stack_status)

    # Bail out early when no cluster field was actually modified.
    if not cluster.obj_what_changed():
        return cluster

    manager = scale_manager.get_scale_manager(context, osc, cluster)

    conductor_utils.notify_about_cluster_operation(
        context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

    # Resolve the driver from the template and hand it the stack update.
    ct = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(ct.server_type,
                                              ct.cluster_distro,
                                              ct.coe)
    cluster_driver.update_stack(context, osc, cluster, manager, rollback)
    self._poll_and_check(osc, cluster, cluster_driver)

    return cluster
def cluster_update(self, context, cluster, rollback=False):
    """Apply *cluster*'s pending changes to its Heat stack and poll.

    Raises NotSupported when the stack status disallows updates; returns
    the cluster untouched when no field was changed.
    """
    LOG.debug('cluster_heat cluster_update')

    os_clients = clients.OpenStackClients(context)
    heat_stack = os_clients.heat().stacks.get(cluster.stack_id)

    permitted = (fields.ClusterStatus.CREATE_COMPLETE,
                 fields.ClusterStatus.UPDATE_COMPLETE,
                 fields.ClusterStatus.RESUME_COMPLETE,
                 fields.ClusterStatus.RESTORE_COMPLETE,
                 fields.ClusterStatus.ROLLBACK_COMPLETE,
                 fields.ClusterStatus.SNAPSHOT_COMPLETE,
                 fields.ClusterStatus.CHECK_COMPLETE,
                 fields.ClusterStatus.ADOPT_COMPLETE)
    if heat_stack.stack_status not in permitted:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        operation = _('Updating a cluster when stack status is '
                      '"%s"') % heat_stack.stack_status
        raise exception.NotSupported(operation=operation)

    delta = cluster.obj_what_changed()
    if not delta:
        # Nothing changed on the object, so there is no stack work to do.
        return cluster

    manager = scale_manager.get_scale_manager(context, os_clients, cluster)

    conductor_utils.notify_about_cluster_operation(
        context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)

    # Look up the COE driver and kick off the stack update, then poll
    # the stack until it settles.
    template = conductor_utils.retrieve_cluster_template(context, cluster)
    coe_driver = driver.Driver.get_driver(template.server_type,
                                          template.cluster_distro,
                                          template.coe)
    coe_driver.update_stack(context, os_clients, cluster, manager, rollback)
    self._poll_and_check(os_clients, cluster, coe_driver)

    return cluster
def cluster_resize(self, context, cluster,
                   node_count, nodes_to_remove, nodegroup):
    """Resize *nodegroup* of *cluster* to *node_count* nodes.

    :param context: request context.
    :param cluster: the Cluster object being resized.
    :param node_count: desired node count for the nodegroup.
    :param nodes_to_remove: specific nodes to drop when scaling down.
    :param nodegroup: the NodeGroup object being resized.
    :returns: the Cluster object.
    :raises NotSupported: if the cluster status forbids resizing.
    """
    LOG.debug('cluster_conductor cluster_resize')

    osc = clients.OpenStackClients(context)

    # NOTE(flwang): The /resize API backs the Kubernetes Cluster
    # Autoscaler, so two things follow: (1) it may be called very often,
    # and (2) a failed scale up/down must leave the cluster resizable
    # again. That is why UPDATE_FAILED and UPDATE_IN_PROGRESS are also
    # accepted below.
    resizable_statuses = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE,
        fields.ClusterStatus.UPDATE_FAILED,
        fields.ClusterStatus.UPDATE_IN_PROGRESS,
    )
    if cluster.status not in resizable_statuses:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
            cluster)
        raise exception.NotSupported(
            operation=_('Resizing a cluster when status is '
                        '"%s"') % cluster.status)

    resize_manager = scale_manager.get_scale_manager(context, osc, cluster)

    # Resolve the COE driver from the cluster's template.
    template = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(template.server_type,
                                              template.cluster_distro,
                                              template.coe)

    # Remember the previous node count so it can be restored on failure.
    previous_count = nodegroup.node_count
    try:
        nodegroup.node_count = node_count
        nodegroup.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        nodegroup.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING,
            cluster)
        cluster_driver.resize_cluster(context, cluster, resize_manager,
                                      node_count, nodes_to_remove,
                                      nodegroup)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        # Roll the nodegroup back to its pre-resize count and flag it.
        nodegroup.node_count = previous_count
        nodegroup.status = fields.ClusterStatus.UPDATE_FAILED
        nodegroup.status_reason = six.text_type(e)
        nodegroup.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE,
            cluster)
        # Translate heat's HTTPBadRequest into a Magnum parameter error.
        if isinstance(e, exc.HTTPBadRequest):
            raise exception.InvalidParameterValue(
                message=six.text_type(e))
        raise

    cluster.save()
    return cluster
def cluster_update(self, context, cluster, node_count, rollback=False):
    """Scale the cluster's default worker nodegroup to ``node_count``.

    :param context: request context.
    :param cluster: the Cluster object being updated.
    :param node_count: desired number of worker nodes.
    :param rollback: whether the driver should roll back a failed update.
    :returns: the Cluster object (also when no change was needed).
    :raises NotSupported: if the current cluster status forbids updates.
    """
    LOG.debug('cluster_heat cluster_update')

    osc = clients.OpenStackClients(context)
    allow_update_status = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE
    )
    if cluster.status not in allow_update_status:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        operation = _('Updating a cluster when status is '
                      '"%s"') % cluster.status
        raise exception.NotSupported(operation=operation)

    # Updates will be only reflected to the default worker
    # nodegroup.
    worker_ng = cluster.default_ng_worker
    if worker_ng.node_count == node_count:
        # FIX: return the cluster instead of bare `return` (None) so this
        # no-op path has the same return value as every other success path.
        return cluster

    # Backup the old node count so that we can restore it
    # in case of an exception.
    old_node_count = worker_ng.node_count

    manager = scale_manager.get_scale_manager(context, osc, cluster)

    # Get driver from the cluster's template.
    ct = conductor_utils.retrieve_cluster_template(context, cluster)
    cluster_driver = driver.Driver.get_driver(ct.server_type,
                                              ct.cluster_distro, ct.coe)
    # Update cluster
    try:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
        worker_ng.node_count = node_count
        worker_ng.save()
        cluster_driver.update_cluster(context, cluster, manager, rollback)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        # Restore the node_count
        worker_ng.node_count = old_node_count
        worker_ng.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        # Translate heat's HTTPBadRequest into a Magnum parameter error.
        if isinstance(e, exc.HTTPBadRequest):
            e = exception.InvalidParameterValue(message=six.text_type(e))
            raise e
        raise

    cluster.save()
    return cluster
def cluster_resize(self, context, cluster,
                   node_count, nodes_to_remove, nodegroup):
    """Resize *nodegroup* of *cluster* to *node_count* nodes.

    :param context: request context.
    :param cluster: the Cluster object being resized.
    :param node_count: desired node count for the nodegroup.
    :param nodes_to_remove: specific nodes to drop when scaling down.
    :param nodegroup: the NodeGroup object being resized.
    :returns: the Cluster object.
    :raises NotSupported: if the cluster status forbids resizing.
    """
    LOG.debug('cluster_conductor cluster_resize')

    os_clients = clients.OpenStackClients(context)

    # NOTE(flwang): The /resize API backs the Kubernetes Cluster
    # Autoscaler, so two things follow: (1) it may be called very often,
    # and (2) a failed scale up/down must leave the cluster resizable
    # again. That is why UPDATE_FAILED and UPDATE_IN_PROGRESS are also
    # accepted below.
    resizable_statuses = (
        fields.ClusterStatus.CREATE_COMPLETE,
        fields.ClusterStatus.UPDATE_COMPLETE,
        fields.ClusterStatus.RESUME_COMPLETE,
        fields.ClusterStatus.RESTORE_COMPLETE,
        fields.ClusterStatus.ROLLBACK_COMPLETE,
        fields.ClusterStatus.SNAPSHOT_COMPLETE,
        fields.ClusterStatus.CHECK_COMPLETE,
        fields.ClusterStatus.ADOPT_COMPLETE,
        fields.ClusterStatus.UPDATE_FAILED,
        fields.ClusterStatus.UPDATE_IN_PROGRESS,
    )
    if cluster.status not in resizable_statuses:
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        raise exception.NotSupported(
            operation=_('Resizing a cluster when status is '
                        '"%s"') % cluster.status)

    resize_manager = scale_manager.get_scale_manager(
        context, os_clients, cluster)

    # Resolve the COE driver from the cluster's template.
    template = conductor_utils.retrieve_cluster_template(context, cluster)
    coe_driver = driver.Driver.get_driver(template.server_type,
                                          template.cluster_distro,
                                          template.coe)

    # Remember the previous node count so it can be restored on failure.
    previous_count = nodegroup.node_count
    try:
        nodegroup.node_count = node_count
        nodegroup.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_PENDING)
        coe_driver.resize_cluster(context, cluster, resize_manager,
                                  node_count, nodes_to_remove, nodegroup)
        cluster.status = fields.ClusterStatus.UPDATE_IN_PROGRESS
        cluster.status_reason = None
    except Exception as e:
        cluster.status = fields.ClusterStatus.UPDATE_FAILED
        cluster.status_reason = six.text_type(e)
        cluster.save()
        # Roll the nodegroup back to its pre-resize count.
        nodegroup.node_count = previous_count
        nodegroup.save()
        conductor_utils.notify_about_cluster_operation(
            context, taxonomy.ACTION_UPDATE, taxonomy.OUTCOME_FAILURE)
        # Translate heat's HTTPBadRequest into a Magnum parameter error.
        if isinstance(e, exc.HTTPBadRequest):
            raise exception.InvalidParameterValue(
                message=six.text_type(e))
        raise

    cluster.save()
    return cluster