def _get_conf_rook_ceph_overrides(self):
    replication = 2
    if utils.is_aio_simplex_system(self.dbapi):
        replication = 1

    pools = {
        'cinder-volumes': {
            'app_name': 'cinder-volumes',
            'chunk_size': 8,
            'crush_rule': 'kube-rbd',
            'replication': replication,
        },
        'backup': {
            'app_name': 'cinder-volumes',
            'chunk_size': 8,
            'crush_rule': 'kube-rbd',
            'replication': replication,
        },
    }

    ceph_override = {
        'admin_keyring': self._get_rook_ceph_admin_keyring(),
        'monitors': [],
        'pools': pools,
    }
    return ceph_override
def get_overrides(self, namespace=None):
    replicas = 2

    if utils.is_aio_system(self.dbapi):
        if utils.is_aio_simplex_system(self.dbapi):
            replicas = 1

    if (utils.is_aio_system(self.dbapi) and
            not self._is_distributed_cloud_role_system_controller()):
        esJavaOpts = \
            "-Djava.net.preferIPv6Addresses=true -Xmx512m -Xms512m"
    else:
        esJavaOpts = \
            "-Djava.net.preferIPv6Addresses=true -Xmx1024m -Xms1024m"

    overrides = {
        common.HELM_NS_MONITOR: {
            'replicas': replicas,
            'esJavaOpts': esJavaOpts,
            'nodeSelector': {common.LABEL_MONITOR_CLIENT: "enabled"},
            'resources': self._get_client_resources_overrides(),
        }
    }

    if namespace in self.SUPPORTED_NAMESPACES:
        return overrides[namespace]
    elif namespace:
        raise exception.InvalidHelmNamespace(chart=self.CHART,
                                             namespace=namespace)
    else:
        return overrides
def get_overrides(self, namespace=None):
    combined_data_and_master = False
    replicas = 2

    if utils.is_aio_system(self.dbapi):
        if (utils.is_aio_duplex_system(self.dbapi) and
                self._count_hosts_by_label(
                    common.LABEL_MONITOR_MASTER) < 3):
            # For AIO-DX without master-labelled worker nodes,
            # configure elasticsearch data pods as master capable,
            # so they will form a cluster of 3 masters with the single
            # elasticsearch master pod.
            combined_data_and_master = True
        if utils.is_aio_simplex_system(self.dbapi):
            replicas = 1

    if (utils.is_aio_system(self.dbapi) and
            not self._is_distributed_cloud_role_system_controller()):
        esJavaOpts = \
            "-Djava.net.preferIPv6Addresses=true -Xmx1536m -Xms1536m"
    else:
        esJavaOpts = \
            "-Djava.net.preferIPv6Addresses=true -Xmx4096m -Xms4096m"

    overrides = {
        common.HELM_NS_MONITOR: {
            'nodeGroup': 'data',
            'replicas': replicas,
            'esJavaOpts': esJavaOpts,
            'resources': self._get_data_resources_overrides(),
            'volumeClaimTemplate': {
                'accessModes': ["ReadWriteOnce"],
                'resources': {
                    'requests': {
                        'storage': str(self.DATA_VOLUME_SIZE_GB) + 'Gi'
                    }
                },
                'storageClass': 'general'
            },
            'nodeSelector': {common.LABEL_MONITOR_DATA: "enabled"},
            'antiAffinity': "hard",
        }
    }

    if combined_data_and_master:
        overrides[common.HELM_NS_MONITOR]['roles'] = {'master': 'true'}
        overrides[common.HELM_NS_MONITOR]['minimumMasterNodes'] = 1

    if namespace in self.SUPPORTED_NAMESPACES:
        return overrides[namespace]
    elif namespace:
        raise exception.InvalidHelmNamespace(chart=self.CHART,
                                             namespace=namespace)
    else:
        return overrides
def get_ceph_storage_model(dbapi=None):
    if not dbapi:
        dbapi = pecan.request.dbapi

    if cutils.is_aio_simplex_system(dbapi):
        return constants.CEPH_AIO_SX_MODEL

    if cutils.is_aio_duplex_system(dbapi):
        return constants.CEPH_CONTROLLER_MODEL

    is_storage_model = False
    is_controller_model = False

    monitor_list = dbapi.ceph_mon_get_list()
    for mon in monitor_list:
        ihost = dbapi.ihost_get(mon['forihostid'])
        if ihost.personality == constants.WORKER:
            # 3rd monitor is on a compute node, so OSDs are on controllers
            is_controller_model = True
        elif ihost.personality == constants.STORAGE:
            # 3rd monitor is on storage-0, so OSDs are also on storage nodes
            is_storage_model = True

    # Check if any storage nodes are provisioned
    if not is_storage_model:
        if dbapi.ihost_get_by_personality(constants.STORAGE):
            is_storage_model = True

    # There are cases where we delete the monitor on a worker node and have
    # not yet assigned it to another worker. In this case, check whether any
    # OSDs have been configured on controller nodes.
    if not is_storage_model:
        controller_hosts = dbapi.ihost_get_by_personality(constants.CONTROLLER)
        for chost in controller_hosts:
            istors = dbapi.istor_get_by_ihost(chost['uuid'])
            if len(istors):
                is_controller_model = True
                break

    if is_storage_model and is_controller_model:
        # Both types should not be true at the same time, but we should log
        # a message for debug purposes.
        # TODO(sdinescu): Improve error message
        LOG.error("Wrong ceph storage type. Bad configuration.")
        return constants.CEPH_STORAGE_MODEL
    elif is_storage_model:
        return constants.CEPH_STORAGE_MODEL
    elif is_controller_model:
        return constants.CEPH_CONTROLLER_MODEL
    else:
        # This case is for the install stage where the decision to configure
        # OSDs on controller or storage nodes is not yet clear (before adding
        # a monitor on a worker or before configuring the first storage node).
        return constants.CEPH_UNDEFINED_MODEL
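# Illustrative only: a hypothetical caller that branches on the model
# returned by get_ceph_storage_model(). The constant names are the ones
# referenced in the function above; the branch bodies are placeholders,
# not code from this module.
def _example_dispatch_on_storage_model(dbapi):
    model = get_ceph_storage_model(dbapi)
    if model == constants.CEPH_STORAGE_MODEL:
        pass  # OSDs live on dedicated storage nodes
    elif model == constants.CEPH_CONTROLLER_MODEL:
        pass  # OSDs live on controller nodes (including AIO-DX)
    elif model == constants.CEPH_AIO_SX_MODEL:
        pass  # single-host deployment
    else:
        # constants.CEPH_UNDEFINED_MODEL: too early in installation to decide
        pass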
def _check_host(interface_obj):
    # In general we do not support changing the interface configuration at
    # runtime, because it can impact host availability and services. The only
    # combination allowed is an SR-IOV interface on an AIO-SX system.
    if (cutils.is_aio_simplex_system(pecan.request.dbapi) and
            interface_obj.ifclass == constants.INTERFACE_CLASS_PCI_SRIOV):
        return

    host = pecan.request.dbapi.ihost_get(interface_obj.ihost_uuid)
    if host.administrative != constants.ADMIN_LOCKED:
        msg = _("Operation Rejected: Host '%s' is administrative '%s' " %
                (host.hostname, host.administrative))
        raise wsme.exc.ClientSideError(msg)
def _get_conf_overrides(self):
    ceph_backend = self._get_primary_ceph_backend()
    if not ceph_backend and not self._rook_ceph:
        rbd_store_pool = ""
        rbd_store_user = ""
        replication = 1
    elif self._rook_ceph:
        rbd_store_pool = constants.CEPH_POOL_IMAGES_NAME
        rbd_store_user = RBD_STORE_USER

        replication = 2
        if utils.is_aio_simplex_system(self.dbapi):
            replication = 1
    else:
        rbd_store_pool = app_constants.CEPH_POOL_IMAGES_NAME
        rbd_store_user = RBD_STORE_USER
        replication, min_replication = \
            StorageBackendConfig.get_ceph_pool_replication(self.dbapi)

    if not self._rook_ceph:
        # Only the primary Ceph tier is used for the glance images pool
        rule_name = "{0}{1}{2}".format(
            constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
            constants.CEPH_CRUSH_TIER_SUFFIX,
            "-ruleset").replace('-', '_')
    else:
        rule_name = "storage_tier_ruleset"

    conf = {
        'glance': {
            'DEFAULT': {
                'graceful_shutdown': True,
                'show_image_direct_url': True,
            },
            'glance_store': {
                'chunk_size': app_constants.CEPH_POOL_IMAGES_CHUNK_SIZE,
                'filesystem_store_datadir': constants.GLANCE_IMAGE_PATH,
                'rbd_store_pool': rbd_store_pool,
                'rbd_store_user': rbd_store_user,
                'rbd_store_replication': replication,
                'rbd_store_crush_rule': rule_name,
            }
        }
    }

    if ceph_backend:
        conf['ceph'] = self._get_ceph_overrides()
    elif self._rook_ceph:
        conf['ceph'] = {
            'admin_keyring': self._get_rook_ceph_admin_keyring()
        }

    return conf
def _semantic_check(self, db_app):
    """Semantic check for application deployment
    """
    if db_app.name == constants.HELM_APP_MONITOR:
        hosts_to_label_check = pecan.request.dbapi.ihost_get_by_personality(
            constants.CONTROLLER)
        if not cutils.is_aio_simplex_system(pecan.request.dbapi):
            whosts = pecan.request.dbapi.ihost_get_by_personality(
                constants.WORKER)
            hosts_to_label_check.extend(whosts)
        self._check_monitor_labels(hosts_to_label_check)
def _crushmap_add_tier(self, tier):
    # Create a crush map tree for the tier, mirroring the default root
    try:
        self._crushmap_root_mirror(self._default_tier, tier.name)
    except exception.CephCrushTierAlreadyExists:
        pass

    if cutils.is_aio_simplex_system(pecan.request.dbapi):
        # Since we have a single host, replication is done across OSDs
        # to ensure disk based redundancy.
        replicate_by = 'osd'
    else:
        # Replication is done across different nodes of the same peer
        # group, ensuring host based redundancy.
        replicate_by = 'host'

    try:
        self._crushmap_rule_add(tier.name, replicate_by=replicate_by)
    except exception.CephCrushRuleAlreadyExists:
        pass
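# Illustrative only: the 'replicate_by' value chosen above becomes the
# failure-domain type in the replicated CRUSH rule created by
# _crushmap_rule_add(). The exact rule text is generated elsewhere; this
# sketch just shows the intent of 'osd' (AIO-SX) versus 'host' (multi-node).
CRUSH_RULE_SKETCH = """
rule %(tier)s-ruleset {
    step take %(tier)s
    step chooseleaf firstn 0 type %(replicate_by)s
    step emit
}
"""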
def _get_cluster_overrides(self):
    env_vars = {
        'MINIMUM_MASTER_NODES': "1",
        'EXPECTED_MASTER_NODES': "1",
        'RECOVER_AFTER_MASTER_NODES': "1"
    }

    if utils.is_aio_simplex_system(self.dbapi):
        cluster_initial_master_nodes = ['stx-elasticsearch-master-0']
    else:
        cluster_initial_master_nodes = [
            'stx-elasticsearch-master-0',
            'stx-elasticsearch-master-1'
        ]

    conf = {
        'env': env_vars,
        'config': {
            'cluster.initial_master_nodes': cluster_initial_master_nodes
        },
    }
    return conf
def _semantic_check(self, db_app):
    """Semantic check for application deployment
    """
    if db_app.name == constants.HELM_APP_MONITOR:
        chosts = pecan.request.dbapi.ihost_get_by_personality(
            constants.CONTROLLER)

        if not cutils.is_aio_simplex_system(pecan.request.dbapi):
            if chosts and len(chosts) < 2:
                raise wsme.exc.ClientSideError(_(
                    "Operation rejected: application {} requires 2 "
                    "controllers".format(db_app.name)))

        self._check_controller_labels(chosts)

        for chost in chosts:
            if (chost.administrative != constants.ADMIN_UNLOCKED or
                    chost.operational != constants.OPERATIONAL_ENABLED):
                raise wsme.exc.ClientSideError(_(
                    "Operation rejected: application {} requires {} to be "
                    "unlocked-enabled".format(db_app.name, chost.hostname)))
def get_overrides(self, namespace=None):
    system_fields, system_name_for_index = self.get_system_info_overrides()

    if utils.is_aio_simplex_system(self.dbapi):
        replicas = 1
    else:
        replicas = 2

    overrides = {
        common.HELM_NS_MONITOR: {
            'replicaCount': replicas,
            'resources': self._get_resources_overrides(),
            'config': self._get_config(),
            'systemNameForIndex': system_name_for_index,
        }
    }

    if self._is_distributed_cloud_role_subcloud():
        subcloud_settings = {
            'elasticsearch': {
                'host': "http://%s" %
                        self._system_controller_floating_address(),
                'port': self.NODE_PORT
            },
            'ingress': {
                'enabled': False
            },
        }
        overrides[common.HELM_NS_MONITOR].update(subcloud_settings)

    if namespace in self.SUPPORTED_NAMESPACES:
        return overrides[namespace]
    elif namespace:
        raise exception.InvalidHelmNamespace(chart=self.CHART,
                                             namespace=namespace)
    else:
        return overrides
def _get_rook_ceph_rbd_ephemeral_storage(self):
    ephemeral_storage_conf = {}
    ephemeral_pools = []

    # Get the values for replication and min replication from the storage
    # backend attributes.
    replication = 2
    if utils.is_aio_simplex_system(self.dbapi):
        replication = 1

    # Form the dictionary with the info for the ephemeral pool.
    # If needed, multiple pools can be specified.
    ephemeral_pool = {
        'rbd_pool_name': constants.CEPH_POOL_EPHEMERAL_NAME,
        'rbd_user': RBD_POOL_USER,
        'rbd_crush_rule': "storage_tier_ruleset",
        'rbd_replication': replication,
        'rbd_chunk_size': constants.CEPH_POOL_EPHEMERAL_PG_NUM
    }
    ephemeral_pools.append(ephemeral_pool)

    ephemeral_storage_conf = {
        'type': 'rbd',
        'rbd_pools': ephemeral_pools
    }

    return ephemeral_storage_conf
def update(self, body):
    """Update the applied application to a different version"""

    tarfile = body.get('tarfile')
    name = body.get('name', '')
    version = body.get('app_version', '')
    name, version, mname, mfile = self._check_tarfile(
        tarfile, name, version, constants.APP_UPDATE_OP)

    reuse_overrides_flag = body.get('reuse_user_overrides', None)
    if reuse_overrides_flag is None:
        # None means let the application decide
        reuse_overrides = None
    elif reuse_overrides_flag in ['true', 'True']:
        reuse_overrides = True
    elif reuse_overrides_flag in ['false', 'False']:
        reuse_overrides = False
    else:
        raise wsme.exc.ClientSideError(_(
            "Application-update rejected: "
            "invalid reuse_user_overrides setting."))

    try:
        applied_app = objects.kube_app.get_by_name(pecan.request.context, name)
    except exception.KubeAppNotFound:
        LOG.error("Received a request to update app %s which does not exist." %
                  name)
        raise wsme.exc.ClientSideError(_(
            "Application-update rejected: application not found."))

    if applied_app.status == constants.APP_UPDATE_IN_PROGRESS:
        raise wsme.exc.ClientSideError(_(
            "Application-update rejected: update is already in progress."))
    elif applied_app.status != constants.APP_APPLY_SUCCESS:
        raise wsme.exc.ClientSideError(_(
            "Application-update rejected: operation is not allowed "
            "while the current status is {}.".format(applied_app.status)))

    if applied_app.app_version == version:
        raise wsme.exc.ClientSideError(_(
            "Application-update rejected: the version %s is already "
            "applied." % version))

    # Set the status for the currently applied app to inactive
    applied_app.status = constants.APP_INACTIVE_STATE
    applied_app.progress = None
    applied_app.save()

    # If this version has been applied before (an inactive app is found),
    # use armada rollback to apply the application later; otherwise use
    # armada apply.
    # On AIO-SX, always use armada apply even if the version was applied
    # before. With a single replica, rollback cannot be leveraged because
    # armada/helm rollback --wait does not wait for pods to be ready before
    # it returns. Related helm issues:
    # https://github.com/helm/helm/issues/4210
    # https://github.com/helm/helm/issues/2006
    try:
        target_app = objects.kube_app.get_inactive_app_by_name_version(
            pecan.request.context, name, version)
        target_app.status = constants.APP_UPDATE_IN_PROGRESS
        target_app.save()
        if cutils.is_aio_simplex_system(pecan.request.dbapi):
            operation = constants.APP_APPLY_OP
        else:
            operation = constants.APP_ROLLBACK_OP
    except exception.KubeAppInactiveNotFound:
        target_app_data = {
            'name': name,
            'app_version': version,
            'manifest_name': mname,
            'manifest_file': os.path.basename(mfile),
            'status': constants.APP_UPDATE_IN_PROGRESS,
            'active': True
        }
        operation = constants.APP_APPLY_OP

        try:
            target_app = pecan.request.dbapi.kube_app_create(target_app_data)
        except exception.KubeAppAlreadyExists as e:
            applied_app.status = constants.APP_APPLY_SUCCESS
            applied_app.progress = constants.APP_PROGRESS_COMPLETED
            applied_app.save()
            LOG.exception(e)
            raise wsme.exc.ClientSideError(_(
                "Application-update failed: Unable to start application "
                "update, application info update failed."))

    pecan.request.rpcapi.perform_app_update(pecan.request.context,
                                            applied_app, target_app,
                                            tarfile, operation,
                                            reuse_overrides)

    return KubeApp.convert_with_links(target_app)
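# Illustrative only: the request-body fields the update() handler above reads
# via body.get(). Only the key names come from the code; the values shown are
# hypothetical.
EXAMPLE_UPDATE_BODY = {
    'tarfile': '/scratch/apps/some-app-2.0-0.tgz',  # hypothetical path
    'name': 'some-app',                             # hypothetical app name
    'app_version': '2.0-0',                         # hypothetical version
    'reuse_user_overrides': 'true',                 # 'true'/'True'/'false'/'False', or omitted
}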
def _check_monitor_labels(hosts):
    logstash_active = cutils.is_chart_enabled(
        pecan.request.dbapi, constants.HELM_APP_MONITOR,
        helm_common.HELM_CHART_LOGSTASH, helm_common.HELM_NS_MONITOR)

    elasticsearch_client_active = cutils.is_chart_enabled(
        pecan.request.dbapi, constants.HELM_APP_MONITOR,
        helm_common.HELM_CHART_ELASTICSEARCH_CLIENT,
        helm_common.HELM_NS_MONITOR)

    elasticsearch_data_active = cutils.is_chart_enabled(
        pecan.request.dbapi, constants.HELM_APP_MONITOR,
        helm_common.HELM_CHART_ELASTICSEARCH_DATA,
        helm_common.HELM_NS_MONITOR)

    elasticsearch_master_active = cutils.is_chart_enabled(
        pecan.request.dbapi, constants.HELM_APP_MONITOR,
        helm_common.HELM_CHART_ELASTICSEARCH_MASTER,
        helm_common.HELM_NS_MONITOR)

    elasticsearch_active = (elasticsearch_client_active and
                            elasticsearch_data_active and
                            elasticsearch_master_active)

    # The elasticsearch charts must either all be enabled or all disabled.
    if (not elasticsearch_active and
            (elasticsearch_client_active or
             elasticsearch_data_active or
             elasticsearch_master_active)):
        raise wsme.exc.ClientSideError(_(
            "Operation rejected: application stx-monitor "
            "requires charts: elasticsearch-master, "
            "elasticsearch-client and elasticsearch-data either all "
            "enabled, or all disabled"))

    curator_active = cutils.is_chart_enabled(
        pecan.request.dbapi, constants.HELM_APP_MONITOR,
        helm_common.HELM_CHART_ELASTICSEARCH_CURATOR,
        helm_common.HELM_NS_MONITOR)

    if (not elasticsearch_active) and curator_active:
        raise wsme.exc.ClientSideError(_(
            "Operation rejected: application stx-monitor "
            "does not allow the elasticsearch-curator chart to be enabled "
            "without the elasticsearch charts also enabled"))

    if not elasticsearch_active and not logstash_active:
        # Nothing to check, exit
        return

    # The required counts of labelled and unlocked-enabled hosts.
    required_label_counts = dict()

    # The counts of labelled hosts.
    label_counts = dict()

    # The counts of labelled hosts that are also unlocked and enabled.
    good_label_counts = dict()

    is_aio_simplex = cutils.is_aio_simplex_system(pecan.request.dbapi)

    if elasticsearch_active:
        label_counts = {
            helm_common.LABEL_MONITOR_MASTER: 0,
            helm_common.LABEL_MONITOR_DATA: 0,
            helm_common.LABEL_MONITOR_CLIENT: 0
        }
        good_label_counts = {
            helm_common.LABEL_MONITOR_MASTER: 0,
            helm_common.LABEL_MONITOR_DATA: 0,
            helm_common.LABEL_MONITOR_CLIENT: 0
        }

        if is_aio_simplex:
            # AIO simplex requires one of every label.
            required_label_counts = {
                helm_common.LABEL_MONITOR_MASTER: 1,
                helm_common.LABEL_MONITOR_DATA: 1,
                helm_common.LABEL_MONITOR_CLIENT: 1
            }
        else:
            # Dual controller configs
            required_label_counts = {
                helm_common.LABEL_MONITOR_DATA: 2,
                helm_common.LABEL_MONITOR_CLIENT: 2,
                helm_common.LABEL_MONITOR_MASTER: 3
            }

            # For AIO-DX without worker nodes, only 2 hosts need to be
            # labelled as master.
            if (cutils.is_aio_duplex_system(pecan.request.dbapi) and
                    (pecan.request.dbapi.count_hosts_by_label(
                        helm_common.LABEL_MONITOR_MASTER) < 3)):
                required_label_counts[helm_common.LABEL_MONITOR_MASTER] = 2

    if logstash_active:
        good_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 0
        label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 0
        if is_aio_simplex:
            required_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 1
        else:
            required_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 2

    # Examine all the required labels on the given hosts and build up the
    # actual and good label counts.
    host_info = {}
    for host in hosts:
        labels = pecan.request.dbapi.label_get_by_host(host.uuid)

        host_good = (host.administrative == constants.ADMIN_UNLOCKED and
                     host.operational == constants.OPERATIONAL_ENABLED)

        host_labels_dict = {}
        for label in labels:
            if label.label_key in required_label_counts:
                if label.label_value == helm_common.LABEL_VALUE_ENABLED:
                    label_counts[label.label_key] += 1
                    if host_good:
                        good_label_counts[label.label_key] += 1
                host_labels_dict[label.label_key] = label.label_value

        host_info[host.hostname] = {
            "personality": host.personality,
            "labels": host_labels_dict
        }

    # If we are short of labels on unlocked and enabled hosts, inform the
    # user with a detailed message.
    msg = ""
    for k, v in required_label_counts.items():
        if good_label_counts[k] < required_label_counts[k]:
            msg += (", label:%s=%s, required=%d, labelled=%d,"
                    " labelled and unlocked-enabled=%d" %
                    (k, helm_common.LABEL_VALUE_ENABLED, v,
                     label_counts[k], good_label_counts[k]))

    if msg:
        app_helper = KubeAppHelper(pecan.request.dbapi)
        msg += "\n"
        msg += app_helper._extract_missing_labels_message(
            host_info, required_label_counts)

    if msg:
        raise wsme.exc.ClientSideError(_(
            "Operation rejected: application stx-monitor "
            "does not have required unlocked-enabled and "
            "labelled hosts{}".format(msg)))