Example #1
    def _get_conf_rook_ceph_overrides(self):
        replication = 2
        if utils.is_aio_simplex_system(self.dbapi):
            replication = 1

        pools = {
            'cinder-volumes': {
                'app_name': 'cinder-volumes',
                'chunk_size': 8,
                'crush_rule': 'kube-rbd',
                'replication': replication,
            },
            'backup': {
                'app_name': 'cinder-volumes',
                'chunk_size': 8,
                'crush_rule': 'kube-rbd',
                'replication': replication,
            },
        }

        ceph_override = {
            'admin_keyring': self._get_rook_ceph_admin_keyring(),
            'monitors': [],
            'pools': pools,
        }
        return ceph_override
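Every example in this collection branches on the same system-mode helper. As a point of reference, the sketch below approximates what an AIO-SX check can look like; it assumes the isystem record exposes system_type and system_mode fields and is only an approximation, not the actual implementation of utils.is_aio_simplex_system().

# Sketch only: approximates the AIO simplex check the examples rely on.
# The real helper in the utils module may differ in detail.
def is_aio_simplex_system(dbapi):
    system = dbapi.isystem_get_one()
    return (system.system_type == constants.TIS_AIO_BUILD and
            system.system_mode == constants.SYSTEM_MODE_SIMPLEX)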
Example #2
    def get_overrides(self, namespace=None):
        replicas = 2
        if utils.is_aio_system(self.dbapi):
            if utils.is_aio_simplex_system(self.dbapi):
                replicas = 1

        if (utils.is_aio_system(self.dbapi)
                and not self._is_distributed_cloud_role_system_controller()):
            esJavaOpts = \
                "-Djava.net.preferIPv6Addresses=true -Xmx512m -Xms512m"
        else:
            esJavaOpts = \
                "-Djava.net.preferIPv6Addresses=true -Xmx1024m -Xms1024m"

        overrides = {
            common.HELM_NS_MONITOR: {
                'replicas': replicas,
                'esJavaOpts': esJavaOpts,
                'nodeSelector': {
                    common.LABEL_MONITOR_CLIENT: "enabled"
                },
                'resources': self._get_client_resources_overrides(),
            }
        }

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        else:
            return overrides
Example #3
    def get_overrides(self, namespace=None):

        combined_data_and_master = False
        replicas = 2
        if utils.is_aio_system(self.dbapi):
            if (utils.is_aio_duplex_system(self.dbapi)
                    and self._count_hosts_by_label(
                        common.LABEL_MONITOR_MASTER) < 3):
                # For AIO-DX without master labelled worker nodes,
                # configure elasticsearch data pods as master capable,
                # so they will form a cluster of 3 masters with the single
                # elasticsearch master pod.
                combined_data_and_master = True

            if utils.is_aio_simplex_system(self.dbapi):
                replicas = 1
        if (utils.is_aio_system(self.dbapi)
                and not self._is_distributed_cloud_role_system_controller()):
            esJavaOpts = \
                "-Djava.net.preferIPv6Addresses=true -Xmx1536m -Xms1536m"
        else:
            esJavaOpts = \
                "-Djava.net.preferIPv6Addresses=true -Xmx4096m -Xms4096m"

        overrides = {
            common.HELM_NS_MONITOR: {
                'nodeGroup': 'data',
                'replicas': replicas,
                'esJavaOpts': esJavaOpts,
                'resources': self._get_data_resources_overrides(),
                'volumeClaimTemplate': {
                    'accessModes': ["ReadWriteOnce"],
                    'resources': {
                        'requests': {
                            'storage': str(self.DATA_VOLUME_SIZE_GB) + 'Gi'
                        }
                    },
                    'storageClass': 'general'
                },
                'nodeSelector': {
                    common.LABEL_MONITOR_DATA: "enabled"
                },
                'antiAffinity': "hard",
            }
        }

        if combined_data_and_master:
            overrides[common.HELM_NS_MONITOR]['roles'] = {'master': 'true'}
            overrides[common.HELM_NS_MONITOR]['minimumMasterNodes'] = 1

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        else:
            return overrides
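The get_overrides() methods in Examples #2 and #3 above (and again in Example #11) end with the same SUPPORTED_NAMESPACES / InvalidHelmNamespace tail. If that pattern were factored out, a small helper could look like the hedged sketch below; only SUPPORTED_NAMESPACES, CHART and exception.InvalidHelmNamespace come from the examples, the helper name itself is hypothetical.

    def _filter_namespace_overrides(self, overrides, namespace=None):
        # Hypothetical helper mirroring the tail of get_overrides() above:
        # return the per-namespace overrides, reject an unsupported
        # namespace, or return everything when no namespace was requested.
        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        return overrides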
Example #4
def get_ceph_storage_model(dbapi=None):

    if not dbapi:
        dbapi = pecan.request.dbapi

    if cutils.is_aio_simplex_system(dbapi):
        return constants.CEPH_AIO_SX_MODEL

    if cutils.is_aio_duplex_system(dbapi):
        return constants.CEPH_CONTROLLER_MODEL

    is_storage_model = False
    is_controller_model = False

    monitor_list = dbapi.ceph_mon_get_list()
    for mon in monitor_list:
        ihost = dbapi.ihost_get(mon['forihostid'])
        if ihost.personality == constants.WORKER:
            # 3rd monitor is on a compute node, so OSDs are on controller
            is_controller_model = True
        elif ihost.personality == constants.STORAGE:
            # 3rd monitor is on storage-0, so OSDs are also on storage nodes
            is_storage_model = True

    # Check if any storage nodes are provisioned
    if not is_storage_model:
        if dbapi.ihost_get_by_personality(constants.STORAGE):
            is_storage_model = True

    # There are cases where we delete the monitor on a worker node and have
    # not yet assigned it to another worker. In that case, check whether any
    # OSDs have been configured on the controller nodes.
    if not is_storage_model:
        controller_hosts = dbapi.ihost_get_by_personality(constants.CONTROLLER)
        for chost in controller_hosts:
            istors = dbapi.istor_get_by_ihost(chost['uuid'])
            if len(istors):
                is_controller_model = True
                break

    if is_storage_model and is_controller_model:
        # Both types should not be true at the same time, but we should log a
        # message for debug purposes
        # TODO(sdinescu): Improve error message
        LOG.error("Wrong ceph storage type. Bad configuration.")
        return constants.CEPH_STORAGE_MODEL
    elif is_storage_model:
        return constants.CEPH_STORAGE_MODEL
    elif is_controller_model:
        return constants.CEPH_CONTROLLER_MODEL
    else:
        # This case is for the install stage where the decision
        # to configure OSDs on controller or storage nodes is not
        # clear (before adding a monitor on a compute or before
        # configuring the first storage node)
        return constants.CEPH_UNDEFINED_MODEL
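Callers normally just branch on the constant returned by get_ceph_storage_model(). A hedged usage sketch follows; the wrapper function and the replication values it picks are illustrative only, and just the model constants are taken from the example above.

def _replication_for_storage_model(dbapi=None):
    # Illustrative only: map the detected ceph deployment model to a
    # default pool replication factor.
    model = get_ceph_storage_model(dbapi)
    if model == constants.CEPH_AIO_SX_MODEL:
        return 1        # single host: replicate across OSDs on that host
    if model == constants.CEPH_UNDEFINED_MODEL:
        return None     # deployment model not decided yet, defer the choice
    return 2            # controller or storage model: host-level redundancy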
Example #5
    def _check_host(interface_obj):
        # In general we do not support changing the interface configuration
        # at runtime, because it can have an impact on host availability and
        # services; only this specific combination (SR-IOV interfaces on an
        # AIO-SX system) is allowed.
        if (cutils.is_aio_simplex_system(pecan.request.dbapi)
                and interface_obj.ifclass == constants.INTERFACE_CLASS_PCI_SRIOV):
            return
        host = pecan.request.dbapi.ihost_get(interface_obj.ihost_uuid)
        if host.administrative != constants.ADMIN_LOCKED:
            msg = _("Operation Rejected: Host '%s' is administrative '%s' " %
                    (host.hostname, host.administrative))
            raise wsme.exc.ClientSideError(msg)
Example #6
    def _get_conf_overrides(self):
        ceph_backend = self._get_primary_ceph_backend()
        if not ceph_backend and not self._rook_ceph:
            rbd_store_pool = ""
            rbd_store_user = ""
            replication = 1
        elif self._rook_ceph:
            rbd_store_pool = constants.CEPH_POOL_IMAGES_NAME
            rbd_store_user = RBD_STORE_USER

            replication = 2
            if utils.is_aio_simplex_system(self.dbapi):
                replication = 1
        else:
            rbd_store_pool = app_constants.CEPH_POOL_IMAGES_NAME
            rbd_store_user = RBD_STORE_USER
            replication, min_replication = \
                StorageBackendConfig.get_ceph_pool_replication(self.dbapi)

        if not self._rook_ceph:
            # Only the primary Ceph tier is used for the glance images pool
            rule_name = "{0}{1}{2}".format(
                constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
                constants.CEPH_CRUSH_TIER_SUFFIX,
                "-ruleset").replace('-', '_')
        else:
            rule_name = "storage_tier_ruleset"

        conf = {
            'glance': {
                'DEFAULT': {
                    'graceful_shutdown': True,
                    'show_image_direct_url': True,
                },
                'glance_store': {
                    'chunk_size': app_constants.CEPH_POOL_IMAGES_CHUNK_SIZE,
                    'filesystem_store_datadir': constants.GLANCE_IMAGE_PATH,
                    'rbd_store_pool': rbd_store_pool,
                    'rbd_store_user': rbd_store_user,
                    'rbd_store_replication': replication,
                    'rbd_store_crush_rule': rule_name,
                }
            }
        }

        if ceph_backend:
            conf['ceph'] = self._get_ceph_overrides()
        elif self._rook_ceph:
            conf['ceph'] = {
                'admin_keyring': self._get_rook_ceph_admin_keyring()
            }

        return conf
Example #7
    def _semantic_check(self, db_app):
        """Semantic check for application deployment
        """

        if db_app.name == constants.HELM_APP_MONITOR:

            hosts_to_label_check = pecan.request.dbapi.ihost_get_by_personality(
                constants.CONTROLLER)

            if not cutils.is_aio_simplex_system(pecan.request.dbapi):
                whosts = pecan.request.dbapi.ihost_get_by_personality(
                    constants.WORKER)
                hosts_to_label_check.extend(whosts)

            self._check_monitor_labels(hosts_to_label_check)
Example #8
    def _crushmap_add_tier(self, tier):
        # Create a crush map tree for the tier, mirroring the default root.
        try:
            self._crushmap_root_mirror(self._default_tier, tier.name)
        except exception.CephCrushTierAlreadyExists:
            pass
        if cutils.is_aio_simplex_system(pecan.request.dbapi):
            # Since we have a single host, replication is done across OSDs
            # to ensure disk-based redundancy.
            replicate_by = 'osd'
        else:
            # Replication is done across different nodes of the same peer
            # group, ensuring host-based redundancy.
            replicate_by = 'host'
        try:
            self._crushmap_rule_add(tier.name, replicate_by=replicate_by)
        except exception.CephCrushRuleAlreadyExists:
            pass
Example #9
    def _get_cluster_overrides(self):

        env_vars = {
            'MINIMUM_MASTER_NODES': "1",
            'EXPECTED_MASTER_NODES': "1",
            'RECOVER_AFTER_MASTER_NODES': "1"
        }

        if utils.is_aio_simplex_system(self.dbapi):
            cluster_initial_master_nodes = ['stx-elasticsearch-master-0']
        else:
            cluster_initial_master_nodes = [
                'stx-elasticsearch-master-0', 'stx-elasticsearch-master-1'
            ]

        conf = {
            'env': env_vars,
            'config': {
                'cluster.initial_master_nodes': cluster_initial_master_nodes
            },
        }
        return conf
Example #10
    def _semantic_check(self, db_app):
        """Semantic check for application deployment
        """

        if db_app.name == constants.HELM_APP_MONITOR:
            chosts = pecan.request.dbapi.ihost_get_by_personality(
                constants.CONTROLLER)

            if not cutils.is_aio_simplex_system(pecan.request.dbapi):
                if chosts and len(chosts) < 2:
                    raise wsme.exc.ClientSideError(
                        _("Operation rejected: application {} requires 2 "
                          "controllers".format(db_app.name)))

            self._check_controller_labels(chosts)

            for chost in chosts:
                if (chost.administrative != constants.ADMIN_UNLOCKED
                        or chost.operational != constants.OPERATIONAL_ENABLED):
                    raise wsme.exc.ClientSideError(
                        _("Operation rejected: application {} requires {} to be "
                          "unlocked-enabled".format(db_app.name,
                                                    chost.hostname)))
Example #11
    def get_overrides(self, namespace=None):
        system_fields, system_name_for_index = self.get_system_info_overrides()
        if utils.is_aio_simplex_system(self.dbapi):
            replicas = 1
        else:
            replicas = 2

        overrides = {
            common.HELM_NS_MONITOR: {
                'replicaCount': replicas,
                'resources': self._get_resources_overrides(),
                'config': self._get_config(),
                'systemNameForIndex': system_name_for_index,
            }
        }

        if self._is_distributed_cloud_role_subcloud():
            subcloud_settings = {
                'elasticsearch': {
                    'host':
                    "http://%s" % self._system_controller_floating_address(),
                    'port':
                    self.NODE_PORT
                },
                'ingress': {
                    'enabled': False
                },
            }
            overrides[common.HELM_NS_MONITOR].update(subcloud_settings)

        if namespace in self.SUPPORTED_NAMESPACES:
            return overrides[namespace]
        elif namespace:
            raise exception.InvalidHelmNamespace(chart=self.CHART,
                                                 namespace=namespace)
        else:
            return overrides
Example #12
    def _get_rook_ceph_rbd_ephemeral_storage(self):
        ephemeral_storage_conf = {}
        ephemeral_pools = []

        # Get the values for replication and min replication from the storage
        # backend attributes.
        replication = 2
        if utils.is_aio_simplex_system(self.dbapi):
            replication = 1

        # Form the dictionary with the info for the ephemeral pool.
        # If needed, multiple pools can be specified.
        ephemeral_pool = {
            'rbd_pool_name': constants.CEPH_POOL_EPHEMERAL_NAME,
            'rbd_user': RBD_POOL_USER,
            'rbd_crush_rule': "storage_tier_ruleset",
            'rbd_replication': replication,
            'rbd_chunk_size': constants.CEPH_POOL_EPHEMERAL_PG_NUM
        }
        ephemeral_pools.append(ephemeral_pool)

        ephemeral_storage_conf = {'type': 'rbd', 'rbd_pools': ephemeral_pools}

        return ephemeral_storage_conf
Example #13
    def update(self, body):
        """Update the applied application to a different version"""
        tarfile = body.get('tarfile')
        name = body.get('name', '')
        version = body.get('app_version', '')
        name, version, mname, mfile = self._check_tarfile(
            tarfile, name, version, constants.APP_UPDATE_OP)

        reuse_overrides_flag = body.get('reuse_user_overrides', None)
        if reuse_overrides_flag is None:
            # None means let the application decide
            reuse_overrides = None
        elif reuse_overrides_flag in ['true', 'True']:
            reuse_overrides = True
        elif reuse_overrides_flag in ['false', 'False']:
            reuse_overrides = False
        else:
            raise wsme.exc.ClientSideError(
                _("Application-update rejected: "
                  "invalid reuse_user_overrides setting."))

        try:
            applied_app = objects.kube_app.get_by_name(pecan.request.context,
                                                       name)
        except exception.KubeAppNotFound:
            LOG.error(
                "Received a request to update app %s which does not exist." %
                name)
            raise wsme.exc.ClientSideError(
                _("Application-update rejected: application not found."))

        if applied_app.status == constants.APP_UPDATE_IN_PROGRESS:
            raise wsme.exc.ClientSideError(
                _("Application-update rejected: update is already "
                  "in progress."))
        elif applied_app.status != constants.APP_APPLY_SUCCESS:
            raise wsme.exc.ClientSideError(
                _("Application-update rejected: operation is not allowed "
                  "while the current status is {}.".format(
                      applied_app.status)))

        if applied_app.app_version == version:
            raise wsme.exc.ClientSideError(
                _("Application-update rejected: the version %s is already "
                  "applied." % version))

        # Set the status of the currently applied app to inactive
        applied_app.status = constants.APP_INACTIVE_STATE
        applied_app.progress = None
        applied_app.save()

        # If this version has been applied before (an inactive app is found),
        # use armada rollback to re-apply the application; otherwise, use
        # armada apply.
        # On AIO-SX, always use armada apply even if the version was applied
        # before: with a replica count of 1, armada/helm rollback --wait does
        # not wait for pods to be ready before it returns.
        # Related helm issues:
        # https://github.com/helm/helm/issues/4210
        # https://github.com/helm/helm/issues/2006
        try:
            target_app = objects.kube_app.get_inactive_app_by_name_version(
                pecan.request.context, name, version)
            target_app.status = constants.APP_UPDATE_IN_PROGRESS
            target_app.save()
            if cutils.is_aio_simplex_system(pecan.request.dbapi):
                operation = constants.APP_APPLY_OP
            else:
                operation = constants.APP_ROLLBACK_OP
        except exception.KubeAppInactiveNotFound:
            target_app_data = {
                'name': name,
                'app_version': version,
                'manifest_name': mname,
                'manifest_file': os.path.basename(mfile),
                'status': constants.APP_UPDATE_IN_PROGRESS,
                'active': True
            }
            operation = constants.APP_APPLY_OP

            try:
                target_app = pecan.request.dbapi.kube_app_create(
                    target_app_data)
            except exception.KubeAppAlreadyExists as e:
                applied_app.status = constants.APP_APPLY_SUCCESS
                applied_app.progress = constants.APP_PROGRESS_COMPLETED
                applied_app.save()
                LOG.exception(e)
                raise wsme.exc.ClientSideError(
                    _("Application-update failed: Unable to start application update, "
                      "application info update failed."))

        pecan.request.rpcapi.perform_app_update(pecan.request.context,
                                                applied_app, target_app,
                                                tarfile, operation,
                                                reuse_overrides)

        return KubeApp.convert_with_links(target_app)
Example #14
    def _check_monitor_labels(hosts):

        logstash_active = cutils.is_chart_enabled(
            pecan.request.dbapi, constants.HELM_APP_MONITOR,
            helm_common.HELM_CHART_LOGSTASH, helm_common.HELM_NS_MONITOR)

        elasticsearch_client_active = cutils.is_chart_enabled(
            pecan.request.dbapi, constants.HELM_APP_MONITOR,
            helm_common.HELM_CHART_ELASTICSEARCH_CLIENT,
            helm_common.HELM_NS_MONITOR)

        elasticsearch_data_active = cutils.is_chart_enabled(
            pecan.request.dbapi, constants.HELM_APP_MONITOR,
            helm_common.HELM_CHART_ELASTICSEARCH_DATA,
            helm_common.HELM_NS_MONITOR)

        elasticsearch_master_active = cutils.is_chart_enabled(
            pecan.request.dbapi, constants.HELM_APP_MONITOR,
            helm_common.HELM_CHART_ELASTICSEARCH_MASTER,
            helm_common.HELM_NS_MONITOR)

        elasticsearch_active = (elasticsearch_client_active
                                and elasticsearch_data_active
                                and elasticsearch_master_active)

        # elasticsearch charts must either all be active or
        # all inactive
        if (not elasticsearch_active
                and (elasticsearch_client_active or elasticsearch_data_active
                     or elasticsearch_master_active)):
            raise wsme.exc.ClientSideError(
                _("Operation rejected: application stx-monitor "
                  "requires charts: elasticsearch-master, "
                  "elasticsearch-client and elasticsearch-data either all "
                  "enabled, or all disabled"))

        curator_active = cutils.is_chart_enabled(
            pecan.request.dbapi, constants.HELM_APP_MONITOR,
            helm_common.HELM_CHART_ELASTICSEARCH_CURATOR,
            helm_common.HELM_NS_MONITOR)

        if (not elasticsearch_active) and curator_active:
            raise wsme.exc.ClientSideError(
                _("Operation rejected: application stx-monitor "
                  "does not allow elasticsearch-curator chart enabled "
                  "without the elasticsearch charts also enabled"))

        if not elasticsearch_active and not logstash_active:
            # Nothing to check, exit
            return

        # The required counts of labelled
        # and unlocked-enabled hosts.
        required_label_counts = dict()

        # The counts of labelled hosts.
        label_counts = dict()

        # The counts of labelled hosts
        # that are also unlocked and enabled.
        good_label_counts = dict()

        is_aio_simplex = cutils.is_aio_simplex_system(pecan.request.dbapi)

        if elasticsearch_active:
            label_counts = {
                helm_common.LABEL_MONITOR_MASTER: 0,
                helm_common.LABEL_MONITOR_DATA: 0,
                helm_common.LABEL_MONITOR_CLIENT: 0
            }

            good_label_counts = {
                helm_common.LABEL_MONITOR_MASTER: 0,
                helm_common.LABEL_MONITOR_DATA: 0,
                helm_common.LABEL_MONITOR_CLIENT: 0
            }

            if is_aio_simplex:
                # AIO simplex means one of every label.
                required_label_counts = {
                    helm_common.LABEL_MONITOR_MASTER: 1,
                    helm_common.LABEL_MONITOR_DATA: 1,
                    helm_common.LABEL_MONITOR_CLIENT: 1
                }
            else:
                # Dual controller configs
                required_label_counts = {
                    helm_common.LABEL_MONITOR_DATA: 2,
                    helm_common.LABEL_MONITOR_CLIENT: 2,
                    helm_common.LABEL_MONITOR_MASTER: 3
                }

                # For AIO-DX without worker nodes, we only need 2
                # hosts labelled as master.
                if (cutils.is_aio_duplex_system(pecan.request.dbapi)
                        and (pecan.request.dbapi.count_hosts_by_label(
                            helm_common.LABEL_MONITOR_MASTER) < 3)):
                    required_label_counts[helm_common.LABEL_MONITOR_MASTER] = 2

        if logstash_active:
            good_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 0
            label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 0

            if is_aio_simplex:
                required_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 1
            else:
                required_label_counts[helm_common.LABEL_MONITOR_CONTROLLER] = 2

        # Examine all the required labels on the given hosts
        # and build up our actual and good label counts.
        host_info = {}
        for host in hosts:
            labels = pecan.request.dbapi.label_get_by_host(host.uuid)

            host_good = (host.administrative == constants.ADMIN_UNLOCKED
                         and host.operational == constants.OPERATIONAL_ENABLED)

            host_labels_dict = {}
            for label in labels:
                if label.label_key in required_label_counts:
                    if label.label_value == helm_common.LABEL_VALUE_ENABLED:
                        label_counts[label.label_key] += 1
                        if host_good:
                            good_label_counts[label.label_key] += 1

                    host_labels_dict[label.label_key] = label.label_value

            host_info[host.hostname] = {
                "personality": host.personality,
                "labels": host_labels_dict
            }

        # If we are short of labels on unlocked and enabled hosts
        # inform the user with a detailed message.
        msg = ""
        for k, v in required_label_counts.items():
            if good_label_counts[k] < required_label_counts[k]:
                msg += (", label:%s=%s, required=%d, labelled=%d,"
                        " labelled and unlocked-enabled=%d" %
                        (k, helm_common.LABEL_VALUE_ENABLED, v,
                         label_counts[k], good_label_counts[k]))

        if msg:
            app_helper = KubeAppHelper(pecan.request.dbapi)
            msg += "\n"
            msg += app_helper._extract_missing_labels_message(
                host_info, required_label_counts)

        if msg:
            raise wsme.exc.ClientSideError(
                _("Operation rejected: application stx-monitor "
                  "does not have required unlocked-enabled and "
                  "labelled hosts{}".format(msg)))