Example #1
    def describe_service(self,
                         service_type=None,
                         service_id=None,
                         node_name=None,
                         refresh=False):
        """
        There is no guarantee which daemons are returned by describe_service, except that
        it returns the mgr we're running in.
        """
        if service_type:
            assert service_type in ("mds", "osd", "mon", "rgw",
                                    "mgr"), service_type + " unsupported"

        out = map(str, check_output(['ps', 'aux']).splitlines())
        types = [service_type] if service_type else ("mds", "osd", "mon",
                                                     "rgw", "mgr")
        processes = [
            p for p in out if any([('ceph-' + t in p) for t in types])
        ]

        result = []
        for p in processes:
            sd = orchestrator.ServiceDescription()
            sd.nodename = 'localhost'
            sd.service_instance = re.search('ceph-[^ ]+', p).group()
            result.append(sd)

        return result
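Note that on Python 3, check_output returns bytes, so the map(str, ...) call in the example above yields strings of the form "b'...'" rather than decoded text. A minimal standalone sketch that decodes the output first (an assumption for illustration, not part of the original module):

from subprocess import check_output
import re

# Decode the ps output so the substring check and the regex below operate on
# plain text rather than on the repr() of bytes.
out = check_output(['ps', 'aux']).decode('utf-8').splitlines()
for p in [line for line in out if 'ceph-' in line]:
    print(re.search('ceph-[^ ]+', p).group())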
Example #2
    def describe_service(self, service_type=None, service_name=None, refresh=False):
        if self._services:
            # Dummy data
            services = self._services
            if service_type is not None:
                services = list(filter(lambda s: s.spec.service_type == service_type, services))
        else:
            # Deduce services from daemons running on localhost
            all_daemons = self._get_ceph_daemons()
            services = []
            for daemon_type, daemons in itertools.groupby(all_daemons, key=lambda d: d.daemon_type):
                if service_type is not None and service_type != daemon_type:
                    continue
                daemon_size = len(list(daemons))
                services.append(orchestrator.ServiceDescription(
                    spec=ServiceSpec(
                        service_type=daemon_type,
                    ),
                    size=daemon_size, running=daemon_size))
        
        def _filter_func(svc):
            if service_name is not None and service_name != svc.spec.service_name():
                return False
            return True

        return list(filter(_filter_func, services))
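Note that itertools.groupby, as used in Example #2, only merges consecutive elements, so the daemon list has to be ordered by daemon_type for the grouping to be complete. A minimal standalone sketch demonstrating this (the daemon tuples are illustrative assumptions, not data from the example):

import itertools

# Illustrative (daemon_type, daemon_id) pairs.
daemons = [('osd', '0'), ('mon', 'a'), ('osd', '1'), ('mgr', 'x')]

# groupby only groups adjacent items; sort by the same key first, otherwise
# 'osd' would show up as two separate groups.
daemons.sort(key=lambda d: d[0])
for daemon_type, group in itertools.groupby(daemons, key=lambda d: d[0]):
    print(daemon_type, len(list(group)))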
Example #3
        def process_result(event_data):
            result = []
            if event_data['success']:
                for service_node, service_info in event_data["return"].items():
                    node_service_cache = []
                    for this_service_type, service_dict in service_info.items():
                        if isinstance(service_dict, str):
                            # map old form where deepsea only returned service IDs
                            # to new form where it returns a dict
                            service_dict = { 'service_instance': service_dict }
                        desc = orchestrator.ServiceDescription(nodename=service_node,
                                                               service_instance=service_dict['service_instance'],
                                                               service_type=_deepsea_to_ceph(this_service_type),
                                                               # the following may or may not be present
                                                               container_id=service_dict.get('container_id', None),
                                                               service=service_dict.get('service', None),
                                                               version=service_dict.get('version', None),
                                                               rados_config_location=service_dict.get('rados_config_location', None),
                                                               service_url = service_dict.get('service_url', None),
                                                               status=service_dict.get('status', None),
                                                               status_desc=service_dict.get('status_desc', None)
                                                               )
                        # Always add every service to the cache...
                        node_service_cache.append(desc.to_json())
                        # ...but only return the ones the caller asked for
                        if ((service_type is None or desc.service_type == service_type) and
                            (service_id is None or desc.service_instance == service_id) and
                            (node_name is None or desc.nodename == node_name)):
                            result.append(desc)

                    self.service_cache[service_node] = orchestrator.OutdatableData(node_service_cache)
            else:
                self.log.error(event_data['return'])
            return result
Example #4
    def describe_service(self, service_type=None, service_id=None, node_name=None, refresh=False):
        """
        There is no guarantee which daemons are returned by describe_service, except that
        it returns the mgr we're running in.
        """
        if service_type:
            support_services = ("mds", "osd", "mon", "rgw", "mgr", "iscsi")
            assert service_type in support_services, service_type + " unsupported"

        if self._services:
            if node_name:
                return list(filter(lambda svc: svc.nodename == node_name, self._services))
            return self._services

        out = map(str, check_output(['ps', 'aux']).splitlines())
        types = (service_type, ) if service_type else ("mds", "osd", "mon", "rgw", "mgr")
        assert isinstance(types, tuple)
        processes = [p for p in out if any([('ceph-' + t in p) for t in types])]

        result = []
        for p in processes:
            sd = orchestrator.ServiceDescription()
            sd.nodename = 'localhost'
            res = re.search('ceph-[^ ]+', p)
            assert res
            sd.service_instance = res.group()
            result.append(sd)

        return result
Example #5
 def add_rgw(self, spec):
     if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
         raise RuntimeError("must specify at least %d hosts" % spec.count)
     # ensure rgw_zone is set for these daemons
     ret, out, err = self.mon_command({
         'prefix': 'config set',
         'who': 'client.rgw.' + spec.name,
         'name': 'rgw_zone',
         'value': spec.name,
     })
     daemons = self._get_services('rgw')
     results = []
     num_added = 0
     for host in spec.placement.nodes:
         if num_added >= spec.count:
             break
         rgw_id = self.get_unique_name(daemons, spec.name)
         self.log.debug('placing rgw.%s on host %s' % (rgw_id, host))
         results.append(
             self._worker_pool.apply_async(self._create_rgw,
                                           (rgw_id, host)))
         # add to daemon list so next name(s) will also be unique
         sd = orchestrator.ServiceDescription()
         sd.service_instance = rgw_id
         sd.service_type = 'rgw'
         sd.nodename = host
         daemons.append(sd)
         num_added += 1
     return SSHWriteCompletion(results)
Example #6
    def describe_service(self, service_type, service_id):

        assert service_type in ("mds", "osd", "mon",
                                "rgw"), service_type + " unsupported"

        pods = self.rook_cluster.describe_pods(service_type, service_id)

        result = orchestrator.ServiceDescription()
        for p in pods:
            sl = orchestrator.ServiceLocation()
            sl.nodename = p['nodename']
            sl.container_id = p['name']

            if service_type == "osd":
                sl.daemon_name = "%s" % p['labels']["ceph-osd-id"]
            elif service_type == "mds":
                # MDS daemon names are the tail of the pod name with
                # an 'm' prefix.
                # TODO: Would be nice to get this out a label though.
                sl.daemon_name = "m" + sl.container_id.split("-")[-1]
            elif service_type == "mon":
                sl.daemon_name = p['labels']["mon"]
            elif service_type == "mgr":
                # FIXME: put a label on the pod to consume
                # from here
                raise NotImplementedError("mgr")
            elif service_type == "rgw":
                # FIXME: put a label on the pod to consume
                # from here
                raise NotImplementedError("rgw")

            result.locations.append(sl)

        return result
Example #7
    def describe_service(self, service_type, service_id, nodename):

        assert service_type in ("mds", "osd", "mgr", "mon", "nfs", None), service_type + " unsupported"

        pods = self.rook_cluster.describe_pods(service_type, service_id, nodename)

        result = []
        for p in pods:
            sd = orchestrator.ServiceDescription()
            sd.nodename = p['nodename']
            sd.container_id = p['name']
            sd.service_type = p['labels']['app'].replace('rook-ceph-', '')

            if sd.service_type == "osd":
                sd.daemon_name = "%s" % p['labels']["ceph-osd-id"]
            elif sd.service_type == "mds":
                sd.daemon_name = p['labels']["rook_file_system"]
            elif sd.service_type == "mon":
                sd.daemon_name = p['labels']["mon"]
            elif sd.service_type == "mgr":
                sd.daemon_name = p['labels']["mgr"]
            elif sd.service_type == "nfs":
                sd.daemon_name = p['labels']["ceph_nfs"]
                sd.rados_config_location = self.rook_cluster.get_nfs_conf_url(sd.daemon_name, p['labels']['instance'])
            else:
                # Unknown type -- skip it
                continue

            result.append(sd)

        return result
Example #8
    def describe_service(self,
                         service_type=None,
                         service_id=None,
                         node_name=None,
                         refresh=False):

        if service_type not in ("mds", "osd", "mgr", "mon", "nfs", None):
            raise orchestrator.OrchestratorValidationError(service_type +
                                                           " unsupported")

        pods = self.rook_cluster.describe_pods(service_type, service_id,
                                               node_name)

        result = []
        for p in pods:
            sd = orchestrator.ServiceDescription()
            sd.nodename = p['nodename']
            sd.container_id = p['name']
            sd.service_type = p['labels']['app'].replace('rook-ceph-', '')
            status = {
                'Pending': -1,
                'Running': 1,
                'Succeeded': 0,
                'Failed': -1,
                'Unknown': -1,
            }[p['phase']]
            sd.status = status
            sd.status_desc = p['phase']

            if sd.service_type == "osd":
                sd.service_instance = "%s" % p['labels']["ceph-osd-id"]
            elif sd.service_type == "mds":
                sd.service = p['labels']['rook_file_system']
                pfx = "{0}-".format(sd.service)
                sd.service_instance = p['labels']['ceph_daemon_id'].replace(
                    pfx, '', 1)
            elif sd.service_type == "mon":
                sd.service_instance = p['labels']["mon"]
            elif sd.service_type == "mgr":
                sd.service_instance = p['labels']["mgr"]
            elif sd.service_type == "nfs":
                sd.service = p['labels']['ceph_nfs']
                sd.service_instance = p['labels']['instance']
                sd.rados_config_location = self.rook_cluster.get_nfs_conf_url(
                    sd.service, sd.service_instance)
            elif sd.service_type == "rgw":
                sd.service = p['labels']['rgw']
                sd.service_instance = p['labels']['ceph_daemon_id']
            else:
                # Unknown type -- skip it
                continue

            result.append(sd)

        return result
Example #9
    def describe_service(self,
                         service_type=None,
                         service_id=None,
                         node_name=None,
                         refresh=False):

        if service_type not in ("mds", "osd", "mgr", "mon", "nfs", None):
            raise orchestrator.OrchestratorValidationError(service_type +
                                                           " unsupported")

        #daemons = self.get_daemons()
        daemons = {}
        for host, _ in self._get_hosts():
            self.log.info("refresh stale daemons for '{}'".format(host))
            out, code = self._run_ceph_daemon(host,
                                              'mon',
                                              'ls', [],
                                              no_fsid=True)
            daemons[host] = json.loads(''.join(out))

        result = []
        for host, ls in daemons.items():
            for d in ls:
                if not d['style'].startswith('ceph-daemon'):
                    self.log.debug('ignoring non-ceph-daemon on %s: %s' %
                                   (host, d))
                    continue
                if d['fsid'] != self._cluster_fsid:
                    self.log.debug('ignoring foreign daemon on %s: %s' %
                                   (host, d))
                    continue
                self.log.debug('including %s' % d)
                sd = orchestrator.ServiceDescription()
                sd.service_type = d['name'].split('.')[0]
                if service_type and service_type != sd.service_type:
                    continue
                if '.' in d['name']:
                    sd.service_instance = d['name'].split('.')[1]
                else:
                    sd.service_instance = host  # e.g., crash
                if service_id and service_id != sd.service_instance:
                    continue
                sd.nodename = host
                sd.container_id = d['container_id']
                sd.version = d['version']
                sd.status_desc = d['state']
                sd.status = {
                    'running': 1,
                    'inactive': 0,
                    'error': -1,
                    'unknown': -1,
                }[d['state']]
                result.append(sd)

        return orchestrator.TrivialReadCompletion(result)
Example #10
 def process_result(event_data):
     result = []
     if event_data['success']:
         for node_name, service_info in event_data["return"].items():
             for service_type, service_instance in service_info.items():
                 desc = orchestrator.ServiceDescription()
                 desc.nodename = node_name
                 desc.service_instance = service_instance
                 desc.service_type = service_type
                 result.append(desc)
     return result
Example #11
 def process_result(raw_event):
     result = []
     raw_event = json.loads(raw_event)
     if raw_event['data']['success']:
         for node_name, service_info in raw_event["data"][
                 "return"].items():
             for service_type, daemon_name in service_info.items():
                 desc = orchestrator.ServiceDescription()
                 desc.nodename = node_name
                 desc.daemon_name = daemon_name
                 desc.service_type = service_type
                 result.append(desc)
     return result
Example #12
 def process_result(event_data):
     result = []
     if event_data['success']:
         for node_name, service_info in event_data["return"].items():
             node_service_cache = []
             for service_type, service_instance in service_info.items():
                 desc = orchestrator.ServiceDescription(nodename=node_name,
                                                        service_instance=service_instance,
                                                        service_type=service_type)
                 result.append(desc)
                 node_service_cache.append(desc.to_json())
             self.service_cache[node_name] = orchestrator.OutdatableData(node_service_cache)
     else:
         self.log.error(event_data['return'])
     return result
Example #13
 def _add_rgw(daemons):
     args = []
     num_added = 0
     for host, _, name in spec.placement.nodes:
         if num_added >= spec.count:
             break
         rgw_id = self.get_unique_name(daemons, spec.name, name)
         self.log.debug('placing rgw.%s on host %s' % (rgw_id, host))
         args.append((rgw_id, host))
         # add to daemon list so next name(s) will also be unique
         sd = orchestrator.ServiceDescription()
         sd.service_instance = rgw_id
         sd.service_type = 'rgw'
         sd.nodename = host
         daemons.append(sd)
         num_added += 1
     return self._create_rgw(args)
Example #14
        def _get_services_result(results):
            services = {}
            for host, data in zip(hosts, results + in_cache):
                services[host] = data

            result = []
            for host, ls in services.items():
                for d in ls:
                    if not d['style'].startswith('ceph-daemon'):
                        self.log.debug('ignoring non-ceph-daemon on %s: %s' % (host, d))
                        continue
                    if d['fsid'] != self._cluster_fsid:
                        self.log.debug('ignoring foreign daemon on %s: %s' % (host, d))
                        continue
                    self.log.debug('including %s' % d)
                    sd = orchestrator.ServiceDescription()
                    sd.service_type = d['name'].split('.')[0]
                    if service_type and service_type != sd.service_type:
                        continue
                    if '.' in d['name']:
                        sd.service_instance = '.'.join(d['name'].split('.')[1:])
                    else:
                        sd.service_instance = host  # e.g., crash
                    if service_id and service_id != sd.service_instance:
                        continue
                    if service_name and not sd.service_instance.startswith(service_name + '.'):
                        continue
                    sd.nodename = host
                    sd.container_id = d.get('container_id')
                    sd.container_image_name = d.get('container_image_name')
                    sd.container_image_id = d.get('container_image_id')
                    sd.version = d.get('version')
                    sd.status_desc = d['state']
                    sd.status = {
                        'running': 1,
                        'stopped': 0,
                        'error': -1,
                        'unknown': -1,
                    }[d['state']]
                    result.append(sd)
            return result
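The state-to-status lookup above (also used in Examples #9 and #17) raises KeyError if a daemon reports a state that is missing from the dict. A defensive variant that falls back to -1 for unexpected states (an assumption for illustration, not part of the original code):

# Map daemon state strings to the numeric status used by ServiceDescription.
STATE_TO_STATUS = {'running': 1, 'stopped': 0, 'error': -1, 'unknown': -1}

def state_to_status(state):
    # Unknown states fall back to -1 instead of raising KeyError.
    return STATE_TO_STATUS.get(state, -1)

print(state_to_status('running'))   # 1
print(state_to_status('degraded'))  # -1, not in the map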
Example #15
 def add_mds(self, spec):
     if not spec.placement.nodes or len(spec.placement.nodes) < spec.count:
         raise RuntimeError("must specify at least %d hosts" % spec.count)
     daemons = self._get_services('mds')
     results = []
     num_added = 0
     for host in spec.placement.nodes:
         if num_added >= spec.count:
             break
         mds_id = self.get_unique_name(daemons, spec.name)
         self.log.debug('placing mds.%s on host %s' % (mds_id, host))
         results.append(
             self._worker_pool.apply_async(self._create_mds,
                                           (mds_id, host)))
         # add to daemon list so next name(s) will also be unique
         sd = orchestrator.ServiceDescription()
         sd.service_instance = mds_id
         sd.service_type = 'mds'
         sd.nodename = host
         daemons.append(sd)
         num_added += 1
     return SSHWriteCompletion(results)
Example #16
    def describe_service(self, service_type=None, service_name=None,
                         refresh=False):
        now = datetime.datetime.utcnow()

        # CephCluster
        cl = self.rook_cluster.rook_api_get(
            "cephclusters/{0}".format(self.rook_cluster.rook_env.cluster_name))
        self.log.debug('CephCluster %s' % cl)
        image_name = cl['spec'].get('cephVersion', {}).get('image', None)
        num_nodes = len(self.rook_cluster.get_node_names())

        spec = {}
        spec['mon'] = orchestrator.ServiceDescription(
            service_name='mon',
            spec=ServiceSpec(
                'mon',
                placement=PlacementSpec(
                    count=cl['spec'].get('mon', {}).get('count', 1),
                ),
            ),
            size=cl['spec'].get('mon', {}).get('count', 1),
            container_image_name=image_name,
            last_refresh=now,
        )
        spec['mgr'] = orchestrator.ServiceDescription(
            service_name='mgr',
            spec=ServiceSpec(
                'mgr',
                placement=PlacementSpec.from_string('count:1'),
            ),
            size=1,
            container_image_name=image_name,
            last_refresh=now,
        )
        if not cl['spec'].get('crashCollector', {}).get('disable', False):
            spec['crash'] = orchestrator.ServiceDescription(
                service_name='crash',
                spec=ServiceSpec(
                    'crash',
                    placement=PlacementSpec.from_string('all:true'),
                ),
                size=num_nodes,
                container_image_name=image_name,
                last_refresh=now,
            )

        # CephFilesystems
        all_fs = self.rook_cluster.rook_api_get(
            "cephfilesystems/")
        self.log.debug('CephFilesystems %s' % all_fs)
        for fs in all_fs.get('items', []):
            svc = 'mds.' + fs['metadata']['name']
            if svc in spec:
                continue
            # FIXME: we are conflating active (+ standby) with count
            active = fs['spec'].get('metadataServer', {}).get('activeCount', 1)
            total_mds = active
            if fs['spec'].get('metadataServer', {}).get('activeStandby', False):
                total_mds = active * 2
            spec[svc] = orchestrator.ServiceDescription(
                service_name=svc,
                spec=ServiceSpec(
                    svc,
                    placement=PlacementSpec(count=active),
                ),
                size=total_mds,
                container_image_name=image_name,
                last_refresh=now,
            )

        # FIXME: CephObjectstores

        for dd in self._list_daemons():
            if dd.service_name() not in spec:
                continue
            spec[dd.service_name()].running += 1
        return [v for k, v in spec.items()]
Example #17
    def _get_services(self,
                      service_type=None,
                      service_name=None,
                      service_id=None,
                      node_name=None,
                      refresh=False):
        hosts = []
        wait_for = []
        for host, host_info in self.service_cache.items_filtered():
            hosts.append(host)
            if host_info.outdated(self.service_cache_timeout) or refresh:
                self.log.info("refresing stale services for '{}'".format(host))
                wait_for.append(
                    SSHReadCompletion(
                        self._worker_pool.apply_async(
                            self._refresh_host_services, (host, ))))
            else:
                self.log.debug('have recent services for %s: %s' %
                               (host, host_info.data))
                wait_for.append(
                    orchestrator.TrivialReadCompletion([host_info.data]))
        self._orchestrator_wait(wait_for)

        services = {}
        for host, c in zip(hosts, wait_for):
            services[host] = c.result[0]

        result = []
        for host, ls in services.items():
            for d in ls:
                if not d['style'].startswith('ceph-daemon'):
                    self.log.debug('ignoring non-ceph-daemon on %s: %s' %
                                   (host, d))
                    continue
                if d['fsid'] != self._cluster_fsid:
                    self.log.debug('ignoring foreign daemon on %s: %s' %
                                   (host, d))
                    continue
                self.log.debug('including %s' % d)
                sd = orchestrator.ServiceDescription()
                sd.service_type = d['name'].split('.')[0]
                if service_type and service_type != sd.service_type:
                    continue
                if '.' in d['name']:
                    sd.service_instance = '.'.join(d['name'].split('.')[1:])
                else:
                    sd.service_instance = host  # e.g., crash
                if service_id and service_id != sd.service_instance:
                    continue
                if service_name and not sd.service_instance.startswith(
                        service_name + '.'):
                    continue
                sd.nodename = host
                sd.container_id = d['container_id']
                sd.version = d['version']
                sd.status_desc = d['state']
                sd.status = {
                    'running': 1,
                    'stopped': 0,
                    'error': -1,
                    'unknown': -1,
                }[d['state']]
                result.append(sd)
        return result
Example #18
    def describe_service(
            self,
            service_type: Optional[str] = None,
            service_name: Optional[str] = None,
            refresh: bool = False) -> List[orchestrator.ServiceDescription]:
        now = datetime_now()

        # CephCluster
        cl = self.rook_cluster.rook_api_get("cephclusters/{0}".format(
            self.rook_cluster.rook_env.cluster_name))
        self.log.debug('CephCluster %s' % cl)
        image_name = cl['spec'].get('cephVersion', {}).get('image', None)
        num_nodes = len(self.rook_cluster.get_node_names())

        spec = {}
        if service_type == 'mon' or service_type is None:
            spec['mon'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'mon',
                    placement=PlacementSpec(count=cl['spec'].get(
                        'mon', {}).get('count', 1), ),
                ),
                size=cl['spec'].get('mon', {}).get('count', 1),
                container_image_name=image_name,
                last_refresh=now,
            )
        if service_type == 'mgr' or service_type is None:
            spec['mgr'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'mgr',
                    placement=PlacementSpec.from_string('count:1'),
                ),
                size=1,
                container_image_name=image_name,
                last_refresh=now,
            )

        if (service_type == 'crash'
                or service_type is None and not cl['spec'].get(
                    'crashCollector', {}).get('disable', False)):
            spec['crash'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'crash',
                    placement=PlacementSpec.from_string('*'),
                ),
                size=num_nodes,
                container_image_name=image_name,
                last_refresh=now,
            )

        if service_type == 'mds' or service_type is None:
            # CephFilesystems
            all_fs = self.rook_cluster.get_resource("cephfilesystems")
            for fs in all_fs:
                svc = 'mds.' + fs['metadata']['name']
                if svc in spec:
                    continue
                # FIXME: we are conflating active (+ standby) with count
                active = fs['spec'].get('metadataServer',
                                        {}).get('activeCount', 1)
                total_mds = active
                if fs['spec'].get('metadataServer',
                                  {}).get('activeStandby', False):
                    total_mds = active * 2
                spec[svc] = orchestrator.ServiceDescription(
                    spec=ServiceSpec(
                        service_type='mds',
                        service_id=fs['metadata']['name'],
                        placement=PlacementSpec(count=active),
                    ),
                    size=total_mds,
                    container_image_name=image_name,
                    last_refresh=now,
                )

        if service_type == 'rgw' or service_type is None:
            # CephObjectstores
            all_zones = self.rook_cluster.get_resource("cephobjectstores")
            for zone in all_zones:
                svc = 'rgw.' + zone['metadata']['name']
                if svc in spec:
                    continue
                active = zone['spec']['gateway']['instances']
                if 'securePort' in zone['spec']['gateway']:
                    ssl = True
                    port = zone['spec']['gateway']['securePort']
                else:
                    ssl = False
                    port = zone['spec']['gateway']['port'] or 80
                rgw_zone = zone['spec'].get('zone', {}).get('name') or None
                spec[svc] = orchestrator.ServiceDescription(
                    spec=RGWSpec(
                        service_id=zone['metadata']['name'],
                        rgw_zone=rgw_zone,
                        ssl=ssl,
                        rgw_frontend_port=port,
                        placement=PlacementSpec(count=active),
                    ),
                    size=active,
                    container_image_name=image_name,
                    last_refresh=now,
                )

        if service_type == 'nfs' or service_type is None:
            # CephNFSes
            all_nfs = self.rook_cluster.get_resource("cephnfses")
            nfs_pods = self.rook_cluster.describe_pods('nfs', None, None)
            for nfs in all_nfs:
                if nfs['spec']['rados']['pool'] != NFS_POOL_NAME:
                    continue
                nfs_name = nfs['metadata']['name']
                svc = 'nfs.' + nfs_name
                if svc in spec:
                    continue
                active = nfs['spec'].get('server', {}).get('active')
                creation_timestamp = datetime.datetime.strptime(
                    nfs['metadata']['creationTimestamp'], '%Y-%m-%dT%H:%M:%SZ')
                spec[svc] = orchestrator.ServiceDescription(
                    spec=NFSServiceSpec(
                        service_id=nfs_name,
                        placement=PlacementSpec(count=active),
                    ),
                    size=active,
                    last_refresh=now,
                    running=len([
                        1 for pod in nfs_pods
                        if pod['labels']['ceph_nfs'] == nfs_name
                    ]),
                    created=creation_timestamp.astimezone(
                        tz=datetime.timezone.utc))
        if service_type == 'osd' or service_type is None:
            # OSDs
            # FIXME: map running OSDs back to their respective services...

            # the catch-all unmanaged
            all_osds = self.rook_cluster.get_osds()
            svc = 'osd'
            spec[svc] = orchestrator.ServiceDescription(
                spec=DriveGroupSpec(
                    unmanaged=True,
                    service_type='osd',
                ),
                size=len(all_osds),
                last_refresh=now,
                running=sum(osd.status.phase == 'Running' for osd in all_osds))

            # drivegroups
            for name, dg in self._drive_group_map.items():
                spec[f'osd.{name}'] = orchestrator.ServiceDescription(
                    spec=dg,
                    last_refresh=now,
                    size=0,
                    running=0,
                )

        if service_type == 'rbd-mirror' or service_type is None:
            # rbd-mirrors
            all_mirrors = self.rook_cluster.get_resource("cephrbdmirrors")
            for mirror in all_mirrors:
                logging.warn(mirror)
                mirror_name = mirror['metadata']['name']
                svc = 'rbd-mirror.' + mirror_name
                if svc in spec:
                    continue
                spec[svc] = orchestrator.ServiceDescription(
                    spec=ServiceSpec(
                        service_id=mirror_name,
                        service_type="rbd-mirror",
                        placement=PlacementSpec(count=1),
                    ),
                    size=1,
                    last_refresh=now,
                )

        for dd in self._list_daemons():
            if dd.service_name() not in spec:
                continue
            service = spec[dd.service_name()]
            service.running += 1
            if not service.container_image_id:
                service.container_image_id = dd.container_image_id
            if not service.container_image_name:
                service.container_image_name = dd.container_image_name
            if service.last_refresh is None or not dd.last_refresh or dd.last_refresh < service.last_refresh:
                service.last_refresh = dd.last_refresh
            if service.created is None or dd.created is None or dd.created < service.created:
                service.created = dd.created

        return [v for k, v in spec.items()]
Example #19
    def describe_service(
            self,
            service_type: Optional[str] = None,
            service_name: Optional[str] = None,
            refresh: bool = False) -> List[orchestrator.ServiceDescription]:
        now = datetime_now()

        # CephCluster
        cl = self.rook_cluster.rook_api_get("cephclusters/{0}".format(
            self.rook_cluster.rook_env.cluster_name))
        self.log.debug('CephCluster %s' % cl)
        image_name = cl['spec'].get('cephVersion', {}).get('image', None)
        num_nodes = len(self.rook_cluster.get_node_names())

        spec = {}
        if service_type == 'mon' or service_type is None:
            spec['mon'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'mon',
                    placement=PlacementSpec(count=cl['spec'].get(
                        'mon', {}).get('count', 1), ),
                ),
                size=cl['spec'].get('mon', {}).get('count', 1),
                container_image_name=image_name,
                last_refresh=now,
            )
        if service_type == 'mgr' or service_type is None:
            spec['mgr'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'mgr',
                    placement=PlacementSpec.from_string('count:1'),
                ),
                size=1,
                container_image_name=image_name,
                last_refresh=now,
            )
        if not cl['spec'].get('crashCollector', {}).get('disable', False):
            spec['crash'] = orchestrator.ServiceDescription(
                spec=ServiceSpec(
                    'crash',
                    placement=PlacementSpec.from_string('*'),
                ),
                size=num_nodes,
                container_image_name=image_name,
                last_refresh=now,
            )

        if service_type == 'mds' or service_type is None:
            # CephFilesystems
            all_fs = self.rook_cluster.rook_api_get("cephfilesystems/")
            self.log.debug('CephFilesystems %s' % all_fs)
            for fs in all_fs.get('items', []):
                svc = 'mds.' + fs['metadata']['name']
                if svc in spec:
                    continue
                # FIXME: we are conflating active (+ standby) with count
                active = fs['spec'].get('metadataServer',
                                        {}).get('activeCount', 1)
                total_mds = active
                if fs['spec'].get('metadataServer',
                                  {}).get('activeStandby', False):
                    total_mds = active * 2
                spec[svc] = orchestrator.ServiceDescription(
                    spec=ServiceSpec(
                        service_type='mds',
                        service_id=fs['metadata']['name'],
                        placement=PlacementSpec(count=active),
                    ),
                    size=total_mds,
                    container_image_name=image_name,
                    last_refresh=now,
                )

        if service_type == 'rgw' or service_type is None:
            # CephObjectstores
            all_zones = self.rook_cluster.rook_api_get("cephobjectstores/")
            self.log.debug('CephObjectstores %s' % all_zones)
            for zone in all_zones.get('items', []):
                rgw_realm = zone['metadata']['name']
                rgw_zone = rgw_realm
                svc = 'rgw.' + rgw_realm + '.' + rgw_zone
                if svc in spec:
                    continue
                active = zone['spec']['gateway']['instances']
                if 'securePort' in zone['spec']['gateway']:
                    ssl = True
                    port = zone['spec']['gateway']['securePort']
                else:
                    ssl = False
                    port = zone['spec']['gateway']['port'] or 80
                spec[svc] = orchestrator.ServiceDescription(
                    spec=RGWSpec(
                        service_id=rgw_realm + '.' + rgw_zone,
                        rgw_realm=rgw_realm,
                        rgw_zone=rgw_zone,
                        ssl=ssl,
                        rgw_frontend_port=port,
                        placement=PlacementSpec(count=active),
                    ),
                    size=active,
                    container_image_name=image_name,
                    last_refresh=now,
                )

        if service_type == 'nfs' or service_type is None:
            # CephNFSes
            all_nfs = self.rook_cluster.rook_api_get("cephnfses/")
            self.log.warning('CephNFS %s' % all_nfs)
            for nfs in all_nfs.get('items', []):
                nfs_name = nfs['metadata']['name']
                svc = 'nfs.' + nfs_name
                if svc in spec:
                    continue
                active = nfs['spec'].get('server', {}).get('active')
                spec[svc] = orchestrator.ServiceDescription(
                    spec=NFSServiceSpec(
                        service_id=nfs_name,
                        placement=PlacementSpec(count=active),
                    ),
                    size=active,
                    last_refresh=now,
                )

        for dd in self._list_daemons():
            if dd.service_name() not in spec:
                continue
            service = spec[dd.service_name()]
            service.running += 1
            if not service.container_image_id:
                service.container_image_id = dd.container_image_id
            if not service.container_image_name:
                service.container_image_name = dd.container_image_name
            if service.last_refresh is None or not dd.last_refresh or dd.last_refresh < service.last_refresh:
                service.last_refresh = dd.last_refresh
            if service.created is None or dd.created is None or dd.created < service.created:
                service.created = dd.created

        return [v for k, v in spec.items()]